|
| 1 | +package edu.stanford.nlp.parser.lexparser; |
| 2 | + |
| 3 | +import org.junit.Assert; |
| 4 | +import org.junit.Test; |
| 5 | + |
| 6 | +import edu.stanford.nlp.trees.Tree; |
| 7 | + |
| 8 | +public class NegraPennCollinizerTest { |
| 9 | + @Test |
| 10 | + public void testRemovePunct() { |
| 11 | + NegraPennTreebankParserParams tlpp = new NegraPennTreebankParserParams(); |
| 12 | + NegraPennCollinizer collinizer = new NegraPennCollinizer(tlpp); |
| 13 | + |
| 14 | + // Test that the collinizer removes a comma |
| 15 | + // Lazy test writing: just use the English version, updated to work with the German tags |
| 16 | + Tree gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) ($, ,) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))"); |
| 17 | + Tree goldT = collinizer.transformTree(gold, gold); |
| 18 | + Tree goldExpected = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))"); |
| 19 | + Assert.assertEquals(goldExpected, goldT); |
| 20 | + |
| 21 | + // Same test, but it should pick up the comma just based on the tag |
| 22 | + gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) ($, zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))"); |
| 23 | + goldT = collinizer.transformTree(gold, gold); |
| 24 | + Assert.assertEquals(goldExpected, goldT); |
| 25 | + |
| 26 | + // Difference with the English: the Negra collinizer does not look at punct words |
| 27 | + // Perhaps that was a mistake? |
| 28 | + gold = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC ,) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))"); |
| 29 | + goldT = collinizer.transformTree(gold, gold); |
| 30 | + Assert.assertEquals(gold, goldT); |
| 31 | + |
| 32 | + // Double check that (CC zzzzz) is not deleted by default |
| 33 | + Tree guess = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))"); |
| 34 | + Tree guessT = collinizer.transformTree(guess, guess); |
| 35 | + Assert.assertEquals(guess, guessT); |
| 36 | + |
| 37 | + // Check that the guess tree has the non-punct word removed if it is a punct in the gold tree |
| 38 | + gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) ($, zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))"); |
| 39 | + guessT = collinizer.transformTree(guess, gold); |
| 40 | + Assert.assertEquals(goldExpected, guessT); |
| 41 | + } |
| 42 | +} |
0 commit comments