Skip to content

Commit 262493f

Browse files
committed
Add tests of the other two collinizers
1 parent 1fb4772 commit 262493f

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package edu.stanford.nlp.parser.lexparser;
2+
3+
import org.junit.Assert;
4+
import org.junit.Test;
5+
6+
import edu.stanford.nlp.trees.Tree;
7+
8+
public class NegraPennCollinizerTest {
9+
@Test
10+
public void testRemovePunct() {
11+
NegraPennTreebankParserParams tlpp = new NegraPennTreebankParserParams();
12+
NegraPennCollinizer collinizer = new NegraPennCollinizer(tlpp);
13+
14+
// Test that the collinizer removes a comma
15+
// Lazy test writing: just use the English version, updated to work with the German tags
16+
Tree gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) ($, ,) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))");
17+
Tree goldT = collinizer.transformTree(gold, gold);
18+
Tree goldExpected = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))");
19+
Assert.assertEquals(goldExpected, goldT);
20+
21+
// Same test, but it should pick up the comma just based on the tag
22+
gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) ($, zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))");
23+
goldT = collinizer.transformTree(gold, gold);
24+
Assert.assertEquals(goldExpected, goldT);
25+
26+
// Difference with the English: the Negra collinizer does not look at punct words
27+
// Perhaps that was a mistake?
28+
gold = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC ,) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))");
29+
goldT = collinizer.transformTree(gold, gold);
30+
Assert.assertEquals(gold, goldT);
31+
32+
// Double check that (CC zzzzz) is not deleted by default
33+
Tree guess = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))");
34+
Tree guessT = collinizer.transformTree(guess, guess);
35+
Assert.assertEquals(guess, guessT);
36+
37+
// Check that the guess tree has the non-punct word removed if it is a punct in the gold tree
38+
gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) ($, zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))");
39+
guessT = collinizer.transformTree(guess, gold);
40+
Assert.assertEquals(goldExpected, guessT);
41+
}
42+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package edu.stanford.nlp.trees.international.pennchinese;
2+
3+
import org.junit.Assert;
4+
import org.junit.Test;
5+
6+
import edu.stanford.nlp.trees.Tree;
7+
import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
8+
9+
public class ChineseCollinizerTest {
10+
@Test
11+
public void testRemovePunct() {
12+
ChineseTreebankLanguagePack tlp = new ChineseTreebankLanguagePack();
13+
ChineseCollinizer collinizer = new ChineseCollinizer(tlp);
14+
15+
// Test that the collinizer removes a comma
16+
// Lazy test writing: just use the English version, updated to work with the Chinese tags
17+
Tree gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (PU ,) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))");
18+
Tree goldT = collinizer.transformTree(gold, gold);
19+
Tree goldExpected = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))");
20+
Assert.assertEquals(goldExpected, goldT);
21+
22+
// Same test, but it should pick up the comma just based on the tag
23+
gold = Tree.valueOf("(ROOT (S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (PU zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie))))))");
24+
goldT = collinizer.transformTree(gold, gold);
25+
Assert.assertEquals(goldExpected, goldT);
26+
27+
// It should also pick up the comma based on the word
28+
gold = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC ,) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))");
29+
goldT = collinizer.transformTree(gold, gold);
30+
Assert.assertEquals(goldExpected, goldT);
31+
32+
// Double check that (CC zzzzz) is not deleted by default
33+
Tree guess = Tree.valueOf("(S (S (NP (PRP I)) (VP (VBP like) (NP (JJ blue) (NN skin)))) (CC zzzzz) (CC and) (S (NP (PRP I)) (VP (MD cannot) (VP (VB lie)))))");
34+
Tree guessT = collinizer.transformTree(guess, guess);
35+
Assert.assertEquals(guess, guessT);
36+
37+
// Check that the guess tree has the non-punct word removed if it is a punct in the gold tree
38+
guessT = collinizer.transformTree(guess, gold);
39+
Assert.assertEquals(goldExpected, guessT);
40+
}
41+
}

0 commit comments

Comments
 (0)