Skip to content

Commit 5d3367e

Browse files
committed
Upgrade Ssurgeon MergeNodes to treat links inside the same subtree as not relevant to which node is the head. Only consider links outside the subtree when picking a parent
1 parent dc898c5 commit 5d3367e

File tree

1 file changed

+42
-22
lines changed

1 file changed

+42
-22
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/MergeNodes.java

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -66,48 +66,68 @@ public String toEditString() {
6666
*/
6767
@Override
6868
public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
69-
List<IndexedWord> nodes = new ArrayList<>();
69+
Set<IndexedWord> nodeSet = new HashSet<>();
7070
for (String name : names) {
7171
IndexedWord node = sm.getNode(name);
7272
if (node == null) {
7373
return false;
7474
}
75-
nodes.add(node);
75+
nodeSet.add(node);
7676
}
77-
Collections.sort(nodes);
7877

7978
IndexedWord head = null;
80-
for (IndexedWord candidate : nodes) {
81-
if (sg.hasChildren(candidate)) {
82-
// if multiple nodes have children inside the graph,
83-
// perhaps we could merge them all,
84-
// but the easiest thing to do is just abort
85-
// TODO: an alternate approach would be to look for nodes with a head
86-
// outside the nodes in the phrase to merge
79+
for (IndexedWord candidate : nodeSet) {
80+
Set<IndexedWord> parents = sg.getParents(candidate);
81+
if (parents.size() == 0) {
82+
// found a root
83+
// if something else is already the head,
84+
// we don't know how to handle that,
85+
// so we abort this operation
8786
if (head != null) {
8887
return false;
8988
}
9089
head = candidate;
90+
continue;
91+
}
92+
for (IndexedWord parent : parents) {
93+
if (nodeSet.contains(parent)) {
94+
continue;
95+
}
96+
// parent is outside this subtree
97+
// therefore, we can use this word as the head of the subtree
98+
// but if we already have a head, give up instead
99+
if (head != null) {
100+
return false;
101+
}
102+
head = candidate;
103+
break;
91104
}
92105
}
106+
if (head == null) {
107+
return false;
108+
}
93109

94-
Set<Integer> depIndices = new HashSet<Integer>();
95-
for (IndexedWord other : nodes) {
96-
if (other == head) {
110+
// for now, only allow the head to have edges to children outside the subtree
111+
// TODO: instead, could make them all point to the new merged word...
112+
// but it's not clear that's a structure we want to allow merged
113+
for (IndexedWord candidate : nodeSet) {
114+
if (candidate == head) {
97115
continue;
98116
}
99-
Set<IndexedWord> parents = sg.getParents(other);
100-
// this shouldn't happen
101-
if (parents.size() == 0) {
102-
return false;
103-
}
104-
// iterate instead of just do the first in case
105-
// this one day is doing an graph with extra dependencies
106-
for (IndexedWord parent : parents) {
107-
if (parent != head) {
117+
for (IndexedWord child : sg.getChildren(candidate)) {
118+
if (!nodeSet.contains(child)) {
108119
return false;
109120
}
110121
}
122+
}
123+
ArrayList<IndexedWord> nodes = new ArrayList<>(nodeSet);
124+
Collections.sort(nodes);
125+
126+
Set<Integer> depIndices = new HashSet<Integer>();
127+
for (IndexedWord other : nodes) {
128+
if (other == head) {
129+
continue;
130+
}
111131
depIndices.add(other.index());
112132
}
113133

0 commit comments

Comments
 (0)