Skip to content

Commit facd62f

Browse files
committed
Add 'uniq' as a keyword to SemgrexParser.jj
Add a UniqPattern which removes duplicates based on the node names given (using the values of those nodes). Add some unit testing of the uniq search parsing functionality: SemgrexParser should fail if a requested node is not in the pattern. Also, uniq should be usable as a node name. Test a couple of varieties of this operation. To allow uniq for a ProcessSemgrexRequest, we need to decode all sentences from the request first, then turn that into a response. Flip the order of matching in ProcessSemgrexRequest so that, for each pattern, it matches all of the sentences at once. This allows for operations on the complete batch of matches, such as the new uniq operator. We also refactor ProcessSemgrexRequest and make the CoreNLPServer use the refactored method as well. Add a test of uniq for ProcessSemgrexRequest as well (it should now produce only one result for two graphs, not two results).
1 parent 47b349d commit facd62f

File tree

10 files changed

+802
-320
lines changed

10 files changed

+802
-320
lines changed

src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,18 +1348,12 @@ public void handle(HttpExchange httpExchange) throws IOException {
13481348
return Pair.makePair("".getBytes(), null);
13491349
}
13501350

1351-
CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder();
1352-
int sentenceIdx = 0;
1353-
for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
1354-
SemanticGraph graph = sentence.get(dependenciesType.annotation());
1355-
CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder();
1356-
graphResultBuilder.addResult(ProcessSemgrexRequest.matchSentence(regex, graph, 0, sentenceIdx));
1357-
responseBuilder.addResult(graphResultBuilder.build());
1358-
++sentenceIdx;
1359-
}
1351+
List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
1352+
List<SemgrexPattern> patterns = Collections.singletonList(regex);
1353+
CoreNLPProtos.SemgrexResponse semgrexResponse = ProcessSemgrexRequest.processRequest(sentences, patterns);
13601354

13611355
ByteArrayOutputStream os = new ByteArrayOutputStream();
1362-
responseBuilder.build().writeTo(os);
1356+
semgrexResponse.writeTo(os);
13631357
os.close();
13641358

13651359
return Pair.makePair(os.toByteArray(), doc);

src/edu/stanford/nlp/semgraph/semgrex/ProcessSemgrexRequest.java

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,31 @@
1010
import java.io.InputStream;
1111
import java.io.IOException;
1212
import java.io.OutputStream;
13+
import java.util.ArrayList;
1314
import java.util.List;
1415
import java.util.stream.Collectors;
1516

17+
import edu.stanford.nlp.ling.CoreAnnotations;
1618
import edu.stanford.nlp.ling.CoreLabel;
1719
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
1820
import edu.stanford.nlp.pipeline.CoreNLPProtos;
1921
import edu.stanford.nlp.semgraph.SemanticGraph;
22+
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
2023
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
2124
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
2225
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
26+
import edu.stanford.nlp.util.ArrayCoreMap;
27+
import edu.stanford.nlp.util.CoreMap;
28+
import edu.stanford.nlp.util.Pair;
2329
import edu.stanford.nlp.util.ProcessProtobufRequest;
2430

2531
public class ProcessSemgrexRequest extends ProcessProtobufRequest {
2632
/**
2733
* Builds a single inner SemgrexResult structure from the pair of a SemgrexPattern and a SemanticGraph
2834
*/
29-
public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, int patternIdx, int graphIdx) {
35+
public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, List<SemgrexMatch> matches, int patternIdx, int graphIdx) {
3036
CoreNLPProtos.SemgrexResponse.SemgrexResult.Builder semgrexResultBuilder = CoreNLPProtos.SemgrexResponse.SemgrexResult.newBuilder();
31-
SemgrexMatcher matcher = pattern.matcher(graph);
32-
while (matcher.find()) {
37+
for (SemgrexMatch matcher : matches) {
3338
CoreNLPProtos.SemgrexResponse.Match.Builder matchBuilder = CoreNLPProtos.SemgrexResponse.Match.newBuilder();
3439
matchBuilder.setMatchIndex(matcher.getMatch().index());
3540
matchBuilder.setSemgrexIndex(patternIdx);
@@ -74,37 +79,63 @@ public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexP
7479
return semgrexResultBuilder.build();
7580
}
7681

82+
public static CoreNLPProtos.SemgrexResponse processRequest(List<CoreMap> sentences, List<SemgrexPattern> patterns) {
83+
CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder();
84+
List<Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>>> allMatches = new ArrayList<>();
85+
for (CoreMap sentence : sentences) {
86+
allMatches.add(new Pair<>(sentence, new ArrayList<>()));
87+
}
88+
for (SemgrexPattern pattern : patterns) {
89+
List<Pair<CoreMap, List<SemgrexMatch>>> patternMatches = pattern.matchSentences(sentences, true);
90+
for (int i = 0; i < sentences.size(); ++i) {
91+
Pair<CoreMap, List<SemgrexMatch>> sentenceMatches = patternMatches.get(i);
92+
allMatches.get(i).second().add(new Pair<>(pattern, sentenceMatches.second()));
93+
}
94+
}
95+
96+
int graphIdx = 0;
97+
for (Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>> sentenceMatches : allMatches) {
98+
CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder();
99+
100+
int patternIdx = 0;
101+
SemanticGraph graph = sentenceMatches.first().get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
102+
for (Pair<SemgrexPattern, List<SemgrexMatch>> patternMatches : sentenceMatches.second()) {
103+
SemgrexPattern pattern = patternMatches.first();
104+
graphResultBuilder.addResult(matchSentence(pattern, graph, patternMatches.second(), patternIdx, graphIdx));
105+
++patternIdx;
106+
}
107+
108+
responseBuilder.addResult(graphResultBuilder.build());
109+
++graphIdx;
110+
}
111+
return responseBuilder.build();
112+
}
113+
77114
/**
78115
* For a single request, iterate through the SemanticGraphs it
79116
* includes, and add the results of each Semgrex operation included
80117
* in the request.
81118
*/
82119
public static CoreNLPProtos.SemgrexResponse processRequest(CoreNLPProtos.SemgrexRequest request) {
83120
ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
84-
CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder();
85121

86-
List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList());
87-
int graphIdx = 0;
122+
List<CoreMap> sentences = new ArrayList<>();
88123
for (CoreNLPProtos.SemgrexRequest.Dependencies sentence : request.getQueryList()) {
89-
CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder();
90-
91124
final List<CoreLabel> tokens;
92125
if (sentence.getGraph().getTokenList().size() > 0) {
93126
tokens = sentence.getGraph().getTokenList().stream().map(serializer::fromProto).collect(Collectors.toList());
94127
} else {
95128
tokens = sentence.getTokenList().stream().map(serializer::fromProto).collect(Collectors.toList());
96129
}
97130
SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(sentence.getGraph(), tokens, "semgrex");
98-
int patternIdx = 0;
99-
for (SemgrexPattern pattern : patterns) {
100-
graphResultBuilder.addResult(matchSentence(pattern, graph, patternIdx, graphIdx));
101-
++patternIdx;
102-
}
103-
104-
responseBuilder.addResult(graphResultBuilder.build());
105-
++graphIdx;
131+
CoreMap coremap = new ArrayCoreMap();
132+
coremap.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
133+
coremap.set(CoreAnnotations.TokensAnnotation.class, tokens);
134+
sentences.add(coremap);
106135
}
107-
return responseBuilder.build();
136+
137+
List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList());
138+
return processRequest(sentences, patterns);
108139
}
109140

110141
/**

0 commit comments

Comments
 (0)