|
10 | 10 | import java.io.InputStream;
|
11 | 11 | import java.io.IOException;
|
12 | 12 | import java.io.OutputStream;
|
| 13 | +import java.util.ArrayList; |
13 | 14 | import java.util.List;
|
14 | 15 | import java.util.stream.Collectors;
|
15 | 16 |
|
| 17 | +import edu.stanford.nlp.ling.CoreAnnotations; |
16 | 18 | import edu.stanford.nlp.ling.CoreLabel;
|
17 | 19 | import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
|
18 | 20 | import edu.stanford.nlp.pipeline.CoreNLPProtos;
|
19 | 21 | import edu.stanford.nlp.semgraph.SemanticGraph;
|
| 22 | +import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; |
20 | 23 | import edu.stanford.nlp.semgraph.SemanticGraphEdge;
|
21 | 24 | import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
|
22 | 25 | import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
|
| 26 | +import edu.stanford.nlp.util.ArrayCoreMap; |
| 27 | +import edu.stanford.nlp.util.CoreMap; |
| 28 | +import edu.stanford.nlp.util.Pair; |
23 | 29 | import edu.stanford.nlp.util.ProcessProtobufRequest;
|
24 | 30 |
|
25 | 31 | public class ProcessSemgrexRequest extends ProcessProtobufRequest {
|
26 | 32 | /**
|
27 | 33 | * Builds a single inner SemgrexResult structure from the pair of a SemgrexPattern and a SemanticGraph
|
28 | 34 | */
|
29 |
| - public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, int patternIdx, int graphIdx) { |
| 35 | + public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, List<SemgrexMatch> matches, int patternIdx, int graphIdx) { |
30 | 36 | CoreNLPProtos.SemgrexResponse.SemgrexResult.Builder semgrexResultBuilder = CoreNLPProtos.SemgrexResponse.SemgrexResult.newBuilder();
|
31 |
| - SemgrexMatcher matcher = pattern.matcher(graph); |
32 |
| - while (matcher.find()) { |
| 37 | + for (SemgrexMatch matcher : matches) { |
33 | 38 | CoreNLPProtos.SemgrexResponse.Match.Builder matchBuilder = CoreNLPProtos.SemgrexResponse.Match.newBuilder();
|
34 | 39 | matchBuilder.setMatchIndex(matcher.getMatch().index());
|
35 | 40 | matchBuilder.setSemgrexIndex(patternIdx);
|
@@ -74,37 +79,63 @@ public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexP
|
74 | 79 | return semgrexResultBuilder.build();
|
75 | 80 | }
|
76 | 81 |
|
| 82 | + public static CoreNLPProtos.SemgrexResponse processRequest(List<CoreMap> sentences, List<SemgrexPattern> patterns) { |
| 83 | + CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder(); |
| 84 | + List<Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>>> allMatches = new ArrayList<>(); |
| 85 | + for (CoreMap sentence : sentences) { |
| 86 | + allMatches.add(new Pair<>(sentence, new ArrayList<>())); |
| 87 | + } |
| 88 | + for (SemgrexPattern pattern : patterns) { |
| 89 | + List<Pair<CoreMap, List<SemgrexMatch>>> patternMatches = pattern.matchSentences(sentences, true); |
| 90 | + for (int i = 0; i < sentences.size(); ++i) { |
| 91 | + Pair<CoreMap, List<SemgrexMatch>> sentenceMatches = patternMatches.get(i); |
| 92 | + allMatches.get(i).second().add(new Pair<>(pattern, sentenceMatches.second())); |
| 93 | + } |
| 94 | + } |
| 95 | + |
| 96 | + int graphIdx = 0; |
| 97 | + for (Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>> sentenceMatches : allMatches) { |
| 98 | + CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder(); |
| 99 | + |
| 100 | + int patternIdx = 0; |
| 101 | + SemanticGraph graph = sentenceMatches.first().get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class); |
| 102 | + for (Pair<SemgrexPattern, List<SemgrexMatch>> patternMatches : sentenceMatches.second()) { |
| 103 | + SemgrexPattern pattern = patternMatches.first(); |
| 104 | + graphResultBuilder.addResult(matchSentence(pattern, graph, patternMatches.second(), patternIdx, graphIdx)); |
| 105 | + ++patternIdx; |
| 106 | + } |
| 107 | + |
| 108 | + responseBuilder.addResult(graphResultBuilder.build()); |
| 109 | + ++graphIdx; |
| 110 | + } |
| 111 | + return responseBuilder.build(); |
| 112 | + } |
| 113 | + |
77 | 114 | /**
|
78 | 115 | * For a single request, iterate through the SemanticGraphs it
|
79 | 116 | * includes, and add the results of each Semgrex operation included
|
80 | 117 | * in the request.
|
81 | 118 | */
|
82 | 119 | public static CoreNLPProtos.SemgrexResponse processRequest(CoreNLPProtos.SemgrexRequest request) {
|
83 | 120 | ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
|
84 |
| - CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder(); |
85 | 121 |
|
86 |
| - List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList()); |
87 |
| - int graphIdx = 0; |
| 122 | + List<CoreMap> sentences = new ArrayList<>(); |
88 | 123 | for (CoreNLPProtos.SemgrexRequest.Dependencies sentence : request.getQueryList()) {
|
89 |
| - CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder(); |
90 |
| - |
91 | 124 | final List<CoreLabel> tokens;
|
92 | 125 | if (sentence.getGraph().getTokenList().size() > 0) {
|
93 | 126 | tokens = sentence.getGraph().getTokenList().stream().map(serializer::fromProto).collect(Collectors.toList());
|
94 | 127 | } else {
|
95 | 128 | tokens = sentence.getTokenList().stream().map(serializer::fromProto).collect(Collectors.toList());
|
96 | 129 | }
|
97 | 130 | SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(sentence.getGraph(), tokens, "semgrex");
|
98 |
| - int patternIdx = 0; |
99 |
| - for (SemgrexPattern pattern : patterns) { |
100 |
| - graphResultBuilder.addResult(matchSentence(pattern, graph, patternIdx, graphIdx)); |
101 |
| - ++patternIdx; |
102 |
| - } |
103 |
| - |
104 |
| - responseBuilder.addResult(graphResultBuilder.build()); |
105 |
| - ++graphIdx; |
| 131 | + CoreMap coremap = new ArrayCoreMap(); |
| 132 | + coremap.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph); |
| 133 | + coremap.set(CoreAnnotations.TokensAnnotation.class, tokens); |
| 134 | + sentences.add(coremap); |
106 | 135 | }
|
107 |
| - return responseBuilder.build(); |
| 136 | + |
| 137 | + List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList()); |
| 138 | + return processRequest(sentences, patterns); |
108 | 139 | }
|
109 | 140 |
|
110 | 141 | /**
|
|
0 commit comments