Skip to content

Commit 8b40947

Browse files
committed
Add an endpoint for 'scenegraph' to the server.
Processes requests using the scenegraph package: https://nlp.stanford.edu/software/scenegraph-parser.shtml Output is in either the text or JSON format from the scenegraph package. Requested in #1346. Also leaves a note about not having tested the scenegraph parser for thread safety.
1 parent 267041e commit 8b40947

File tree

1 file changed

+206
-21
lines changed

1 file changed

+206
-21
lines changed

src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java

Lines changed: 206 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
1212
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
1313
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
14+
import edu.stanford.nlp.scenegraph.RuleBasedParser;
15+
import edu.stanford.nlp.scenegraph.SceneGraph;
1416
import edu.stanford.nlp.semgraph.SemanticGraph;
1517
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
1618
import edu.stanford.nlp.semgraph.semgrex.ProcessSemgrexRequest;
@@ -109,6 +111,8 @@ public class StanfordCoreNLPServer implements Runnable {
109111
*/
110112
private SoftReference<Pair<String, StanfordCoreNLP>> lastPipeline = new SoftReference<>(null);
111113

114+
/**
 * The single SceneGraph parser used by this server, or null if it has not been created yet.
 * <br>
 * Declared volatile because it is read without holding the lock: by the fast path in
 * mkSceneGraphParser() and by the extra-timeout check in SceneGraphHandler.
 * Without volatile, another thread is not guaranteed to see the fully constructed parser.
 */
private volatile RuleBasedParser sceneParser = null;
115+
112116
/**
113117
* An executor to time out CoreNLP execution with.
114118
*/
@@ -295,35 +299,18 @@ private static Map<String, String> getURLParams(URI uri) throws UnsupportedEncod
295299
* @throws ClassNotFoundException Thrown if we cannot load the serializer.
296300
*/
297301
private Annotation getDocument(Properties props, HttpExchange httpExchange) throws IOException, ClassNotFoundException {
298-
String inputFormat = props.getProperty("inputFormat", "text");
302+
final String inputFormat = props.getProperty("inputFormat", "text");
299303
String date = props.getProperty("date");
300304
switch (inputFormat) {
301305
case "text":
302-
// The default encoding by the HTTP standard is ISO-8859-1, but most
303-
// real users of CoreNLP would likely assume UTF-8 by default.
304-
String defaultEncoding = this.strict ? "ISO-8859-1" : "UTF-8";
305-
// Get the encoding
306306
Headers headers = httpExchange.getRequestHeaders();
307-
String encoding;
308307
// the original default behavior of the server was to
309308
// unescape, so let's assume by default that the input text is
310309
// escaped. if the Content-type is set to text we will know
311310
// we shouldn't unescape after all
312-
String contentType = URL_ENCODED;
313-
if (headers.containsKey("Content-type")) {
314-
contentType = headers.getFirst("Content-type").split(";")[0].trim();
315-
String[] charsetPair = Arrays.stream(headers.getFirst("Content-type").split(";"))
316-
.map(x -> x.split("="))
317-
.filter(x -> x.length > 0 && "charset".equals(x[0]))
318-
.findFirst().orElse(new String[]{"charset", defaultEncoding});
319-
if (charsetPair.length == 2) {
320-
encoding = charsetPair[1];
321-
} else {
322-
encoding = defaultEncoding;
323-
}
324-
} else {
325-
encoding = defaultEncoding;
326-
}
311+
final String contentType = getContentType(headers);
312+
// Get the encoding
313+
final String encoding = getEncoding(headers);
327314

328315
String text = IOUtils.slurpReader(IOUtils.encodedInputStreamReader(httpExchange.getRequestBody(), encoding));
329316
if (contentType.equals(URL_ENCODED)) {
@@ -352,6 +339,71 @@ private Annotation getDocument(Properties props, HttpExchange httpExchange) thro
352339
}
353340
}
354341

342+
private String getContentType(Headers headers) {
343+
String contentType = URL_ENCODED;
344+
if (headers.containsKey("Content-type")) {
345+
contentType = headers.getFirst("Content-type").split(";")[0].trim();
346+
}
347+
return contentType;
348+
}
349+
350+
private String getEncoding(Headers headers) {
351+
// The default encoding by the HTTP standard is ISO-8859-1, but most
352+
// real users of CoreNLP would likely assume UTF-8 by default.
353+
String defaultEncoding = this.strict ? "ISO-8859-1" : "UTF-8";
354+
if (headers.containsKey("Content-type")) {
355+
String[] charsetPair = Arrays.stream(headers.getFirst("Content-type").split(";"))
356+
.map(x -> x.split("="))
357+
.filter(x -> x.length > 0 && "charset".equals(x[0]))
358+
.findFirst().orElse(new String[]{"charset", defaultEncoding});
359+
if (charsetPair.length == 2) {
360+
return charsetPair[1];
361+
} else {
362+
return defaultEncoding;
363+
}
364+
} else {
365+
return defaultEncoding;
366+
}
367+
}
368+
369+
/**
370+
* Get a SceneGraph request from the query, either from a query parameter (q)
371+
* or from the body of the request
372+
* <br>
373+
* TODO: don't actually know if the scenegraph parser is threadsafe.
374+
*
375+
* @return query
376+
*/
377+
private String getSceneGraphRequest(Properties props, HttpExchange httpExchange) throws IOException, ClassNotFoundException {
378+
final String inputFormat = props.getProperty("inputFormat", "text");
379+
if (!inputFormat.equals("text")) {
380+
throw new IOException("Unhandled input format for scenegraph: " + inputFormat);
381+
}
382+
String query = props.getProperty("q", null);
383+
if (query != null) {
384+
return query;
385+
}
386+
387+
Headers headers = httpExchange.getRequestHeaders();
388+
// the original default behavior of the server was to
389+
// unescape, so let's assume by default that the input text is
390+
// escaped. if the Content-type is set to text we will know
391+
// we shouldn't unescape after all
392+
final String contentType = getContentType(headers);
393+
// Get the encoding
394+
final String encoding = getEncoding(headers);
395+
396+
String text = IOUtils.slurpReader(IOUtils.encodedInputStreamReader(httpExchange.getRequestBody(), encoding));
397+
if (contentType.equals(URL_ENCODED)) {
398+
try {
399+
text = URLDecoder.decode(text, encoding);
400+
} catch (IllegalArgumentException e) {
401+
// ignore decoding errors so that libraries which don't specify a content type might not fail
402+
}
403+
}
404+
405+
return text;
406+
}
355407

356408
/**
357409
* Create (or retrieve) a StanfordCoreNLP object corresponding to these properties.
@@ -394,6 +446,29 @@ private StanfordCoreNLP mkStanfordCoreNLP(Properties props) {
394446
return impl;
395447
}
396448

449+
/**
450+
* This server has at most one SceneGraph parser, and it is not created at startup time
451+
* as most applications will not use it.
452+
* <br>
453+
* This function call creates it in a synchronized manner, so at most one is ever created.
454+
* <br>
455+
* @return RuleBasedParser
456+
*/
457+
private RuleBasedParser mkSceneGraphParser() {
458+
if (sceneParser != null) {
459+
return sceneParser;
460+
}
461+
synchronized (this) {
462+
// in case it got created in another thread
463+
if (sceneParser != null) {
464+
return sceneParser;
465+
}
466+
RuleBasedParser parser = new RuleBasedParser();
467+
sceneParser = parser;
468+
return parser;
469+
}
470+
}
471+
397472
/**
398473
* Parse the parameters of a connection into a CoreNLP properties file that can be passed into
399474
* {@link StanfordCoreNLP}, and used in the I/O stages.
@@ -1404,6 +1479,115 @@ public void handle(HttpExchange httpExchange) throws IOException {
14041479
}
14051480
}
14061481

1482+
/**
1483+
* A handler for executing scenegraph on text
1484+
*/
1485+
protected class SceneGraphHandler implements HttpHandler {
1486+
1487+
/**
1488+
* An authenticator to determine if we can perform this API request.
1489+
*/
1490+
private final Predicate<Properties> authenticator;
1491+
1492+
/**
1493+
* Create a new SceneGraphHandler.
1494+
* <br>
1495+
* It's not clear what a callback would do with this, since there's no Annotation at the end of a SceneGraph call, so we just skip it
1496+
* @param callback The callback to call when annotation has finished.
1497+
*/
1498+
public SceneGraphHandler(Predicate<Properties> authenticator) {
1499+
this.authenticator = authenticator;
1500+
}
1501+
1502+
@Override
1503+
public void handle(HttpExchange httpExchange) throws IOException {
1504+
if (onBlockList(httpExchange)) {
1505+
respondUnauthorized(httpExchange);
1506+
return;
1507+
}
1508+
setHttpExchangeResponseHeaders(httpExchange);
1509+
1510+
Properties props = getProperties(httpExchange);
1511+
1512+
if (authenticator != null && ! authenticator.test(props)) {
1513+
respondUnauthorized(httpExchange);
1514+
return;
1515+
}
1516+
Map<String, String> params = getURLParams(httpExchange.getRequestURI());
1517+
1518+
Future<Pair<String, SceneGraph>> response = corenlpExecutor.submit(() -> {
1519+
try {
1520+
// Get the document
1521+
String request = getSceneGraphRequest(props, httpExchange);
1522+
if (request == null || request.equals("")) {
1523+
respondBadInput("Blank input in scenegraph", httpExchange);
1524+
return Pair.makePair("", null);
1525+
}
1526+
RuleBasedParser parser = mkSceneGraphParser();
1527+
1528+
SceneGraph graph = parser.parse(request);
1529+
if (graph == null) {
1530+
respondError("Something weird happened and the text could not be parsed!", httpExchange);
1531+
}
1532+
return Pair.makePair(request, graph);
1533+
} catch (RuntimeException e) {
1534+
warn(e);
1535+
try {
1536+
respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
1537+
} catch (IOException ignored) {
1538+
}
1539+
}
1540+
return Pair.makePair("", null);
1541+
});
1542+
1543+
// Send response
1544+
try {
1545+
int timeout = getTimeout(props, httpExchange);
1546+
if (sceneParser == null) {
1547+
timeout = timeout + 60000; // add 60 seconds for loading a pipeline if needed
1548+
}
1549+
Pair<String, SceneGraph> pair = response.get(timeout, TimeUnit.MILLISECONDS);
1550+
SceneGraph graph = pair.second;
1551+
if (graph == null) {
1552+
// already responded with an error
1553+
return;
1554+
}
1555+
1556+
final StanfordCoreNLP.OutputFormat of;
1557+
try {
1558+
of = StanfordCoreNLP.OutputFormat.valueOf(props.getProperty("outputFormat", "json").toUpperCase(Locale.ROOT));
1559+
} catch (RuntimeException e) {
1560+
String badFormat = props.getProperty("outputFormat");
1561+
log("Received bad output format in scenegraph '" + badFormat + "'");
1562+
respondBadInput("Interface scenegraph does not handle output format '" + badFormat + "'", httpExchange);
1563+
return;
1564+
}
1565+
1566+
final String result;
1567+
switch(of) {
1568+
case JSON:
1569+
int id = PropertiesUtils.getInt(props, "id", -1);
1570+
String url = props.getProperty("url", "");
1571+
String phrase = pair.first;
1572+
result = graph.toJSON(id, url, phrase);
1573+
break;
1574+
case TEXT:
1575+
result = graph.toReadableString();
1576+
break;
1577+
default:
1578+
log("Received unhanded output format in scenegraph '" + of + "'");
1579+
respondBadInput("Interface scenegraph does not handle output format " + of, httpExchange);
1580+
return;
1581+
}
1582+
1583+
byte[] content = result.getBytes();
1584+
sendAndGetResponse(httpExchange, content);
1585+
} catch (InterruptedException | ExecutionException | TimeoutException e) {
1586+
respondError("Timeout when executing scenegraph query", httpExchange);
1587+
}
1588+
}
1589+
}
1590+
14071591
private static void sendAndGetResponse(HttpExchange httpExchange, byte[] response) throws IOException {
14081592
if (response.length > 0) {
14091593
httpExchange.getResponseHeaders().add("Content-type", "application/json");
@@ -1547,6 +1731,7 @@ public void run(Optional<Pair<String,String>> basicAuth,
15471731
withAuth(server.createContext(uriContext+"/tokensregex", new TokensRegexHandler(authenticator, callback)), basicAuth);
15481732
withAuth(server.createContext(uriContext+"/semgrex", new SemgrexHandler(authenticator, callback)), basicAuth);
15491733
withAuth(server.createContext(uriContext+"/tregex", new TregexHandler(authenticator, callback)), basicAuth);
1734+
withAuth(server.createContext(uriContext+"/scenegraph", new SceneGraphHandler(authenticator)), basicAuth);
15501735
withAuth(server.createContext(uriContext+"/corenlp-brat.js", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.js", "application/javascript")), basicAuth);
15511736
withAuth(server.createContext(uriContext+"/corenlp-brat.cs", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.css", "text/css")), basicAuth);
15521737
withAuth(server.createContext(uriContext+"/corenlp-parseviewer.js", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-parseviewer.js", "application/javascript")), basicAuth);

0 commit comments

Comments
 (0)