Skip to content

Commit 5cf5f95

Browse files
taserothjexp
authored andcommitted
allow loading of xml with DTD references, fixes #1185 (#1208)
loading of xml files was previously forbidden (#931). With this change, an dummy entity resolver is registered that does nothing, e.g. does not load external files.
1 parent 1ffa860 commit 5cf5f95

File tree

4 files changed

+41
-48
lines changed

4 files changed

+41
-48
lines changed

src/main/java/apoc/load/Xml.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package apoc.load;
22

3-
import apoc.util.FileUtils;
43
import apoc.result.MapResult;
54
import apoc.result.NodeResult;
5+
import apoc.util.FileUtils;
66
import apoc.util.Util;
77
import org.apache.commons.lang3.BooleanUtils;
88
import org.apache.commons.lang3.StringUtils;
@@ -13,6 +13,7 @@
1313
import org.neo4j.procedure.*;
1414
import org.w3c.dom.CharacterData;
1515
import org.w3c.dom.*;
16+
import org.xml.sax.InputSource;
1617

1718
import javax.xml.namespace.QName;
1819
import javax.xml.parsers.DocumentBuilder;
@@ -28,6 +29,7 @@
2829
import java.io.FileNotFoundException;
2930
import java.io.IOException;
3031
import java.io.InputStream;
32+
import java.io.StringReader;
3133
import java.net.URL;
3234
import java.net.URLConnection;
3335
import java.util.*;
@@ -40,7 +42,7 @@
4042

4143
public class Xml {
4244

43-
public static final XMLInputFactory FACTORY = XMLInputFactory.newFactory();
45+
private static final XMLInputFactory FACTORY = XMLInputFactory.newFactory();
4446

4547
@Context
4648
public GraphDatabaseService db;
@@ -69,8 +71,8 @@ private Stream<MapResult> xmlXpathToMapResult(@Name("url") String url, boolean s
6971
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
7072
documentBuilderFactory.setNamespaceAware(true);
7173
documentBuilderFactory.setIgnoringElementContentWhitespace(true);
72-
documentBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
7374
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
75+
documentBuilder.setEntityResolver((publicId, systemId) -> new InputSource(new StringReader("")));
7476

7577
FileUtils.checkReadAllowed(url);
7678
url = FileUtils.changeFileUrlIfImportDirectoryConstrained(url);

src/test/java/apoc/load/XmlTest.java

+26-33
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
package apoc.load;
22

33
import apoc.util.TestUtil;
4-
import org.apache.commons.lang.exception.ExceptionUtils;
54
import org.junit.After;
65
import org.junit.Before;
76
import org.junit.Test;
87
import org.neo4j.graphdb.GraphDatabaseService;
9-
import org.neo4j.graphdb.QueryExecutionException;
108
import org.neo4j.helpers.collection.Iterators;
119
import org.neo4j.test.TestGraphDatabaseFactory;
12-
import org.xml.sax.SAXParseException;
1310

1411
import java.util.Collections;
1512
import java.util.List;
@@ -22,29 +19,29 @@
2219

2320
public class XmlTest {
2421

25-
public static final String XML_AS_NESTED_MAP =
22+
private static final String XML_AS_NESTED_MAP =
2623
"{_type=parent, name=databases, " +
2724
"_children=[" +
2825
"{_type=child, name=Neo4j, _text=Neo4j is a graph database}, " +
2926
"{_type=child, name=relational, _children=[" +
3027
"{_type=grandchild, name=MySQL, _text=MySQL is a database & relational}, " +
3128
"{_type=grandchild, name=Postgres, _text=Postgres is a relational database}]}]}";
32-
public static final String XML_AS_NESTED_SIMPLE_MAP =
29+
private static final String XML_AS_NESTED_SIMPLE_MAP =
3330
"{_type=parent, name=databases, " +
3431
"_child=[" +
3532
"{_type=child, name=Neo4j, _text=Neo4j is a graph database}, " +
3633
"{_type=child, name=relational, _grandchild=[" +
3734
"{_type=grandchild, name=MySQL, _text=MySQL is a database & relational}, " +
3835
"{_type=grandchild, name=Postgres, _text=Postgres is a relational database}]}]}";
39-
public static final String XML_XPATH_AS_NESTED_MAP =
36+
static final String XML_XPATH_AS_NESTED_MAP =
4037
"[{_type=book, id=bk103, _children=[{_type=author, _text=Corets, Eva}, {_type=title, _text=Maeve Ascendant}, {_type=genre, _text=Fantasy}, {_type=price, _text=5.95}, {_type=publish_date, _text=2000-11-17}, {_type=description, _text=After the collapse of a nanotechnology " +
4138
"society in England, the young survivors lay the " +
4239
"foundation for a new society.}]}]";
4340

44-
public static final String XML_AS_SINGLE_LINE =
41+
private static final String XML_AS_SINGLE_LINE =
4542
"{_type=table, _children=[{_type=tr, _children=[{_type=td, _children=[{_type=img, src=pix/logo-tl.gif}]}]}]}";
4643

47-
public static final String XML_AS_SINGLE_LINE_SIMPLE =
44+
private static final String XML_AS_SINGLE_LINE_SIMPLE =
4845
"{_type=table, _table=[{_type=tr, _tr=[{_type=td, _td=[{_type=img, src=pix/logo-tl.gif}]}]}]}";
4946

5047
private GraphDatabaseService db;
@@ -64,7 +61,7 @@ public void tearDown() {
6461
}
6562

6663
@Test
67-
public void testLoadXml() throws Exception {
64+
public void testLoadXml() {
6865
testCall(db, "CALL apoc.load.xml('file:databases.xml')", // YIELD value RETURN value
6966
(row) -> {
7067
Object value = row.get("value");
@@ -73,7 +70,7 @@ public void testLoadXml() throws Exception {
7370
}
7471

7572
@Test
76-
public void testLoadXmlSimple() throws Exception {
73+
public void testLoadXmlSimple() {
7774
testCall(db, "CALL apoc.load.xmlSimple('file:databases.xml')", // YIELD value RETURN value
7875
(row) -> {
7976
Object value = row.get("value");
@@ -247,10 +244,10 @@ public void testLoadXmlWithNextWordRels() {
247244
row -> assertNotNull(row.get("node")));
248245
testResult(db, "match (n) return labels(n)[0] as label, count(*) as count", result -> {
249246
final Map<String, Long> resultMap = result.stream().collect(Collectors.toMap(o -> (String)o.get("label"), o -> (Long)o.get("count")));
250-
assertEquals(2l, (long)resultMap.get("XmlProcessingInstruction"));
251-
assertEquals(1l, (long)resultMap.get("XmlDocument"));
252-
assertEquals(3263l, (long)resultMap.get("XmlWord"));
253-
assertEquals(454l, (long)resultMap.get("XmlTag"));
247+
assertEquals(2L, (long)resultMap.get("XmlProcessingInstruction"));
248+
assertEquals(1L, (long)resultMap.get("XmlDocument"));
249+
assertEquals(3263L, (long)resultMap.get("XmlWord"));
250+
assertEquals(454L, (long)resultMap.get("XmlTag"));
254251
});
255252

256253
// no node more than one NEXT/NEXT_SIBLING
@@ -265,7 +262,7 @@ public void testLoadXmlWithNextWordRels() {
265262
testResult(db, "match p=(:XmlDocument)-[:NEXT_WORD*]->(e:XmlWord) where not (e)-[:NEXT_WORD]->() return length(p) as len",
266263
result -> {
267264
Map<String, Object> r = Iterators.single(result);
268-
assertEquals(3263l, r.get("len"));
265+
assertEquals(3263L, r.get("len"));
269266
});
270267
}
271268

@@ -276,10 +273,10 @@ public void testLoadXmlWithNextEntityRels() {
276273
row -> assertNotNull(row.get("node")));
277274
testResult(db, "match (n) return labels(n)[0] as label, count(*) as count", result -> {
278275
final Map<String, Long> resultMap = result.stream().collect(Collectors.toMap(o -> (String)o.get("label"), o -> (Long)o.get("count")));
279-
assertEquals(2l, (long)resultMap.get("XmlProcessingInstruction"));
280-
assertEquals(1l, (long)resultMap.get("XmlDocument"));
281-
assertEquals(3263l, (long)resultMap.get("XmlCharacters"));
282-
assertEquals(454l, (long)resultMap.get("XmlTag"));
276+
assertEquals(2L, (long)resultMap.get("XmlProcessingInstruction"));
277+
assertEquals(1L, (long)resultMap.get("XmlDocument"));
278+
assertEquals(3263L, (long)resultMap.get("XmlCharacters"));
279+
assertEquals(454L, (long)resultMap.get("XmlTag"));
283280
});
284281

285282
// no node more than one NEXT/NEXT_SIBLING
@@ -294,7 +291,7 @@ public void testLoadXmlWithNextEntityRels() {
294291
testResult(db, "match p=(:XmlDocument)-[:NE*]->(e:XmlCharacters) where not (e)-[:NE]->() return length(p) as len",
295292
result -> {
296293
Map<String, Object> r = Iterators.single(result);
297-
assertEquals(3263l, r.get("len"));
294+
assertEquals(3263L, r.get("len"));
298295
});
299296
}
300297

@@ -378,21 +375,17 @@ public void testLoadXmlFromTgzByUrl() {
378375
});
379376
}
380377

381-
@Test(expected = QueryExecutionException.class)
382-
public void testLoadXmlPreventXXEVulnerabilityThrowsQueryExecutionException() {
383-
try {
384-
testResult(db, "CALL apoc.load.xml('file:src/test/resources/xml/xxe.xml', '/catalog/book[genre=\"Computer\"]') yield value as result", (r) -> {});
385-
} catch (QueryExecutionException e) {
386-
// We want test that the cause of the exception is SAXParseException with the correct cause message
387-
Throwable except = ExceptionUtils.getRootCause(e);
388-
assertTrue(except instanceof SAXParseException);
389-
assertEquals("DOCTYPE is disallowed when the feature \"http://apache.org/xml/features/disallow-doctype-decl\" set to true.", except.getMessage());
390-
throw e;
391-
}
378+
@Test
379+
public void testExternalDTDschouldNotBeLoaded() {
380+
testCall(db, "CALL apoc.load.xml('file:src/test/resources/xml/missingExternalDTD.xml', '/', null, true)",
381+
(row) -> {
382+
Object value = row.get("value");
383+
assertEquals("{_type=document, _document=[null, {_type=title, _text=dtd 404}]}", value.toString());
384+
});
392385
}
393386

394387
@Test
395-
public void testLoadXmlSingleLineSimple() throws Exception {
388+
public void testLoadXmlSingleLineSimple() {
396389
testCall(db, "CALL apoc.load.xml('file:src/test/resources/xml/singleLine.xml', '/', null, true)", // YIELD value RETURN value
397390
(row) -> {
398391
Object value = row.get("value");
@@ -401,7 +394,7 @@ public void testLoadXmlSingleLineSimple() throws Exception {
401394
}
402395

403396
@Test
404-
public void testLoadXmlSingleLine() throws Exception {
397+
public void testLoadXmlSingleLine() {
405398
testCall(db, "CALL apoc.load.xml('file:src/test/resources/xml/singleLine.xml')", // YIELD value RETURN value
406399
(row) -> {
407400
Object value = row.get("value");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?xml version="1.0" ?>
2+
3+
<!DOCTYPE document SYSTEM "file://should-not-exist.dtd">
4+
<document>
5+
<!-- The refenced external DTD should point to a non-exiting file.
6+
Since we do not want to load external DTDs (XXE attack), the parser
7+
should be configured in a way not to load external DTD and hence not fail.
8+
-->
9+
<title>dtd 404</title>
10+
</document>

src/test/resources/xml/xxe.xml

-12
This file was deleted.

0 commit comments

Comments
 (0)