Skip to content

Commit 16df6a8

Browse files
committed
Improved encoding handling
1 parent 282aff2 commit 16df6a8

File tree

4 files changed

+89
-83
lines changed

4 files changed

+89
-83
lines changed

lib/xmljava.jar

254 Bytes
Binary file not shown.

src/com/maxprograms/xml/Constants.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515
public class Constants {
1616

17-
public static final String VERSION = "2.0.0";
18-
public static final String BUILD = "20241215_0607";
17+
public static final String VERSION = "2.1.0";
18+
public static final String BUILD = "20250122_0906";
1919

2020
private Constants() {
2121
// private for security

src/com/maxprograms/xml/CustomContentHandler.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,10 @@ public void skippedEntity(String name) throws SAXException {
209209

210210
@Override
211211
public void declaration(String version, String encoding, String standalone) throws SAXException {
212-
// do nothing
212+
if (encoding == null) {
213+
encoding = "UTF-8";
214+
}
215+
this.encoding = encoding.toUpperCase();
213216
}
214217

215218
@Override
@@ -358,7 +361,7 @@ public void startDTD(String name, String publicId, String systemId1) throws SAXE
358361
if (doc == null) {
359362
this.systemId = systemId1;
360363
if (catalog != null && publicId != null) {
361-
catalog.parseDTD(publicId);
364+
catalog.parseDTD(publicId);
362365
}
363366
doc = new Document(null, name, publicId, systemId);
364367
if (encoding != null) {

src/com/maxprograms/xml/XMLOutputter.java

+82-79
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111
*******************************************************************************/
1212
package com.maxprograms.xml;
1313

14+
import java.io.BufferedWriter;
1415
import java.io.IOException;
1516
import java.io.OutputStream;
17+
import java.io.OutputStreamWriter;
18+
import java.io.Writer;
1619
import java.lang.System.Logger;
1720
import java.lang.System.Logger.Level;
1821
import java.nio.charset.Charset;
@@ -35,77 +38,82 @@ public class XMLOutputter {
3538

3639
private static Logger logger = System.getLogger(XMLOutputter.class.getName());
3740

38-
public void output(Document sdoc, OutputStream output) throws IOException {
39-
if (defaultEncoding.equals(StandardCharsets.UTF_16LE)) {
40-
output.write(XMLUtils.UTF16LEBOM);
41-
}
42-
if (defaultEncoding.equals(StandardCharsets.UTF_16BE)) {
43-
output.write(XMLUtils.UTF16BEBOM);
44-
}
45-
if (writeBOM) {
46-
output.write(XMLUtils.UTF8BOM);
47-
}
48-
if (!skipLinefeed) {
49-
writeString(output, "<?xml version=\"1.0\" encoding=\"" + defaultEncoding + "\" ?>\n");
50-
} else {
51-
writeString(output, "<?xml version=\"1.0\" encoding=\"" + defaultEncoding + "\"?>");
52-
}
53-
String doctype = sdoc.getRootElement().getName();
54-
String publicId = sdoc.getPublicId();
55-
String systemId = sdoc.getSystemId();
56-
String internalSubset = sdoc.getInternalSubset();
57-
List<AttributeDecl> customAttributes = sdoc.getAttributes();
58-
if (customAttributes != null) {
59-
if (internalSubset == null) {
60-
internalSubset = "";
61-
}
62-
for (int i = 0; i < customAttributes.size(); i++) {
63-
internalSubset = internalSubset + "\n" + customAttributes.get(i);
64-
}
65-
}
66-
if (publicId != null || systemId != null || internalSubset != null) {
67-
writeString(output, "<!DOCTYPE " + doctype + " ");
68-
if (publicId != null) {
69-
writeString(output, "PUBLIC \"" + publicId + "\" \"" + systemId + "\"");
70-
if (internalSubset != null && !internalSubset.isEmpty()) {
71-
writeString(output, " [" + internalSubset + "]>\n");
41+
public void output(Document sdoc, OutputStream out) throws IOException {
42+
try (OutputStreamWriter outputWriter = new OutputStreamWriter(out, sdoc.getEncoding())) {
43+
try (Writer writer = new BufferedWriter(outputWriter)) {
44+
if (defaultEncoding.equals(StandardCharsets.UTF_16LE)) {
45+
writer.write(new String(new byte[] { (byte) 0xFF, (byte) 0xFE }, StandardCharsets.UTF_16LE));
46+
}
47+
if (defaultEncoding.equals(StandardCharsets.UTF_16BE)) {
48+
writer.write(new String(new byte[] { (byte) 0xFE, (byte) 0xFF }, StandardCharsets.UTF_16BE));
49+
}
50+
if (writeBOM && defaultEncoding.equals(StandardCharsets.UTF_8)) {
51+
writer.write(
52+
new String(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF }, StandardCharsets.UTF_8));
53+
}
54+
if (!skipLinefeed) {
55+
writer.write("<?xml version=\"1.0\" encoding=\"" + defaultEncoding + "\" ?>\n");
7256
} else {
73-
writeString(output, ">\n");
57+
writer.write("<?xml version=\"1.0\" encoding=\"" + defaultEncoding + "\"?>");
7458
}
75-
} else {
76-
if (systemId != null) {
77-
writeString(output, "SYSTEM \"" + systemId + "\" ");
59+
String doctype = sdoc.getRootElement().getName();
60+
String publicId = sdoc.getPublicId();
61+
String systemId = sdoc.getSystemId();
62+
String internalSubset = sdoc.getInternalSubset();
63+
List<AttributeDecl> customAttributes = sdoc.getAttributes();
64+
if (customAttributes != null) {
65+
if (internalSubset == null) {
66+
internalSubset = "";
67+
}
68+
for (int i = 0; i < customAttributes.size(); i++) {
69+
internalSubset = internalSubset + "\n" + customAttributes.get(i);
70+
}
7871
}
79-
if (internalSubset != null) {
80-
writeString(output, "[\n" + internalSubset + "]");
72+
if (publicId != null || systemId != null || internalSubset != null) {
73+
writer.write("<!DOCTYPE " + doctype + " ");
74+
if (publicId != null) {
75+
writer.write("PUBLIC \"" + publicId + "\" \"" + systemId + "\"");
76+
if (internalSubset != null && !internalSubset.isEmpty()) {
77+
writer.write(" [" + internalSubset + "]>\n");
78+
} else {
79+
writer.write(">\n");
80+
}
81+
} else {
82+
if (systemId != null) {
83+
writer.write("SYSTEM \"" + systemId + "\" ");
84+
}
85+
if (internalSubset != null) {
86+
writer.write("[\n" + internalSubset + "]");
87+
}
88+
writer.write(">\n");
89+
}
90+
} else {
91+
if (emptyDoctype) {
92+
writer.write("<!DOCTYPE " + doctype + ">");
93+
}
8194
}
82-
writeString(output, ">\n");
83-
}
84-
} else {
85-
if (emptyDoctype) {
86-
writeString(output, "<!DOCTYPE " + doctype + ">");
95+
entities = sdoc.getEntities();
96+
if (entities == null) {
97+
entities = new Hashtable<>();
98+
entities.put("lt", "&#38;#60;");
99+
entities.put("gt", "&#62;");
100+
entities.put("amp", "&#38;#38;");
101+
}
102+
processHeader(writer, sdoc.getContent());
87103
}
88104
}
89-
entities = sdoc.getEntities();
90-
if (entities == null) {
91-
entities = new Hashtable<>();
92-
entities.put("lt", "&#38;#60;");
93-
entities.put("gt", "&#62;");
94-
entities.put("amp", "&#38;#38;");
95-
}
96-
processHeader(output, sdoc.getContent());
97105
}
98106

99-
private void processHeader(OutputStream output, List<XMLNode> list) throws IOException {
107+
private void processHeader(Writer output, List<XMLNode> list) throws IOException {
100108
int length = list.size();
101109
for (int i = 0; i < length; i++) {
102110
XMLNode n = list.get(i);
103111
switch (n.getNodeType()) {
104112
case XMLNode.PROCESSING_INSTRUCTION_NODE:
105113
PI pi = (PI) n;
106-
writeString(output, "<?" + pi.getTarget() + " " + pi.getData() + "?>");
114+
output.write("<?" + pi.getTarget() + " " + pi.getData() + "?>");
107115
if (!preserve) {
108-
writeString(output, "\n");
116+
output.write("\n");
109117
}
110118
break;
111119
case XMLNode.DOCUMENT_NODE:
@@ -117,20 +125,20 @@ private void processHeader(OutputStream output, List<XMLNode> list) throws IOExc
117125
case XMLNode.COMMENT_NODE:
118126
Comment c = (Comment) n;
119127
if (!preserve) {
120-
writeString(output, "\n<!-- " + c.getText() + " -->");
128+
output.write("\n<!-- " + c.getText() + " -->");
121129
} else {
122-
writeString(output, "<!-- " + c.getText() + " -->");
130+
output.write("<!-- " + c.getText() + " -->");
123131
}
124132
break;
125133
case XMLNode.CDATA_SECTION_NODE:
126134
CData cd = (CData) n;
127-
writeString(output, "<![CDATA[" + cd.getData() + "]]>");
135+
output.write("<![CDATA[" + cd.getData() + "]]>");
128136
break;
129137
case XMLNode.TEXT_NODE:
130138
TextNode tn = (TextNode) n;
131139
String text = cleanString(tn.getText());
132140
if (text != null) {
133-
writeString(output, text);
141+
output.write(text);
134142
}
135143
break;
136144
default:
@@ -140,22 +148,21 @@ private void processHeader(OutputStream output, List<XMLNode> list) throws IOExc
140148
}
141149
}
142150

143-
private void traverse(OutputStream output, Element el) throws IOException {
144-
151+
private void traverse(Writer output, Element el) throws IOException {
145152
String type = el.getName();
146153
String space = el.getAttributeValue("xml:space", "default");
147154
if (space.equals("preserve") && !preserve) {
148155
preserve = true;
149156
}
150-
writeString(output, "<" + type);
157+
output.write("<" + type);
151158
List<Attribute> attrs = el.getAttributes();
152159
for (int i = 0; i < attrs.size(); i++) {
153160
Attribute a = attrs.get(i);
154-
writeString(output, " " + a.toString());
161+
output.write(" " + a.toString());
155162
}
156163
List<XMLNode> list = el.getContent();
157164
if (!list.isEmpty()) {
158-
writeString(output, ">");
165+
output.write(">");
159166
for (int i = 0; i < list.size(); i++) {
160167
XMLNode n = list.get(i);
161168
switch (n.getNodeType()) {
@@ -173,41 +180,41 @@ private void traverse(OutputStream output, Element el) throws IOException {
173180
text = text.replace("'", "&apos;");
174181
}
175182
if (preserve) {
176-
writeString(output, text);
183+
output.write(text);
177184
} else {
178-
writeString(output, normalize(text));
185+
output.write(normalize(text));
179186
}
180187
break;
181188
case XMLNode.PROCESSING_INSTRUCTION_NODE:
182189
PI pi = (PI) n;
183-
writeString(output, "<?" + pi.getTarget() + " " + pi.getData() + "?>");
190+
output.write("<?" + pi.getTarget() + " " + pi.getData() + "?>");
184191
break;
185192
case XMLNode.COMMENT_NODE:
186193
Comment c = (Comment) n;
187194
if (!preserve) {
188-
writeString(output, "\n<!-- " + c.getText() + " -->");
195+
output.write("\n<!-- " + c.getText() + " -->");
189196
} else {
190-
writeString(output, "<!-- " + c.getText() + " -->");
197+
output.write("<!-- " + c.getText() + " -->");
191198
}
192199
break;
193200
case XMLNode.CDATA_SECTION_NODE:
194-
writeString(output, n.toString());
201+
output.write(n.toString());
195202
break;
196203
default:
197204
// should never happen
198205
logger.log(Level.WARNING, Messages.getString("XMLOutputter.1"));
199206
}
200207
}
201208
if (!preserve) {
202-
writeString(output, "</" + type + ">\n");
209+
output.write("</" + type + ">\n");
203210
} else {
204-
writeString(output, "</" + type + ">");
211+
output.write("</" + type + ">");
205212
}
206213
} else {
207214
if (skipLinefeed) {
208-
writeString(output, " />");
215+
output.write(" />");
209216
} else {
210-
writeString(output, "/>");
217+
output.write("/>");
211218
}
212219
}
213220
}
@@ -287,10 +294,6 @@ private static String replaceEntities(String string, String token, String entity
287294
return result;
288295
}
289296

290-
private void writeString(OutputStream output, String input) throws IOException {
291-
output.write(input.getBytes(defaultEncoding));
292-
}
293-
294297
private static String normalize(String string) {
295298
StringBuilder rs = new StringBuilder("");
296299
int length = string.length();

0 commit comments

Comments
 (0)