Fix #545 (encoding handling was wrong in general, not only for HTTP).

fvgh · fvgh · commit 94010f460527 · 2020-03-24T18:07:43.000+01:00
Spotless always provides a decoded string to WTP.
diff --git a/_ext/eclipse-wtp/CHANGES.md b/_ext/eclipse-wtp/CHANGES.md
@@ -3,6 +3,10 @@
 We adhere to the [keepachangelog](https://keepachangelog.com/en/1.0.0/) format (starting after version `3.15.1`).
 
 ## [Unreleased]
+### Fixed
+* Handling of character encodings which require more than 1 byte. Previously the WTP
+decoded input twice, once using the encoding configured by the user, and
+once again using the default platform character set ([#545](https://github.com/diffplug/spotless/issues/545)).
 
 ## [3.15.2] - 2020-03-04
 ### Fixed
diff --git a/_ext/eclipse-wtp/src/main/java/com/diffplug/spotless/extra/eclipse/wtp/sse/ContentTypeManager.java b/_ext/eclipse-wtp/src/main/java/com/diffplug/spotless/extra/eclipse/wtp/sse/ContentTypeManager.java
@@ -18,6 +18,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
@@ -49,6 +50,7 @@
 class ContentTypeManager extends NoContentTypeSpecificHandling {
 	private final Map<String, IContentType> id2Object;
 	private final IContentType processorStepType;
+	private final IContentDescription processorStepDescription;
 
 	/**
 	 * Content type manager as required for cleanup steps.
@@ -66,6 +68,7 @@ class ContentTypeManager extends NoContentTypeSpecificHandling {
 		if (null == processorStepType) {
 			throw new IllegalArgumentException("The manager does not support content type " + formatterContentTypeID);
 		}
+		processorStepDescription = new StringDescription(processorStepType);
 	}
 
 	@Override
@@ -83,6 +86,45 @@ public IContentType findContentTypeFor(InputStream contents, String fileName) th
 		return processorStepType;
 	}
 
+	@Override
+	public IContentDescription getDescriptionFor(InputStream contents, String fileName, QualifiedName[] options) throws IOException {
+		return processorStepDescription; //Same description for every call; contents, fileName and options are not inspected
+	}
+
+	private static class StringDescription implements IContentDescription {
+
+		private final IContentType type;
+
+		public StringDescription(IContentType type) {
+			this.type = type;
+		}
+
+		@Override
+		public boolean isRequested(QualifiedName key) {
+			return false; //Don't use setProperty
+		}
+
+		@Override
+		public String getCharset() {
+			return Charset.defaultCharset().name(); //Spotless operates on a decoded string, meaning the input always has the "internal" encoding
+		}
+
+		@Override
+		public IContentType getContentType() {
+			return type;
+		}
+
+		@Override
+		public Object getProperty(QualifiedName key) {
+			return null; //Assume that the property map is empty
+		}
+
+		@Override
+		public void setProperty(QualifiedName key, Object value) {
+			throw new IllegalArgumentException("Content description key cannot be set: " + key);
+		}
+	}
+
 	/**
 	 * The WTP uses the manager only for ID mapping, so most of the methods are not used.
 	 * Actually it has a hand stitched way for transforming the content type ID
diff --git a/_ext/eclipse-wtp/src/test/java/com/diffplug/spotless/extra/eclipse/wtp/EclipseHtmlFormatterStepImplTest.java b/_ext/eclipse-wtp/src/test/java/com/diffplug/spotless/extra/eclipse/wtp/EclipseHtmlFormatterStepImplTest.java
@@ -89,6 +89,28 @@ public void formatCSS() throws Exception {
 				testData.expected("css.html"), output);
 	}
 
+	@Test
+	public void checkNoDoubleEndoding() throws Exception {
+		String osEncoding = System.getProperty("file.encoding");
+		//Ensure that file.encoding is not used during the clean-up.
+		System.setProperty("file.encoding", "ISO-8859-1");
+		//Check that WTP does not try to do UTF-8 conversion again (since done by Spotless framework)
+		String[] input = testData.input("utf-8.html");
+		String output = formatter.format(input[0]);
+		System.setProperty("file.encoding", osEncoding); //NOTE(review): not restored if format() throws; consider try/finally
+		assertEquals("Unexpected formatting of UTF-8", testData.expected("utf-8.html"), output);
+	}
+
+	@Test
+	public void checkBOMisStripped() throws Exception {
+		String[] input = testData.input("bom.html");
+		String[] inputWithoutBom = testData.input("utf-8.html");
+		//The UTF-8 BOM is interpreted as one UTF-16 character.
+		assertEquals("BOM input invalid", input[0].length() - 1, inputWithoutBom[0].length());
+		String output = formatter.format(input[0]);
+		assertEquals("BOM is not stripped", testData.expected("utf-8.html"), output);
+	}
+
 	@Test(expected = IllegalArgumentException.class)
 	public void configurationChange() throws Exception {
 		new EclipseHtmlFormatterStepImpl(new Properties());
diff --git a/_ext/eclipse-wtp/src/test/resources/html/expected/utf-8.html b/_ext/eclipse-wtp/src/test/resources/html/expected/utf-8.html
@@ -0,0 +1,7 @@
+<!DOCTYPE html>
+<HTML>
+<HEAD>
+<META charset="UTF-8">
+<TITLE>ÄÜ€</TITLE>
+</HEAD>
+</HTML>
diff --git a/_ext/eclipse-wtp/src/test/resources/html/input/bom.html b/_ext/eclipse-wtp/src/test/resources/html/input/bom.html
@@ -0,0 +1,2 @@
+﻿<!DOCTYPE html>
+<html><head><meta charset="UTF-8"><title>ÄÜ€</title></head></html>
diff --git a/_ext/eclipse-wtp/src/test/resources/html/input/utf-8.html b/_ext/eclipse-wtp/src/test/resources/html/input/utf-8.html
@@ -0,0 +1,2 @@
+<!DOCTYPE html>
+<html><head><meta charset="UTF-8"><title>ÄÜ€</title></head></html>

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+<!DOCTYPE html>`
	`2`	`+<html><head><meta charset="UTF-8"><title>ÄÜ€</title></head></html>`