Skip to content

Commit 914d90d

Browse files
committed
Make whitespace (' ', \t, \r, \n) always visible for "changed" lines
Context lines, added-only lines, and removed-only lines are shown as usual in the diffs. Fixes diffplug#465.
1 parent 04d5c7d commit 914d90d

File tree

3 files changed

+439
-191
lines changed

3 files changed

+439
-191
lines changed

lib-extra/src/main/java/com/diffplug/spotless/extra/integration/DiffMessageFormatter.java

+25-56
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,18 @@
1818
import java.io.ByteArrayOutputStream;
1919
import java.io.File;
2020
import java.io.IOException;
21+
import java.nio.charset.Charset;
2122
import java.nio.charset.StandardCharsets;
2223
import java.nio.file.Files;
2324
import java.util.List;
2425
import java.util.ListIterator;
2526
import java.util.Objects;
2627

27-
import org.eclipse.jgit.diff.DiffFormatter;
2828
import org.eclipse.jgit.diff.EditList;
29-
import org.eclipse.jgit.diff.MyersDiff;
29+
import org.eclipse.jgit.diff.HistogramDiff;
3030
import org.eclipse.jgit.diff.RawText;
3131
import org.eclipse.jgit.diff.RawTextComparator;
3232

33-
import com.diffplug.common.base.CharMatcher;
3433
import com.diffplug.common.base.Errors;
3534
import com.diffplug.common.base.Preconditions;
3635
import com.diffplug.common.base.Splitter;
@@ -168,7 +167,9 @@ private void addIntendedLine(String indent, String line) {
168167
* sequence (\n, \r, \r\n).
169168
*/
170169
private static String diff(Builder builder, File file) throws IOException {
171-
String raw = new String(Files.readAllBytes(file.toPath()), builder.formatter.getEncoding());
170+
byte[] rawBytes = Files.readAllBytes(file.toPath());
171+
Charset encoding = builder.formatter.getEncoding();
172+
String raw = new String(rawBytes, encoding);
172173
String rawUnix = LineEnding.toUnix(raw);
173174
String formattedUnix;
174175
if (builder.isPaddedCell) {
@@ -177,61 +178,29 @@ private static String diff(Builder builder, File file) throws IOException {
177178
formattedUnix = builder.formatter.compute(rawUnix, file);
178179
}
179180

180-
if (rawUnix.equals(formattedUnix)) {
181-
// the formatting is fine, so it's a line-ending issue
182-
String formatted = builder.formatter.computeLineEndings(formattedUnix, file);
183-
return diffWhitespaceLineEndings(raw, formatted, false, true);
184-
} else {
185-
return diffWhitespaceLineEndings(rawUnix, formattedUnix, true, false);
181+
String formatted = builder.formatter.computeLineEndings(formattedUnix, file);
182+
// Assume the conversion to UTF-8 is always lossless
183+
// UTF-8 simplifies the implementation of WriteSpaceAwareDiffFormatter, so it could easily
184+
// match for ' ', '\n' and other characters at the byte level.
185+
// jgit diff algorithms are implemented for byte[], so we have to use bytes as well
186+
if (!encoding.equals(StandardCharsets.UTF_8)) {
187+
rawBytes = raw.getBytes(StandardCharsets.UTF_8);
186188
}
189+
byte[] formattedBytes = formatted.getBytes(StandardCharsets.UTF_8);
190+
return visualizeDiff(rawBytes, formattedBytes);
187191
}
188192

189-
/**
190-
* Returns a git-style diff between the two unix strings.
191-
*
192-
* Output has no trailing newlines.
193-
*
194-
* Boolean args determine whether whitespace or line endings will be visible.
195-
*/
196-
private static String diffWhitespaceLineEndings(String dirty, String clean, boolean whitespace, boolean lineEndings) throws IOException {
197-
dirty = visibleWhitespaceLineEndings(dirty, whitespace, lineEndings);
198-
clean = visibleWhitespaceLineEndings(clean, whitespace, lineEndings);
199-
200-
RawText a = new RawText(dirty.getBytes(StandardCharsets.UTF_8));
201-
RawText b = new RawText(clean.getBytes(StandardCharsets.UTF_8));
202-
EditList edits = new EditList();
203-
edits.addAll(MyersDiff.INSTANCE.diff(RawTextComparator.DEFAULT, a, b));
204-
193+
private static String visualizeDiff(byte[] rawBytes, byte[] formattedBytes) throws IOException {
194+
RawText a = new RawText(rawBytes);
195+
RawText b = new RawText(formattedBytes);
196+
EditList edits = new HistogramDiff().diff(RawTextComparator.DEFAULT, a, b);
205197
ByteArrayOutputStream out = new ByteArrayOutputStream();
206-
try (DiffFormatter formatter = new DiffFormatter(out)) {
207-
formatter.format(edits, a, b);
208-
}
209-
String formatted = out.toString(StandardCharsets.UTF_8.name());
210-
211-
// we don't need the diff to show this, since we display newlines ourselves
212-
formatted = formatted.replace("\\ No newline at end of file\n", "");
213-
return NEWLINE_MATCHER.trimTrailingFrom(formatted);
214-
}
215-
216-
private static final CharMatcher NEWLINE_MATCHER = CharMatcher.is('\n');
217-
218-
/**
219-
* Makes the whitespace and/or the lineEndings visible.
220-
*
221-
* MyersDiff wants inputs with only unix line endings. So this ensures that that is the case.
222-
*/
223-
private static String visibleWhitespaceLineEndings(String input, boolean whitespace, boolean lineEndings) {
224-
if (whitespace) {
225-
input = input.replace(' ', MIDDLE_DOT).replace("\t", "\\t");
226-
}
227-
if (lineEndings) {
228-
input = input.replace("\n", "\\n\n").replace("\r", "\\r");
229-
} else {
230-
// we want only \n, so if we didn't replace them above, we'll replace them here.
231-
input = input.replace("\r", "");
232-
}
233-
return input;
198+
// defaultCharset is here so the formatter could select "fancy" or "simple"
199+
// characters for whitespace visualization based on the capabilities of the console
200+
// For instance, if the app is running with file.encoding=ISO-8859-1, then
201+
// the console can't encode fancy whitespace characters, and the formatter would
202+
// resort to simple \r, \n, and so on
203+
new WriteSpaceAwareDiffFormatter(out, Charset.defaultCharset()).format(edits, a, b);
204+
return new String(out.toByteArray(), StandardCharsets.UTF_8);
234205
}
235-
236-
private static final char MIDDLE_DOT = '\u00b7';
237206
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
2+
* Copyright 2016 DiffPlug
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.diffplug.spotless.extra.integration;
17+
18+
import java.io.ByteArrayOutputStream;
19+
import java.io.IOException;
20+
import java.nio.charset.Charset;
21+
import java.nio.charset.CharsetEncoder;
22+
import java.nio.charset.StandardCharsets;
23+
24+
import org.eclipse.jgit.diff.Edit;
25+
import org.eclipse.jgit.diff.EditList;
26+
import org.eclipse.jgit.diff.RawText;
27+
import org.eclipse.jgit.util.IntList;
28+
import org.eclipse.jgit.util.RawParseUtils;
29+
30+
/**
31+
* Formats the diff in Git-like style, however it makes whitespace visible for
32+
* edit-like diffs (when one fragment is replaced with another).
33+
*/
34+
class WriteSpaceAwareDiffFormatter {
35+
private static final int CONTEXT_LINES = 3;
36+
private static final String MIDDLE_DOT = "\u00b7";
37+
private static final String CR = "\u240d";
38+
private static final String LF = "\u240a";
39+
private static final String TAB = "\u21e5";
40+
private static final byte[] MIDDLE_DOT_UTF8 = MIDDLE_DOT.getBytes(StandardCharsets.UTF_8);
41+
private static final byte[] CR_UTF8 = CR.getBytes(StandardCharsets.UTF_8);
42+
private static final byte[] LF_UTF8 = LF.getBytes(StandardCharsets.UTF_8);
43+
private static final byte[] TAB_UTF8 = TAB.getBytes(StandardCharsets.UTF_8);
44+
private static final byte[] SPACE_SIMPLE = new byte[]{' '};
45+
private static final byte[] CR_SIMPLE = new byte[]{'\\', 'r'};
46+
private static final byte[] LF_SIMPLE = new byte[]{'\\', 'n'};
47+
private static final byte[] TAB_SIMPLE = new byte[]{'\\', 't'};
48+
49+
private final ByteArrayOutputStream out;
50+
private final byte[] middleDot;
51+
private final byte[] cr;
52+
private final byte[] lf;
53+
private final byte[] tab;
54+
55+
/**
56+
* Creates the formatter.
57+
* @param out output stream for the resulting diff. The diff would have \n line endings
58+
* @param charset the charset that will be used when printing the results for the end user
59+
*/
60+
public WriteSpaceAwareDiffFormatter(ByteArrayOutputStream out, Charset charset) {
61+
this.out = out;
62+
CharsetEncoder charsetEncoder = charset.newEncoder();
63+
this.middleDot = replacementFor(charsetEncoder, MIDDLE_DOT, MIDDLE_DOT_UTF8, SPACE_SIMPLE);
64+
this.cr = replacementFor(charsetEncoder, CR, CR_UTF8, CR_SIMPLE);
65+
this.lf = replacementFor(charsetEncoder, LF, LF_UTF8, LF_SIMPLE);
66+
this.tab = replacementFor(charsetEncoder, TAB, TAB_UTF8, TAB_SIMPLE);
67+
}
68+
69+
private static byte[] replacementFor(CharsetEncoder charsetEncoder, String value, byte[] fancy, byte[] simple) {
70+
return charsetEncoder.canEncode(value) ? fancy : simple;
71+
}
72+
73+
/**
74+
* Formats the diff.
75+
* @param edits the list of edits to format
76+
* @param a input text a, with \n line endings, with UTF-8 encoding
77+
* @param b input text b, with \n line endings, with UTF-8 encoding
78+
* @throws IOException if formatting fails
79+
*/
80+
public void format(EditList edits, RawText a, RawText b) throws IOException {
81+
IntList linesA = RawParseUtils.lineMap(a.getRawContent(), 0, a.getRawContent().length);
82+
IntList linesB = RawParseUtils.lineMap(b.getRawContent(), 0, b.getRawContent().length);
83+
boolean firstLine = true;
84+
for (int i = 0; i < edits.size(); i++) {
85+
Edit edit = edits.get(i);
86+
int lineA = Math.max(0, edit.getBeginA() - CONTEXT_LINES);
87+
int lineB = Math.max(0, edit.getBeginB() - CONTEXT_LINES);
88+
89+
final int endIdx = findCombinedEnd(edits, i);
90+
final Edit endEdit = edits.get(endIdx);
91+
92+
int endA = Math.min(a.size(), endEdit.getEndA() + CONTEXT_LINES);
93+
int endB = Math.min(b.size(), endEdit.getEndB() + CONTEXT_LINES);
94+
95+
if (firstLine) {
96+
firstLine = false;
97+
} else {
98+
out.write('\n');
99+
}
100+
header(lineA, endA, lineB, endB);
101+
102+
boolean showWhitespace = edit.getType() == Edit.Type.REPLACE;
103+
104+
while (lineA < endA || lineB < endB) {
105+
if (lineA < edit.getBeginA()) {
106+
// Common part before the diff
107+
line(' ', a, lineA, linesA, false);
108+
lineA++;
109+
lineB++;
110+
} else if (lineA < edit.getEndA()) {
111+
line('-', a, lineA, linesA, showWhitespace);
112+
lineA++;
113+
} else if (lineB < edit.getEndB()) {
114+
line('+', b, lineB, linesB, showWhitespace);
115+
lineB++;
116+
} else {
117+
// Common part after the diff
118+
line(' ', a, lineA, linesA, false);
119+
lineA++;
120+
lineB++;
121+
}
122+
123+
if (lineA == edit.getEndA() && lineB == edit.getEndB() && i < endIdx) {
124+
i++;
125+
edit = edits.get(i);
126+
showWhitespace = edit.getType() == Edit.Type.REPLACE;
127+
}
128+
}
129+
}
130+
}
131+
132+
/**
133+
* There might be multiple adjacent diffs, so we need to figure out the latest one in the group.
134+
* @param edits list of edits
135+
* @param i starting edit
136+
* @return the index of the latest edit in the group
137+
*/
138+
private int findCombinedEnd(EditList edits, int i) {
139+
for (; i < edits.size() - 1; i++) {
140+
Edit current = edits.get(i);
141+
Edit next = edits.get(i + 1);
142+
if (current.getEndA() - next.getBeginA() > 2 * CONTEXT_LINES &&
143+
current.getEndB() - next.getBeginB() > 2 * CONTEXT_LINES) {
144+
break;
145+
}
146+
}
147+
return i;
148+
}
149+
150+
private void header(int lineA, int endA, int lineB, int endB) {
151+
out.write('@');
152+
out.write('@');
153+
range('-', lineA + 1, endA - lineA);
154+
range('+', lineB + 1, endB - lineB);
155+
out.write(' ');
156+
out.write('@');
157+
out.write('@');
158+
}
159+
160+
private void range(char prefix, int begin, int length) {
161+
out.write(' ');
162+
out.write(prefix);
163+
if (length == 0) {
164+
writeInt(begin - 1);
165+
out.write(',');
166+
out.write('0');
167+
} else {
168+
writeInt(begin);
169+
if (length > 1) {
170+
out.write(',');
171+
writeInt(length);
172+
}
173+
}
174+
}
175+
176+
private void writeInt(int num) {
177+
String str = Integer.toString(num);
178+
for (int i = 0, len = str.length(); i < len; i++) {
179+
out.write(str.charAt(i));
180+
}
181+
}
182+
183+
private void line(char prefix, RawText a, int lineA, IntList lines, boolean showWhitespace) throws IOException {
184+
out.write('\n');
185+
out.write(prefix);
186+
if (!showWhitespace) {
187+
a.writeLine(out, lineA);
188+
return;
189+
}
190+
byte[] bytes = a.getRawContent();
191+
for (int i = lines.get(lineA + 1), end = lines.get(lineA + 2); i < end; i++) {
192+
byte b = bytes[i];
193+
if (b == ' ') {
194+
out.write(middleDot);
195+
} else if (b == '\t') {
196+
out.write(tab);
197+
} else if (b == '\r') {
198+
out.write(cr);
199+
} else if (b == '\n') {
200+
out.write(lf);
201+
} else {
202+
out.write(b);
203+
}
204+
}
205+
}
206+
}

0 commit comments

Comments
 (0)