Skip to content

Commit ea36ef7

Browse files
committed
fix(parser): Keep html and body end locations
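Currently, we overwrite the end locations of the `html` and `body` elements on EOF, even when they were already set by an explicit end tag. This commit keeps the existing end locations in that case and only falls back to the EOF position when no end tag was seen. The implementation isn't the most elegant solution, so any suggestions for how to improve it are welcome.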
1 parent e1ee2fb commit ea36ef7

File tree

2 files changed

+85
-3
lines changed

2 files changed

+85
-3
lines changed

packages/parse5/lib/parser/index.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,12 +1419,31 @@ function appendCommentToDocument<T extends TreeAdapterTypeMap>(p: Parser<T>, tok
14191419
function stopParsing<T extends TreeAdapterTypeMap>(p: Parser<T>, token: EOFToken): void {
14201420
p.stopped = true;
14211421

1422+
// NOTE: Set end locations for elements that remain on the open element stack.
14221423
if (token.location) {
1423-
// NOTE: generate location info for elements
1424-
// that remains on open element stack
1425-
for (let i = p.openElements.stackTop; i >= 0; i--) {
1424+
// NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
1425+
// This is a problem, as we might overwrite their end position here.
1426+
const target = p.fragmentContext ? 0 : 2;
1427+
for (let i = p.openElements.stackTop; i >= target; i--) {
14261428
p._setEndLocation(p.openElements.items[i], token);
14271429
}
1430+
1431+
// Handle `html` and `body`
1432+
if (!p.fragmentContext && p.openElements.stackTop >= 0) {
1433+
const htmlElement = p.openElements.items[0];
1434+
const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
1435+
if (htmlLocation && !htmlLocation.endTag) {
1436+
p._setEndLocation(htmlElement, token);
1437+
1438+
if (p.openElements.stackTop >= 1) {
1439+
const bodyElement = p.openElements.items[1];
1440+
const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
1441+
if (bodyLocation && !bodyLocation.endTag) {
1442+
p._setEndLocation(bodyElement, token);
1443+
}
1444+
}
1445+
}
1446+
}
14281447
}
14291448
}
14301449

@@ -3321,6 +3340,12 @@ function endTagAfterBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagT
33213340
//the end location explicitly.
33223341
if (p.options.sourceCodeLocationInfo && p.openElements.tagIDs[0] === $.HTML) {
33233342
p._setEndLocation(p.openElements.items[0], token);
3343+
3344+
// Update the body element, if it doesn't have an end tag
3345+
const bodyElement = p.openElements.items[1];
3346+
if (!p.treeAdapter.getNodeSourceCodeLocation(bodyElement)?.endTag) {
3347+
p._setEndLocation(bodyElement, token);
3348+
}
33243349
}
33253350
} else {
33263351
tokenAfterBody(p, token);

packages/parse5/lib/parser/parser-location-info.test.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,63 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => {
172172

173173
assertNodeLocation(textLocation, html.slice(8, 15), html, [html]);
174174
});
175+
176+
test("Should use the HTML element's position for BODY, if BODY isn't closed", () => {
177+
const html = outdent`
178+
<html>
179+
<body>
180+
<p>test</p>
181+
</html>
182+
<!-- comment -->
183+
`;
184+
185+
const opts = {
186+
treeAdapter,
187+
sourceCodeLocationInfo: true,
188+
};
189+
190+
const document = parse5.parse(html, opts);
191+
const htmlEl = treeAdapter.getChildNodes(document)[0];
192+
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];
193+
194+
const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl);
195+
const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl);
196+
197+
assert.ok(htmlLocation?.endTag && bodyLocation);
198+
199+
// HTML element's end tag's start location should be BODY's end location
200+
assert.strictEqual(htmlLocation.endTag.startOffset, bodyLocation.endOffset);
201+
assert.strictEqual(htmlLocation.endTag.startLine, bodyLocation.endLine);
202+
assert.strictEqual(htmlLocation.endTag.startCol, bodyLocation.endCol);
203+
204+
// The HTML element's location should not be the location of EOF
205+
assert.notStrictEqual(htmlLocation.endOffset, html.length);
206+
});
207+
208+
test('Should set HTML location to EOF if no end tag is supplied', () => {
209+
const html = outdent`
210+
<html>
211+
<body>
212+
<p>test</p>
213+
<!-- comment -->
214+
`;
215+
216+
const opts = {
217+
treeAdapter,
218+
sourceCodeLocationInfo: true,
219+
};
220+
221+
const document = parse5.parse(html, opts);
222+
const htmlEl = treeAdapter.getChildNodes(document)[0];
223+
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];
224+
225+
const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl);
226+
const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl);
227+
228+
assert.ok(htmlLocation && bodyLocation);
229+
assert.strictEqual(htmlLocation.endOffset, html.length);
230+
assert.strictEqual(bodyLocation.endOffset, html.length);
231+
});
175232
});
176233

177234
describe('location-info-parser', () => {

0 commit comments

Comments
 (0)