Skip to content

Commit 94b46d0

Browse files
committed
refactor(parser): Remove _bootstrap method
1 parent 5ba2990 commit 94b46d0

File tree

5 files changed

+97
-105
lines changed

5 files changed

+97
-105
lines changed

packages/parse5-parser-stream/lib/index.ts

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { Writable } from 'node:stream';
2-
import { Parser, ParserOptions } from 'parse5/dist/parser/index.js';
2+
import { Parser, ParserOptions, defaultParserOptions } from 'parse5/dist/parser/index.js';
33
import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js';
44
import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js';
55

@@ -42,10 +42,9 @@ export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>
4242
constructor(options?: ParserOptions<T>) {
4343
super({ decodeStrings: false });
4444

45-
this.parser = new Parser(options);
46-
47-
this.document = this.parser.treeAdapter.createDocument();
48-
this.parser._bootstrap(this.document, null);
45+
const opts = { ...defaultParserOptions, ...options };
46+
this.parser = new Parser(opts);
47+
this.document = this.parser.document;
4948
}
5049

5150
//WritableStream implementation

packages/parse5/lib/index.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ export function parse<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
2929
html: string,
3030
options?: ParserOptions<T>
3131
): T['document'] {
32-
const parser = new Parser(options);
33-
34-
return parser.parse(html);
32+
return Parser.parse(html, options);
3533
}
3634

3735
/**
@@ -77,9 +75,7 @@ export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterM
7775
fragmentContext = null;
7876
}
7977

80-
const parser = new Parser(options);
81-
82-
return parser.parseFragment(html as string, fragmentContext);
78+
return Parser.parseFragment(html as string, fragmentContext, options);
8379
}
8480

8581
/**

packages/parse5/lib/parser/index.test.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-t
66
import { treeAdapters } from 'parse5-test-utils/utils/common.js';
77
import { NAMESPACES as NS } from '../common/html.js';
88

9-
const origParseFragment = Parser.prototype.parseFragment;
9+
const origParseFragment = Parser.parseFragment;
1010

1111
generateParsingTests('parser', 'Parser', {}, (test, opts) => ({
1212
node: test.fragmentContext
@@ -25,21 +25,25 @@ describe('parser', () => {
2525

2626
describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => {
2727
beforeEach(() => {
28-
Parser.prototype.parseFragment = function <T extends TreeAdapterTypeMap>(
29-
this: Parser<T>,
28+
Parser.parseFragment = function <T extends TreeAdapterTypeMap>(
3029
html: string,
31-
fragmentContext?: T['element']
32-
): { html: string; fragmentContext: T['element'] | null | undefined; options: ParserOptions<T> } {
30+
fragmentContext?: T['element'],
31+
options?: ParserOptions<T>
32+
): {
33+
html: string;
34+
fragmentContext: T['element'] | null | undefined;
35+
options: ParserOptions<T> | undefined;
36+
} {
3337
return {
3438
html,
3539
fragmentContext,
36-
options: this.options,
40+
options,
3741
};
3842
};
3943
});
4044

4145
afterEach(() => {
42-
Parser.prototype.parseFragment = origParseFragment;
46+
Parser.parseFragment = origParseFragment;
4347
});
4448

4549
it('parses correctly', () => {
@@ -63,7 +67,7 @@ describe('parser', () => {
6367

6468
assert.ok(!args.fragmentContext);
6569
expect(args).toHaveProperty('html', html);
66-
assert.ok(!args.options.sourceCodeLocationInfo);
70+
assert.ok(!args.options);
6771
});
6872
});
6973

packages/parse5/lib/parser/index.ts

Lines changed: 66 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
8282
*
8383
* @default `true`
8484
*/
85-
scriptingEnabled?: boolean | undefined;
85+
scriptingEnabled?: boolean;
8686

8787
/**
8888
* Enables source code location information. When enabled, each node (except the root node)
@@ -94,14 +94,14 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
9494
*
9595
* @default `false`
9696
*/
97-
sourceCodeLocationInfo?: boolean | undefined;
97+
sourceCodeLocationInfo?: boolean;
9898

9999
/**
100100
* Specifies the resulting tree format.
101101
*
102102
* @default `treeAdapters.default`
103103
*/
104-
treeAdapter?: TreeAdapter<T> | undefined;
104+
treeAdapter?: TreeAdapter<T>;
105105

106106
/**
107107
* Callback for parse errors.
@@ -111,86 +111,115 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
111111
onParseError?: ParserErrorHandler | null;
112112
}
113113

114+
export const defaultParserOptions = {
115+
scriptingEnabled: true,
116+
sourceCodeLocationInfo: false,
117+
treeAdapter: defaultTreeAdapter,
118+
onParseError: null,
119+
};
120+
114121
//Parser
115122
export class Parser<T extends TreeAdapterTypeMap> {
116-
options: ParserOptions<T>;
117123
treeAdapter: TreeAdapter<T>;
118124
private onParseError: ParserErrorHandler | null;
119125
private currentToken: Token | null = null;
120126

121-
constructor(options?: ParserOptions<T>) {
122-
this.options = {
123-
scriptingEnabled: true,
124-
sourceCodeLocationInfo: false,
125-
...options,
126-
};
127-
127+
public constructor(
128+
public options: Required<ParserOptions<T>>,
129+
public document: T['document'] = options.treeAdapter.createDocument(),
130+
public fragmentContext: T['element'] | null = null
131+
) {
128132
this.treeAdapter = this.options.treeAdapter ??= defaultTreeAdapter as TreeAdapter<T>;
129133
this.onParseError = this.options.onParseError ??= null;
130134

131135
// Always enable location info if we report parse errors.
132136
if (this.onParseError) {
133137
this.options.sourceCodeLocationInfo = true;
134138
}
139+
140+
this.tokenizer = new Tokenizer(this.options);
141+
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
142+
143+
this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
144+
this._setContextModes(fragmentContext ?? document, this.fragmentContextID);
145+
146+
this.openElements = new OpenElementStack(
147+
this.document,
148+
this.treeAdapter,
149+
this.onItemPush.bind(this),
150+
this.onItemPop.bind(this)
151+
);
135152
}
136153

137154
// API
138-
public parse(html: string): T['document'] {
139-
const document = this.treeAdapter.createDocument();
155+
public static parse<T extends TreeAdapterTypeMap>(html: string, options?: ParserOptions<T>): T['document'] {
156+
const opts = {
157+
...defaultParserOptions,
158+
...options,
159+
};
140160

141-
this._bootstrap(document, null);
142-
this.tokenizer.write(html, true);
143-
this._runParsingLoop(null);
161+
const parser = new this(opts);
144162

145-
return document;
163+
parser.tokenizer.write(html, true);
164+
parser._runParsingLoop(null);
165+
166+
return parser.document;
146167
}
147168

148-
public parseFragment(html: string, fragmentContext?: T['parentNode'] | null): T['documentFragment'] {
169+
public static parseFragment<T extends TreeAdapterTypeMap>(
170+
html: string,
171+
fragmentContext?: T['parentNode'] | null,
172+
options?: ParserOptions<T>
173+
): T['documentFragment'] {
174+
const opts: Required<ParserOptions<T>> = {
175+
...defaultParserOptions,
176+
...options,
177+
};
178+
149179
//NOTE: use <template> element as a fragment context if context element was not provided,
150180
//so we will parse in a "forgiving" manner
151-
fragmentContext ??= this.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);
181+
fragmentContext ??= opts.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);
152182

153183
//NOTE: create fake element which will be used as 'document' for fragment parsing.
154184
//This is important for jsdom, where 'document' can't be recreated; without the mock,
155185
//fragment parsing would mess up the main `document`.
156-
const documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
186+
const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);
157187

158-
this._bootstrap(documentMock, fragmentContext);
188+
const parser = new this(opts, documentMock, fragmentContext);
159189

160-
if (this.fragmentContextID === $.TEMPLATE) {
161-
this.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
190+
if (parser.fragmentContextID === $.TEMPLATE) {
191+
parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
162192
}
163193

164-
this._initTokenizerForFragmentParsing();
165-
this._insertFakeRootElement();
166-
this._resetInsertionMode();
167-
this._findFormInFragmentContext();
168-
this.tokenizer.write(html, true);
169-
this._runParsingLoop(null);
194+
parser._initTokenizerForFragmentParsing();
195+
parser._insertFakeRootElement();
196+
parser._resetInsertionMode();
197+
parser._findFormInFragmentContext();
198+
parser.tokenizer.write(html, true);
199+
parser._runParsingLoop(null);
170200

171-
const rootElement = this.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
172-
const fragment = this.treeAdapter.createDocumentFragment();
201+
const rootElement = opts.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
202+
const fragment = opts.treeAdapter.createDocumentFragment();
173203

174-
this._adoptNodes(rootElement, fragment);
204+
parser._adoptNodes(rootElement, fragment);
175205

176206
return fragment;
177207
}
178208

179-
tokenizer!: Tokenizer;
209+
tokenizer: Tokenizer;
210+
180211
stopped = false;
181212
insertionMode = InsertionMode.INITIAL;
182213
originalInsertionMode = InsertionMode.INITIAL;
183214

184-
document!: T['document'];
185-
fragmentContext!: T['element'] | null;
186-
fragmentContextID = $.UNKNOWN;
215+
fragmentContextID: $;
187216

188217
headElement: null | T['element'] = null;
189218
formElement: null | T['element'] = null;
190219
pendingScript: null | T['element'] = null;
191220

192221
openElements!: OpenElementStack<T>;
193-
activeFormattingElements!: FormattingElementList<T>;
222+
activeFormattingElements: FormattingElementList<T>;
194223
private _considerForeignContent = false;
195224

196225
/**
@@ -206,44 +235,6 @@ export class Parser<T extends TreeAdapterTypeMap> {
206235
skipNextNewLine = false;
207236
fosterParentingEnabled = false;
208237

209-
//Bootstrap parser
210-
_bootstrap(document: T['document'], fragmentContext: T['element'] | null): void {
211-
this.tokenizer = new Tokenizer(this.options);
212-
213-
this.stopped = false;
214-
215-
this.insertionMode = InsertionMode.INITIAL;
216-
this.originalInsertionMode = InsertionMode.INITIAL;
217-
218-
this.document = document;
219-
this.fragmentContext = fragmentContext;
220-
this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
221-
this._setContextModes(fragmentContext ?? document, this.fragmentContextID);
222-
223-
this.headElement = null;
224-
this.formElement = null;
225-
this.pendingScript = null;
226-
this.currentToken = null;
227-
228-
this.openElements = new OpenElementStack(
229-
this.document,
230-
this.treeAdapter,
231-
this.onItemPush.bind(this),
232-
this.onItemPop.bind(this)
233-
);
234-
235-
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
236-
237-
this.tmplInsertionModeStack.length = 0;
238-
239-
this.pendingCharacterTokens.length = 0;
240-
this.hasNonWhitespacePendingCharacterToken = false;
241-
242-
this.framesetOk = true;
243-
this.skipNextNewLine = false;
244-
this.fosterParentingEnabled = false;
245-
}
246-
247238
//Errors
248239
_err(token: Token, code: ERR, beforeToken?: boolean): void {
249240
if (!this.onParseError) return;

scripts/generate-parser-feedback-test/index.ts

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { convertTokenToHtml5Lib } from 'parse5-test-utils/utils/generate-tokeniz
66
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
77
import { addSlashes } from 'parse5-test-utils/utils/common.js';
88
import { TokenType, Token } from '../../packages/parse5/dist/common/token.js';
9+
import type { TreeAdapterTypeMap } from '../../packages/parse5/dist/tree-adapters/interface.js';
910

1011
// eslint-disable-next-line no-console
1112
main().catch(console.error);
@@ -42,21 +43,22 @@ function appendToken(dest: Token[], token: Token): void {
4243

4344
function collectParserTokens(html: string): ReturnType<typeof convertTokenToHtml5Lib>[] {
4445
const tokens: Token[] = [];
45-
const parser = new Parser();
4646

47-
parser._processInputToken = function (token): void {
48-
Parser.prototype._processInputToken.call(this, token);
47+
class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
48+
override _processInputToken(token: Token): void {
49+
super._processInputToken(token);
4950

50-
// NOTE: Needed to split attributes of duplicate <html> and <body>
51-
// which are otherwise merged as per tree constructor spec
52-
if (token.type === TokenType.START_TAG) {
53-
token.attrs = [...token.attrs];
54-
}
51+
// NOTE: Needed to split attributes of duplicate <html> and <body>
52+
// which are otherwise merged as per tree constructor spec
53+
if (token.type === TokenType.START_TAG) {
54+
token.attrs = [...token.attrs];
55+
}
5556

56-
appendToken(tokens, token);
57-
};
57+
appendToken(tokens, token);
58+
}
59+
}
5860

59-
parser.parse(html);
61+
ExtendedParser.parse(html);
6062

6163
return tokens.map((token) => convertTokenToHtml5Lib(token));
6264
}

0 commit comments

Comments
 (0)