Skip to content

Commit d938786

Browse files
fb55, wooorm, 43081j
authored
Move fork back into main project (#362)
- Ported to TypeScript
- Switched to npm workspaces, in favour of `lerna`
- Switched to Jest as the test runner
- Switched to the `entities` module for decoding entities
  - A version of parse5's decoding logic was adapted for `entities`. Adopting this dependency allows us to share the maintenance with the `entities` & `htmlparser2` modules.
- Moved the docs back to TSDoc comments
- Switched to the state machine pattern of `htmlparser2` for tokenizer
- No more mixins: Merged location & error mixins into the main classes
- Introduced tag IDs, to avoid branching over large numbers of strings.
- Introduced Maps and Sets where appropriate
- Switched the order of the formatted elements list
- Introduced array helpers in a lot of places
- Updated tests to no longer build objects of tests (used `describe`/`it` constructs instead)

Co-authored-by: Titus <[email protected]>
Co-authored-by: 43081j <[email protected]>
1 parent 1930305 commit d938786

File tree

162 files changed

+21127
-33332
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

162 files changed

+21127
-33332
lines changed

.eslintignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
packages/parse5/lib/tokenizer/named-entity-data.js
2-
test/benchmark/node_modules/**/*.js
3-
test/memory_benchmark/node_modules/**/*.js
1+
test/data/html5lib-tests
2+
packages/*/dist/
3+
test/dist/
44
node_modules

.eslintrc.json

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"env": {
33
"es2020": true,
44
"node": true,
5-
"mocha": true
5+
"jest": true
66
},
77
"extends": ["eslint:recommended", "prettier", "plugin:unicorn/recommended"],
88
"rules": {
@@ -21,6 +21,7 @@
2121
}
2222
],
2323
"prefer-template": "error",
24+
"arrow-body-style": ["error", "as-needed"],
2425

2526
"unicorn/no-null": "off",
2627
"unicorn/prevent-abbreviations": "off",
@@ -30,10 +31,26 @@
3031
"unicorn/no-array-reduce": "off",
3132
"unicorn/no-for-loop": "off",
3233
"unicorn/consistent-destructuring": "off",
33-
"unicorn/prefer-switch": ["error", { "emptyDefaultCase": "do-nothing-comment" }],
34-
"unicorn/number-literal-case": "off"
34+
"unicorn/prefer-switch": ["error", { "emptyDefaultCase": "do-nothing-comment" }]
3535
},
3636
"parserOptions": {
3737
"sourceType": "module"
38-
}
38+
},
39+
"overrides": [
40+
{
41+
"files": "*.ts",
42+
"extends": [
43+
"plugin:@typescript-eslint/eslint-recommended",
44+
"plugin:@typescript-eslint/recommended",
45+
"prettier"
46+
],
47+
"rules": {
48+
"@typescript-eslint/no-non-null-assertion": "warn",
49+
"@typescript-eslint/no-explicit-any": "warn",
50+
"@typescript-eslint/explicit-function-return-type": "error",
51+
52+
"@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]
53+
}
54+
}
55+
]
3956
}

.gitattributes

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# Exclude the HTML files from GitHub's language statistics
22
# https://github.com/github/linguist#using-gitattributes
3-
test/data/* linguist-vendored
3+
packages/test-utils/data/* linguist-vendored

.gitignore

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
node_modules
55
docs/build
66
docs/05_api_reference.md
7-
package-lock.json
8-
bench/package-lock.json
7+
packages/*/dist/
8+
test/dist/
99
.DS_Store
10+
tsconfig.tsbuildinfo

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
[submodule "test/data/html5lib-tests"]
1+
[submodule "html5lib-tests"]
22
path = test/data/html5lib-tests
33
url = https://github.com/HTMLParseErrorWG/html5lib-tests

.husky/pre-commit

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/sh
2+
. "$(dirname "$0")/_/husky.sh"
3+
4+
npm run pre-commit

.prettierignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1-
packages/parse5/lib/tokenizer/named-entity-data.js
1+
packages/*/dist/
2+
test/dist/
23
docs
4+
test/data/html5lib-tests

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
</p>
1010

1111
<p align="center">
12-
<a href="https://travis-ci.org/inikulin/parse5"><img alt="Build Status" src="https://api.travis-ci.org/inikulin/parse5.svg"></a>
12+
<a href="https://github.com/inikulin/parse5/actions/workflows/nodejs-test.yml"><img alt="Build Status" src="https://img.shields.io/github/workflow/status/inikulin/parse5/Node.js%20CI?label=tests&style=flat"></a>
1313
<a href="https://www.npmjs.com/package/parse5"><img alt="NPM Version" src="https://img.shields.io/npm/v/parse5.svg"></a>
1414
<a href="https://npmjs.org/package/parse5"><img alt="Downloads" src="http://img.shields.io/npm/dm/parse5.svg"></a>
1515
<a href="https://npmjs.org/package/parse5"><img alt="Downloads total" src="http://img.shields.io/npm/dt/parse5.svg"></a>

bench/memory/named-entity-data.js

Lines changed: 0 additions & 13 deletions
This file was deleted.

bench/memory/sax-parser.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import * as fs from 'node:fs';
1+
import { readFile } from 'node:fs/promises';
22
import format from 'human-format';
33
import promisifyEvent from 'promisify-event';
44
import memwatch from '@airbnb/node-memwatch';
5-
import SAXParser from '../../packages/parse5-sax-parser/lib/index.js';
5+
import { SAXParser } from '../../packages/parse5-sax-parser/dist/index.js';
66

77
main();
88

@@ -15,7 +15,7 @@ async function main() {
1515
let heapDiff = null;
1616

1717
memwatch.on('stats', (stats) => {
18-
maxMemUsage = Math.max(maxMemUsage, stats['current_base']);
18+
maxMemUsage = Math.max(maxMemUsage, stats.used_heap_size);
1919
});
2020

2121
startDate = new Date();
@@ -35,7 +35,7 @@ async function main() {
3535
}
3636

3737
async function parse() {
38-
const data = fs.readFileSync('../test/data/huge-page/huge-page.html', 'utf8');
38+
const data = await readFile(new URL('../../test/data/huge-page/huge-page.html', import.meta.url), 'utf8');
3939
let parsedDataSize = 0;
4040
const stream = new SAXParser();
4141

bench/package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
{
22
"name": "parse5-benchmarks",
3+
"private": "true",
34
"type": "module",
45
"version": "1.0.0",
56
"description": "parse5 regression benchmarks",
67
"author": "Ivan Nikulin <[email protected]>",
78
"license": "MIT",
89
"dependencies": {
910
"benchmark": "^2.1.4",
10-
"human-format": "^0.7.0",
11+
"human-format": "^0.11.0",
1112
"@airbnb/node-memwatch": "^2.0.0",
12-
"parse5": "*",
13+
"parse5": "npm:parse5",
1314
"promisify-event": "^1.0.0"
1415
}
1516
}

bench/perf/index.js

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import { readFileSync, createReadStream, readdirSync } from 'node:fs';
22
import Benchmark from 'benchmark';
3-
import { loadTreeConstructionTestData } from '../../test/utils/generate-parsing-tests.js';
4-
import { loadSAXParserTestData } from '../../test/utils/load-sax-parser-test-data.js';
5-
import { treeAdapters, WritableStreamStub } from '../../test/utils/common.js';
6-
import * as parse5 from '../../packages/parse5/lib/index.js';
7-
import { ParserStream as parse5Stream } from '../../packages/parse5-parser-stream/lib/index.js';
3+
import { loadTreeConstructionTestData } from 'parse5-test-utils/dist/generate-parsing-tests.js';
4+
import { loadSAXParserTestData } from 'parse5-test-utils/dist/load-sax-parser-test-data.js';
5+
import { treeAdapters, WritableStreamStub } from 'parse5-test-utils/dist/common.js';
6+
import * as parse5 from '../../packages/parse5/dist/index.js';
7+
import { ParserStream as parse5Stream } from '../../packages/parse5-parser-stream/dist/index.js';
88
import * as parse5Upstream from 'parse5';
99

1010
const hugePagePath = new URL('../../test/data/huge-page/huge-page.html', import.meta.url);
@@ -46,19 +46,21 @@ global.runMicro = function (parser) {
4646
const pages = loadSAXParserTestData().map((test) => test.src);
4747

4848
global.runPages = function (parser) {
49-
for (let j = 0; j < pages.length; j++) {
50-
parser.parse(pages[j]);
49+
for (const page of pages) {
50+
parser.parse(page);
5151
}
5252
};
5353

5454
// Stream data
55-
global.files = readdirSync(saxPath).map((dirName) => {
56-
return new URL(`${dirName}/src.html`, saxPath).pathname;
57-
});
55+
global.files = readdirSync(saxPath).map((dirName) => new URL(`${dirName}/src.html`, saxPath).pathname);
5856

5957
// Utils
6058
function getHz(suite, testName) {
61-
return suite.find((t) => t.name === testName).hz;
59+
for (let i = 0; i < suite.length; i++) {
60+
if (suite[i].name === testName) {
61+
return suite[i].hz;
62+
}
63+
}
6264
}
6365

6466
function runBench({ name, workingCopyFn, upstreamFn, defer = false }) {

lerna.json

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)