Skip to content

Commit 2107f34

Browse files
sttk and phated authored
feat!: Require encoding option to avoid inspecting chunks for UTF-8 encoding (#8)
feat: Remove the dependency on `remove-bom-buffer`
feat: Use node core's `TextDecoder` to process beginning of stream
Co-authored-by: Blaine Bublitz <[email protected]>
1 parent 619ddd6 commit 2107f34

File tree

4 files changed

+47
-43
lines changed

4 files changed

+47
-43
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ var concat = require('concat-stream');
1818
var removeBOM = require('remove-bom-stream');
1919

2020
fs.createReadStream('utf8-file-with-bom.txt')
21-
.pipe(removeBOM())
21+
.pipe(removeBOM('utf-8'))
2222
.pipe(
2323
concat(function (result) {
2424
// result won't have a BOM
@@ -28,9 +28,9 @@ fs.createReadStream('utf8-file-with-bom.txt')
2828

2929
## API
3030

31-
### `removeBOM()`
31+
### `removeBOM(encoding)`
3232

33-
Returns a `through2` stream that will remove a BOM, given the data is a UTF8 Buffer with a BOM at the beginning. If the data is not UTF8 or does not have a BOM, the data is not changed and this becomes a normal passthrough stream.
33+
Returns a `through2` stream that will remove a BOM if the argument `encoding` is `'utf-8'` and the given data is a UTF8 Buffer with a BOM at the beginning. If the `encoding` is not `'utf-8'` or the data does not have a BOM, the data is not changed and this becomes a normal passthrough stream.
3434

3535
## License
3636

index.js

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,56 @@
11
'use strict';
22

33
var through = require('through2');
4-
var removeBom = require('remove-bom-buffer');
4+
var TextDecoder = require('util').TextDecoder;
55

6-
function removeBomStream() {
7-
var state = 0; // 0:Not removed, -1:In removing, 1:Already removed
8-
var buffer = Buffer.alloc(0);
6+
var BOM = '\ufeff';
97

10-
return through(onChunk, onFlush);
8+
function removeBomStream(encoding) {
9+
encoding = (encoding || '').toLowerCase();
10+
var isUTF8 = (encoding === 'utf-8' || encoding === 'utf8');
1111

12-
function removeAndCleanup(data) {
13-
state = 1; // Already removed
12+
// Needed due to https://github.com/nodejs/node/pull/42779
13+
if (!isUTF8) {
14+
return through();
15+
}
1416

15-
buffer = null;
17+
// Only used if encoding is UTF-8
18+
var decoder = new TextDecoder('utf-8', { ignoreBOM: false });
1619

17-
return removeBom(data);
18-
}
20+
var state = 0; // 0:Not removed, -1:In removing, 1:Already removed
21+
22+
return through(onChunk);
1923

20-
function onChunk(data, enc, cb) {
24+
function onChunk(data, _, cb) {
2125
if (state === 1) {
22-
return cb(null, data);
26+
cb(null, data);
27+
return;
2328
}
2429

25-
if (state === 0 /* Not removed */ && data.length >= 7) {
26-
return cb(null, removeAndCleanup(data));
27-
}
30+
try {
31+
state = -1;
2832

29-
state = -1; // In removing
33+
var chunk = decoder.decode(data, { stream: true });
3034

31-
var bufferLength = buffer.length;
32-
var chunkLength = data.length;
33-
var totalLength = bufferLength + chunkLength;
35+
// The first time we have data after a decode, it should have already removed the BOM
36+
if (chunk !== '') {
37+
chunk += decoder.decode(); // end of stream mode and clear inner buffer.
3438

35-
buffer = Buffer.concat([buffer, data], totalLength);
39+
// Node<=v12, TextDecoder#decode returns a BOM if it receives a BOM separately.
40+
// Ref https://github.com/nodejs/node/pull/30132
41+
if (chunk !== BOM) {
42+
state = 1;
43+
var buffer = Buffer.from(chunk, 'utf-8');
3644

37-
if (totalLength >= 7) {
38-
return cb(null, removeAndCleanup(buffer));
39-
}
40-
cb();
41-
}
45+
cb(null, buffer);
46+
return;
47+
}
48+
}
4249

43-
function onFlush(cb) {
44-
if (state === 2 /* Already removed */ || !buffer) {
45-
return cb();
50+
cb();
51+
} catch (err) {
52+
cb(err);
4653
}
47-
48-
cb(null, removeAndCleanup(buffer));
4954
}
5055
}
5156

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
"test": "nyc mocha --async-only"
2323
},
2424
"dependencies": {
25-
"remove-bom-buffer": "^3.0.0",
2625
"through2": "^4.0.2"
2726
},
2827
"devDependencies": {

test/index.js

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ describe('removeBomStream', function () {
2424
}
2525

2626
pipe(
27-
[fs.createReadStream(filepath), removeBomStream(), concat(assert)],
27+
[fs.createReadStream(filepath), removeBomStream('utf-8'), concat(assert)],
2828
done
2929
);
3030
});
@@ -33,7 +33,7 @@ describe('removeBomStream', function () {
3333
var filepath = path.join(__dirname, './fixtures/test.txt');
3434
var fileContent = fs.readFileSync(filepath, 'utf-8');
3535

36-
var rmBom = removeBomStream();
36+
var rmBom = removeBomStream('utf8');
3737
var output = '';
3838
rmBom.on('data', function (d) {
3939
output += d.toString();
@@ -55,7 +55,7 @@ describe('removeBomStream', function () {
5555
}
5656

5757
pipe(
58-
[fs.createReadStream(filepath), removeBomStream(), concat(assert)],
58+
[fs.createReadStream(filepath), removeBomStream('UTF-8'), concat(assert)],
5959
done
6060
);
6161
});
@@ -73,7 +73,7 @@ describe('removeBomStream', function () {
7373
[
7474
fs.createReadStream(filepath),
7575
chunker(1),
76-
removeBomStream(),
76+
removeBomStream('UTF8'),
7777
concat(assert),
7878
],
7979
done
@@ -92,7 +92,7 @@ describe('removeBomStream', function () {
9292
}
9393

9494
pipe(
95-
[fs.createReadStream(filepath), removeBomStream(), concat(assert)],
95+
[fs.createReadStream(filepath), removeBomStream('UTF-8'), concat(assert)],
9696
done
9797
);
9898
});
@@ -101,7 +101,7 @@ describe('removeBomStream', function () {
101101
var filepath = path.join(__dirname, './fixtures/bom-utf8.txt');
102102
var fileContent = fs.readFileSync(filepath, 'utf-8');
103103

104-
var rmBom = removeBomStream();
104+
var rmBom = removeBomStream('utf-8');
105105
var output = '';
106106
rmBom.on('data', function (d) {
107107
output += d.toString();
@@ -123,7 +123,7 @@ describe('removeBomStream', function () {
123123
}
124124

125125
pipe(
126-
[fs.createReadStream(filepath), removeBomStream(), concat(assert)],
126+
[fs.createReadStream(filepath), removeBomStream('utf-16be'), concat(assert)],
127127
done
128128
);
129129
});
@@ -138,7 +138,7 @@ describe('removeBomStream', function () {
138138
}
139139

140140
pipe(
141-
[fs.createReadStream(filepath), removeBomStream(), concat(assert)],
141+
[fs.createReadStream(filepath), removeBomStream('utf-16be'), concat(assert)],
142142
done
143143
);
144144
});
@@ -153,7 +153,7 @@ describe('removeBomStream', function () {
153153
}
154154

155155
pipe(
156-
[fs.createReadStream(filepath), removeBomStream(), concat(assert)],
156+
[fs.createReadStream(filepath), removeBomStream('utf-16le'), concat(assert)],
157157
done
158158
);
159159
});

0 commit comments

Comments
 (0)