Skip to content

Commit 93aab95

Browse files
committed
filter U+FEFF (BOM) when decoding input data
1 parent 4da5d55 commit 93aab95

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

src/common/input/TextDecoder.test.ts

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,8 @@ describe('text encodings', () => {
5858
const decoder = new StringToUtf32();
5959
const target = new Uint32Array(5);
6060
for (let i = 0; i < 65536; ++i) {
61-
// skip surrogate pairs
62-
if (i >= 0xD800 && i <= 0xDFFF) {
61+
// skip surrogate pairs and a BOM
62+
if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
6363
continue;
6464
}
6565
const length = decoder.decode(String.fromCharCode(i), target);
@@ -84,6 +84,14 @@ describe('text encodings', () => {
8484
decoder.clear();
8585
}
8686
});
87+
88+
it('0xFEFF(BOM)', () => {
89+
const decoder = new StringToUtf32();
90+
const target = new Uint32Array(5);
91+
const length = decoder.decode(String.fromCharCode(0xFEFF), target);
92+
assert.equal(length, 0);
93+
decoder.clear();
94+
});
8795
});
8896

8997
it('test strings', () => {
@@ -118,8 +126,8 @@ describe('text encodings', () => {
118126
const decoder = new Utf8ToUtf32();
119127
const target = new Uint32Array(5);
120128
for (let i = 0; i < 65536; ++i) {
121-
// skip surrogate pairs
122-
if (i >= 0xD800 && i <= 0xDFFF) {
129+
// skip surrogate pairs and a BOM
130+
if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
123131
continue;
124132
}
125133
const utf8Data = fromByteString(encode(String.fromCharCode(i)));
@@ -142,6 +150,15 @@ describe('text encodings', () => {
142150
decoder.clear();
143151
}
144152
});
153+
154+
it('0xFEFF(BOM)', () => {
155+
const decoder = new Utf8ToUtf32();
156+
const target = new Uint32Array(5);
157+
const utf8Data = fromByteString(encode(String.fromCharCode(0xFEFF)));
158+
const length = decoder.decode(utf8Data, target);
159+
assert.equal(length, 0);
160+
decoder.clear();
161+
});
145162
});
146163

147164
it('test strings', () => {

src/common/input/TextDecoder.ts

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,10 @@ export class StringToUtf32 {
105105
}
106106
continue;
107107
}
108+
if (code === 0xFEFF) {
109+
// BOM
110+
continue;
111+
}
108112
target[size++] = code;
109113
}
110114
return size;
@@ -286,8 +290,8 @@ export class Utf8ToUtf32 {
286290
continue;
287291
}
288292
codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F);
289-
if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
290-
// illegal codepoint, no i-- here
293+
if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint === 0xFEFF) {
294+
// illegal codepoint or BOM, no i-- here
291295
continue;
292296
}
293297
target[size++] = codepoint;

0 commit comments

Comments
 (0)