Skip to content

Commit 063340d

Browse files
committed
Fixed a Japanese Shift-JIS detection issue in Node.js and improved the tests to also check the detected encoding, not only the language and confidence
1 parent db15e93 commit 063340d

File tree

2 files changed

+32
-19
lines changed

2 files changed

+32
-19
lines changed

src/config/languageObject.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ module.exports = [
365365
name: "japanese",
366366
count: 0,
367367
utfRegex: new RegExp(//, flag),
368-
isoRegex: new RegExp(/»/, flag),
368+
isoRegex: new RegExp(/»|‚Á‚Ä/, flag),
369369
encoding: "Shift-JIS",
370370
utfFrequency: { low: 0.004257, high: 0.006585 },
371371
isoFrequency: { low: 0.004286, high: 0.004653 }

testing/subtitle-database.test.js

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,39 @@ checkLocation("umd", "language-encoding.min.js");
1010
// Test all files in the 'language folders' dataset
1111
const folderPath = "/home/gignu/Documents/Subtitle Database/Language Folders/";
1212
const testFiles = getFiles(folderPath);
13+
const minConfidence = 0.95;
1314

1415
testFiles.forEach((file) => {
1516
languageEncoding(file)
1617
.then((fileInfo) => {
1718
const testFileArray = file.split("/");
18-
const expectedLanguage = testFileArray[testFileArray.length - 2].toLowerCase().replace(" ", "-");
19+
const folderNameArr = testFileArray[testFileArray.length - 2].split('_');
20+
const expectedLanguage = folderNameArr ? folderNameArr[0] : null;
21+
const expectedEncoding = folderNameArr ? folderNameArr[1] : null;
1922

20-
if (fileInfo.language !== expectedLanguage)
21-
testFailed("language");
23+
if (!expectedLanguage) {
24+
console.error("Expected language not found in folder name:", file.directoryHandle?.name);
25+
setError(file, fileInfo);
2226

23-
if (fileInfo.confidence.encoding < 0.95)
24-
testFailed("confidence");
27+
} else if (!expectedEncoding) {
28+
console.error("Expected encoding not found in folder name:", file.directoryHandle?.name);
29+
setError(file, fileInfo);
2530

26-
function testFailed(issue) {
27-
console.error("Test case failed:");
28-
switch (issue) {
29-
case "language":
30-
console.info("Expected language:", expectedLanguage);
31-
console.info("Detected language:", fileInfo.language);
32-
case "confidence":
33-
console.info("Confidence score too low!");
34-
console.info("fileInfo.confidence.encoding:", fileInfo.confidence.encoding);
35-
}
36-
console.info(fileInfo);
37-
console.info("file:", file);
38-
process.exit(1);
31+
} else if (!fileInfo.confidence.encoding || fileInfo.confidence.encoding < minConfidence) {
32+
console.error("Encoding Confidence too low:", fileInfo.confidence.encoding);
33+
setError(file, fileInfo);
34+
35+
} else if (!fileInfo.confidence.language || fileInfo.confidence.language < minConfidence) {
36+
console.error("Language Confidence too low:", fileInfo.confidence.language);
37+
setError(file, fileInfo);
38+
39+
} else if (fileInfo.language !== expectedLanguage) {
40+
console.error(`Language mismatch! Expected ${expectedLanguage} but got ${fileInfo.language}`);
41+
setError(file, fileInfo);
42+
43+
} else if (fileInfo.encoding !== expectedEncoding) {
44+
console.error(`Encoding mismatch! Expected ${expectedEncoding} but got ${fileInfo.encoding}`);
45+
setError(file, fileInfo);
3946
}
4047
})
4148
.catch((error) => {
@@ -66,3 +73,9 @@ function checkLocation(folder, file) {
6673
process.exit(1);
6774
}
6875
}
76+
77+
function setError(file, fileInfo) {
78+
console.info('fileInfo:', fileInfo);
79+
console.info('file:', file);
80+
process.exit(1);
81+
}

0 commit comments

Comments
 (0)