Skip to content

Commit d1a62a3

Browse files
committed
feat: add turndown-plugin-gfm
1 parent f621309 commit d1a62a3

File tree

5 files changed: +49 additions, -30 deletions

packages/crawler/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
"crawler": "^2.0.2",
2929
"gray-matter": "^4.0.3",
3030
"jsdom": "^26.1.0",
31-
"turndown": "^7.2.0"
31+
"turndown": "^7.2.0",
32+
"turndown-plugin-gfm": "^1.0.2"
3233
},
3334
"publishConfig": {
3435
"access": "public"

packages/crawler/src/common.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
import { LANGUAGE_MAP } from '@next-i18n/const';
22
import { getFileExtension } from '@next-i18n/utils';
33
import TurndownService from 'turndown';
4+
// @ts-ignore
5+
import turndownPluginGfm from 'turndown-plugin-gfm';
6+
47
export const CODE_LANGUAGE_SEP = '===CODE_LANGUAGE_SEP===';
58
export const CODE_LANGUAGE_SPACE = '===CODE_LANGUAGE_SPACE===';
69

710
const turndownService = new TurndownService({
811
codeBlockStyle: 'fenced',
912
});
1013

14+
const gfm = turndownPluginGfm.gfm;
15+
turndownService.use(gfm);
16+
1117
turndownService.addRule(`data-state="closed"`, {
1218
filter: (node) => {
1319
return (

packages/translate/src/chunk.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,5 @@ export function splitIntoChunks(content: string): string[] {
8181
chunks.push(currentChunk);
8282
}
8383

84-
// console.log('chunks-tokens', chunks.map((chunk) => estimateTokens(chunk)));
85-
86-
return chunks;
84+
return chunks.filter((chunk) => chunk.trim() !== '');
8785
}

packages/translate/src/openai.ts

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,35 +38,21 @@ export function checkApiKey() {
3838

3939
export const model = 'deepseek-chat';
4040

41-
export const systemPrompt =
42-
'You are a professional technical translator specializing in software documentation. You are particularly skilled at translating React, web development, and programming terminology, keeping the translations consistent and readable.';
43-
4441
// Helper function to translate a single chunk
4542
async function translateChunk(
4643
chunk: string,
4744
langConfig: LangConfig,
4845
context: string,
46+
needsFrontmatterRules = true,
4947
): Promise<string> {
5048
if (!openai) {
5149
throw new Error('OPENAI_API_KEY is not set.');
5250
}
5351

5452
const textLength = chunk.length;
55-
const prompt = `
56-
Translate the following documentation from English to ${langConfig.name}.
57-
58-
General rules:
59-
• The document is MDX format - ensure all component tags are properly closed (e.g., \`<AppOnly> </AppOnly>\` \`<PagesOnly> </PagesOnly>\` )
60-
• Keep unchanged:
61-
- All code blocks
62-
- Markdown formatting
63-
- HTML tags
64-
- Variables
65-
- Text within \`\`\` code blocks or inline \`code\`
66-
- URLs and file paths
67-
• Maintain the original paragraph structure and heading levels
53+
const systemPrompt = `You are a professional technical translator from English to ${langConfig.name} specializing in software documentation. You are particularly skilled at translating React, web development, and programming terminology, keeping the translations consistent and readable.`;
6854

69-
⚠️ CRITICAL MDX FRONTMATTER RULES ⚠️
55+
const frontmatterRules = `⚠️ CRITICAL MDX FRONTMATTER RULES ⚠️
7056
• NEVER start a frontmatter value with inline code (text between \`backticks\`)
7157
• This applies to ALL inline code including \`<Component>\` tags, \`functions\`, variables, etc.
7258
• In frontmatter (sections between --- marks), ALWAYS rearrange sentences so inline code appears AFTER some text
@@ -144,7 +130,23 @@ Examples of MDX Frontmatter Translation:
144130
title: \`public\` 目錄中的靜態資源
145131
146132
These rules apply ONLY to frontmatter (between --- marks) and are CRITICAL for proper document rendering.
147-
REMEMBER: You must NEVER start with inline code in frontmatter values and ALWAYS include both opening and closing --- delimiters.
133+
REMEMBER: You must NEVER start with inline code in frontmatter values and ALWAYS include both opening and closing --- delimiters.`;
134+
135+
const prompt = `
136+
Please read the following rules carefully:
137+
138+
General rules:
139+
• The document is MDX format - ensure all component tags are properly closed (e.g., \`<AppOnly> </AppOnly>\` \`<PagesOnly> </PagesOnly>\` )
140+
• Keep unchanged:
141+
- All code blocks
142+
- Markdown formatting
143+
- HTML tags
144+
- Variables
145+
- Text within \`\`\` code blocks or inline \`code\`
146+
- URLs and file paths
147+
• Maintain the original paragraph structure and heading levels
148+
149+
${needsFrontmatterRules ? frontmatterRules : ''}
148150
149151
Output format:
150152
• Provide only the translated content
@@ -153,7 +155,7 @@ Output format:
153155
154156
${context}
155157
156-
HERE IS THE TEXT TO TRANSLATE:
158+
NEXT MESSAGE IS THE TEXT TO TRANSLATE, PLEASE TRANSLATE IT TO ${langConfig.name}:
157159
`;
158160

159161
logger.debug(
@@ -165,18 +167,17 @@ HERE IS THE TEXT TO TRANSLATE:
165167
const messages: Array<ChatCompletionMessageParam> = [
166168
{
167169
role: 'system',
168-
content: systemPrompt,
169-
},
170-
{
171-
role: 'user',
172-
content: prompt,
170+
content: systemPrompt + prompt,
173171
},
174172
{
175173
role: 'user',
176174
content: chunk,
177175
},
178176
];
179177

178+
// console.log(systemPrompt + prompt);
179+
// console.log(chunk);
180+
180181
const response = await openai.chat.completions.create({
181182
model: model,
182183
max_completion_tokens: MAX_OUTPUT_TOKENS,
@@ -212,7 +213,7 @@ export async function $translateDocument({
212213

213214
// For small documents, use the direct approach
214215
if (!needsChunking(content)) {
215-
return await translateChunk(content, langConfig, context);
216+
return await translateChunk(content, langConfig, context, true);
216217
}
217218

218219
logger.debug(
@@ -227,7 +228,12 @@ export async function $translateDocument({
227228
logger.debug(`Translating chunk ${i + 1} of ${chunks.length}`);
228229

229230
// Translate the chunk
230-
const translatedChunk = await translateChunk(chunk, langConfig, context);
231+
const translatedChunk = await translateChunk(
232+
chunk,
233+
langConfig,
234+
context,
235+
i === 0,
236+
);
231237

232238
// Add to the complete translated content
233239
if (chunk[0] === '\n' && translatedChunk[0] !== '\n') {

pnpm-lock.yaml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)