diff --git a/.github/workflows/lint-404s.yml b/.github/workflows/lint-404s.yml
index c665495a0a990..f919bac9f40d9 100644
--- a/.github/workflows/lint-404s.yml
+++ b/.github/workflows/lint-404s.yml
@@ -23,6 +23,8 @@ jobs:
               - 'docs/**'
               - 'includes/**'
               - 'platform-includes/**'
+              - 'scripts/lint-404s/**'
+              - 'app/api/source-map/**'
             dev-docs:
               - 'develop-docs/**'
       - uses: oven-sh/setup-bun@v2
@@ -30,26 +32,51 @@ jobs:
           bun-version: latest
 
       - uses: actions/cache@v4
-        id: cache
+        id: cache-node-modules
         with:
           path: |
             ${{ github.workspace }}/node_modules
-            ${{ github.workspace }}/.next/cache
             ${{ github.workspace }}/.eslintcache
-          key: node-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-${{ hashFiles('**/yarn.lock') }}
+          key: node-modules-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-${{ hashFiles('**/yarn.lock') }}
           restore-keys: |
-            node-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-
+            node-modules-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-
+
+      # Cache the Next.js build output so the build can be skipped when nothing changed.
+      # The key must cover the docs content as well as the app source: an exact hit skips
+      # the build below, and a stale build would make the link checker test old pages.
+      # Separate caches for docs and dev-docs since they produce different outputs.
+      - uses: actions/cache@v4
+        id: cache-nextjs-docs
+        if: steps.filter.outputs.docs == 'true'
+        with:
+          path: |
+            ${{ github.workspace }}/.next
+          key: nextjs-build-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-${{ hashFiles('src/**', 'app/**', 'docs/**', 'includes/**', 'platform-includes/**', 'next.config.ts', 'tsconfig.json', 'tailwind.config.mjs') }}
+          restore-keys: |
+            nextjs-build-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-
+            nextjs-build-docs-${{ runner.os }}-
+
+      - uses: actions/cache@v4
+        id: cache-nextjs-dev-docs
+        if: steps.filter.outputs.dev-docs == 'true'
+        with:
+          path: |
+            ${{ github.workspace }}/.next
+          key: nextjs-build-dev-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-${{ hashFiles('src/**', 'app/**', 'develop-docs/**', 'next.config.ts', 'tsconfig.json', 'tailwind.config.mjs') }}
+          restore-keys: |
+            nextjs-build-dev-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-
+            nextjs-build-dev-docs-${{ runner.os }}-
 
       - run: yarn install --frozen-lockfile
 
       - run: yarn next build
-        if: steps.filter.outputs.docs == 'true'
+        if: steps.filter.outputs.docs == 'true' && steps.cache-nextjs-docs.outputs.cache-hit != 'true'
         env:
           SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
           NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
 
       - run: yarn build:developer-docs
-        if: steps.filter.outputs.dev-docs == 'true'
+        if: steps.filter.outputs.dev-docs == 'true' && steps.cache-nextjs-dev-docs.outputs.cache-hit != 'true'
         env:
           SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
           NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
diff --git a/app/api/source-map/route.ts b/app/api/source-map/route.ts
new file mode 100644
index 0000000000000..c7a273acbf727
--- /dev/null
+++ b/app/api/source-map/route.ts
@@ -0,0 +1,48 @@
+import {NextResponse} from 'next/server';
+
+import {apiCategories} from 'sentry-docs/build/resolveOpenAPI';
+import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/frontmatter';
+import {isDeveloperDocs} from 'sentry-docs/isDeveloperDocs';
+
+/**
+ * API endpoint that returns a mapping of slugs to their source file paths.
+ * This is used by the 404 link checker to deduplicate pages that share the same source.
+ */
+export async function GET() {
+  // Copy the array: the front matter getters hand out one shared, memoized array, and the
+  // API pages appended below must not leak into it or pile up on repeated requests.
+  const docs = [
+    ...(await (isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter())),
+  ];
+
+  // For non-developer docs, add API-generated pages (they have undefined sourcePath)
+  if (!isDeveloperDocs) {
+    const categories = await apiCategories();
+    categories.forEach(category => {
+      docs.push({
+        title: category.name,
+        slug: `api/${category.slug}`,
+        sourcePath: undefined,
+      });
+
+      category.apis.forEach(api => {
+        docs.push({
+          title: api.name,
+          slug: `api/${category.slug}/${api.slug}`,
+          sourcePath: undefined,
+        });
+      });
+    });
+  }
+
+  const sourceMap: Record<string, string | null> = {};
+
+  for (const doc of docs) {
+    // Normalize slug (remove leading and trailing slashes to match main.ts trimSlashes)
+    const slug = doc.slug.replace(/(^\/|\/$)/g, '');
+    // sourcePath will be null for API-generated pages
+    sourceMap[slug] = doc.sourcePath ?? null;
+  }
+
+  return NextResponse.json(sourceMap);
+}
diff --git a/app/sitemap.ts b/app/sitemap.ts
index 173989e0a6fae..cfb22fb2975a3 100644
--- a/app/sitemap.ts
+++ b/app/sitemap.ts
@@ -1,7 +1,7 @@
 import type {MetadataRoute} from 'next';
 
+import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/frontmatter';
 import {isDeveloperDocs} from 'sentry-docs/isDeveloperDocs';
-import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/mdx';
 
 export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
   if (isDeveloperDocs) {
diff --git a/next.config.ts b/next.config.ts
index c5a9d9ad67acc..0de6540ad6029 100644
--- a/next.config.ts
+++ b/next.config.ts
@@ -4,6 +4,11 @@ import {withSentryConfig} from '@sentry/nextjs';
 import {REMOTE_IMAGE_PATTERNS} from './src/config/images';
 import {redirects} from './redirects.js';
 
+// Exclude build-time-only dependencies from serverless function bundles to stay under
+// Vercel's 250MB limit. These packages (esbuild, mdx-bundler, sharp, etc.) are only
+// needed during the build process to compile MDX and optimize assets. The compiled
+// output is used at runtime, so bundling these ~150-200MB of dependencies would bloat
+// functions unnecessarily and cause deployment failures.
 const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
   ? {
       '/**/*': [
@@ -13,6 +18,24 @@ const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
         './.next/cache/mdx-bundler/**/*',
         './.next/cache/md-exports/**/*',
         'docs/**/*',
+        // Exclude heavy build dependencies
+        'node_modules/@esbuild/**/*',
+        'node_modules/esbuild/**/*',
+        'node_modules/@aws-sdk/**/*',
+        'node_modules/@google-cloud/**/*',
+        'node_modules/prettier/**/*',
+        'node_modules/@prettier/**/*',
+        'node_modules/sharp/**/*',
+        'node_modules/mermaid/**/*',
+        // Exclude MDX processing dependencies
+        'node_modules/mdx-bundler/**/*',
+        'node_modules/rehype-preset-minify/**/*',
+        'node_modules/rehype-prism-plus/**/*',
+        'node_modules/rehype-prism-diff/**/*',
+        'node_modules/remark-gfm/**/*',
+        'node_modules/remark-mdx-images/**/*',
+        'node_modules/unified/**/*',
+        'node_modules/rollup/**/*',
       ],
     }
   : {
@@ -23,7 +46,24 @@ const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
         './.next/cache/md-exports/**/*',
         './apps/**/*',
         'develop-docs/**/*',
-        'node_modules/@esbuild/*',
+        // Exclude heavy build dependencies
+        'node_modules/@esbuild/**/*',
+        'node_modules/esbuild/**/*',
+        'node_modules/@aws-sdk/**/*',
+        'node_modules/@google-cloud/**/*',
+        'node_modules/prettier/**/*',
+        'node_modules/@prettier/**/*',
+        'node_modules/sharp/**/*',
+        'node_modules/mermaid/**/*',
+        // Exclude MDX processing dependencies
+        'node_modules/mdx-bundler/**/*',
+        'node_modules/rehype-preset-minify/**/*',
+        'node_modules/rehype-prism-plus/**/*',
+        'node_modules/rehype-prism-diff/**/*',
+        'node_modules/remark-gfm/**/*',
+        'node_modules/remark-mdx-images/**/*',
+        'node_modules/unified/**/*',
+        'node_modules/rollup/**/*',
       ],
       '/platform-redirect': [
         '**/*.gif',
@@ -38,7 +78,6 @@ const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
         'public/og-images/**/*',
       ],
       'sitemap.xml': [
-        'docs/**/*',
         'public/mdx-images/**/*',
         'public/og-images/**/*',
         '**/*.gif',
@@ -57,7 +96,22 @@ if (process.env.NODE_ENV !== 'development' && !process.env.NEXT_PUBLIC_SENTRY_DS
 const nextConfig = {
   pageExtensions: ['js', 'jsx', 'mdx', 'ts', 'tsx', 'mdx'],
   trailingSlash: true,
-  serverExternalPackages: ['rehype-preset-minify'],
+  serverExternalPackages: [
+    'rehype-preset-minify',
+    'esbuild',
+    '@esbuild/darwin-arm64',
+    '@esbuild/darwin-x64',
+    '@esbuild/linux-arm64',
+    '@esbuild/linux-x64',
+    '@esbuild/win32-x64',
+    'mdx-bundler',
+    'sharp',
+    '@aws-sdk/client-s3',
+    '@google-cloud/storage',
+    'prettier',
+    '@prettier/plugin-xml',
+    'mermaid',
+  ],
   outputFileTracingExcludes,
   images: {
     contentDispositionType: 'inline', // "open image in new tab" instead of downloading
diff --git a/scripts/lint-404s/README.md b/scripts/lint-404s/README.md
new file mode 100644
index 0000000000000..6db24d2b8955d
--- /dev/null
+++ b/scripts/lint-404s/README.md
@@ -0,0 +1,65 @@
+# 404 Link Checker
+
+This script checks all documentation pages for broken internal links (404s).
+
+## Usage
+
+```bash
+# Basic usage (with deduplication - recommended)
+bun ./scripts/lint-404s/main.ts
+
+# Show progress for each page
+bun ./scripts/lint-404s/main.ts --progress
+
+# Skip deduplication and check all pages (for debugging)
+bun ./scripts/lint-404s/main.ts --skip-deduplication
+
+# Filter to a specific path
+bun ./scripts/lint-404s/main.ts --path platforms/javascript
+```
+
+## Deduplication
+
+By default, the checker **deduplicates common files** to improve performance.
+
+### Why?
+
+The Sentry docs use a "common" file system where documentation is shared across multiple platforms. For example:
+
+- `/platforms/apple/common/configuration/index.mdx` is rendered as:
+  - `/platforms/apple/guides/ios/configuration/`
+  - `/platforms/apple/guides/macos/configuration/`
+  - `/platforms/apple/guides/watchos/configuration/`
+  - ... and many more
+
+Without deduplication, the checker would fetch and test the same content dozens of times, which:
+
+- Takes much longer to run
+- Wastes CI resources
+- Provides no additional value (the content is identical)
+
+### How it works
+
+1. The checker fetches a source map from `/api/source-map` that maps each slug to its source file
+2. It tracks which source files have been checked
+3. For common files, it only checks the first instance
+4. **API-generated pages** are always checked (they have no source file)
+
+This typically reduces the number of pages checked from **~9,000 to ~2,500**, a **72% reduction**.
+
+### When to use `--skip-deduplication`
+
+Use this flag to skip deduplication and verify that all rendered pages work correctly, even if they share the same source. This is rarely necessary but can help debug issues with:
+
+- Path routing
+- Platform-specific rendering bugs
+- Edge cases in the build system
+
+## Ignore List
+
+The `ignore-list.txt` file contains paths that should be skipped during checking. Add paths here (one per line) if they're known to be inaccessible or are special cases.
+
+## Exit Codes
+
+- `0` - No 404s found
+- `1` - 404s were detected
diff --git a/scripts/lint-404s/main.ts b/scripts/lint-404s/main.ts
index 706d42ff2f238..fda76e7df46a6 100644
--- a/scripts/lint-404s/main.ts
+++ b/scripts/lint-404s/main.ts
@@ -13,6 +13,7 @@ const trimSlashes = (s: string) => s.replace(/(^\/|\/$)/g, '');
 const ignoreListFile = path.join(dirname(import.meta.url), './ignore-list.txt');
 
 const showProgress = process.argv.includes('--progress');
+const deduplicatePages = !process.argv.includes('--skip-deduplication');
 
 // Get the path filter if specified
 const pathFilterIndex = process.argv.indexOf('--path');
@@ -35,22 +36,77 @@ async function fetchWithFollow(url: URL | string): Promise<Response> {
   return r;
 }
 
+async function deduplicateSlugs(
+  allSlugs: string[]
+): Promise<{skippedCount: number; slugsToCheck: string[]}> {
+  try {
+    const response = await fetch(`${baseURL}api/source-map`);
+    if (!response.ok) {
+      throw new Error(`source map request returned HTTP ${response.status}`);
+    }
+    const sourceMap: Record<string, string | null> = await response.json();
+
+    const checkedSources = new Set<string>();
+    const slugsToCheck: string[] = [];
+    let skippedCount = 0;
+
+    for (const slug of allSlugs) {
+      // Same normalization as route.ts (remove leading and trailing slashes)
+      const normalizedSlug = trimSlashes(slug);
+      const sourcePath = sourceMap[normalizedSlug];
+
+      // Always check API-generated pages (no source file)
+      if (!sourcePath) {
+        slugsToCheck.push(slug);
+        continue;
+      }
+
+      // Skip if we've already checked this source file
+      if (checkedSources.has(sourcePath)) {
+        skippedCount++;
+        continue;
+      }
+
+      // First time seeing this source file
+      checkedSources.add(sourcePath);
+      slugsToCheck.push(slug);
+    }
+
+    return {skippedCount, slugsToCheck};
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    console.warn('⚠️ Failed to fetch source map:', reason);
+    console.warn('Falling back to checking all pages...\n');
+    return {skippedCount: 0, slugsToCheck: allSlugs};
+  }
+}
+
 async function main() {
   const sitemap = await fetch(`${baseURL}sitemap.xml`).then(r => r.text());
 
-  const slugs = [...sitemap.matchAll(/<loc>([^<]*)<\/loc>/g)]
+  const allSlugs = [...sitemap.matchAll(/<loc>([^<]*)<\/loc>/g)]
     .map(l => l[1])
     .map(url => trimSlashes(new URL(url).pathname))
     .filter(Boolean)
     .filter(slug => (pathFilter ? slug.startsWith(pathFilter) : true));
-  const allSlugsSet = new Set(slugs);
-
-  if (pathFilter) {
-    console.log('Checking 404s on %d pages in /%s', slugs.length, pathFilter);
-  } else {
-    console.log('Checking 404s on %d pages', slugs.length);
+  const allSlugsSet = new Set(allSlugs);
+
+  // Deduplicate pages with same source file (default behavior)
+  const {skippedCount, slugsToCheck} = deduplicatePages
+    ? await deduplicateSlugs(allSlugs)
+    : {skippedCount: 0, slugsToCheck: allSlugs};
+
+  if (skippedCount > 0) {
+    console.log(
+      'Deduplication: checking %d unique pages (skipped %d duplicates)\n',
+      slugsToCheck.length,
+      skippedCount
+    );
   }
 
+  const pathInfo = pathFilter ? ` in /${pathFilter}` : '';
+  console.log('Checking 404s on %d pages%s', slugsToCheck.length, pathInfo);
+
   const all404s: {page404s: Link[]; slug: string}[] = [];
 
   // check if the slug equivalent of the href is in the sitemap
@@ -100,7 +156,7 @@ async function main() {
     return false;
   }
 
-  for (const slug of slugs) {
+  for (const slug of slugsToCheck) {
     const pageUrl = new URL(slug, baseURL);
     const now = performance.now();
     const html = await fetchWithFollow(pageUrl.href).then(r => r.text());
@@ -134,7 +190,7 @@ async function main() {
   }
 
   if (all404s.length === 0) {
-    console.log('\n\n🎉 No 404s found');
+    console.log('\n🎉 No 404s found');
     return false;
   }
   const numberOf404s = all404s.map(x => x.page404s.length).reduce((a, b) => a + b, 0);
diff --git a/src/frontmatter.ts b/src/frontmatter.ts
new file mode 100644
index 0000000000000..d9bb794a3c584
--- /dev/null
+++ b/src/frontmatter.ts
@@ -0,0 +1,110 @@
+import matter from 'gray-matter';
+import {readFile} from 'node:fs/promises';
+import path from 'node:path';
+import {limitFunction} from 'p-limit';
+
+import getAllFilesRecursively from './files';
+import {FrontMatter} from './types';
+import {isNotNil} from './utils';
+
+const root = process.cwd();
+const FILE_CONCURRENCY_LIMIT = 20;
+
+// Turns a file path relative to docs/ into its slug: drops the platforms/ prefix,
+// a trailing _category_ or index file name, and the .md/.mdx extension.
+const formatSlug = (slug: string): string =>
+  slug
+    .replace(/^platforms\//, '')
+    .replace(/\/_category_\.mdx?$/, '')
+    .replace(/\/index\.mdx?$/, '')
+    .replace(/\.mdx?$/, '');
+
+let getDocsFrontMatterCache: Promise<FrontMatter[]> | undefined;
+
+/**
+ * Front matter for every markdown file under `docs/` except those in a `common/` folder.
+ * The result is memoized: all callers share one array, so treat it as read-only.
+ */
+export function getDocsFrontMatter(): Promise<FrontMatter[]> {
+  if (!getDocsFrontMatterCache) {
+    getDocsFrontMatterCache = getDocsFrontMatterUncached();
+  }
+  return getDocsFrontMatterCache;
+}
+
+async function getDocsFrontMatterUncached(): Promise<FrontMatter[]> {
+  const docsPath = path.join(root, 'docs');
+  const files = await getAllFilesRecursively(docsPath);
+  const allFrontMatter: FrontMatter[] = [];
+
+  await Promise.all(
+    files.map(
+      limitFunction(
+        async file => {
+          const fileName = file.slice(docsPath.length + 1);
+          if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
+            return;
+          }
+
+          if (fileName.includes('/common/')) {
+            return;
+          }
+
+          const source = await readFile(file, 'utf8');
+          const {data: frontmatter} = matter(source);
+          allFrontMatter.push({
+            ...(frontmatter as FrontMatter),
+            slug: formatSlug(fileName),
+            sourcePath: path.join('docs', fileName),
+          });
+        },
+        {concurrency: FILE_CONCURRENCY_LIMIT}
+      )
+    )
+  );
+
+  return allFrontMatter;
+}
+
+let getDevDocsFrontMatterCache: Promise<FrontMatter[]> | undefined;
+
+/**
+ * Front matter for every markdown file under `develop-docs/`.
+ * The result is memoized: all callers share one array, so treat it as read-only.
+ */
+export function getDevDocsFrontMatter(): Promise<FrontMatter[]> {
+  if (!getDevDocsFrontMatterCache) {
+    getDevDocsFrontMatterCache = getDevDocsFrontMatterUncached();
+  }
+  return getDevDocsFrontMatterCache;
+}
+
+async function getDevDocsFrontMatterUncached(): Promise<FrontMatter[]> {
+  const folder = 'develop-docs';
+  const docsPath = path.join(root, folder);
+  const files = await getAllFilesRecursively(docsPath);
+  const frontMatters = (
+    await Promise.all(
+      files.map(
+        limitFunction(
+          async file => {
+            const fileName = file.slice(docsPath.length + 1);
+            if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
+              return undefined;
+            }
+
+            const source = await readFile(file, 'utf8');
+            const {data: frontmatter} = matter(source);
+            return {
+              ...(frontmatter as FrontMatter),
+              slug: fileName.replace(/\/index\.mdx?$/, '').replace(/\.mdx?$/, ''),
+              sourcePath: path.join(folder, fileName),
+            };
+          },
+          {concurrency: FILE_CONCURRENCY_LIMIT}
+        )
+      )
+    )
+  ).filter(isNotNil);
+  return frontMatters;
+}