Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions .github/workflows/lint-404s.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,58 @@ jobs:
- 'docs/**'
- 'includes/**'
- 'platform-includes/**'
- 'scripts/lint-404s/**'
- 'app/api/source-map/**'
dev-docs:
- 'develop-docs/**'
- uses: oven-sh/setup-bun@v2
with:
bun-version: latest

- uses: actions/cache@v4
id: cache
id: cache-node-modules
with:
path: |
${{ github.workspace }}/node_modules
${{ github.workspace }}/.next/cache
${{ github.workspace }}/.eslintcache
key: node-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-${{ hashFiles('**/yarn.lock') }}
key: node-modules-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-${{ hashFiles('**/yarn.lock') }}
restore-keys: |
node-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-
node-modules-${{ runner.os }}-${{ steps.setup-node.outputs.node-version }}-

# Cache the Next.js build output to avoid rebuilding when docs content changes
# Separate caches for docs and dev-docs since they produce different outputs
- uses: actions/cache@v4
id: cache-nextjs-docs
if: steps.filter.outputs.docs == 'true'
with:
path: |
${{ github.workspace }}/.next
key: nextjs-build-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-${{ hashFiles('src/**', 'app/**', 'next.config.ts', 'tsconfig.json', 'tailwind.config.mjs') }}
restore-keys: |
nextjs-build-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-
nextjs-build-docs-${{ runner.os }}-

- uses: actions/cache@v4
id: cache-nextjs-dev-docs
if: steps.filter.outputs.dev-docs == 'true'
with:
path: |
${{ github.workspace }}/.next
key: nextjs-build-dev-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-${{ hashFiles('src/**', 'app/**', 'next.config.ts', 'tsconfig.json', 'tailwind.config.mjs') }}
restore-keys: |
nextjs-build-dev-docs-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }}-
nextjs-build-dev-docs-${{ runner.os }}-

- run: yarn install --frozen-lockfile

- run: yarn next build
if: steps.filter.outputs.docs == 'true'
if: steps.filter.outputs.docs == 'true' && steps.cache-nextjs-docs.outputs.cache-hit != 'true'
env:
SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0

- run: yarn build:developer-docs
if: steps.filter.outputs.dev-docs == 'true'
if: steps.filter.outputs.dev-docs == 'true' && steps.cache-nextjs-dev-docs.outputs.cache-hit != 'true'
env:
SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
Expand Down
44 changes: 44 additions & 0 deletions app/api/source-map/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import {NextResponse} from 'next/server';

import {apiCategories} from 'sentry-docs/build/resolveOpenAPI';
import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/frontmatter';
import {isDeveloperDocs} from 'sentry-docs/isDeveloperDocs';

/**
 * API endpoint that returns a mapping of slugs to their source file paths.
 * This is used by the 404 link checker to deduplicate pages that share the same source.
 *
 * Keys are slugs with a leading and trailing slash removed. Values are the page's
 * `sourcePath`, or `null` when the page has none (API-generated pages).
 */
export async function GET() {
  const docs = await (isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter());

  // Normalize slug (remove leading and trailing slashes to match main.ts trimSlashes)
  const normalizeSlug = (slug: string) => slug.replace(/(^\/|\/$)/g, '');

  const sourceMap: Record<string, string | null> = {};

  for (const doc of docs) {
    sourceMap[normalizeSlug(doc.slug)] = doc.sourcePath ?? null;
  }

  // For non-developer docs, add API-generated pages (they have no source file).
  // They are written straight into the map instead of being pushed onto `docs`,
  // so the array handed back by the front-matter loader is never mutated.
  // NOTE(review): that array is presumably shared/memoized by the loader — confirm;
  // either way this handler does not own it.
  if (!isDeveloperDocs) {
    const categories = await apiCategories();
    for (const category of categories) {
      sourceMap[normalizeSlug(`api/${category.slug}`)] = null;

      for (const api of category.apis) {
        sourceMap[normalizeSlug(`api/${category.slug}/${api.slug}`)] = null;
      }
    }
  }

  return NextResponse.json(sourceMap);
}
2 changes: 1 addition & 1 deletion app/sitemap.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type {MetadataRoute} from 'next';

import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/frontmatter';
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Sitemap excludes critical API documentation.

The sitemap changed its import from sentry-docs/mdx to sentry-docs/frontmatter, but the new frontmatter.ts module doesn't include API-generated pages (like API categories and endpoints) that the original mdx.ts implementation adds. This causes the sitemap to exclude all dynamically-generated API documentation pages that were previously included.

Fix in Cursor Fix in Web

import {isDeveloperDocs} from 'sentry-docs/isDeveloperDocs';
import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/mdx';

export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
if (isDeveloperDocs) {
Expand Down
60 changes: 57 additions & 3 deletions next.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ import {withSentryConfig} from '@sentry/nextjs';
import {REMOTE_IMAGE_PATTERNS} from './src/config/images';
import {redirects} from './redirects.js';

// Exclude build-time-only dependencies from serverless function bundles to stay under
// Vercel's 250MB limit. These packages (esbuild, mdx-bundler, sharp, etc.) are only
// needed during the build process to compile MDX and optimize assets. The compiled
// output is used at runtime, so bundling these ~150-200MB of dependencies would bloat
// functions unnecessarily and cause deployment failures.
const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
? {
'/**/*': [
Expand All @@ -13,6 +18,24 @@ const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
'./.next/cache/mdx-bundler/**/*',
'./.next/cache/md-exports/**/*',
'docs/**/*',
// Exclude heavy build dependencies
'node_modules/@esbuild/**/*',
'node_modules/esbuild/**/*',
'node_modules/@aws-sdk/**/*',
'node_modules/@google-cloud/**/*',
'node_modules/prettier/**/*',
'node_modules/@prettier/**/*',
'node_modules/sharp/**/*',
'node_modules/mermaid/**/*',
// Exclude MDX processing dependencies
'node_modules/mdx-bundler/**/*',
'node_modules/rehype-preset-minify/**/*',
'node_modules/rehype-prism-plus/**/*',
'node_modules/rehype-prism-diff/**/*',
'node_modules/remark-gfm/**/*',
'node_modules/remark-mdx-images/**/*',
'node_modules/unified/**/*',
'node_modules/rollup/**/*',
],
}
: {
Expand All @@ -23,7 +46,24 @@ const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
'./.next/cache/md-exports/**/*',
'./apps/**/*',
'develop-docs/**/*',
'node_modules/@esbuild/*',
// Exclude heavy build dependencies
'node_modules/@esbuild/**/*',
'node_modules/esbuild/**/*',
'node_modules/@aws-sdk/**/*',
'node_modules/@google-cloud/**/*',
'node_modules/prettier/**/*',
'node_modules/@prettier/**/*',
'node_modules/sharp/**/*',
'node_modules/mermaid/**/*',
// Exclude MDX processing dependencies
'node_modules/mdx-bundler/**/*',
'node_modules/rehype-preset-minify/**/*',
'node_modules/rehype-prism-plus/**/*',
'node_modules/rehype-prism-diff/**/*',
'node_modules/remark-gfm/**/*',
'node_modules/remark-mdx-images/**/*',
'node_modules/unified/**/*',
'node_modules/rollup/**/*',
],
'/platform-redirect': [
'**/*.gif',
Expand All @@ -38,7 +78,6 @@ const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
'public/og-images/**/*',
],
'sitemap.xml': [
'docs/**/*',
'public/mdx-images/**/*',
'public/og-images/**/*',
'**/*.gif',
Expand All @@ -57,7 +96,22 @@ if (process.env.NODE_ENV !== 'development' && !process.env.NEXT_PUBLIC_SENTRY_DS
const nextConfig = {
pageExtensions: ['js', 'jsx', 'mdx', 'ts', 'tsx', 'mdx'],
trailingSlash: true,
serverExternalPackages: ['rehype-preset-minify'],
serverExternalPackages: [
'rehype-preset-minify',
'esbuild',
'@esbuild/darwin-arm64',
'@esbuild/darwin-x64',
'@esbuild/linux-arm64',
'@esbuild/linux-x64',
'@esbuild/win32-x64',
'mdx-bundler',
'sharp',
'@aws-sdk/client-s3',
'@google-cloud/storage',
'prettier',
'@prettier/plugin-xml',
'mermaid',
],
outputFileTracingExcludes,
images: {
contentDispositionType: 'inline', // "open image in new tab" instead of downloading
Expand Down
65 changes: 65 additions & 0 deletions scripts/lint-404s/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# 404 Link Checker

This script checks all documentation pages for broken internal links (404s).

## Usage

```bash
# Basic usage (with deduplication - recommended)
bun ./scripts/lint-404s/main.ts

# Show progress for each page
bun ./scripts/lint-404s/main.ts --progress

# Skip deduplication and check all pages (for debugging)
bun ./scripts/lint-404s/main.ts --skip-deduplication

# Filter to a specific path
bun ./scripts/lint-404s/main.ts --path platforms/javascript
```

## Deduplication

By default, the checker **deduplicates common files** to improve performance.

### Why?

The Sentry docs use a "common" file system where documentation is shared across multiple platforms. For example:

- `/platforms/apple/common/configuration/index.mdx` is rendered as:
- `/platforms/apple/guides/ios/configuration/`
- `/platforms/apple/guides/macos/configuration/`
- `/platforms/apple/guides/watchos/configuration/`
- ... and many more

Without deduplication, the checker would fetch and test the same content dozens of times, which:

- Takes much longer to run
- Wastes CI resources
- Provides no additional value (the content is identical)

### How it works

1. The checker fetches a source map from `/api/source-map` that maps each slug to its source file
2. It tracks which source files have been checked
3. When several pages share the same source file (as common files do), it only checks the first one it encounters
4. **API-generated pages** are always checked (they have no source file), as is any page that is missing from the source map

This typically reduces the number of pages checked from **~9,000 to ~2,500**, a **72% reduction**.

### When to use `--skip-deduplication`

Use this flag to skip deduplication and verify that all rendered pages work correctly, even if they share the same source. This is rarely necessary but can help debug issues with:

- Path routing
- Platform-specific rendering bugs
- Edge cases in the build system

## Ignore List

The `ignore-list.txt` file contains paths that should be skipped during checking. Add paths here (one per line) if they're known to be inaccessible or are special cases.

## Exit Codes

- `0` - No 404s found
- `1` - 404s were detected
71 changes: 62 additions & 9 deletions scripts/lint-404s/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const trimSlashes = (s: string) => s.replace(/(^\/|\/$)/g, '');
const ignoreListFile = path.join(dirname(import.meta.url), './ignore-list.txt');

const showProgress = process.argv.includes('--progress');
const deduplicatePages = !process.argv.includes('--skip-deduplication');

// Get the path filter if specified
const pathFilterIndex = process.argv.indexOf('--path');
Expand All @@ -35,22 +36,74 @@ async function fetchWithFollow(url: URL | string): Promise<Response> {
return r;
}

/**
 * Reduces the list of slugs so that each source file is only checked once.
 *
 * Fetches the slug -> source path map from `${baseURL}api/source-map` and keeps
 * only the first slug seen for each source path. Slugs with no source path
 * (API-generated pages, or slugs absent from the map) are always kept.
 *
 * @param allSlugs - slugs to consider, in the order they should be checked
 * @returns the slugs that still need checking and how many were skipped as
 *   duplicates. If the source map cannot be fetched or is not a JSON object, a
 *   warning is logged and every slug is returned with `skippedCount` 0.
 */
async function deduplicateSlugs(
  allSlugs: string[]
): Promise<{skippedCount: number; slugsToCheck: string[]}> {
  try {
    const response = await fetch(`${baseURL}api/source-map`);
    // An error page must not be mistaken for a (mostly empty) source map.
    if (!response.ok) {
      throw new Error(`source map request failed with status ${response.status}`);
    }

    const payload: unknown = await response.json();
    if (typeof payload !== 'object' || payload === null || Array.isArray(payload)) {
      throw new Error('source map response is not a JSON object');
    }
    const sourceMap = payload as Record<string, unknown>;

    const checkedSources = new Set<string>();
    const slugsToCheck: string[] = [];
    let skippedCount = 0;

    for (const slug of allSlugs) {
      // Use same normalization as route.ts (remove leading and trailing slashes)
      const normalizedSlug = slug.replace(/(^\/|\/$)/g, '');
      const sourcePath = sourceMap[normalizedSlug];

      // Always check API-generated pages (no source file)
      if (typeof sourcePath !== 'string' || sourcePath.length === 0) {
        slugsToCheck.push(slug);
        continue;
      }

      // Skip if we've already checked this source file
      if (checkedSources.has(sourcePath)) {
        skippedCount++;
        continue;
      }

      // First time seeing this source file
      checkedSources.add(sourcePath);
      slugsToCheck.push(slug);
    }

    return {skippedCount, slugsToCheck};
  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('⚠️ Failed to fetch source map:', reason);
    console.warn('Falling back to checking all pages...\n');
    return {skippedCount: 0, slugsToCheck: allSlugs};
  }
}

async function main() {
const sitemap = await fetch(`${baseURL}sitemap.xml`).then(r => r.text());

const slugs = [...sitemap.matchAll(/<loc>([^<]*)<\/loc>/g)]
const allSlugs = [...sitemap.matchAll(/<loc>([^<]*)<\/loc>/g)]
.map(l => l[1])
.map(url => trimSlashes(new URL(url).pathname))
.filter(Boolean)
.filter(slug => (pathFilter ? slug.startsWith(pathFilter) : true));
const allSlugsSet = new Set(slugs);

if (pathFilter) {
console.log('Checking 404s on %d pages in /%s', slugs.length, pathFilter);
} else {
console.log('Checking 404s on %d pages', slugs.length);
const allSlugsSet = new Set(allSlugs);

// Deduplicate pages with same source file (default behavior)
const {skippedCount, slugsToCheck} = deduplicatePages
? await deduplicateSlugs(allSlugs)
: {skippedCount: 0, slugsToCheck: allSlugs};

if (skippedCount > 0) {
console.log(
'Deduplication: checking %d unique pages (skipped %d duplicates)\n',
slugsToCheck.length,
skippedCount
);
}

const pathInfo = pathFilter ? ` in /${pathFilter}` : '';
console.log('Checking 404s on %d pages%s', slugsToCheck.length, pathInfo);

const all404s: {page404s: Link[]; slug: string}[] = [];

// check if the slug equivalent of the href is in the sitemap
Expand Down Expand Up @@ -100,7 +153,7 @@ async function main() {
return false;
}

for (const slug of slugs) {
for (const slug of slugsToCheck) {
const pageUrl = new URL(slug, baseURL);
const now = performance.now();
const html = await fetchWithFollow(pageUrl.href).then(r => r.text());
Expand Down Expand Up @@ -134,7 +187,7 @@ async function main() {
}

if (all404s.length === 0) {
console.log('\n\n🎉 No 404s found');
console.log('\n🎉 No 404s found');
return false;
}
const numberOf404s = all404s.map(x => x.page404s.length).reduce((a, b) => a + b, 0);
Expand Down
Loading
Loading