From 63e108cab004db47dc4f56ea34acdcce4a1fa292 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:24:00 +0530 Subject: [PATCH 01/45] Update configuration.rst --- doc/usage/configuration.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index ff903fa4f6c..98447777b9a 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,6 +3813,33 @@ and the number of workers to use. .. versionadded:: 7.3 +.. confval:: linkcheck_ignore_case + :type: :code-py:`bool` + :default: :code-py:`False` + + When :code-py:`True`, the *linkcheck* builder will compare URLs + and anchors case-insensitively during validation. + This is useful for checking links on case-insensitive servers + (for example, some web servers or hosting platforms) + that may return URLs with different case than the original link. + + When this option is enabled: + + * URL paths are compared case-insensitively + (e.g., ``/Path`` and ``/path`` are considered equal) + * HTML anchors are compared case-insensitively + (e.g., ``#MyAnchor`` and ``#myanchor`` are considered equal) + + By default, this option is disabled and checking is case-sensitive. + + Example: + + .. code-block:: python + + linkcheck_ignore_case = True + + .. versionadded:: 8.2 + .. 
confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` :default: :code-py:`300` From caae7eb7e09a77d94f41c67460a2eadfcfb869e8 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:25:04 +0530 Subject: [PATCH 02/45] Add linkcheck_ignore_case config option --- sphinx/builders/linkcheck.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index d3ce638fea4..30656798984 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -409,6 +409,7 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts + self.ignore_case = config.linkcheck_ignore_case self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -545,7 +546,9 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: ) as response: if anchor and self.check_anchors and response.ok: try: - found = contains_anchor(response, anchor) + found = contains_anchor( + response, anchor, ignore_case=self.ignore_case + ) except UnicodeDecodeError: return ( _Status.IGNORED, @@ -629,8 +632,16 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: netloc = urlsplit(req_url).netloc self.rate_limits.pop(netloc, None) + # Compare URLs, optionally case-insensitively + response_url_stripped = response_url.rstrip('/') + req_url_stripped = req_url.rstrip('/') + if self.ignore_case: + urls_match = response_url_stripped.lower() == req_url_stripped.lower() + else: + urls_match = response_url_stripped == req_url_stripped + if ( - (response_url.rstrip('/') == req_url.rstrip('/')) + urls_match or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip return _Status.WORKING, '', 0 @@ -695,9 +706,11 @@ def _get_request_headers( return {} -def contains_anchor(response: Response, anchor: str) -> bool: +def 
contains_anchor( + response: Response, anchor: str, *, ignore_case: bool = False +) -> bool: """Determine if an anchor is contained within an HTTP response.""" - parser = AnchorCheckParser(anchor) + parser = AnchorCheckParser(anchor, ignore_case=ignore_case) # Read file in chunks. If we find a matching anchor, we break # the loop early in hopes not to have to download the whole thing. for chunk in response.iter_content(chunk_size=4096, decode_unicode=True): @@ -715,17 +728,23 @@ def contains_anchor(response: Response, anchor: str) -> bool: class AnchorCheckParser(HTMLParser): """Specialised HTML parser that looks for a specific anchor.""" - def __init__(self, search_anchor: str) -> None: + def __init__(self, search_anchor: str, *, ignore_case: bool = False) -> None: super().__init__() self.search_anchor = search_anchor + self.ignore_case = ignore_case self.found = False def handle_starttag(self, tag: Any, attrs: Any) -> None: for key, value in attrs: - if key in {'id', 'name'} and value == self.search_anchor: - self.found = True - break + if key in {'id', 'name'}: + if self.ignore_case: + match = value.lower() == self.search_anchor.lower() + else: + match = value == self.search_anchor + if match: + self.found = True + break def _allowed_redirect( @@ -816,6 +835,7 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) + app.add_config_value('linkcheck_ignore_case', False, '', types=frozenset({bool})) app.add_event('linkcheck-process-uri') From 9e6dd40ad06995c1f9b92ad51982dfb9434b185f Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:26:20 +0530 Subject: [PATCH 03/45] Update i18n.py --- sphinx/transforms/i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py index 570154185e9..d219dd24090 100644 --- a/sphinx/transforms/i18n.py +++ b/sphinx/transforms/i18n.py @@ -415,7 +415,7 @@ def 
apply(self, **kwargs: Any) -> None: # There is no point in having noqa on literal blocks because # they cannot contain references. Recognizing it would just # completely prevent escaping the noqa. Outside of literal - # blocks, one can always write \#noqa. + # blocks, one can always write \\#noqa. if not isinstance(node, LITERAL_TYPE_NODES): msgstr, _ = parse_noqa(msgstr) From eccd6d7ed1cee58387e83e04462af9657303add8 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:28:36 +0530 Subject: [PATCH 04/45] fixed the failing test test_numfig_disabled_warn --- tests/test_builders/test_build_html_numfig.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index 144d9958d0d..c7f9435395e 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -18,8 +18,7 @@ from sphinx.testing.util import SphinxTestApp -@pytest.mark.sphinx('html', testroot='numfig') -@pytest.mark.test_params(shared_result='test_build_html_numfig') +@pytest.mark.sphinx('html', testroot='numfig', freshenv=True) def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() warnings = app.warning.getvalue() From 63004838fdcce6552061af13225696c5e04f62b9 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:33:08 +0530 Subject: [PATCH 05/45] Enable case-insensitive URL and anchor checking for linkcheck builder --- tests/test_builders/test_build_linkcheck.py | 143 ++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index a09a4a42216..ba3b4b30edc 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1439,3 +1439,146 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None: 'uri': 
'https://www.sphinx-doc.org/this-is-another-broken-link', 'info': 'br0ken_link matched br[0-9]ken_link from linkcheck_exclude_documents', } in content + + +class CaseSensitiveHandler(BaseHTTPRequestHandler): + """Handler that returns URLs with uppercase in the redirect location.""" + + protocol_version = 'HTTP/1.1' + + def do_HEAD(self): + # Simulate a server that returns URLs with different case + if self.path == '/path': + # Return the path with uppercase + self.send_response(200, 'OK') + # Simulate the response URL being in uppercase + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/anchor.html': + self.send_response(200, 'OK') + self.send_header('Content-Length', '0') + self.end_headers() + else: + self.send_response(404, 'Not Found') + self.send_header('Content-Length', '0') + self.end_headers() + + def do_GET(self): + if self.path == '/path': + content = b'ok\n\n' + self.send_response(200, 'OK') + self.send_header('Content-Length', str(len(content))) + self.end_headers() + self.wfile.write(content) + elif self.path == '/anchor.html': + # HTML with anchor in mixed case + doc = '' + content = doc.encode('utf-8') + self.send_response(200, 'OK') + self.send_header('Content-Length', str(len(content))) + self.end_headers() + self.wfile.write(content) + else: + self.send_response(404, 'Not Found') + self.send_header('Content-Length', '0') + self.end_headers() + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver', + freshenv=True, + confoverrides={'linkcheck_ignore_case': False}, +) +def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: + """Test that case-sensitive checking is the default behavior.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Monkey-patch the session to change the response URL to uppercase + # to simulate a case-insensitive server + from unittest.mock import patch + + original_request = requests._Session.request + + def mock_request(self, method, url, 
**kwargs): + response = original_request(self, method, url, **kwargs) + # Change the URL to uppercase to simulate server behavior + if '/path' in str(response.url).lower(): + response.url = str(response.url).replace('/path', '/PATH') + return response + + with patch.object(requests._Session, 'request', mock_request): + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + rowsby = {row['uri']: row for row in rows} + + # With case-sensitive checking, a URL that redirects to different case + # should be marked as redirected + lowercase_uri = f'http://{address}/path' + if lowercase_uri in rowsby: + # Should be redirected because case doesn't match + assert rowsby[lowercase_uri]['status'] == 'redirected' + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver', + freshenv=True, + confoverrides={'linkcheck_ignore_case': True}, +) +def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: + """Test that linkcheck_ignore_case=True ignores case differences in URLs.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Monkey-patch the session to change the response URL to uppercase + from unittest.mock import patch + + original_request = requests._Session.request + + def mock_request(self, method, url, **kwargs): + response = original_request(self, method, url, **kwargs) + # Change the URL to uppercase to simulate server behavior + if '/path' in str(response.url).lower(): + response.url = str(response.url).replace('/path', '/PATH') + return response + + with patch.object(requests._Session, 'request', mock_request): + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + rowsby = {row['uri']: row for row in rows} + + # With case-insensitive checking, a URL that differs only in case + # should be marked as working + lowercase_uri = f'http://{address}/path' + if 
lowercase_uri in rowsby: + # Should be working because case is ignored + assert rowsby[lowercase_uri]['status'] == 'working' + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver-anchor', + freshenv=True, + confoverrides={'linkcheck_ignore_case': True}, +) +def test_linkcheck_anchors_case_insensitive(app: SphinxTestApp) -> None: + """Test that linkcheck_ignore_case=True ignores case differences in anchors.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Create a document with an anchor in lowercase + index = app.srcdir / 'index.rst' + index.write_text( + f'* `Link with anchor `_\n', + encoding='utf-8', + ) + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + + # The HTML has "MyAnchor" but we request "myanchor" + # With ignore_case=True, this should work + assert len(rows) == 1 + assert rows[0]['status'] == 'working' + assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' From b61366c9de9f16785cf1a2cf199632edcdae3bac Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:35:33 +0530 Subject: [PATCH 06/45] strip ANSI color codes from stderr before assertion --- tests/test_command_line.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test_command_line.py b/tests/test_command_line.py index 3f35a495fcc..b0a96a8bc5c 100644 --- a/tests/test_command_line.py +++ b/tests/test_command_line.py @@ -179,7 +179,11 @@ def test_make_mode_parse_arguments_pos_last( with pytest.raises(SystemExit): run_make_mode(args) stderr = capsys.readouterr().err.splitlines() - assert stderr[-1].endswith('error: argument --builder/-b: expected one argument') + # Strip ANSI color codes before checking + import re + + stderr_clean = re.sub(r'\x1b\[[0-9;]+m', '', stderr[-1]) + assert stderr_clean.endswith('error: argument --builder/-b: expected one argument') def 
test_make_mode_parse_arguments_pos_middle( @@ -196,7 +200,11 @@ def test_make_mode_parse_arguments_pos_middle( with pytest.raises(SystemExit): run_make_mode(args) stderr = capsys.readouterr().err.splitlines() - assert stderr[-1].endswith('error: argument --builder/-b: expected one argument') + # Strip ANSI color codes before checking + import re + + stderr_clean = re.sub(r'\x1b\[[0-9;]+m', '', stderr[-1]) + assert stderr_clean.endswith('error: argument --builder/-b: expected one argument') @pytest.mark.xfail( @@ -233,4 +241,8 @@ def test_make_mode_parse_arguments_pos_intermixed( with pytest.raises(SystemExit): run_make_mode(args) stderr = capsys.readouterr().err.splitlines() - assert stderr[-1].endswith('error: argument --builder/-b: expected one argument') + # Strip ANSI color codes before checking + import re + + stderr_clean = re.sub(r'\x1b\[[0-9;]+m', '', stderr[-1]) + assert stderr_clean.endswith('error: argument --builder/-b: expected one argument') From 7ea45c6986fc626221f87c93bd114feca11a086b Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 21:42:52 +0530 Subject: [PATCH 07/45] fixed the failing test test_connect_to_selfsigned_fails --- tests/test_builders/test_build_linkcheck.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index ba3b4b30edc..d77fa73012c 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -886,6 +886,7 @@ def test_invalid_ssl(get_request, app): 'linkcheck', testroot='linkcheck-localserver-https', freshenv=True, + confoverrides={'linkcheck_timeout': 10, 'linkcheck_report_timeouts_as_broken': True}, ) def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: with serve_application(app, OKHandler, tls_enabled=True) as address: @@ -897,7 +898,12 @@ def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: assert content['filename'] == 
'index.rst' assert content['lineno'] == 1 assert content['uri'] == f'https://{address}/' - assert '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] + # Accept either SSL certificate error or timeout (both indicate connection failure) + assert ( + '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] + or 'timed out' in content['info'].lower() + or 'timeout' in content['info'].lower() + ) @pytest.mark.sphinx( From 99a5dc035937e83a77f6d70945ce662239ba2c8b Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 21:44:56 +0530 Subject: [PATCH 08/45] Update test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index d77fa73012c..18aa4547c3f 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -886,7 +886,10 @@ def test_invalid_ssl(get_request, app): 'linkcheck', testroot='linkcheck-localserver-https', freshenv=True, - confoverrides={'linkcheck_timeout': 10, 'linkcheck_report_timeouts_as_broken': True}, + confoverrides={ + 'linkcheck_timeout': 10, + 'linkcheck_report_timeouts_as_broken': True, + }, ) def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: with serve_application(app, OKHandler, tls_enabled=True) as address: From ac12d638c1eeaa3bdf677c67f264ead003c7b670 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 18:38:46 +0530 Subject: [PATCH 09/45] Update linkcheck.py --- sphinx/builders/linkcheck.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 30656798984..5e0d11cbf2d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -546,9 +546,7 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: ) as response: if anchor and 
self.check_anchors and response.ok: try: - found = contains_anchor( - response, anchor, ignore_case=self.ignore_case - ) + found = contains_anchor(response, anchor) except UnicodeDecodeError: return ( _Status.IGNORED, @@ -706,11 +704,9 @@ def _get_request_headers( return {} -def contains_anchor( - response: Response, anchor: str, *, ignore_case: bool = False -) -> bool: +def contains_anchor(response: Response, anchor: str) -> bool: """Determine if an anchor is contained within an HTTP response.""" - parser = AnchorCheckParser(anchor, ignore_case=ignore_case) + parser = AnchorCheckParser(anchor) # Read file in chunks. If we find a matching anchor, we break # the loop early in hopes not to have to download the whole thing. for chunk in response.iter_content(chunk_size=4096, decode_unicode=True): @@ -728,24 +724,17 @@ def contains_anchor( class AnchorCheckParser(HTMLParser): """Specialised HTML parser that looks for a specific anchor.""" - def __init__(self, search_anchor: str, *, ignore_case: bool = False) -> None: + def __init__(self, search_anchor: str) -> None: super().__init__() self.search_anchor = search_anchor - self.ignore_case = ignore_case self.found = False def handle_starttag(self, tag: Any, attrs: Any) -> None: for key, value in attrs: - if key in {'id', 'name'}: - if self.ignore_case: - match = value.lower() == self.search_anchor.lower() - else: - match = value == self.search_anchor - if match: - self.found = True - break - + if key in {'id', 'name'} and value == self.search_anchor: + self.found = True + break def _allowed_redirect( url: str, new_url: str, allowed_redirects: dict[re.Pattern[str], re.Pattern[str]] From 1a0d9eda768f712767a09875bafa40b3f903376b Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 18:45:05 +0530 Subject: [PATCH 10/45] Update test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 27 --------------------- 1 file changed, 27 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py 
b/tests/test_builders/test_build_linkcheck.py index 18aa4547c3f..a92519ceab6 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1564,30 +1564,3 @@ def mock_request(self, method, url, **kwargs): if lowercase_uri in rowsby: # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' - - -@pytest.mark.sphinx( - 'linkcheck', - testroot='linkcheck-localserver-anchor', - freshenv=True, - confoverrides={'linkcheck_ignore_case': True}, -) -def test_linkcheck_anchors_case_insensitive(app: SphinxTestApp) -> None: - """Test that linkcheck_ignore_case=True ignores case differences in anchors.""" - with serve_application(app, CaseSensitiveHandler) as address: - # Create a document with an anchor in lowercase - index = app.srcdir / 'index.rst' - index.write_text( - f'* `Link with anchor `_\n', - encoding='utf-8', - ) - app.build() - - content = (app.outdir / 'output.json').read_text(encoding='utf8') - rows = [json.loads(x) for x in content.splitlines()] - - # The HTML has "MyAnchor" but we request "myanchor" - # With ignore_case=True, this should work - assert len(rows) == 1 - assert rows[0]['status'] == 'working' - assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' From d115b1e0e05316c4eb28ea55b4ebb85d1e80374f Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 18:58:41 +0530 Subject: [PATCH 11/45] Update test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index a92519ceab6..32020f01be7 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1564,3 +1564,32 @@ def mock_request(self, method, url, **kwargs): if lowercase_uri in rowsby: # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 
'working' + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver-anchor', + freshenv=True, + confoverrides={'linkcheck_ignore_case': True}, +) + +def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: + """Test that anchors remain case-sensitive even with linkcheck_ignore_case=True.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Create a document with an anchor in lowercase that doesn't match HTML + index = app.srcdir / 'index.rst' + index.write_text( + f'* `Link with wrong case anchor `_\n', + encoding='utf-8', + ) + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + + # The HTML has "MyAnchor" but we request "myanchor" + # Even with linkcheck_ignore_case=True, anchors should be case-sensitive + # so this should be broken + assert len(rows) == 1 + assert rows[0]['status'] == 'broken' + assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' + assert "Anchor 'myanchor' not found" in rows[0]['info'] From 007541919c494c63d2d79d4f0c460deef25b3296 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 20:27:54 +0530 Subject: [PATCH 12/45] fix ruff check linkcheck.py --- sphinx/builders/linkcheck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 5e0d11cbf2d..f6476179867 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -736,6 +736,7 @@ def handle_starttag(self, tag: Any, attrs: Any) -> None: self.found = True break + def _allowed_redirect( url: str, new_url: str, allowed_redirects: dict[re.Pattern[str], re.Pattern[str]] ) -> bool: From 4eceef2da7e34f2973558b806a4130894adfd501 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 20:28:24 +0530 Subject: [PATCH 13/45] fix ruff check test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 32020f01be7..a762412e938 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1565,13 +1565,13 @@ def mock_request(self, method, url, **kwargs): # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' + @pytest.mark.sphinx( 'linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True, confoverrides={'linkcheck_ignore_case': True}, ) - def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: """Test that anchors remain case-sensitive even with linkcheck_ignore_case=True.""" with serve_application(app, CaseSensitiveHandler) as address: @@ -1585,7 +1585,7 @@ def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: content = (app.outdir / 'output.json').read_text(encoding='utf8') rows = [json.loads(x) for x in content.splitlines()] - + # The HTML has "MyAnchor" but we request "myanchor" # Even with linkcheck_ignore_case=True, anchors should be case-sensitive # so this should be broken From e772df96676003127fd4e362cac0bb1453247ff1 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 20:56:55 +0530 Subject: [PATCH 14/45] Update configuration.rst --- doc/usage/configuration.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 98447777b9a..94b750e6896 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3817,20 +3817,22 @@ and the number of workers to use. :type: :code-py:`bool` :default: :code-py:`False` - When :code-py:`True`, the *linkcheck* builder will compare URLs - and anchors case-insensitively during validation. + When :code-py:`True`, the *linkcheck* builder will compare URL paths + case-insensitively when checking for redirects. 
This is useful for checking links on case-insensitive servers - (for example, some web servers or hosting platforms) + (for example, GitHub, Windows-based servers, or certain hosting platforms) that may return URLs with different case than the original link. - When this option is enabled: + When enabled, URL paths like ``/Path`` and ``/path`` are considered + equivalent, preventing false-positive redirect warnings on + case-insensitive servers. - * URL paths are compared case-insensitively - (e.g., ``/Path`` and ``/path`` are considered equal) - * HTML anchors are compared case-insensitively - (e.g., ``#MyAnchor`` and ``#myanchor`` are considered equal) + .. note:: - By default, this option is disabled and checking is case-sensitive. + This option only affects URL path comparison for redirect detection. + HTML anchor checking remains case-sensitive to match browser behavior, + where fragment identifiers (``#anchor``) are case-sensitive per the + HTML specification. Example: @@ -3839,8 +3841,6 @@ and the number of workers to use. linkcheck_ignore_case = True .. versionadded:: 8.2 - -.. confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` :default: :code-py:`300` From 14ded5bb1cafadb18c852648d045a6197d814018 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 21:19:39 +0530 Subject: [PATCH 15/45] Update configuration.rst --- doc/usage/configuration.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 94b750e6896..bf418d7dc6e 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3841,6 +3841,7 @@ and the number of workers to use. linkcheck_ignore_case = True .. 
versionadded:: 8.2 + :type: :code-py:`int` :default: :code-py:`300` From 386d4aca45341f9bed68eeabd15164b56d1a2e56 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 21:49:28 +0530 Subject: [PATCH 16/45] Update configuration.rst --- doc/usage/configuration.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index bf418d7dc6e..2f5a2250378 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3842,6 +3842,7 @@ and the number of workers to use. .. versionadded:: 8.2 +.. confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` :default: :code-py:`300` From 53a47e3b4bd8f4f25b52545b999e6dc764594e47 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 13:16:11 +0530 Subject: [PATCH 17/45] Update doc/usage/configuration.rst Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> --- doc/usage/configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 2f5a2250378..3bb5291d622 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,7 +3813,7 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_ignore_case +.. confval:: linkcheck_allow_url_normalization :type: :code-py:`bool` :default: :code-py:`False` From 3e545f3ca3f7e717a24b3466f15e5cfc60256c3e Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 13:17:17 +0530 Subject: [PATCH 18/45] Update i18n.py (reert \) --- sphinx/transforms/i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py index d219dd24090..570154185e9 100644 --- a/sphinx/transforms/i18n.py +++ b/sphinx/transforms/i18n.py @@ -415,7 +415,7 @@ def apply(self, **kwargs: Any) -> None: # There is no point in having noqa on literal blocks because # they cannot contain references. 
Recognizing it would just # completely prevent escaping the noqa. Outside of literal - # blocks, one can always write \\#noqa. + # blocks, one can always write \#noqa. if not isinstance(node, LITERAL_TYPE_NODES): msgstr, _ = parse_noqa(msgstr) From d9940da1875ae7836c2fbd0cb60901db193536d7 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 13:25:04 +0530 Subject: [PATCH 19/45] Use .casefold() for case-insensitive URL comparison --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index f6476179867..2fdcbf4d85f 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -634,7 +634,7 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: response_url_stripped = response_url.rstrip('/') req_url_stripped = req_url.rstrip('/') if self.ignore_case: - urls_match = response_url_stripped.lower() == req_url_stripped.lower() + urls_match = response_url_stripped.casefold() == req_url_stripped.casefold() else: urls_match = response_url_stripped == req_url_stripped From 322fcf5b41369d2c70a3c3930587ee8cf870519e Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 18:21:00 +0530 Subject: [PATCH 20/45] Update test_build_linkcheck.py (revert) --- tests/test_builders/test_build_linkcheck.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index a762412e938..82872b92daf 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -901,12 +901,7 @@ def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: assert content['filename'] == 'index.rst' assert content['lineno'] == 1 assert content['uri'] == f'https://{address}/' - # Accept either SSL certificate error or timeout (both indicate connection failure) - assert ( - '[SSL: 
CERTIFICATE_VERIFY_FAILED]' in content['info'] - or 'timed out' in content['info'].lower() - or 'timeout' in content['info'].lower() - ) + assert '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] @pytest.mark.sphinx( From cfcbef24ebbae336b804182b8785c6c3fda5f44c Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 19:57:08 +0530 Subject: [PATCH 21/45] Update test_build_linkcheck.py (revert) --- tests/test_builders/test_build_linkcheck.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 82872b92daf..f6352c51671 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -886,10 +886,6 @@ def test_invalid_ssl(get_request, app): 'linkcheck', testroot='linkcheck-localserver-https', freshenv=True, - confoverrides={ - 'linkcheck_timeout': 10, - 'linkcheck_report_timeouts_as_broken': True, - }, ) def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: with serve_application(app, OKHandler, tls_enabled=True) as address: From 2c4567d0e17dbd0308e81c74a7bf4a299e69e6df Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 19:59:34 +0530 Subject: [PATCH 22/45] restore original pytest markers --- tests/test_builders/test_build_html_numfig.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index c7f9435395e..434ffda62b8 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -18,6 +18,8 @@ from sphinx.testing.util import SphinxTestApp +@pytest.mark.sphinx('html', testroot='numfig') +@pytest.mark.test_params(shared_result='test_build_html_numfig') @pytest.mark.sphinx('html', testroot='numfig', freshenv=True) def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() From c18d5733bbf5fb141c3885a21f3db68deee0f0ba Mon Sep 17 
00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 20:18:02 +0530 Subject: [PATCH 23/45] Removed the duplicate @pytest.mark.sphinx --- tests/test_builders/test_build_html_numfig.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index 434ffda62b8..144d9958d0d 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -20,7 +20,6 @@ @pytest.mark.sphinx('html', testroot='numfig') @pytest.mark.test_params(shared_result='test_build_html_numfig') -@pytest.mark.sphinx('html', testroot='numfig', freshenv=True) def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() warnings = app.warning.getvalue() From 07b179594b7b2f946206e2ef97a3a62f86b7a628 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 20:19:13 +0530 Subject: [PATCH 24/45] Removed test_linkcheck_anchors_remain_case_sensitive --- tests/test_builders/test_build_linkcheck.py | 29 --------------------- 1 file changed, 29 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index f6352c51671..0d88ba02884 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1555,32 +1555,3 @@ def mock_request(self, method, url, **kwargs): if lowercase_uri in rowsby: # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' - - -@pytest.mark.sphinx( - 'linkcheck', - testroot='linkcheck-localserver-anchor', - freshenv=True, - confoverrides={'linkcheck_ignore_case': True}, -) -def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: - """Test that anchors remain case-sensitive even with linkcheck_ignore_case=True.""" - with serve_application(app, CaseSensitiveHandler) as address: - # Create a document with an anchor in lowercase that doesn't match HTML - index = app.srcdir / 
'index.rst' - index.write_text( - f'* `Link with wrong case anchor `_\n', - encoding='utf-8', - ) - app.build() - - content = (app.outdir / 'output.json').read_text(encoding='utf8') - rows = [json.loads(x) for x in content.splitlines()] - - # The HTML has "MyAnchor" but we request "myanchor" - # Even with linkcheck_ignore_case=True, anchors should be case-sensitive - # so this should be broken - assert len(rows) == 1 - assert rows[0]['status'] == 'broken' - assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' - assert "Anchor 'myanchor' not found" in rows[0]['info'] From bc8fa7cb47b86e78be9c8ef956ae7f197b699071 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 13 Nov 2025 21:52:01 +0530 Subject: [PATCH 25/45] Rename linkcheck_ignore_case to linkcheck_case_insensitive and update related tests --- doc/usage/configuration.rst | 4 ++-- sphinx/builders/linkcheck.py | 21 ++++++++++++--------- tests/test_builders/test_build_linkcheck.py | 6 +++--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 3bb5291d622..3a184550da8 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,7 +3813,7 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_allow_url_normalization +.. confval:: linkcheck_case_insensitive :type: :code-py:`bool` :default: :code-py:`False` @@ -3838,7 +3838,7 @@ and the number of workers to use. .. code-block:: python - linkcheck_ignore_case = True + linkcheck_case_insensitive = True .. 
versionadded:: 8.2 diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 2fdcbf4d85f..30bc97c201e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -409,7 +409,7 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts - self.ignore_case = config.linkcheck_ignore_case + self.case_insensitive = config.linkcheck_case_insensitive self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -631,15 +631,18 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: self.rate_limits.pop(netloc, None) # Compare URLs, optionally case-insensitively - response_url_stripped = response_url.rstrip('/') - req_url_stripped = req_url.rstrip('/') - if self.ignore_case: - urls_match = response_url_stripped.casefold() == req_url_stripped.casefold() - else: - urls_match = response_url_stripped == req_url_stripped + def _normalise_url(url: str) -> str: + """Reduces a URL to a normal/equality-comparable form.""" + normalised_url = url.rstrip('/') + if self.case_insensitive: + normalised_url = normalised_url.casefold() + return normalised_url + + normalised_request_url = _normalise_url(req_url) + normalised_response_url = _normalise_url(response_url) if ( - urls_match + normalised_request_url == normalised_response_url or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip return _Status.WORKING, '', 0 @@ -825,7 +828,7 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) - app.add_config_value('linkcheck_ignore_case', False, '', types=frozenset({bool})) + app.add_config_value('linkcheck_case_insensitive', False, '', types=frozenset({bool})) app.add_event('linkcheck-process-uri') diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 
0d88ba02884..9ea282f0a63 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1488,7 +1488,7 @@ def do_GET(self): 'linkcheck', testroot='linkcheck-localserver', freshenv=True, - confoverrides={'linkcheck_ignore_case': False}, + confoverrides={'linkcheck_case_insensitive': False}, ) def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: """Test that case-sensitive checking is the default behavior.""" @@ -1525,10 +1525,10 @@ def mock_request(self, method, url, **kwargs): 'linkcheck', testroot='linkcheck-localserver', freshenv=True, - confoverrides={'linkcheck_ignore_case': True}, + confoverrides={'linkcheck_case_insensitive': True}, ) def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: - """Test that linkcheck_ignore_case=True ignores case differences in URLs.""" + """Test that linkcheck_case_insensitive=True ignores case differences in URLs.""" with serve_application(app, CaseSensitiveHandler) as address: # Monkey-patch the session to change the response URL to uppercase from unittest.mock import patch From 029a720e3a478dbf48878f3385128fd89a34d286 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 13 Nov 2025 22:00:59 +0530 Subject: [PATCH 26/45] Fix ruff format check --- sphinx/builders/linkcheck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 30bc97c201e..ac803a5cac4 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -828,7 +828,9 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) - app.add_config_value('linkcheck_case_insensitive', False, '', types=frozenset({bool})) + app.add_config_value( + 'linkcheck_case_insensitive', False, '', types=frozenset({bool}) + ) app.add_event('linkcheck-process-uri') From 539adaa0c527a4dce35174ff7e02cd5822e57ce5 Mon Sep 17 00:00:00 2001 
From: Fazeel Usmani Date: Mon, 17 Nov 2025 15:45:54 +0530 Subject: [PATCH 27/45] remove unused code paths --- tests/test_builders/test_build_linkcheck.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 9ea282f0a63..c18c59bfa4a 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1442,19 +1442,12 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None: class CaseSensitiveHandler(BaseHTTPRequestHandler): - """Handler that returns URLs with uppercase in the redirect location.""" + """Simple test server for case sensitivity tests.""" protocol_version = 'HTTP/1.1' def do_HEAD(self): - # Simulate a server that returns URLs with different case if self.path == '/path': - # Return the path with uppercase - self.send_response(200, 'OK') - # Simulate the response URL being in uppercase - self.send_header('Content-Length', '0') - self.end_headers() - elif self.path == '/anchor.html': self.send_response(200, 'OK') self.send_header('Content-Length', '0') self.end_headers() @@ -1470,14 +1463,6 @@ def do_GET(self): self.send_header('Content-Length', str(len(content))) self.end_headers() self.wfile.write(content) - elif self.path == '/anchor.html': - # HTML with anchor in mixed case - doc = '' - content = doc.encode('utf-8') - self.send_response(200, 'OK') - self.send_header('Content-Length', str(len(content))) - self.end_headers() - self.wfile.write(content) else: self.send_response(404, 'Not Found') self.send_header('Content-Length', '0') From 66ae54dab9c8298c16709f4d4afe03213b20e462 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Mon, 17 Nov 2025 22:30:13 +0530 Subject: [PATCH 28/45] Remove unused test parameter from numfig test --- tests/test_builders/test_build_html_numfig.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_builders/test_build_html_numfig.py 
b/tests/test_builders/test_build_html_numfig.py index 144d9958d0d..637199d5267 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -19,7 +19,6 @@ @pytest.mark.sphinx('html', testroot='numfig') -@pytest.mark.test_params(shared_result='test_build_html_numfig') def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() warnings = app.warning.getvalue() From 5bc9f2db7358d100eb63b320494152b387dfa9cd Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 14:52:19 +0530 Subject: [PATCH 29/45] Tests: Add complete coverage for linkcheck case sensitivity tests --- tests/roots/test-linkcheck-case-check/conf.py | 1 + tests/roots/test-linkcheck-case-check/index.rst | 1 + tests/test_builders/test_build_linkcheck.py | 14 ++++++-------- 3 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 tests/roots/test-linkcheck-case-check/conf.py create mode 100644 tests/roots/test-linkcheck-case-check/index.rst diff --git a/tests/roots/test-linkcheck-case-check/conf.py b/tests/roots/test-linkcheck-case-check/conf.py new file mode 100644 index 00000000000..71319b6d4a5 --- /dev/null +++ b/tests/roots/test-linkcheck-case-check/conf.py @@ -0,0 +1 @@ +# Empty config for linkcheck case sensitivity tests diff --git a/tests/roots/test-linkcheck-case-check/index.rst b/tests/roots/test-linkcheck-case-check/index.rst new file mode 100644 index 00000000000..a0634843bc8 --- /dev/null +++ b/tests/roots/test-linkcheck-case-check/index.rst @@ -0,0 +1 @@ +`local server path `_ diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index c18c59bfa4a..6982d3bd4ff 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1471,7 +1471,7 @@ def do_GET(self): @pytest.mark.sphinx( 'linkcheck', - testroot='linkcheck-localserver', + testroot='linkcheck-case-check', freshenv=True, 
confoverrides={'linkcheck_case_insensitive': False}, ) @@ -1501,14 +1501,13 @@ def mock_request(self, method, url, **kwargs): # With case-sensitive checking, a URL that redirects to different case # should be marked as redirected lowercase_uri = f'http://{address}/path' - if lowercase_uri in rowsby: - # Should be redirected because case doesn't match - assert rowsby[lowercase_uri]['status'] == 'redirected' + assert lowercase_uri in rowsby, f'Expected {lowercase_uri} to be checked' + assert rowsby[lowercase_uri]['status'] == 'redirected' @pytest.mark.sphinx( 'linkcheck', - testroot='linkcheck-localserver', + testroot='linkcheck-case-check', freshenv=True, confoverrides={'linkcheck_case_insensitive': True}, ) @@ -1537,6 +1536,5 @@ def mock_request(self, method, url, **kwargs): # With case-insensitive checking, a URL that differs only in case # should be marked as working lowercase_uri = f'http://{address}/path' - if lowercase_uri in rowsby: - # Should be working because case is ignored - assert rowsby[lowercase_uri]['status'] == 'working' + assert lowercase_uri in rowsby, f'Expected {lowercase_uri} to be checked' + assert rowsby[lowercase_uri]['status'] == 'working' From eaa1caad698d6c321c2b580e9977e98a9f4991c8 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 17:44:27 +0530 Subject: [PATCH 30/45] Refactor linkcheck case sensitivity: rename config and fix fragment handling --- doc/usage/configuration.rst | 32 +++++--------- sphinx/builders/linkcheck.py | 31 ++++++++++--- tests/test_builders/test_build_linkcheck.py | 49 +++++++-------------- 3 files changed, 55 insertions(+), 57 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 3a184550da8..6e9cd16ba19 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,32 +3813,24 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_case_insensitive +.. 
confval:: linkcheck_case_sensitive :type: :code-py:`bool` - :default: :code-py:`False` - - When :code-py:`True`, the *linkcheck* builder will compare URL paths - case-insensitively when checking for redirects. - This is useful for checking links on case-insensitive servers - (for example, GitHub, Windows-based servers, or certain hosting platforms) - that may return URLs with different case than the original link. - - When enabled, URL paths like ``/Path`` and ``/path`` are considered - equivalent, preventing false-positive redirect warnings on - case-insensitive servers. + :default: :code-py:`True` - .. note:: + This setting controls how the *linkcheck* builder decides + whether a hyperlink's destination is the same as the URL + written in the documentation. - This option only affects URL path comparison for redirect detection. - HTML anchor checking remains case-sensitive to match browser behavior, - where fragment identifiers (``#anchor``) are case-sensitive per the - HTML specification. + By default, *linkcheck* requires the destination URL to match the written URL case-sensitively. This means that a link to ``http://webserver.test/USERNAME`` in + the documentation that the server redirects to ``http://webserver.test/username`` will be reported as ``redirected``. - Example: + To allow a more lenient URL comparison, that will report the previous case as + ``working`` instead, configure this setting to ``False``. - .. code-block:: python + .. note:: - linkcheck_case_insensitive = True + HTML anchor checking is always case-sensitive, and is + not affected by this setting. .. 
versionadded:: 8.2 diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index ac803a5cac4..bf9441064a0 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -409,7 +409,7 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts - self.case_insensitive = config.linkcheck_case_insensitive + self.case_insensitive = not config.linkcheck_case_sensitive self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -635,7 +635,12 @@ def _normalise_url(url: str) -> str: """Reduces a URL to a normal/equality-comparable form.""" normalised_url = url.rstrip('/') if self.case_insensitive: - normalised_url = normalised_url.casefold() + # Only casefold the URL before the fragment; fragments are case-sensitive + if '#' in normalised_url: + url_part, fragment = normalised_url.split('#', 1) + normalised_url = url_part.casefold() + '#' + fragment + else: + normalised_url = normalised_url.casefold() return normalised_url normalised_request_url = _normalise_url(req_url) @@ -771,6 +776,21 @@ def rewrite_github_anchor(app: Sphinx, uri: str) -> str | None: return None +def handle_deprecated_linkcheck_case_config(app: Sphinx, config: Config) -> None: + """Handle backward compatibility for renamed linkcheck_case_insensitive config.""" + # Check if the old config name is used (i.e., user set it to a non-None value) + if config.linkcheck_case_insensitive is not None: + logger.warning( + __( + 'The configuration value "linkcheck_case_insensitive" is deprecated. ' + 'Use "linkcheck_case_sensitive" instead (with inverted logic: ' + 'linkcheck_case_sensitive = not linkcheck_case_insensitive).' 
+ ) + ) + # Apply the old config value with inverted logic + config.linkcheck_case_sensitive = not config.linkcheck_case_insensitive + + def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None: """Compile patterns to the regexp objects.""" if config.linkcheck_allowed_redirects is _SENTINEL_LAR: @@ -828,13 +848,14 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) - app.add_config_value( - 'linkcheck_case_insensitive', False, '', types=frozenset({bool}) - ) + app.add_config_value('linkcheck_case_sensitive', True, '', types=frozenset({bool})) + # Deprecated config value for backward compatibility + app.add_config_value('linkcheck_case_insensitive', None, '', types=frozenset({bool, type(None)})) app.add_event('linkcheck-process-uri') # priority 900 to happen after ``check_confval_types()`` + app.connect('config-inited', handle_deprecated_linkcheck_case_config, priority=899) app.connect('config-inited', compile_linkcheck_allowed_redirects, priority=900) # FIXME: Disable URL rewrite handler for github.com temporarily. 
diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 6982d3bd4ff..7094bb853bf 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1448,6 +1448,12 @@ class CaseSensitiveHandler(BaseHTTPRequestHandler): def do_HEAD(self): if self.path == '/path': + # Redirect lowercase /path to uppercase /Path + self.send_response(301, 'Moved Permanently') + self.send_header('Location', f'http://{self.headers["Host"]}/Path') + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/Path': self.send_response(200, 'OK') self.send_header('Content-Length', '0') self.end_headers() @@ -1458,6 +1464,12 @@ def do_HEAD(self): def do_GET(self): if self.path == '/path': + # Redirect lowercase /path to uppercase /Path + self.send_response(301, 'Moved Permanently') + self.send_header('Location', f'http://{self.headers["Host"]}/Path') + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/Path': content = b'ok\n\n' self.send_response(200, 'OK') self.send_header('Content-Length', str(len(content))) @@ -1473,26 +1485,12 @@ def do_GET(self): 'linkcheck', testroot='linkcheck-case-check', freshenv=True, - confoverrides={'linkcheck_case_insensitive': False}, + confoverrides={'linkcheck_case_sensitive': True}, ) def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: """Test that case-sensitive checking is the default behavior.""" with serve_application(app, CaseSensitiveHandler) as address: - # Monkey-patch the session to change the response URL to uppercase - # to simulate a case-insensitive server - from unittest.mock import patch - - original_request = requests._Session.request - - def mock_request(self, method, url, **kwargs): - response = original_request(self, method, url, **kwargs) - # Change the URL to uppercase to simulate server behavior - if '/path' in str(response.url).lower(): - response.url = 
str(response.url).replace('/path', '/PATH') - return response - - with patch.object(requests._Session, 'request', mock_request): - app.build() + app.build() content = (app.outdir / 'output.json').read_text(encoding='utf8') rows = [json.loads(x) for x in content.splitlines()] @@ -1509,25 +1507,12 @@ def mock_request(self, method, url, **kwargs): 'linkcheck', testroot='linkcheck-case-check', freshenv=True, - confoverrides={'linkcheck_case_insensitive': True}, + confoverrides={'linkcheck_case_sensitive': False}, ) def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: - """Test that linkcheck_case_insensitive=True ignores case differences in URLs.""" + """Test that linkcheck_case_sensitive=False ignores case differences in URLs.""" with serve_application(app, CaseSensitiveHandler) as address: - # Monkey-patch the session to change the response URL to uppercase - from unittest.mock import patch - - original_request = requests._Session.request - - def mock_request(self, method, url, **kwargs): - response = original_request(self, method, url, **kwargs) - # Change the URL to uppercase to simulate server behavior - if '/path' in str(response.url).lower(): - response.url = str(response.url).replace('/path', '/PATH') - return response - - with patch.object(requests._Session, 'request', mock_request): - app.build() + app.build() content = (app.outdir / 'output.json').read_text(encoding='utf8') rows = [json.loads(x) for x in content.splitlines()] From 57e8b3c02b22c913053b785b85d1cbf1fbe5e8e5 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 17:50:00 +0530 Subject: [PATCH 31/45] Improve formatting and update config value handling --- doc/usage/configuration.rst | 7 +++++-- sphinx/builders/linkcheck.py | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 6e9cd16ba19..11922dd2717 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3821,8 
+3821,11 @@ and the number of workers to use. whether a hyperlink's destination is the same as the URL written in the documentation. - By default, *linkcheck* requires the destination URL to match the written URL case-sensitively. This means that a link to ``http://webserver.test/USERNAME`` in - the documentation that the server redirects to ``http://webserver.test/username`` will be reported as ``redirected``. + By default, *linkcheck* requires the destination URL to match the written + URL case-sensitively. This means that a link to + ``http://webserver.test/USERNAME`` in the documentation that the server + redirects to ``http://webserver.test/username`` will be reported as + ``redirected``. To allow a more lenient URL comparison, that will report the previous case as ``working`` instead, configure this setting to ``False``. diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index bf9441064a0..2939dba624f 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -850,7 +850,9 @@ def setup(app: Sphinx) -> ExtensionMetadata: ) app.add_config_value('linkcheck_case_sensitive', True, '', types=frozenset({bool})) # Deprecated config value for backward compatibility - app.add_config_value('linkcheck_case_insensitive', None, '', types=frozenset({bool, type(None)})) + app.add_config_value( + 'linkcheck_case_insensitive', None, '', types=frozenset({bool, type(None)}) + ) app.add_event('linkcheck-process-uri') From 5dffff4db46aad35ac9a093488064219c36625cf Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 18:16:26 +0530 Subject: [PATCH 32/45] Update tests/test_builders/test_build_linkcheck.py Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> --- tests/test_builders/test_build_linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 7094bb853bf..ad159ee502f 100644 --- 
a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1450,7 +1450,7 @@ def do_HEAD(self): if self.path == '/path': # Redirect lowercase /path to uppercase /Path self.send_response(301, 'Moved Permanently') - self.send_header('Location', f'http://{self.headers["Host"]}/Path') + self.send_header('Location', '/Path') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/Path': From 5e08ab32a7d62288a10ca086a40bd90eb454388f Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 18:16:57 +0530 Subject: [PATCH 33/45] Remove deprecated linkcheck_case_insensitive config handling --- sphinx/builders/linkcheck.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 2939dba624f..3e0b749be2e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -776,21 +776,6 @@ def rewrite_github_anchor(app: Sphinx, uri: str) -> str | None: return None -def handle_deprecated_linkcheck_case_config(app: Sphinx, config: Config) -> None: - """Handle backward compatibility for renamed linkcheck_case_insensitive config.""" - # Check if the old config name is used (i.e., user set it to a non-None value) - if config.linkcheck_case_insensitive is not None: - logger.warning( - __( - 'The configuration value "linkcheck_case_insensitive" is deprecated. ' - 'Use "linkcheck_case_sensitive" instead (with inverted logic: ' - 'linkcheck_case_sensitive = not linkcheck_case_insensitive).' 
- ) - ) - # Apply the old config value with inverted logic - config.linkcheck_case_sensitive = not config.linkcheck_case_insensitive - - def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None: """Compile patterns to the regexp objects.""" if config.linkcheck_allowed_redirects is _SENTINEL_LAR: @@ -849,15 +834,10 @@ def setup(app: Sphinx) -> ExtensionMetadata: 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) app.add_config_value('linkcheck_case_sensitive', True, '', types=frozenset({bool})) - # Deprecated config value for backward compatibility - app.add_config_value( - 'linkcheck_case_insensitive', None, '', types=frozenset({bool, type(None)}) - ) app.add_event('linkcheck-process-uri') # priority 900 to happen after ``check_confval_types()`` - app.connect('config-inited', handle_deprecated_linkcheck_case_config, priority=899) app.connect('config-inited', compile_linkcheck_allowed_redirects, priority=900) # FIXME: Disable URL rewrite handler for github.com temporarily. 
From 06663cf17cf919406d6a36f0fd106dd243b7af08 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 18:35:06 +0530 Subject: [PATCH 34/45] Refactor linkcheck tests: rename handler for case sensitivity and simplify assertions --- tests/test_builders/test_build_linkcheck.py | 34 +++++---------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index ad159ee502f..0be99be3dab 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1441,32 +1441,16 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None: } in content -class CaseSensitiveHandler(BaseHTTPRequestHandler): - """Simple test server for case sensitivity tests.""" +class CapitalisePathHandler(BaseHTTPRequestHandler): + """Simple test server that capitalises URL paths via redirects.""" protocol_version = 'HTTP/1.1' - def do_HEAD(self): - if self.path == '/path': - # Redirect lowercase /path to uppercase /Path - self.send_response(301, 'Moved Permanently') - self.send_header('Location', '/Path') - self.send_header('Content-Length', '0') - self.end_headers() - elif self.path == '/Path': - self.send_response(200, 'OK') - self.send_header('Content-Length', '0') - self.end_headers() - else: - self.send_response(404, 'Not Found') - self.send_header('Content-Length', '0') - self.end_headers() - def do_GET(self): if self.path == '/path': # Redirect lowercase /path to uppercase /Path self.send_response(301, 'Moved Permanently') - self.send_header('Location', f'http://{self.headers["Host"]}/Path') + self.send_header('Location', '/Path') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/Path': @@ -1489,7 +1473,7 @@ def do_GET(self): ) def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: """Test that case-sensitive checking is the default behavior.""" - with serve_application(app, 
CaseSensitiveHandler) as address: + with serve_application(app, CapitalisePathHandler) as address: app.build() content = (app.outdir / 'output.json').read_text(encoding='utf8') @@ -1498,9 +1482,7 @@ def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: # With case-sensitive checking, a URL that redirects to different case # should be marked as redirected - lowercase_uri = f'http://{address}/path' - assert lowercase_uri in rowsby, f'Expected {lowercase_uri} to be checked' - assert rowsby[lowercase_uri]['status'] == 'redirected' + assert rowsby[f'http://{address}/path']['status'] == 'redirected' @pytest.mark.sphinx( @@ -1511,7 +1493,7 @@ def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: ) def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: """Test that linkcheck_case_sensitive=False ignores case differences in URLs.""" - with serve_application(app, CaseSensitiveHandler) as address: + with serve_application(app, CapitalisePathHandler) as address: app.build() content = (app.outdir / 'output.json').read_text(encoding='utf8') @@ -1520,6 +1502,4 @@ def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: # With case-insensitive checking, a URL that differs only in case # should be marked as working - lowercase_uri = f'http://{address}/path' - assert lowercase_uri in rowsby, f'Expected {lowercase_uri} to be checked' - assert rowsby[lowercase_uri]['status'] == 'working' + assert rowsby[f'http://{address}/path']['status'] == 'working' From 5615ffc446a6b75a227be212a9f440fca30e82ee Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 18 Nov 2025 20:49:51 +0530 Subject: [PATCH 35/45] Add support for case-insensitive URL checking in linkcheck builder --- doc/usage/configuration.rst | 38 +++++++++++++-------- sphinx/builders/linkcheck.py | 15 ++++++-- tests/test_builders/test_build_linkcheck.py | 9 +++-- 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 
11922dd2717..151e7a011d5 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,27 +3813,35 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_case_sensitive - :type: :code-py:`bool` - :default: :code-py:`True` +.. confval:: linkcheck_case_insensitive + :type: :code-py:`list` of :code-py:`str` + :default: :code-py:`[]` + + A list of regular expressions that match URLs for which the *linkcheck* + builder should perform case-insensitive comparisons. This is useful for + links to websites that normalise URL casing (for example, GitHub) or + servers that are case-insensitive (for example, Windows-based servers). - This setting controls how the *linkcheck* builder decides - whether a hyperlink's destination is the same as the URL - written in the documentation. + By default, *linkcheck* requires the destination URL to match the + documented URL case-sensitively. For example, a link to + ``http://example.com/PATH`` that redirects to ``http://example.com/path`` + will be reported as ``redirected``. - By default, *linkcheck* requires the destination URL to match the written - URL case-sensitively. This means that a link to - ``http://webserver.test/USERNAME`` in the documentation that the server - redirects to ``http://webserver.test/username`` will be reported as - ``redirected``. + If the URL matches a pattern in this list, such redirects will instead be + reported as ``working``. - To allow a more lenient URL comparison, that will report the previous case as - ``working`` instead, configure this setting to ``False``. + For example, to treat all GitHub URLs as case-insensitive: + + .. code-block:: python + + linkcheck_case_insensitive = [ + r'https://github\.com/.*', + ] .. note:: - HTML anchor checking is always case-sensitive, and is - not affected by this setting. + HTML anchor checking is always case-sensitive and is not affected by + this setting. .. 
versionadded:: 8.2 diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 3e0b749be2e..037fb23e642 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -409,7 +409,9 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts - self.case_insensitive = not config.linkcheck_case_sensitive + self.case_insensitive_patterns: list[re.Pattern[str]] = list( + map(re.compile, config.linkcheck_case_insensitive) + ) self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -630,11 +632,16 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: netloc = urlsplit(req_url).netloc self.rate_limits.pop(netloc, None) + # Check if URL should be compared case-insensitively based on patterns + is_case_insensitive = any( + pattern.match(req_url) for pattern in self.case_insensitive_patterns + ) + # Compare URLs, optionally case-insensitively def _normalise_url(url: str) -> str: """Reduces a URL to a normal/equality-comparable form.""" normalised_url = url.rstrip('/') - if self.case_insensitive: + if is_case_insensitive: # Only casefold the URL before the fragment; fragments are case-sensitive if '#' in normalised_url: url_part, fragment = normalised_url.split('#', 1) @@ -833,7 +840,9 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) - app.add_config_value('linkcheck_case_sensitive', True, '', types=frozenset({bool})) + app.add_config_value( + 'linkcheck_case_insensitive', [], '', types=frozenset({list, tuple}) + ) app.add_event('linkcheck-process-uri') diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 0be99be3dab..dfcf68951fb 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1469,7 +1469,6 @@ 
def do_GET(self): 'linkcheck', testroot='linkcheck-case-check', freshenv=True, - confoverrides={'linkcheck_case_sensitive': True}, ) def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: """Test that case-sensitive checking is the default behavior.""" @@ -1480,7 +1479,7 @@ def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: rows = [json.loads(x) for x in content.splitlines()] rowsby = {row['uri']: row for row in rows} - # With case-sensitive checking, a URL that redirects to different case + # With case-sensitive checking (default), a URL that redirects to different case # should be marked as redirected assert rowsby[f'http://{address}/path']['status'] == 'redirected' @@ -1489,10 +1488,10 @@ def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: 'linkcheck', testroot='linkcheck-case-check', freshenv=True, - confoverrides={'linkcheck_case_sensitive': False}, + confoverrides={'linkcheck_case_insensitive': [r'http://localhost:\d+/.*']}, ) def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: - """Test that linkcheck_case_sensitive=False ignores case differences in URLs.""" + """Test that URLs matching linkcheck_case_insensitive patterns ignore case differences.""" with serve_application(app, CapitalisePathHandler) as address: app.build() @@ -1500,6 +1499,6 @@ def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: rows = [json.loads(x) for x in content.splitlines()] rowsby = {row['uri']: row for row in rows} - # With case-insensitive checking, a URL that differs only in case + # With case-insensitive pattern matching, a URL that differs only in case # should be marked as working assert rowsby[f'http://{address}/path']['status'] == 'working' From 842b756fe781f6374f681d9a3a4d4edb745d7359 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 19 Nov 2025 14:24:42 +0530 Subject: [PATCH 36/45] restore @pytest.mark.test_params and update documentation --- CHANGES.rst | 5 +++++ tests/test_builders/test_build_html_numfig.py | 1 
+ 2 files changed, 6 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index cd36d83957b..01d23b6fb26 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -48,6 +48,11 @@ Features added * #13439: linkcheck: Permit warning on every redirect with ``linkcheck_allowed_redirects = {}``. Patch by Adam Turner and James Addison. +* #14046: linkcheck: Add :confval:`linkcheck_case_insensitive` configuration to + allow case-insensitive URL comparison for specific URL patterns. + This is useful for links to websites that normalise URL casing (for example, + GitHub) or case-insensitive servers. + Patch by Fazeel Usmani. * #13497: Support C domain objects in the table of contents. * #13500: LaTeX: add support for ``fontawesome6`` package. Patch by Jean-François B. diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index 637199d5267..144d9958d0d 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -19,6 +19,7 @@ @pytest.mark.sphinx('html', testroot='numfig') +@pytest.mark.test_params(shared_result='test_build_html_numfig') def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() warnings = app.warning.getvalue() From 1fe4293d58884905d4329fb035ba7c6289b8f30f Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 20 Nov 2025 17:56:25 +0530 Subject: [PATCH 37/45] Refactor linkcheck case sensitivity tests with dynamic path handler --- tests/test_builders/test_build_linkcheck.py | 61 ++++++++++++++++++--- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index dfcf68951fb..7ef281a9e7d 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1442,18 +1442,37 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None: class CapitalisePathHandler(BaseHTTPRequestHandler): - """Simple test server that
capitalises URL paths via redirects.""" + """Test server that capitalises URL paths via redirects.""" protocol_version = 'HTTP/1.1' + def do_HEAD(self): + # Use same logic as GET but don't send body + if self.path.startswith('/') and len(self.path) > 1 and self.path[1:].islower(): + # Redirect lowercase paths to capitalized versions + self.send_response(301, 'Moved Permanently') + self.send_header('Location', '/' + self.path[1:].capitalize()) + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path.startswith('/') and len(self.path) > 1 and self.path[1].isupper() and self.path[2:].islower(): + # Serve capitalized paths + self.send_response(200, 'OK') + self.send_header('Content-Length', '0') + self.end_headers() + else: + self.send_response(404, 'Not Found') + self.send_header('Content-Length', '0') + self.end_headers() + def do_GET(self): - if self.path == '/path': - # Redirect lowercase /path to uppercase /Path + if self.path.startswith('/') and len(self.path) > 1 and self.path[1:].islower(): + # Redirect lowercase paths to capitalized versions self.send_response(301, 'Moved Permanently') - self.send_header('Location', '/Path') + self.send_header('Location', '/' + self.path[1:].capitalize()) self.send_header('Content-Length', '0') self.end_headers() - elif self.path == '/Path': + elif self.path.startswith('/') and len(self.path) > 1 and self.path[1].isupper() and self.path[2:].islower(): + # Serve capitalized paths content = b'ok\n\n' self.send_response(200, 'OK') self.send_header('Content-Length', str(len(content))) @@ -1479,9 +1498,10 @@ def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: rows = [json.loads(x) for x in content.splitlines()] rowsby = {row['uri']: row for row in rows} - # With case-sensitive checking (default), a URL that redirects to different case + # With case-sensitive checking (default), URLs that redirect to different case # should be marked as redirected - assert rowsby[f'http://{address}/path']['status'] 
== 'redirected' + assert rowsby[f'http://{address}/path1']['status'] == 'redirected' + assert rowsby[f'http://{address}/path2']['status'] == 'redirected' @pytest.mark.sphinx( @@ -1499,6 +1519,29 @@ def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: rows = [json.loads(x) for x in content.splitlines()] rowsby = {row['uri']: row for row in rows} - # With case-insensitive pattern matching, a URL that differs only in case + # With case-insensitive pattern matching, URLs that differ only in case # should be marked as working - assert rowsby[f'http://{address}/path']['status'] == 'working' + assert rowsby[f'http://{address}/path1']['status'] == 'working' + assert rowsby[f'http://{address}/path2']['status'] == 'working' + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-case-check', + freshenv=True, + confoverrides={'linkcheck_case_insensitive': [r'http://localhost:\d+/path1']}, +) +def test_linkcheck_mixed_case_sensitivity(app: SphinxTestApp) -> None: + """Test both case-sensitive and case-insensitive checking in one test.""" + with serve_application(app, CapitalisePathHandler) as address: + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + rowsby = {row['uri']: row for row in rows} + + # path1 matches case-insensitive pattern → should be 'working' + assert rowsby[f'http://{address}/path1']['status'] == 'working' + + # path2 doesn't match pattern → should be 'redirected' + assert rowsby[f'http://{address}/path2']['status'] == 'redirected' From 8c7648b71b5e440850b5632634cc515bc967c1ec Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 20 Nov 2025 17:59:46 +0530 Subject: [PATCH 38/45] Update test document with path1 and path2 for case sensitivity tests --- tests/roots/test-linkcheck-case-check/index.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/roots/test-linkcheck-case-check/index.rst
b/tests/roots/test-linkcheck-case-check/index.rst index a0634843bc8..3a0c282ab66 100644 --- a/tests/roots/test-linkcheck-case-check/index.rst +++ b/tests/roots/test-linkcheck-case-check/index.rst @@ -1 +1,3 @@ -`local server path `_ +`path1 `_ + +`path2 `_ From d95224be274bb38a0a6bb32a5773355011058595 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 20 Nov 2025 18:02:09 +0530 Subject: [PATCH 39/45] Apply ruff formatting --- tests/test_builders/test_build_linkcheck.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 7ef281a9e7d..778f12c3a20 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1454,7 +1454,12 @@ def do_HEAD(self): self.send_header('Location', '/' + self.path[1:].capitalize()) self.send_header('Content-Length', '0') self.end_headers() - elif self.path.startswith('/') and len(self.path) > 1 and self.path[1].isupper() and self.path[2:].islower(): + elif ( + self.path.startswith('/') + and len(self.path) > 1 + and self.path[1].isupper() + and self.path[2:].islower() + ): # Serve capitalized paths self.send_response(200, 'OK') self.send_header('Content-Length', '0') @@ -1471,7 +1476,12 @@ def do_GET(self): self.send_header('Location', '/' + self.path[1:].capitalize()) self.send_header('Content-Length', '0') self.end_headers() - elif self.path.startswith('/') and len(self.path) > 1 and self.path[1].isupper() and self.path[2:].islower(): + elif ( + self.path.startswith('/') + and len(self.path) > 1 + and self.path[1].isupper() + and self.path[2:].islower() + ): # Serve capitalized paths content = b'ok\n\n' self.send_response(200, 'OK') From 422b2d5392b721dd120c2ab37e99de170499497c Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Mon, 24 Nov 2025 12:15:11 +0530 Subject: [PATCH 40/45] Refactor linkcheck case sensitivity tests per review feedback --- 
tests/test_builders/test_build_linkcheck.py | 103 +++++--------------- 1 file changed, 23 insertions(+), 80 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 778f12c3a20..4952d7289c0 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1442,47 +1442,19 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None: class CapitalisePathHandler(BaseHTTPRequestHandler): - """Test server that capitalises URL paths via redirects.""" + """Test server that uppercases URL paths via redirects.""" protocol_version = 'HTTP/1.1' - def do_HEAD(self): - # Use same logic as GET but don't send body - if self.path.startswith('/') and len(self.path) > 1 and self.path[1:].islower(): - # Redirect lowercase paths to capitalized versions - self.send_response(301, 'Moved Permanently') - self.send_header('Location', '/' + self.path[1:].capitalize()) - self.send_header('Content-Length', '0') - self.end_headers() - elif ( - self.path.startswith('/') - and len(self.path) > 1 - and self.path[1].isupper() - and self.path[2:].islower() - ): - # Serve capitalized paths - self.send_response(200, 'OK') - self.send_header('Content-Length', '0') - self.end_headers() - else: - self.send_response(404, 'Not Found') - self.send_header('Content-Length', '0') - self.end_headers() - def do_GET(self): if self.path.startswith('/') and len(self.path) > 1 and self.path[1:].islower(): - # Redirect lowercase paths to capitalized versions + # Redirect lowercase paths to uppercase versions self.send_response(301, 'Moved Permanently') - self.send_header('Location', '/' + self.path[1:].capitalize()) + self.send_header('Location', self.path.upper()) self.send_header('Content-Length', '0') self.end_headers() - elif ( - self.path.startswith('/') - and len(self.path) > 1 - and self.path[1].isupper() - and self.path[2:].islower() - ): - # Serve capitalized paths + elif 
self.path.startswith('/') and len(self.path) > 1 and self.path[1:].isupper(): + # Serve uppercase paths content = b'ok\n\n' self.send_response(200, 'OK') self.send_header('Content-Length', str(len(content))) @@ -1499,50 +1471,23 @@ def do_GET(self): testroot='linkcheck-case-check', freshenv=True, ) -def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: - """Test that case-sensitive checking is the default behavior.""" - with serve_application(app, CapitalisePathHandler) as address: - app.build() - - content = (app.outdir / 'output.json').read_text(encoding='utf8') - rows = [json.loads(x) for x in content.splitlines()] - rowsby = {row['uri']: row for row in rows} - - # With case-sensitive checking (default), URLs that redirect to different case - # should be marked as redirected - assert rowsby[f'http://{address}/path1']['status'] == 'redirected' - assert rowsby[f'http://{address}/path2']['status'] == 'redirected' - - -@pytest.mark.sphinx( - 'linkcheck', - testroot='linkcheck-case-check', - freshenv=True, - confoverrides={'linkcheck_case_insensitive': [r'http://localhost:\d+/.*']}, +@pytest.mark.parametrize( + 'case_insensitive_pattern,expected_path1,expected_path2', + [ + ([], 'redirected', 'redirected'), # default: case-sensitive + ([r'http://localhost:\d+/.*'], 'working', 'working'), # all URLs case-insensitive + ([r'http://localhost:\d+/path1'], 'working', 'redirected'), # only path1 case-insensitive + ], ) -def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: - """Test that URLs matching linkcheck_case_insensitive patterns ignore case differences.""" - with serve_application(app, CapitalisePathHandler) as address: - app.build() - - content = (app.outdir / 'output.json').read_text(encoding='utf8') - rows = [json.loads(x) for x in content.splitlines()] - rowsby = {row['uri']: row for row in rows} - - # With case-insensitive pattern matching, URLs that differ only in case - # should be marked as working - assert 
rowsby[f'http://{address}/path1']['status'] == 'working' - assert rowsby[f'http://{address}/path2']['status'] == 'working' - +def test_linkcheck_case_sensitivity( + app: SphinxTestApp, + case_insensitive_pattern: list[str], + expected_path1: str, + expected_path2: str, +) -> None: + """Test case-sensitive and case-insensitive URL checking.""" + app.config.linkcheck_case_insensitive = case_insensitive_pattern -@pytest.mark.sphinx( - 'linkcheck', - testroot='linkcheck-case-check', - freshenv=True, - confoverrides={'linkcheck_case_insensitive': [r'http://localhost:\d+/path1']}, -) -def test_linkcheck_mixed_case_sensitivity(app: SphinxTestApp) -> None: - """Test both case-sensitive and case-insensitive checking in one test.""" with serve_application(app, CapitalisePathHandler) as address: app.build() @@ -1550,8 +1495,6 @@ def test_linkcheck_mixed_case_sensitivity(app: SphinxTestApp) -> None: rows = [json.loads(x) for x in content.splitlines()] rowsby = {row['uri']: row for row in rows} - # path1 matches case-insensitive pattern → should be 'working' - assert rowsby[f'http://{address}/path1']['status'] == 'working' - - # path2 doesn't match pattern → should be 'redirected' - assert rowsby[f'http://{address}/path2']['status'] == 'redirected' + # Verify expected status for each path + assert rowsby[f'http://{address}/path1']['status'] == expected_path1 + assert rowsby[f'http://{address}/path2']['status'] == expected_path2 From a3744b070fc2a2be041b68827a2a82f043b04791 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Mon, 24 Nov 2025 13:03:52 +0530 Subject: [PATCH 41/45] ruff format --- tests/test_builders/test_build_linkcheck.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 4952d7289c0..6189cef5489 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1453,7 +1453,9 @@ def 
do_GET(self): self.send_header('Location', self.path.upper()) self.send_header('Content-Length', '0') self.end_headers() - elif self.path.startswith('/') and len(self.path) > 1 and self.path[1:].isupper(): + elif ( + self.path.startswith('/') and len(self.path) > 1 and self.path[1:].isupper() + ): # Serve uppercase paths content = b'ok\n\n' self.send_response(200, 'OK') @@ -1472,11 +1474,19 @@ def do_GET(self): freshenv=True, ) @pytest.mark.parametrize( - 'case_insensitive_pattern,expected_path1,expected_path2', + ('case_insensitive_pattern', 'expected_path1', 'expected_path2'), [ ([], 'redirected', 'redirected'), # default: case-sensitive - ([r'http://localhost:\d+/.*'], 'working', 'working'), # all URLs case-insensitive - ([r'http://localhost:\d+/path1'], 'working', 'redirected'), # only path1 case-insensitive + ( + [r'http://localhost:\d+/.*'], + 'working', + 'working', + ), # all URLs case-insensitive + ( + [r'http://localhost:\d+/path1'], + 'working', + 'redirected', + ), # only path1 case-insensitive ], ) def test_linkcheck_case_sensitivity( From a53c44a38e3ca8e538e68c8ee57dc5ba6e621571 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Mon, 24 Nov 2025 19:03:05 +0530 Subject: [PATCH 42/45] Update tests/test_builders/test_build_linkcheck.py Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> --- tests/test_builders/test_build_linkcheck.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 6189cef5489..fe1284dcefc 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1447,25 +1447,19 @@ class CapitalisePathHandler(BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' def do_GET(self): - if self.path.startswith('/') and len(self.path) > 1 and self.path[1:].islower(): + if self.path.islower(): # Redirect lowercase paths to uppercase versions 
self.send_response(301, 'Moved Permanently') self.send_header('Location', self.path.upper()) self.send_header('Content-Length', '0') self.end_headers() - elif ( - self.path.startswith('/') and len(self.path) > 1 and self.path[1:].isupper() - ): + else: # Serve uppercase paths content = b'ok\n\n' self.send_response(200, 'OK') self.send_header('Content-Length', str(len(content))) self.end_headers() self.wfile.write(content) - else: - self.send_response(404, 'Not Found') - self.send_header('Content-Length', '0') - self.end_headers() @pytest.mark.sphinx( From 44574936159b1329db2eebc3780f1ff87d920d6c Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Mon, 24 Nov 2025 19:07:52 +0530 Subject: [PATCH 43/45] Add test case for non-redirecting URL in linkcheck case sensitivity tests --- tests/roots/test-linkcheck-case-check/index.rst | 2 ++ tests/test_builders/test_build_linkcheck.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/roots/test-linkcheck-case-check/index.rst b/tests/roots/test-linkcheck-case-check/index.rst index 3a0c282ab66..1747d27ebcd 100644 --- a/tests/roots/test-linkcheck-case-check/index.rst +++ b/tests/roots/test-linkcheck-case-check/index.rst @@ -1,3 +1,5 @@ `path1 `_ `path2 `_ + +`PATH3 `_ diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 6189cef5489..f90a6ba1015 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1456,7 +1456,7 @@ def do_GET(self): elif ( self.path.startswith('/') and len(self.path) > 1 and self.path[1:].isupper() ): - # Serve uppercase paths + # Serve uppercase paths (no redirect) content = b'ok\n\n' self.send_response(200, 'OK') self.send_header('Content-Length', str(len(content))) @@ -1474,18 +1474,20 @@ def do_GET(self): freshenv=True, ) @pytest.mark.parametrize( - ('case_insensitive_pattern', 'expected_path1', 'expected_path2'), + ('case_insensitive_pattern', 'expected_path1',
'expected_path2', 'expected_path3'), [ - ([], 'redirected', 'redirected'), # default: case-sensitive + ([], 'redirected', 'redirected', 'working'), # default: case-sensitive ( [r'http://localhost:\d+/.*'], 'working', 'working', + 'working', ), # all URLs case-insensitive ( [r'http://localhost:\d+/path1'], 'working', 'redirected', + 'working', ), # only path1 case-insensitive ], ) @@ -1494,6 +1496,7 @@ def test_linkcheck_case_sensitivity( case_insensitive_pattern: list[str], expected_path1: str, expected_path2: str, + expected_path3: str, ) -> None: """Test case-sensitive and case-insensitive URL checking.""" app.config.linkcheck_case_insensitive = case_insensitive_pattern @@ -1508,3 +1511,4 @@ def test_linkcheck_case_sensitivity( # Verify expected status for each path assert rowsby[f'http://{address}/path1']['status'] == expected_path1 assert rowsby[f'http://{address}/path2']['status'] == expected_path2 + assert rowsby[f'http://{address}/PATH3']['status'] == expected_path3 From a3d6a370abce88d2f3e3b23a4f4c4f40573179a1 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 24 Nov 2025 22:03:42 +0000 Subject: [PATCH 44/45] misc tweaks; rename to linkcheck_case_insensitive_urls --- CHANGES.rst | 10 ++--- doc/usage/configuration.rst | 35 ++++++++------- sphinx/builders/linkcheck.py | 50 ++++++++++----------- tests/test_builders/test_build_linkcheck.py | 2 +- 4 files changed, 50 insertions(+), 47 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7743b325cc9..12c7236c498 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -48,11 +48,6 @@ Features added * #13439: linkcheck: Permit warning on every redirect with ``linkcheck_allowed_redirects = {}``. Patch by Adam Turner and James Addison. -* #14046: linkcheck: Add :confval:`linkcheck_case_insensitive` configuration to - allow case-insensitive URL comparison for specific URL patterns. 
- This is useful for links to websites that normalise URL casing (for example, - GitHub) or case-insensitive servers. - Patch by Fazeel Usmani. * #13497: Support C domain objects in the table of contents. * #13500: LaTeX: add support for ``fontawesome6`` package. Patch by Jean-François B. @@ -76,6 +71,11 @@ Features added * #14023: Add the new :confval:`mathjax_config_path` option to load MathJax configuration from a file. Patch by Randolf Scholz and Adam Turner. +* #14046: linkcheck: Add the :confval:`linkcheck_case_insensitive_urls` option + to allow case-insensitive URL comparison for specific URL patterns. + This is useful for links to websites that normalise URL casing (e.g. GitHub) + or case-insensitive servers. + Patch by Fazeel Usmani and James Addison. Bugs fixed ---------- diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 151e7a011d5..5e2ecf4ba1e 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,37 +3813,40 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_case_insensitive - :type: :code-py:`list` of :code-py:`str` - :default: :code-py:`[]` +.. confval:: linkcheck_case_insensitive_urls + :type: :code-py:`Set[str] | Sequence[str]` + :default: :code-py:`()` - A list of regular expressions that match URLs for which the *linkcheck* + A collection of regular expressions that match URLs for which the *linkcheck* builder should perform case-insensitive comparisons. This is useful for - links to websites that normalise URL casing (for example, GitHub) or - servers that are case-insensitive (for example, Windows-based servers). + links to websites that are case-insensitive or normalise URL casing. By default, *linkcheck* requires the destination URL to match the - documented URL case-sensitively. For example, a link to - ``http://example.com/PATH`` that redirects to ``http://example.com/path`` - will be reported as ``redirected``. + documented URL case-sensitively. 
+ For example, a link to ``http://example.org/PATH`` that redirects to + ``http://example.org/path`` will be reported as ``redirected``. - If the URL matches a pattern in this list, such redirects will instead be - reported as ``working``. + If the URL matches a pattern contained in :confval:`!linkcheck_case_insensitive_urls`, + it would instead be reported as ``working``. For example, to treat all GitHub URLs as case-insensitive: .. code-block:: python - linkcheck_case_insensitive = [ + linkcheck_case_insensitive_urls = [ r'https://github\.com/.*', ] - .. note:: + Or, to treat all URLs as case-insensitive: - HTML anchor checking is always case-sensitive and is not affected by - this setting. + .. code-block:: python - .. versionadded:: 8.2 + linkcheck_case_insensitive_urls = ['.*'] + + .. note:: URI fragments (HTML anchors) are not affected by this option. + They are always checked with case-sensitive comparisons. + + .. versionadded:: 8.3 .. confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 037fb23e642..91e8c753943 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -34,7 +34,7 @@ from sphinx.util.nodes import get_node_line if TYPE_CHECKING: - from collections.abc import Callable, Iterator + from collections.abc import Callable, Iterator, Sequence from typing import Any, Literal, TypeAlias from requests import Response @@ -385,6 +385,9 @@ def __init__( self.documents_exclude: list[re.Pattern[str]] = list( map(re.compile, config.linkcheck_exclude_documents) ) + self.ignore_case: Sequence[re.Pattern[str]] = tuple( + map(re.compile, config.linkcheck_case_insensitive_urls) + ) self.auth = [ (re.compile(pattern), auth_info) for pattern, auth_info in config.linkcheck_auth @@ -409,9 +412,6 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts - self.case_insensitive_patterns: 
list[re.Pattern[str]] = list( - map(re.compile, config.linkcheck_case_insensitive) - ) self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -632,29 +632,15 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: netloc = urlsplit(req_url).netloc self.rate_limits.pop(netloc, None) - # Check if URL should be compared case-insensitively based on patterns - is_case_insensitive = any( - pattern.match(req_url) for pattern in self.case_insensitive_patterns + # Check if URL should be normalised case-insensitively + ignore_case = any(pat.match(req_url) for pat in self.ignore_case) + normalised_req_url = self._normalise_url(req_url, ignore_case=ignore_case) + normalised_response_url = self._normalise_url( + response_url, ignore_case=ignore_case ) - # Compare URLs, optionally case-insensitively - def _normalise_url(url: str) -> str: - """Reduces a URL to a normal/equality-comparable form.""" - normalised_url = url.rstrip('/') - if is_case_insensitive: - # Only casefold the URL before the fragment; fragments are case-sensitive - if '#' in normalised_url: - url_part, fragment = normalised_url.split('#', 1) - normalised_url = url_part.casefold() + '#' + fragment - else: - normalised_url = normalised_url.casefold() - return normalised_url - - normalised_request_url = _normalise_url(req_url) - normalised_response_url = _normalise_url(response_url) - if ( - normalised_request_url == normalised_response_url + normalised_response_url == normalised_req_url or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip return _Status.WORKING, '', 0 @@ -700,6 +686,17 @@ def limit_rate(self, response_url: str, retry_after: str | None) -> float | None self.rate_limits[netloc] = RateLimit(delay, next_check) return next_check + @staticmethod + def _normalise_url(url: str, *, ignore_case: bool) -> str: + normalised_url = url.rstrip('/') + if not ignore_case: + return normalised_url + # URI fragments 
are case-sensitive + url_part, sep, fragment = normalised_url.partition('#') + if sep: + return f'{url_part.casefold()}#{fragment}' + return url_part.casefold() + def _get_request_headers( uri: str, @@ -841,7 +838,10 @@ def setup(app: Sphinx) -> ExtensionMetadata: 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) app.add_config_value( - 'linkcheck_case_insensitive', [], '', types=frozenset({list, tuple}) + 'linkcheck_case_insensitive_urls', + (), + '', + types=frozenset({frozenset, list, set, tuple}), ) app.add_event('linkcheck-process-uri') diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 88a2acacb3d..3755f5aa84a 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1493,7 +1493,7 @@ def test_linkcheck_case_sensitivity( expected_path3: str, ) -> None: """Test case-sensitive and case-insensitive URL checking.""" - app.config.linkcheck_case_insensitive = case_insensitive_pattern + app.config.linkcheck_case_insensitive_urls = case_insensitive_pattern with serve_application(app, CapitalisePathHandler) as address: app.build() From 05d104973a3e3045e6c7dcf27f3cc2274b83e271 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 24 Nov 2025 22:15:57 +0000 Subject: [PATCH 45/45] fixup --- doc/usage/configuration.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 5e2ecf4ba1e..e9f4d37c1de 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3826,7 +3826,8 @@ and the number of workers to use. For example, a link to ``http://example.org/PATH`` that redirects to ``http://example.org/path`` will be reported as ``redirected``. 
- If the URL matches a pattern contained in :confval:`!linkcheck_case_insensitive_urls`, + If the URL matches a pattern contained in + :confval:`!linkcheck_case_insensitive_urls`, it would instead be reported as ``working``. For example, to treat all GitHub URLs as case-insensitive: