Skip to content

Commit d8238d3

Browse files
Remove GitHub-specific token validation and generalize authentication
Co-authored-by: nicoragne <nicoragne@hotmail.fr>
1 parent c9fff75 commit d8238d3

File tree

5 files changed

+35
-93
lines changed

5 files changed

+35
-93
lines changed

src/gitingest/clone.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
4040
config : CloneConfig
4141
The configuration for cloning the repository.
4242
token : str | None
43-
GitHub personal access token (PAT) for accessing private repositories.
43+
Personal access token (PAT) for accessing private repositories.
4444
4545
Raises
4646
------
@@ -84,7 +84,7 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
8484
logger.debug("Resolved commit", extra={"commit": commit})
8585

8686
clone_cmd = ["git"]
87-
if token and is_github_host(url):
87+
if token:
8888
clone_cmd += ["-c", create_git_auth_header(token, url=url)]
8989

9090
clone_cmd += ["clone", "--single-branch", "--no-checkout", "--depth=1"]

src/gitingest/utils/auth.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,24 +4,20 @@
44

55
import os
66

7-
from gitingest.utils.git_utils import validate_github_token
8-
97

108
def resolve_token(token: str | None) -> str | None:
119
"""Resolve the token to use for the query.
1210
1311
Parameters
1412
----------
1513
token : str | None
16-
GitHub personal access token (PAT) for accessing private repositories.
14+
Personal access token (PAT) for accessing private repositories.
1715
1816
Returns
1917
-------
2018
str | None
2119
The resolved token.
2220
2321
"""
24-
token = token or os.getenv("GITHUB_TOKEN")
25-
if token:
26-
validate_github_token(token)
22+
token = token or os.getenv("GITHUB_TOKEN") # Keep env var name for backward compatibility
2723
return token

src/gitingest/utils/git_utils.py

Lines changed: 12 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,16 @@
44

55
import asyncio
66
import base64
7-
import re
87
import sys
98
from pathlib import Path
10-
from typing import TYPE_CHECKING, Final, Iterable
9+
from typing import TYPE_CHECKING, Iterable
1110
from urllib.parse import urlparse
1211

1312
import httpx
1413
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
1514

1615
from gitingest.utils.compat_func import removesuffix
17-
from gitingest.utils.exceptions import InvalidGitHubTokenError
16+
1817
from gitingest.utils.logging_config import get_logger
1918

2019
if TYPE_CHECKING:
@@ -23,10 +22,7 @@
2322
# Initialize logger for this module
2423
logger = get_logger(__name__)
2524

26-
# GitHub Personal-Access tokens (classic + fine-grained).
27-
# - ghp_ / gho_ / ghu_ / ghs_ / ghr_ → 36 alphanumerics
28-
# - github_pat_ → 22 alphanumerics + "_" + 59 alphanumerics
29-
_GITHUB_PAT_PATTERN: Final[str] = r"^(?:gh[pousr]_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59})$"
25+
3026

3127

3228
def is_github_host(url: str) -> bool:
@@ -263,9 +259,9 @@ def create_git_command(base_cmd: list[str], local_path: str, url: str, token: st
263259
local_path : str
264260
The local path where the git command should be executed.
265261
url : str
266-
The repository URL to check if it's a GitHub repository.
262+
The repository URL for authentication.
267263
token : str | None
268-
GitHub personal access token (PAT) for accessing private repositories.
264+
Personal access token (PAT) for accessing private repositories.
269265
270266
Returns
271267
-------
@@ -274,21 +270,20 @@ def create_git_command(base_cmd: list[str], local_path: str, url: str, token: st
274270
275271
"""
276272
cmd = [*base_cmd, "-C", local_path]
277-
if token and is_github_host(url):
273+
if token:
278274
cmd += ["-c", create_git_auth_header(token, url=url)]
279275
return cmd
280276

281277

282-
def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
283-
"""Create a Basic authentication header for GitHub git operations.
278+
def create_git_auth_header(token: str, url: str) -> str:
279+
"""Create a Basic authentication header for git operations.
284280
285281
Parameters
286282
----------
287283
token : str
288-
GitHub personal access token (PAT) for accessing private repositories.
284+
Personal access token (PAT) for accessing private repositories.
289285
url : str
290-
The GitHub URL to create the authentication header for.
291-
Defaults to "https://github.com" if not provided.
286+
The repository URL to create the authentication header for.
292287
293288
Returns
294289
-------
@@ -298,35 +293,18 @@ def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
298293
Raises
299294
------
300295
ValueError
301-
If the URL is not a valid GitHub repository URL.
296+
If the URL is not a valid repository URL.
302297
303298
"""
304299
hostname = urlparse(url).hostname
305300
if not hostname:
306-
msg = f"Invalid GitHub URL: {url!r}"
301+
msg = f"Invalid repository URL: {url!r}"
307302
raise ValueError(msg)
308303

309304
basic = base64.b64encode(f"x-oauth-basic:{token}".encode()).decode()
310305
return f"http.https://{hostname}/.extraheader=Authorization: Basic {basic}"
311306

312307

313-
def validate_github_token(token: str) -> None:
314-
"""Validate the format of a GitHub Personal Access Token.
315-
316-
Parameters
317-
----------
318-
token : str
319-
GitHub personal access token (PAT) for accessing private repositories.
320-
321-
Raises
322-
------
323-
InvalidGitHubTokenError
324-
If the token format is invalid.
325-
326-
"""
327-
if not re.fullmatch(_GITHUB_PAT_PATTERN, token):
328-
raise InvalidGitHubTokenError
329-
330308

331309
async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None:
332310
"""Configure sparse-checkout for a partially cloned repository.

src/server/query_processor.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from gitingest.clone import clone_repo
1010
from gitingest.ingestion import ingest_query
1111
from gitingest.query_parser import parse_remote_repo
12-
from gitingest.utils.git_utils import resolve_commit, validate_github_token
12+
from gitingest.utils.git_utils import resolve_commit
1313
from gitingest.utils.logging_config import get_logger
1414
from gitingest.utils.pattern_utils import process_patterns
1515
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType, S3Metadata
@@ -262,9 +262,6 @@ async def process_query(
262262
If the commit hash is not found (should never happen).
263263
264264
"""
265-
if token:
266-
validate_github_token(token)
267-
268265
try:
269266
query = await parse_remote_repo(input_text, token=token)
270267
except Exception as exc:

tests/test_git_utils.py

Lines changed: 18 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,58 +1,24 @@
11
"""Tests for the ``git_utils`` module.
22
3-
These tests validate the ``validate_github_token`` function, which ensures that
4-
GitHub personal access tokens (PATs) are properly formatted.
3+
These tests validate various git utility functions for repository operations.
54
"""
65

76
from __future__ import annotations
87

98
import base64
109
from typing import TYPE_CHECKING
10+
from urllib.parse import urlparse
1111

1212
import pytest
1313

14-
from gitingest.utils.exceptions import InvalidGitHubTokenError
15-
from gitingest.utils.git_utils import create_git_auth_header, create_git_command, is_github_host, validate_github_token
14+
from gitingest.utils.git_utils import create_git_auth_header, create_git_command, is_github_host
1615

1716
if TYPE_CHECKING:
1817
from pathlib import Path
1918

2019
from pytest_mock import MockerFixture
2120

2221

23-
@pytest.mark.parametrize(
24-
"token",
25-
[
26-
# Valid tokens: correct prefixes and at least 36 allowed characters afterwards
27-
"github_pat_" + "a" * 22 + "_" + "b" * 59,
28-
"ghp_" + "A" * 36,
29-
"ghu_" + "B" * 36,
30-
"ghs_" + "C" * 36,
31-
"ghr_" + "D" * 36,
32-
"gho_" + "E" * 36,
33-
],
34-
)
35-
def test_validate_github_token_valid(token: str) -> None:
36-
"""validate_github_token should accept properly-formatted tokens."""
37-
# Should not raise any exception
38-
validate_github_token(token)
39-
40-
41-
@pytest.mark.parametrize(
42-
"token",
43-
[
44-
"github_pat_short", # Too short after prefix
45-
"ghp_" + "b" * 35, # one character short
46-
"invalidprefix_" + "c" * 36, # Wrong prefix
47-
"github_pat_" + "!" * 36, # Disallowed characters
48-
"github_pat_" + "a" * 36, # Too short after 'github_pat_' prefix
49-
"", # Empty string
50-
],
51-
)
52-
def test_validate_github_token_invalid(token: str) -> None:
53-
"""Test that ``validate_github_token`` raises ``InvalidGitHubTokenError`` on malformed tokens."""
54-
with pytest.raises(InvalidGitHubTokenError):
55-
validate_github_token(token)
5622

5723

5824
@pytest.mark.parametrize(
@@ -72,15 +38,18 @@ def test_validate_github_token_invalid(token: str) -> None:
7238
"ghp_" + "d" * 36,
7339
[
7440
"-c",
75-
create_git_auth_header("ghp_" + "d" * 36),
76-
], # Auth header expected for GitHub URL + token
41+
create_git_auth_header("ghp_" + "d" * 36, "https://github.com/owner/repo.git"),
42+
], # Auth header expected when token is provided
7743
),
7844
(
7945
["git", "clone"],
8046
"/some/path",
8147
"https://gitlab.com/owner/repo.git",
8248
"ghp_" + "e" * 36,
83-
[], # No auth header for non-GitHub URL even if token provided
49+
[
50+
"-c",
51+
create_git_auth_header("ghp_" + "e" * 36, "https://gitlab.com/owner/repo.git"),
52+
], # Auth header expected for any URL when token is provided
8453
),
8554
],
8655
)
@@ -103,17 +72,19 @@ def test_create_git_command(
10372

10473

10574
@pytest.mark.parametrize(
106-
"token",
75+
("token", "url"),
10776
[
108-
"ghp_abcdefghijklmnopqrstuvwxyz012345", # typical ghp_ token
109-
"github_pat_1234567890abcdef1234567890abcdef1234",
77+
("ghp_abcdefghijklmnopqrstuvwxyz012345", "https://github.com/owner/repo.git"), # typical ghp_ token
78+
("github_pat_1234567890abcdef1234567890abcdef1234", "https://github.com/owner/repo.git"),
79+
("some_token", "https://gitlab.com/owner/repo.git"), # non-GitHub URL
11080
],
11181
)
112-
def test_create_git_auth_header(token: str) -> None:
82+
def test_create_git_auth_header(token: str, url: str) -> None:
11383
"""Test that ``create_git_auth_header`` produces correct base64-encoded header."""
114-
header = create_git_auth_header(token)
84+
header = create_git_auth_header(token, url)
11585
expected_basic = base64.b64encode(f"x-oauth-basic:{token}".encode()).decode()
116-
expected = f"http.https://github.com/.extraheader=Authorization: Basic {expected_basic}"
86+
hostname = urlparse(url).hostname
87+
expected = f"http.https://{hostname}/.extraheader=Authorization: Basic {expected_basic}"
11788
assert header == expected
11889

11990

@@ -122,7 +93,7 @@ def test_create_git_auth_header(token: str) -> None:
12293
[
12394
("https://github.com/foo/bar.git", "ghp_" + "f" * 36, True),
12495
("https://github.com/foo/bar.git", None, False),
125-
("https://gitlab.com/foo/bar.git", "ghp_" + "g" * 36, False),
96+
("https://gitlab.com/foo/bar.git", "ghp_" + "g" * 36, True), # Now called for all URLs with token
12697
],
12798
)
12899
def test_create_git_command_helper_calls(

0 commit comments

Comments
 (0)