From 7c31ee8974463fc8863945e21ad46983ca83a006 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 9 Aug 2025 13:35:25 +0000 Subject: [PATCH 1/3] Checkpoint before follow-up message Co-authored-by: nicoragne --- src/gitingest/utils/git_utils.py | 52 ++++++++++++++------------------ tests/test_clone.py | 2 -- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py index daf4056d..1384cdab 100644 --- a/src/gitingest/utils/git_utils.py +++ b/src/gitingest/utils/git_utils.py @@ -10,8 +10,7 @@ from typing import TYPE_CHECKING, Final, Iterable from urllib.parse import urlparse -import httpx -from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND + from gitingest.utils.compat_func import removesuffix from gitingest.utils.exceptions import InvalidGitHubTokenError @@ -112,7 +111,7 @@ async def ensure_git_installed() -> None: async def check_repo_exists(url: str, token: str | None = None) -> bool: - """Check whether a remote Git repository is reachable. + """Check whether a remote Git repository is reachable using git ls-remote. Parameters ---------- @@ -126,35 +125,30 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool: bool ``True`` if the repository exists, ``False`` otherwise. - Raises - ------ - RuntimeError - If the host returns an unrecognised status code. - """ - headers = {} - + cmd = ["git", "ls-remote"] + + # Add authentication header if token is provided for GitHub repositories if token and is_github_host(url): - host, owner, repo = _parse_github_url(url) - # Public GitHub vs. 
GitHub Enterprise - base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3" - url = f"{base_api}/repos/{owner}/{repo}" - headers["Authorization"] = f"Bearer {token}" - - async with httpx.AsyncClient(follow_redirects=True) as client: - try: - response = await client.head(url, headers=headers) - except httpx.RequestError: - return False - - status_code = response.status_code - - if status_code == HTTP_200_OK: - return True - if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}: + cmd.extend(["-c", create_git_auth_header(token, url=url)]) + + cmd.extend(["--exit-code", url, "HEAD"]) + + try: + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await proc.communicate() + + # git ls-remote returns 0 if repository exists and is accessible + # returns non-zero if repository doesn't exist or is not accessible + return proc.returncode == 0 + + except Exception: + # If any exception occurs (e.g., git not available), assume repo doesn't exist return False - msg = f"Unexpected HTTP status {status_code} for {url}" - raise RuntimeError(msg) def _parse_github_url(url: str) -> tuple[str, str, str]: diff --git a/tests/test_clone.py b/tests/test_clone.py index 1d89c212..b2637bff 100644 --- a/tests/test_clone.py +++ b/tests/test_clone.py @@ -11,9 +11,7 @@ from typing import TYPE_CHECKING from unittest.mock import AsyncMock -import httpx import pytest -from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND from gitingest.clone import clone_repo from gitingest.schemas import CloneConfig From d36198da8065759eeb4b10d5b99e48fd4097133a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 9 Aug 2025 13:38:42 +0000 Subject: [PATCH 2/3] Refactor check_repo_exists to use git ls-remote with authentication support Co-authored-by: nicoragne --- tests/test_clone.py | 71 
+++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/tests/test_clone.py b/tests/test_clone.py index b2637bff..8dd70f65 100644 --- a/tests/test_clone.py +++ b/tests/test_clone.py @@ -99,24 +99,30 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None @pytest.mark.asyncio @pytest.mark.parametrize( - ("status_code", "expected"), + ("returncode", "expected"), [ - (HTTP_200_OK, True), - (HTTP_401_UNAUTHORIZED, False), - (HTTP_403_FORBIDDEN, False), - (HTTP_404_NOT_FOUND, False), + (0, True), # Repository exists and is accessible + (2, False), # Remote reachable but HEAD not found (--exit-code), e.g. empty repository + (128, False), # Git error (e.g., repository not found or authentication failure) ], ) -async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None: - """Verify that ``check_repo_exists`` interprets httpx results correctly.""" - mock_client = AsyncMock() - mock_client.__aenter__.return_value = mock_client # context-manager protocol - mock_client.head.return_value = httpx.Response(status_code=status_code) - mocker.patch("httpx.AsyncClient", return_value=mock_client) +async def test_check_repo_exists(returncode: int, *, expected: bool, mocker: MockerFixture) -> None: + """Verify that ``check_repo_exists`` interprets git ls-remote results correctly.""" + mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"", b"") + mock_process.returncode = returncode + mock_exec.return_value = mock_process result = await check_repo_exists(DEMO_URL) assert result is expected + # Verify that git ls-remote was called with the correct arguments + mock_exec.assert_called_once_with( + "git", "ls-remote", "--exit-code", DEMO_URL, "HEAD", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) @pytest.mark.asyncio @@ -188,24 +194,53 @@ async def test_clone_commit(run_command_mock: 
AsyncMock) -> None: @pytest.mark.asyncio -async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None: - """Test ``check_repo_exists`` when a redirect (302) is returned. +async def test_check_repo_exists_with_exception(mocker: MockerFixture) -> None: + """Test ``check_repo_exists`` when an exception occurs during git ls-remote. - Given a URL that responds with "302 Found": + Given a git ls-remote command that raises an exception: When ``check_repo_exists`` is called, Then it should return ``False``, indicating the repo is inaccessible. """ mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) - mock_process = AsyncMock() - mock_process.communicate.return_value = (b"302\n", b"") - mock_process.returncode = 0 # Simulate successful request - mock_exec.return_value = mock_process + mock_exec.side_effect = Exception("Git command failed") repo_exists = await check_repo_exists(DEMO_URL) assert repo_exists is False +@pytest.mark.asyncio +async def test_check_repo_exists_with_token(mocker: MockerFixture) -> None: + """Test ``check_repo_exists`` with GitHub token authentication. + + Given a GitHub URL and a token: + When ``check_repo_exists`` is called, + Then it should include the authentication header in the git ls-remote command. 
+ """ + mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + mock_auth_header = mocker.patch("gitingest.utils.git_utils.create_git_auth_header") + mock_auth_header.return_value = "http.extraheader=Authorization: Bearer test_token" + + github_url = "https://github.com/owner/repo" + result = await check_repo_exists(github_url, token="test_token") + + assert result is True + # Verify that authentication header was created + mock_auth_header.assert_called_once_with("test_token", url=github_url) + # Verify that git ls-remote was called with the authentication config + mock_exec.assert_called_once_with( + "git", "ls-remote", "-c", "http.extraheader=Authorization: Bearer test_token", + "--exit-code", github_url, "HEAD", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + @pytest.mark.asyncio async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None: """Test cloning a repository when a timeout occurs. 
From b486f06a7c264805f4d2777c8d9c9bb8aeeb038b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 9 Aug 2025 13:40:21 +0000 Subject: [PATCH 3/3] Add generic token auth support for non-GitHub Git repositories Co-authored-by: nicoragne --- src/gitingest/utils/git_utils.py | 16 +++++++++++--- tests/test_clone.py | 36 ++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py index 1384cdab..4f07cc10 100644 --- a/src/gitingest/utils/git_utils.py +++ b/src/gitingest/utils/git_utils.py @@ -128,9 +128,19 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool: """ cmd = ["git", "ls-remote"] - # Add authentication header if token is provided for GitHub repositories - if token and is_github_host(url): - cmd.extend(["-c", create_git_auth_header(token, url=url)]) + # Add authentication header if token is provided + if token: + if is_github_host(url): + # Use GitHub-specific authentication + cmd.extend(["-c", create_git_auth_header(token, url=url)]) + else: + # For non-GitHub repositories, send the token as HTTP basic auth with the ``oauth2`` username + # NOTE(review): GitLab accepts this form; other hosts may expect a different username - verify per host + parsed_url = urlparse(url) + if parsed_url.hostname: + basic_auth = base64.b64encode(f"oauth2:{token}".encode()).decode() + auth_header = f"http.https://{parsed_url.hostname}/.extraheader=Authorization: Basic {basic_auth}" + cmd.extend(["-c", auth_header]) cmd.extend(["--exit-code", url, "HEAD"]) diff --git a/tests/test_clone.py b/tests/test_clone.py index 8dd70f65..3962125f 100644 --- a/tests/test_clone.py +++ b/tests/test_clone.py @@ -210,12 +210,12 @@ async def test_check_repo_exists_with_exception(mocker: MockerFixture) -> None: @pytest.mark.asyncio -async def test_check_repo_exists_with_token(mocker: MockerFixture) -> None: +async def test_check_repo_exists_with_github_token(mocker: MockerFixture) -> None: """Test ``check_repo_exists`` with GitHub token 
authentication. Given a GitHub URL and a token: When ``check_repo_exists`` is called, - Then it should include the authentication header in the git ls-remote command. + Then it should include the GitHub-specific authentication header in the git ls-remote command. """ mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) mock_process = AsyncMock() @@ -241,6 +241,38 @@ async def test_check_repo_exists_with_token(mocker: MockerFixture) -> None: ) +@pytest.mark.asyncio +async def test_check_repo_exists_with_non_github_token(mocker: MockerFixture) -> None: + """Test ``check_repo_exists`` with non-GitHub token authentication. + + Given a non-GitHub URL and a token: + When ``check_repo_exists`` is called, + Then it should include the generic HTTP basic auth header in the git ls-remote command. + """ + mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + mock_base64 = mocker.patch("base64.b64encode") + mock_base64.return_value.decode.return_value = "encoded_token" + + gitlab_url = "https://gitlab.com/owner/repo" + result = await check_repo_exists(gitlab_url, token="test_token") + + assert result is True + # Verify that base64 encoding was called for the token + mock_base64.assert_called_once_with(b"oauth2:test_token") + # Verify that git ls-remote was called with the authentication config + mock_exec.assert_called_once_with( + "git", "ls-remote", "-c", "http.https://gitlab.com/.extraheader=Authorization: Basic encoded_token", + "--exit-code", gitlab_url, "HEAD", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + @pytest.mark.asyncio async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None: """Test cloning a repository when a timeout occurs.