99from typing import TYPE_CHECKING , Iterable
1010from urllib .parse import urlparse
1111
12- import httpx
13- from starlette .status import HTTP_200_OK , HTTP_401_UNAUTHORIZED , HTTP_403_FORBIDDEN , HTTP_404_NOT_FOUND
1412
15- from gitingest .utils .compat_func import removesuffix
1613
1714from gitingest .utils .logging_config import get_logger
1815
2522
2623
2724
def is_github_host(url: str) -> bool:
    """Tell whether a URL points at a GitHub host (github.com or GitHub Enterprise).

    Parameters
    ----------
    url : str
        The URL to inspect.

    Returns
    -------
    bool
        ``True`` when the URL's hostname begins with ``github.``; ``False`` otherwise,
        including when no hostname can be extracted from the URL.

    """
    host = urlparse(url).hostname
    # ``hostname`` is ``None`` for scheme-less or otherwise host-less inputs.
    if not host:
        return False
    return host.startswith("github.")
44-
4525
4626async def run_command (* args : str ) -> tuple [bytes , bytes ]:
4727 """Execute a shell command asynchronously and return (stdout, stderr) bytes.
@@ -115,80 +95,27 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
11595 url : str
11696 URL of the Git repository to check.
11797 token : str | None
118- GitHub personal access token (PAT) for accessing private repositories.
98+ Personal access token (PAT) for accessing private repositories.
11999
120100 Returns
121101 -------
122102 bool
123103 ``True`` if the repository exists, ``False`` otherwise.
124104
125- Raises
126- ------
127- RuntimeError
128- If the host returns an unrecognised status code.
129-
130105 """
131- headers = {}
132-
133- if token and is_github_host (url ):
134- host , owner , repo = _parse_github_url (url )
135- # Public GitHub vs. GitHub Enterprise
136- base_api = "https://api.github.com" if host == "github.com" else f"https://{ host } /api/v3"
137- url = f"{ base_api } /repos/{ owner } /{ repo } "
138- headers ["Authorization" ] = f"Bearer { token } "
139-
140- async with httpx .AsyncClient (follow_redirects = True ) as client :
141- try :
142- response = await client .head (url , headers = headers )
143- except httpx .RequestError :
144- return False
145-
146- status_code = response .status_code
147-
148- if status_code == HTTP_200_OK :
106+ try :
107+ # Use git ls-remote to check if repository exists
108+ cmd = ["git" ]
109+ if token :
110+ cmd += ["-c" , create_git_auth_header (token , url = url )]
111+ cmd += ["ls-remote" , "--heads" , url ]
112+
113+ await run_command (* cmd )
149114 return True
150- if status_code in { HTTP_401_UNAUTHORIZED , HTTP_403_FORBIDDEN , HTTP_404_NOT_FOUND } :
115+ except Exception :
151116 return False
152- msg = f"Unexpected HTTP status { status_code } for { url } "
153- raise RuntimeError (msg )
154117
155118
156- def _parse_github_url (url : str ) -> tuple [str , str , str ]:
157- """Parse a GitHub URL and return (hostname, owner, repo).
158-
159- Parameters
160- ----------
161- url : str
162- The URL of the GitHub repository to parse.
163-
164- Returns
165- -------
166- tuple[str, str, str]
167- A tuple containing the hostname, owner, and repository name.
168-
169- Raises
170- ------
171- ValueError
172- If the URL is not a valid GitHub repository URL.
173-
174- """
175- parsed = urlparse (url )
176- if parsed .scheme not in {"http" , "https" }:
177- msg = f"URL must start with http:// or https://: { url !r} "
178- raise ValueError (msg )
179-
180- if not parsed .hostname or not parsed .hostname .startswith ("github." ):
181- msg = f"Un-recognised GitHub hostname: { parsed .hostname !r} "
182- raise ValueError (msg )
183-
184- parts = removesuffix (parsed .path , ".git" ).strip ("/" ).split ("/" )
185- expected_path_length = 2
186- if len (parts ) != expected_path_length :
187- msg = f"Path must look like /<owner>/<repo>: { parsed .path !r} "
188- raise ValueError (msg )
189-
190- owner , repo = parts
191- return parsed .hostname , owner , repo
192119
193120
194121async def fetch_remote_branches_or_tags (url : str , * , ref_type : str , token : str | None = None ) -> list [str ]:
@@ -201,7 +128,7 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
201128 ref_type: str
202129 The type of reference to fetch. Can be "branches" or "tags".
203130 token : str | None
204- GitHub personal access token (PAT) for accessing private repositories.
131+ Personal access token (PAT) for accessing private repositories.
205132
206133 Returns
207134 -------
@@ -221,7 +148,7 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
221148 cmd = ["git" ]
222149
223150 # Add authentication if needed
224- if token and is_github_host ( url ) :
151+ if token :
225152 cmd += ["-c" , create_git_auth_header (token , url = url )]
226153
227154 cmd += ["ls-remote" ]
@@ -314,7 +241,7 @@ async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None
314241 config : CloneConfig
315242 The configuration for cloning the repository, including subpath and blob flag.
316243 token : str | None
317- GitHub personal access token (PAT) for accessing private repositories.
244+ Personal access token (PAT) for accessing private repositories.
318245
319246 """
320247 subpath = config .subpath .lstrip ("/" )
@@ -333,7 +260,7 @@ async def resolve_commit(config: CloneConfig, token: str | None) -> str:
333260 config : CloneConfig
334261 The configuration for cloning the repository.
335262 token : str | None
336- GitHub personal access token (PAT) for accessing private repositories.
263+ Personal access token (PAT) for accessing private repositories.
337264
338265 Returns
339266 -------
@@ -365,7 +292,7 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
365292 pattern : str
366293 The pattern to use to resolve the commit SHA.
367294 token : str | None
368- GitHub personal access token (PAT) for accessing private repositories.
295+ Personal access token (PAT) for accessing private repositories.
369296
370297 Returns
371298 -------
@@ -380,7 +307,7 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
380307 """
381308 # Build: git [-c http.<host>/.extraheader=Auth...] ls-remote <url> <pattern>
382309 cmd : list [str ] = ["git" ]
383- if token and is_github_host ( url ) :
310+ if token :
384311 cmd += ["-c" , create_git_auth_header (token , url = url )]
385312
386313 cmd += ["ls-remote" , url , pattern ]
0 commit comments