Skip to content

Commit 157da7d

Browse files
committed
refactor: Use GitPython instead of git in command line
1 parent 4871b84 commit 157da7d

File tree

6 files changed

+305
-170
lines changed

6 files changed

+305
-170
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ readme = {file = "README.md", content-type = "text/markdown" }
66
requires-python = ">= 3.8"
77
dependencies = [
88
"click>=8.0.0",
9+
"gitpython>=3.1.0",
910
"httpx",
1011
"loguru>=0.7.0",
1112
"pathspec>=0.12.1",

src/gitingest/clone.py

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@
66
from typing import TYPE_CHECKING
77

88
from gitingest.config import DEFAULT_TIMEOUT
9+
import git
910
from gitingest.utils.git_utils import (
11+
_add_token_to_url,
1012
check_repo_exists,
1113
checkout_partial_clone,
1214
create_git_auth_header,
13-
create_git_command,
15+
create_git_repo,
1416
ensure_git_installed,
1517
is_github_host,
1618
resolve_commit,
17-
run_command,
1819
)
1920
from gitingest.utils.logging_config import get_logger
2021
from gitingest.utils.os_utils import ensure_directory_exists_or_create
@@ -83,41 +84,62 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
8384
commit = await resolve_commit(config, token=token)
8485
logger.debug("Resolved commit", extra={"commit": commit})
8586

86-
clone_cmd = ["git"]
87+
# Prepare URL with authentication if needed
88+
clone_url = url
8789
if token and is_github_host(url):
88-
clone_cmd += ["-c", create_git_auth_header(token, url=url)]
89-
90-
clone_cmd += ["clone", "--single-branch", "--no-checkout", "--depth=1"]
91-
if partial_clone:
92-
clone_cmd += ["--filter=blob:none", "--sparse"]
93-
94-
clone_cmd += [url, local_path]
95-
96-
# Clone the repository
97-
logger.info("Executing git clone command", extra={"command": " ".join([*clone_cmd[:-1], "<url>", local_path])})
98-
await run_command(*clone_cmd)
99-
logger.info("Git clone completed successfully")
90+
clone_url = _add_token_to_url(url, token)
91+
92+
# Clone the repository using GitPython
93+
logger.info("Executing git clone operation", extra={"url": "<redacted>", "local_path": local_path})
94+
try:
95+
clone_kwargs = {
96+
"single_branch": True,
97+
"no_checkout": True,
98+
"depth": 1,
99+
}
100+
101+
if partial_clone:
102+
# GitPython doesn't directly support --filter and --sparse in clone
103+
# We'll need to use git.Git() for the initial clone with these options
104+
git_cmd = git.Git()
105+
cmd_args = ["clone", "--single-branch", "--no-checkout", "--depth=1"]
106+
if partial_clone:
107+
cmd_args.extend(["--filter=blob:none", "--sparse"])
108+
cmd_args.extend([clone_url, local_path])
109+
git_cmd.execute(cmd_args)
110+
else:
111+
git.Repo.clone_from(clone_url, local_path, **clone_kwargs)
112+
113+
logger.info("Git clone completed successfully")
114+
except git.GitCommandError as exc:
115+
msg = f"Git clone failed: {exc}"
116+
raise RuntimeError(msg) from exc
100117

101118
# Checkout the subpath if it is a partial clone
102119
if partial_clone:
103120
logger.info("Setting up partial clone for subpath", extra={"subpath": config.subpath})
104121
await checkout_partial_clone(config, token=token)
105122
logger.debug("Partial clone setup completed")
106123

107-
git = create_git_command(["git"], local_path, url, token)
108-
109-
# Ensure the commit is locally available
110-
logger.debug("Fetching specific commit", extra={"commit": commit})
111-
await run_command(*git, "fetch", "--depth=1", "origin", commit)
112-
113-
# Write the work-tree at that commit
114-
logger.info("Checking out commit", extra={"commit": commit})
115-
await run_command(*git, "checkout", commit)
116-
117-
# Update submodules
118-
if config.include_submodules:
119-
logger.info("Updating submodules")
120-
await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1")
121-
logger.debug("Submodules updated successfully")
124+
# Create repo object and perform operations
125+
try:
126+
repo = create_git_repo(local_path, url, token)
127+
128+
# Ensure the commit is locally available
129+
logger.debug("Fetching specific commit", extra={"commit": commit})
130+
repo.git.fetch("--depth=1", "origin", commit)
131+
132+
# Write the work-tree at that commit
133+
logger.info("Checking out commit", extra={"commit": commit})
134+
repo.git.checkout(commit)
135+
136+
# Update submodules
137+
if config.include_submodules:
138+
logger.info("Updating submodules")
139+
repo.git.submodule("update", "--init", "--recursive", "--depth=1")
140+
logger.debug("Submodules updated successfully")
141+
except git.GitCommandError as exc:
142+
msg = f"Git operation failed: {exc}"
143+
raise RuntimeError(msg) from exc
122144

123145
logger.info("Git clone operation completed successfully", extra={"local_path": local_path})

src/gitingest/utils/git_utils.py

Lines changed: 127 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import TYPE_CHECKING, Final, Iterable
1111
from urllib.parse import urlparse
1212

13+
import git
1314
import httpx
1415
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
1516

@@ -50,6 +51,9 @@ def is_github_host(url: str) -> bool:
5051
async def run_command(*args: str) -> tuple[bytes, bytes]:
5152
"""Execute a shell command asynchronously and return (stdout, stderr) bytes.
5253
54+
This function is kept for backward compatibility with non-git commands.
55+
Git operations should use GitPython directly.
56+
5357
Parameters
5458
----------
5559
*args : str
@@ -92,21 +96,26 @@ async def ensure_git_installed() -> None:
9296
9397
"""
9498
try:
95-
await run_command("git", "--version")
96-
except RuntimeError as exc:
99+
# Use GitPython to check git availability
100+
git.Git().version()
101+
except git.GitCommandError as exc:
102+
msg = "Git is not installed or not accessible. Please install Git first."
103+
raise RuntimeError(msg) from exc
104+
except Exception as exc:
97105
msg = "Git is not installed or not accessible. Please install Git first."
98106
raise RuntimeError(msg) from exc
107+
99108
if sys.platform == "win32":
100109
try:
101-
stdout, _ = await run_command("git", "config", "core.longpaths")
102-
if stdout.decode().strip().lower() != "true":
110+
longpaths_value = git.Git().config("core.longpaths")
111+
if longpaths_value.lower() != "true":
103112
logger.warning(
104113
"Git clone may fail on Windows due to long file paths. "
105114
"Consider enabling long path support with: 'git config --global core.longpaths true'. "
106115
"Note: This command may require administrator privileges.",
107116
extra={"platform": "windows", "longpaths_enabled": False},
108117
)
109-
except RuntimeError:
118+
except git.GitCommandError:
110119
# Ignore if checking 'core.longpaths' fails.
111120
pass
112121

@@ -222,61 +231,73 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
222231
msg = f"Invalid fetch type: {ref_type}"
223232
raise ValueError(msg)
224233

225-
cmd = ["git"]
226-
227-
# Add authentication if needed
228-
if token and is_github_host(url):
229-
cmd += ["-c", create_git_auth_header(token, url=url)]
230-
231-
cmd += ["ls-remote"]
232-
233-
fetch_tags = ref_type == "tags"
234-
to_fetch = "tags" if fetch_tags else "heads"
235-
236-
cmd += [f"--{to_fetch}"]
237-
238-
# `--refs` filters out the peeled tag objects (those ending with "^{}") (for tags)
239-
if fetch_tags:
240-
cmd += ["--refs"]
241-
242-
cmd += [url]
243-
244234
await ensure_git_installed()
245-
stdout, _ = await run_command(*cmd)
246-
# For each line in the output:
247-
# - Skip empty lines and lines that don't contain "refs/{to_fetch}/"
248-
# - Extract the branch or tag name after "refs/{to_fetch}/"
249-
return [
250-
line.split(f"refs/{to_fetch}/", 1)[1]
251-
for line in stdout.decode().splitlines()
252-
if line.strip() and f"refs/{to_fetch}/" in line
253-
]
235+
236+
# Use GitPython to get remote references
237+
try:
238+
git_cmd = git.Git()
239+
240+
# Prepare environment with authentication if needed
241+
env = None
242+
if token and is_github_host(url):
243+
auth_url = _add_token_to_url(url, token)
244+
url = auth_url
245+
246+
fetch_tags = ref_type == "tags"
247+
to_fetch = "tags" if fetch_tags else "heads"
248+
249+
# Build ls-remote command
250+
cmd_args = ["ls-remote", f"--{to_fetch}"]
251+
if fetch_tags:
252+
cmd_args.append("--refs") # Filter out peeled tag objects
253+
cmd_args.append(url)
254+
255+
# Run the command
256+
output = git_cmd.execute(cmd_args, env=env)
257+
258+
# Parse output
259+
return [
260+
line.split(f"refs/{to_fetch}/", 1)[1]
261+
for line in output.splitlines()
262+
if line.strip() and f"refs/{to_fetch}/" in line
263+
]
264+
except git.GitCommandError as exc:
265+
msg = f"Failed to fetch {ref_type} from {url}: {exc}"
266+
raise RuntimeError(msg) from exc
254267

255268

256-
def create_git_command(base_cmd: list[str], local_path: str, url: str, token: str | None = None) -> list[str]:
257-
"""Create a git command with authentication if needed.
269+
def create_git_repo(local_path: str, url: str, token: str | None = None) -> git.Repo:
270+
"""Create a GitPython Repo object with authentication if needed.
258271
259272
Parameters
260273
----------
261-
base_cmd : list[str]
262-
The base git command to start with.
263274
local_path : str
264-
The local path where the git command should be executed.
275+
The local path where the git repository is located.
265276
url : str
266277
The repository URL to check if it's a GitHub repository.
267278
token : str | None
268279
GitHub personal access token (PAT) for accessing private repositories.
269280
270281
Returns
271282
-------
272-
list[str]
273-
The git command with authentication if needed.
283+
git.Repo
284+
A GitPython Repo object configured with authentication.
274285
275286
"""
276-
cmd = [*base_cmd, "-C", local_path]
277-
if token and is_github_host(url):
278-
cmd += ["-c", create_git_auth_header(token, url=url)]
279-
return cmd
287+
try:
288+
repo = git.Repo(local_path)
289+
290+
# Configure authentication if needed
291+
if token and is_github_host(url):
292+
auth_header = create_git_auth_header(token, url=url)
293+
# Set the auth header in git config for this repo
294+
key, value = auth_header.split('=', 1)
295+
repo.git.config(key, value)
296+
297+
return repo
298+
except git.InvalidGitRepositoryError as exc:
299+
msg = f"Invalid git repository at {local_path}"
300+
raise ValueError(msg) from exc
280301

281302

282303
def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
@@ -343,8 +364,13 @@ async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None
343364
if config.blob:
344365
# Remove the file name from the subpath when ingesting from a file url (e.g. blob/branch/path/file.txt)
345366
subpath = str(Path(subpath).parent.as_posix())
346-
checkout_cmd = create_git_command(["git"], config.local_path, config.url, token)
347-
await run_command(*checkout_cmd, "sparse-checkout", "set", subpath)
367+
368+
try:
369+
repo = create_git_repo(config.local_path, config.url, token)
370+
repo.git.execute(["sparse-checkout", "set", subpath])
371+
except git.GitCommandError as exc:
372+
msg = f"Failed to configure sparse-checkout: {exc}"
373+
raise RuntimeError(msg) from exc
348374

349375

350376
async def resolve_commit(config: CloneConfig, token: str | None) -> str:
@@ -400,20 +426,27 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
400426
If the ref does not exist in the remote repository.
401427
402428
"""
403-
# Build: git [-c http.<host>/.extraheader=Auth...] ls-remote <url> <pattern>
404-
cmd: list[str] = ["git"]
405-
if token and is_github_host(url):
406-
cmd += ["-c", create_git_auth_header(token, url=url)]
407-
408-
cmd += ["ls-remote", url, pattern]
409-
stdout, _ = await run_command(*cmd)
410-
lines = stdout.decode().splitlines()
411-
sha = _pick_commit_sha(lines)
412-
if not sha:
413-
msg = f"{pattern!r} not found in {url}"
414-
raise ValueError(msg)
415-
416-
return sha
429+
try:
430+
git_cmd = git.Git()
431+
432+
# Prepare authentication if needed
433+
auth_url = url
434+
if token and is_github_host(url):
435+
auth_url = _add_token_to_url(url, token)
436+
437+
# Execute ls-remote command
438+
output = git_cmd.execute(["ls-remote", auth_url, pattern])
439+
lines = output.splitlines()
440+
441+
sha = _pick_commit_sha(lines)
442+
if not sha:
443+
msg = f"{pattern!r} not found in {url}"
444+
raise ValueError(msg)
445+
446+
return sha
447+
except git.GitCommandError as exc:
448+
msg = f"Failed to resolve {pattern} in {url}: {exc}"
449+
raise ValueError(msg) from exc
417450

418451

419452
def _pick_commit_sha(lines: Iterable[str]) -> str | None:
@@ -449,3 +482,37 @@ def _pick_commit_sha(lines: Iterable[str]) -> str | None:
449482
first_non_peeled = sha
450483

451484
return first_non_peeled # branch or lightweight tag (or None)
485+
486+
487+
def _add_token_to_url(url: str, token: str) -> str:
488+
"""Add authentication token to GitHub URL.
489+
490+
Parameters
491+
----------
492+
url : str
493+
The original GitHub URL.
494+
token : str
495+
The GitHub token to add.
496+
497+
Returns
498+
-------
499+
str
500+
The URL with embedded authentication.
501+
502+
"""
503+
from urllib.parse import urlparse, urlunparse
504+
505+
parsed = urlparse(url)
506+
# Add token as username in URL (GitHub supports this)
507+
netloc = f"x-oauth-basic:{token}@{parsed.hostname}"
508+
if parsed.port:
509+
netloc += f":{parsed.port}"
510+
511+
return urlunparse((
512+
parsed.scheme,
513+
netloc,
514+
parsed.path,
515+
parsed.params,
516+
parsed.query,
517+
parsed.fragment
518+
))

0 commit comments

Comments
 (0)