From 825148f88d1d471c09170728e6aee8d84d4ce6f3 Mon Sep 17 00:00:00 2001 From: ValentinFrancois <24223132+ValentinFrancois@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:17:11 +0200 Subject: [PATCH 1/2] Support multiple self-managed gitlab hosts --- gitlab_submodule/gitlab_submodule.py | 28 ++---- gitlab_submodule/project_manager_utils.py | 33 +++++++ gitlab_submodule/read_gitmodules.py | 2 +- gitlab_submodule/submodule_to_project.py | 107 ++++++++++++---------- tests/test_project_manager_utils.py | 19 ++++ tests/test_submodule_to_project.py | 62 ++++++++++--- 6 files changed, 165 insertions(+), 86 deletions(-) create mode 100644 gitlab_submodule/project_manager_utils.py create mode 100644 tests/test_project_manager_utils.py diff --git a/gitlab_submodule/gitlab_submodule.py b/gitlab_submodule/gitlab_submodule.py index 772de60..982a29a 100644 --- a/gitlab_submodule/gitlab_submodule.py +++ b/gitlab_submodule/gitlab_submodule.py @@ -1,35 +1,23 @@ -from typing import Generator, List, Optional, Union +from typing import Generator, List, Optional -from gitlab import Gitlab -from gitlab.v4.objects import Project, ProjectManager +from gitlab.v4.objects import Project from gitlab_submodule.objects import Submodule, Subproject +from gitlab_submodule.project_manager_utils import OneOrManyClients from gitlab_submodule.read_gitmodules import \ iterate_project_submodules as iterate_submodules from gitlab_submodule.submodule_commit import get_submodule_commit from gitlab_submodule.submodule_to_project import submodule_to_project -def _get_project_manager( - gitlab_object: Union[Gitlab, ProjectManager]) -> ProjectManager: - if isinstance(gitlab_object, ProjectManager): - return gitlab_object - elif isinstance(gitlab_object, Gitlab): - return gitlab_object.projects - else: - raise TypeError('Needs a Gitlab instance or its ProjectManager') - - def submodule_to_subproject( gitmodules_submodule: Submodule, - gl: Union[Gitlab, ProjectManager], - self_managed_gitlab_host: 
Optional[str] = None + gls: OneOrManyClients, ) -> Subproject: try: submodule_project = submodule_to_project( gitmodules_submodule, - _get_project_manager(gl), - self_managed_gitlab_host + gls, ) submodule_commit = get_submodule_commit( gitmodules_submodule, @@ -46,17 +34,15 @@ def submodule_to_subproject( def iterate_subprojects( project: Project, - gl: Union[Gitlab, ProjectManager], + gls: OneOrManyClients, ref: Optional[str] = None, only_gitlab_subprojects: bool = False, - self_managed_gitlab_host: Optional[str] = None ) -> Generator[Subproject, None, None]: for gitmodules_submodule in iterate_submodules(project, ref): try: subproject: Subproject = submodule_to_subproject( gitmodules_submodule, - _get_project_manager(gl), - self_managed_gitlab_host, + gls, ) if not (only_gitlab_subprojects and not subproject.project): yield subproject diff --git a/gitlab_submodule/project_manager_utils.py b/gitlab_submodule/project_manager_utils.py new file mode 100644 index 0000000..fca3536 --- /dev/null +++ b/gitlab_submodule/project_manager_utils.py @@ -0,0 +1,33 @@ +from typing import Dict, List, Union + +from gitlab import Gitlab +from gitlab.v4.objects import ProjectManager + +# Some typing +Client = Union[Gitlab, ProjectManager] +OneOrManyClients = Union[Client, List[Client]] +ProjectManagerDicts = Dict[str, ProjectManager] + + +def as_project_manager(gl: Client) -> ProjectManager: + if isinstance(gl, ProjectManager): + return gl + elif isinstance(gl, Gitlab): + return gl.projects + else: + raise TypeError('Needs a Gitlab instance or its ProjectManager') + + +def get_host_url(gl: Client) -> str: + if isinstance(gl, Gitlab): + return gl._base_url + elif isinstance(gl, ProjectManager): + return gl.gitlab._base_url + else: + raise TypeError(gl) + + +def map_domain_to_clients(gls: OneOrManyClients) -> ProjectManagerDicts: + if not isinstance(gls, list): + gls = [gls] + return {get_host_url(gl): as_project_manager(gl) for gl in gls} diff --git 
a/gitlab_submodule/read_gitmodules.py b/gitlab_submodule/read_gitmodules.py index e1548fd..fb252bd 100644 --- a/gitlab_submodule/read_gitmodules.py +++ b/gitlab_submodule/read_gitmodules.py @@ -17,7 +17,7 @@ def iterate_project_submodules( ref: Optional[str] = None) -> Iterable[Submodule]: gitmodules_file_content = _get_gitmodules_file_content(project, ref) if not gitmodules_file_content: - return [] + return for (name, url, path) in _read_gitmodules_file_content( gitmodules_file_content): yield Submodule( diff --git a/gitlab_submodule/submodule_to_project.py b/gitlab_submodule/submodule_to_project.py index bca4260..a966041 100644 --- a/gitlab_submodule/submodule_to_project.py +++ b/gitlab_submodule/submodule_to_project.py @@ -1,87 +1,96 @@ import logging import re from posixpath import join, normpath -from typing import List, Optional, Union +from typing import Optional, Tuple from gitlab.exceptions import GitlabGetError, GitlabHttpError from gitlab.v4.objects import Project, ProjectManager from giturlparse import GitUrlParsed, parse from gitlab_submodule.objects import Submodule +from gitlab_submodule.project_manager_utils import (OneOrManyClients, + map_domain_to_clients) from gitlab_submodule.string_utils import lstrip, rstrip logger = logging.getLogger(__name__) -def submodule_to_project( - submodule: Submodule, - project_manager: ProjectManager, - self_managed_gitlab_host: Optional[Union[str, List[str]]] = None -) -> Optional[Project]: - submodule_project_path_with_namespace = \ - _submodule_url_to_path_with_namespace(submodule.url, - submodule.parent_project, - self_managed_gitlab_host) - if not submodule_project_path_with_namespace: - return None - try: - submodule_project = project_manager.get( - submodule_project_path_with_namespace) - except (GitlabGetError, GitlabHttpError): - # Repo doesn't actually exist (possible because you can modify - # .gitmodules without using `git submodule add`) - raise FileNotFoundError( - 'No repo found at url 
"{}" for submodule at path "{}" - Check if ' - 'the repo was deleted.'.format(submodule.url, submodule.path)) - return submodule_project +def host_url_to_domain(url: str) -> str: + return url.split("//")[1].rstrip("/") + +def match_submodule_to_client_and_format_project_path( + submodule: Submodule, + gls: OneOrManyClients +) -> Optional[Tuple[ProjectManager, str]]: + url = submodule.url -def _submodule_url_to_path_with_namespace( - url: str, - parent_project: Project, - self_managed_gitlab_host: Optional[Union[str, List[str]]] = None -) -> Optional[str]: - """Returns a path pointing to a Gitlab project, or None if the submodule - is hosted elsewhere - """ # check if the submodule url is a relative path to the project path if url.startswith('./') or url.startswith('../'): # we build the path of the submodule project using the path of # the current project url = rstrip(url, '.git') - path_with_namespace = normpath( - join(parent_project.path_with_namespace, url)) - return path_with_namespace + path_with_namespace = normpath(join( + submodule.parent_project.path_with_namespace, + url + )) + client: ProjectManager = submodule.parent_project.manager + return client, path_with_namespace + # If URL is not relative: try parsing it parsed: GitUrlParsed = parse(url) if not parsed.valid: logger.warning(f'submodule git url does not seem to be valid: {url}') return None - # even if the parent project is hosted on a self-managed gitlab host, - # it can still use submodules hosted on gitlab.com - gitlab_hosts = ['gitlab'] - if self_managed_gitlab_host: - if isinstance(self_managed_gitlab_host, str): - gitlab_hosts.append(self_managed_gitlab_host) - else: - gitlab_hosts.extend(self_managed_gitlab_host) + url_to_client = map_domain_to_clients(gls) + domain_to_client = { + host_url_to_domain(_url): client + for _url, client in url_to_client.items() + } - # giturlparse.GitUrlParsed.platform is too permissive and will be set to - # 'gitlab' for some non-gitlab urls, for instance: - # 
https://opensource.ncsa.illinois.edu/bitbucket/scm/u3d/3dutilities.git - if (parsed.platform not in ('gitlab', 'base') - or not any([re.match(fr'^{host}(\.\w+)?$', parsed.host) - for host in gitlab_hosts])): + matched_domain = [ + domain for domain in domain_to_client + if re.search("(^|[/@])" + re.escape(domain) + "([:/]|$)", url) + ] + if len(matched_domain) == 0: logger.warning(f'submodule git url is not hosted on gitlab: {url}') return None + elif len(matched_domain) > 1: + raise ValueError(f"More than one of the provided Gitlab host domains " + f"matches submodule url {url}") + else: + matched_domain = matched_domain[0] + client = domain_to_client[matched_domain] # Format to python-gitlab path_with_namespace: # rewrite to https format then split by host and keep & cut the right part. # I find it more robust than trying to rebuild the path from the different # attributes of giturlparse.GitUrlParsed objects https_url = parsed.url2https - path_with_namespace = https_url.split(parsed.host)[1] + path_with_namespace = https_url.split(matched_domain)[1] path_with_namespace = lstrip(path_with_namespace, '/') path_with_namespace = rstrip(path_with_namespace, '.git') - return path_with_namespace + return client, path_with_namespace + + +def submodule_to_project( + submodule: Submodule, + gls: OneOrManyClients, +) -> Optional[Project]: + match = match_submodule_to_client_and_format_project_path( + submodule=submodule, + gls=gls + ) + if not match: + return None + try: + client, submodule_project_path_with_namespace = match + submodule_project = client.get(submodule_project_path_with_namespace) + except (GitlabGetError, GitlabHttpError): + # Repo doesn't actually exist (possible because you can modify + # .gitmodules without using `git submodule add`) + raise FileNotFoundError( + 'No repo found at url "{}" for submodule at path "{}" - Check if ' + 'the repo was deleted.'.format(submodule.url, submodule.path)) + return submodule_project diff --git a/tests/test_project_manager_utils.py 
b/tests/test_project_manager_utils.py new file mode 100644 index 0000000..de9a33a --- /dev/null +++ b/tests/test_project_manager_utils.py @@ -0,0 +1,19 @@ +from gitlab import Gitlab + +from gitlab_submodule.project_manager_utils import get_host_url +from gitlab_submodule.project_manager_utils import map_domain_to_clients + + +def test_get_host_url(): + gl = Gitlab() + assert get_host_url(gl.projects) == "https://gitlab.com" + + +def test_map_domain_to_clients(): + gl1 = Gitlab() + gl2 = Gitlab("myhost.com").projects + mapped = map_domain_to_clients([gl1, gl2]) + assert mapped == { + "https://gitlab.com": gl1.projects, + "myhost.com": gl2 + } diff --git a/tests/test_submodule_to_project.py b/tests/test_submodule_to_project.py index ebc82f2..d7c3e36 100644 --- a/tests/test_submodule_to_project.py +++ b/tests/test_submodule_to_project.py @@ -1,27 +1,59 @@ from unittest import TestCase -from unittest.mock import Mock +from unittest.mock import MagicMock -from gitlab_submodule.submodule_to_project import \ - _submodule_url_to_path_with_namespace +from gitlab import Gitlab +from gitlab.v4.objects import ProjectManager + +from gitlab_submodule import Submodule +from gitlab_submodule.submodule_to_project import host_url_to_domain +from gitlab_submodule.submodule_to_project import ( + match_submodule_to_client_and_format_project_path) + + +def test_host_url_to_domain(): + assert host_url_to_domain("https://myhost.com/") == "myhost.com" class TestSubmoduleToProject(TestCase): + + def mock_submodule(self, url: str) -> MagicMock: + submodule = MagicMock(Submodule) + submodule.url = url + return submodule + def test__submodule_url_to_path_with_namespace(self): # Normal gitlab host - path_with_namespace = _submodule_url_to_path_with_namespace( - 'https://gitlab.com/namespace/repo.git', - Mock()) + _, path_with_namespace = \ + match_submodule_to_client_and_format_project_path( + self.mock_submodule('https://gitlab.com/namespace/repo.git'), + gls=Gitlab() + ) 
self.assertEqual(path_with_namespace, 'namespace/repo') - # Self-managed gitlab URL without self_managed_gitlab_host - path_with_namespace = _submodule_url_to_path_with_namespace( - 'https://custom-gitlab/namespace/repo.git', - Mock()) - self.assertEqual(path_with_namespace, None) + # Self-managed gitlab URL, wrong client + match = match_submodule_to_client_and_format_project_path( + self.mock_submodule('https://custom-gitlab/namespace/repo.git'), + gls=Gitlab()) + self.assertEqual(match, None) + + # Self-managed gitlab URL that includes the URL of the wrong client + match = \ + match_submodule_to_client_and_format_project_path( + self.mock_submodule( + 'https://custom-gitlab.com/namespace/repo.git'), + gls=Gitlab() + ) + self.assertEqual(match, None) # Self-managed gitlab URL with self_managed_gitlab_host - path_with_namespace = _submodule_url_to_path_with_namespace( - 'https://custom-gitlab/namespace/repo.git', - Mock(), - self_managed_gitlab_host='custom-gitlab') + self_hosted_client = MagicMock(ProjectManager) + self_hosted_client.gitlab = MagicMock(Gitlab) + self_hosted_client.gitlab._base_url = "https://custom-gitlab.com" + client, path_with_namespace = \ + match_submodule_to_client_and_format_project_path( + self.mock_submodule( + 'https://custom-gitlab.com/namespace/repo.git'), + gls=[Gitlab(), self_hosted_client], + ) self.assertEqual(path_with_namespace, 'namespace/repo') + self.assertEqual(client, self_hosted_client) From e1cc6d7610eb819284732bcf09b08451588d5a04 Mon Sep 17 00:00:00 2001 From: ValentinFrancois <24223132+ValentinFrancois@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:17:26 +0200 Subject: [PATCH 2/2] Side improvements --- .github/workflows/_code_checks.yml | 2 +- .gitignore | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_code_checks.yml b/.github/workflows/_code_checks.yml index 33f1d4c..a39e8ed 100644 --- a/.github/workflows/_code_checks.yml +++ b/.github/workflows/_code_checks.yml @@ -39,7 +39,7 
@@ jobs: run: | python -m pip install --upgrade pip python -m pip install -r requirements_dev.txt - pip install -r requirements.txt + python -m pip install -r requirements.txt - name: lint run: make lint diff --git a/.gitignore b/.gitignore index 8f02e22..d4acf3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ .idea/ -*/__pycache__/ +**/__pycache__/ *.egg-info/ dist/