From 78f86ea0da958fc92ea9f4d23ea0780a5d80e801 Mon Sep 17 00:00:00 2001 From: Celina Hanouti Date: Mon, 24 Nov 2025 23:24:26 +0100 Subject: [PATCH 1/3] implement retry mechanism for HEAD call --- .../environment_variables.md | 6 ++- src/huggingface_hub/constants.py | 4 ++ src/huggingface_hub/file_download.py | 45 +++++++++++++++++-- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md index edd71b8a57..613e35825f 100644 --- a/docs/source/en/package_reference/environment_variables.md +++ b/docs/source/en/package_reference/environment_variables.md @@ -73,7 +73,11 @@ For more details, see [logging reference](../package_reference/utilities#hugging ### HF_HUB_ETAG_TIMEOUT -Integer value to define the number of seconds to wait for server response when fetching the latest metadata from a repo before downloading a file. If the request times out, `huggingface_hub` will default to the locally cached files. Setting a lower value speeds up the workflow for machines with a slow connection that have already cached files. A higher value guarantees the metadata call to succeed in more cases. Default to 10s. +Integer value to define the initial number of seconds to wait for server response when fetching the latest metadata from a repo before downloading a file. If the request times out, `huggingface_hub` will default to the locally cached files. If no cached file is found, a retry is attempted with a longer timeout (see `HF_HUB_ETAG_TIMEOUT_RETRY`). Setting a lower value speeds up the workflow for machines with a slow connection that have already cached files. Default to 10s. + +### HF_HUB_ETAG_TIMEOUT_RETRY + +Integer value to define the number of seconds to wait when retrying metadata fetch after an initial timeout. 
When the initial metadata request times out and no local cached file is found, `huggingface_hub` will retry with this longer timeout before failing. This helps handle transient network slowdowns while keeping the initial timeout fast for cached files. Default to 60s. ### HF_HUB_DOWNLOAD_TIMEOUT diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index 7a1bb21f4d..38ae13682a 100644 --- a/src/huggingface_hub/constants.py +++ b/src/huggingface_hub/constants.py @@ -32,6 +32,7 @@ def _as_int(value: Optional[str]) -> Optional[int]: CONFIG_NAME = "config.json" REPOCARD_NAME = "README.md" DEFAULT_ETAG_TIMEOUT = 10 +DEFAULT_ETAG_TIMEOUT_RETRY = 60 DEFAULT_DOWNLOAD_TIMEOUT = 10 DEFAULT_REQUEST_TIMEOUT = 10 DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024 @@ -230,6 +231,9 @@ def _as_int(value: Optional[str]) -> Optional[int]: # Used to override the etag timeout on a system level HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT +# Used to override the etag retry timeout on a system level (for retrying after initial timeout when no local file) +HF_HUB_ETAG_TIMEOUT_RETRY: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT_RETRY")) or DEFAULT_ETAG_TIMEOUT_RETRY + # Used to override the get request timeout on a system level HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index 90109133e1..acd7ceba6b 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -1131,8 +1131,28 @@ def _hf_hub_download_to_cache_dir( if not force_download: return pointer_path - # Otherwise, raise appropriate error - _raise_on_head_call_error(head_call_error, force_download, local_files_only) + # No local file found, retry with longer timeout if it was a timeout error + if isinstance(head_call_error, httpx.TimeoutException): + logger.info("Metadata fetch 
timed out and no local file found. Retrying with longer timeout..") + (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = ( + _get_metadata_or_catch_error( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + endpoint=endpoint, + etag_timeout=constants.HF_HUB_ETAG_TIMEOUT_RETRY, + headers=headers, + token=token, + local_files_only=local_files_only, + storage_folder=storage_folder, + relative_filename=relative_filename, + ) + ) + + # If still error, raise + if head_call_error is not None: + _raise_on_head_call_error(head_call_error, force_download, local_files_only) # From now on, etag, commit_hash, url and size are not None. assert etag is not None, "etag must have been retrieved from server" @@ -1300,9 +1320,26 @@ def _hf_hub_download_to_local_dir( ) if not force_download: return local_path + elif not force_download and isinstance(head_call_error, httpx.TimeoutException): + # No local file found, retry with longer timeout if it was a timeout error + logger.info("Metadata fetch timed out and no local file found. Retrying with longer timeout...") + (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = ( + _get_metadata_or_catch_error( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + endpoint=endpoint, + etag_timeout=constants.HF_HUB_ETAG_TIMEOUT_RETRY, + headers=headers, + token=token, + local_files_only=local_files_only, + ) + ) - # Otherwise => raise - _raise_on_head_call_error(head_call_error, force_download, local_files_only) + # If still error, raise + if head_call_error is not None: + _raise_on_head_call_error(head_call_error, force_download, local_files_only) # From now on, etag, commit_hash, url and size are not None. 
assert etag is not None, "etag must have been retrieved from server" From e251e99b97c9e3a0721aa9166894c138e456b409 Mon Sep 17 00:00:00 2001 From: Celina Hanouti Date: Tue, 25 Nov 2025 09:01:38 +0100 Subject: [PATCH 2/3] don't define an env variable --- docs/source/en/package_reference/environment_variables.md | 6 +----- src/huggingface_hub/constants.py | 4 ---- src/huggingface_hub/file_download.py | 5 ++++- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md index 613e35825f..edd71b8a57 100644 --- a/docs/source/en/package_reference/environment_variables.md +++ b/docs/source/en/package_reference/environment_variables.md @@ -73,11 +73,7 @@ For more details, see [logging reference](../package_reference/utilities#hugging ### HF_HUB_ETAG_TIMEOUT -Integer value to define the initial number of seconds to wait for server response when fetching the latest metadata from a repo before downloading a file. If the request times out, `huggingface_hub` will default to the locally cached files. If no cached file is found, a retry is attempted with a longer timeout (see `HF_HUB_ETAG_TIMEOUT_RETRY`). Setting a lower value speeds up the workflow for machines with a slow connection that have already cached files. Default to 10s. - -### HF_HUB_ETAG_TIMEOUT_RETRY - -Integer value to define the number of seconds to wait when retrying metadata fetch after an initial timeout. When the initial metadata request times out and no local cached file is found, `huggingface_hub` will retry with this longer timeout before failing. This helps handle transient network slowdowns while keeping the initial timeout fast for cached files. Default to 60s. +Integer value to define the number of seconds to wait for server response when fetching the latest metadata from a repo before downloading a file. 
If the request times out, `huggingface_hub` will default to the locally cached files. Setting a lower value speeds up the workflow for machines with a slow connection that have already cached files. A higher value guarantees the metadata call to succeed in more cases. Default to 10s. ### HF_HUB_DOWNLOAD_TIMEOUT diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index 38ae13682a..7a1bb21f4d 100644 --- a/src/huggingface_hub/constants.py +++ b/src/huggingface_hub/constants.py @@ -32,7 +32,6 @@ def _as_int(value: Optional[str]) -> Optional[int]: CONFIG_NAME = "config.json" REPOCARD_NAME = "README.md" DEFAULT_ETAG_TIMEOUT = 10 -DEFAULT_ETAG_TIMEOUT_RETRY = 60 DEFAULT_DOWNLOAD_TIMEOUT = 10 DEFAULT_REQUEST_TIMEOUT = 10 DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024 @@ -231,9 +230,6 @@ def _as_int(value: Optional[str]) -> Optional[int]: # Used to override the etag timeout on a system level HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT -# Used to override the etag retry timeout on a system level (for retrying after initial timeout when no local file) -HF_HUB_ETAG_TIMEOUT_RETRY: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT_RETRY")) or DEFAULT_ETAG_TIMEOUT_RETRY - # Used to override the get request timeout on a system level HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index acd7ceba6b..fee72b8a02 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -63,6 +63,9 @@ _are_symlinks_supported_in_dir: dict[str, bool] = {} +# Internal retry timeout for metadata fetch when no local file exists +_ETAG_RETRY_TIMEOUT = 60 + def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool: """Return whether the symlinks are supported on the machine. 
@@ -1141,7 +1144,7 @@ def _hf_hub_download_to_cache_dir( repo_type=repo_type, revision=revision, endpoint=endpoint, - etag_timeout=constants.HF_HUB_ETAG_TIMEOUT_RETRY, + etag_timeout=_ETAG_RETRY_TIMEOUT, headers=headers, token=token, local_files_only=local_files_only, From 582dd4a86b3032e789b0a0f19f4f0f6ea563080f Mon Sep 17 00:00:00 2001 From: Celina Hanouti Date: Tue, 25 Nov 2025 09:05:18 +0100 Subject: [PATCH 3/3] nit --- src/huggingface_hub/file_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index fee72b8a02..58f4ffcf2c 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -1333,7 +1333,7 @@ def _hf_hub_download_to_local_dir( repo_type=repo_type, revision=revision, endpoint=endpoint, - etag_timeout=constants.HF_HUB_ETAG_TIMEOUT_RETRY, + etag_timeout=_ETAG_RETRY_TIMEOUT, headers=headers, token=token, local_files_only=local_files_only,