From 5db3f38a0f8df90cbd4e2c0e9a4e8eba8cad82b3 Mon Sep 17 00:00:00 2001 From: john-1-1-1 Date: Fri, 19 Jun 2026 01:47:46 +0700 Subject: [PATCH 1/4] feat: add --timeout argument for remote hub fetching --- src/modelinfo/cli.py | 29 ++++++++++++++++++---------- src/modelinfo/parsers/huggingface.py | 28 +++++++++++++-------------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py index 1d9da7a..7db823f 100644 --- a/src/modelinfo/cli.py +++ b/src/modelinfo/cli.py @@ -107,6 +107,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace: default=0.9, help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.", ) + parser.add_argument( + "--timeout", + type=float, + default=10.0, + help="Network request timeout in seconds for Hugging Face Hub (default 10.0).", + ) parser.add_argument( "-v", "--version", @@ -117,8 +123,8 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace: def analyze_model( - file_path: str, - context_override: int | None, + file_path: str, + context_override: int | None, gpu_count: int = 1, batch_size: int = 1, fetch_tensors: bool = False, @@ -126,7 +132,8 @@ def analyze_model( strategy: str = "tp", is_vllm: bool = False, gpu_vram_gb: float = 0.0, - gpu_util: float = 0.9 + gpu_util: float = 0.9, + timeout: float = 10.0 ) -> dict: tensors = {} config = None @@ -136,7 +143,7 @@ def analyze_model( if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")): from modelinfo.parsers.huggingface import fetch_huggingface_repo - tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors) + tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors, timeout=timeout) elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"): tensors = parse_safetensors_header(file_path) format_name = "SafeTensors" @@ -235,8 +242,8 @@ def main(argv: Sequence[str] | None = None) -> int: models = [] for model_path in args.file: info = analyze_model( - model_path, - args.context, + model_path, + args.context, gpu_count=gpu_count, batch_size=args.batch_size, fetch_tensors=args.tensors, @@ -244,7 +251,8 @@ def main(argv: Sequence[str] | None = None) -> int: strategy=args.strategy, is_vllm=args.vllm, gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0, - gpu_util=args.gpu_util + gpu_util=args.gpu_util, + timeout=args.timeout ) models.append((model_path.split("/")[-1], info)) @@ -254,8 +262,8 @@ def main(argv: Sequence[str] | None = None) -> int: file_path = args.file[0] info = analyze_model( - file_path, - args.context, + file_path, + args.context, gpu_count=gpu_count, batch_size=args.batch_size, fetch_tensors=args.tensors, @@ -263,7 +271,8 @@ def main(argv: Sequence[str] | None = None) -> int: strategy=args.strategy, is_vllm=args.vllm, gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0, - gpu_util=args.gpu_util + gpu_util=args.gpu_util, + timeout=args.timeout ) print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display) diff --git a/src/modelinfo/parsers/huggingface.py b/src/modelinfo/parsers/huggingface.py index 713ce82..b6e6b92 100644 --- a/src/modelinfo/parsers/huggingface.py +++ b/src/modelinfo/parsers/huggingface.py @@ -29,7 +29,7 @@ def _get_hf_token() -> str | None: return None -def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None) -> bytes: +def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None, timeout: float = 10.0) -> bytes: if headers is None: headers = {} @@ -39,7 +39,7 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = req = urllib.request.Request(url, headers=headers) try: - with urllib.request.urlopen(req, timeout=10) as response: + with urllib.request.urlopen(req, timeout=timeout) as response: if limit is not None: return response.read(limit) return response.read() @@ -50,16 +50,16 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}") raise -def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]: +def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]: url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}" # 1. Fetch the first 500KB in a single roundtrip headers = {"Range": "bytes=0-500000"} try: - chunk = _make_request(url, headers=headers, limit=500000) + chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout) except urllib.error.HTTPError as e: if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB) - chunk = _make_request(url, limit=500000) + chunk = _make_request(url, limit=500000, timeout=timeout) else: raise @@ -74,18 +74,18 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]: else: # 3. Double-roundtrip only if the header is massive (>500KB) headers = {"Range": f"bytes=8-{8+header_size-1}"} - json_bytes = _make_request(url, headers=headers, limit=header_size) + json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout) return json.loads(json_bytes) -def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]: +def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]: """ Fetches the metadata directly from the Hugging Face Hub over the network. Returns: (tensors, config, format_name, disk_size) """ api_url = f"https://huggingface.co/api/models/{repo_id}" try: - api_data = json.loads(_make_request(api_url).decode("utf-8")) + api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8")) except urllib.error.HTTPError as e: if e.code == 401: raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}") @@ -99,7 +99,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D config = None if "config.json" in filenames: config_url = f"https://huggingface.co/{repo_id}/resolve/main/config.json" - config = json.loads(_make_request(config_url).decode("utf-8")) + config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8")) tensors = {} total_size = 0.0 @@ -107,7 +107,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D if "model.safetensors.index.json" in filenames: # Sharded SafeTensors index_url = f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors.index.json" - index_data = json.loads(_make_request(index_url).decode("utf-8")) + index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8")) weight_map = index_data.get("weight_map", {}) unique_shards = list(set(weight_map.values())) @@ -128,8 +128,8 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D } else: def fetch_shard(shard: str): - return shard, _fetch_safetensors_header(repo_id, shard) - + return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout) + with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor: future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards} for future in concurrent.futures.as_completed(future_to_shard): @@ -154,12 +154,12 @@ def fetch_shard(shard: str): if token: req.add_header("Authorization", f"Bearer {token}") try: - with urllib.request.urlopen(req) as response: + with urllib.request.urlopen(req, timeout=timeout) as response: total_size = int(response.headers.get("Content-Length", 0)) except Exception: pass - header = _fetch_safetensors_header(repo_id, "model.safetensors") + header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout) tensors = header format_name = "SafeTensors" From d37d2bddbe862794d96ef3c960898a5e99c51c71 Mon Sep 17 00:00:00 2001 From: john-1-1-1 Date: Fri, 19 Jun 2026 01:48:11 +0700 Subject: [PATCH 2/4] test: add timeout reproduction and configuration tests --- tests/test_hf_timeout_configurable.py | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/test_hf_timeout_configurable.py diff --git a/tests/test_hf_timeout_configurable.py b/tests/test_hf_timeout_configurable.py new file mode 100644 index 0000000..157f298 --- /dev/null +++ b/tests/test_hf_timeout_configurable.py @@ -0,0 +1,45 @@ +import pytest + +from unittest.mock import patch, MagicMock +from modelinfo.cli import main +from modelinfo.parsers.huggingface import fetch_huggingface_repo + +def test_cli_timeout_argument(): + with patch('modelinfo.parsers.huggingface.fetch_huggingface_repo') as mock_fetch: + + mock_fetch.return_value = ({}, {}, "SafeTensors", 0.0) + + try: + result = main(["--timeout", "20.0", "gpt2"]) + assert result == 0 + except SystemExit as e: + pytest.fail(f"CLI failed with SystemExit: {e}. --timeout argument is likely not implemented.") + +def test_fetch_huggingface_repo_timeout_parameter(): + with patch('urllib.request.urlopen') as mock_urlopen: + import struct + + mock_response = MagicMock() + mock_response.__enter__.return_value = mock_response + + header_json = b'{"__metadata__": {}}' + header_size = len(header_json) + valid_header = struct.pack(" Date: Fri, 19 Jun 2026 01:53:53 +0700 Subject: [PATCH 3/4] docs: update README with --timeout arg --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7c6b4ab..d8a2e5b 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓ | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. | | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. | | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. | +| `--timeout` | `--timeout 30` | Network request timeout in seconds for Hugging Face Hub. Defaults to `10.0`. | | `-v, --version` | `modelinfo -v` | Show program's version number and exit. | ## Architecture From d8db97b8efcda1e6329451e8e4bff994a85a6a92 Mon Sep 17 00:00:00 2001 From: john-1-1-1 Date: Fri, 19 Jun 2026 13:19:23 +0700 Subject: [PATCH 4/4] Address CodeRabbit review points and cleanup tests --- .idea/.gitignore | 5 + .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 7 + .idea/modelinfo-cli.iml | 17 +++ .idea/modules.xml | 8 ++ .idea/vcs.xml | 7 + src/modelinfo/cli.py | 102 +++++++++----- src/modelinfo/parsers/huggingface.py | 126 ++++++++++++------ tests/test_hf_timeout_configurable.py | 35 +++-- 9 files changed, 229 insertions(+), 84 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modelinfo-cli.iml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..b58b603 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..590a59e --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modelinfo-cli.iml b/.idea/modelinfo-cli.iml new file mode 100644 index 0000000..e2fc31a --- /dev/null +++ b/.idea/modelinfo-cli.iml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..df083bd --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..8306744 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py index 7db823f..96c42ff 100644 --- a/src/modelinfo/cli.py +++ b/src/modelinfo/cli.py @@ -2,6 +2,7 @@ import json import os import sys +import math from typing import Sequence from modelinfo.architecture import identify_architecture_name from modelinfo.calculator import calculate_footprint @@ -12,7 +13,13 @@ class VersionAction(argparse.Action): - def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"): + def __init__( + self, + option_strings, + dest=argparse.SUPPRESS, + default=argparse.SUPPRESS, + help="show program's version number and exit", + ): super().__init__( option_strings=option_strings, dest=dest, @@ -41,12 +48,25 @@ def _positive_int(value: str) -> int: return ivalue +def _positive_float(value: str) -> float: + try: + fvalue = float(value) + except ValueError: + raise argparse.ArgumentTypeError(f"Invalid float value: {value}") + + if not math.isfinite(fvalue): + raise argparse.ArgumentTypeError(f"Timeout must be a finite number: {value}") + if fvalue <= 0: + raise argparse.ArgumentTypeError(f"Timeout must be greater than 0: {value}") + return fvalue + + def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser( prog="modelinfo", description="High-performance CLI utility to inspect ML model checkpoints and calculate VRAM requirements.", ) - + parser.add_argument( "file", type=str, @@ -109,7 +129,7 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace: ) parser.add_argument( "--timeout", - type=float, + type=_positive_float, default=10.0, help="Network request timeout in seconds for Hugging Face Hub (default 10.0).", ) @@ -133,21 +153,28 @@ def analyze_model( is_vllm: bool = False, gpu_vram_gb: float = 0.0, gpu_util: float = 0.9, - timeout: float = 10.0 + timeout: float = 10.0, ) -> dict: tensors = {} config = None disk_size = 0.0 - + file_path_lower = file_path.lower() - - if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")): + + if not os.path.exists(file_path) and not file_path_lower.endswith( + (".safetensors", ".gguf", ".pt", ".bin", ".index.json") + ): from modelinfo.parsers.huggingface import fetch_huggingface_repo - tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors, timeout=timeout) - elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"): + + tensors, config, format_name, disk_size = fetch_huggingface_repo( + file_path, fetch_tensors=fetch_tensors, timeout=timeout + ) + elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith( + ".index.json" + ): tensors = parse_safetensors_header(file_path) format_name = "SafeTensors" - + config_path = os.path.join(os.path.dirname(file_path), "config.json") if os.path.exists(config_path): try: @@ -155,7 +182,7 @@ def analyze_model( config = json.load(f) except (json.JSONDecodeError, OSError): pass - + elif file_path_lower.endswith(".gguf"): tensors = parse_gguf_header(file_path) format_name = "GGUF" @@ -163,10 +190,14 @@ def analyze_model( tensors = parse_pytorch_header(file_path) format_name = "PyTorch" elif os.path.isdir(file_path): - raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.") + raise IsADirectoryError( + f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory." + ) else: - raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.") - + raise ValueError( + f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID." + ) + max_context = None if config: max_context = config.get("max_position_embeddings") @@ -175,7 +206,7 @@ def analyze_model( gen_arch = metadata.get("general.architecture") if gen_arch: max_context = metadata.get(f"{gen_arch}.context_length") - + is_default_context = False context_length = context_override if context_length is None: @@ -183,7 +214,7 @@ def analyze_model( is_default_context = True footprint = calculate_footprint( - tensors, + tensors, context_length=context_length, batch_size=batch_size, config=config, @@ -192,16 +223,16 @@ def analyze_model( strategy=strategy, is_vllm=is_vllm, gpu_vram_bytes=gpu_vram_gb * 1024**3 if gpu_vram_gb else 0.0, - gpu_util=gpu_util + gpu_util=gpu_util, ) num_layers = footprint["num_layers"] arch_name = identify_architecture_name(tensors, num_layers, config) if format_name != "SafeTensors" or os.path.exists(file_path): disk_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0.0 - + tensor_count = len([k for k in tensors.keys() if k != "__metadata__"]) - + return { "format_name": format_name, "arch_name": arch_name, @@ -218,7 +249,7 @@ def analyze_model( "strategy": strategy, "is_vllm": is_vllm, "gpu_vram_gb": gpu_vram_gb, - "gpu_util": gpu_util + "gpu_util": gpu_util, } @@ -228,17 +259,20 @@ def main(argv: Sequence[str] | None = None) -> int: gpu_name_display = None gpu_vram_gb = None gpu_count = 1 - + if args.gpu or args.vllm: target = args.gpu if args.gpu else "auto" from modelinfo.hardware import resolve_gpu + gpu_name_display, gpu_vram_gb, gpu_count = resolve_gpu(target) if len(args.file) > 1: if args.vllm: - console.print("[red]Error: Side-by-side comparison does not currently support the --vllm capacity simulation. Compare models sequentially or remove --vllm.[/red]") + console.print( + "[red]Error: Side-by-side comparison does not currently support the --vllm capacity simulation. Compare models sequentially or remove --vllm.[/red]" + ) return 1 - + models = [] for model_path in args.file: info = analyze_model( @@ -252,15 +286,19 @@ def main(argv: Sequence[str] | None = None) -> int: is_vllm=args.vllm, gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0, gpu_util=args.gpu_util, - timeout=args.timeout + timeout=args.timeout, ) models.append((model_path.split("/")[-1], info)) - - print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display) + + print_compare_info( + models, + gpu_vram_gb if gpu_vram_gb else args.max_vram, + gpu_name=gpu_name_display, + ) return 0 - + file_path = args.file[0] - + info = analyze_model( file_path, args.context, @@ -272,10 +310,14 @@ def main(argv: Sequence[str] | None = None) -> int: is_vllm=args.vllm, gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0, gpu_util=args.gpu_util, - timeout=args.timeout + timeout=args.timeout, ) - print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display) + print_model_info( + **info, + max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, + gpu_name=gpu_name_display, + ) return 0 diff --git a/src/modelinfo/parsers/huggingface.py b/src/modelinfo/parsers/huggingface.py index b6e6b92..cc07236 100644 --- a/src/modelinfo/parsers/huggingface.py +++ b/src/modelinfo/parsers/huggingface.py @@ -6,11 +6,12 @@ import urllib.request from typing import Any, Dict, Tuple + def _get_hf_token() -> str | None: token = os.environ.get("HF_TOKEN") if token: return token - + cache_path = os.path.expanduser("~/.cache/huggingface/token") if os.path.exists(cache_path): try: @@ -18,7 +19,7 @@ def _get_hf_token() -> str | None: return f.read().strip() except OSError: pass - + legacy_path = os.path.expanduser("~/.huggingface/token") if os.path.exists(legacy_path): try: @@ -26,17 +27,23 @@ def _get_hf_token() -> str | None: return f.read().strip() except OSError: pass - + return None -def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None, timeout: float = 10.0) -> bytes: + +def _make_request( + url: str, + headers: Dict[str, str] = None, + limit: int | None = None, + timeout: float = 10.0, +) -> bytes: if headers is None: headers = {} - + token = _get_hf_token() if token: headers["Authorization"] = f"Bearer {token}" - + req = urllib.request.Request(url, headers=headers) try: with urllib.request.urlopen(req, timeout=timeout) as response: @@ -45,40 +52,54 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = return response.read() except urllib.error.HTTPError as e: if e.code == 401: - raise PermissionError(f"Gated/Private Model or Invalid Token (401 Unauthorized). Set the HF_TOKEN environment variable to access {url}") + raise PermissionError( + f"Gated/Private Model or Invalid Token (401 Unauthorized). Set the HF_TOKEN environment variable to access {url}" + ) if e.code == 404: - raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}") + raise FileNotFoundError( + f"Could not find repository or file on Hugging Face (404 Not Found): {url}" + ) raise -def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]: + +def _fetch_safetensors_header( + repo_id: str, filename: str, timeout: float = 10.0 +) -> Dict[str, Any]: url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}" - + # 1. Fetch the first 500KB in a single roundtrip headers = {"Range": "bytes=0-500000"} try: chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout) except urllib.error.HTTPError as e: - if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB) + if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB) chunk = _make_request(url, limit=500000, timeout=timeout) else: raise - + if len(chunk) < 8: - raise ValueError(f"File {filename} is too small to contain a SafeTensors header.") - + raise ValueError( + f"File {filename} is too small to contain a SafeTensors header." + ) + header_size = struct.unpack("500KB) - headers = {"Range": f"bytes=8-{8+header_size-1}"} - json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout) - + headers = {"Range": f"bytes=8-{8 + header_size - 1}"} + json_bytes = _make_request( + url, headers=headers, limit=header_size, timeout=timeout + ) + return json.loads(json_bytes) -def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]: + +def fetch_huggingface_repo( + repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0 +) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]: """ Fetches the metadata directly from the Hugging Face Hub over the network. Returns: (tensors, config, format_name, disk_size) @@ -88,68 +109,83 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: f api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8")) except urllib.error.HTTPError as e: if e.code == 401: - raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}") + raise PermissionError( + f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}" + ) if e.code == 404: - raise FileNotFoundError(f"Could not find repository on Hugging Face (404 Not Found): {repo_id}") + raise FileNotFoundError( + f"Could not find repository on Hugging Face (404 Not Found): {repo_id}" + ) raise - + siblings = api_data.get("siblings", []) filenames = {s["rfilename"] for s in siblings} - + config = None if "config.json" in filenames: config_url = f"https://huggingface.co/{repo_id}/resolve/main/config.json" config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8")) - + tensors = {} total_size = 0.0 - + if "model.safetensors.index.json" in filenames: # Sharded SafeTensors index_url = f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors.index.json" - index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8")) - + index_data = json.loads( + _make_request(index_url, timeout=timeout).decode("utf-8") + ) + weight_map = index_data.get("weight_map", {}) unique_shards = list(set(weight_map.values())) - + total_size = index_data.get("metadata", {}).get("total_size", 0.0) - + if config and not fetch_tensors and total_size > 0: # Lazy Fetch Paradigm for tensor_name in weight_map.keys(): tensors[tensor_name] = {"shape": [], "dtype": "BF16"} - + tensors["__metadata__"] = { "missing_shards": 0, "total_shards": len(unique_shards), "is_sharded": True, "lazy_fetch": True, - "total_size": total_size + "total_size": total_size, } else: + def fetch_shard(shard: str): return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout) - - with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor: - future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards} + + with concurrent.futures.ThreadPoolExecutor( + max_workers=max(1, min(8, len(unique_shards))) + ) as executor: + future_to_shard = { + executor.submit(fetch_shard, shard): shard + for shard in unique_shards + } for future in concurrent.futures.as_completed(future_to_shard): shard, shard_header = future.result() for k, v in shard_header.items(): if k != "__metadata__": tensors[k] = v - + tensors["__metadata__"] = { "missing_shards": 0, "total_shards": len(unique_shards), - "is_sharded": True + "is_sharded": True, } format_name = "SafeTensors" - + elif "model.safetensors" in filenames: # Single SafeTensors - + # Determine total size first - req = urllib.request.Request(f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors", method="HEAD") + req = urllib.request.Request( + f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors", + method="HEAD", + ) token = _get_hf_token() if token: req.add_header("Authorization", f"Bearer {token}") @@ -159,12 +195,14 @@ def fetch_shard(shard: str): except Exception: pass - header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout) + header = _fetch_safetensors_header( + repo_id, "model.safetensors", timeout=timeout + ) tensors = header - + format_name = "SafeTensors" - + else: raise ValueError(f"Repository {repo_id} does not contain SafeTensors weights.") - + return tensors, config, format_name, float(total_size) diff --git a/tests/test_hf_timeout_configurable.py b/tests/test_hf_timeout_configurable.py index 157f298..1dd0cc9 100644 --- a/tests/test_hf_timeout_configurable.py +++ b/tests/test_hf_timeout_configurable.py @@ -4,19 +4,28 @@ from modelinfo.cli import main from modelinfo.parsers.huggingface import fetch_huggingface_repo -def test_cli_timeout_argument(): - with patch('modelinfo.parsers.huggingface.fetch_huggingface_repo') as mock_fetch: +def test_cli_timeout_argument(): + with patch("modelinfo.parsers.huggingface.fetch_huggingface_repo") as mock_fetch: mock_fetch.return_value = ({}, {}, "SafeTensors", 0.0) try: result = main(["--timeout", "20.0", "gpt2"]) assert result == 0 + + # Verify that the timeout argument successfully reaches fetch_huggingface_repo + args, kwargs = mock_fetch.call_args + assert kwargs.get("timeout") == 20.0, ( + f"Expected timeout=20.0 in fetch_huggingface_repo, but got {kwargs.get('timeout')}" + ) except SystemExit as e: - pytest.fail(f"CLI failed with SystemExit: {e}. --timeout argument is likely not implemented.") + pytest.fail( + f"CLI failed with SystemExit: {e}. --timeout argument is likely not implemented." + ) + def test_fetch_huggingface_repo_timeout_parameter(): - with patch('urllib.request.urlopen') as mock_urlopen: + with patch("urllib.request.urlopen") as mock_urlopen: import struct mock_response = MagicMock() @@ -25,7 +34,7 @@ def test_fetch_huggingface_repo_timeout_parameter(): header_json = b'{"__metadata__": {}}' header_size = len(header_json) valid_header = struct.pack("