From 5db3f38a0f8df90cbd4e2c0e9a4e8eba8cad82b3 Mon Sep 17 00:00:00 2001
From: john-1-1-1 <John_1_1_1@mail.ru>
Date: Fri, 19 Jun 2026 01:47:46 +0700
Subject: [PATCH 1/4] feat: add --timeout argument for remote hub fetching

---
 src/modelinfo/cli.py                 | 29 ++++++++++++++++++----------
 src/modelinfo/parsers/huggingface.py | 28 +++++++++++++--------------
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py
index 1d9da7a..7db823f 100644
--- a/src/modelinfo/cli.py
+++ b/src/modelinfo/cli.py
@@ -107,6 +107,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         default=0.9,
         help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.",
     )
+    parser.add_argument(
+        "--timeout",
+        type=float,
+        default=10.0,
+        help="Network request timeout in seconds for Hugging Face Hub (default 10.0).",
+    )
     parser.add_argument(
         "-v",
         "--version",
@@ -117,8 +123,8 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
 
 
 def analyze_model(
-    file_path: str, 
-    context_override: int | None, 
+    file_path: str,
+    context_override: int | None,
     gpu_count: int = 1,
     batch_size: int = 1,
     fetch_tensors: bool = False,
@@ -126,7 +132,8 @@ def analyze_model(
     strategy: str = "tp",
     is_vllm: bool = False,
     gpu_vram_gb: float = 0.0,
-    gpu_util: float = 0.9
+    gpu_util: float = 0.9,
+    timeout: float = 10.0
 ) -> dict:
     tensors = {}
     config = None
@@ -136,7 +143,7 @@ def analyze_model(
     
     if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
-        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
+        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors, timeout=timeout)
     elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
         tensors = parse_safetensors_header(file_path)
         format_name = "SafeTensors"
@@ -235,8 +242,8 @@ def main(argv: Sequence[str] | None = None) -> int:
         models = []
         for model_path in args.file:
             info = analyze_model(
-                model_path, 
-                args.context, 
+                model_path,
+                args.context,
                 gpu_count=gpu_count,
                 batch_size=args.batch_size,
                 fetch_tensors=args.tensors,
@@ -244,7 +251,8 @@ def main(argv: Sequence[str] | None = None) -> int:
                 strategy=args.strategy,
                 is_vllm=args.vllm,
                 gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
-                gpu_util=args.gpu_util
+                gpu_util=args.gpu_util,
+                timeout=args.timeout
             )
             models.append((model_path.split("/")[-1], info))
             
@@ -254,8 +262,8 @@ def main(argv: Sequence[str] | None = None) -> int:
     file_path = args.file[0]
     
     info = analyze_model(
-        file_path, 
-        args.context, 
+        file_path,
+        args.context,
         gpu_count=gpu_count,
         batch_size=args.batch_size,
         fetch_tensors=args.tensors,
@@ -263,7 +271,8 @@ def main(argv: Sequence[str] | None = None) -> int:
         strategy=args.strategy,
         is_vllm=args.vllm,
         gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
-        gpu_util=args.gpu_util
+        gpu_util=args.gpu_util,
+        timeout=args.timeout
     )
 
     print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
diff --git a/src/modelinfo/parsers/huggingface.py b/src/modelinfo/parsers/huggingface.py
index 713ce82..b6e6b92 100644
--- a/src/modelinfo/parsers/huggingface.py
+++ b/src/modelinfo/parsers/huggingface.py
@@ -29,7 +29,7 @@ def _get_hf_token() -> str | None:
             
     return None
 
-def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None) -> bytes:
+def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None, timeout: float = 10.0) -> bytes:
     if headers is None:
         headers = {}
         
@@ -39,7 +39,7 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
         
     req = urllib.request.Request(url, headers=headers)
     try:
-        with urllib.request.urlopen(req, timeout=10) as response:
+        with urllib.request.urlopen(req, timeout=timeout) as response:
             if limit is not None:
                 return response.read(limit)
             return response.read()
@@ -50,16 +50,16 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
            raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
         raise
 
-def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
+def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]:
     url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
     
     # 1. Fetch the first 500KB in a single roundtrip
     headers = {"Range": "bytes=0-500000"}
     try:
-        chunk = _make_request(url, headers=headers, limit=500000)
+        chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout)
     except urllib.error.HTTPError as e:
         if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
-            chunk = _make_request(url, limit=500000)
+            chunk = _make_request(url, limit=500000, timeout=timeout)
         else:
             raise
             
@@ -74,18 +74,18 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
     else:
         # 3. Double-roundtrip only if the header is massive (>500KB)
         headers = {"Range": f"bytes=8-{8+header_size-1}"}
-        json_bytes = _make_request(url, headers=headers, limit=header_size)
+        json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout)
         
     return json.loads(json_bytes)
 
-def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
+def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
     """
     Fetches the metadata directly from the Hugging Face Hub over the network.
     Returns: (tensors, config, format_name, disk_size)
     """
     api_url = f"https://huggingface.co/api/models/{repo_id}"
     try:
-        api_data = json.loads(_make_request(api_url).decode("utf-8"))
+        api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
     except urllib.error.HTTPError as e:
         if e.code == 401:
             raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
@@ -99,7 +99,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
     config = None
     if "config.json" in filenames:
         config_url = f"https://huggingface.co/{repo_id}/resolve/main/config.json"
-        config = json.loads(_make_request(config_url).decode("utf-8"))
+        config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
         
     tensors = {}
     total_size = 0.0
@@ -107,7 +107,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
     if "model.safetensors.index.json" in filenames:
         # Sharded SafeTensors
         index_url = f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors.index.json"
-        index_data = json.loads(_make_request(index_url).decode("utf-8"))
+        index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
         
         weight_map = index_data.get("weight_map", {})
         unique_shards = list(set(weight_map.values()))
@@ -128,8 +128,8 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
             }
         else:
             def fetch_shard(shard: str):
-                return shard, _fetch_safetensors_header(repo_id, shard)
-                
+                return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
+            
             with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
                 future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
                 for future in concurrent.futures.as_completed(future_to_shard):
@@ -154,12 +154,12 @@ def fetch_shard(shard: str):
         if token:
             req.add_header("Authorization", f"Bearer {token}")
         try:
-            with urllib.request.urlopen(req) as response:
+            with urllib.request.urlopen(req, timeout=timeout) as response:
                 total_size = int(response.headers.get("Content-Length", 0))
         except Exception:
             pass
 
-        header = _fetch_safetensors_header(repo_id, "model.safetensors")
+        header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
         tensors = header
             
         format_name = "SafeTensors"

From d37d2bddbe862794d96ef3c960898a5e99c51c71 Mon Sep 17 00:00:00 2001
From: john-1-1-1 <John_1_1_1@mail.ru>
Date: Fri, 19 Jun 2026 01:48:11 +0700
Subject: [PATCH 2/4] test: add timeout reproduction and configuration tests

---
 tests/test_hf_timeout_configurable.py | 45 +++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 tests/test_hf_timeout_configurable.py

diff --git a/tests/test_hf_timeout_configurable.py b/tests/test_hf_timeout_configurable.py
new file mode 100644
index 0000000..157f298
--- /dev/null
+++ b/tests/test_hf_timeout_configurable.py
@@ -0,0 +1,45 @@
+import pytest
+
+from unittest.mock import patch, MagicMock
+from modelinfo.cli import main
+from modelinfo.parsers.huggingface import fetch_huggingface_repo
+
+def test_cli_timeout_argument():
+    with patch('modelinfo.parsers.huggingface.fetch_huggingface_repo') as mock_fetch:
+
+        mock_fetch.return_value = ({}, {}, "SafeTensors", 0.0)
+
+        try:
+            result = main(["--timeout", "20.0", "gpt2"])
+            assert result == 0
+        except SystemExit as e:
+            pytest.fail(f"CLI failed with SystemExit: {e}. --timeout argument is likely not implemented.")
+
+def test_fetch_huggingface_repo_timeout_parameter():
+    with patch('urllib.request.urlopen') as mock_urlopen:
+        import struct
+
+        mock_response = MagicMock()
+        mock_response.__enter__.return_value = mock_response
+
+        header_json = b'{"__metadata__": {}}'
+        header_size = len(header_json)
+        valid_header = struct.pack("<Q", header_size) + header_json
+        
+        mock_response.read.side_effect = [
+            b'{"siblings": [{"rfilename": "config.json"}, {"rfilename": "model.safetensors"}]}',
+            b'{"max_position_embeddings": 1024}',
+            valid_header,
+        ]
+
+        mock_response.headers = {"Content-Length": "1000"}
+        
+        mock_urlopen.return_value = mock_response
+
+        try:
+            fetch_huggingface_repo("gpt2", timeout=20.0)
+        except TypeError as e:
+            pytest.fail(f"fetch_huggingface_repo failed with TypeError: {e}. timeout parameter is likely not implemented.")
+
+        args, kwargs = mock_urlopen.call_args
+        assert kwargs.get('timeout') == 20.0, f"Expected timeout=20.0, but got {kwargs.get('timeout')}"

From 28a86d281680b6a38dc4bb73589754d860d2752b Mon Sep 17 00:00:00 2001
From: john-1-1-1 <John_1_1_1@mail.ru>
Date: Fri, 19 Jun 2026 01:53:53 +0700
Subject: [PATCH 3/4] docs: update README with --timeout arg

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 7c6b4ab..d8a2e5b 100644
--- a/README.md
+++ b/README.md
@@ -153,6 +153,7 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Timeout in seconds applied to each network request to the Hugging Face Hub. Must be a finite number greater than `0`. Defaults to `10.0`. |
 | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 
 ## Architecture

From d8db97b8efcda1e6329451e8e4bff994a85a6a92 Mon Sep 17 00:00:00 2001
From: john-1-1-1 <John_1_1_1@mail.ru>
Date: Fri, 19 Jun 2026 13:19:23 +0700
Subject: [PATCH 4/4] Address CodeRabbit review points and cleanup tests

---
 .idea/.gitignore                              |   5 +
 .../inspectionProfiles/profiles_settings.xml  |   6 +
 .idea/misc.xml                                |   7 +
 .idea/modelinfo-cli.iml                       |  17 +++
 .idea/modules.xml                             |   8 ++
 .idea/vcs.xml                                 |   7 +
 src/modelinfo/cli.py                          | 102 +++++++++-----
 src/modelinfo/parsers/huggingface.py          | 126 ++++++++++++------
 tests/test_hf_timeout_configurable.py         |  35 +++--
 9 files changed, 229 insertions(+), 84 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modelinfo-cli.iml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..b58b603
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,5 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..590a59e
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.14" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.14" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modelinfo-cli.iml b/.idea/modelinfo-cli.iml
new file mode 100644
index 0000000..e2fc31a
--- /dev/null
+++ b/.idea/modelinfo-cli.iml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.14" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="py.test" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..df083bd
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/modelinfo-cli.iml" filepath="$PROJECT_DIR$/.idea/modelinfo-cli.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..8306744
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py
index 7db823f..96c42ff 100644
--- a/src/modelinfo/cli.py
+++ b/src/modelinfo/cli.py
@@ -2,6 +2,7 @@
 import json
 import os
 import sys
+import math
 from typing import Sequence
 from modelinfo.architecture import identify_architecture_name
 from modelinfo.calculator import calculate_footprint
@@ -12,7 +13,13 @@
 
 
 class VersionAction(argparse.Action):
-    def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"):
+    def __init__(
+        self,
+        option_strings,
+        dest=argparse.SUPPRESS,
+        default=argparse.SUPPRESS,
+        help="show program's version number and exit",
+    ):
         super().__init__(
             option_strings=option_strings,
             dest=dest,
@@ -41,12 +48,25 @@ def _positive_int(value: str) -> int:
     return ivalue
 
 
+def _positive_float(value: str) -> float:
+    """argparse ``type=`` converter: parse *value* as a finite float > 0, else raise ArgumentTypeError."""
+    try:
+        fvalue = float(value)
+    except ValueError:
+        # `from None`: the ValueError adds nothing beyond this message.
+        raise argparse.ArgumentTypeError(f"Invalid float value: {value}") from None
+    if not math.isfinite(fvalue):  # float() accepts "nan"/"inf"; reject them here
+        raise argparse.ArgumentTypeError(f"Timeout must be a finite number: {value}")
+    if fvalue <= 0:  # zero or negative is not a usable timeout
+        raise argparse.ArgumentTypeError(f"Timeout must be greater than 0: {value}")
+    return fvalue
+
+
 def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         prog="modelinfo",
         description="High-performance CLI utility to inspect ML model checkpoints and calculate VRAM requirements.",
     )
-    
+
     parser.add_argument(
         "file",
         type=str,
@@ -109,7 +129,7 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
     )
     parser.add_argument(
         "--timeout",
-        type=float,
+        type=_positive_float,
         default=10.0,
         help="Network request timeout in seconds for Hugging Face Hub (default 10.0).",
     )
@@ -133,21 +153,28 @@ def analyze_model(
     is_vllm: bool = False,
     gpu_vram_gb: float = 0.0,
     gpu_util: float = 0.9,
-    timeout: float = 10.0
+    timeout: float = 10.0,
 ) -> dict:
     tensors = {}
     config = None
     disk_size = 0.0
-    
+
     file_path_lower = file_path.lower()
-    
-    if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
+
+    if not os.path.exists(file_path) and not file_path_lower.endswith(
+        (".safetensors", ".gguf", ".pt", ".bin", ".index.json")
+    ):
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
-        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors, timeout=timeout)
-    elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
+
+        tensors, config, format_name, disk_size = fetch_huggingface_repo(
+            file_path, fetch_tensors=fetch_tensors, timeout=timeout
+        )
+    elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(
+        ".index.json"
+    ):
         tensors = parse_safetensors_header(file_path)
         format_name = "SafeTensors"
-        
+
         config_path = os.path.join(os.path.dirname(file_path), "config.json")
         if os.path.exists(config_path):
             try:
@@ -155,7 +182,7 @@ def analyze_model(
                     config = json.load(f)
             except (json.JSONDecodeError, OSError):
                 pass
-                
+
     elif file_path_lower.endswith(".gguf"):
         tensors = parse_gguf_header(file_path)
         format_name = "GGUF"
@@ -163,10 +190,14 @@ def analyze_model(
         tensors = parse_pytorch_header(file_path)
         format_name = "PyTorch"
     elif os.path.isdir(file_path):
-        raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.")
+        raise IsADirectoryError(
+            f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory."
+        )
     else:
-        raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.")
-        
+        raise ValueError(
+            f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID."
+        )
+
     max_context = None
     if config:
         max_context = config.get("max_position_embeddings")
@@ -175,7 +206,7 @@ def analyze_model(
         gen_arch = metadata.get("general.architecture")
         if gen_arch:
             max_context = metadata.get(f"{gen_arch}.context_length")
-            
+
     is_default_context = False
     context_length = context_override
     if context_length is None:
@@ -183,7 +214,7 @@ def analyze_model(
         is_default_context = True
 
     footprint = calculate_footprint(
-        tensors, 
+        tensors,
         context_length=context_length,
         batch_size=batch_size,
         config=config,
@@ -192,16 +223,16 @@ def analyze_model(
         strategy=strategy,
         is_vllm=is_vllm,
         gpu_vram_bytes=gpu_vram_gb * 1024**3 if gpu_vram_gb else 0.0,
-        gpu_util=gpu_util
+        gpu_util=gpu_util,
     )
     num_layers = footprint["num_layers"]
     arch_name = identify_architecture_name(tensors, num_layers, config)
 
     if format_name != "SafeTensors" or os.path.exists(file_path):
         disk_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0.0
-        
+
     tensor_count = len([k for k in tensors.keys() if k != "__metadata__"])
-    
+
     return {
         "format_name": format_name,
         "arch_name": arch_name,
@@ -218,7 +249,7 @@ def analyze_model(
         "strategy": strategy,
         "is_vllm": is_vllm,
         "gpu_vram_gb": gpu_vram_gb,
-        "gpu_util": gpu_util
+        "gpu_util": gpu_util,
     }
 
 
@@ -228,17 +259,20 @@ def main(argv: Sequence[str] | None = None) -> int:
     gpu_name_display = None
     gpu_vram_gb = None
     gpu_count = 1
-    
+
     if args.gpu or args.vllm:
         target = args.gpu if args.gpu else "auto"
         from modelinfo.hardware import resolve_gpu
+
         gpu_name_display, gpu_vram_gb, gpu_count = resolve_gpu(target)
 
     if len(args.file) > 1:
         if args.vllm:
-            console.print("[red]Error: Side-by-side comparison does not currently support the --vllm capacity simulation. Compare models sequentially or remove --vllm.[/red]")
+            console.print(
+                "[red]Error: Side-by-side comparison does not currently support the --vllm capacity simulation. Compare models sequentially or remove --vllm.[/red]"
+            )
             return 1
-            
+
         models = []
         for model_path in args.file:
             info = analyze_model(
@@ -252,15 +286,19 @@ def main(argv: Sequence[str] | None = None) -> int:
                 is_vllm=args.vllm,
                 gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
                 gpu_util=args.gpu_util,
-                timeout=args.timeout
+                timeout=args.timeout,
             )
             models.append((model_path.split("/")[-1], info))
-            
-        print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
+
+        print_compare_info(
+            models,
+            gpu_vram_gb if gpu_vram_gb else args.max_vram,
+            gpu_name=gpu_name_display,
+        )
         return 0
-        
+
     file_path = args.file[0]
-    
+
     info = analyze_model(
         file_path,
         args.context,
@@ -272,10 +310,14 @@ def main(argv: Sequence[str] | None = None) -> int:
         is_vllm=args.vllm,
         gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
         gpu_util=args.gpu_util,
-        timeout=args.timeout
+        timeout=args.timeout,
     )
 
-    print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
+    print_model_info(
+        **info,
+        max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram,
+        gpu_name=gpu_name_display,
+    )
     return 0
 
 
diff --git a/src/modelinfo/parsers/huggingface.py b/src/modelinfo/parsers/huggingface.py
index b6e6b92..cc07236 100644
--- a/src/modelinfo/parsers/huggingface.py
+++ b/src/modelinfo/parsers/huggingface.py
@@ -6,11 +6,12 @@
 import urllib.request
 from typing import Any, Dict, Tuple
 
+
 def _get_hf_token() -> str | None:
     token = os.environ.get("HF_TOKEN")
     if token:
         return token
-        
+
     cache_path = os.path.expanduser("~/.cache/huggingface/token")
     if os.path.exists(cache_path):
         try:
@@ -18,7 +19,7 @@ def _get_hf_token() -> str | None:
                 return f.read().strip()
         except OSError:
             pass
-            
+
     legacy_path = os.path.expanduser("~/.huggingface/token")
     if os.path.exists(legacy_path):
         try:
@@ -26,17 +27,23 @@ def _get_hf_token() -> str | None:
                 return f.read().strip()
         except OSError:
             pass
-            
+
     return None
 
-def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None, timeout: float = 10.0) -> bytes:
+
+def _make_request(
+    url: str,
+    headers: Dict[str, str] = None,
+    limit: int | None = None,
+    timeout: float = 10.0,
+) -> bytes:
     if headers is None:
         headers = {}
-        
+
     token = _get_hf_token()
     if token:
         headers["Authorization"] = f"Bearer {token}"
-        
+
     req = urllib.request.Request(url, headers=headers)
     try:
         with urllib.request.urlopen(req, timeout=timeout) as response:
@@ -45,40 +52,54 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
             return response.read()
     except urllib.error.HTTPError as e:
         if e.code == 401:
-            raise PermissionError(f"Gated/Private Model or Invalid Token (401 Unauthorized). Set the HF_TOKEN environment variable to access {url}")
+            raise PermissionError(
+                f"Gated/Private Model or Invalid Token (401 Unauthorized). Set the HF_TOKEN environment variable to access {url}"
+            )
         if e.code == 404:
-           raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
+            raise FileNotFoundError(
+                f"Could not find repository or file on Hugging Face (404 Not Found): {url}"
+            )
         raise
 
-def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]:
+
+def _fetch_safetensors_header(
+    repo_id: str, filename: str, timeout: float = 10.0
+) -> Dict[str, Any]:
     url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
-    
+
     # 1. Fetch the first 500KB in a single roundtrip
     headers = {"Range": "bytes=0-500000"}
     try:
         chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout)
     except urllib.error.HTTPError as e:
-        if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
+        if e.code == 416:  # Range Not Satisfiable (file is smaller than 500KB)
             chunk = _make_request(url, limit=500000, timeout=timeout)
         else:
             raise
-            
+
     if len(chunk) < 8:
-        raise ValueError(f"File {filename} is too small to contain a SafeTensors header.")
-        
+        raise ValueError(
+            f"File {filename} is too small to contain a SafeTensors header."
+        )
+
     header_size = struct.unpack("<Q", chunk[:8])[0]
-    
+
     # 2. Slice locally if it fits
     if 8 + header_size <= len(chunk):
-        json_bytes = chunk[8:8+header_size]
+        json_bytes = chunk[8 : 8 + header_size]
     else:
         # 3. Double-roundtrip only if the header is massive (>500KB)
-        headers = {"Range": f"bytes=8-{8+header_size-1}"}
-        json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout)
-        
+        headers = {"Range": f"bytes=8-{8 + header_size - 1}"}
+        json_bytes = _make_request(
+            url, headers=headers, limit=header_size, timeout=timeout
+        )
+
     return json.loads(json_bytes)
 
-def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
+
+def fetch_huggingface_repo(
+    repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0
+) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
     """
     Fetches the metadata directly from the Hugging Face Hub over the network.
     Returns: (tensors, config, format_name, disk_size)
@@ -88,68 +109,83 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: f
         api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
     except urllib.error.HTTPError as e:
         if e.code == 401:
-            raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
+            raise PermissionError(
+                f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}"
+            )
         if e.code == 404:
-             raise FileNotFoundError(f"Could not find repository on Hugging Face (404 Not Found): {repo_id}")
+            raise FileNotFoundError(
+                f"Could not find repository on Hugging Face (404 Not Found): {repo_id}"
+            )
         raise
-        
+
     siblings = api_data.get("siblings", [])
     filenames = {s["rfilename"] for s in siblings}
-    
+
     config = None
     if "config.json" in filenames:
         config_url = f"https://huggingface.co/{repo_id}/resolve/main/config.json"
         config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
-        
+
     tensors = {}
     total_size = 0.0
-    
+
     if "model.safetensors.index.json" in filenames:
         # Sharded SafeTensors
         index_url = f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors.index.json"
-        index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
-        
+        index_data = json.loads(
+            _make_request(index_url, timeout=timeout).decode("utf-8")
+        )
+
         weight_map = index_data.get("weight_map", {})
         unique_shards = list(set(weight_map.values()))
-        
+
         total_size = index_data.get("metadata", {}).get("total_size", 0.0)
-        
+
         if config and not fetch_tensors and total_size > 0:
             # Lazy Fetch Paradigm
             for tensor_name in weight_map.keys():
                 tensors[tensor_name] = {"shape": [], "dtype": "BF16"}
-                
+
             tensors["__metadata__"] = {
                 "missing_shards": 0,
                 "total_shards": len(unique_shards),
                 "is_sharded": True,
                 "lazy_fetch": True,
-                "total_size": total_size
+                "total_size": total_size,
             }
         else:
+
             def fetch_shard(shard: str):
                 return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
-            
-            with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
-                future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
+
+            with concurrent.futures.ThreadPoolExecutor(
+                max_workers=max(1, min(8, len(unique_shards)))
+            ) as executor:
+                future_to_shard = {
+                    executor.submit(fetch_shard, shard): shard
+                    for shard in unique_shards
+                }
                 for future in concurrent.futures.as_completed(future_to_shard):
                     shard, shard_header = future.result()
                     for k, v in shard_header.items():
                         if k != "__metadata__":
                             tensors[k] = v
-                            
+
             tensors["__metadata__"] = {
                 "missing_shards": 0,
                 "total_shards": len(unique_shards),
-                "is_sharded": True
+                "is_sharded": True,
             }
         format_name = "SafeTensors"
-        
+
     elif "model.safetensors" in filenames:
         # Single SafeTensors
-        
+
         # Determine total size first
-        req = urllib.request.Request(f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors", method="HEAD")
+        req = urllib.request.Request(
+            f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors",
+            method="HEAD",
+        )
         token = _get_hf_token()
         if token:
             req.add_header("Authorization", f"Bearer {token}")
@@ -159,12 +195,14 @@ def fetch_shard(shard: str):
         except Exception:
             pass
 
-        header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
+        header = _fetch_safetensors_header(
+            repo_id, "model.safetensors", timeout=timeout
+        )
         tensors = header
-            
+
         format_name = "SafeTensors"
-        
+
     else:
         raise ValueError(f"Repository {repo_id} does not contain SafeTensors weights.")
-        
+
     return tensors, config, format_name, float(total_size)
diff --git a/tests/test_hf_timeout_configurable.py b/tests/test_hf_timeout_configurable.py
index 157f298..1dd0cc9 100644
--- a/tests/test_hf_timeout_configurable.py
+++ b/tests/test_hf_timeout_configurable.py
@@ -4,19 +4,28 @@
 from modelinfo.cli import main
 from modelinfo.parsers.huggingface import fetch_huggingface_repo
 
-def test_cli_timeout_argument():
-    with patch('modelinfo.parsers.huggingface.fetch_huggingface_repo') as mock_fetch:
 
+def test_cli_timeout_argument():
+    with patch("modelinfo.parsers.huggingface.fetch_huggingface_repo") as mock_fetch:
         mock_fetch.return_value = ({}, {}, "SafeTensors", 0.0)
 
         try:
             result = main(["--timeout", "20.0", "gpt2"])
             assert result == 0
+
+            # Verify that the timeout argument successfully reaches fetch_huggingface_repo
+            args, kwargs = mock_fetch.call_args
+            assert kwargs.get("timeout") == 20.0, (
+                f"Expected timeout=20.0 in fetch_huggingface_repo, but got {kwargs.get('timeout')}"
+            )
         except SystemExit as e:
-            pytest.fail(f"CLI failed with SystemExit: {e}. --timeout argument is likely not implemented.")
+            pytest.fail(
+                f"CLI failed with SystemExit: {e}. --timeout argument is likely not implemented."
+            )
+
 
 def test_fetch_huggingface_repo_timeout_parameter():
-    with patch('urllib.request.urlopen') as mock_urlopen:
+    with patch("urllib.request.urlopen") as mock_urlopen:
         import struct
 
         mock_response = MagicMock()
@@ -25,7 +34,7 @@ def test_fetch_huggingface_repo_timeout_parameter():
         header_json = b'{"__metadata__": {}}'
         header_size = len(header_json)
         valid_header = struct.pack("<Q", header_size) + header_json
-        
+
         mock_response.read.side_effect = [
             b'{"siblings": [{"rfilename": "config.json"}, {"rfilename": "model.safetensors"}]}',
             b'{"max_position_embeddings": 1024}',
@@ -33,13 +42,19 @@ def test_fetch_huggingface_repo_timeout_parameter():
         ]
 
         mock_response.headers = {"Content-Length": "1000"}
-        
+
         mock_urlopen.return_value = mock_response
 
         try:
             fetch_huggingface_repo("gpt2", timeout=20.0)
         except TypeError as e:
-            pytest.fail(f"fetch_huggingface_repo failed with TypeError: {e}. timeout parameter is likely not implemented.")
-
-        args, kwargs = mock_urlopen.call_args
-        assert kwargs.get('timeout') == 20.0, f"Expected timeout=20.0, but got {kwargs.get('timeout')}"
+            pytest.fail(
+                f"fetch_huggingface_repo failed with TypeError: {e}. timeout parameter is likely not implemented."
+            )
+
+        # Verify that the timeout parameter propagates to all urlopen calls
+        for call in mock_urlopen.call_args_list:
+            args, kwargs = call
+            assert kwargs.get("timeout") == 20.0, (
+                f"Expected timeout=20.0 in urlopen call, but got {kwargs.get('timeout')}"
+            )