From d4d5378dc421f14625acc81405b4ef946c514ce5 Mon Sep 17 00:00:00 2001
From: ShouQue <dingtianwei.dtw@alibaba-inc.com>
Date: Mon, 20 Apr 2026 10:39:50 +0800
Subject: [PATCH 1/2] (feat): update pyproject.toml to support install from pip
 directly.

---
 .../Debugging Guide/debug_guide.md            |   2 +-
 .../Installation/installation.md              |   8 +-
 .../aliyun_serverless_devpod_quick_start.md   |   2 +-
 .../Quick Start/multi_nodes_quick_start.md    |   4 +-
 .../Quick Start/single_node_quick_start.md    |   4 +-
 .../docs/User Guides/Algorithms/Reward_FL.md  |   2 +-
 .../Hardware Support/ascend_usage.md          |   2 +-
 .../Pipeline/agent_pipeline_start.md          |   2 +-
 .../Pipeline/agentic_pipeline_start.md        |   2 +-
 .../Pipeline/distill_pipeline_start.md        |   2 +-
 .../Pipeline/dpo_pipeline_start.md            |   2 +-
 .../on_policy_distill_pipeline_start.md       |   2 +-
 .../Pipeline/rlvr_pipeline_start.md           |   8 +-
 .../Pipeline/sft_pipeline_start.md            |   2 +-
 .../Pipeline/vl_rlvr_pipeline_start.md        |   2 +-
 .../Debugging Guide/debug_guide.md            |   2 +-
 .../Installation/installation.md              |   6 +-
 .../aliyun_serverless_devpod_quick_start.md   |   2 +-
 .../Quick Start/multi_nodes_quick_start.md    |   4 +-
 .../Quick Start/single_node_quick_start.md    |   4 +-
 .../User Guides/Algorithms/Reward_FL.md       |   2 +-
 .../Hardware Support/ascend_usage.md          |   2 +-
 .../Pipeline/agentic_pipeline_start.md        |   2 +-
 .../Pipeline/distill_pipeline_start.md        |   2 +-
 .../Pipeline/dpo_pipeline_start.md            |   2 +-
 .../on_policy_distill_pipeline_start.md       |   2 +-
 .../Pipeline/rlvr_pipeline_start.md           |   8 +-
 .../Pipeline/sft_pipeline_start.md            |   2 +-
 .../Pipeline/vl_rlvr_pipeline_start.md        |   4 +-
 pyproject.toml                                | 130 ++++++++++++++++++
 30 files changed, 177 insertions(+), 43 deletions(-)

diff --git a/docs_roll/docs/Getting Started/Debugging Guide/debug_guide.md b/docs_roll/docs/Getting Started/Debugging Guide/debug_guide.md
index 358c7e5f9..d193b7e19 100644
--- a/docs_roll/docs/Getting Started/Debugging Guide/debug_guide.md	
+++ b/docs_roll/docs/Getting Started/Debugging Guide/debug_guide.md	
@@ -64,7 +64,7 @@ conda activate python310_torch260_em
 2. Install dependencies:
 ```bash
 pip3 install torch torchvision torchaudio py-cpuinfo
-pip install -r requirements_em_local_debug.txt
+pip install -e ".[local-debug]"
 ```
 
 3. Run the test script:
diff --git a/docs_roll/docs/Getting Started/Installation/installation.md b/docs_roll/docs/Getting Started/Installation/installation.md
index d658b55f2..cdb0c50c9 100644
--- a/docs_roll/docs/Getting Started/Installation/installation.md	
+++ b/docs_roll/docs/Getting Started/Installation/installation.md	
@@ -19,8 +19,10 @@ vLLM >= 0.7.3
 # Clone the repository and install
 git clone https://github.com/alibaba/ROLL.git
 cd ROLL
-pip install -r requirements.txt # Or follow your specific installation steps
-# For development, consider: pip install -e .
+# Install with specific PyTorch version and inference engine, e.g.:
+pip install -e ".[torch260-vllm]"  # PyTorch 2.6.0 + vLLM
+# Other available extras: torch260-sglang, torch280-vllm, torch280-sglang, torch260-diffsynth, local-debug, gem
+# For basic install: pip install -e .
 ```
 
 For AMD users, please ensure you meet the following prerequisites:
@@ -33,7 +35,7 @@ vLLM >= 0.8.4
 # Clone the repository and install
 git clone https://github.com/alibaba/ROLL.git
 cd ROLL
-pip install -r requirements.txt # Or follow your specific installation steps
+pip install -e ".[torch260-vllm]"  # Or choose other extras based on your environment
 ```
 We highly suggest to use pre-built Docker images from [Image Addresses](https://alibaba.github.io/ROLL/docs/QuickStart/image_address) instead of installation from Custom Environment for ROCm users.
 
diff --git a/docs_roll/docs/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md b/docs_roll/docs/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md
index 7742850c6..c7a813a6d 100644
--- a/docs_roll/docs/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md	
+++ b/docs_roll/docs/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md	
@@ -35,7 +35,7 @@ git clone https://github.com/alibaba/ROLL.git
 
 # 2. Install project dependencies
 cd ROLL
-pip install -r requirements_torch260_vllm.txt -i https://mirrors.aliyun.com/pypi/simple/  
+pip install -e ".[torch260-vllm]" -i https://mirrors.aliyun.com/pypi/simple/  
 ```
 
 ### Run a Pipeline Example
diff --git a/docs_roll/docs/Getting Started/Quick Start/multi_nodes_quick_start.md b/docs_roll/docs/Getting Started/Quick Start/multi_nodes_quick_start.md
index 00c687c3e..4d7197e93 100644
--- a/docs_roll/docs/Getting Started/Quick Start/multi_nodes_quick_start.md	
+++ b/docs_roll/docs/Getting Started/Quick Start/multi_nodes_quick_start.md	
@@ -30,9 +30,9 @@ nvidia-smi
 # 4. Clone the project code
 git clone https://github.com/alibaba/ROLL.git
 
-# 5. Install project dependencies (choose the requirements file corresponding to your image)
+# 5. Install project dependencies (choose the extras corresponding to your environment)
 cd ROLL
-pip install -r requirements_torch260_vllm.txt -i https://mirrors.aliyun.com/pypi/simple/
+pip install -e ".[torch260-vllm]" -i https://mirrors.aliyun.com/pypi/simple/
 ```
 
 ## Pipeline Execution
diff --git a/docs_roll/docs/Getting Started/Quick Start/single_node_quick_start.md b/docs_roll/docs/Getting Started/Quick Start/single_node_quick_start.md
index 072afd862..8fb691450 100644
--- a/docs_roll/docs/Getting Started/Quick Start/single_node_quick_start.md	
+++ b/docs_roll/docs/Getting Started/Quick Start/single_node_quick_start.md	
@@ -30,9 +30,9 @@ nvidia-smi
 # 4. Clone the project code
 git clone https://github.com/alibaba/ROLL.git
 
-# 5. Install project dependencies (choose the requirements file corresponding to your image)
+# 5. Install project dependencies (choose the extras corresponding to your environment)
 cd ROLL
-pip install -r requirements_torch260_vllm.txt -i https://mirrors.aliyun.com/pypi/simple/
+pip install -e ".[torch260-vllm]" -i https://mirrors.aliyun.com/pypi/simple/
 ```
 
 ## Pipeline Execution
diff --git a/docs_roll/docs/User Guides/Algorithms/Reward_FL.md b/docs_roll/docs/User Guides/Algorithms/Reward_FL.md
index 34734158f..0027a0581 100644
--- a/docs_roll/docs/User Guides/Algorithms/Reward_FL.md	
+++ b/docs_roll/docs/User Guides/Algorithms/Reward_FL.md	
@@ -79,7 +79,7 @@ The following parameters related to Wan2_2 are as follows:
 
 ## Setup environments
 ```
-pip install -r requirements_torch260_diffsynth.txt
+pip install -e ".[torch260-diffsynth]"
 ```
 
 ## Reference Example
diff --git a/docs_roll/docs/User Guides/Hardware Support/ascend_usage.md b/docs_roll/docs/User Guides/Hardware Support/ascend_usage.md
index d45f57136..a194f110c 100644
--- a/docs_roll/docs/User Guides/Hardware Support/ascend_usage.md	
+++ b/docs_roll/docs/User Guides/Hardware Support/ascend_usage.md	
@@ -73,7 +73,7 @@ pip install vllm-ascend==0.13.0
 ```
 git clone https://github.com/alibaba/ROLL.git
 cd ROLL
-pip install -r requirements_common.txt
+pip install -e .
 pip install deepspeed==0.16.4
 cd ..
 ```
diff --git a/docs_roll/docs/User Guides/Pipeline/agent_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/agent_pipeline_start.md
index 7c0eeeb04..5b5cf8c61 100644
--- a/docs_roll/docs/User Guides/Pipeline/agent_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/agent_pipeline_start.md	
@@ -52,7 +52,7 @@ Before you begin, ensure you have the following:
 2. **Python Dependencies** – Install all necessary Python dependencies, typically via the requirements file:
 
    ```bash
-   pip install -r requirements.txt   # Or a specific file like requirements_torch260.txt
+   pip install -e .   # Or with specific extras like pip install -e ".[torch260-vllm]"
    ```
 
    Ensure any specific dependencies for your chosen agentic environments or models are also met.
diff --git a/docs_roll/docs/User Guides/Pipeline/agentic_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/agentic_pipeline_start.md
index a0b587392..21b6c16b0 100644
--- a/docs_roll/docs/User Guides/Pipeline/agentic_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/agentic_pipeline_start.md	
@@ -218,7 +218,7 @@ bash examples/qwen2.5-0.5B-agentic/run_agentic_pipeline_frozen_lake.sh
 * Ensure all necessary dependencies are installed, it's recommended to start from [image launch](../../Getting%20Started/Installation/installation.md):
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * Confirm all model paths in the configuration are accessible.
diff --git a/docs_roll/docs/User Guides/Pipeline/distill_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/distill_pipeline_start.md
index 8c9d563e3..595c2cf21 100644
--- a/docs_roll/docs/User Guides/Pipeline/distill_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/distill_pipeline_start.md	
@@ -205,7 +205,7 @@ bash examples/qwen2.5-7B-distill_megatron/run_distill_pipeline.sh
 * Ensure all necessary dependencies are installed:
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * Verify that all model paths in the configuration are accessible.
diff --git a/docs_roll/docs/User Guides/Pipeline/dpo_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/dpo_pipeline_start.md
index d54e65700..d773c4ae7 100644
--- a/docs_roll/docs/User Guides/Pipeline/dpo_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/dpo_pipeline_start.md	
@@ -200,7 +200,7 @@ bash bash examples/qwen2.5-3B-dpo_megatron/run_dpo_pipeline.sh
 * Ensure all necessary dependencies are installed:
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * Verify that all model paths in the configuration are accessible.
diff --git a/docs_roll/docs/User Guides/Pipeline/on_policy_distill_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/on_policy_distill_pipeline_start.md
index 479825b28..462dc7cf7 100644
--- a/docs_roll/docs/User Guides/Pipeline/on_policy_distill_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/on_policy_distill_pipeline_start.md	
@@ -349,7 +349,7 @@ bash examples/qwen3-8B-onpolicy-distill-megatron/run_onpolicy_distill_pipeline.s
 * Ensure all necessary dependencies are installed:
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * Verify that all model paths in the configuration are accessible.
diff --git a/docs_roll/docs/User Guides/Pipeline/rlvr_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/rlvr_pipeline_start.md
index dbef7f187..8904b8205 100644
--- a/docs_roll/docs/User Guides/Pipeline/rlvr_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/rlvr_pipeline_start.md	
@@ -215,11 +215,11 @@ Example data format (Code domain):
   "source": "codeforeces",
   "difficulty": "0",
   "prompt": "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. \n\n### Question: Write a function that takes an array of distinct integers and returns all possible permutations (in any order). Each permutation should be represented as an array of integers. The function should handle arrays of different lengths efficiently.\n\n### Format: You will use the following starter code to write the solution to the problem and enclose your code within delimiters.\n```python\ndef permute(nums):\n```\n\n### Answer: (use the provided format with backticks)",
-  "messages": "[{\"role\": \"user\", \"content\": \"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. \\n\\n### Question: Write a function that takes an array of distinct integers and returns all possible permutations (in any order). Each permutation should be represented as an array of integers. The function should handle arrays of different lengths efficiently.\\n\\n### Format: You will use the following starter code to write the solution to the problem and enclose your code within delimiters.\\n```python\\ndef permute(nums):\\n```\\n\\n### Answer: (use the provided format with backticks)\"}]",
-  "ground_truth": "[\"def permute(nums):\\n    \\\"\\\"\\\"\\n    Given an array of distinct integers, return all possible permutations.\\n    Each permutation is an array of integers.\\n    \\\"\\\"\\\"\\n    def backtrack(start, end):\\n        if start == end:\\n            permutations.append(nums[:])\\n        for i in range(start, end):\\n            nums[start], nums[i] = nums[i], nums[start]\\n            backtrack(start + 1, end)\\n            nums[start], nums[i] = nums[i], nums[start]\\n\\n    permutations = []\\n    backtrack(0, len(nums))\\n    return permutations\"]",
+  "messages": "[{\"role\": \"user\", \"content\": \"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. \n\n### Question: Write a function that takes an array of distinct integers and returns all possible permutations (in any order). Each permutation should be represented as an array of integers. The function should handle arrays of different lengths efficiently.\n\n### Format: You will use the following starter code to write the solution to the problem and enclose your code within delimiters.\n```python\ndef permute(nums):\n```\n\n### Answer: (use the provided format with backticks)\"}]",
+  "ground_truth": "[\"def permute(nums):\n    \\\"\\\"\\\"\n    Given an array of distinct integers, return all possible permutations.\n    Each permutation is an array of integers.\n    \\\"\\\"\\\"\n    def backtrack(start, end):\n        if start == end:\n            permutations.append(nums[:])\n        for i in range(start, end):\n            nums[start], nums[i] = nums[i], nums[start]\n            backtrack(start + 1, end)\n            nums[start], nums[i] = nums[i], nums[start]\n\n    permutations = []\n    backtrack(0, len(nums))\n    return permutations\"]",
   "case_type": "pytest",
   "test_case_function": " ",
-  "test_cases": "[{\"assert_code\": \"\\n\\n\\ndef test_permute_single_element():\\n    assert permute([1]) == [[1]]\\n\\ndef test_permute_two_elements():\\n    result = permute([1, 2])\\n    expected = [[1, 2], [2, 1]]\\n    assert sorted(result) == sorted(expected)\\n\\ndef test_permute_three_elements():\\n    result = permute([1, 2, 3])\\n    expected = [[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 1, 2], [3, 2, 1]]\\n    assert sorted(result) == sorted(expected)\\n\\ndef test_permute_four_elements():\\n    result = permute([1, 2, 3, 4])\\n    expected = [\\n        [1, 2, 3, 4], [1, 2, 4, 3], [1, 3, 2, 4], [1, 3, 4, 2], [1, 4, 2, 3], [1, 4, 3, 2],\\n        [2, 1, 3, 4], [2, 1, 4, 3], [2, 3, 1, 4], [2, 3, 4, 1], [2, 4, 1, 3], [2, 4, 3, 1],\\n        [3, 1, 2, 4], [3, 1, 4, 2], [3, 2, 1, 4], [3, 2, 4, 1], [3, 4, 1, 2], [3, 4, 2, 1],\\n        [4, 1, 2, 3], [4, 1, 3, 2], [4, 2, 1, 3], [4, 2, 3, 1], [4, 3, 1, 2], [4, 3, 2, 1]\\n    ]\\n    assert sorted(result) == sorted(expected)\"}]",
+  "test_cases": "[{\"assert_code\": \"\n\n\ndef test_permute_single_element():\n    assert permute([1]) == [[1]]\n\ndef test_permute_two_elements():\n    result = permute([1, 2])\n    expected = [[1, 2], [2, 1]]\n    assert sorted(result) == sorted(expected)\n\ndef test_permute_three_elements():\n    result = permute([1, 2, 3])\n    expected = [[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 1, 2], [3, 2, 1]]\n    assert sorted(result) == sorted(expected)\n\ndef test_permute_four_elements():\n    result = permute([1, 2, 3, 4])\n    expected = [\n        [1, 2, 3, 4], [1, 2, 4, 3], [1, 3, 2, 4], [1, 3, 4, 2], [1, 4, 2, 3], [1, 4, 3, 2],\n        [2, 1, 3, 4], [2, 1, 4, 3], [2, 3, 1, 4], [2, 3, 4, 1], [2, 4, 1, 3], [2, 4, 3, 1],\n        [3, 1, 2, 4], [3, 1, 4, 2], [3, 2, 1, 4], [3, 2, 4, 1], [3, 4, 1, 2], [3, 4, 2, 1],\n        [4, 1, 2, 3], [4, 1, 3, 2], [4, 2, 1, 3], [4, 2, 3, 1], [4, 3, 1, 2], [4, 3, 2, 1]\n    ]\n    assert sorted(result) == sorted(expected)\"}]",
   "tag": "KodCode"
 }
 ```
@@ -314,7 +314,7 @@ bash examples/qwen2.5-7B-rlvr_megatron/run_rlvr_pipeline.sh
 * Ensure all necessary dependencies are installed:
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * Verify that all model paths in the configuration are accessible.
diff --git a/docs_roll/docs/User Guides/Pipeline/sft_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/sft_pipeline_start.md
index 36f32b36b..3cac83fcc 100644
--- a/docs_roll/docs/User Guides/Pipeline/sft_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/sft_pipeline_start.md	
@@ -228,7 +228,7 @@ Key items to check:
 ### Step 2: Prepare Environment and Dependencies
 
 ```bash
-pip install -r requirements.txt
+pip install -e .
 ```
 
 Also ensure:
diff --git a/docs_roll/docs/User Guides/Pipeline/vl_rlvr_pipeline_start.md b/docs_roll/docs/User Guides/Pipeline/vl_rlvr_pipeline_start.md
index e54ffdc7f..f5f2d5805 100644
--- a/docs_roll/docs/User Guides/Pipeline/vl_rlvr_pipeline_start.md	
+++ b/docs_roll/docs/User Guides/Pipeline/vl_rlvr_pipeline_start.md	
@@ -300,7 +300,7 @@ bash examples/qwen2.5-vl-7B-rlvr/run_rlvr_pipeline.sh
 * Ensure all necessary dependencies are installed. NOTE: VLLM is the only supported inference engine for VLM pipeline currently, thus use the corresponding requirement files:
 
   ```bash
-  pip install -r requirements_torch260_vllm.txt
+  pip install -e ".[torch260-vllm]"
   ```
 
 * Verify that all model paths in the configuration are accessible.
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Debugging Guide/debug_guide.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Debugging Guide/debug_guide.md
index 639aede97..254863c41 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Debugging Guide/debug_guide.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Debugging Guide/debug_guide.md	
@@ -62,7 +62,7 @@ conda activate python310_torch260_em
 2. 安装依赖：
 ```bash
 pip3 install torch torchvision torchaudio py-cpuinfo
-pip install -r requirements_em_local_debug.txt
+pip install -e ".[local-debug]"
 ```
 
 3. 运行测试脚本：
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Installation/installation.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Installation/installation.md
index a291c9d7a..cb78e2b32 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Installation/installation.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Installation/installation.md	
@@ -19,6 +19,8 @@ vLLM >= 0.7.3
 # 克隆仓库并安装
 git clone https://github.com/alibaba/ROLL.git
 cd ROLL
-pip install -r requirements.txt # 或按照您的特定安装步骤
-# 对于开发，可以考虑使用：pip install -e .
+# 安装指定 PyTorch 版本和推理引擎，例如：
+pip install -e ".[torch260-vllm]"  # PyTorch 2.6.0 + vLLM
+# 其他可选项：torch260-sglang, torch280-vllm, torch280-sglang, torch260-diffsynth, local-debug, gem
+# 基础安装：pip install -e .
 ```
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md
index f8b47da83..6d575e1b7 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/aliyun_serverless_devpod_quick_start.md	
@@ -35,7 +35,7 @@ git clone https://github.com/alibaba/ROLL.git
 
 # 2. 安装项目依赖
 cd ROLL
-pip install -r requirements_torch260_vllm.txt -i https://mirrors.aliyun.com/pypi/simple/
+pip install -e ".[torch260-vllm]" -i https://mirrors.aliyun.com/pypi/simple/
 ```
 
 ### 运行 pipeline 示例
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/multi_nodes_quick_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/multi_nodes_quick_start.md
index 5837bbb39..ec06c0bc4 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/multi_nodes_quick_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/multi_nodes_quick_start.md	
@@ -30,9 +30,9 @@ nvidia-smi
 # 4. 克隆项目代码
 git clone https://github.com/alibaba/ROLL.git
 
-# 5. 安装项目依赖（选择对应镜像的requirements文件）
+# 5. 安装项目依赖（选择对应环境的可选项）
 cd ROLL
-pip install -r requirements_torch260_vllm.txt -i https://mirrors.aliyun.com/pypi/simple/
+pip install -e ".[torch260-vllm]" -i https://mirrors.aliyun.com/pypi/simple/
 ```
 
 ## pipeline运行
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/single_node_quick_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/single_node_quick_start.md
index cb5264cb9..5709be5d3 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/single_node_quick_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/Getting Started/Quick Start/single_node_quick_start.md	
@@ -30,9 +30,9 @@ nvidia-smi
 # 4. 克隆项目代码
 git clone https://github.com/alibaba/ROLL.git
 
-# 5. 安装项目依赖（选择对应镜像的requirements文件）
+# 5. 安装项目依赖（选择对应环境的可选项）
 cd ROLL
-pip install -r requirements_torch260_vllm.txt -i https://mirrors.aliyun.com/pypi/simple/
+pip install -e ".[torch260-vllm]" -i https://mirrors.aliyun.com/pypi/simple/
 ```
 
 ## pipeline运行
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Algorithms/Reward_FL.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Algorithms/Reward_FL.md
index 48bba2ea6..8becc9bcd 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Algorithms/Reward_FL.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Algorithms/Reward_FL.md	
@@ -77,7 +77,7 @@ Wan2_2 相关参数如下：
 
 ## 环境配置
 ```
-pip install -r requirements_torch260_diffsynth.txt
+pip install -e ".[torch260-diffsynth]"
 ```
 
 ## 参考示例
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Hardware Support/ascend_usage.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Hardware Support/ascend_usage.md
index deab932b8..a67b6717b 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Hardware Support/ascend_usage.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Hardware Support/ascend_usage.md	
@@ -78,7 +78,7 @@ pip install vllm-ascend==0.11.0rc1
 ```
 git clone https://github.com/alibaba/ROLL.git
 cd ROLL
-pip install -r requirements_common.txt
+pip install -e .
 pip install deepspeed==0.16.4
 cd ..
 ```
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/agentic_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/agentic_pipeline_start.md
index aef64cc18..ad0a2bae5 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/agentic_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/agentic_pipeline_start.md	
@@ -214,7 +214,7 @@ bash examples/qwen2.5-0.5B-agentic/run_agentic_pipeline_frozen_lake.sh
 * 确保已安装所有必要依赖，建议从[镜像启动](../../Getting%20Started/Installation/installation.md)：
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * 确认配置中所有模型路径均可访问。
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/distill_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/distill_pipeline_start.md
index 7d0a3f726..303054955 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/distill_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/distill_pipeline_start.md	
@@ -191,7 +191,7 @@ bash examples/qwen2.5-7B-distill_megatron/run_distill_pipeline.sh
 * 确保已安装所有必要依赖：
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * 确认配置中所有模型路径均可访问。
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/dpo_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/dpo_pipeline_start.md
index f729e88dc..93ac2ce28 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/dpo_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/dpo_pipeline_start.md	
@@ -186,7 +186,7 @@ bash examples/qwen2.5-3B-dpo_megatron/run_dpo_pipeline.sh
 * 确保已安装所有必要依赖：
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * 确认配置中所有模型路径均可访问。
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/on_policy_distill_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/on_policy_distill_pipeline_start.md
index 739237aa8..12f442a53 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/on_policy_distill_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/on_policy_distill_pipeline_start.md	
@@ -358,7 +358,7 @@ bash examples/qwen3-8B-onpolicy-distill-megatron/run_onpolicy_distill_pipeline.s
 * 确保安装了所有必要的依赖：
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * 验证配置中的所有模型路径是否可访问。
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/rlvr_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/rlvr_pipeline_start.md
index 38678d803..2ffab0e88 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/rlvr_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/rlvr_pipeline_start.md	
@@ -214,11 +214,11 @@ rlvr流水线使用JSON格式的数据文件。不同领域需要特定字段：
   "source": "codeforeces",
   "difficulty": "0",
   "prompt": "你是一位专业的Python程序员。你将收到一个问题（问题描述）并生成一个正确的Python程序，该程序符合描述并能通过所有测试。\\n\\n### 问题：编写一个函数，接收一个不同整数的数组并返回所有可能的排列（任意顺序）。每个排列应表示为一个整数数组。该函数应能高效处理不同长度的数组。\\n\\n### 格式：你将使用以下起始代码编写问题的解决方案，并将代码包含在分隔符内。\\n```python\\ndef permute(nums):\\n```\\n\\n### 答案：（使用提供的格式和反引号）",
-  "messages": "[{\"role\": \"user\", \"content\": \"你是一位专业的Python程序员。你将收到一个问题（问题描述）并生成一个正确的Python程序，该程序符合描述并能通过所有测试。 \\n\\n### 问题：编写一个函数，接收一个不同整数的数组并返回所有可能的排列（任意顺序）。每个排列应表示为一个整数数组。该函数应能高效处理不同长度的数组。\\n\\n### 格式：你将使用以下起始代码编写问题的解决方案，并将代码包含在分隔符内。\\n```python\\ndef permute(nums):\\n```\\n\\n### 答案：（使用提供的格式和反引号）\"}]",
-  "ground_truth": "[\"def permute(nums):\\n    \\\"\\\"\\\"\\n    给定一个不同整数的数组，返回所有可能的排列。\\n    每个排列是一个整数数组。\\n    \\\"\\\"\\\"\\n    def backtrack(start, end):\\n        if start == end:\\n            permutations.append(nums[:])\\n        for i in range(start, end):\\n            nums[start], nums[i] = nums[i], nums[start]\\n            backtrack(start + 1, end)\\n            nums[start], nums[i] = nums[i], nums[start]\\n\\n    permutations = []\\n    backtrack(0, len(nums))\\n    return permutations\"]",
+  "messages": "[{\"role\": \"user\", \"content\": \"你是一位专业的Python程序员。你将收到一个问题（问题描述）并生成一个正确的Python程序，该程序符合描述并能通过所有测试。 \n\n### 问题：编写一个函数，接收一个不同整数的数组并返回所有可能的排列（任意顺序）。每个排列应表示为一个整数数组。该函数应能高效处理不同长度的数组。\n\n### 格式：你将使用以下起始代码编写问题的解决方案，并将代码包含在分隔符内。\n```python\ndef permute(nums):\n```\n\n### 答案：（使用提供的格式和反引号）\"}]",
+  "ground_truth": "[\"def permute(nums):\n    \\\"\\\"\\\"\n    给定一个不同整数的数组，返回所有可能的排列。\n    每个排列是一个整数数组。\n    \\\"\\\"\\\"\n    def backtrack(start, end):\n        if start == end:\n            permutations.append(nums[:])\n        for i in range(start, end):\n            nums[start], nums[i] = nums[i], nums[start]\n            backtrack(start + 1, end)\n            nums[start], nums[i] = nums[i], nums[start]\n\n    permutations = []\n    backtrack(0, len(nums))\n    return permutations\"]",
   "case_type": "pytest",
   "test_case_function": " ",
-  "test_cases": "[{\"assert_code\": \"\\n\\n\\ndef test_permute_single_element():\\n    assert permute([1]) == [[1]]\\n\\ndef test_permute_two_elements():\\n    result = permute([1, 2])\\n    expected = [[1, 2], [2, 1]]\\n    assert sorted(result) == sorted(expected)\\n\\ndef test_permute_three_elements():\\n    result = permute([1, 2, 3])\\n    expected = [[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 1, 2], [3, 2, 1]]\\n    assert sorted(result) == sorted(expected)\\n\\ndef test_permute_four_elements():\\n    result = permute([1, 2, 3, 4])\\n    expected = [\\n        [1, 2, 3, 4], [1, 2, 4, 3], [1, 3, 2, 4], [1, 3, 4, 2], [1, 4, 2, 3], [1, 4, 3, 2],\\n        [2, 1, 3, 4], [2, 1, 4, 3], [2, 3, 1, 4], [2, 3, 4, 1], [2, 4, 1, 3], [2, 4, 3, 1],\\n        [3, 1, 2, 4], [3, 1, 4, 2], [3, 2, 1, 4], [3, 2, 4, 1], [3, 4, 1, 2], [3, 4, 2, 1],\\n        [4, 1, 2, 3], [4, 1, 3, 2], [4, 2, 1, 3], [4, 2, 3, 1], [4, 3, 1, 2], [4, 3, 2, 1]\\n    ]\\n    assert sorted(result) == sorted(expected)\"}]",
+  "test_cases": "[{\"assert_code\": \"\n\n\ndef test_permute_single_element():\n    assert permute([1]) == [[1]]\n\ndef test_permute_two_elements():\n    result = permute([1, 2])\n    expected = [[1, 2], [2, 1]]\n    assert sorted(result) == sorted(expected)\n\ndef test_permute_three_elements():\n    result = permute([1, 2, 3])\n    expected = [[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 1, 2], [3, 2, 1]]\n    assert sorted(result) == sorted(expected)\n\ndef test_permute_four_elements():\n    result = permute([1, 2, 3, 4])\n    expected = [\n        [1, 2, 3, 4], [1, 2, 4, 3], [1, 3, 2, 4], [1, 3, 4, 2], [1, 4, 2, 3], [1, 4, 3, 2],\n        [2, 1, 3, 4], [2, 1, 4, 3], [2, 3, 1, 4], [2, 3, 4, 1], [2, 4, 1, 3], [2, 4, 3, 1],\n        [3, 1, 2, 4], [3, 1, 4, 2], [3, 2, 1, 4], [3, 2, 4, 1], [3, 4, 1, 2], [3, 4, 2, 1],\n        [4, 1, 2, 3], [4, 1, 3, 2], [4, 2, 1, 3], [4, 2, 3, 1], [4, 3, 1, 2], [4, 3, 2, 1]\n    ]\n    assert sorted(result) == sorted(expected)\"}]",
   "tag": "KodCode"
 }
 ```
@@ -313,7 +313,7 @@ bash examples/qwen2.5-7B-rlvr_megatron/run_rlvr_pipeline.sh
 * 确保安装了所有必要的依赖：
 
   ```bash
-  pip install -r requirements.txt
+  pip install -e .
   ```
 
 * 验证配置中的所有模型路径是否可访问。
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/sft_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/sft_pipeline_start.md
index 48e689089..7382dd4d2 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/sft_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/sft_pipeline_start.md	
@@ -229,7 +229,7 @@ bash examples/qwen2.5-7B-sft_megatron/run_sft_pipeline.sh
 ### 步骤2：准备环境和依赖
 
 ```bash
-pip install -r requirements.txt
+pip install -e .
 ```
 
 并确保：
diff --git a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/vl_rlvr_pipeline_start.md b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/vl_rlvr_pipeline_start.md
index e924dcc50..cd8a1b7d3 100644
--- a/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/vl_rlvr_pipeline_start.md	
+++ b/docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/vl_rlvr_pipeline_start.md	
@@ -291,10 +291,10 @@ bash examples/qwen2.5-vl-7B-rlvr/run_rlvr_pipeline.sh
 
 ### 步骤2：准备环境和依赖
 
-* 确保安装了所有必要的依赖。注意：VLM 流水线当前只支持使用 VLLM 作为推理引擎，因而需要选择使用对应的requirement文件：
+* 确保安装了所有必要的依赖。注意：VLM 流水线当前只支持使用 VLLM 作为推理引擎，因而需要选择使用对应的可选依赖：
 
   ```bash
-  pip install -r requirements_torch260_vllm.txt
+  pip install -e ".[torch260-vllm]"
   ```
 
 * 验证配置中的所有模型路径是否可访问。
diff --git a/pyproject.toml b/pyproject.toml
index 7d6a333bd..a81520ba4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,133 @@
+[build-system]
+requires = ["setuptools>=68.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "roll"
+version = "0.1.0"
+description = "ROLL: Reinforcement Learning Optimization for Large-Scale Learning"
+readme = "README.md"
+license = {text = "Apache-2.0"}
+requires-python = ">=3.10"
+dependencies = [
+    "ray[default,cgraph]==2.48.0",
+    "numpy<2.0a0,>=1.25",
+    "tensordict",
+    "sympy",
+    "modelscope",
+    "datasets==3.3.0",
+    "tqdm",
+    "peft==0.12.0",
+    "tyro>=0.5.7",
+    "pydantic",
+    "pytest",
+    "loralib",
+    "einops",
+    "isort",
+    "jsonlines",
+    "deprecated",
+    "trl==0.9.6",
+    "dacite",
+    "codetiming",
+    "more_itertools",
+    "pybase64",
+    "sglang-router",
+    "wandb",
+    "swanlab",
+    "math-verify",
+    "openai",
+    "langdetect",
+    "nltk>=3.8",
+    "gym",
+    "gymnasium[toy-text]",
+    "gym_sokoban",
+    "hydra-core",
+    "omegaconf",
+    "latex2sympy2==1.5.4",
+    "latex2sympy2_extended==1.10.1",
+    "antlr4-python3-runtime==4.9.3",
+    "mcore_adapter @ git+https://github.com/alibaba/roll.git#subdirectory=mcore_adapter",
+    "hatchling",
+    "editables",
+]
+
+[project.optional-dependencies]
+gem = [
+    "gem-llm @ git+https://github.com/axon-rl/gem.git@v0.0.4#egg=gem-llm@v0.0.4",
+    "mcp",
+]
+vision = [
+    "pycocotools",
+]
+torch280-sglang = [
+    "torch==2.8.0.*",
+    "torchvision==0.23.0.*",
+    "torchaudio==2.8.0.*",
+    "deepspeed==0.16.4",
+    "accelerate==0.34.2",
+    "transformers>=4.56.1",
+    "sglang[srt,torch-memory-saver]==0.5.2",
+    "flash-linear-attention",
+]
+torch280-vllm = [
+    "torch==2.8.0.*",
+    "torchvision==0.23.0.*",
+    "torchaudio==2.8.0.*",
+    "transformers>=4.56.1",
+    "deepspeed==0.16.4",
+    "accelerate==0.34.2",
+    "flash-attn",
+    "vllm==0.10.2",
+    "flash-linear-attention",
+]
+torch260-sglang = [
+    "torch==2.6.0.*",
+    "torchvision==0.21.0.*",
+    "torchaudio==2.6.0.*",
+    "flash-attn",
+    "transformer-engine[pytorch]==2.2.0",
+    "deepspeed==0.16.4",
+    "accelerate==0.34.2",
+    "sglang[srt,torch-memory-saver]==0.4.6.post4",
+    "transformers==4.51.1",
+    "cuda-bindings==12.9.0",
+]
+torch260-vllm = [
+    "torch==2.6.0.*",
+    "torchvision==0.21.0.*",
+    "torchaudio==2.6.0.*",
+    "flash-attn",
+    "transformer-engine[pytorch]==2.2.0",
+    "deepspeed==0.16.4",
+    "accelerate==0.34.2",
+    "vllm==0.8.4",
+]
+torch260-diffsynth = [
+    "torch==2.6.0.*",
+    "torchvision==0.21.0.*",
+    "torchaudio==2.6.0.*",
+    "flash-attn",
+    "deepspeed==0.16.4",
+    "accelerate==0.34.2",
+    "diffsynth",
+    "transformers==4.52.4",
+    "decord",
+    "pyext",
+    "pycocotools",
+    "scikit-image",
+    "diffusers==0.31.0",
+    "onnx",
+    "onnx2torch",
+]
+local-debug = [
+    "py-cpuinfo",
+    "deepspeed==0.16.4",
+    "matplotlib",
+]
+
+[tool.setuptools.packages.find]
+include = ["roll*"]
+
 [tool.black]
 line-length = 119
 target-version = ['py310']

From aadb15a3aede0704ce6d02fda5b99cda251ce45e Mon Sep 17 00:00:00 2001
From: ShouQue <dingtianwei.dtw@alibaba-inc.com>
Date: Mon, 20 Apr 2026 14:21:08 +0800
Subject: [PATCH 2/2] (fix) : fix unit tests.

---
 tests/agentic/env/test_frozen_lake.py         | 51 ++++++++++---------
 .../agentic/env_manager/config_load_utils.py  |  6 ++-
 .../env_manager/test_traj_env_manager.py      | 17 ++++---
 .../test_traj_env_manager_debug.py            | 19 ++++---
 tests/datasets/test_collator.py               |  2 +-
 tests/distributed/executor/test_cluster.py    | 15 ++++--
 .../scheduler/test_generate_scheduler.py      |  8 +--
 .../test_fsdp2_cp_qwen3_hf_equivalence.py     |  2 +-
 ...est_fsdp2_cp_qwen3_hf_rmpad_equivalence.py |  2 +-
 .../strategy/grad_norm/test_grad_norm_unit.py |  2 +
 tests/math/test_math_dataset.py               |  3 +-
 .../models/cuda_mem/test_ray_model_forward.py |  2 +-
 .../megatron/test_offload_states.py           |  2 +-
 13 files changed, 76 insertions(+), 55 deletions(-)

diff --git a/tests/agentic/env/test_frozen_lake.py b/tests/agentic/env/test_frozen_lake.py
index 3bf35370f..0fcb14da2 100644
--- a/tests/agentic/env/test_frozen_lake.py
+++ b/tests/agentic/env/test_frozen_lake.py
@@ -1,35 +1,36 @@
-from roll.pipeline.agentic.env import FrozenLakeEnvConfig, FrozenLakeEnv
+from roll.pipeline.agentic.env.frozen_lake import FrozenLakeEnv
 from roll.pipeline.agentic.utils import dump_frames_as_gif
 
 
 def test_frozen_lake():
-    config = FrozenLakeEnvConfig(size=4, p=0.8, is_slippery=False, map_seed=42)
-    env = FrozenLakeEnv(config)
+    """Test FrozenLake environment with a fixed action sequence (no keyboard input)."""
+    env = FrozenLakeEnv(
+        size=4,
+        p=0.8,
+        is_slippery=False,
+        map_seed=42,
+        render_mode="rgb_array"
+    )
     frames = []
-    print(env.reset(seed=42))
-    while True:
-        keyboard = input("Enter action: ")
-        if keyboard.lower() == "q":
-            break
-        try:
-            action = int(keyboard)
-        except Exception as e:
-            print("Invalid action, please enter a number")
-            continue
-        if action not in env.ACTION_LOOKUP:
-            print(f"Invalid action {action}, please enter a number between 1 and 4")
-            continue
-        obs, reward, done, info = env.step(action)
-        print()
-        print(obs, reward, done, info)
-        if action in env.ACTION_LOOKUP:
-            frames.append(env.render(mode="rgb_array"))
+    obs = env.reset(seed=42)
+    print(f"Initial observation: {obs}")
+    frames.append(env.render(mode="rgb_array"))
+
+    # Predefined action sequence to complete the game
+    # Actions: 1=Left, 2=Down, 3=Right, 4=Up (note: 0=Still)
+    actions = ['2', '2', '3', '3', '4', '3']  # Down, Down, Right, Right, Up, Right
+
+    for action in actions:
+        obs, reward, done,truncated, info = env.step(action)
+        print(f"Action: {action}, Obs: {obs}, Reward: {reward}, Done: {done}, Info: {info}")
+        frames.append(env.render(mode="rgb_array"))
         if done:
+            print("Game completed!")
             break
 
     # save the image
     dump_frames_as_gif(filename="./frozen_lake_result.gif", frames=frames)
-
-
-if __name__ == "__main__":
-    test_frozen_lake()
+    
+    # Basic assertions
+    assert len(frames) > 0, "Should have captured frames"
+    print(f"Test passed! Captured {len(frames)} frames.")
\ No newline at end of file
diff --git a/tests/agentic/env_manager/config_load_utils.py b/tests/agentic/env_manager/config_load_utils.py
index 96b642931..23f43a4bc 100644
--- a/tests/agentic/env_manager/config_load_utils.py
+++ b/tests/agentic/env_manager/config_load_utils.py
@@ -1,9 +1,13 @@
 from dacite import from_dict
 from hydra.experimental import compose, initialize
+from hydra.core.global_hydra import GlobalHydra
 from omegaconf import OmegaConf
 
 def make_pipeline_config(config_path, config_name, data_class):
-
+    # Clear any existing Hydra instance to allow re-initialization
+    if GlobalHydra.instance().is_initialized():
+        GlobalHydra.instance().clear()
+ 
     initialize(config_path=config_path)
     cfg = compose(config_name=config_name)
     pipeline_config = from_dict(data_class=data_class, data=OmegaConf.to_container(cfg, resolve=True))
diff --git a/tests/agentic/env_manager/test_traj_env_manager.py b/tests/agentic/env_manager/test_traj_env_manager.py
index 4f9fd56e7..cb5dbe641 100644
--- a/tests/agentic/env_manager/test_traj_env_manager.py
+++ b/tests/agentic/env_manager/test_traj_env_manager.py
@@ -10,6 +10,7 @@
 """
 import threading
 
+import pytest
 import ray
 
 from roll.distributed.scheduler.rollout_scheduler import GroupQueueManager
@@ -21,9 +22,15 @@
 from roll.pipeline.agentic.env_manager.vl_traj_env_manager import VLTrajEnvManager
 from tests.agentic.env_manager.config_load_utils import make_pipeline_config
 
+@pytest.fixture(scope="function")
+def ray_init():
+    """Initialize Ray for each test function and shutdown after."""
+    ray.init(log_to_driver=True, ignore_reinit_error=True)
+    yield
+    ray.shutdown()
 
-def test_debug_traj_env_manager():
-    ray.init(log_to_driver=True)
+
+def test_debug_traj_env_manager(ray_init):
     current_step = 0
 
     config_path = ""
@@ -61,8 +68,7 @@ def test_debug_traj_env_manager():
     env_manager.stop()
 
 
-def test_debug_vl_traj_env_manager():
-    ray.init(log_to_driver=True)
+def test_debug_vl_traj_env_manager(ray_init):
     current_step = 0
 
     config_path = ""
@@ -103,8 +109,7 @@ def test_debug_vl_traj_env_manager():
     env_manager.stop()
 
 
-def test_debug_step_env_manager():
-    ray.init(log_to_driver=True)
+def test_debug_step_env_manager(ray_init):
     current_step = 0
 
     config_path = ""
diff --git a/tests/agentic/env_manager/test_traj_env_manager_debug.py b/tests/agentic/env_manager/test_traj_env_manager_debug.py
index 5d328c85f..e18deee82 100644
--- a/tests/agentic/env_manager/test_traj_env_manager_debug.py
+++ b/tests/agentic/env_manager/test_traj_env_manager_debug.py
@@ -10,6 +10,7 @@
 """
 import threading
 
+import pytest
 import ray
 
 from roll.distributed.scheduler.rollout_scheduler import GroupQueueManager
@@ -21,10 +22,14 @@
 from roll.pipeline.agentic.env_manager.vl_traj_env_manager import VLTrajEnvManager
 from roll.utils.import_utils import safe_import_class
 from tests.agentic.env_manager.config_load_utils import make_pipeline_config
-
-
-def test_debug_traj_env_manager():
-    ray.init(log_to_driver=True)
+@pytest.fixture(scope="function")
+def ray_init():
+    """Initialize Ray for each test function and shutdown after."""
+    ray.init(log_to_driver=True, ignore_reinit_error=True)
+    yield
+    ray.shutdown()
+
+def test_debug_traj_env_manager(ray_init):
     current_step = 0
 
     config_path = ""
@@ -65,8 +70,7 @@ def test_debug_traj_env_manager():
     env_manager.stop()
 
 
-def test_debug_vl_traj_env_manager():
-    ray.init(log_to_driver=True)
+def test_debug_vl_traj_env_manager(ray_init):
     current_step = 0
 
     config_path = ""
@@ -107,8 +111,7 @@ def test_debug_vl_traj_env_manager():
     env_manager.stop()
 
 
-def test_debug_step_env_manager():
-    ray.init(log_to_driver=True)
+def test_debug_step_env_manager(ray_init):
     current_step = 0
 
     config_path = ""
diff --git a/tests/datasets/test_collator.py b/tests/datasets/test_collator.py
index 8221ccdbd..6bdcf0f7e 100644
--- a/tests/datasets/test_collator.py
+++ b/tests/datasets/test_collator.py
@@ -5,7 +5,7 @@
 
 
 def test_data_collator_with_padding_for_padded_keys():
-    tokenizer = AutoTokenizer.from_pretrained("/Users/pan/Downloads/huggingface/gpt2-imdb", padding_side="left")
+    tokenizer = AutoTokenizer.from_pretrained("lvwerra/gpt2-imdb", padding_side="left")
 
     tokenizer.pad_token_id = tokenizer.eos_token_id
 
diff --git a/tests/distributed/executor/test_cluster.py b/tests/distributed/executor/test_cluster.py
index c7c57da34..2f7e39be8 100644
--- a/tests/distributed/executor/test_cluster.py
+++ b/tests/distributed/executor/test_cluster.py
@@ -9,7 +9,12 @@
 from roll.distributed.executor.worker import Worker, RankInfo
 from roll.distributed.scheduler.decorator import register, Dispatch
 from roll.distributed.scheduler.resource_manager import ResourceManager
-
+@pytest.fixture(scope="function")
+def ray_init():
+    """Initialize Ray for each test function and shutdown after."""
+    ray.init(log_to_driver=True, ignore_reinit_error=True)
+    yield
+    ray.shutdown()
 
 @ray.remote
 class TestWorker(Worker):
@@ -60,10 +65,10 @@ def add(self, x: List):
         return res
 
 
-def test_cluster_run():
+def test_cluster_run(ray_init):
     ray.init(log_to_driver=True)
 
-    resource_manager = ResourceManager()
+    resource_manager = ResourceManager(num_gpus_per_node=8, num_nodes=1)
 
     test_worker_config = WorkerConfig(name="test_worker", world_size=2)
     test_cluster: Any = Cluster(
@@ -78,10 +83,10 @@ def test_cluster_run():
     assert res == [1, 2]
 
 
-def test_cluster_dp_mp_compute():
+def test_cluster_dp_mp_compute(ray_init):
     ray.init(log_to_driver=True)
 
-    resource_manager = ResourceManager()
+    resource_manager = ResourceManager(num_gpus_per_node=8, num_nodes=1)
 
     test_worker_config = WorkerConfig(name="test_worker", world_size=8)
     test_cluster: Any = Cluster(
diff --git a/tests/distributed/scheduler/test_generate_scheduler.py b/tests/distributed/scheduler/test_generate_scheduler.py
index c572ee895..8d1814762 100644
--- a/tests/distributed/scheduler/test_generate_scheduler.py
+++ b/tests/distributed/scheduler/test_generate_scheduler.py
@@ -11,7 +11,6 @@
 from roll.distributed.scheduler.generate_scheduler import (
     DynamicSamplingScheduler,
     RolloutContext,
-    LoadBalancer,
     ExperienceItem,
 )
 import roll.distributed.scheduler.user_defined_rollout_loop as udrl
@@ -26,9 +25,12 @@
 
 logger = get_logger()
 
-
+@pytest.mark.skip(reason="LoadBalancer class has been removed from the codebase")
 async def test_load_balancer():
-    load_balancer = LoadBalancer(mp_rank_zero={0:0, 1:0, 2:0, 3:0}, max_running_requests=2)
+    # LoadBalancer class no longer exists
+    # Original test code preserved for reference:
+    # load_balancer = LoadBalancer(mp_rank_zero={0:0, 1:0, 2:0, 3:0}, max_running_requests=2)
+    pytest.skip("LoadBalancer class removed")
 
     leases = []
     for i in range(8):
diff --git a/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_equivalence.py b/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_equivalence.py
index e8514c894..fc9cb9a1d 100644
--- a/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_equivalence.py
+++ b/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_equivalence.py
@@ -153,7 +153,7 @@ def test_fsdp2_cp_qwen3_hf_logits_equivalence():
     world_size = 2
     port = _find_free_port()
     model_id = os.environ.get(
-        "ROLL_TEST_QWEN3_MODEL_ID", "/home/dilixiati.dlxtmhte/.cache/openlm/hub/14ffd5928d24731fd670f04c645a5928"
+        "ROLL_TEST_QWEN3_MODEL_ID", "Qwen/Qwen3-4B-Instruct-2507"
     )
     mp.spawn(
         _worker_qwen3_hf_equivalence,
diff --git a/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_rmpad_equivalence.py b/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_rmpad_equivalence.py
index 7b3f294fc..25c9921f4 100644
--- a/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_rmpad_equivalence.py
+++ b/tests/distributed/strategy/context_parallel/test_fsdp2_cp_qwen3_hf_rmpad_equivalence.py
@@ -494,7 +494,7 @@ def test_fsdp2_cp_qwen3_hf_rmpad_logits_equivalence():
     port = _find_free_port()
     model_id = os.environ.get(
         "ROLL_TEST_QWEN3_MODEL_ID",
-        "/home/dilixiati.dlxtmhte/.cache/openlm/hub/14ffd5928d24731fd670f04c645a5928",
+        "Qwen/Qwen3-4B-Instruct-2507",
     )
     mp.spawn(
         _worker_qwen3_hf_rmpad_equivalence,
diff --git a/tests/distributed/strategy/grad_norm/test_grad_norm_unit.py b/tests/distributed/strategy/grad_norm/test_grad_norm_unit.py
index cd6c20f47..12c00cf20 100644
--- a/tests/distributed/strategy/grad_norm/test_grad_norm_unit.py
+++ b/tests/distributed/strategy/grad_norm/test_grad_norm_unit.py
@@ -131,6 +131,8 @@ class SimpleModel(nn.Module):
             def __init__(self):
                 super().__init__()
                 self.linear = nn.Linear(10, 5, bias=True)
+            def forward(self, x):
+                return self.linear(x)
 
         model = SimpleModel().to(device)
 
diff --git a/tests/math/test_math_dataset.py b/tests/math/test_math_dataset.py
index a45d49dc1..ee0b31169 100644
--- a/tests/math/test_math_dataset.py
+++ b/tests/math/test_math_dataset.py
@@ -8,8 +8,7 @@
 from roll.datasets.collator import DataCollatorWithPaddingForPaddedKeys
 
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
-dataset_path = "/home/weixun.wwx/Numina_hardrule_1212_lv2.json"
-dataset = load_dataset("json", data_files=dataset_path)["train"]
+dataset = load_dataset("PrimeIntellect/NuminaMath-QwQ-CoT-5M",split='train')
 
 
 # 加上format，然后转ids的func
diff --git a/tests/models/cuda_mem/test_ray_model_forward.py b/tests/models/cuda_mem/test_ray_model_forward.py
index 5ccc2065e..9fc8f3580 100644
--- a/tests/models/cuda_mem/test_ray_model_forward.py
+++ b/tests/models/cuda_mem/test_ray_model_forward.py
@@ -6,7 +6,7 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from cuda_mem.utils import log_gpu_memory_usage
+from .utils import log_gpu_memory_usage
 
 
 @ray.remote(num_gpus=1)
diff --git a/tests/third_party/megatron/test_offload_states.py b/tests/third_party/megatron/test_offload_states.py
index cb6416ed9..353398d97 100644
--- a/tests/third_party/megatron/test_offload_states.py
+++ b/tests/third_party/megatron/test_offload_states.py
@@ -36,7 +36,7 @@
 
 class McaModelCreator:
 
-    def __init__(self, optimizer_type, model_name="/data/cpfs_0/common/models/Qwen2.5-0.5B-Instruct"):
+    def __init__(self, optimizer_type, model_name="Qwen/Qwen2.5-0.5B-Instruct"):
         self.model_name = model_name
         if optimizer_type is None:
             self.megatron_train_args = TrainingArguments(