alibaba · PanAndy · May 26, 2026 · May 26, 2026
diff --git a/...ascend_examples/qwen3_30b_rlvl_fsdp2.yaml → ...ascend_examples/qwen3_30b_rlvr_fsdp2.yaml b/...ascend_examples/qwen3_30b_rlvl_fsdp2.yaml → ...ascend_examples/qwen3_30b_rlvr_fsdp2.yaml
@@ -58,8 +58,8 @@ add_token_level_kl: false
 
 whiten_advantages: true
 
-pretrain: /home/l00691321/models/Qwen3-30B-A3B
-reward_pretrain: /home/l00691321/models/Qwen3-30B-A3B
+pretrain: Qwen/Qwen3-30B-A3B
+reward_pretrain: Qwen/Qwen3-30B-A3B
 
 actor_train:
   model_args:

diff --git a/roll/third_party/fsdp2/model_update.py b/roll/third_party/fsdp2/model_update.py
@@ -223,7 +223,7 @@ def _colocated_model_update(self):
                     infer_parallel_tensors = [None] * infer_parallel_size if co_infer_rank == 0 else None
                     global_dst_rank = dist.get_global_rank(self._infer_parallel_cpu_group, 0)
                     dist.gather_object(
-                        serialized_tensors, infer_parallel_tensors, group_dst=global_dst_rank, group=self._infer_parallel_cpu_group
+                        serialized_tensors, infer_parallel_tensors, dst=global_dst_rank, group=self._infer_parallel_cpu_group
                     )
             if refs:
                 ray.get(refs)

diff --git a/roll/third_party/vllm/ray_distributed_executor.py b/roll/third_party/vllm/ray_distributed_executor.py
@@ -88,15 +88,26 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", **ray_remote_kwar
             runtime_env = RuntimeEnv(env_vars=env_vars)
             assert current_platform.ray_device_key == "GPU" or "NPU"
             # NV+AMD GPUs, and Intel XPUs
-            worker = ray.remote(
-                num_cpus=0,
-                num_gpus={current_platform.ray_device_key: 0.01},
-                runtime_env=runtime_env,
-                scheduling_strategy=PlacementGroupSchedulingStrategy(
-                    placement_group=pg,
-                ),
-                **ray_remote_kwargs,
-            )(RayWorkerWrapper).remote(rpc_rank=rank)
+            if current_platform.ray_device_key == "GPU":
+                worker = ray.remote(
+                    num_cpus=0,
+                    num_gpus=0.01,
+                    runtime_env=runtime_env,
+                    scheduling_strategy=PlacementGroupSchedulingStrategy(
+                        placement_group=pg,
+                    ),
+                    **ray_remote_kwargs,
+                )(RayWorkerWrapper).remote(rpc_rank=rank)
+            else:
+                worker = ray.remote(
+                    num_cpus=0,
+                    resources={current_platform.ray_device_key: 0.01},
+                    runtime_env=runtime_env,
+                    scheduling_strategy=PlacementGroupSchedulingStrategy(
+                        placement_group=pg,
+                    ),
+                    **ray_remote_kwargs,
+                )(RayWorkerWrapper).remote(rpc_rank=rank)
             worker_metadata.append(RayWorkerMetaData(worker=worker, created_rank=rank))
 
         worker_ips = ray.get(