diff --git a/examples/ascend_examples/qwen3_30b_rlvl_fsdp2.yaml b/examples/ascend_examples/qwen3_30b_rlvr_fsdp2.yaml similarity index 97% rename from examples/ascend_examples/qwen3_30b_rlvl_fsdp2.yaml rename to examples/ascend_examples/qwen3_30b_rlvr_fsdp2.yaml index 01c3880f8..a1bea9c29 100644 --- a/examples/ascend_examples/qwen3_30b_rlvl_fsdp2.yaml +++ b/examples/ascend_examples/qwen3_30b_rlvr_fsdp2.yaml @@ -58,8 +58,8 @@ add_token_level_kl: false whiten_advantages: true -pretrain: /home/l00691321/models/Qwen3-30B-A3B -reward_pretrain: /home/l00691321/models/Qwen3-30B-A3B +pretrain: Qwen/Qwen3-30B-A3B +reward_pretrain: Qwen/Qwen3-30B-A3B actor_train: model_args: diff --git a/roll/third_party/fsdp2/model_update.py b/roll/third_party/fsdp2/model_update.py index c53bef122..b2dffb4c7 100644 --- a/roll/third_party/fsdp2/model_update.py +++ b/roll/third_party/fsdp2/model_update.py @@ -223,7 +223,7 @@ def _colocated_model_update(self): infer_parallel_tensors = [None] * infer_parallel_size if co_infer_rank == 0 else None global_dst_rank = dist.get_global_rank(self._infer_parallel_cpu_group, 0) dist.gather_object( - serialized_tensors, infer_parallel_tensors, group_dst=global_dst_rank, group=self._infer_parallel_cpu_group + serialized_tensors, infer_parallel_tensors, dst=global_dst_rank, group=self._infer_parallel_cpu_group ) if refs: ray.get(refs) diff --git a/roll/third_party/vllm/ray_distributed_executor.py b/roll/third_party/vllm/ray_distributed_executor.py index 001374ee9..150706ad3 100644 --- a/roll/third_party/vllm/ray_distributed_executor.py +++ b/roll/third_party/vllm/ray_distributed_executor.py @@ -88,15 +88,26 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", **ray_remote_kwar runtime_env = RuntimeEnv(env_vars=env_vars) assert current_platform.ray_device_key == "GPU" or "NPU" # NV+AMD GPUs, and Intel XPUs - worker = ray.remote( - num_cpus=0, - num_gpus={current_platform.ray_device_key: 0.01}, - runtime_env=runtime_env, - scheduling_strategy=PlacementGroupSchedulingStrategy( - placement_group=pg, - ), - **ray_remote_kwargs, - )(RayWorkerWrapper).remote(rpc_rank=rank) + if current_platform.ray_device_key == "GPU": + worker = ray.remote( + num_cpus=0, + num_gpus=0.01, + runtime_env=runtime_env, + scheduling_strategy=PlacementGroupSchedulingStrategy( + placement_group=pg, + ), + **ray_remote_kwargs, + )(RayWorkerWrapper).remote(rpc_rank=rank) + else: + worker = ray.remote( + num_cpus=0, + resources={current_platform.ray_device_key: 0.01}, + runtime_env=runtime_env, + scheduling_strategy=PlacementGroupSchedulingStrategy( + placement_group=pg, + ), + **ray_remote_kwargs, + )(RayWorkerWrapper).remote(rpc_rank=rank) worker_metadata.append(RayWorkerMetaData(worker=worker, created_rank=rank)) worker_ips = ray.get(