Commit 99d2c19b (unverified)
Authored Dec 22, 2024 by Guangming Sheng; committed via GitHub on Dec 21, 2024
Parent: 09568e60

[misc] feat: remove @ray.remote on workers to allow inheritance (#61)

Co-authored-by: Haibin Lin <haibin.lin@bytedance.com>
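The motivation for the change: decorating a class with @ray.remote replaces it with a Ray ActorClass, and Ray does not support inheriting from actor classes. Removing the decorator keeps the workers as plain Python classes that can be subclassed; callers apply ray.remote() at launch time instead. A minimal sketch (not verl code) of the difference:

import ray

class Worker:                            # stand-in for verl's Worker base class
    def ping(self):
        return "pong"

# Subclassing works because Worker stays a plain Python class. Had Worker
# been decorated with @ray.remote, this `class` statement would raise a
# TypeError: Ray actor classes cannot be inherited from.
class MyWorker(Worker):
    def ping(self):
        return "pong from subclass"

ray.init()
RemoteWorker = ray.remote(MyWorker)      # wrap at the call site instead
handle = RemoteWorker.remote()           # create the actor
print(ray.get(handle.ping.remote()))     # -> "pong from subclass"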
Showing 5 changed files with 10 additions and 16 deletions.
examples/split_placement/main_ppo_split.py   +4 -4
verl/trainer/main_generation.py              +2 -2
verl/trainer/main_ppo.py                     +4 -4
verl/workers/fsdp_workers.py                 +0 -3
verl/workers/megatron_workers.py             +0 -3
--- a/examples/split_placement/main_ppo_split.py
+++ b/examples/split_placement/main_ppo_split.py
@@ -135,9 +135,9 @@ def main_task(config):
     from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role
     role_worker_mapping = {
-        Role.ActorRollout: ActorRolloutRefWorker,
-        Role.Critic: CriticWorker,
-        Role.RefPolicy: ActorRolloutRefWorker
+        Role.ActorRollout: ray.remote(ActorRolloutRefWorker),
+        Role.Critic: ray.remote(CriticWorker),
+        Role.RefPolicy: ray.remote(ActorRolloutRefWorker)
     }
     # NOTE: initialze two resource pool
@@ -173,7 +173,7 @@ def main_task(config):
             from verl.workers.megatron_workers import RewardModelWorker
         else:
             raise NotImplementedError
-        role_worker_mapping[Role.RewardModel] = RewardModelWorker
+        role_worker_mapping[Role.RewardModel] = ray.remote(RewardModelWorker)
         mapping[Role.RewardModel] = critic_pool_id
     reward_fn = RewardManager(tokenizer=tokenizer, num_examine=0)
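Each value in role_worker_mapping is now the result of ray.remote(...), i.e. a Ray ActorClass rather than a plain class. A hedged sketch of how such an entry can be consumed downstream (the constructor arguments are illustrative, not verl's exact signature):

# Look up the ActorClass registered for a role and launch one actor from it.
# `.options(...)` and `.remote(...)` are standard Ray ActorClass calls.
worker_cls = role_worker_mapping[Role.ActorRollout]
actor = worker_cls.options(num_gpus=1).remote(config, role='actor_rollout')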
--- a/verl/trainer/main_generation.py
+++ b/verl/trainer/main_generation.py
@@ -14,7 +14,7 @@
 """
 Generate responses given a dataset of prompts
 """
+import ray
 import numpy as np
 import hydra
 import os
@@ -59,7 +59,7 @@ def main(config):
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token

-    ray_cls_with_init = RayClassWithInitArgs(cls=ActorRolloutRefWorker, config=config, role='rollout')
+    ray_cls_with_init = RayClassWithInitArgs(cls=ray.remote(ActorRolloutRefWorker), config=config, role='rollout')
     resource_pool = RayResourcePool(process_on_nodes=[config.trainer.n_gpus_per_node] * config.trainer.nnodes)
     wg = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=ray_cls_with_init)
     wg.init_model()
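RayClassWithInitArgs bundles the (now call-site-wrapped) remote class with its constructor arguments so the worker group can instantiate it later on its resource pool. A simplified sketch of that deferred-construction pattern (not verl's actual implementation):

class ClassWithInitArgs:
    """Hold a ray.remote(...)-wrapped class plus constructor args for later use."""

    def __init__(self, cls, *args, **kwargs):
        self.cls = cls          # a Ray ActorClass produced by ray.remote(...)
        self.args = args
        self.kwargs = kwargs

    def instantiate(self, **options):
        # Apply placement options (GPUs, scheduling) at launch time,
        # then construct the actor with the stored arguments.
        return self.cls.options(**options).remote(*self.args, **self.kwargs)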
--- a/verl/trainer/main_ppo.py
+++ b/verl/trainer/main_ppo.py
@@ -136,9 +136,9 @@ def main_task(config):
     from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role
     role_worker_mapping = {
-        Role.ActorRollout: ActorRolloutRefWorker,
-        Role.Critic: CriticWorker,
-        Role.RefPolicy: ActorRolloutRefWorker
+        Role.ActorRollout: ray.remote(ActorRolloutRefWorker),
+        Role.Critic: ray.remote(CriticWorker),
+        Role.RefPolicy: ray.remote(ActorRolloutRefWorker)
     }
     global_pool_id = 'global_pool'
@@ -164,7 +164,7 @@ def main_task(config):
             from verl.workers.megatron_workers import RewardModelWorker
         else:
             raise NotImplementedError
-        role_worker_mapping[Role.RewardModel] = RewardModelWorker
+        role_worker_mapping[Role.RewardModel] = ray.remote(RewardModelWorker)
         mapping[Role.RewardModel] = global_pool_id
     reward_fn = RewardManager(tokenizer=tokenizer, num_examine=0)
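For context, the pool ids assigned above tie each role to a block of GPUs. A hedged sketch of the wiring implied by the diff (names taken from the diff; the pool construction itself is illustrative):

# One shared pool spanning all nodes; every role is scheduled onto it.
global_pool_id = 'global_pool'
resource_pool_spec = {
    global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
}
mapping = {
    Role.ActorRollout: global_pool_id,
    Role.Critic: global_pool_id,
    Role.RefPolicy: global_pool_id,
}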
--- a/verl/workers/fsdp_workers.py
+++ b/verl/workers/fsdp_workers.py
@@ -40,7 +40,6 @@ logger = logging.getLogger(__file__)
 logger.setLevel(os.getenv('VERL_PPO_LOGGING_LEVEL', 'WARN'))

-@ray.remote
 class ActorRolloutRefWorker(Worker):
     """
     This worker can be instantiated as a standalone actor or a standalone rollout or a standalone reference policy
@@ -434,7 +433,6 @@ class ActorRolloutRefWorker(Worker):
             offload_fsdp_param_and_grad(module=self.actor_module_fsdp, offload_grad=self._is_offload_grad)

-@ray.remote
 class CriticWorker(Worker):

     def __init__(self, config):
@@ -642,7 +640,6 @@ class CriticWorker(Worker):
             offload_fsdp_param_and_grad(module=self.critic_module, offload_grad=self._is_offload_grad)

-@ray.remote
 class RewardModelWorker(Worker):
     """
     Note that we only implement the reward model that is subclass of AutoModelForSequenceClassification.
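With the decorators gone, the FSDP workers can be extended directly, which is the point of the commit. A hypothetical subclass (not part of this commit; the method name is illustrative):

import ray
from verl.workers.fsdp_workers import ActorRolloutRefWorker

class MyActorRolloutRefWorker(ActorRolloutRefWorker):
    # Hypothetical extension point added by a downstream project.
    def log_rollout_stats(self):
        ...

# The subclass is wrapped for Ray at launch time, exactly as the updated callers do.
RemoteWorker = ray.remote(MyActorRolloutRefWorker)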
--- a/verl/workers/megatron_workers.py
+++ b/verl/workers/megatron_workers.py
@@ -60,7 +60,6 @@ def set_random_seed(seed):
     # os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

-@ray.remote
 class ActorRolloutRefWorker(MegatronWorker):
     """
     This worker can be instantiated as a standalone actor or a standalone rollout or a standalone reference policy
@@ -406,7 +405,6 @@ class ActorRolloutRefWorker(MegatronWorker):
         pass

-@ray.remote
 class CriticWorker(MegatronWorker):

     def __init__(self, config):
@@ -575,7 +573,6 @@ class CriticWorker(MegatronWorker):
         pass

-@ray.remote
 class RewardModelWorker(MegatronWorker):
     """
     Note that we only implement the reward model that is subclass of AutoModelForSequenceClassification.
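The Megatron workers get the same treatment as the FSDP ones: pure deletions (+0 -3), one decorator per worker class. Any external code that previously used these classes as ready-made Ray actors must now wrap them itself, e.g.:

import ray
from verl.workers.megatron_workers import CriticWorker

RemoteCritic = ray.remote(CriticWorker)  # was implicit before this commit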