Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
prm
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZhangXiaoyun
prm
Commits
a7508d55
Commit
a7508d55
authored
Feb 28, 2025
by
ZhangXiaoyun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
redis
parent
d70794a9
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
114 additions
and
8 deletions
+114
-8
openr/prm/infer_fns.py
+28
-1
openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
+1
-1
openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
+67
-0
openr/scripts/eval/beam_search.sh
+18
-6
No files found.
openr/prm/infer_fns.py
View file @
a7508d55
...
@@ -10,6 +10,19 @@ import multiprocessing
...
@@ -10,6 +10,19 @@ import multiprocessing
lock
=
multiprocessing
.
Lock
()
lock
=
multiprocessing
.
Lock
()
print
(
id
(
lock
),
os
.
getpid
())
print
(
id
(
lock
),
os
.
getpid
())
import redis

# Process-local handle to a shared Redis store used for cross-process
# coordination (see set_shared_value / get_shared_value below).
# NOTE(review): host/port/db are hard-coded; this assumes a redis-server is
# already listening on 127.0.0.1:6379 — confirm against the launch scripts.
redis_client = redis.Redis(host='127.0.0.1', port=6379, db=0)
def set_shared_value(key, value):
    """Store *value* under *key* in the shared Redis instance.

    Counterpart of get_shared_value(); values written here are read back
    as ints by that function.
    """
    redis_client.set(key, value)
def get_shared_value(key):
    """Look up *key* in the shared Redis store.

    Returns the stored value decoded as an int, or None when the key is
    absent (or holds an empty value).
    """
    raw = redis_client.get(key)
    return int(raw.decode('utf-8')) if raw else None
question_item_map
=
{}
question_item_map
=
{}
with
open
(
"./envs/MATH/dataset/test500.jsonl"
,
encoding
=
"utf-8"
)
as
f
:
with
open
(
"./envs/MATH/dataset/test500.jsonl"
,
encoding
=
"utf-8"
)
as
f
:
for
line
in
f
:
for
line
in
f
:
...
@@ -85,11 +98,24 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
...
@@ -85,11 +98,24 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
# "stop_token_ids": [151643, 151643],
# "stop_token_ids": [151643, 151643],
}
}
sampling_params
=
SamplingParams
(
**
inference_params
)
sampling_params
=
SamplingParams
(
**
inference_params
)
step_scores
=
[]
for
i
in
range
(
len
(
inputs
)):
step_score
=
get_shared_value
(
inputs
[
i
])
if
step_score
is
None
:
break
# invers
if
random
.
random
()
>
acc
:
step_score
=
1
-
step_score
step_scores
.
append
(
step_score
)
inputs
=
inputs
[
len
(
step_scores
):]
global
lock
global
lock
with
lock
:
with
lock
:
outputs
=
model
.
generate
(
inputs
,
sampling_params
)
outputs
=
model
.
generate
(
inputs
,
sampling_params
)
step_scores
=
[]
for
output
in
outputs
:
for
output
in
outputs
:
step
=
output
.
prompt
step
=
output
.
prompt
...
@@ -104,6 +130,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
...
@@ -104,6 +130,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
break
break
# invers
# invers
set_shared_value
(
step
,
step_score
)
if
random
.
random
()
>
acc
:
if
random
.
random
()
>
acc
:
step_score
=
1
-
step_score
step_score
=
1
-
step_score
step_scores
.
append
(
step_score
)
step_scores
.
append
(
step_score
)
...
...
openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
View file @
a7508d55
...
@@ -13,7 +13,7 @@ PYTHON_EXECUTABLE=$(which python3)
...
@@ -13,7 +13,7 @@ PYTHON_EXECUTABLE=$(which python3)
MODEL_BASE
=
/share/collab/codemodel/models
MODEL_BASE
=
/share/collab/codemodel/models
CUDA_DEVICE_BASE
=
0
CUDA_DEVICE_BASE
=
0
POLICY_MODEL_NAME
=
Qwen2.5-Math-
1.5
B-Instruct
POLICY_MODEL_NAME
=
Qwen2.5-Math-
7
B-Instruct
# VALUE_MODEL_NAME=qwen_prm/checkpoint-6898/
# VALUE_MODEL_NAME=qwen_prm/checkpoint-6898/
# VALUE_MODEL_NAME=Qwen/Qwen2.5-Math-7B-PRM
# VALUE_MODEL_NAME=Qwen/Qwen2.5-Math-7B-PRM
VALUE_MODEL_NAME
=
${
POLICY_MODEL_NAME
}
_RM
VALUE_MODEL_NAME
=
${
POLICY_MODEL_NAME
}
_RM
...
...
openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
0 → 100644
View file @
a7508d55
# Launch a FastChat controller plus vLLM policy workers and gold reward-model
# workers inside one tmux session ("FastChat"), configured for the
# Qwen2.5-Math-7B speed setup.
set -e

# Per-SLURM-job Ray scratch dir so concurrent jobs do not collide.
export RAY_TEMP_DIR="/tmp/ray_$SLURM_JOBID"
echo "RAY TEMP DIR is $RAY_TEMP_DIR"

HOST_ADDR=0.0.0.0
CONTROLER_PORT=28777       # FastChat controller port (sic: "CONTROLER")
WORKER_BASE_PORT=30010     # first worker port; workers count upward from here
ACC=1.0                    # reward-model accuracy, forwarded via --acc

MODEL_BASE=/share/collab/codemodel/models
CUDA_DEVICE_BASE=0
POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct

# CLI overrides: --acc <float>, --policy_model_name <name>.
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --acc) ACC="$2"; shift ;;
        --policy_model_name) POLICY_MODEL_NAME="$2"; shift ;;
        *) echo "Unknown parameter passed: $1"; exit 1 ;;
    esac
    shift
done

VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
# The value worker loads the same weights as the policy model.
VALUE_MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
echo PYTHON_EXECUTABLE=$(which python3)
PYTHON_EXECUTABLE=$(which python3)
LOGDIR=logs_fastchat

tmux start-server
tmux new-session -s FastChat -n controller -d
tmux send-keys "export LOGDIR=${LOGDIR}" Enter
tmux send-keys "$PYTHON_EXECUTABLE -m fastchat.serve.controller --port ${CONTROLER_PORT} --host $HOST_ADDR" Enter

NUM_LM_WORKER=2
NUM_RM_WORKER=14

echo "Wait 5 seconds ..."
sleep 5

echo "Starting workers"
# Policy (language-model) workers: all pinned to GPU $CUDA_DEVICE_BASE.
for i in $(seq 0 $((NUM_LM_WORKER-1)))
do
  WORKER_PORT=$((WORKER_BASE_PORT+i))
  tmux new-window -n policy_worker_$i
  tmux send-keys "export LOGDIR=${LOGDIR}" Enter
  tmux send-keys "CUDA_VISIBLE_DEVICES=$((CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.vllm_worker --model-path $MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.45" Enter
done

# start value service
for i in $(seq 0 $((NUM_RM_WORKER-1)))
do
  # Ports continue after the policy workers; two RM workers share each GPU
  # (integer division by 2 below).
  WORKER_PORT=$((i+WORKER_BASE_PORT+NUM_LM_WORKER))
  GPU_ID=$(((i + NUM_LM_WORKER) / 2 + CUDA_DEVICE_BASE))
  tmux new-window -n value_worker
  tmux send-keys "export LOGDIR=${LOGDIR}" Enter
  tmux send-keys "CUDA_VISIBLE_DEVICES=$GPU_ID $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.45 --acc $ACC" Enter
done

# NOTE(review): redis-server is launched last, into whichever tmux window was
# created most recently, after the reward workers have already been started —
# confirm the workers tolerate Redis coming up late, and consider an explicit
# target window (tmux send-keys -t ...).
tmux send-keys "redis-server ~/redis_conf/redis.conf" Enter
openr/scripts/eval/beam_search.sh
View file @
a7508d55
export
RAY_TEMP_DIR
=
"/tmp/ray_
$SLURM_JOBID
"
export
RAY_TEMP_DIR
=
"/tmp/ray_
$SLURM_JOBID
"
echo
"RAY TEMP DIR is
$RAY_TEMP_DIR
"
echo
"RAY TEMP DIR is
$RAY_TEMP_DIR
"
POLICY_MODEL_NAME
=
Qwen2.5-Math-7B-Instruct
ACC
=
1.0
while
[[
"$#"
-gt
0
]]
;
do
case
$1
in
--acc
)
ACC
=
"
$2
"
;
shift
;;
--policy_model_name
)
POLICY_MODEL_NAME
=
"
$2
"
;
shift
;;
*
)
echo
"Unknown parameter passed:
$1
"
;
exit
1
;;
esac
shift
done
VALUE_MODEL_NAME
=
${
POLICY_MODEL_NAME
}
_RM
SAVE_DIR
=
"
${
POLICY_MODEL_NAME
}
/
${
ACC
}
"
python reason/evaluation/evaluate.py
\
python reason/evaluation/evaluate.py
\
--LM
Qwen2.5-Math-1.5B-Instruct
\
--LM
$POLICY_MODEL_NAME
\
--RM
Qwen2.5-Math-1.5B-Instruct_RM
\
--RM
$VALUE_MODEL_NAME
\
--task_name
MATH
\
--task_name
MATH
\
--temperature
0.7
\
--temperature
0.7
\
--max_new_tokens
2048
\
--max_new_tokens
2048
\
--num_sequence
1
\
--num_sequence
1
\
--tree_max_width
4
\
--tree_max_width
4
\
--tree_max_depth
50
\
--tree_max_depth
50
\
--save_dir
debug
\
--save_dir
$SAVE_DIR
\
--method
beam_search
\
--method
beam_search
\
--num_worker
64
\
--num_worker
64
\
--controller_addr
http://0.0.0.0:28777
--controller_addr
http://0.0.0.0:28777
# math-shepherd-mistral-7b-prm
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment