codecritic (Ziyuan Nan) · Commits · 38be25e7

Commit 38be25e7, authored Oct 25, 2024 by nzy
step4: refactor test reward & fix bug in code_template

Parent: 61469b06
Showing 5 changed files with 75 additions and 143 deletions:

  step4_test_reward_model.py          +0  -118  (deleted)
  step4_test_reward_model_client.py  +68   -14
  step4_test_sftorm.py                +2    -2
  utils.py                            +4    -3
  utils_dataset.py                    +1    -6
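Two things happen in this commit, both visible in the hunks below: the multi-GPU, server-launching test script step4_test_reward_model.py is deleted and its helpers are folded into step4_test_reward_model_client.py, which becomes a standalone argparse client; and code_template moves from utils_dataset.py into utils.py with newlines added around the braces. The snippet below illustrates the template fix; it is an inference from the diff (the commit message only says "fix bug in code_template"), not something stated in the commit itself:

import re

old_template = r"```python{}```"        # the line removed from utils_dataset.py
new_template = "```python\n{}\n```\n"   # equivalent to the triple-quoted template added to utils.py

code = "print('hello')"

# Old: "```pythonprint('hello')```" -- fence and code share one line,
# so the result is not a well-formed fenced code block.
print(old_template.format(code))

# New: a proper three-line fenced block that the extraction regex in utils.py
# (r"```python(.+?)```" with re.DOTALL) can strip back out cleanly.
print(new_template.format(code))
print(re.findall(r"```python(.+?)```", new_template.format(code), flags=re.DOTALL))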
step4_test_reward_model.py  (deleted, 100644 → 0)
import requests
import subprocess
import os
import json
import time
from tqdm.contrib.concurrent import thread_map
from copy import deepcopy

from utils import load_jsonl, save_jsonl, extract_code, read_config
from utils_metric import group_results, score_pass_at_k
from utils_dataset import code_template
from transformers import AutoTokenizer


def run_server(api_port, cuda_device, rm_inference_yaml_path, llamafactory_path):
    env = os.environ.copy()
    env["API_PORT"] = str(api_port)
    env["CUDA_VISIBLE_DEVICES"] = str(cuda_device)
    server_process = subprocess.Popen(
        ["llamafactory-cli", "api", rm_inference_yaml_path],
        stdout=subprocess.PIPE,
        env=env,
        cwd=llamafactory_path,
        text=True,
    )
    for line in server_process.stdout:
        if "start output" in line:
            break
    # TODO
    print(f"Started server with PID {server_process.pid} on port {api_port} and CUDA device {cuda_device}")
    return server_process


def start_servers(llamafactory_path, inference_cfg_path):
    cuda_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
    gpu_num = len(cuda_devices)
    server_processes = [
        run_server(8000 + i, cuda_devices[i], inference_cfg_path, llamafactory_path)
        for i in range(gpu_num)
    ]
    time.sleep(10)  # Wait for the servers to start (adjust the sleep time as needed)
    return server_processes


def stop_servers(server_processes):
    for server_process in server_processes:
        server_process.terminate()
        server_process.wait()
        print(f"Terminated server with PID {server_process.pid}")


def get_rewards_from_server(server_url: str, messages: list[str]):
    """
    Gets reward scores from the API server.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    rewards = json.loads(response.text)["scores"]
    return rewards


def preprocess_dataset(model_path, test_dataset, gpu_num):
    "apply chat_template and split the dataset to different gpu"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    result = []
    for i, item in enumerate(test_dataset):
        messages = deepcopy(item["messages"])
        messages[-1]["content"] = code_template.format(extract_code(messages[-1]["content"]))
        # https://github.com/hiyouga/LLaMA-Factory/blob/a45f3f5461e2936b9e119eda2ef4d8c7a4131740/tests/data/test_template.py#L58
        # llama factory's template should match tokenizer's `apply_chat_template`.
        item["format_str"] = [tokenizer.apply_chat_template(messages, tokenize=False)]
        result.append((item, 8000 + i % gpu_num))
    return result


def test_reward_model(item, api_port):
    server_url = f"http://0.0.0.0:{api_port}/v1/score/evaluation"
    score = get_rewards_from_server(server_url, item["format_str"])[0]
    return {
        "problem_id": item["problem_id"],
        "messages": item["messages"],
        "eval_result": item["eval_result"],
        "score": score,
    }


if __name__ == "__main__":
    cfg = read_config(["orm_testmodel"])
    orm_test_model = cfg["orm_testmodel"]
    raw_test_dataset = load_jsonl(cfg["dataset"]["minimal_test_path"])

    servers = start_servers(
        cfg["llamafactory_path"],
        cfg["orm"][orm_test_model]["inference_yaml_path"],
    )
    test_dataset = preprocess_dataset(
        cfg["orm"][orm_test_model]["model_path"], raw_test_dataset, len(servers)
    )

    results = thread_map(
        lambda arg: test_reward_model(*arg), test_dataset, max_workers=len(servers)
    )
    save_jsonl(results, cfg["orm"][orm_test_model]["minimal_test_score_path"])

    stop_servers(servers)

    groups = group_results(results, cfg["apps"])
    eval_results = [score_pass_at_k(groups, k, orm_test_model) for k in range(1, 32)]
    save_jsonl(eval_results, cfg["orm"][orm_test_model]["eval_result_path"])
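For reference, the deleted script's scoring call reduces to a single POST against the LLaMA-Factory API server. A minimal sketch: the port, the placeholder model name "model", and the example input string are assumptions, while the endpoint path, headers, and response field are taken verbatim from the code above.

import requests

# Assumed: a `llamafactory-cli api` server is already listening on port 8000.
server_url = "http://0.0.0.0:8000/v1/score/evaluation"
payload = {
    "model": "model",                                      # placeholder name, as in the script
    "messages": ["<chat-templated candidate solution>"],   # hypothetical input string
}
response = requests.post(server_url, json=payload, headers={"Content-Type": "application/json"})
print(response.json()["scores"])                           # one float reward per input string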
step4_test_reward_model_client.py
Old version (removed lines plus unchanged context):

from tqdm import tqdm

from utils import load_jsonl, save_jsonl, read_config
from utils_metric import group_results, score_pass_at_k
from step4_test_reward_model import preprocess_dataset, test_reward_model

if __name__ == "__main__":
    cfg = read_config(["orm_testmodel"])
    orm_test_model = cfg["orm_testmodel"]
    raw_test_dataset = load_jsonl(cfg["dataset"]["minimal_test_path"])
    model_path = cfg["orm"][orm_test_model]["model_path"]
    test_dataset = preprocess_dataset(model_path, raw_test_dataset, 1)
    results = [test_reward_model(*arg) for arg in tqdm(test_dataset)]
    save_jsonl(results, cfg["orm"][orm_test_model]["minimal_test_score_path"])
    # results = load_jsonl(result_path)
    groups = group_results(results, cfg["apps"])
    eval_results = [score_pass_at_k(groups, k, orm_test_model) for k in range(1, 16)]
    save_jsonl(eval_results, cfg["orm"][orm_test_model]["eval_result_path"])
    print(eval_results)

New version:

import argparse
from copy import deepcopy
import json
from tqdm import tqdm
import requests
from transformers import AutoTokenizer
import pprint
from pathlib import Path

from utils import load_jsonl, save_jsonl, extract_code, code_template
from utils_metric import group_results, score_pass_at_k


def get_rewards_from_server(server_url: str, messages: list[str]):
    """
    Gets reward scores from the API server.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    rewards = json.loads(response.text)["scores"]
    return rewards


def preprocess_dataset(model_path, test_dataset, gpu_num):
    "apply chat_template and split the dataset to different gpu"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    result = []
    for i, item in enumerate(test_dataset):
        messages = deepcopy(item["messages"])
        messages[-1]["content"] = code_template.format(
            extract_code(messages[-1]["content"])
        )
        # https://github.com/hiyouga/LLaMA-Factory/blob/a45f3f5461e2936b9e119eda2ef4d8c7a4131740/tests/data/test_template.py#L58
        # llama factory's template should match tokenizer's `apply_chat_template`.
        item["format_str"] = [tokenizer.apply_chat_template(messages, tokenize=False)]
        result.append((item, 8000 + i % gpu_num))
    return result


def test_reward_model(item, api_port):
    server_url = f"http://0.0.0.0:{api_port}/v1/score/evaluation"
    score = get_rewards_from_server(server_url, item["format_str"])[0]
    return {
        "problem_id": item["problem_id"],
        "messages": item["messages"],
        "eval_result": item["eval_result"],
        "score": score,
    }


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--test", type=str)
    parser.add_argument("--apps", type=str)
    args = parser.parse_args()

    home_path = Path(args.model).parent
    result_dir = home_path / "eval"
    result_dir.mkdir(exist_ok=True)

    # compute score
    score_path = result_dir / "scores.jsonl"
    raw_test_dataset = load_jsonl(args.test)
    test_dataset = preprocess_dataset(args.model, raw_test_dataset, 1)
    results = [test_reward_model(*arg) for arg in tqdm(test_dataset)]
    save_jsonl(results, score_path)
    # results = load_jsonl(result_path)

    # compute pass@k
    eval_result_path = result_dir / "passk.jsonl"
    groups = group_results(results, args.apps)
    eval_results = [score_pass_at_k(groups, k, home_path.stem) for k in range(1, 16)]
    save_jsonl(eval_results, eval_result_path)
    pprint.pp(eval_results)
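Note that the refactored client still posts to a reward-scoring server (with gpu_num fixed at 1, every item is routed to port 8000), but it no longer launches that server itself the way the deleted script's start_servers() did. A minimal sketch of starting one by hand, reusing the command and environment variables from run_server() above; the YAML filename is a placeholder.

import os
import subprocess

env = os.environ.copy()
env["API_PORT"] = "8000"            # the client scores against http://0.0.0.0:8000/v1/score/evaluation
env["CUDA_VISIBLE_DEVICES"] = "0"

# Same invocation the deleted run_server() used; "rm_inference.yaml" is a placeholder path.
server = subprocess.Popen(["llamafactory-cli", "api", "rm_inference.yaml"], env=env)

The client itself is then run as `python step4_test_reward_model_client.py --model <orm checkpoint dir> --test <minimal test jsonl> --apps <apps dataset path>`, and writes scores.jsonl and passk.jsonl into an eval/ directory next to the checkpoint.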
step4_test_sftorm.py
 from utils_vllm import vllm_score
-from utils import read_config, load_jsonl, save_jsonl, extract_code
+from utils import read_config, load_jsonl, save_jsonl, extract_code, code_template
-from utils_dataset import code_template, mk_critic_qa, mk_critic_verify
+from utils_dataset import mk_critic_qa, mk_critic_verify
 from utils_metric import group_results, score_pass_at_k
 from transformers import AutoTokenizer
...
utils.py
from pathlib import Path
import json
import os
import re
from codebleu import calc_codebleu
import sys
...

@@ -33,7 +31,10 @@ def save_json(data, file_path, indent=None):
 codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL)
+code_template = """```python
+{}
+```
+"""
 def extract_code(text: str):
     codes = [match.strip() for match in re.findall(codeblock_pattern, text)]
...
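Since extract_code and the new code_template now live side by side in utils.py, a quick self-check is possible. This sketch copies the regex and template from the hunk above; the helper's return behaviour is an assumption, because the return statement of utils.extract_code is outside the visible hunk.

import re

codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL)
code_template = """```python
{}
```
"""

def extract_first_code(text: str):
    # Assumed behaviour: return the first fenced block, stripped, like utils.extract_code.
    codes = [match.strip() for match in re.findall(codeblock_pattern, text)]
    return codes[0] if codes else None

snippet = "def add(a, b):\n    return a + b"
assert extract_first_code(code_template.format(snippet)) == snippet
print("code_template round-trips through the extraction regex")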
utils_dataset.py
-from utils import load_json, save_json
+from utils import load_json, save_json, code_template

 def mk_preference_dataset_info(dataset_name):
...

@@ -23,11 +23,6 @@ def mk_preference_dataset_info(dataset_name):
         }
     }
-# see utils.extract_code
-# TODO Check the code format in dataset
-code_template = r"```python{}```"
 def mk_preference_pair(instruction, chosen_code, rejected_code):
     return {
         "messages": [
...
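After this hunk, mk_preference_pair presumably formats code with the shared utils.code_template rather than the local single-line template deleted above. Only the signature and the first line of the returned dict are visible in the diff, so the sketch below is a hypothetical reconstruction for illustration; every key other than "messages" is a guess, not the actual file contents.

from utils import code_template  # now the single source of the fenced-block template

def mk_preference_pair(instruction, chosen_code, rejected_code):
    # Hypothetical body: the diff only shows `return {` and `"messages": [`.
    # The chosen/rejected layout below is an assumption about the preference format.
    return {
        "messages": [{"role": "user", "content": instruction}],
        "chosen": {"role": "assistant", "content": code_template.format(chosen_code)},
        "rejected": {"role": "assistant", "content": code_template.format(rejected_code)},
    }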