Ziyuan Nan / codecritic · Commits

Commit 6405c330 authored Jan 08, 2025 by nanziyuan
fix bugs

parent 048ea73a
Showing 5 changed files with 144 additions and 123 deletions:

    codecritic/cli/eval.py        +26  -44
    codecritic/cli/test_genrm.py  +21  -6
    codecritic/cli/test_orm.py    +17  -6
    scripts/algolr.sh             +19  -9
    scripts/train_orm.sh          +61  -58
codecritic/cli/eval.py  (+26 -44)  · view file @ 6405c330

 import argparse
+from collections import defaultdict
+import json
 from functools import partial
-from pathlib import Path

 import codecritic.evaluation.metric as metric
-from codecritic.utils.json import load_jsonl, save_jsonl
+from codecritic.utils.json import load_jsonl


-def eval(samples_path):
-    model, testset = samples_path.stem.split('-')[:2]
-
-    def f(item):
-        item["model"] = model
-        item["testset"] = testset
-
-    samples = load_jsonl(samples_path)
+def eval(scores):
     ks = list(range(1, 17))
     results = []
-    results.append(metric.pass_at_k(samples, ks))
-    results.append(metric.top_at_k(samples, ks, metric.postive_and_negative))
-    results.append(metric.top_at_k(samples, ks, metric.positive_only))
+    results.extend(metric.pass_at_k(scores, ks))
+    results.extend(metric.pass_at_k(scores, [50]))
+    results.extend(metric.top_at_k(scores, ks, metric.positive_only))
+
+    if "negative_score" in scores[0]:
+        results.extend(metric.top_at_k(scores, ks, metric.postive_and_negative))

     for i in range(4):
         threshold = 0.5 + i * 0.1
         score_func = partial(metric.pos_neg_filter_uncertain, threshold=threshold)
-        results.append(metric.top_at_k(samples, ks, score_func))
+        results.extend(metric.top_at_k(scores, ks, score_func))

-    return list(map(f, results))
+    return results


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--sample_dir", type=str, default=None,
-        help="Path to the directory containing samples. If not provided, cached results will be used."
-    )
-    parser.add_argument("--out_dir", type=str, help="path/to/output_dir")
-    parser.add_argument(
-        "--score_func", type=str, default="all",
-        choices=["all", "posonly", "posneg", "posneg_filter"],  # Add valid options
-        help="Select the scoring function to use. Default: 'all'."
-    )
-    parser.add_argument("--plot", type=str, help="path/to/plot")
+    parser.add_argument("--score", type=str, help="path/to/score")
     args = parser.parse_args()

-    outdir = Path(args.out_dir)
-    if args.sample_dir:
-        for samples_path in Path(args.sample_dir).glob("*.jsonl"):
-            out_path = outdir / (samples_path.stem + "-eval.jsonl")
-            if not out_path.exists():
-                eval_results = eval(samples_path)
-                save_jsonl(eval_results, out_path)
-
-    for out_path in outdir.glob("*.jsonl"):
-        pass
+    scores = load_jsonl(args.score)
+    groups = defaultdict(list)
+    for item in scores:
+        groups[item["dataset"]].append(item)
+
+    for dataset, lst in groups.items():
+        results = eval(lst)
+        for r in results:
+            r["dataset"] = dataset
+            print(json.dumps(r))
\ No newline at end of file
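After this change, eval.py no longer walks a sample directory and writes per-file "*-eval.jsonl" outputs; it reads one score file, groups records by their "dataset" field, and prints every metric record as a JSON line tagged with that dataset. Below is a minimal sketch of a downstream consumer of that stream; the "dataset" key comes from the diff, while the piping setup and everything else is illustrative.

import json
import sys
from collections import defaultdict

# Collect the JSON lines printed by eval.py (e.g. piped in on stdin) per dataset.
by_dataset = defaultdict(list)
for line in sys.stdin:
    line = line.strip()
    if line:
        record = json.loads(line)
        by_dataset[record["dataset"]].append(record)

for dataset, records in by_dataset.items():
    print(f"{dataset}: {len(records)} metric records")

A hypothetical invocation would look something like "python -m codecritic.cli.eval --score scores.jsonl | python summarize.py"; the module path follows the repository layout, the file names are made up.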
codecritic/cli/test_genrm.py  (+21 -6)  · view file @ 6405c330

 import argparse
+from collections import defaultdict
 from functools import partial
 import os

 from transformers import AutoTokenizer
 from vllm import SamplingParams

-from codecritic.dataset.genrm_prompt import THINK_MESSAGE, JUDGE_MESSAGE, JUDGE_TOEKNS
+from codecritic.dataset.genrm_prompt import JUDGE_MESSAGE, JUDGE_TOEKNS
 from codecritic.utils.inference import generate_worker, score_worker
 from codecritic.utils.parallel import model_map
 from codecritic.utils.json import load_jsonl, save_jsonl
+import codecritic.dataset.algolr_prompt as algolr_prompt


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, help="path/to/model")
-    parser.add_argument("--sample", type=str, help="path/to/sample")
+    parser.add_argument("--testset", type=str, help="path/to/testset")
     parser.add_argument("--output", type=str, help="path/to/score")
     parser.add_argument("--reasoning", action="store_true", help="enable reasoning")
     parser.add_argument(
@@ -31,11 +33,24 @@ if __name__ == "__main__":
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
     tokenizer = AutoTokenizer.from_pretrained(args.model)

-    dataset = load_jsonl(args.sample)
+    dataset = load_jsonl(args.testset)
+
+    ds = defaultdict(list)
+    for item in dataset:
+        ds[item["task_id"]].append(item)
+
+    unsolvable = []
+    dataset = []
+    for task_id, items in ds.items():
+        if all([not x["pass"] for x in items]):
+            for item in items:
+                item["positive_score"] = 0
+                item["negative_score"] = 0
+            unsolvable.extend(items)
+        else:
+            dataset.extend(items)

     if args.reasoning:
-        for item in dataset:
-            item["messages"].append(THINK_MESSAGE)
+        dataset = [algolr_prompt.process_to_reason_prompt(x, None) for x in dataset]

     sampling_params = SamplingParams(
         n=1,
@@ -68,4 +83,4 @@ if __name__ == "__main__":
     )
     dataset = model_map(worker, dataset, args.tp)

-    save_jsonl(dataset, args.output)
+    save_jsonl(dataset + unsolvable, args.output)
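The main addition here is the handling of tasks for which no sampled solution passes: their items are given zero positive/negative scores up front, held out of model scoring, and appended back before saving. A small self-contained sketch of that split, with the field names ("task_id", "pass", "positive_score", "negative_score") taken from the diff and the example data made up:

from collections import defaultdict

def split_unsolvable(dataset):
    # Group samples by task, then zero-score tasks where every sample fails.
    ds = defaultdict(list)
    for item in dataset:
        ds[item["task_id"]].append(item)

    solvable, unsolvable = [], []
    for task_id, items in ds.items():
        if all(not x["pass"] for x in items):
            for item in items:
                item["positive_score"] = 0
                item["negative_score"] = 0
            unsolvable.extend(items)
        else:
            solvable.extend(items)
    return solvable, unsolvable

samples = [
    {"task_id": "t1", "pass": False},
    {"task_id": "t1", "pass": True},
    {"task_id": "t2", "pass": False},
]
solvable, unsolvable = split_unsolvable(samples)
print(len(solvable), len(unsolvable))  # 2 1

Only the solvable items go through the generative reward model; the unsolvable ones are merged back in the final save_jsonl call.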
codecritic/cli/test_orm.py  (+17 -6)  · view file @ 6405c330

 import argparse
+from collections import defaultdict
 import json

 import requests
 from tqdm import tqdm
@@ -21,18 +22,28 @@ def get_rewards_from_server(server_url: str, messages: list[str]):

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, help="path/to/model")
-    parser.add_argument("--sample", type=str, help="path/to/sample")
+    parser.add_argument("--testset", type=str, help="path/to/testset")
     parser.add_argument("--output", type=str, help="path/to/score")
     args = parser.parse_args()

     # compute score
-    dataset = load_jsonl(args.sample)
+    dataset = load_jsonl(args.testset)
+
+    ds = defaultdict(list)
+    for item in dataset:
+        ds[item["task_id"]].append(item)
+    for task_id, items in ds.items():
+        if all([not x["pass"] for x in items]):
+            for item in items:
+                item["positive_score"] = 0

     server_url = "http://0.0.0.0:5000/get_reward"
     tokenizer = AutoTokenizer.from_pretrained(args.model)

     for item in tqdm(dataset):
-        query = tokenizer.apply_chat_template(item["messages"], tokenize=False)
-        score = get_rewards_from_server(server_url, [query])[0]
-        item["score"] = score
+        if 'positive_score' not in item:
+            query = tokenizer.apply_chat_template(item["messages"], tokenize=False)
+            score = get_rewards_from_server(server_url, [query])[0]
+            item["positive_score"] = score

     save_jsonl(dataset, args.output)
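The hunk header names the helper this loop relies on, get_rewards_from_server(server_url: str, messages: list[str]), but its body lies outside the diff. A rough guess at such a client is sketched below; the JSON request/response shape ({"query": ...} in, {"rewards": ...} out) is an assumption and may not match the actual serve_rm endpoint.

import requests

def get_rewards_from_server(server_url: str, messages: list[str]):
    # POST the chat-templated strings and read back one scalar reward per message.
    # The payload and response keys here are assumptions, not the confirmed API.
    response = requests.post(server_url, json={"query": messages})
    response.raise_for_status()
    return response.json()["rewards"]

Note that with the new loop, items already carrying a positive_score (the pre-zeroed unsolvable tasks) never reach the server.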
scripts/algolr.sh  (+19 -9)  · view file @ 6405c330

@@ -13,18 +13,20 @@ apps="/lustre/S/nanziyuan/datasets/apps/"
 sft="${project}/data/train/${modelname}-sft.jsonl"
 ftmodel="${project}/model/qwen25_coder_inst_7b-algolr"

+testset="/lustre/S/nanziyuan/projects/ccc/data/test/qwen25_coder_inst-apps-test.jsonl"
+evalresults="/lustre/S/nanziyuan/projects/ccc/data/eval/qwen25_code_inst-apps-test-algolr-score.jsonl"
+
 ##
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-python -m codecritic.cli.algolr \
-    --model ${model} \
-    --dataset ${trainset} \
-    --pairinfo ${train_selected_pairs} \
-    --apps ${apps} \
-    --output ${sft} \
-    --level beginner \
-    --tp 1
+# export CUDA_VISIBLE_DEVICES=0,1,2,3
+# python -m codecritic.cli.algolr \
+#     --model ${model} \
+#     --dataset ${trainset} \
+#     --pairinfo ${train_selected_pairs} \
+#     --apps ${apps} \
+#     --output ${sft} \
+#     --level beginner \
+#     --tp 1

 deepspeed --module \
@@ -50,3 +52,11 @@ openrlhf.cli.train_sft \
     --load_checkpoint \
     --gradient_checkpointing \
     --use_tensorboard "${ftmodel}_log"
+
+python -m codecritic.cli.test_genrm \
+    --model ${model} \
+    --testset ${testset} \
+    --output ${evalresults} \
+    --reasoning \
+    --tp 1
scripts/train_orm.sh  (+61 -58)  · view file @ 6405c330

@@ -6,66 +6,69 @@ project="/lustre/S/nanziyuan/projects/ccc"
 model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
 ftmodel="${project}/model/qwen25_coder_inst_7b-orm"

-deepspeed --module \
-openrlhf.cli.train_rm \
-    --save_path ${ftmodel} \
-    --save_steps -1 \
-    --logging_steps 1 \
-    --eval_steps -1 \
-    --train_batch_size 256 \
-    --micro_train_batch_size 1 \
-    --pretrain ${model} \
-    --bf16 \
-    --max_epochs 1 \
-    --max_len 8192 \
-    --zero_stage 3 \
-    --learning_rate 9e-6 \
-    --dataset ${dataset} \
-    --apply_chat_template \
-    --prompt_key messages \
-    --chosen_key chosen \
-    --rejected_key rejected \
-    --flash_attn \
-    --load_checkpoint \
-    --gradient_checkpointing \
-    --use_tensorboard "${ftmodel}_log"
+testset="/lustre/S/nanziyuan/projects/ccc/data/test/qwen25_coder_inst-apps-test.jsonl"
+evalresults="/lustre/S/nanziyuan/projects/ccc/data/eval/qwen25_code_inst-apps-test-orm-score.jsonl"
+
+# deepspeed --module \
+# openrlhf.cli.train_rm \
+#     --save_path ${ftmodel} \
+#     --save_steps -1 \
+#     --logging_steps 1 \
+#     --eval_steps -1 \
+#     --train_batch_size 256 \
+#     --micro_train_batch_size 1 \
+#     --pretrain ${model} \
+#     --bf16 \
+#     --max_epochs 1 \
+#     --max_len 8192 \
+#     --zero_stage 3 \
+#     --learning_rate 9e-6 \
+#     --dataset ${dataset} \
+#     --apply_chat_template \
+#     --prompt_key messages \
+#     --chosen_key chosen \
+#     --rejected_key rejected \
+#     --flash_attn \
+#     --load_checkpoint \
+#     --gradient_checkpointing \
+#     --use_tensorboard "${ftmodel}_log"

-# start_server() {
-#     echo "Starting server..."
-#     CUDA_VISIBLE_DEVICES=0 \
-#     python -m openrlhf.cli.serve_rm \
-#         --reward_pretrain ${model} \
-#         --normalize_reward \
-#         --port 5000 \
-#         --bf16 \
-#         --max_len 8192 &
-#     SERVER_PID=$!
-#     echo "Server started with PID: $SERVER_PID"
-# }
+start_server() {
+    echo "Starting server..."
+    CUDA_VISIBLE_DEVICES=0 \
+    python -m openrlhf.cli.serve_rm \
+        --reward_pretrain ${ftmodel} \
+        --normalize_reward \
+        --port 5000 \
+        --bf16 \
+        --max_len 8192 &
+    SERVER_PID=$!
+    echo "Server started with PID: $SERVER_PID"
+}

-# # Function to start the client
-# start_client() {
-#     echo "Starting client..."
-#     python -m codecritic.cli.run_rm_test \
-#         --model ${model} \
-#         --test "${datasets}/sample/min_test.jsonl" \
-#         --apps /lustre/S/nanziyuan/datasets/apps/
-#     CLIENT_EXIT_CODE=$?
-#     echo "Client finished with exit code: $CLIENT_EXIT_CODE"
-# }
+# Function to start the client
+start_client() {
+    echo "Starting client..."
+    python -m codecritic.cli.test_orm \
+        --model ${ftmodel} \
+        --testset ${testset} \
+        --output ${evalresults}
+    CLIENT_EXIT_CODE=$?
+    echo "Client finished with exit code: $CLIENT_EXIT_CODE"
+}

-# # Function to stop the server
-# stop_server() {
-#     echo "Stopping server..."
-#     kill -SIGINT $SERVER_PID
-#     wait $SERVER_PID 2>/dev/null
-#     echo "Server stopped."
-# }
+# Function to stop the server
+stop_server() {
+    echo "Stopping server..."
+    kill -SIGINT $SERVER_PID
+    wait $SERVER_PID 2>/dev/null
+    echo "Server stopped."
+}

-# start_server
-# # Give the server some time to initialize (optional)
-# sleep 60
-# start_client
-# stop_server
-# echo "Execution complete."
+start_server
+# Give the server some time to initialize (optional)
+sleep 60
+start_client
+stop_server
+echo "Execution complete."