Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
codecritic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ziyuan Nan
codecritic
Commits
b65ddb76
Commit
b65ddb76
authored
Dec 28, 2024
by
nanziyuan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add star
parent
d11a2acf
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
242 additions
and
0 deletions
+242
-0
codecritic/cli/star.py
+144
-0
scripts/run.slurm
+98
-0
No files found.
codecritic/cli/star.py
0 → 100644
View file @
b65ddb76
import
argparse
import
random
import
os
from
transformers
import
AutoTokenizer
import
codecritic.data.cov
as
cov
import
codecritic.data.verify
as
verify
import
codecritic.utils.json
as
ijson
from
codecritic.utils.vllm
import
vllm_chatcomplete
,
vllm_score
def preprocess_sft(item, idx):
    """Turn one SFT record into a chat prompt plus its expected verdict.

    The first two entries of ``item["question"]`` are kept and followed by a
    user turn whose content is ``cov.COV_PROMPT``. The content of the first
    response message is mapped to a boolean: "Yes" -> True, "No" -> False.

    Args:
        item: Record holding a ``"question"`` message list and a
            ``"response"`` list whose first entry has a ``"content"`` string.
        idx: Position of the record in the source dataset; copied verbatim
            into the ``"index"`` field of the result.

    Returns:
        Dict with the keys ``"messages"``, ``"eval_result"`` and ``"index"``.

    Raises:
        ValueError: If the response content is neither "Yes" nor "No".
    """
    # Slicing copies, so the caller's question list is never mutated.
    messages = [
        *item["question"][:2],
        {"role": "user", "content": cov.COV_PROMPT},
    ]

    verdict = item["response"][0]["content"]
    if verdict not in ("Yes", "No"):
        raise ValueError(f"Unknown critic token: {verdict}")

    return {
        "messages": messages,
        "eval_result": verdict == "Yes",
        "index": idx,
    }
def is_reward_right(item):
    """Tell whether the score-based prediction matches the reference verdict.

    A ``"score"`` strictly greater than 0.5 counts as a positive prediction;
    anything else counts as negative. The prediction is then compared with
    ``item["eval_result"]``.

    Args:
        item: Mapping with a numeric ``"score"`` and an ``"eval_result"``.

    Returns:
        True when prediction and ``"eval_result"`` are equal, else False.
    """
    predicted_positive = item["score"] > 0.5
    return predicted_positive == item["eval_result"]
def transform_to_ifdata(item):
    """Reshape a scored sample into the question/response output format.

    The first three messages become the question. The response is the fourth
    message followed by whatever ``verify.mk_critic_verify`` returns for the
    sample's ``"eval_result"``.

    Args:
        item: Sample with a ``"messages"`` list of at least four entries and
            an ``"eval_result"`` value.

    Returns:
        Dict with the keys ``"question"``, ``"response"`` and
        ``"eval_result"``.
    """
    messages = item["messages"]
    question = messages[:3]
    rationale_turn = messages[3]

    verdict = item["eval_result"]
    # NOTE(review): mk_critic_verify is concatenated onto a list, so it is
    # presumably a list of chat messages — confirm against codecritic.data.verify.
    response = [rationale_turn] + verify.mk_critic_verify(verdict)

    return {
        "question": question,
        "response": response,
        "eval_result": verdict,
    }
def _group_by_index(samples):
    """Bucket samples by their "index" field, keeping first-seen order."""
    groups = {}
    for sample in samples:
        groups.setdefault(sample["index"], []).append(sample)
    return groups


def _pop_extreme(items, chooser):
    """Remove and return the item whose "score" is picked by ``chooser``.

    ``chooser`` is the builtin ``max`` or ``min``. Both return the first of
    several tied candidates, so ties resolve to the earliest item.
    """
    position = chooser(range(len(items)), key=lambda i: items[i]["score"])
    return items.pop(position)


def _pick_per_problem(groups):
    """Split every group into its most confident sample and the leftovers.

    For each group the highest-scoring sample with a truthy "eval_result" and
    the lowest-scoring sample with a falsy "eval_result" are selected.

    Returns:
        Tuple ``(best_corrects, spare_corrects, best_incorrects,
        spare_incorrects)`` of lists.
    """
    best_corrects, spare_corrects = [], []
    best_incorrects, spare_incorrects = [], []

    for samples in groups.values():
        corrects = [s for s in samples if s["eval_result"]]
        incorrects = [s for s in samples if not s["eval_result"]]

        if corrects:
            best_corrects.append(_pop_extreme(corrects, max))
            spare_corrects.extend(corrects)

        if incorrects:
            best_incorrects.append(_pop_extreme(incorrects, min))
            spare_incorrects.extend(incorrects)

    return best_corrects, spare_corrects, best_incorrects, spare_incorrects


def run_STaR(model_path, dataset, outdir, model_gpu):
    """Generate, score and filter rationales, then write train/test splits.

    Pipeline:
        0. Convert every SFT record with ``preprocess_sft``.
        1. Generate rationales with ``vllm_chatcomplete`` (n=10,
           temperature=0.6, max_tokens=2048).
        2. Append ``verify.JUDGE_PROMPT`` to each sample and score it with
           ``vllm_score``.
        3. Keep only samples accepted by ``is_reward_right`` and save them to
           ``raw.jsonl``.
        4. Per problem keep the most confident sample of each class, top each
           class up to ``len(dataset) // 2`` from the leftovers, split 98% /
           rest into train and test, shuffle, and save ``train.jsonl`` and
           ``test.jsonl``.

    Args:
        model_path: Model identifier handed to vLLM and to
            ``AutoTokenizer.from_pretrained``.
        dataset: List of SFT records accepted by ``preprocess_sft``.
        outdir: Directory that receives ``raw.jsonl``, ``train.jsonl`` and
            ``test.jsonl``. A trailing separator is optional.
        model_gpu: GPU count forwarded to the vLLM helpers.

    Returns:
        None. Results are written to ``outdir`` and progress is printed.
    """
    # step0: preprocess the SFT dataset and append the prompt
    dataset_size = len(dataset)
    dataset = [preprocess_sft(x, idx) for idx, x in enumerate(dataset)]

    # step1: generate rationales
    # NOTE(review): the grouping in step4 assumes vllm_chatcomplete returns a
    # flat list with one record per sampled completion, each still carrying
    # the "index" of its source problem — confirm against codecritic.utils.vllm.
    sampling_params = dict(n=10, temperature=0.6, max_tokens=2048)
    dataset = vllm_chatcomplete(model_path, dataset, sampling_params, model_gpu)
    print("reasoning finished")

    # step2: score every rationale
    for item in dataset:
        item["messages"].append({"role": "user", "content": verify.JUDGE_PROMPT})

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    score_token = verify.get_score_token_id(tokenizer)
    dataset = vllm_score(model_path, dataset, score_token, model_gpu)

    # step3: keep only samples whose score agrees with the reference verdict
    dataset = [item for item in dataset if is_reward_right(item)]
    print("number of consistent result:", len(dataset))
    # os.path.join keeps the path correct whether or not outdir ends in "/".
    ijson.save_jsonl(dataset, os.path.join(outdir, "raw.jsonl"))

    # step4: select the most confident sample of each class per problem
    groups = _group_by_index(dataset)
    max_corrects, remain_corrects, min_incorrects, remain_incorrects = (
        _pick_per_problem(groups)
    )

    target = dataset_size // 2
    print("target size of correct sample:", target)

    # Leftovers are ranked most-confident first: high scores for correct
    # samples, low scores for incorrect ones.
    sorted_remain_correct = sorted(
        remain_corrects, key=lambda x: x["score"], reverse=True
    )
    sorted_remain_incorrect = sorted(remain_incorrects, key=lambda x: x["score"])

    lack_correct = target - len(max_corrects)
    lack_incorrect = target - len(min_incorrects)
    print("lack of correct", lack_correct, "lack of incorrect", lack_incorrect)

    # A class that already exceeds the target has a negative shortfall. A
    # negative slice bound would mean "all but the last k" and add nearly
    # every leftover, so the bound is clamped to zero (add nothing).
    max_corrects += sorted_remain_correct[:max(lack_correct, 0)]
    min_incorrects += sorted_remain_incorrect[:max(lack_incorrect, 0)]

    train_number = int(target * 0.98)
    train_dataset = max_corrects[:train_number] + min_incorrects[:train_number]
    test_dataset = max_corrects[train_number:] + min_incorrects[train_number:]

    train_dataset = [transform_to_ifdata(x) for x in train_dataset]
    test_dataset = [transform_to_ifdata(x) for x in test_dataset]

    random.shuffle(train_dataset)
    random.shuffle(test_dataset)

    print("train dataset size:", len(train_dataset))
    print("test dataset size:", len(test_dataset))
    ijson.save_jsonl(train_dataset, os.path.join(outdir, "train.jsonl"))
    ijson.save_jsonl(test_dataset, os.path.join(outdir, "test.jsonl"))
if __name__ == "__main__":
    # Command-line entry point: parse arguments, load the SFT dataset and run
    # the pipeline. The three path/model options are mandatory; without them
    # the run would only fail later with an unrelated-looking error on None.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="model path passed to vLLM and the tokenizer",
    )
    parser.add_argument(
        "--sft",
        type=str,
        required=True,
        help="path of the SFT dataset in jsonl format",
    )
    parser.add_argument(
        "--outdir",
        type=str,
        required=True,
        help="output directory (with trailing '/') receiving raw.jsonl, "
        "train.jsonl and test.jsonl",
    )
    parser.add_argument(
        "--gpu", type=int, default=1, help="number of gpus 1 model required"
    )

    # Set before any tokenizer is created by run_STaR.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args = parser.parse_args()
    sft_dataset = ijson.load_jsonl(args.sft)
    run_STaR(args.model, sft_dataset, args.outdir, args.gpu)
scripts/run.slurm
0 → 100644
View file @
b65ddb76
#!/bin/bash
# SLURM batch script: logs node and GPU information, sets up the module and
# conda environment, starts a delayed GPU monitor in the background, then
# runs `run.sh` and exports GPU statistics once it finishes.

#- Job parameters

# Please modify job name

#SBATCH -J cc                        # The job name
#SBATCH -o cc-%j.out                 # Write the standard output to file named 'cc-<job_number>.out'
#SBATCH -e cc-%j.err                 # Write the standard error to file named 'cc-<job_number>.err'

#- Resources

# Please modify your requirements

#SBATCH -p r8nv-gpu-hw-80g           # Submit to the 'r8nv-gpu-hw-80g' partition
#SBATCH -t 0-12:00:00                # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1                    # Request 1 node
#SBATCH --gres=gpu:4                 # Request 4 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal             # Request QOS Type

###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K        # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=r8a30-a0      # Request a specific list of hosts
### #SBATCH --constraint="A30|A100"  # Request GPU Type: A30 or A100_40GB
###

#- Log information

echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
echo "$(df -h | grep -v tmpfs)"

#- Important setting!!!

## otherwise it will cause an error of insufficient RDMA resources:
ulimit -l unlimited
## otherwise it will result in an insufficient virtual memory size error, especially when loading LLM:
ulimit -v unlimited

#- Load environments
source /tools/module_env.sh
module list                          # list modules loaded

##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load gcc/9.3.0
module load cuda-cudnn/12.1-8.9.3

##- virtualenv
source "/workspace/S/nanziyuan/miniconda3/etc/profile.d/conda.sh"
conda activate openrlhf

# The unquoted $(...) below is word-split, so multi-line output is printed
# on a single line.
echo $(module list)                  # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)

#- Other

cluster-quota                        # nas quota

nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit  # gpu info

#- WARNING! DO NOT MODIFY your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Using GPU(s) ${CUDA_VISIBLE_DEVICES}"  # which GPUs
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
echo "This job is assigned the following resources by SLURM:"
scontrol show jobid $SLURM_JOB_ID -dd | awk '/IDX/ {print $2, $4}'

##- Monitor
# The subshell below runs in the background: the script continues with the
# main program while the statistics command fires after a three-hour sleep.
# NOTE: slurm-tools/v1.0 was already loaded in the "Tools" section above;
# this second load is a repeat.
module load slurm-tools/v1.0
(sleep 3h && slurm-gpu-atop-log-stats $SLURM_JOB_ID $CUDA_VISIBLE_DEVICES) &
echo "Main program continues to run. Monitoring information will be exported after three hours."

#- Main program execution

##- Job step
# NOTE: `run.sh` is a relative path, so it is resolved against the working
# directory the job runs in.
bash run.sh

#- End

slurm-gpu-atop-log-stats $SLURM_JOB_ID $CUDA_VISIBLE_DEVICES
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
# This will overwrite any existing atop logs from previous runs.
# WARNING: If your program times out or is terminated by scancel,
# the "End" section above might not execute correctly.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment