Commit b65ddb76 by nanziyuan

add star

parent d11a2acf
import argparse
import random
import os

from transformers import AutoTokenizer

import codecritic.data.cov as cov
import codecritic.data.verify as verify
import codecritic.utils.json as ijson
from codecritic.utils.vllm import vllm_chatcomplete, vllm_score
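
# STaR-style bootstrapping pass: sample rationales with vLLM using cov.COV_PROMPT,
# score each rationale on the verification token from verify.get_score_token_id,
# keep only samples whose score agrees with the known label, and write balanced
# train/test JSONL splits.
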
def preprocess_sft(item, idx):
    messages = item["question"][:2]
    messages.append({"role": "user", "content": cov.COV_PROMPT})

    result = item["response"][0]["content"]
    if result == "Yes":
        eval_result = True
    elif result == "No":
        eval_result = False
    else:
        raise ValueError(f"Unknown critic token: {result}")

    return {"messages": messages, "eval_result": eval_result, "index": idx}


def is_reward_right(item):
    score_bool = item["score"] > 0.5
    eval_result = item["eval_result"]
    return score_bool == eval_result


def transform_to_ifdata(item):
    question = item["messages"][:3]
    response = [item["messages"][3]] + verify.mk_critic_verify(item["eval_result"])
    return {
        "question": question,
        "response": response,
        "eval_result": item["eval_result"],
    }
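
# Data-shape sketch (illustrative only; the concrete field values below are hypothetical):
#
#   item = {
#       "question": [system_msg, user_msg, ...],                # chat turns for one problem
#       "response": [{"role": "assistant", "content": "Yes"}],  # ground-truth critic label
#   }
#
#   preprocess_sft(item, 0)
#   # -> {"messages": [system_msg, user_msg, {"role": "user", "content": cov.COV_PROMPT}],
#   #     "eval_result": True,
#   #     "index": 0}
#
# After generation and scoring, transform_to_ifdata reads messages[3], presumably the
# generated rationale, and emits a {"question", "response", "eval_result"} record.
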
def run_STaR(model_path, dataset, outdir, model_gpu):
    # step0: preprocess the SFT dataset and append the CoV prompt
    dataset_size = len(dataset)
    dataset = [preprocess_sft(x, idx) for idx, x in enumerate(dataset)]

    # step1: generate rationales
    sampling_params = dict(n=10, temperature=0.6, max_tokens=2048)
    dataset = vllm_chatcomplete(model_path, dataset, sampling_params, model_gpu)
    print("reasoning finished")

    # step2: score each rationale with the judge prompt
    for item in dataset:
        item["messages"].append({"role": "user", "content": verify.JUDGE_PROMPT})
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    score_token = verify.get_score_token_id(tokenizer)
    dataset = vllm_score(model_path, dataset, score_token, model_gpu)

    # step3: keep only samples whose score is consistent with the known label
    dataset = list(filter(is_reward_right, dataset))
    print("number of consistent results:", len(dataset))
    ijson.save_jsonl(dataset, outdir + "raw.jsonl")

    # step4: select two samples per problem
    # (the highest-scored correct rationale and the lowest-scored incorrect one)
    groups = {}
    for item in dataset:
        idx = item["index"]
        if idx not in groups:
            groups[idx] = []
        groups[idx].append(item)

    max_corrects, remain_corrects = [], []
    min_incorrects, remain_incorrects = [], []
    for idx, lst in groups.items():
        correct_items = [item for item in lst if item["eval_result"]]
        incorrect_items = [item for item in lst if not item["eval_result"]]

        if correct_items:
            max_score = float("-inf")  # initialize with negative infinity
            max_index = -1
            for i, item in enumerate(correct_items):
                if item["score"] > max_score:
                    max_score = item["score"]
                    max_index = i
            max_corrects.append(correct_items.pop(max_index))
            remain_corrects.extend(correct_items)

        if incorrect_items:
            min_score = float("inf")
            min_index = -1
            for i, item in enumerate(incorrect_items):
                if item["score"] < min_score:
                    min_score = item["score"]
                    min_index = i
            min_incorrects.append(incorrect_items.pop(min_index))
            remain_incorrects.extend(incorrect_items)

    # backfill from the remaining samples if a class falls short of the target size
    target = dataset_size // 2
    print("target number of samples per class:", target)
    sorted_remain_correct = sorted(remain_corrects, key=lambda x: x["score"], reverse=True)
    sorted_remain_incorrect = sorted(remain_incorrects, key=lambda x: x["score"])
    lack_correct = target - len(max_corrects)
    lack_incorrect = target - len(min_incorrects)
    print("lack of correct:", lack_correct, "lack of incorrect:", lack_incorrect)
    max_corrects += sorted_remain_correct[:lack_correct]
    min_incorrects += sorted_remain_incorrect[:lack_incorrect]

    train_number = int(target * 0.98)
    train_dataset = max_corrects[:train_number] + min_incorrects[:train_number]
    test_dataset = max_corrects[train_number:] + min_incorrects[train_number:]

    train_dataset = [transform_to_ifdata(x) for x in train_dataset]
    test_dataset = [transform_to_ifdata(x) for x in test_dataset]
    random.shuffle(train_dataset)
    random.shuffle(test_dataset)

    print("train dataset size:", len(train_dataset))
    print("test dataset size:", len(test_dataset))
    ijson.save_jsonl(train_dataset, outdir + "train.jsonl")
    ijson.save_jsonl(test_dataset, outdir + "test.jsonl")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--sft", type=str)
    parser.add_argument("--outdir", type=str)
    parser.add_argument("--gpu", type=int, default=1, help="number of GPUs required by one model")

    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args = parser.parse_args()
    sft_dataset = ijson.load_jsonl(args.sft)
    run_STaR(args.model, sft_dataset, args.outdir, args.gpu)
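
# Example invocation (the script filename and paths below are assumed, not given in
# this commit). Note that outdir is concatenated directly with the output filenames,
# so a trailing slash is expected:
#   python run_star.py --model /path/to/model --sft sft.jsonl --outdir out/ --gpu 4
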
#!/bin/bash
#- Job parameters
# Please modify job name
#SBATCH -J cc # The job name
#SBATCH -o cc-%j.out # Write the standard output to a file named 'cc-<job_number>.out'
#SBATCH -e cc-%j.err # Write the standard error to a file named 'cc-<job_number>.err'
#- Resources
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw-80g # Submit to the 'r8nv-gpu-hw-80g' partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:4 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### If no constraint is specified, any available node that meets the requirements will be allocated.
### You can specify the characteristics of the compute nodes, or even request specific nodes by name.
###
### #SBATCH --nodelist=r8a30-a0 # Request a specific list of hosts
### #SBATCH --constraint="A30|A100" # Request GPU Type: A30 or A100_40GB
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
echo "$(df -h | grep -v tmpfs)"
#- Important settings!!!
## Otherwise the job may fail with an 'insufficient RDMA resources' error:
ulimit -l unlimited
## Otherwise the job may fail with an insufficient virtual memory error, especially when loading an LLM:
ulimit -v unlimited
#- Load environments
source /tools/module_env.sh
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load gcc/9.3.0
module load cuda-cudnn/12.1-8.9.3
##- virtualenv
source "/workspace/S/nanziyuan/miniconda3/etc/profile.d/conda.sh"
conda activate openrlhf
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
#- Other
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- WARNING! DO NOT MODIFY your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Using GPU(s) ${CUDA_VISIBLE_DEVICES}" # which GPUs
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
echo "This job is assigned the following resources by SLURM:"
scontrol show jobid $SLURM_JOB_ID -dd | awk '/IDX/ {print $2, $4}'
##- Monitor
# The script continues with other tasks; the command below runs in the background after a delay
module load slurm-tools/v1.0
(sleep 3h && slurm-gpu-atop-log-stats $SLURM_JOB_ID $CUDA_VISIBLE_DEVICES) &
echo "Main program continues to run. Monitoring information will be exported after three hours."
#- Main program execution
##- Job step
bash run.sh
#- End
slurm-gpu-atop-log-stats $SLURM_JOB_ID $CUDA_VISIBLE_DEVICES
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
# This will overwrite any existing atop logs from previous runs.
# WARNING: If your program times out or is terminated by scancel,
# the above script part might not execute correctly.
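# Submission note (the job-script filename below is assumed, not given in this commit):
#   sbatch cc_star.slurm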