Commit 4b8f6d6b by nanziyuan

bug fix & add support for reward_reason exp

parent c08b0931
@@ -8,10 +8,11 @@ import json
from codecritic.data.cov import (
convert_preference_to_vot_prompt,
+    convert_sft_to_vot_prompt,
convert_cov_to_cov_dataset,
)
from codecritic.data.utils import save_jsonl_dataset
-from codecritic.utils.json import load_json
+from codecritic.utils.json import load_json, load_jsonl
client = OpenAI(
@@ -40,12 +41,20 @@ def worker(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--preference_dataset", type=str)
parser.add_argument("--sft_dataset", type=str)
parser.add_argument("--output_dir", type=str)
args = parser.parse_args()
-    preference_dataset = load_json(args.preference_dataset)
-    cov_prompts = [convert_preference_to_vot_prompt(x, splitter=False) for x in preference_dataset]
-    cov_prompts = list(chain(*cov_prompts))
+    if args.preference_dataset:
+        preference_dataset = load_json(args.preference_dataset)
+        cov_prompts = [convert_preference_to_vot_prompt(x, splitter=False) for x in preference_dataset]
+        cov_prompts = list(chain(*cov_prompts))
+    elif args.sft_dataset:
+        sft_dataset = load_jsonl(args.sft_dataset)
+        cov_prompts = [convert_sft_to_vot_prompt(x, splitter=False) for x in sft_dataset]
+    else:
+        parser.error("either --preference_dataset or --sft_dataset is required")
lock = Lock()
total_len = len(cov_prompts)
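
Note: the either/or check in the branch above can also be enforced declaratively; argparse's mutually exclusive groups generate the usage error automatically. A minimal sketch of that alternative (a hypothetical refactor, not part of this commit):

import argparse

parser = argparse.ArgumentParser()
# required=True rejects runs that pass neither flag; the group itself
# rejects runs that pass both.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--preference_dataset", type=str)
group.add_argument("--sft_dataset", type=str)
parser.add_argument("--output_dir", type=str)
args = parser.parse_args()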
......
import argparse
from pathlib import Path
import random
from codecritic.utils.json import load_jsonl, save_jsonl
def add_mask_and_score(messages):
    # Set turn["mask"] on every turn: True everywhere except index 3,
    # which stays unmasked; flag index 5 (the final turn) for scoring.
    for idx, turn in enumerate(messages):
        turn["mask"] = idx != 3
        if idx == 5:
            turn["score"] = True
    return messages
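
For illustration, on a six-turn conversation add_mask_and_score produces the following mask/score pattern (the placeholder contents here are made up; only the indices matter):

messages = [{"role": "user", "content": str(i)} for i in range(6)]
out = add_mask_and_score(messages)
assert [t["mask"] for t in out] == [True, True, True, False, True, True]
assert out[5]["score"] is True  # only the final turn is flagged for scoring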
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str)
parser.add_argument("--split", action="store_true")
args = parser.parse_args()
dataset = load_jsonl(args.path)
for item in dataset:
item["messages"] = add_mask_and_score(item["messages"])
if args.split:
random.shuffle(dataset)
split_len = int(len(dataset) * 0.01)
test = dataset[:split_len]
train = dataset[split_len:]
dataset_path = Path(args.path).parent
save_jsonl(train, dataset_path / "train.jsonl")
save_jsonl(test, dataset_path / "test.jsonl")
else:
save_jsonl(dataset, args.path)
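
One caveat on the 1% holdout above: int() truncates, so any dataset with fewer than 100 items produces an empty test.jsonl. For example:

assert int(99 * 0.01) == 0      # under 100 items -> empty test split
assert int(1000 * 0.01) == 10   # 1,000 items -> 10 test items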
@@ -22,10 +22,11 @@ def get_rewards_from_server(server_url: str, messages: list[str]):
return rewards
-def test_reward_model(server_url, item, tokenizer):
-    response = item["messages"][-1]["content"]
-    code = code_template.format(extract_code(response))
-    item["messages"][-1]["content"] = code
+def test_reward_model(server_url, item, tokenizer, preprocess=True):
+    if preprocess:
+        response = item["messages"][-1]["content"]
+        code = code_template.format(extract_code(response))
+        item["messages"][-1]["content"] = code
query = tokenizer.apply_chat_template(item["messages"], tokenize=False)
score = get_rewards_from_server(server_url, [query])[0]
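
The body of get_rewards_from_server is collapsed in this hunk. For orientation, a minimal sketch of what such a helper usually looks like for an HTTP reward endpoint; the {"query": ...} payload and "rewards" response key are assumptions, not something this diff shows:

import requests

def get_rewards_from_server(server_url: str, messages: list[str]):
    # Assumed JSON contract: {"query": [...]} in, {"rewards": [...]} out.
    response = requests.post(server_url, json={"query": messages})
    response.raise_for_status()
    return response.json()["rewards"]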
@@ -43,6 +44,7 @@ if __name__ == "__main__":
parser.add_argument("--model", type=str)
parser.add_argument("--test", type=str)
parser.add_argument("--apps", type=str)
parser.add_argument("--skip_preprocess", action="store_false")
args = parser.parse_args()
home_path = Path(args.model).parent
@@ -53,7 +55,7 @@ if __name__ == "__main__":
test_dataset = load_jsonl(args.test)
server_url = "http://0.0.0.0:5000/get_reward"
tokenizer = AutoTokenizer.from_pretrained(args.model)
-    results = [test_reward_model(server_url, item, tokenizer) for item in tqdm(test_dataset)]
+    results = [test_reward_model(server_url, item, tokenizer, args.skip_preprocess) for item in tqdm(test_dataset)]
score_path = result_dir / "scores.jsonl"
save_jsonl(results, score_path)
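
Note that the new flag reads inverted at the call site: with action="store_false", args.skip_preprocess defaults to True (preprocessing runs) and flips to False when --skip_preprocess is passed, which matches the preprocess parameter of test_reward_model. A quick demonstration of that argparse behavior:

import argparse

p = argparse.ArgumentParser()
p.add_argument("--skip_preprocess", action="store_false")
assert p.parse_args([]).skip_preprocess is True                      # default: preprocess runs
assert p.parse_args(["--skip_preprocess"]).skip_preprocess is False  # flag given: skip it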
......
import argparse
import os
from pathlib import Path
import pprint
@@ -27,7 +28,7 @@ def append_prompt(item, content):
return item
-def run_sft_model(model_path, test_path, apps_path, reason_prompt=None, model_gpu):
+def run_sft_model(model_path, test_path, apps_path, reason_prompt, model_gpu):
home_path = Path(model_path).parent
result_dir = home_path / "eval"
result_dir.mkdir(exist_ok=True)
@@ -68,6 +69,8 @@ if __name__ == "__main__":
parser.add_argument("--gpu", type=int, default=1, help="gpu number required by model")
args = parser.parse_args()
os.environ["TOKENIZERS_PARALLELISM"] = "false"
reason_prompts = {"cov": COV_PROMPT}
reason_prompt = reason_prompts.get(args.reason, None)
run_sft_model(args.model, args.test, args.apps, reason_prompt, args.gpu)
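
The signature change above is the bug fix from the commit title: Python raises a SyntaxError when a parameter without a default (model_gpu) follows one with a default (reason_prompt=None), so the default now lives in the lookup table instead. New reasoning experiments can be registered by extending that dict; a sketch, where REWARD_REASON_PROMPT is a hypothetical constant not defined in this commit:

reason_prompts = {
    "cov": COV_PROMPT,
    # "reward_reason": REWARD_REASON_PROMPT,  # hypothetical future entry
}
reason_prompt = reason_prompts.get(args.reason)  # None for unset/unknown --reason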
@@ -62,6 +62,15 @@ def convert_preference_to_vot_prompt(item, splitter=True):
return mk_messages(messages1), mk_messages(messages2)
+def convert_sft_to_vot_prompt(item, splitter=True):
+    question = item["messages"][0]["content"]
+    response = item["messages"][1]["content"]
+    code = code_template.format(extract_code(response))
+    messages = mk_message(question, code) + mk_cov_prompt(item["eval_result"], splitter)
+    return mk_messages(messages)
def convert_cov_to_cov_dataset(item):
user_content = item["messages"][2]["content"]
item["messages"][2]["content"] = COV_PROMPT
@@ -72,4 +81,5 @@ def convert_cov_to_cov_dataset(item):
else:
raise ValueError("Invalid prompt")
item["messages"] += mk_critic_verify(is_correct)
item["eval_result"] = is_correct
return item
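
Taken together, the last two hunks appear to close a loop: convert_cov_to_cov_dataset now records item["eval_result"], which is exactly the field the new convert_sft_to_vot_prompt reads when building mk_cov_prompt(item["eval_result"], splitter), so output of the former can be fed back through the latter.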