Commit 40af338d by nanziyuan

Wtf. I don't know what happened. Reason-ORM seems to be a perfect judge. I must save…

Wtf. I don't know what happened. Reason-ORM seems to be a perfect judge. I must save this version of the code.
parent 4b8f6d6b
......@@ -10,20 +10,20 @@ from codecritic.data.verify import mk_critic_verify
def mk_sft_dataset(messages):
assert len(messages) == 4
question = messages[:3]
response = messages[3:]
question = messages[:-1]
response = messages[-1:]
return dict(question=question, response=response)
def convert_preference_to_sft(item):
message = item["messages"][0]["content"]
chosen = item["chosen"][0]["content"]
rejected = item["rejected"][0]["content"]
message = item["messages"]
chosen = item["chosen"]
rejected = item["rejected"]
messages1 = mk_message(message, chosen) + mk_critic_verify(True)
messages2 = mk_message(message, rejected) + mk_critic_verify(False)
messages1 = message + chosen + mk_critic_verify(True)
messages2 = message + rejected + mk_critic_verify(False)
return mk_sft_dataset(messages1), mk_sft_dataset(messages2)
......
......@@ -22,12 +22,7 @@ def get_rewards_from_server(server_url: str, messages: list[str]):
return rewards
def test_reward_model(server_url, item, tokenizer, preprocess=True):
if preprocess:
response = item["messages"][-1]["content"]
code = code_template.format(extract_code(response))
item["messages"][-1]["content"] = code
def test_reward_model(server_url, item, tokenizer):
query = tokenizer.apply_chat_template(item["messages"], tokenize=False)
score = get_rewards_from_server(server_url, [query])[0]
......@@ -44,7 +39,6 @@ if __name__ == "__main__":
parser.add_argument("--model", type=str)
parser.add_argument("--test", type=str)
parser.add_argument("--apps", type=str)
parser.add_argument("--skip_preprocess", action="store_false")
args = parser.parse_args()
home_path = Path(args.model).parent
......@@ -55,7 +49,7 @@ if __name__ == "__main__":
test_dataset = load_jsonl(args.test)
server_url = "http://0.0.0.0:5000/get_reward"
tokenizer = AutoTokenizer.from_pretrained(args.model)
results = [test_reward_model(server_url, item, tokenizer, args.skip_proprocess) for item in tqdm(test_dataset)]
results = [test_reward_model(server_url, item, tokenizer) for item in tqdm(test_dataset)]
score_path = result_dir / "scores.jsonl"
save_jsonl(results, score_path)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment