Commit 59fa167d by nzy

step4: switch to openrlhf

parent 17d4c247
 import argparse
-from copy import deepcopy
+from functools import partial
 import json
 from tqdm import tqdm
 import requests

@@ -16,32 +16,20 @@ def get_rewards_from_server(server_url: str, messages: list[str]):
     Gets reward scores from the API server.
     """
     headers = {"Content-Type": "application/json"}
-    payload = {"model": "model", "messages": messages}
+    payload = {"query": messages}
     response = requests.post(server_url, json=payload, headers=headers)
-    rewards = json.loads(response.text)["scores"]
+    rewards = json.loads(response.text)["rewards"]
     return rewards

-def preprocess_dataset(model_path, test_dataset, gpu_num):
-    "apply chat_template and split the dataset to different gpu"
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-    result = []
-    for i, item in enumerate(test_dataset):
-        messages = deepcopy(item["messages"])
-        messages[-1]["content"] = code_template.format(
-            extract_code(messages[-1]["content"])
-        )
-        # https://github.com/hiyouga/LLaMA-Factory/blob/a45f3f5461e2936b9e119eda2ef4d8c7a4131740/tests/data/test_template.py#L58
-        # llama factory's template should match tokenizer's `apply_chat_template`.
-        item["format_str"] = [tokenizer.apply_chat_template(messages, tokenize=False)]
-        result.append((item, 8000 + i % gpu_num))
-    return result
-
-def test_reward_model(item, api_port):
-    server_url = f"http://0.0.0.0:{api_port}/v1/score/evaluation"
-    score = get_rewards_from_server(server_url, item["format_str"])[0]
+def test_reward_model(server_url, item, tokenizer):
+    response = item["messages"][-1]["content"]
+    code = code_template.format(extract_code(response))
+    item["messages"][-1]["content"] = code
+    query = tokenizer.apply_chat_template(item["messages"], tokenize=False)
+    score = get_rewards_from_server(server_url, [query])[0]
     return {
         "problem_id": item["problem_id"],
         "messages": item["messages"],
@@ -63,9 +51,11 @@ if __name__ == "__main__":
     # compute score
     score_path = result_dir / "scores.jsonl"
-    raw_test_dataset = load_jsonl(args.test)
-    test_dataset = preprocess_dataset(args.model, raw_test_dataset, 1)
-    results = [test_reward_model(*arg) for arg in tqdm(test_dataset)]
+    test_dataset = load_jsonl(args.test)
+    server_url = "http://0.0.0.0:5000/get_reward"
+    tokenizer = AutoTokenizer.from_pretrained(args.model)
+    fun = partial(test_reward_model, server_url=server_url, tokenizer=tokenizer)
+    results = [fun(item=item) for item in tqdm(test_dataset)]
     save_jsonl(results, score_path)

     # compute pass@k
...
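For reference, a minimal sketch of how the reward endpoint is queried after this switch. It assumes an OpenRLHF-style reward server is already running at the URL hard-coded above (http://0.0.0.0:5000/get_reward); the request and response keys mirror the new code, i.e. a {"query": [...]} payload and a {"rewards": [...]} response. The query string below is a placeholder for the chat-template-formatted conversation the script builds.

import json
import requests

# Placeholder for a formatted conversation; in the script above it comes from
# tokenizer.apply_chat_template(item["messages"], tokenize=False).
queries = ["<chat-template-formatted conversation>"]

server_url = "http://0.0.0.0:5000/get_reward"   # assumed running OpenRLHF-style reward server
headers = {"Content-Type": "application/json"}
payload = {"query": queries}                    # old server expected {"model": ..., "messages": ...}
response = requests.post(server_url, json=payload, headers=headers)
rewards = json.loads(response.text)["rewards"]  # old server returned them under "scores"
print(rewards[0])                               # one reward score per query string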