Commit 59fa167d by nzy

step4: switch to openrlhf

parent 17d4c247
import argparse
from copy import deepcopy
from functools import partial
import json
from tqdm import tqdm
import requests
def get_rewards_from_server(server_url: str, messages: list[str]) -> list[float]:
    """Get reward scores for *messages* from the reward-model API server.

    Posts the messages as a JSON payload of the form ``{"query": [...]}`` and
    returns the ``"rewards"`` list from the JSON response (the OpenRLHF
    reward-server wire format introduced by this commit).

    NOTE(review): reconstructed from diff residue — the span interleaved the
    pre-commit ``{"model", "messages"}`` / ``"scores"`` lines with the
    post-commit ``{"query"}`` / ``"rewards"`` lines; only the post-commit
    variant is kept here.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"query": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    # json.loads(response.text) kept (rather than response.json()) to match
    # the original code exactly.
    rewards = json.loads(response.text)["rewards"]
    return rewards
# NOTE(review): L22-L47 are raw diff residue — pre-commit (removed) and
# post-commit (added) lines are interleaved with the +/- markers stripped,
# so this span is not valid Python as it stands. Code lines below are kept
# byte-identical; the comments mark which side of the diff each fragment
# appears to belong to. TODO: resolve to the post-commit version only.
# --- pre-commit: removed helper (rest of its body appears further down) ---
def preprocess_dataset(model_path, test_dataset, gpu_num):
"apply chat_template and split the dataset to different gpu"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# --- post-commit: new test_reward_model(server_url, item, tokenizer) ---
def test_reward_model(server_url, item, tokenizer):
response = item["messages"][-1]["content"]
code = code_template.format(extract_code(response))
item["messages"][-1]["content"] = code
# --- pre-commit: body of preprocess_dataset (removed) ---
result = []
for i, item in enumerate(test_dataset):
messages = deepcopy(item["messages"])
messages[-1]["content"] = code_template.format(
extract_code(messages[-1]["content"])
)
# https://github.com/hiyouga/LLaMA-Factory/blob/a45f3f5461e2936b9e119eda2ef4d8c7a4131740/tests/data/test_template.py#L58
# # llama factory's template should match tokenizer's `apply_chat_template`.
item["format_str"] = [tokenizer.apply_chat_template(messages, tokenize=False)]
result.append((item, 8000 + i % gpu_num))
return result
# --- post-commit: continuation of the new test_reward_model ---
query = tokenizer.apply_chat_template(item["messages"], tokenize=False)
score = get_rewards_from_server(server_url, [query])[0]
# --- pre-commit: old test_reward_model(item, api_port) (removed) ---
def test_reward_model(item, api_port):
server_url = f"http://0.0.0.0:{api_port}/v1/score/evaluation"
score = get_rewards_from_server(server_url, item["format_str"])[0]
# --- shared: return dict, truncated by the diff's "@@" fold at the next hunk ---
return {
"problem_id": item["problem_id"],
"messages": item["messages"],
......@@ -63,9 +51,11 @@ if __name__ == "__main__":
# compute score
# NOTE(review): reconstructed from diff residue — the pre-commit
# preprocess_dataset/per-GPU lines were dropped in favour of the
# post-commit OpenRLHF flow.
score_path = result_dir / "scores.jsonl"
test_dataset = load_jsonl(args.test)
# OpenRLHF reward server endpoint (replaces the per-GPU LLaMA-Factory
# /v1/score/evaluation endpoints used before this commit).
server_url = "http://0.0.0.0:5000/get_reward"
tokenizer = AutoTokenizer.from_pretrained(args.model)
fun = partial(test_reward_model, server_url=server_url, tokenizer=tokenizer)
# BUG FIX: `fun` already binds server_url via partial; the original call
# `fun(server_url, item)` passed server_url positionally a second time,
# raising TypeError ("got multiple values for argument 'server_url'").
# Bind item by keyword so it cannot collide with the bound parameters.
results = [fun(item=item) for item in tqdm(test_dataset)]
save_jsonl(results, score_path)
# compute pass@k
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment