Commit 0bd552ba by nzy

step4: multi-process reward model inference

parent 83e4a159
model = "/path/to/model" model = "/path/to/model"
apps = "/path/to/apps_dataset" apps = "/path/to/apps_dataset"
test_orm = "max_edit_distance"
[sample] [sample]
sample_prompt_path = "path" sample_prompt_path = "path"
sample_result_path = "path" sample_result_path = "path"
...@@ -23,8 +25,13 @@ metadata_path = "" ...@@ -23,8 +25,13 @@ metadata_path = ""
preference_dataset_path = "" preference_dataset_path = ""
dataset_info_path = "" dataset_info_path = ""
[preference_dataset.min_edit_distance] [preference_dataset.min_edit_distance]
metadata_path = "" metadata_path = ""
preference_dataset_path = "" preference_dataset_path = ""
dataset_info_path = "" dataset_info_path = ""
\ No newline at end of file
[orm.max_edit_distance]
model_path = ""
inference_yaml_path = ""
minimal_test_score_path = ""
eval_result_path = ""
\ No newline at end of file
...@@ -36,6 +36,21 @@ ORM training follows [@Ouyang2022TrainingLM] ...@@ -36,6 +36,21 @@ ORM training follows [@Ouyang2022TrainingLM]
### Step4 Evaluate ORM & Critic Model ### Step4 Evaluate ORM & Critic Model
Deploy the Llamafactory-trained reward model using Llamafactory. See this [issue](https://github.com/hiyouga/LLaMA-Factory/issues/4743#issuecomment-2218022614) for details.
Run the following command:
```bash
API_PORT=8000 CUDA_VISIBLE_DEVICES=0 llamafactory-cli api deepseekcoder_rm.yaml
```
where the config file `deepseekcoder_rm.yaml` looks like this:
```yaml
model_name_or_path: /path/to/model
template: deepseekcoder
stage: rm
```
## Environment ## Environment
......
import requests
import subprocess
import os
import json
import time
from tqdm.contrib.concurrent import thread_map
from copy import deepcopy
from utils import load_jsonl, save_jsonl, extract_code, read_config
from utils_metric import group_results, score_pass_at_k
from transformers import AutoTokenizer
def run_server(api_port, cuda_device, rm_inference_yaml_path):
    """Launch one `llamafactory-cli api` server as a child process.

    The child inherits a copy of the current environment in which
    `API_PORT` and `CUDA_VISIBLE_DEVICES` are overridden.

    Args:
        api_port: Port exported to the child as `API_PORT` (int or str).
        cuda_device: Value exported to the child as `CUDA_VISIBLE_DEVICES`.
        rm_inference_yaml_path: Config file passed to `llamafactory-cli api`.

    Returns:
        The `subprocess.Popen` handle of the started server.
    """
    env = os.environ.copy()
    # Popen only accepts string environment values; the port is commonly
    # computed as an int (e.g. 8000 + i), which would raise TypeError.
    env["API_PORT"] = str(api_port)
    env["CUDA_VISIBLE_DEVICES"] = str(cuda_device)
    # NOTE(review): stdout/stderr are piped but nothing in this file reads
    # them; a chatty server can block once the pipe buffer fills. Confirm
    # whether the output should be drained or redirected instead.
    server_process = subprocess.Popen(
        ["llamafactory-cli", "api", rm_inference_yaml_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=env,
    )
    print(
        f"Started server with PID {server_process.pid} on port {api_port} and CUDA device {cuda_device}"
    )
    return server_process
def stop_server(server_processes):
    """Terminate every server process and wait for each one to exit.

    Args:
        server_processes: Iterable of `subprocess.Popen`-like handles.
    """
    for proc in server_processes:
        pid = proc.pid
        proc.terminate()
        # Block until this process has exited before moving to the next one.
        proc.wait()
        print(f"Terminated server with PID {pid}")
def get_rewards_from_server(server_url: str, messages: list[str]):
    """Request reward scores for `messages` from the API server.

    Args:
        server_url: Full URL of the scoring endpoint.
        messages: Strings sent as the "messages" field of the JSON payload.

    Returns:
        The value of the "scores" field of the JSON response.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    # Surface HTTP failures as such, rather than as a misleading KeyError or
    # JSON decode error when the error body is parsed below.
    response.raise_for_status()
    return json.loads(response.text)["scores"]
def preprocess_dataset(model_path, test_dataset, gpu_num):
    """Apply the chat template to each sample and assign it a server port.

    Each item of `test_dataset` gets a new "format_str" key (a one-element
    list holding the templated conversation) and is paired with a port
    chosen round-robin from 8000 .. 8000 + gpu_num - 1.

    Args:
        model_path: Path handed to `AutoTokenizer.from_pretrained`.
        test_dataset: Iterable of dicts with a "messages" list.
        gpu_num: Number of ports to spread the samples over.

    Returns:
        A list of `(item, port)` tuples in input order.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    assigned = []
    for index, sample in enumerate(test_dataset):
        # Work on a copy so the sample's own "messages" stay untouched.
        chat = deepcopy(sample["messages"])
        last_turn = chat[-1]
        last_turn["content"] = extract_code(last_turn["content"])
        # https://github.com/hiyouga/LLaMA-Factory/blob/a45f3f5461e2936b9e119eda2ef4d8c7a4131740/tests/data/test_template.py#L58
        # LLaMA-Factory's template should match the tokenizer's `apply_chat_template`.
        templated = tokenizer.apply_chat_template(chat, tokenize=False)
        sample["format_str"] = [templated]
        port = 8000 + index % gpu_num
        assigned.append((sample, port))
    return assigned
def reward_model_inference(args):
    """Score one preprocessed sample against the server on its port.

    Args:
        args: An `(item, api_port)` tuple; `item` must provide the keys
            "format_str", "problem_id", "messages" and "eval_result".

    Returns:
        A dict with the item's "problem_id", "messages" and "eval_result"
        plus the first score returned by the server under "score".
    """
    sample, port = args
    # NOTE(review): 0.0.0.0 is used as the destination address here; confirm
    # this resolves to the local server on the target platform.
    endpoint = f"http://0.0.0.0:{port}/v1/score/evaluation"
    rewards = get_rewards_from_server(endpoint, sample["format_str"])
    score = rewards[0]
    record = {
        key: sample[key] for key in ("problem_id", "messages", "eval_result")
    }
    record["score"] = score
    return record
def mutli_process_reward_model_inference(
    test_path, model_path, inference_cfg_path, result_path, startup_wait_seconds=300
):
    """Score a test set with one reward-model server per visible GPU.

    One server is started for every entry of the comma-separated
    `CUDA_VISIBLE_DEVICES` environment variable, on ports 8000, 8001, ...
    The samples are then scored through `thread_map` and the servers are
    stopped again, even when start-up or inference fails.

    Args:
        test_path: JSONL file with the samples to score.
        model_path: Model path used to load the tokenizer for templating.
        inference_cfg_path: Config file passed to each server.
        result_path: JSONL file the scored results are written to.
        startup_wait_seconds: Seconds to sleep after launching the servers
            before sending requests.

    Returns:
        The list of scored results.

    Raises:
        KeyError: If `CUDA_VISIBLE_DEVICES` is not set.
    """
    cuda_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
    gpu_num = len(cuda_devices)
    test_dataset = preprocess_dataset(model_path, load_jsonl(test_path), gpu_num)

    server_processes = []
    try:
        for i, cuda_device in enumerate(cuda_devices):
            # The port ends up in the child's environment, which must only
            # contain strings, so hand it over as a string.
            server_processes.append(
                run_server(str(8000 + i), cuda_device, inference_cfg_path)
            )
        # Wait for the servers to start (adjust the sleep time as needed).
        time.sleep(startup_wait_seconds)
        results = thread_map(reward_model_inference, test_dataset)
    finally:
        # Never leave GPU-holding servers behind, whatever went wrong above.
        stop_server(server_processes)

    save_jsonl(results, result_path)
    return results
if __name__ == "__main__":
    # Score the minimal test set with the reward model selected by the
    # `test_orm` config key, then evaluate the ranked samples.
    cfg = read_config()
    orm_test_model = cfg["test_orm"]
    orm_cfg = cfg["orm"][orm_test_model]
    results = mutli_process_reward_model_inference(
        cfg["dataset"]["minimal_test_path"],
        orm_cfg["model_path"],
        orm_cfg["inference_yaml_path"],
        orm_cfg["minimal_test_score_path"],
    )
    # group_results needs the APPS dataset path to look up difficulties.
    groups = group_results(results, cfg["apps"])
    # Evaluate k = 1..16; the previous loop ignored its counter and always
    # passed 1, producing 16 identical entries.
    # NOTE(review): score_pass_at_k may still expect a `strategy` argument;
    # confirm its current signature in utils_metric.
    eval_results = [score_pass_at_k(groups, k) for k in range(1, 17)]
    save_jsonl(eval_results, orm_cfg["eval_result_path"])
...@@ -46,6 +46,12 @@ def group_results(results, apps_path): ...@@ -46,6 +46,12 @@ def group_results(results, apps_path):
difficulty = dataset[split][int(idx)]["difficulty"] difficulty = dataset[split][int(idx)]["difficulty"]
groups[difficulty][problem_id].append(item) groups[difficulty][problem_id].append(item)
if "score" in results[0]:
for difficulty, problem in groups.items():
for problem_id, lst in problem.items():
sorted_lst = sorted(lst, key=lambda x: x["score"], reverse=True)
problem[problem_id] = sorted_lst
return groups return groups
...@@ -69,10 +75,8 @@ def score_pass_at_k(groups, k, strategy): ...@@ -69,10 +75,8 @@ def score_pass_at_k(groups, k, strategy):
for difficulty, problems in groups.items(): for difficulty, problems in groups.items():
num_samples, num_correct = [], [] num_samples, num_correct = [], []
for lst in problems.values(): for lst in problems.values():
# select topk
sorted_lst = sorted(lst, key=lambda x: x["score"], reverse=True)[:k]
num_samples.append(len(lst)) num_samples.append(len(lst))
num_correct.append(sum(item["eval_result"] for item in sorted_lst)) num_correct.append(sum(item["eval_result"] for item in lst[:k]))
pass_at_k = np.mean([c / n for c, n in zip(num_correct, num_samples)]) pass_at_k = np.mean([c / n for c, n in zip(num_correct, num_samples)])
result[difficulty] = pass_at_k result[difficulty] = pass_at_k
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment