Commit cabb00c5 by nzy

step4: remove all read_config

parent 191a00d3
+import argparse
+from pathlib import Path
+import pprint
 from utils_vllm import vllm_chatcomplete, vllm_score
-from utils import read_config, load_jsonl, save_jsonl
+from utils import load_jsonl, save_jsonl, save_json, extract_code
 from utils_metric import group_results, score_pass_at_k
-from utils_dataset import mk_critic_verify, get_score_token_id
+from utils_dataset import (
+    mk_critic_verify,
+    get_score_token_id,
+    code_template,
+    mk_critic_reason,
+    mk_critic_qa,
+)
+
+
+def preprocess_test_item(item):
+    question = item["messages"][0]["content"]
+    answer = item["messages"][1]["content"]
+    code = code_template.format(extract_code(answer))
+    critic_reason_prompt = mk_critic_reason("", "")[:1]
+    item["messages"] = mk_critic_qa(question, code) + critic_reason_prompt
+    return item
+
+
 if __name__ == "__main__":
-    cfg = read_config()
-    vllm_chatcomplete(
-        cfg["critic"]["model_path"],
-        cfg["critic"]["test"]["prompt_path"],
-        cfg["critic"]["test"]["reason_result_path"],
-        cfg["critic"]["test"]["sampling_params"]
-    )
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--test", type=str)
+    parser.add_argument("--apps", type=str)
+    args = parser.parse_args()
+
+    home_path = Path(args.model).parent
+    result_dir = home_path / "eval"
+    result_dir.mkdir(exist_ok=True)
+
+    prompt_path = result_dir / "prompt.jsonl"
+    raw_test_dataset = load_jsonl(args.test)
+    test_dataset = [preprocess_test_item(item) for item in raw_test_dataset]
+    save_jsonl(test_dataset, prompt_path)
+
+    reason_path = result_dir / "reason.jsonl"
+    sampling_params = dict(n=1, temperature=0.0, max_tokens=2048)
+    save_json(sampling_params, result_dir / "sampling_params.json")
+    vllm_chatcomplete(args.model, prompt_path, reason_path, sampling_params)

     score_token = get_score_token_id(args.model)
-    reason_results = load_jsonl(cfg["critic"]["test"]["reason_result_path"])
+    reason_results = load_jsonl(reason_path)
     score_prompts = []
     for item in reason_results:
         item["messages"] += mk_critic_verify()
         score_prompts.append(item)
-    save_jsonl(score_prompts, "test_score_prompt.jsonl")
-    vllm_score(
-        cfg["critic"]["model_path"],
-        "test_score_prompt.jsonl",
-        cfg["critic"]["test"]["score_result_path"],
-        score_token
-    )
+    score_prompt_path = result_dir / "score_prompt.jsonl"
+    save_jsonl(score_prompts, score_prompt_path)
+
+    score_path = result_dir / "score.jsonl"
+    vllm_score(args.model, score_prompt_path, score_path, score_token)

-    results = load_jsonl(cfg["critic"]["test"]["score_result_path"])
-    groups = group_results(results, cfg["apps"])
+    results = load_jsonl(score_path)
+    groups = group_results(results, args.apps)
     eval_results = [score_pass_at_k(groups, k, "critic") for k in range(1, 16)]
-    save_jsonl(eval_results, cfg["critic"]["test"]["eval_result_path"])
-    print(eval_results)
+    result_path = result_dir / "result.jsonl"
+    save_jsonl(eval_results, result_path)
+    pprint.pp(eval_results)
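With read_config gone, every artifact now lands in an eval/ directory next to the model checkpoint instead of at config-specified paths. A minimal sketch of the resulting layout, assuming a hypothetical --model path:

    from pathlib import Path

    # Hypothetical checkpoint passed as --model; only illustrates where files land.
    model = Path("ckpts/critic/step4")
    result_dir = model.parent / "eval"  # -> ckpts/critic/eval

    # Files the script writes, in pipeline order:
    for name in ["prompt.jsonl", "sampling_params.json", "reason.jsonl",
                 "score_prompt.jsonl", "score.jsonl", "result.jsonl"]:
        print(result_dir / name)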
utils_dataset.py
@@ -95,3 +95,18 @@ def get_score_token_id(model_path, token_str="Yes"):
     score_tokens = tokenizer.encode(token_str, add_special_tokens=False)
     assert len(score_tokens) == 1
     return score_tokens[0]
+
+
+def mk_critic_reason(codedit, explanation):
+    user_question = {"role": "user", "content": "Edit your code in diff format to fix any issues and explain the changes."}
+    llm_answer_content = f"""\
+**Edited Code (in diff format):**
+```diff
+{codedit}
+```
+
+**Explanation:**
+{explanation}
+"""
+    llm_answer = {"role": "assistant", "content": llm_answer_content}
+    return [user_question, llm_answer]
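Note the mk_critic_reason("", "")[:1] slice in preprocess_test_item above: the function returns a [user, assistant] pair, and the slice keeps only the user turn, so at eval time the critic model must generate the diff and explanation itself. Concretely:

    prompt = mk_critic_reason("", "")[:1]
    # -> [{"role": "user",
    #      "content": "Edit your code in diff format to fix any issues and explain the changes."}]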