Commit 8bf1b27d by nanziyuan

fix bugs

parent 9c8fbf86
 import argparse
 import os
 from pathlib import Path
+import pprint
 from tqdm import tqdm
 import torch
@@ -11,10 +12,11 @@ import accelerate
 from codecritic.utils.json import load_jsonl, save_jsonl
 from codecritic.data.verify import get_score_token_id
+from codecritic.utils.metric import group_results, score_pass_at_k


 @torch.inference_mode()
-def hf_score(model, tokenizer, prompts):
+def hf_score(accelerator, model, tokenizer, prompts):
     score_token = get_score_token_id(tokenizer)

     with accelerator.split_between_processes(prompts) as partial_prompts:
         results = []
@@ -23,7 +25,7 @@ def hf_score(model, tokenizer, prompts):
             input_ids = tokenizer.apply_chat_template(
                 item["messages"], add_generation_prompt=True, return_tensors="pt"
             ).to("cuda")
-            output = model(**input_ids)
+            output = model(input_ids)
             next_token_logits = output.logits[0, -1, :]
             score = F.softmax(next_token_logits, dim=0)[score_token].item()
@@ -35,32 +37,43 @@ def hf_score(model, tokenizer, prompts):

 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str)
-    parser.add_argument("--prompts", type=str)
-    parser.add_argument("--out", type=str)
+    parser.add_argument("--test", type=str)
+    parser.add_argument("--apps", type=str)
     args = parser.parse_args()

     home_path = Path(args.model).parent
     result_dir = home_path / "hf_eval"
     result_dir.mkdir(exist_ok=True)

-    prompts = load_jsonl(args.prompts)
+    prompts = load_jsonl(args.test)

-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    # os.environ["TOKENIZERS_PARALLELISM"] = "false"

     accelerator = accelerate.Accelerator()
     tokenizer = transformers.AutoTokenizer.from_pretrained(args.model)
-    model = transformers.AutoModelForCausalLM.from_pretrained(
-        args.model, device_map={"": accelerator.process_index}
-    )
-    for name, param in model.named_parameters():
-        print(f"{name}: {param.device}")
+    model = transformers.AutoModelForCausalLM.from_pretrained(args.model, device_map="auto")
+    # model, tokenizer = accelerator.prepare(model, tokenizer)

     # model.generation_config.pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id
     # model.generation_config.eos_token_id = tokenizer.eos_token_id
     model.eval()

     accelerator.wait_for_everyone()
-    results = hf_score(model, tokenizer, prompts)
+    results = hf_score(accelerator, model, tokenizer, prompts)

     if accelerator.is_main_process:
-        save_jsonl(results, args.out)
+        score_path = result_dir / "scores.jsonl"
+        save_jsonl(results, score_path)
+
+        # compute pass@k
+        eval_result_path = result_dir / "passk.jsonl"
+        # results = load_jsonl(score_path)
+        groups = group_results(results, args.apps)
+        eval_results = [score_pass_at_k(groups, k, home_path.stem) for k in range(1, 16)]
+        save_jsonl(eval_results, eval_result_path)
+        pprint.pp(eval_results)


 if __name__ == "__main__":
...
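Note (not part of the commit): the hunk collapsed at @@ -35,32 +37,43 @@ hides how the per-process results are merged. With accelerator.split_between_processes each rank scores only its own shard, so the shards are typically recombined with accelerate.utils.gather_object before the main process writes scores.jsonl. A minimal sketch of that pattern, where score_fn is a placeholder for the per-prompt scoring loop above:

import accelerate
from accelerate.utils import gather_object

def score_sharded(accelerator, prompts, score_fn):
    # each process handles only its slice of the prompt list
    with accelerator.split_between_processes(prompts) as shard:
        local_results = [score_fn(p) for p in shard]
    # gather the picklable per-process lists into one list on every rank
    return gather_object(local_results)

# usage sketch:
#   accelerator = accelerate.Accelerator()
#   results = score_sharded(accelerator, prompts, score_fn)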
@@ -15,14 +15,6 @@ from codecritic.utils.json import load_jsonl, save_jsonl
 from codecritic.utils.metric import group_results, score_pass_at_k


-def preprocess_test_item(item):
-    question = item["messages"][0]["content"]
-    answer = item["messages"][1]["content"]
-    code = code_template.format(extract_code(answer))
-    item["messages"] = mk_message(question, code)
-    return item


 def append_prompt(item, content):
     item["messages"].append({"role": "user", "content": content})
     return item
@@ -34,8 +26,7 @@ def run_sft_model(model_path, test_path, apps_path, reason_prompt, model_gpu):
     result_dir.mkdir(exist_ok=True)

     # preprocess prompt
-    raw_test_dataset = load_jsonl(test_path)
-    test_dataset = [preprocess_test_item(item) for item in raw_test_dataset]
+    test_dataset = load_jsonl(test_path)

     # reason
     if reason_prompt:
...
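Background on the new pass@k step (score_pass_at_k(groups, k, home_path.stem) for k = 1..15): the repository's metric helpers are not shown in this commit, so whether they use the unbiased estimator from the Codex paper or a score-ranked variant is an assumption; the standard estimator is sketched below for reference.

import math

def pass_at_k(n: int, c: int, k: int) -> float:
    # unbiased estimator: 1 - C(n - c, k) / C(n, k),
    # with n sampled solutions per problem, c of which pass the tests
    if n - c < k:
        return 1.0
    return 1.0 - math.comb(n - c, k) / math.comb(n, k)

# e.g. 16 sampled solutions, 4 passing, k = 5
print(round(pass_at_k(16, 4, 5), 4))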