Commit e2d03f03 by nzy

step4: refactor sftorm

parent 38be25e7
from utils_vllm_bkp import vllm_chatcomplete, vllm_score
from utils_vllm import vllm_chatcomplete, vllm_score
from utils import read_config, load_jsonl, save_jsonl
from utils_metric import group_results, score_pass_at_k
from utils_preference_dataset import mk_critic_verify
from transformers import AutoTokenizer
from utils_dataset import mk_critic_verify, get_score_token_id
if __name__ == "__main__":
......@@ -14,10 +14,7 @@ if __name__ == "__main__":
cfg["critic"]["test"]["sampling_params"]
)
tokenizer = AutoTokenizer.from_pretrained(cfg["model"])
score_tokens = tokenizer.encode("Yes", add_special_tokens=False)
assert len(score_tokens) == 1
score_token = score_tokens[0]
score_token = get_score_token_id(args.model)
reason_results = load_jsonl(cfg["critic"]["test"]["reason_result_path"])
score_prompts = []
......
import argparse
from pathlib import Path
import pprint
from utils_vllm import vllm_score
from utils import read_config, load_jsonl, save_jsonl, extract_code, code_template
from utils_dataset import mk_critic_qa, mk_critic_verify
from utils import load_jsonl, save_jsonl, extract_code, code_template
from utils_dataset import mk_critic_qa, mk_critic_verify, get_score_token_id
from utils_metric import group_results, score_pass_at_k
from transformers import AutoTokenizer
def preprocess_test_item(item):
......@@ -14,26 +16,31 @@ def preprocess_test_item(item):
if __name__ == "__main__":
    # Evaluate an SFT-ORM (outcome reward model) checkpoint on a test set.
    # Post-refactor flow: all paths come from the CLI instead of read_config(),
    # and results are written next to the checkpoint under <model_dir>/eval/.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)  # HF name or local checkpoint path
    parser.add_argument("--test", type=str)   # JSONL test dataset path
    parser.add_argument("--apps", type=str)   # APPS dataset ref for grouping results
    args = parser.parse_args()

    home_path = Path(args.model).parent
    result_dir = home_path / "eval"
    result_dir.mkdir(exist_ok=True)

    # preprocess prompt: turn each raw test item into a scoring prompt
    prompt_path = result_dir / "prompt.jsonl"
    raw_test_dataset = load_jsonl(args.test)
    test_dataset = [preprocess_test_item(item) for item in raw_test_dataset]
    save_jsonl(test_dataset, prompt_path)

    # score: probability of the verdict token (e.g. "Yes") under the model
    score_path = result_dir / "scores.jsonl"
    score_token = get_score_token_id(args.model)
    vllm_score(args.model, prompt_path, score_path, score_token)

    # compute pass@k for k = 1..15, grouped per problem
    eval_result_path = result_dir / "passk.jsonl"
    results = load_jsonl(score_path)
    groups = group_results(results, args.apps)
    eval_results = [score_pass_at_k(groups, k, "sft-orm") for k in range(1, 16)]
    save_jsonl(eval_results, eval_result_path)
    pprint.pp(eval_results)
from utils import load_json, save_json, code_template
from transformers import AutoTokenizer
def mk_preference_dataset_info(dataset_name):
......@@ -87,3 +88,10 @@ def save_dataset(llamafactory_path, dataset_info, dataset):
dataset_name = dataset_info.keys()[0]
dataset_relative_path = dataset_info[dataset_name]["file_name"]
save_json(dataset, f"{llamafactory_path}/data/{dataset_relative_path}")
def get_score_token_id(model_path, token_str="Yes"):
    """Return the single token id that ``token_str`` encodes to.

    The verdict string must map to exactly one token so that vLLM scoring
    can read its probability from a single logit position.

    Args:
        model_path: HF model name or local checkpoint path whose tokenizer
            is loaded via ``AutoTokenizer.from_pretrained``.
        token_str: Verdict string to encode (default ``"Yes"``).

    Returns:
        int: the id of the single token ``token_str`` encodes to.

    Raises:
        ValueError: if ``token_str`` encodes to zero or multiple tokens.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    score_tokens = tokenizer.encode(token_str, add_special_tokens=False)
    # An assert would be stripped under `python -O`; validate explicitly.
    if len(score_tokens) != 1:
        raise ValueError(
            f"{token_str!r} must encode to exactly one token, "
            f"got {len(score_tokens)}: {score_tokens}"
        )
    return score_tokens[0]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.