Commit d631895d by nzy

step4: eval scored results

parent e2d2b230
......@@ -62,6 +62,7 @@ deepspeed_cfg_path = ""
[critic.test]
reason_result_path = ""
score_result_path = ""
eval_result_path = ""
[critic.test.sampling_params]
n = 1
......@@ -80,4 +81,5 @@ deepspeed_cfg_path = ""
[sftorm.test]
prompt_path = ""
score_result_path = ""
\ No newline at end of file
score_result_path = ""
eval_result_path = ""
\ No newline at end of file
......@@ -18,6 +18,6 @@ if __name__ == "__main__":
# results = load_jsonl(result_path)
groups = group_results(results, cfg["apps"])
eval_results = [score_pass_at_k(groups, k, orm_test_model) for k in range(1, 32)]
eval_results = [score_pass_at_k(groups, k, orm_test_model) for k in range(1, 16)]
save_jsonl(eval_results, cfg["orm"][orm_test_model]["eval_result_path"])
print(eval_results)
from utils_vllm import vllm_score
from utils import read_config, load_jsonl, save_jsonl, extract_code
from utils_preference_dataset import code_template, mk_critic_qa, mk_critic_verify, mk_sft_item
from utils_preference_dataset import code_template, mk_critic_qa, mk_critic_verify
from utils_metric import group_results, score_pass_at_k
from transformers import AutoTokenizer
......@@ -29,4 +30,10 @@ if __name__ == "__main__":
cfg["sftorm"]["test"]["prompt_path"],
cfg["sftorm"]["test"]["score_result_path"],
score_token
)
\ No newline at end of file
)
results = load_jsonl(cfg["sftorm"]["test"]["score_result_path"])
groups = group_results(results, cfg["apps"])
eval_results = [score_pass_at_k(groups, k, "sft-orm") for k in range(1, 16)]
save_jsonl(eval_results, cfg["sftorm"]["test"]["eval_result_path"])
print(eval_results)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment