Commit d631895d by nzy

step4: eval scored results

parent e2d2b230
...@@ -62,6 +62,7 @@ deepspeed_cfg_path = "" ...@@ -62,6 +62,7 @@ deepspeed_cfg_path = ""
[critic.test] [critic.test]
reason_result_path = "" reason_result_path = ""
score_result_path = "" score_result_path = ""
eval_result_path = ""
[critic.test.sampling_params] [critic.test.sampling_params]
n = 1 n = 1
...@@ -80,4 +81,5 @@ deepspeed_cfg_path = "" ...@@ -80,4 +81,5 @@ deepspeed_cfg_path = ""
[sftorm.test] [sftorm.test]
prompt_path = "" prompt_path = ""
score_result_path = "" score_result_path = ""
\ No newline at end of file eval_result_path = ""
\ No newline at end of file
...@@ -18,6 +18,6 @@ if __name__ == "__main__": ...@@ -18,6 +18,6 @@ if __name__ == "__main__":
# results = load_jsonl(result_path) # results = load_jsonl(result_path)
groups = group_results(results, cfg["apps"]) groups = group_results(results, cfg["apps"])
eval_results = [score_pass_at_k(groups, k, orm_test_model) for k in range(1, 32)] eval_results = [score_pass_at_k(groups, k, orm_test_model) for k in range(1, 16)]
save_jsonl(eval_results, cfg["orm"][orm_test_model]["eval_result_path"]) save_jsonl(eval_results, cfg["orm"][orm_test_model]["eval_result_path"])
print(eval_results) print(eval_results)
from utils_vllm import vllm_score from utils_vllm import vllm_score
from utils import read_config, load_jsonl, save_jsonl, extract_code from utils import read_config, load_jsonl, save_jsonl, extract_code
from utils_preference_dataset import code_template, mk_critic_qa, mk_critic_verify, mk_sft_item from utils_preference_dataset import code_template, mk_critic_qa, mk_critic_verify
from utils_metric import group_results, score_pass_at_k
from transformers import AutoTokenizer from transformers import AutoTokenizer
...@@ -29,4 +30,10 @@ if __name__ == "__main__": ...@@ -29,4 +30,10 @@ if __name__ == "__main__":
cfg["sftorm"]["test"]["prompt_path"], cfg["sftorm"]["test"]["prompt_path"],
cfg["sftorm"]["test"]["score_result_path"], cfg["sftorm"]["test"]["score_result_path"],
score_token score_token
) )
\ No newline at end of file
results = load_jsonl(cfg["sftorm"]["test"]["score_result_path"])
groups = group_results(results, cfg["apps"])
eval_results = [score_pass_at_k(groups, k, "sft-orm") for k in range(1, 16)]
save_jsonl(eval_results, cfg["sftorm"]["test"]["eval_result_path"])
print(eval_results)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment