Commit 4fdf6d7d by nanziyuan

step4: test_critic_model fix bugs

parent b78d979f
-from utils_vllm import vllm_chatcomplete, vllm_score
-from utils import read_config
+from utils_vllm_bkp import vllm_chatcomplete, vllm_score
+from utils import read_config, load_jsonl, save_jsonl
+from utils_metric import group_results, score_pass_at_k
+from utils_preference_dataset import mk_critic_verify
+from transformers import AutoTokenizer
@@ -7,19 +9,33 @@ if __name__ == "__main__":
     cfg = read_config()
     vllm_chatcomplete(
         cfg["critic"]["model_path"],
-        cfg["dataset"]["minimal_test_path"],
+        cfg["critic"]["test"]["prompt_path"],
         cfg["critic"]["test"]["reason_result_path"],
         cfg["critic"]["test"]["sampling_params"]
     )
     tokenizer = AutoTokenizer.from_pretrained(cfg["model"])
-    score_tokens = tokenizer.encode("Yes")
+    score_tokens = tokenizer.encode("Yes", add_special_tokens=False)
     assert len(score_tokens) == 1
     score_token = score_tokens[0]
+    reason_results = load_jsonl(cfg["critic"]["test"]["reason_result_path"])
+    score_prompts = []
+    for item in reason_results:
+        item["messages"] += mk_critic_verify()
+        score_prompts.append(item)
+    save_jsonl(score_prompts, "test_score_prompt.jsonl")
     vllm_score(
         cfg["critic"]["model_path"],
-        cfg["critic"]["test"]["reason_result_path"],
+        "test_score_prompt.jsonl",
         cfg["critic"]["test"]["score_result_path"],
         score_token
-    )
\ No newline at end of file
+    )
+    results = load_jsonl(cfg["critic"]["test"]["score_result_path"])
+    groups = group_results(results, cfg["apps"])
+    eval_results = [score_pass_at_k(groups, k, "critic") for k in range(1, 16)]
+    save_jsonl(eval_results, cfg["critic"]["test"]["eval_result_path"])
+    print(eval_results)
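
Why a single score_token id is passed to vllm_score: the critic is asked a yes/no verification question, and its score is read off the probability it assigns to the "Yes" token at the first generated position, which is why the encode call must yield exactly one token id. The helper itself is not shown in this commit; the sketch below is a hypothetical, minimal version of that step using vLLM's offline LLM API with per-token logprobs (field names follow recent vLLM releases and may differ across versions; score_yes_probability is an illustrative name, not the repo's function).

import math
from vllm import LLM, SamplingParams

def score_yes_probability(model_path, conversations, score_token):
    # Hypothetical sketch: use the probability of the "Yes" token as the critic score.
    llm = LLM(model=model_path)
    params = SamplingParams(max_tokens=1, temperature=0.0, logprobs=20)
    outputs = llm.chat(conversations, params)  # conversations: list of chat message lists
    scores = []
    for out in outputs:
        first_step = out.outputs[0].logprobs[0]  # dict: token_id -> Logprob
        entry = first_step.get(score_token)
        scores.append(math.exp(entry.logprob) if entry is not None else 0.0)
    return scores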
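The final loop evaluates score_pass_at_k for k = 1..15 over results grouped per problem. Its implementation is not part of this diff; one plausible reading, sketched below purely as an assumption, is critic-guided selection: rank each problem's candidate solutions by critic score, keep the top k, and count the problem as solved if any kept candidate passes its tests. The field names ("score", "passed") are placeholders, not the repo's schema.

def critic_pass_at_k(groups, k):
    # Hypothetical sketch of score-ranked pass@k.
    # groups: dict problem_id -> list of {"score": float, "passed": bool} candidates.
    solved = 0
    for candidates in groups.values():
        top_k = sorted(candidates, key=lambda c: c["score"], reverse=True)[:k]
        if any(c["passed"] for c in top_k):
            solved += 1
    return solved / len(groups)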