Commit 462724df by nanziyuan

fix

parent 09dfbc06
import itertools
import json

import numpy as np
from typing import Any, Dict, List, Tuple, Union
import pandas as pd
SUCCESS = "success"
# unbiased estimator from https://github.com/openai/human-eval
def estimate_pass_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int,
) -> np.ndarray:
    """
    Estimates pass@k of each problem and returns them in an array.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).
        """
        if n - c < k:
            return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    if isinstance(num_samples, int):
        num_samples_it = itertools.repeat(num_samples, len(num_correct))
    else:
        assert len(num_samples) == len(num_correct)
        num_samples_it = iter(num_samples)

    return np.array(
        [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]
    )
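# Illustrative sanity check (added for this write-up, not part of the commit):
# with n = 10 samples and c = 3 correct, pass@1 = c / n = 0.3, and pass@10 = 1.0
# because at least one of the 10 samples must be correct.
# print(estimate_pass_at_k(10, [3], 1))   # -> array([0.3])
# print(estimate_pass_at_k(10, [3], 10))  # -> array([1.])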
def count(path):
    with open(path, "r") as f:
        results = json.load(f)
    total = np.array([r["nfiles"] for r in results["eval"].values()])
    base_correct = []
    new_correct = []
    for res in results["eval"].values():
        bc = sum([r[0] == SUCCESS for r in res["base"]])
        base_correct.append(bc)
        if res["plus"]:
            new_correct.append(
                sum(
                    [
                        res["plus"][i][0] == res["base"][i][0] == SUCCESS
                        for i in range(len(res["plus"]))
                    ]
                )
            )
    base_correct = np.array(base_correct)
    pass_at_k = [
        {"k": k, "pass@k": estimate_pass_at_k(total, base_correct, k).mean()}
        for k in range(1, total.min() + 1)
        if total.min() >= k
    ]
    with open("result.jsonl", "w") as f:
        for r in pass_at_k:
            f.write(json.dumps(r) + '\n')
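# Hedged usage note (not in the original commit): `count` appears to expect the
# results JSON written by the evalplus evaluator, where each entry under "eval"
# carries "nfiles", "base", and "plus" outcome lists. The filename below is
# purely illustrative.
# count("eval_results.json")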
import pandas as pd
from pathlib import Path
import logging
import json
from tqdm import tqdm
from transformers import AutoTokenizer
from dataclasses import dataclass
codellamas_path = Path("/lustre/models/CodeLlama_240112/")
models_path = {
"7b": "codellama-CodeLlama-7b-hf",
"13b": "codellama-CodeLlama-13b-hf",
"34b": "CodeLlama-34b-hf",
}
CODE_EOS = 28956 # '```' in codellama's tokenizer
EOS = 2 # default eos
STOP_TOKENS = [CODE_EOS, EOS]
def mk_prompt(question: str, answer: str) -> str:
    return question + answer


def count_token(encode, cache, r):
    program = r["completion"]
    question = cache[r["task_id"]]
    return len(encode(mk_prompt(question, program)))


def main(model_name, sample_path):
    model_path = codellamas_path / models_path[model_name]
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    with open("../testsets/humaneval/humaneval_prompt.jsonl", "r") as f:
        lines = [json.loads(l) for l in f.readlines()]
    question_cache = {l["task_id"]: l["prompt"] for l in lines}
    programs = pd.read_json(sample_path, lines=True)
    count_token_p = lambda r: count_token(tokenizer.encode, question_cache, r)
    programs["token_num"] = programs.apply(count_token_p, axis=1)
    print(programs["token_num"].mean())
if __name__ == "__main__":
    import argparse

    argparser = argparse.ArgumentParser()
    argparser.add_argument("name", type=str)
    argparser.add_argument("path", type=str)
    args = argparser.parse_args()
    logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        encoding='utf-8',
                        level=logging.INFO)
    logging.info(f"START: Testing codellama-{args.name}")
    main(args.name, args.path)
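# Example invocation (hypothetical script and file names, for illustration only):
#   python count_tokens.py 13b /path/to/samples.jsonl
# This loads the CodeLlama-13b tokenizer and prints the mean prompt+completion
# token count over the sampled programs.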
@@ -54,7 +54,7 @@ echo $(which python3)
cluster-quota # nas quota
#- Job step
python /lustre/S/nanziyuan/projects/codellama_test/src/eval.py -n {name} -s {nstart} -e {nend}
{test_program}
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
@@ -81,7 +81,7 @@ def mk_config(llm_name, testset_name, exp_id):
        completion_dir=completion_dir.as_posix(),
        k=1000,
        num_gpus=num_gpus,
        stop=["def"],
        stop=["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```"],
    )
    cfg_path = path / "config.json"
@@ -90,4 +90,4 @@ def mk_config(llm_name, testset_name, exp_id):
    mk_slurm(llm_name, num_gpus, cfg_path, slurm_dir)

if __name__ == "__main__":
    mk_config("70b_base", "apps_subset", 0)
    mk_config("7b_base", "humaneval", 1)
@@ -11,8 +11,25 @@ def merge_jsonl(completion_path, target_path):
            with open(f, "r") as infile:
                outfile.write(infile.read())
def mk_test_slurm(exp_path):
    name = Path(exp_path).stem
def mk_eval(exp_path):
    exp_path = Path(exp_path)
    name = exp_path.stem
    eval_path = EXP / f"{name}-test"
    eval_path.mkdir()
    completion_path = exp_path / "completion"
    merge_jsonl(completion_path, eval_path)
    testset = name.split('-')[1]
    if testset == "humaneval":
        test_program = [f"cd {eval_path.absolute()}",
                        "evalplus.evaluate --dataset humaneval --samples samples.jsonl"]
    else:
        raise NotImplementedError(f"Unknown testset: {testset}")
    test_program = "\n".join(test_program)
    with open("cpu.slurm.template", "r") as f:
        template = f.read()
    with open(eval_path / "cpu.slurm", "w") as f:
        f.write(template.format(name=name, test_program=test_program))