Commit 2b493df5 by nanziyuan

step1: fix minor bugs and test code on a small scale

parent 6af73e89
@@ -68,7 +68,7 @@ def test_generation(args, debug=False):
     finally:
         assert isinstance(curr_res, list)
         problem_results = np.asarray(curr_res)
-        code_sample["eval_result"] = np.all(problem_results > 0)
+        code_sample["eval_result"] = bool(np.all(problem_results > 0))
     return code_sample
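Note on the `bool(...)` cast above: `np.all` returns a `numpy.bool_`, which the standard-library `json` encoder refuses to serialize, so the cast keeps `eval_result` writable to JSONL. A minimal sketch of the failure mode (the sample data here is purely illustrative):

```python
import json
import numpy as np

problem_results = np.asarray([1, 1, 1])  # illustrative per-test results

raw = np.all(problem_results > 0)        # numpy.bool_, not a Python bool
# json.dumps({"eval_result": raw})       # TypeError: Object of type bool_ is not JSON serializable

record = {"eval_result": bool(raw)}      # plain bool serializes cleanly
print(json.dumps(record))                # {"eval_result": true}
```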
@@ -101,4 +101,4 @@ if __name__ == "__main__":
         cfg["sample"]["sample_result_path"],
         cfg["apps"],
         cfg["evaluate"]["evaluate_result_path"],
-    )
\ No newline at end of file
+    )
@@ -3,7 +3,13 @@ import json
 import os
 import re
 from codebleu import calc_codebleu
-import tomllib
+import sys
+if sys.version_info < (3, 11):
+    import tomli as tomllib
+else:
+    import tomllib
 def load_jsonl(file_path):
@@ -46,4 +52,4 @@ def read_config():
     args = argparser.parse_args()
     with open(args.config, "rb") as f:
-        return tomllib.load(f)
\ No newline at end of file
+        return tomllib.load(f)
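Note on the import guard above: `tomllib` only entered the standard library in Python 3.11, so falling back to the API-compatible `tomli` backport lets `read_config` run on older interpreters. A minimal standalone sketch, assuming a hypothetical `config.toml` path:

```python
import sys

if sys.version_info < (3, 11):
    import tomli as tomllib   # backport: pip install tomli
else:
    import tomllib            # stdlib since Python 3.11

# Both modules require the file to be opened in binary mode.
with open("config.toml", "rb") as f:   # hypothetical config path
    cfg = tomllib.load(f)

print(cfg["evaluate"]["evaluate_result_path"])
```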
@@ -2,19 +2,20 @@ from vllm import LLM, SamplingParams
 import os
 import multiprocessing
+from itertools import chain
 from functools import partial
 from utils import load_jsonl, save_jsonl
-def worker(cuda_device, prompts, model, sampling_params):
+def worker(cuda_device, prompts, model_path, sampling_params):
     os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
-    llm = LLM(model=model, seed=42, max_model_len=8 * 1024)
+    llm = LLM(model=model_path, seed=42, max_model_len=8 * 1024, swap_space=16)
     tokenizer = llm.get_tokenizer()
     stop_tokens = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
-    print(f"SUCCESS: load llm {model} on cuda {cuda_device}")
+    print(f"SUCCESS: load llm {model_path} on cuda {cuda_device}")
     vllm_sampling_params = SamplingParams(
         n=sampling_params['n'],
@@ -25,14 +26,19 @@ def worker(cuda_device, prompts, model, sampling_params):
     )
     text_prompts = [tokenizer.apply_chat_template(item["messages"], tokenize=False, add_generation_prompt=True) for item in prompts]
-    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=False)
+    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=True)
+    result = []
     for item, output in zip(prompts, outputs):
         for response in output.outputs:
             generated_text = response.text
-            item["messages"].append({"role": "assistant", "content": generated_text})
-    return True
+            messages, newitem = item["messages"].copy(), item.copy()
+            messages.append({"role": "assistant", "content": generated_text})
+            newitem["messages"] = messages
+            result.append(newitem)
+    return result
 def vllm_inference(model_path, prompt_path, output_path, sampling_params):
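Note on the worker's return value: the old worker appended every sampled completion onto the shared prompt record and returned only `True`, so the generated text never made it back through `Pool.starmap`. The new version clones the record per completion and returns the list. A runnable sketch of that copy-per-response pattern, using `SimpleNamespace` stand-ins for vLLM's output objects:

```python
from types import SimpleNamespace

prompts = [{"task_id": 0, "messages": [{"role": "user", "content": "hi"}]}]
# Two sampled completions for the single prompt (stand-in for llm.generate output).
outputs = [SimpleNamespace(outputs=[SimpleNamespace(text=t) for t in ("a", "b")])]

result = []
for item, output in zip(prompts, outputs):
    for response in output.outputs:
        # Copy both the record and its message list so each completion
        # becomes an independent row instead of piling onto one shared list.
        messages, newitem = item["messages"].copy(), item.copy()
        messages.append({"role": "assistant", "content": response.text})
        newitem["messages"] = messages
        result.append(newitem)

print(len(result))  # 2 rows, one per sampled completion
```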
@@ -51,7 +57,8 @@ def vllm_inference(model_path, prompt_path, output_path, sampling_params):
     worker_llm = partial(worker, model_path=model_path, sampling_params=sampling_params)
     with multiprocessing.Pool(gpu_num) as pool:
-        status = pool.starmap(worker_llm, args)
-    print(f"Execution Status: {all(status)}")
+        nested_results = pool.starmap(worker_llm, args)
-    save_jsonl(prompts, output_path)
\ No newline at end of file
+    results = list(chain(*nested_results))
+    print(f"size of dataset: {len(results)}")
+    save_jsonl(results, output_path)
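Note on the dispatch in `vllm_inference`: each GPU gets its own worker process (which pins itself via `CUDA_VISIBLE_DEVICES` before constructing the `LLM`), `partial` fixes the shared arguments, `starmap` pairs each `(cuda_device, prompt_chunk)` tuple with a worker, and `chain` flattens the per-GPU result lists before saving. The chunking below is a hypothetical reconstruction, since that part of the function is outside the visible hunks:

```python
from functools import partial
from itertools import chain
import multiprocessing


def worker(cuda_device, prompts, model_path, sampling_params):
    # Stand-in for the real vLLM worker: one output record per prompt.
    return [{"gpu": cuda_device, **p} for p in prompts]


def vllm_inference_sketch(model_path, prompts, sampling_params, gpu_num=2):
    # Hypothetical round-robin split of prompts, one chunk per GPU.
    chunks = [prompts[i::gpu_num] for i in range(gpu_num)]
    args = [(str(i), chunk) for i, chunk in enumerate(chunks)]

    worker_llm = partial(worker, model_path=model_path, sampling_params=sampling_params)
    with multiprocessing.Pool(gpu_num) as pool:
        nested_results = pool.starmap(worker_llm, args)

    # Each worker returns a list of records; flatten them into one dataset.
    return list(chain(*nested_results))


if __name__ == "__main__":
    data = vllm_inference_sketch("dummy-model", [{"id": i} for i in range(10)], {"n": 1})
    print(f"size of dataset: {len(data)}")  # size of dataset: 10
```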