Commit 2b493df5 by nanziyuan

step1: fix minor bugs and test code on a small scale

parent 6af73e89
@@ -68,7 +68,7 @@ def test_generation(args, debug=False):
     finally:
         assert isinstance(curr_res, list)
         problem_results = np.asarray(curr_res)
-        code_sample["eval_result"] = np.all(problem_results > 0)
+        code_sample["eval_result"] = bool(np.all(problem_results > 0))
     return code_sample
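Note on the `bool(...)` cast above: `np.all` returns a `numpy.bool_`, which the standard-library `json` encoder refuses to serialize, so the cast keeps `eval_result` writable to JSONL. A minimal sketch of the failure mode (the sample data here is purely illustrative):

```python
import json
import numpy as np

problem_results = np.asarray([1, 1, 1])  # illustrative per-test results

raw = np.all(problem_results > 0)        # numpy.bool_, not a Python bool
# json.dumps({"eval_result": raw})       # TypeError: Object of type bool_ is not JSON serializable

record = {"eval_result": bool(raw)}      # plain bool serializes cleanly
print(json.dumps(record))                # {"eval_result": true}
```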
@@ -101,4 +101,4 @@ if __name__ == "__main__":
         cfg["sample"]["sample_result_path"],
         cfg["apps"],
         cfg["evaluate"]["evaluate_result_path"],
-    )
\ No newline at end of file
+    )
@@ -3,7 +3,13 @@ import json
 import os
 import re
 from codebleu import calc_codebleu
-import tomllib
+import sys
+if sys.version_info < (3, 11):
+    import tomli as tomllib
+else:
+    import tomllib
 def load_jsonl(file_path):
@@ -46,4 +52,4 @@ def read_config():
     args = argparser.parse_args()
     with open(args.config, "rb") as f:
-        return tomllib.load(f)
\ No newline at end of file
+        return tomllib.load(f)
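Note on the import guard above: `tomllib` only entered the standard library in Python 3.11, so falling back to the API-compatible `tomli` backport lets `read_config` run on older interpreters. A minimal standalone sketch, assuming a hypothetical `config.toml` path:

```python
import sys

if sys.version_info < (3, 11):
    import tomli as tomllib   # backport: pip install tomli
else:
    import tomllib            # stdlib since Python 3.11

# Both modules require the file to be opened in binary mode.
with open("config.toml", "rb") as f:   # hypothetical config path
    cfg = tomllib.load(f)

print(cfg["evaluate"]["evaluate_result_path"])
```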
@@ -2,19 +2,20 @@ from vllm import LLM, SamplingParams
 import os
 import multiprocessing
+from itertools import chain
 from functools import partial
 from utils import load_jsonl, save_jsonl
-def worker(cuda_device, prompts, model, sampling_params):
+def worker(cuda_device, prompts, model_path, sampling_params):
     os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
-    llm = LLM(model=model, seed=42, max_model_len=8 * 1024)
+    llm = LLM(model=model_path, seed=42, max_model_len=8 * 1024, swap_space=16)
     tokenizer = llm.get_tokenizer()
     stop_tokens = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
-    print(f"SUCCESS: load llm {model} on cuda {cuda_device}")
+    print(f"SUCCESS: load llm {model_path} on cuda {cuda_device}")
     vllm_sampling_params = SamplingParams(
         n=sampling_params['n'],
@@ -25,14 +26,19 @@ def worker(cuda_device, prompts, model, sampling_params):
     )
     text_prompts = [tokenizer.apply_chat_template(item["messages"], tokenize=False, add_generation_prompt=True) for item in prompts]
-    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=False)
+    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=True)
+    result = []
     for item, output in zip(prompts, outputs):
         for response in output.outputs:
             generated_text = response.text
-            item["messages"].append({"role": "assistant", "content": generated_text})
-    return True
+            messages, newitem = item["messages"].copy(), item.copy()
+            messages.append({"role": "assistant", "content": generated_text})
+            newitem["messages"] = messages
+            result.append(newitem)
+    return result
 def vllm_inference(model_path, prompt_path, output_path, sampling_params):
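Note on the worker's return value: the old worker appended every sampled completion onto the shared prompt record and returned only `True`, so the generated text never made it back through `Pool.starmap`. The new version clones the record per completion and returns the list. A runnable sketch of that copy-per-response pattern, using `SimpleNamespace` stand-ins for vLLM's output objects:

```python
from types import SimpleNamespace

prompts = [{"task_id": 0, "messages": [{"role": "user", "content": "hi"}]}]
# Two sampled completions for the single prompt (stand-in for llm.generate output).
outputs = [SimpleNamespace(outputs=[SimpleNamespace(text=t) for t in ("a", "b")])]

result = []
for item, output in zip(prompts, outputs):
    for response in output.outputs:
        # Copy both the record and its message list so each completion
        # becomes an independent row instead of piling onto one shared list.
        messages, newitem = item["messages"].copy(), item.copy()
        messages.append({"role": "assistant", "content": response.text})
        newitem["messages"] = messages
        result.append(newitem)

print(len(result))  # 2 rows, one per sampled completion
```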
@@ -51,7 +57,8 @@ def vllm_inference(model_path, prompt_path, output_path, sampling_params):
     worker_llm = partial(worker, model_path=model_path, sampling_params=sampling_params)
     with multiprocessing.Pool(gpu_num) as pool:
-        status = pool.starmap(worker_llm, args)
-    print(f"Execution Status: {all(status)}")
+        nested_results = pool.starmap(worker_llm, args)
-    save_jsonl(prompts, output_path)
\ No newline at end of file
+    results = list(chain(*nested_results))
+    print(f"size of dataset: {len(results)}")
+    save_jsonl(results, output_path)
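Note on the dispatch in `vllm_inference`: each GPU gets its own worker process (which pins itself via `CUDA_VISIBLE_DEVICES` before constructing the `LLM`), `partial` fixes the shared arguments, `starmap` pairs each `(cuda_device, prompt_chunk)` tuple with a worker, and `chain` flattens the per-GPU result lists before saving. The chunking below is a hypothetical reconstruction, since that part of the function is outside the visible hunks:

```python
from functools import partial
from itertools import chain
import multiprocessing


def worker(cuda_device, prompts, model_path, sampling_params):
    # Stand-in for the real vLLM worker: one output record per prompt.
    return [{"gpu": cuda_device, **p} for p in prompts]


def vllm_inference_sketch(model_path, prompts, sampling_params, gpu_num=2):
    # Hypothetical round-robin split of prompts, one chunk per GPU.
    chunks = [prompts[i::gpu_num] for i in range(gpu_num)]
    args = [(str(i), chunk) for i, chunk in enumerate(chunks)]

    worker_llm = partial(worker, model_path=model_path, sampling_params=sampling_params)
    with multiprocessing.Pool(gpu_num) as pool:
        nested_results = pool.starmap(worker_llm, args)

    # Each worker returns a list of records; flatten them into one dataset.
    return list(chain(*nested_results))


if __name__ == "__main__":
    data = vllm_inference_sketch("dummy-model", [{"id": i} for i in range(10)], {"n": 1})
    print(f"size of dataset: {len(data)}")  # size of dataset: 10
```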