Commit d0f8150e by nanziyuan

step1: improve evaluate_code multiprocessing performance with imap_unordered and chunksize

parent 2b493df5
......@@ -3,13 +3,11 @@
import json
import multiprocessing
from multiprocessing import Pool
import numpy as np
from typing import Dict
from datasets import load_dataset
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map
from step1_apps_test import run_test
from utils import extract_code, read_config, load_jsonl, save_jsonl
......@@ -85,7 +83,9 @@ def evaluate_code_samples(code_samples: list, dataset_path: str):
args = [(get_apps_item(sample), sample) for sample in code_samples]
cpu_num = multiprocessing.cpu_count()
results = process_map(test_generation, args, max_workers=cpu_num)
with Pool(processes=cpu_num) as pool:
results = list(pool.imap_unordered(test_generation, args, chunksize=1000))
return results
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment