Commit d0f8150e by nanziyuan

step1: improve evaluate_code multiprocessing performance by using imap_unordered with a chunksize

parent 2b493df5
...@@ -3,13 +3,11 @@ ...@@ -3,13 +3,11 @@
import json import json
import multiprocessing import multiprocessing
from multiprocessing import Pool
import numpy as np import numpy as np
from typing import Dict from typing import Dict
from datasets import load_dataset from datasets import load_dataset
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map
from step1_apps_test import run_test from step1_apps_test import run_test
from utils import extract_code, read_config, load_jsonl, save_jsonl from utils import extract_code, read_config, load_jsonl, save_jsonl
...@@ -85,7 +83,9 @@ def evaluate_code_samples(code_samples: list, dataset_path: str): ...@@ -85,7 +83,9 @@ def evaluate_code_samples(code_samples: list, dataset_path: str):
args = [(get_apps_item(sample), sample) for sample in code_samples] args = [(get_apps_item(sample), sample) for sample in code_samples]
cpu_num = multiprocessing.cpu_count() cpu_num = multiprocessing.cpu_count()
results = process_map(test_generation, args, max_workers=cpu_num) with Pool(processes=cpu_num) as pool:
results = list(pool.imap_unordered(test_generation, args, chunksize=1000))
return results return results
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment