Commit d66937e2 by nanziyuan

adjust performance for apps eval

parent 91cf4380
......@@ -81,10 +81,8 @@ def evaluate_code_samples(code_samples, apps):
split, idx = task_id.split('-')
args.append((apps[split][int(idx)], sample))
cpu_num = multiprocessing.cpu_count()
# chunksize = max(len(code_samples) // (cpu_num * 5), 1)
chunksize = 10000
# TODO performance?
cpu_num = multiprocessing.cpu_count() // 2
chunksize = max(len(code_samples) // (cpu_num * 10), 1)
results = process_map(
test_generation, args, max_workers=cpu_num, chunksize=chunksize
)
......@@ -112,7 +110,7 @@ def evaluate(code_samples, apps):
assert len(set(x["solution_id"] for x in lst)) == 1, "Mismatched solution_id"
task_id, solution_id = lst[0]["task_id"], lst[0]["solution_id"]
if all(x["compilerr"] for x in lst):
is_pass = False
else:
......
......@@ -2,16 +2,32 @@ set -xe
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"
modelname="qwen25_coder_inst"
# APPS
CUDA_VISIBLE_DEVICES=0,1,2,3 \
# CUDA_VISIBLE_DEVICES=0,1,2,3 \
python -m codecritic.cli.gen_dataset \
--model ${model} \
--apps /lustre/S/nanziyuan/datasets/apps/ \
--train "${project}/data/train/apps_train_samples.jsonl" \
--test "${project}/data/test/apps_test_samples.jsonl"
--train "${project}/data/train/${modelname}-apps-train.jsonl" \
--test "${project}/data/test/${modelname}-apps-test.jsonl"
# HumanEval & MBPP
# evalplus.evaluate \
# --model ${model} \
# --n_samples 50 \
# --temperature 0.8 \
# --dataset humaneval \
# --root "${project}/data/test/${modelname}-humaneval" \
# --backend vllm
# evalplus.evaluate \
# --model ${model} \
# --n_samples 50 \
# --temperature 0.8 \
# --dataset mbpp \
# --root "${project}/data/test/${modelname}-mbpp" \
# --backend vllm
# HumanEvalPack
......
set -xe

# Model / project locations for the 32B instruct run.
MODEL="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-32B-Instruct"
PROJECT="/lustre/S/nanziyuan/projects/ccc"
MODEL_NAME="qwen25_coder_inst_32b"

# --- APPS: generate train/test samples ---
# CUDA_VISIBLE_DEVICES=0,1,2,3 \
python -m codecritic.cli.gen_dataset \
    --model "${MODEL}" \
    --apps /lustre/S/nanziyuan/datasets/apps/ \
    --train "${PROJECT}/data/train/${MODEL_NAME}-apps-train.jsonl" \
    --test "${PROJECT}/data/test/${MODEL_NAME}-apps-test.jsonl" \
    --tp 2

# --- HumanEval & MBPP: sample with evalplus (same settings, two datasets) ---
for dataset in humaneval mbpp; do
    evalplus.evaluate \
        --model "${MODEL}" \
        --n_samples 50 \
        --temperature 0.8 \
        --dataset "${dataset}" \
        --root "${PROJECT}/data/test/${MODEL_NAME}-${dataset}" \
        --backend vllm \
        --tp 2
done

# HumanEvalPack
# BigCodeBench
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment