Commit b319f162 by nanziyuan

fix gen_dataset bugs

parent b65ddb76
import argparse
import os
import json
from functools import partial
from collections import defaultdict
......@@ -21,7 +22,7 @@ def transform_to_prompt(apps, tokenizer):
for split in ["train", "test"]:
dataset = apps[split]
for item in dataset:
task_id = split + "-" + str(item["id"])
task_id = split + "-" + str(item["problem_id"])
try:
json.loads(item["input_output"])
except ValueError:
......@@ -40,7 +41,7 @@ def transform_to_prompt(apps, tokenizer):
prompts.append(
{
"dataset": "apps-" + item["difficulty"],
"task_id": "task_id",
"task_id": task_id,
"messages": prompt,
}
)
......@@ -58,6 +59,7 @@ if __name__ == "__main__":
"--gpu", type=int, default=1, help="gpu number required by one model"
)
args = parser.parse_args()
os.environ["TOKENIZERS_PARALLELISM"] = "false"
apps = load_dataset(args.apps)
tokenizer = AutoTokenizer.from_pretrained(args.model)
......@@ -79,10 +81,11 @@ if __name__ == "__main__":
# postprocess
grouped = defaultdict(list)
for sample in dataset:
grouped[sample["task_id"]] = sample
grouped[sample["task_id"]].append(sample)
def is_in_test(task_id):
split, idx = task_id.split("-")
idx = int(idx)
if split == "test":
for start, end in [(0, 300), (3000, 3100), (4000, 4100)]:
if start <= idx < end:
......
......@@ -28,7 +28,7 @@ def pass_at_k(samples, ks: list[int]):
# groupby taskid
grouped = defaultdict(list)
for sample in samples:
grouped[sample["task_id"]] = sample
grouped[sample["task_id"]].append(sample)
num_samples, num_correct = [], []
for task_id, group in grouped.items():
......@@ -65,7 +65,7 @@ def pos_neg_filter_uncertain(item, threshold):
def top_at_k(samples, ks: list[int], score_func):
grouped = defaultdict(list)
for sample in samples:
grouped[sample["task_id"]] = sample
grouped[sample["task_id"]].append(sample)
num_samples, first_pass_indices = [], []
for task_id, group in grouped.items():
......
# -x: echo every command before it runs; -e: abort on the first failing command.
set -xe

# Model path passed to gen_dataset via --model.
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
# Project root; the train/test sample files below live under ${project}/data.
project="/lustre/S/nanziyuan/projects/ccc"

# APPS
# Expose only GPUs 0-3 to the process. All expansions are quoted so a path
# containing spaces or glob characters is passed as a single argument.
CUDA_VISIBLE_DEVICES=0,1,2,3 \
python -m codecritic.cli.gen_dataset \
    --model "${model}" \
    --apps /lustre/S/nanziyuan/datasets/apps/ \
    --train "${project}/data/train/apps_train_samples.jsonl" \
    --test "${project}/data/test/apps_test_samples.jsonl"

# HumanEval & MBPP
# HumanEvalPack
# BigCodeBench
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment