import argparse
from itertools import chain

from codecritic.data.cov import (
    convert_preference_to_vot_prompt,
    convert_cov_to_cov_dataset,
)

from codecritic.utils.json import load_json
from codecritic.data.utils import save_jsonl_dataset
from codecritic.utils.vllm import vllm_chatcomplete


def parse_args(argv=None):
    """Parse the command-line options for this script.

    Args:
        argv: Optional list of argument strings. When ``None``, argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``model``, ``preference_dataset`` and
        ``output_dir`` string attributes.

    Raises:
        SystemExit: Raised by argparse when an option is missing or invalid.
    """
    parser = argparse.ArgumentParser()
    # All three options are consumed unconditionally by ``main``; marking
    # them required turns a late failure on ``None`` into an immediate
    # usage error.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--preference_dataset", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    return parser.parse_args(argv)


def main(argv=None):
    """Build prompts from a preference dataset, run the model, save results.

    Steps, in order:
      1. Load the JSON file given by ``--preference_dataset``.
      2. Convert every entry with ``convert_preference_to_vot_prompt`` and
         flatten the per-entry results into a single prompt list.
      3. Send the prompts to ``vllm_chatcomplete`` with the model given by
         ``--model``.
      4. Map ``convert_cov_to_cov_dataset`` over the completions and write
         the result with ``save_jsonl_dataset``.

    Args:
        argv: Optional list of argument strings; ``None`` means use
            ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    preference_dataset = load_json(args.preference_dataset)
    # Each entry is converted to an iterable of prompts (presumably more
    # than one per entry -- confirm against the converter), so flatten.
    cov_prompts = list(
        chain.from_iterable(
            convert_preference_to_vot_prompt(x) for x in preference_dataset
        )
    )

    # One sample per prompt at temperature 0.0, capped at 2048 new tokens.
    sampling_params = dict(n=1, temperature=0.0, max_tokens=2048)
    covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params)
    dataset = [convert_cov_to_cov_dataset(cov) for cov in covs]

    # NOTE(review): despite the option name, the value is passed straight to
    # save_jsonl_dataset; confirm whether it expects a directory or a file.
    save_jsonl_dataset(dataset, args.output_dir)


if __name__ == "__main__":
    main()
