Commit afcf4289 by nzy

refactor: move all runnable scripts to codecritic.cli

parent ab8ccadd
import argparse
from itertools import chain
from codecritic.data.cov import (
convert_preference_to_vot_prompt,
convert_cov_to_cov_dataset,
)
from codecritic.utils.json import load_json
from codecritic.utils.data import save_jsonl_dataset
from codecritic.utils.vllm import vllm_chatcomplete
def parse_args(argv=None):
    """Parse the command-line options of the CoV dataset generation script.

    Args:
        argv: Optional list of argument strings. When ``None``, argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``model``, ``preference_dataset`` and
        ``output_dir`` attributes.
    """
    parser = argparse.ArgumentParser()
    # Every option is consumed unconditionally by main(); make them required
    # so a missing flag is reported by argparse instead of passing None on.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--preference_dataset", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    return parser.parse_args(argv)


def main(argv=None):
    """Build verification prompts, run the model on them and save the dataset.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``.
    """
    args = parse_args(argv)

    preference_dataset = load_json(args.preference_dataset)

    # Each preference item expands to an iterable of prompts; flatten them
    # into a single list for the completion call.
    cov_prompts = list(
        chain.from_iterable(
            convert_preference_to_vot_prompt(x) for x in preference_dataset
        )
    )

    # One completion per prompt at temperature 0.0, capped at 2048 tokens.
    sampling_params = dict(n=1, temperature=0.0, max_tokens=2048)
    covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params)

    dataset = [convert_cov_to_cov_dataset(cov) for cov in covs]
    save_jsonl_dataset(dataset, args.output_dir)


if __name__ == "__main__":
    main()
# Additional Experiment:
# Does reasoning really work? Let's verify step by step.
import argparse
from itertools import chain
from codecritic.utils.json import load_json
from codecritic.utils.data import (
extract_code,
code_template,
mk_message,
mk_messages,
mk_critic_verify,
save_jsonl_dataset,
SPLITTER,
)
......@@ -80,23 +76,3 @@ def convert_cov_to_cov_dataset(item):
raise ValueError("Invalid prompt")
item["messages"] += mk_critic_verify(is_correct)
return item
def build_arg_parser():
    """Create the argument parser for running this module as a script.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``--model``,
        ``--preference_dataset`` and ``--output_dir``.
    """
    parser = argparse.ArgumentParser()
    # All three values are used unconditionally below; require them so that
    # a missing flag fails in argparse rather than as a None further down.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--preference_dataset", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    return parser


def cli_main(argv=None):
    """Generate the CoV dataset from a preference dataset and write it out.

    Args:
        argv: Optional list of argument strings. When ``None``, argparse
            falls back to ``sys.argv[1:]``.
    """
    # Imported here so the vllm helper is only loaded when the script
    # entry point actually runs, not when this module is merely imported.
    from codecritic.utils.vllm import vllm_chatcomplete

    args = build_arg_parser().parse_args(argv)

    preference_dataset = load_json(args.preference_dataset)

    # Flatten the per-item prompt iterables into one list.
    cov_prompts = list(
        chain.from_iterable(
            convert_preference_to_vot_prompt(x) for x in preference_dataset
        )
    )

    # One completion per prompt at temperature 0.0, capped at 2048 tokens.
    sampling_params = dict(n=1, temperature=0.0, max_tokens=2048)
    covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params)

    dataset = [convert_cov_to_cov_dataset(cov) for cov in covs]
    save_jsonl_dataset(dataset, args.output_dir)


if __name__ == "__main__":
    cli_main()
import re
from codecritic.utils.json import save_jsonl
from pathlib import Path
from codecritic.utils.json import save_jsonl
# Captures the body of a ```python fenced block. The non-greedy group stops at
# the first closing fence, and DOTALL lets the body span multiple lines.
codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL)
code_template = """```python
{}
......@@ -69,22 +70,4 @@ def get_score_token_id(tokenizer, token_str="Yes"):
return score_tokens[0]
def mk_critic_reason(codedit, explanation):
    """Build a two-message chat turn asking for, and giving, a code edit.

    Args:
        codedit: The edit text placed inside a ```diff fenced block.
        explanation: The text placed under the "Explanation" heading.

    Returns:
        A list ``[user_message, assistant_message]`` of role/content dicts.
    """
    # Assemble the assistant reply line by line; the trailing empty entry
    # yields the final newline after the explanation.
    answer_lines = [
        "**Edited Code (in diff format):**",
        "```diff",
        f"{codedit}",
        "```",
        "**Explanation:**",
        f"{explanation}",
        "",
    ]
    return [
        {
            "role": "user",
            "content": "Edit your code in diff format to fix any issues and explain the changes.",
        },
        {"role": "assistant", "content": "\n".join(answer_lines)},
    ]
# Sentinel marker string. NOTE(review): presumably used as a delimiter when
# joining/splitting text elsewhere; its usage is not visible here, so confirm.
SPLITTER = "__I_wish_it_were_weekends_all_the_time.__"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment