Commit afcf4289 by nzy

refactor: move all runnable scripts to codecritic.cli

parent ab8ccadd
import argparse
from itertools import chain
from codecritic.data.cov import (
convert_preference_to_vot_prompt,
convert_cov_to_cov_dataset,
)
from codecritic.utils.json import load_json
from codecritic.utils.data import save_jsonl_dataset
from codecritic.utils.vllm import vllm_chatcomplete
if __name__ == "__main__":
    # Command-line entry point: build prompts from a preference dataset, run
    # them through a model, and save the converted results as a JSONL dataset.
    parser = argparse.ArgumentParser()
    # Every option is consumed unconditionally below, so a missing one is
    # rejected by argparse up front instead of propagating as ``None``.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--preference_dataset", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args()

    preference_dataset = load_json(args.preference_dataset)

    # Each preference item is converted to an iterable of prompts; flatten
    # them into a single list. ``from_iterable`` avoids unpacking the whole
    # dataset into positional arguments.
    cov_prompts = list(
        chain.from_iterable(
            convert_preference_to_vot_prompt(x) for x in preference_dataset
        )
    )

    # One completion per prompt at temperature 0.0, capped at 2048 tokens.
    sampling_params = dict(n=1, temperature=0.0, max_tokens=2048)
    covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params)

    dataset = list(map(convert_cov_to_cov_dataset, covs))
    save_jsonl_dataset(dataset, args.output_dir)
# Additional Experiment: # Additional Experiment:
# Does reasoning really work? Let's verify step by step. # Does reasoning really work? Let's verify step by step.
import argparse
from itertools import chain
from codecritic.utils.json import load_json
from codecritic.utils.data import ( from codecritic.utils.data import (
extract_code, extract_code,
code_template, code_template,
mk_message, mk_message,
mk_messages, mk_messages,
mk_critic_verify, mk_critic_verify,
save_jsonl_dataset,
SPLITTER, SPLITTER,
) )
...@@ -80,23 +76,3 @@ def convert_cov_to_cov_dataset(item): ...@@ -80,23 +76,3 @@ def convert_cov_to_cov_dataset(item):
raise ValueError("Invalid prompt") raise ValueError("Invalid prompt")
item["messages"] += mk_critic_verify(is_correct) item["messages"] += mk_critic_verify(is_correct)
return item return item
if __name__ == "__main__":
    # Imported inside the guard so that this import only runs when the module
    # is executed as a script, not when it is imported.
    from codecritic.utils.vllm import vllm_chatcomplete

    # Script entry point: build prompts from a preference dataset, run them
    # through a model, and save the converted results as a JSONL dataset.
    parser = argparse.ArgumentParser()
    # Every option is consumed unconditionally below, so a missing one is
    # rejected by argparse up front instead of propagating as ``None``.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--preference_dataset", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args()

    preference_dataset = load_json(args.preference_dataset)

    # Flatten the per-item prompt iterables into one list without unpacking
    # the whole dataset into positional arguments.
    per_item_prompts = (
        convert_preference_to_vot_prompt(x) for x in preference_dataset
    )
    cov_prompts = list(chain.from_iterable(per_item_prompts))

    # One completion per prompt at temperature 0.0, capped at 2048 tokens.
    sampling_params = dict(n=1, temperature=0.0, max_tokens=2048)
    covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params)

    dataset = list(map(convert_cov_to_cov_dataset, covs))
    save_jsonl_dataset(dataset, args.output_dir)
import re import re
from codecritic.utils.json import save_jsonl
from pathlib import Path from pathlib import Path
from codecritic.utils.json import save_jsonl
codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL) codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL)
code_template = """```python code_template = """```python
{} {}
...@@ -69,22 +70,4 @@ def get_score_token_id(tokenizer, token_str="Yes"): ...@@ -69,22 +70,4 @@ def get_score_token_id(tokenizer, token_str="Yes"):
return score_tokens[0] return score_tokens[0]
def mk_critic_reason(codedit: str, explanation: str) -> list[dict[str, str]]:
    """Build a two-message chat turn asking for, and giving, a code edit.

    Args:
        codedit: Text placed inside the ``diff`` fenced block of the answer.
        explanation: Text placed under the "Explanation" heading.

    Returns:
        A list of two message dicts with ``role`` and ``content`` keys: the
        fixed user request followed by the assistant answer that embeds
        ``codedit`` and ``explanation``.
    """
    user_question = {
        "role": "user",
        "content": "Edit your code in diff format to fix any issues and explain the changes.",
    }
    # The template is kept flush-left so that no indentation leaks into the
    # message; the trailing backslash suppresses the leading newline.
    llm_answer_content = f"""\
**Edited Code (in diff format):**
```diff
{codedit}
```
**Explanation:**
{explanation}
"""
    llm_answer = {"role": "assistant", "content": llm_answer_content}
    return [user_question, llm_answer]
SPLITTER = "__I_wish_it_were_weekends_all_the_time.__" SPLITTER = "__I_wish_it_were_weekends_all_the_time.__"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment