Commit 80ebb3b6 by nzy

qwq

parent 8bf1b27d
import argparse
from itertools import chain
from codecritic.data.cov_with_diff import (
transform_preference_to_qwq_prompt,
transform_qwqout_to_trainset
)
from codecritic.utils.json import load_jsonl, save_jsonl
from codecritic.utils.vllm import vllm_chatcomplete
if __name__ == "__main__":
    # Command-line interface: the model to query, the preference dataset
    # (JSONL) to read, and the path the resulting training set is written to.
    arg_parser = argparse.ArgumentParser()
    for flag in ("--model", "--preference_dataset", "--out"):
        arg_parser.add_argument(flag, type=str)
    cli = arg_parser.parse_args()

    records = load_jsonl(cli.preference_dataset)

    # Every record is turned into several prompts; flatten them into one list.
    prompts = list(
        chain.from_iterable(
            transform_preference_to_qwq_prompt(record) for record in records
        )
    )

    # n=1 with temperature 0.0: a single completion per prompt.
    generation_config = {"n": 1, "temperature": 0.0, "max_tokens": 6144}
    completions = vllm_chatcomplete(cli.model, prompts, generation_config)

    # Persist the untouched model output before it is post-processed.
    save_jsonl(completions, cli.out + ".raw")

    trainset = [transform_qwqout_to_trainset(entry) for entry in completions]
    save_jsonl(trainset, cli.out)
\ No newline at end of file
from difflib import unified_diff
import re
from codecritic.data.code import extract_code
from codecritic.data.verify import mk_critic_verify
# System prompt for the reviewer model: it receives the problem, a solution,
# the known verdict (and a diff for incorrect solutions) and must produce a
# step-by-step rationale inside a ```rationale fenced block followed by a
# "Final Answer" line.
# Plain string on purpose: nothing is interpolated here, so an f-string prefix
# would only turn any future literal brace into an accidental placeholder.
SYS_PROMPT = """
You are an AI code reviewer tasked with analyzing code solutions to programming problems. You will be given a problem description, a code solution, and information about the solution's correctness. If the solution is incorrect, you will also be provided with a diff showing the differences between the given solution and a correct one.
Your task is to analyze the provided code *step-by-step*, pretending you do not know the final verdict of its correctness. Focus on understanding the code's logic, identifying potential issues, and reasoning through its execution.
Output your reasoning process within a markdown code block using the following format:
```rationale
[Your step-by-step reasoning here. Explain what the code does line by line, identify potential edge cases, and discuss possible errors. Be detailed and thorough.]
```
Finally, based on your analysis, state your conclusion about the code's correctness (either "Yes" or "No") using the following format:
Final Answer: (Yes or No)
"""
# User-message template, filled in with str.format().
# Placeholders: {problem}, {code}, {correctness} and {diff}.
# The template itself says the diff is only meaningful when the
# correctness field is "No".
USER_PROMPT = """
Problem:
{problem}
Code:
{code}
Correctness(Yes or No): {correctness}
Diff (Only if Correctness is "No"):
{diff}
"""
def transform_preference_to_qwq_prompt(item):
    """Build the reviewer prompts for both solutions of one preference pair.

    Args:
        item: Preference record. ``item["prompt"]``, ``item["chosen"]`` and
            ``item["rejected"]`` must each be a list holding exactly one
            message dict with a ``"content"`` key; ``item["problem_id"]`` is
            copied into the results.

    Returns:
        A 2-tuple ``(chosen_entry, rejected_entry)``. Each entry is a dict
        with ``"messages"`` (system prompt + user prompt), ``"eval_result"``
        (``True`` for the chosen solution, ``False`` for the rejected one),
        ``"problem_id"`` and ``"raw"`` (the original prompt message followed
        by the original answer message).

    Raises:
        AssertionError: If any of the three message lists does not contain
            exactly one message.
    """
    assert all(len(item[x]) == 1 for x in ["prompt", "chosen", "rejected"])

    problem = item["prompt"][0]["content"]
    chosen_code = item["chosen"][0]["content"]
    rejected_code = item["rejected"][0]["content"]

    # unified_diff() yields lines lazily; it has to be joined into text,
    # otherwise str.format() would embed the generator's repr in the prompt.
    # Lines are split without their terminators and joined with "\n" so a
    # final source line lacking a newline cannot be glued to the next diff line.
    diff_lines = unified_diff(
        extract_code(rejected_code).splitlines(),
        extract_code(chosen_code).splitlines(),
        fromfile="incorrect.py",
        tofile="correct.py",
        n=1,
        lineterm="",
    )
    diff = "\n".join(diff_lines)

    sys_message = {"role": "system", "content": SYS_PROMPT}

    # The correct solution is presented without any diff.
    chosen_message = {
        "role": "user",
        "content": USER_PROMPT.format(
            problem=problem, code=chosen_code, correctness="Yes", diff=""
        ),
    }
    # The incorrect solution comes with the diff towards the correct one.
    rejected_message = {
        "role": "user",
        "content": USER_PROMPT.format(
            problem=problem, code=rejected_code, correctness="No", diff=diff
        ),
    }

    return (
        {
            "messages": [sys_message, chosen_message],
            "eval_result": True,
            "problem_id": item["problem_id"],
            "raw": item["prompt"] + item["chosen"],
        },
        {
            "messages": [sys_message, rejected_message],
            "eval_result": False,
            "problem_id": item["problem_id"],
            "raw": item["prompt"] + item["rejected"],
        },
    )
# Captures everything between a ```rationale fence and the next closing fence;
# DOTALL lets the capture span multiple lines.
rationale_pattern = re.compile(r"```rationale(.+?)```", flags=re.DOTALL)


def extract_rationale(text):
    """Return the text found inside ```rationale fenced blocks of *text*.

    Each captured block is stripped of surrounding whitespace. No block gives
    an empty string, a single block is returned as-is, and several blocks are
    joined with newlines after printing a warning.
    """
    blocks = [chunk.strip() for chunk in rationale_pattern.findall(text)]

    if not blocks:
        return ""
    if len(blocks) == 1:
        return blocks[0]

    print("warning: multiple rationales")
    return "\n".join(blocks)
def transform_qwqout_to_trainset(item):
    """Turn one model-output record into a training conversation.

    Args:
        item: Record with ``"raw"`` (list of the original prompt/answer
            messages), ``"messages"`` (the conversation whose last element
            holds the model's reply), ``"eval_result"`` and ``"problem_id"``.

    Returns:
        A dict with ``"messages"`` (the raw messages, a user request for a
        step-by-step analysis, the extracted rationale as the assistant
        reply, then whatever ``mk_critic_verify`` produces for the verdict),
        plus the unchanged ``"eval_result"`` and ``"problem_id"``.
        ``item`` itself is left unmodified.
    """
    reply = item["messages"][-1]
    # NOTE(review): the other entries of item["messages"] are chat dicts, so
    # the reply is presumably {"role": ..., "content": ...} as well; a dict
    # cannot be fed to the regex directly. Plain strings are still accepted.
    # Confirm against the output format of vllm_chatcomplete.
    if isinstance(reply, dict):
        reply = reply["content"]
    rationale = extract_rationale(reply)

    # Build a fresh list instead of extending item["raw"] in place, so the
    # caller's record is not altered as a side effect.
    messages = [
        *item["raw"],
        {"role": "user", "content": "Please analyze your code step by step."},
        {"role": "assistant", "content": rationale},
        *mk_critic_verify(item["eval_result"]),
    ]

    return {
        "messages": messages,
        "eval_result": item["eval_result"],
        "problem_id": item["problem_id"],
    }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment