Commit 9af763c6 by nanziyuan

legacy

parent 1d45c3d1
import argparse
from difflib import unified_diff
from functools import partial
from codecritic.utils.json import load_jsonl, save_jsonl
from codecritic.utils.inference import generate_worker, SPLITTER
from codecritic.utils.parallel import model_map
from codecritic.dataset.code import code_template, extract_code
from vllm import SamplingParams
# Instruction asking the model to verify its *own* code step by step
# (chain-of-verification style), explaining each step's correctness.
COV_PROMPT = "Please verify your *own* code step by step using Markdown code blocks. After each step, explain whether it's correct or not, and if not, explain the issue."
# Few-shot example showing the exact Markdown layout expected in the
# verification response (code chunk followed by an explanation, per step).
COV_EXAMPLE = """\
Here is an example of the format I’d like you to use for your response.
** Example RETURN FORMAT **
```python
def add_numbers(a, b):
return a + b
result = add_numbers(5, '10')
```
1. **Code:**
```python
def add_numbers(a, b):
return a + b
```
**Explanation:** This defines a function `add_numbers` that takes two arguments and returns their sum. Correct.
2. **Code:**
```python
result = add_numbers(5, '10')
```
**Explanation:** The second argument is a string (`'10'`), which will cause a TypeError when trying to add it to an integer. Incorrect.
"""
# Anchor sentences telling the model up front whether its code was judged
# correct or incorrect (used by mk_cov_prompt).
CORRECT_PROMPT = "Your code is correct."
INCORRECT_PROMPT = "Your code is incorrect."
# Hint template carrying a unified diff from the incorrect code towards the fix.
DIFF_PROMPT = "Here’s a hint about how to fix this incorrect code and make it work correctly.\n{diff}"
# Final yes/no judgement question (used by mk_critic_verify).
JUDGE_PROMPT = "Is the code correct (Yes/No)?"
def mk_message(user, assistant):
    """Build a two-turn chat history: a user prompt and an assistant reply."""
    turns = zip(("user", "assistant"), (user, assistant))
    return [{"role": role, "content": content} for role, content in turns]
def mk_critic_verify(answer=None):
    """Build the judge turn; append a Yes/No assistant reply when *answer* is set.

    answer: bool or None. None yields only the user question; True/False adds
    the corresponding "Yes"/"No" assistant message.
    """
    msgs = [{"role": "user", "content": JUDGE_PROMPT}]
    if answer is None:
        return msgs
    msgs.append({"role": "assistant", "content": "Yes" if answer else "No"})
    return msgs
def mk_cov_prompt(is_correct, splitter, diff):
    """Build the chain-of-verification turns appended after a code answer.

    is_correct: whether the code was judged correct (picks the anchor sentence
        and decides whether the diff hint is included).
    splitter: truthy to also emit a primed assistant turn ending in SPLITTER.
    diff: unified-diff hint text, only used when is_correct is False.
    """
    if is_correct:
        anchor, hint = CORRECT_PROMPT, ""
    else:
        anchor, hint = INCORRECT_PROMPT, DIFF_PROMPT.format(diff=diff)
    user_turn = {"role": "user", "content": '\n'.join([anchor, hint, COV_PROMPT, COV_EXAMPLE])}
    if not splitter:
        return [user_turn]
    primed_turn = {
        "role": "assistant",
        "content": "Here's a step-by-step verification of the code." + SPLITTER,
    }
    return [user_turn, primed_turn]
def convert_preference_to_vot_prompt(item):
    """Expand one preference pair into two verification-of-thought prompts.

    Returns a (positive, negative) tuple: the chosen solution paired with a
    "correct" verification prompt, and the rejected solution paired with an
    "incorrect" prompt that carries a unified-diff hint towards the fix.
    """
    question = item["messages"][0]["content"]
    chosen_code = extract_code(item["chosen"][0]["content"])
    rejected_code = extract_code(item["rejected"][0]["content"])

    # Minimal-context (n=1) diff from the rejected code towards the chosen one.
    hint = ''.join(unified_diff(
        rejected_code.splitlines(keepends=True),
        chosen_code.splitlines(keepends=True),
        fromfile="yourcode.py",
        tofile="userfix.py",
        n=1,
    ))

    positive = {
        "messages": mk_message(question, code_template.format(chosen_code))
        + mk_cov_prompt(True, SPLITTER, ""),
        "eval_result": True,
    }
    negative = {
        "messages": mk_message(question, code_template.format(rejected_code))
        + mk_cov_prompt(False, SPLITTER, hint),
        "eval_result": False,
    }
    return (positive, negative)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--output", type=str)
    args = parser.parse_args()

    # Expand every preference pair into two verification prompts
    # (positive/chosen followed by negative/rejected, order preserved).
    dataset = []
    for item in load_jsonl(args.dataset):
        dataset.extend(convert_preference_to_vot_prompt(item))

    # Greedy decoding (temperature=0) of the verification responses.
    sampling_params = SamplingParams(
        n=1,
        temperature=0,
        top_p=0.95,
        max_tokens=2048,
    )
    worker = partial(
        generate_worker, model_path=args.model, sampling_params=sampling_params
    )
    dataset = model_map(worker, dataset, 1)
    save_jsonl(dataset, args.output + '.raw')

    # Repackage each item: "question" = original two turns plus the CoV request;
    # "response" = the generated verification turn plus the ground-truth Yes/No.
    for item in dataset:
        history = item.pop("messages")
        item["question"] = history[:2] + [{"role": "user", "content": COV_PROMPT}]
        item["response"] = history[3:4] + mk_critic_verify(item["eval_result"])
    save_jsonl(dataset, args.output)
import argparse
from codecritic.utils.json import load_jsonl, save_jsonl
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--w_reason", type=str)
    parser.add_argument("--wo_reason", type=str)
    args = parser.parse_args()

    with_reason, without_reason = [], []
    for record in load_jsonl(args.dataset):
        question = record.pop("question")
        response = record.pop("response")
        # With reasoning: full question plus the first response turn.
        with_reason.append({**record, "messages": question + response[:1]})
        # Without reasoning: only the first two question turns.
        without_reason.append({**record, "messages": question[:2]})
    save_jsonl(without_reason, args.wo_reason)
    save_jsonl(with_reason, args.w_reason)
@@ -5,7 +5,7 @@ import os
 from transformers import AutoTokenizer
 from vllm import SamplingParams
-from codecritic.dataset.genrm_prompt import THINK_MESSAGE, JUDGE_MESSAGE, JUDGE_TOEKNS
+from codecritic.dataset.genrm_prompt import JUDGE_MESSAGE, JUDGE_TOEKNS
 from codecritic.dataset.legacy_genrm_prompt import COV_MESSAGE
 from codecritic.utils.inference import generate_worker, score_worker
 from codecritic.utils.parallel import model_map
@@ -67,6 +67,6 @@ if __name__ == "__main__":
         positive_token=positive_token,
         negative_token=negative_token,
     )
-    dataset = model_map(worker, dataset, args.gpu_per_model)
+    dataset = model_map(worker, dataset, args.tp)
     save_jsonl(dataset, args.output)
 from codecritic.dataset.code import extract_code, code_template
+from codecritic.data.utils import SPLITTER, mk_message
 from codecritic.dataset.genrm_prompt import mk_judge_response
-COV_PROMPT = "Please verify your *own* code step by step using Markdown code blocks. After each step, explain whether it's correct or not, and if not, explain the issue."
+COV_PROMPT = "Please verify your code step by step using Markdown code blocks. After each step, explain whether it's correct or not, and if not, explain the issue."
 ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment