Commit 432936ac by nanziyuan

add qwq & fix vllm bugs

parent 0e43e25c
@@ -21,9 +21,9 @@ if __name__ == "__main__":
cov_prompts = [transform_preference_to_qwq_prompt(x) for x in preference_dataset]
cov_prompts = list(chain(*cov_prompts))
sampling_params = dict(n=1, temperature=0.0, max_tokens=6144)
covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params)
save_jsonl(covs, args.out + ".raw")
sampling_params = dict(n=1, temperature=0, max_tokens=6144)
covs = vllm_chatcomplete(args.model, cov_prompts, sampling_params, 2)
# save_jsonl(covs, args.out + ".raw")
dataset = list(map(transform_qwqout_to_trainset, covs))
save_jsonl(dataset, args.out)
\ No newline at end of file
save_jsonl(dataset, args.out)
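For reference, a minimal sketch of what the greedy sampling configuration above corresponds to in vLLM's own API, assuming `vllm_chatcomplete` forwards the dict to `vllm.SamplingParams`, that the trailing `2` is a GPU/worker count (a guess; the diff does not say), and a vLLM version new enough to expose `LLM.chat`:

```python
from vllm import LLM, SamplingParams

# Assumed equivalent of dict(n=1, temperature=0, max_tokens=6144):
# a single greedy completion of at most 6144 tokens per prompt.
params = SamplingParams(n=1, temperature=0.0, max_tokens=6144)

# Hypothetical direct usage; the script goes through vllm_chatcomplete instead,
# and the model path / parallelism below are placeholders.
llm = LLM(model="Qwen/QwQ-32B-Preview", tensor_parallel_size=2)
outputs = llm.chat(
    [{"role": "user", "content": "Analyze this code step by step."}],
    params,
)
print(outputs[0].outputs[0].text)
```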
@@ -4,21 +4,24 @@ import re
from codecritic.data.code import extract_code
from codecritic.data.verify import mk_critic_verify
# QwQ doesn't follow my instructions, but it outputs a *really* reasonable explanation
SYS_PROMPT = f"""
You are an AI code reviewer tasked with analyzing code solutions to programming problems. You will be given a problem description, a code solution, and information about the solution's correctness. If the solution is incorrect, you will also be provided with a diff showing the differences between the given solution and a correct one.
Your task is to analyze the provided code *step-by-step*, pretending you do not know the final verdict of its correctness. Focus on understanding the code's logic, identifying potential issues, and reasoning through its execution.
Your task is to analyze the provided code *step-by-step*, reasoning through its logic and identifying potential issues. Initially, approach the analysis as if you don't know the final judgement of its correctness. However, your final conclusion about the code's correctness must align with the provided information.
Output your reasoning process within a markdown code block using the following format:
```rationale
[Your step-by-step reasoning here. Explain what the code does line by line, identify potential edge cases, and discuss possible errors. Be detailed and thorough.]
```Rationale
[Your step-by-step reasoning here. Explain what the code does line by line and discuss possible errors.]
```
Don't simulate the code's runtime behavior, mentally execute it with specific inputs, or predict its output.
Finally, based on your analysis, state your conclusion about the code's correctness (either "Yes" or "No") using the following format:
Final Answer: (Yes or No)
Final Answer:
(Yes or No)
"""
@@ -37,18 +40,18 @@ Diff (Only if Correctness is "No"):
def transform_preference_to_qwq_prompt(item):
assert all(len(item[x]) == 1 for x in ["prompt", "chosen", "rejected"])
problem = item["prompt"][0]["content"]
assert all(len(item[x]) == 1 for x in ["messages", "chosen", "rejected"])
problem = item["messages"][0]["content"]
chosen_code = item["chosen"][0]["content"]
rejected_code = item["rejected"][0]["content"]
diff = unified_diff(
diff = "".join(unified_diff(
extract_code(rejected_code).splitlines(keepends=True),
extract_code(chosen_code).splitlines(keepends=True),
fromfile="incorrect.py",
tofile="correct.py",
n=1,
)
))
sys_message = {"role": "system", "content": SYS_PROMPT}
chosen_message = {
@@ -68,14 +71,12 @@ def transform_preference_to_qwq_prompt(item):
{
"messages": [sys_message, chosen_message],
"eval_result": True,
"problem_id": item["problem_id"],
"raw": item["prompt"] + item["chosen"],
"raw": item["messages"] + item["chosen"],
},
{
"messages": [sys_message, rejected_message],
"eval_result": False,
"problem_id": item["problem_id"],
"raw": item["prompt"] + item["rejected"],
"raw": item["messages"] + item["rejected"],
},
)
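The switch from `diff = unified_diff(...)` to `diff = "".join(unified_diff(...))` matters because `difflib.unified_diff` returns a lazy generator of lines, so interpolating it into the prompt would embed a generator repr instead of the diff text. A self-contained illustration with hypothetical snippets:

```python
from difflib import unified_diff

rejected = "def add(a, b):\n    return a - b\n"  # hypothetical incorrect solution
chosen = "def add(a, b):\n    return a + b\n"    # hypothetical correct solution

# Joining the generator's lines yields the text that can go into the prompt.
diff = "".join(unified_diff(
    rejected.splitlines(keepends=True),
    chosen.splitlines(keepends=True),
    fromfile="incorrect.py",
    tofile="correct.py",
    n=1,
))
print(diff)
# --- incorrect.py
# +++ correct.py
# @@ -1,2 +1,2 @@
#  def add(a, b):
# -    return a - b
# +    return a + b
```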
@@ -96,17 +97,17 @@ def extract_rationale(text):
def transform_qwqout_to_trainset(item):
messages = item["raw"]
rationale = extract_rationale(item["messages"][-1])
rationale = item["messages"][-1]["content"]
messages += [
response = [
{"role": "user", "content": "Please analyze your code step by step."},
{"role": "assistant", "content": rationale},
]
messages += mk_critic_verify(item["eval_result"])
response += mk_critic_verify(item["eval_result"])
return {
"messages": messages,
"question": messages,
"response": response,
"eval_result": item["eval_result"],
"problem_id": item["problem_id"],
}
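To make the new question/response split concrete, a record produced by `transform_qwqout_to_trainset` would look roughly like the hypothetical one below; the exact turns appended by `mk_critic_verify` are an assumption:

```python
example_record = {
    "question": [  # item["raw"]: the original problem plus the judged solution
        {"role": "user", "content": "Write add(a, b) that returns a + b."},
        {"role": "assistant", "content": "def add(a, b):\n    return a + b"},  # candidate code (normally fenced)
    ],
    "response": [
        {"role": "user", "content": "Please analyze your code step by step."},
        {"role": "assistant", "content": "The function adds its two arguments..."},
        # Assumed shape of mk_critic_verify(True); the real helper may differ.
        {"role": "user", "content": "Is the code correct? (Yes/No)"},
        {"role": "assistant", "content": "Yes"},
    ],
    "eval_result": True,
    "problem_id": "demo-0001",  # hypothetical id
}
```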
@@ -3,7 +3,7 @@ from vllm import LLM, SamplingParams
import os
from concurrent.futures import ProcessPoolExecutor
from itertools import chain, combinations
from functools import partial, wraps
from functools import partial
import subprocess
import numpy as np
@@ -184,7 +184,7 @@ def score_worker(cuda_device, prompts, model_path, score_token):
positive_token = score_token[0]
positive_logprob = logprob.get(positive_token)
positive_prob = np.exp(positive_logprob.logprob) if positive_logprob else 0
return positive_prob
return {"score": positive_prob}
def compute_score_twotoken(logprob):
positive_token, negative_token = score_token[0], score_token[1]
@@ -194,7 +194,10 @@ def score_worker(cuda_device, prompts, model_path, score_token):
negative_logprob = logprob.get(negative_token)
negative_prob = np.exp(negative_logprob.logprob) if negative_logprob else 0
return positive_prob / (positive_prob + negative_prob)
return {
"score": positive_prob / (positive_prob + negative_prob),
"uncertainty": 1 - (positive_prob + negative_prob)
}
if len(score_token) == 1:
compute_score = compute_score_onetoken
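The reworked return values are simple arithmetic on the verdict-token probabilities. Restated standalone (a sketch, not the repository's function), with the two probabilities recovered from their logprobs:

```python
import numpy as np

def two_token_score(yes_logprob, no_logprob):
    # Normalize P(Yes) against the mass assigned to either verdict token,
    # and report how much probability fell on neither of them.
    p_yes, p_no = np.exp(yes_logprob), np.exp(no_logprob)
    return {
        "score": p_yes / (p_yes + p_no),
        "uncertainty": 1 - (p_yes + p_no),
    }

# e.g. logprobs of -0.105 ("Yes") and -2.9 ("No") give a score of ~0.94
# and an uncertainty of ~0.04.
print(two_token_score(-0.105, -2.9))
```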
@@ -229,10 +232,10 @@ def score_worker(cuda_device, prompts, model_path, score_token):
for item, output in zip(prompts, outputs):
for response in output.outputs:
# response.logprobs: list[dict[int, Logprob]] https://github.com/vllm-project/vllm/blob/main/vllm/sequence.py
score = compute_score(response.logprobs[0])
scores = compute_score(response.logprobs[0])
text = response.text
results.append({**item, "score": score, "critic_text": text})
results.append({**item, **scores, "critic_text": text})
return results
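Since each result row now merges the prompt item, the score fields, and the critic text, a downstream consumer can filter and rank on those keys directly; a hypothetical example, not part of the repository:

```python
def pick_best_solution(results, max_uncertainty=0.2):
    # Prefer responses whose Yes/No mass was concentrated (low uncertainty),
    # then take the one the critic scored highest.
    confident = [r for r in results if r.get("uncertainty", 0.0) <= max_uncertainty]
    candidates = confident or results  # fall back if nothing is confident enough
    return max(candidates, key=lambda r: r["score"])
```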
......