# Prompt template asking a grader to verify a solution step by step against an
# expected answer. Placeholders filled via str.format: {problem}, {solution},
# {ground_truth}. The template instructs the grader to finish with a line of
# the form "Verification: Is the code correct (Yes/No)? X"; postprocess_result
# searches responses for the "Verification: Is the code correct" prefix.
COT_GENERATION_PROMPT = """
You are a programming teacher. Grade the **Solution**, verifying its correctness step by step. Use the **Expected Answer** to identify any erroneous steps or logic in the **Solution**. At the end of the verification, when you give your final grade, write it in the form:

**"Verification: Is the code correct (Yes/No)? X"**, where **X** is either **Yes** or **No**.

**Question:**
{problem}
**Solution:**
{solution}
**Expected Answer:**
{ground_truth}
"""


# Prompt template with the same grading instruction as COT_GENERATION_PROMPT
# but without the expected answer and without the final-grade format
# instruction. Placeholders filled via str.format: {problem}, {solution}.
# mk_distillation_messages stores the formatted result under "train_prompt".
TRAIN_PROMPT = """
You are a programming teacher. Grade the **Solution**, verifying its correctness step by step.

**Question:**
{problem}
**Solution:**
{solution}
"""


def mk_distillation_messages(chosen, rejected):
    """Attach generation and training prompts to a chosen/rejected pair.

    The question text is read from ``chosen["messages"][0]["content"]`` and
    the two candidate solutions from ``chosen["code"]`` and
    ``rejected["code"]``.  Both dicts are modified in place: each receives a
    ``"gen_prompt"`` and a ``"train_prompt"`` entry, each a one-element list
    holding a single ``{"role": "user", "content": ...}`` message.

    For the generation prompt, the chosen sample gets the literal string
    ``"None"`` as its expected answer, while the rejected sample gets the
    chosen sample's code as its expected answer.

    Args:
        chosen: dict with ``"messages"`` and ``"code"`` keys.
        rejected: dict with a ``"code"`` key.

    Returns:
        ``[chosen, rejected]`` -- the same two (now updated) dict objects.
    """
    def as_user_turn(text):
        # Wrap a prompt string as a single-message conversation.
        return [{"role": "user", "content": text}]

    task = chosen["messages"][0]["content"]
    accepted_code, refused_code = chosen["code"], rejected["code"]
    pair = (chosen, rejected)

    # Build both generation prompts before touching either dict.
    gen_texts = (
        COT_GENERATION_PROMPT.format(
            problem=task, solution=accepted_code, ground_truth="None"
        ),
        COT_GENERATION_PROMPT.format(
            problem=task, solution=refused_code, ground_truth=accepted_code
        ),
    )
    for sample, text in zip(pair, gen_texts):
        sample["gen_prompt"] = as_user_turn(text)

    # The training prompts carry only the question and the solution.
    train_texts = tuple(
        TRAIN_PROMPT.format(problem=task, solution=code)
        for code in (accepted_code, refused_code)
    )
    for sample, text in zip(pair, train_texts):
        sample["train_prompt"] = as_user_turn(text)

    return list(pair)


def postprocess_result(response):
    """Extract the final Yes/No verdict from a grader response.

    The response is searched, last line first, for a line containing
    ``"Verification: Is the code correct"``.  The verdict is read from the
    text that follows the first ``?`` *after that prefix* on the same line,
    and is the first whole word equal to "yes" or "no" (case-insensitive).

    Args:
        response: full text of the grader's response.

    Returns:
        A tuple ``(valid_response, cleaned_response, verification)``:

        * ``valid_response`` -- True only if a verification line with a
          recognizable Yes/No verdict was found.
        * ``cleaned_response`` -- the lines preceding the verification line,
          joined with ``"\\n"``; the unmodified ``response`` when no
          verification line exists.
        * ``verification`` -- True for a "Yes" verdict, False otherwise
          (including every invalid case).
    """
    verification_prefix = "Verification: Is the code correct"

    lines = response.splitlines()

    # Scan backwards so the last verification line in the response wins.
    for idx in range(len(lines) - 1, -1, -1):
        if verification_prefix in lines[idx]:
            break
    else:
        # If no verification line is found, assume the result is incorrect.
        return False, response, False

    cleaned_response = "\n".join(lines[:idx])

    # Only look at what follows the prefix: a '?' earlier on the same line
    # must not be mistaken for the one that ends the verification question.
    _, _, after_prefix = lines[idx].partition(verification_prefix)
    _, question_mark, answer = after_prefix.partition("?")
    if not question_mark:
        # No '?' after the prefix, so there is no verdict to read.
        return False, cleaned_response, False

    # Compare whole words so that e.g. "cannot" or "unknown" is not read as
    # "no"; punctuation and markdown markers act as separators.
    words = "".join(
        ch if ch.isalpha() else " " for ch in answer.lower()
    ).split()
    for word in words:
        if word == "yes":
            return True, cleaned_response, True
        if word == "no":
            return True, cleaned_response, False

    # The answer is neither "Yes" nor "No": invalid, assume incorrect.
    return False, cleaned_response, False
