import json
import os

import openai  # Assuming you're using OpenAI's API

# Set your OpenAI API key.
# The key is read from the OPENAI_API_KEY environment variable so that a real
# secret never has to be written into this file; the placeholder string is
# kept only as a fallback for when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_openai_api_key")

# Example input data: a buggy snippet, its fixed counterpart, the diff between
# the two, and the problem statement both snippets are meant to solve.
_BUGGY_SNIPPET = """
def sum_even_numbers(numbers):
    total = 0
    for num in numbers:
        if num % 2 == 0:
            total += num
        return total  # Incorrect indentation: return is inside the loop
    """

_FIXED_SNIPPET = """
def sum_even_numbers(numbers):
    total = 0
    for num in numbers:
        if num % 2 == 0:
            total += num
    return total  # Correct indentation: return is outside the loop
    """

_SNIPPET_DIFF = """
-        return total  # Incorrect indentation: return is inside the loop
+    return total  # Correct indentation: return is outside the loop
    """

input_data = dict(
    incorrect=_BUGGY_SNIPPET,
    correct=_FIXED_SNIPPET,
    diff=_SNIPPET_DIFF,
    problem="Sum all even numbers in a given list.",
)

# Prompt template for generating all hint levels.
# Filled in with str.format(problem=..., code=..., diff=...). The JSON skeleton
# at the end is literal text for the model, so its braces are doubled
# ("{{" / "}}"); single braces would be parsed by str.format as replacement
# fields and raise KeyError before the prompt is ever sent.
HINT_GENERATION_PROMPT = """
You are an expert code reviewer. Your task is to generate hints for the following code snippet at three levels of detail: high, middle, and low.

**Problem**: {problem}

**Code**:
{code}

**Diff (if applicable)**:
{diff}

Provide your response in the following JSON format:
{{
  "high": "High-level hint (e.g., 'The code is incorrect.' or 'The code is correct.')",
  "middle": "Middle-level hint (e.g., 'The code contains a logic error related to the placement of the return statement.' or 'The code correctly implements the intended functionality.')",
  "low": "Low-level hint (e.g., 'The return statement is incorrectly placed inside the loop, causing the function to exit prematurely.' or 'The return statement is placed outside the loop, ensuring all numbers are processed.')"
}}
"""

# Unified prompt template for the step-by-step code analysis request.
# Filled in with str.format; the placeholders are {problem}, {hint} and {code}.
# The same template is used for both incorrect and correct snippets: the
# numbered structure asks the model to cover whichever case applies.
UNIFIED_PROMPT = """
You are an expert code reviewer. Your task is to analyze the following code snippet step-by-step. Follow this structure:

1. **Intended Functionality**: What is the code trying to do?
2. **Code Analysis**:
   - If the code is **incorrect**:
     - What is the error in the code?
     - Why does this error occur?
   - If the code is **correct**:
     - Why is this code correct?
     - Are there any potential improvements or edge cases to consider?

**Problem**: {problem}

**Hint (if applicable)**: {hint}

Here is the code:
{code}

Provide your response in a clear and structured format.
"""

# Prompt template for verifying that an analysis agrees with its hint.
# Filled in with str.format; the placeholders are {problem}, {hint} and
# {analysis}. The model is asked to answer "Consistent" or "Inconsistent"
# followed by a brief explanation.
VERIFICATION_PROMPT = """
You are an expert code reviewer. Your task is to verify whether the following analysis is consistent with the hint and the problem description.

**Problem**: {problem}

**Hint**: {hint}

**Analysis**:
{analysis}

Provide your response as either "Consistent" or "Inconsistent" and briefly explain why.
"""

# Function to call the LLM
def call_llm(prompt, *, model="gpt-4", max_tokens=500, temperature=0.7):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the only message (role "user").
        model: Chat model name passed through to the API.
        max_tokens: Upper bound on the length of the generated reply.
        temperature: Sampling temperature passed through to the API.

    Returns:
        The content of the first returned choice, with leading and trailing
        whitespace stripped.

    Any exception raised by the openai call propagates to the caller.
    """
    # NOTE(review): openai.ChatCompletion is the pre-1.0 interface of the
    # openai package -- confirm the pinned library version still provides it.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message.content.strip()

# Helper: turn a raw model reply into a complete hint dictionary
def _parse_hint_response(response, fallback):
    """Parse *response* into a hint dict, filling gaps from *fallback*.

    The reply is expected to contain one JSON object. Text around the object
    (for example a Markdown code fence or a sentence of preamble) is ignored
    by parsing only the span from the first "{" to the last "}".

    Returns a copy of *fallback* when no JSON object can be parsed. Otherwise
    returns the parsed object, with every level from *fallback* that is
    missing, empty, or not a string replaced by the fallback text.
    """
    start = response.find("{")
    end = response.rfind("}")
    candidate = response[start:end + 1] if 0 <= start < end else response

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        return dict(fallback)

    # Valid JSON that is not an object (list, string, number) has no levels.
    if not isinstance(parsed, dict):
        return dict(fallback)

    hints = dict(parsed)
    for level, default_text in fallback.items():
        value = hints.get(level)
        if not isinstance(value, str) or not value.strip():
            hints[level] = default_text
    return hints


# Function to generate all hint levels using the LLM
def generate_all_hints(code, problem, diff, is_correct=False):
    """Ask the LLM for "high", "middle" and "low" hints about *code*.

    Args:
        code: Source text of the snippet the hints should describe.
        problem: Problem statement the snippet is meant to solve.
        diff: Diff text for the snippet; replaced by "No diff provided."
            when *is_correct* is true.
        is_correct: Whether *code* is the known-good version. Also selects
            the wording of the canned fallback hints.

    Returns:
        A dict that always has string values under "high", "middle" and
        "low". Levels the model reply does not supply come from the canned
        fallback, so callers can index by hint level safely.
    """
    fallback = {
        "high": "The code is incorrect." if not is_correct else "The code is correct.",
        "middle": "The code contains a logic error." if not is_correct else "The code correctly implements the intended functionality.",
        "low": "The code contains a detailed error." if not is_correct else "The code is correct and handles all edge cases."
    }
    prompt = HINT_GENERATION_PROMPT.format(
        problem=problem,
        code=code,
        diff=diff if not is_correct else "No diff provided."
    )
    response = call_llm(prompt)
    return _parse_hint_response(response, fallback)

# Function to analyze code using the unified prompt
def analyze_code(code, problem, hint=None):
    """Return the LLM's structured analysis of *code* for *problem*.

    A missing or empty *hint* is rendered in the prompt as
    "No hint provided.".
    """
    hint_text = hint or "No hint provided."
    return call_llm(
        UNIFIED_PROMPT.format(problem=problem, hint=hint_text, code=code)
    )

# Function to verify consistency using the LLM
def verify_consistency(analysis, hint, problem):
    """Ask the LLM whether *analysis* agrees with *hint* and *problem*.

    Returns the model's reply text unchanged.
    """
    fields = {"problem": problem, "hint": hint, "analysis": analysis}
    return call_llm(VERIFICATION_PROMPT.format(**fields))

# Function to synthesize training data with different hint levels
def synthesize_training_data(input_data, hint_level="high"):
    """Build one training record for the given *hint_level*.

    Args:
        input_data: Mapping with the keys "incorrect", "correct", "diff"
            and "problem".
        hint_level: Key used to pick one hint out of each generated hint set.

    Returns:
        A dict holding, for both the incorrect and the correct snippet, the
        code, the selected hint, the analysis and the consistency verdict,
        together with the diff, the problem, the hint level and the full
        hint sets.
    """
    # (variant name, is_correct flag), in the order each stage processes them.
    variants = (("incorrect", False), ("correct", True))

    snippet = {name: input_data[name] for name, _ in variants}
    diff = input_data["diff"]
    problem = input_data["problem"]

    # Stage 1: every hint level for each variant, then pick the requested one.
    hint_sets = {
        name: generate_all_hints(snippet[name], problem, diff, is_correct=flag)
        for name, flag in variants
    }
    chosen = {name: hint_sets[name][hint_level] for name, _ in variants}

    # Stage 2: hint-guided analysis of each variant.
    analyses = {
        name: analyze_code(snippet[name], problem, chosen[name])
        for name, _ in variants
    }

    # Stage 3: check each analysis against the hint it was given.
    verdicts = {
        name: verify_consistency(analyses[name], chosen[name], problem)
        for name, _ in variants
    }

    return {
        "incorrect_code": snippet["incorrect"],
        "incorrect_analysis": analyses["incorrect"],
        "incorrect_hint": chosen["incorrect"],
        "incorrect_verification": verdicts["incorrect"],
        "correct_code": snippet["correct"],
        "correct_analysis": analyses["correct"],
        "correct_hint": chosen["correct"],
        "correct_verification": verdicts["correct"],
        "diff": diff,
        "problem": problem,
        "hint_level": hint_level,
        # Full hint sets are kept alongside the selected hint for reference.
        "all_incorrect_hints": hint_sets["incorrect"],
        "all_correct_hints": hint_sets["correct"],
    }

# Main function
def main(output_path="training_data.json", hint_levels=("high", "middle", "low")):
    """Synthesize one training record per hint level and save them as JSON.

    Args:
        output_path: File the records are written to; opened in write mode,
            so existing content is replaced.
        hint_levels: Hint levels to synthesize, in the order the records
            appear in the output list.

    The records are built from the module-level ``input_data`` example.
    """
    training_data_list = [
        synthesize_training_data(input_data, hint_level)
        for hint_level in hint_levels
    ]

    # Save training data to a JSON file
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(training_data_list, f, indent=4)

    print(f"Training data synthesized and saved to '{output_path}'.")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()