import io
import re
import tokenize
from difflib import unified_diff

# Matches one fenced ```python ... ``` block. The body is captured lazily so
# adjacent blocks stay separate, and DOTALL lets it span several lines.
codeblock_pattern = re.compile(r"```python(.+?)```", re.DOTALL)

# Markdown fence with a single `{}` placeholder for the code to wrap;
# the rendered text ends with a newline after the closing fence.
code_template = "```python\n{}\n```\n"


def extract_code(text: str):
    """Return the bodies of all ```python fenced blocks in ``text``.

    Each block body is stripped of surrounding whitespace and the bodies are
    joined with a single newline, in order of appearance. An empty string is
    returned when ``text`` contains no such block.
    """
    snippets = (body.strip() for body in codeblock_pattern.findall(text))
    # Joining zero snippets already yields "", so no separate empty case is needed.
    return "\n".join(snippets)

# Precompiled regular expressions. They are only a fallback: they cannot tell a
# "#" or a triple quote inside a string literal from a real comment/docstring,
# so they are used solely when the source cannot be tokenized.
SINGLE_LINE_COMMENT_REGEX = re.compile(r'#.*')
MULTILINE_DOUBLE_QUOTE_REGEX = re.compile(r'^\s*""".*?"""\s*$', flags=re.DOTALL | re.MULTILINE)
MULTILINE_SINGLE_QUOTE_REGEX = re.compile(r"^\s*'''.*?'''\s*$", flags=re.DOTALL | re.MULTILINE)

_TRIPLE_QUOTES = ('"""', "'''")


def _is_triple_quoted_string(token):
    """Return True for a STRING token that opens with an unprefixed triple quote."""
    return token.type == tokenize.STRING and token.string.startswith(_TRIPLE_QUOTES)


def _strip_with_tokenizer(code):
    """Delete comments and standalone triple-quoted strings, located by token.

    Raises ``tokenize.TokenError`` or ``SyntaxError`` when ``code`` cannot be
    tokenized or when token positions cannot be mapped back onto ``code``.
    """
    # Absolute offset at which each row starts. The StringIO reader below
    # splits rows on "\n" only, so the same split is used here.
    row_starts = [0]
    for row in code.split("\n"):
        row_starts.append(row_starts[-1] + len(row) + 1)

    doomed = []     # tokens whose text is to be deleted
    statement = []  # significant tokens of the logical line being read
    for token in tokenize.generate_tokens(io.StringIO(code).readline):
        if token.type == tokenize.COMMENT:
            doomed.append(token)
        elif token.type in (tokenize.NEWLINE, tokenize.ENDMARKER):
            # A logical line made only of triple-quoted strings is a docstring
            # or a no-op string statement; strings used in expressions survive.
            if statement and all(_is_triple_quoted_string(t) for t in statement):
                doomed.extend(statement)
            statement = []
        elif token.type not in (tokenize.NL, tokenize.INDENT, tokenize.DEDENT):
            statement.append(token)

    spans = []
    for token in doomed:
        start = row_starts[token.start[0] - 1] + token.start[1]
        end = row_starts[token.end[0] - 1] + token.end[1]
        # Never cut text that is not exactly the token we meant to remove.
        if code[start:end] != token.string:
            raise tokenize.TokenError("token positions do not match the source")
        spans.append((start, end))

    kept = []
    cursor = 0
    for start, end in sorted(spans):
        kept.append(code[cursor:start])
        cursor = end
    kept.append(code[cursor:])
    return "".join(kept)


def preprocess_code(code: str) -> str:
    """Strip comments, standalone docstrings and blank lines from Python source.

    The source is tokenized so that ``#`` characters and triple quotes that
    live inside string literals are left untouched, and only triple-quoted
    strings that form a whole statement are removed. If the source cannot be
    tokenized (for example an unclosed bracket or string), the function falls
    back to the regex-based stripping, which is best-effort only.

    Args:
        code: Python source text.

    Returns:
        The remaining non-blank lines joined with ``"\\n"`` (no trailing
        newline). Whitespace that preceded a removed comment is kept.
    """
    try:
        code = _strip_with_tokenizer(code)
    except (tokenize.TokenError, SyntaxError):
        # IndentationError is a SyntaxError subclass, so it is covered too.
        code = SINGLE_LINE_COMMENT_REGEX.sub('', code)
        code = MULTILINE_DOUBLE_QUOTE_REGEX.sub('', code)
        code = MULTILINE_SINGLE_QUOTE_REGEX.sub('', code)

    # Remove blank lines
    return "\n".join(line for line in code.splitlines() if line.strip())


def _newline_terminated(text):
    """Split ``text`` into lines, each guaranteed to end with ``"\\n"``."""
    return [
        line if line.endswith("\n") else line + "\n"
        for line in text.splitlines(keepends=True)
    ]


def diff_code(incorrect: str, correct: str) -> str:
    """Return a unified diff that turns ``incorrect`` into ``correct``.

    Every input line is given a trailing newline before diffing. Without that,
    a line lacking one (typically the last line) would be glued to the next
    diff line in the joined output, e.g. ``-a+b``. As a consequence a missing
    final newline is not reported as a difference.

    Args:
        incorrect: Source text labelled ``incorrect.py`` in the diff header.
        correct: Source text labelled ``correct.py`` in the diff header.

    Returns:
        The diff as a single string, or ``""`` when the inputs have the same lines.
    """
    delta = unified_diff(
        _newline_terminated(incorrect),
        _newline_terminated(correct),
        fromfile="incorrect.py",
        tofile="correct.py",
    )
    return "".join(delta)