import os
import re
import pandas as pd

# Log file parsed when no path is given on the command line.
DEFAULT_LOG_FILE = "/nfs_global/S/zhuyaoyu/projects/CodeV-o1/ret_one/618534.out"

# Pass@k values written to the "Step 0" row of the output table.
# NOTE(review): presumably the scores of the model before training — confirm.
STEP_ZERO_PASS_RESULTS = {"Pass@1": 0.3067, "Pass@5": 0.4476, "Pass@20": 0.5769}

# Matches e.g. "Pass@5 is 0.4476." -> (whole match, "5", "0.4476").
PASS_PATTERN = r'(Pass@(\d+) is (\d+\.\d+)\.)'


def extract_paths(content, path_prefix):
    """Return the distinct paths under *path_prefix* mentioned in *content*.

    *content* is split on commas into argument-like items. For each item that
    contains *path_prefix*, the candidate path is the first single-quoted
    string in the item or, when the item has no quoted string, the text
    after the first ``=``. A candidate is kept only if it is *path_prefix*
    followed by at least one ``/component``.

    The result is de-duplicated and keeps first-seen order, so it is
    deterministic for a given input.
    """
    path_re = re.compile(r'^' + re.escape(path_prefix) + r'(/[^/]+)+/?$')
    found = {}  # used as an insertion-ordered set
    for item in re.split(r',\s*', content):
        if path_prefix not in item:
            continue
        quoted = re.search(r"'([^']+)'", item)
        if quoted:
            candidate = quoted.group(1)
        else:
            # No quoted string: fall back to the "key=value" form.
            parts = item.split('=')
            if len(parts) < 2:
                continue
            candidate = parts[1].strip()
        if path_re.match(candidate):
            found[candidate] = None
    return list(found)


def extract_global_steps(paths):
    """Return the sorted, distinct ``global_step_<N>`` numbers found in *paths*.

    Paths without a ``global_step_<N>`` component are ignored. Several paths
    that share the same step contribute that step only once.
    """
    steps = set()
    for path in paths:
        found = re.search(r'global_step_(\d+)', path)
        if found:
            steps.add(int(found.group(1)))
    return sorted(steps)


def group_pass_results(content, global_steps):
    """Group the ``Pass@k`` values in *content* and key each group by step.

    The values are assumed to appear group by group: a new group starts as
    soon as a ``Pass@k`` key that is already in the current group shows up
    again. The i-th group is assigned to ``global_steps[i]``.

    Returns a dict ``{step: {"Pass@k": value, ...}}``.

    Raises:
        ValueError: if *content* holds more groups than there are steps.
    """
    groups = []
    current = {}
    for _whole, k, value in re.findall(PASS_PATTERN, content):
        key = f'Pass@{k}'
        if key in current:
            # A repeated key marks the start of the next group.
            groups.append(current)
            current = {}
        current[key] = float(value)
    if current:
        groups.append(current)

    if len(groups) > len(global_steps):
        raise ValueError(
            f"found {len(groups)} Pass@k groups but only "
            f"{len(global_steps)} global steps"
        )
    return dict(zip(global_steps, groups))


if __name__ == '__main__':
    import sys

    # Parent of the directory that contains this script; only paths below it
    # are extracted from the log.
    path_to_find = os.path.abspath(os.path.join(__file__, '..', '..'))
    log_file = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_LOG_FILE
    with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read().strip()

    unique_paths = extract_paths(content, path_to_find)
    if not unique_paths:
        raise SystemExit(f"No path under {path_to_find} found in {log_file}")

    # The CSV goes two directory levels above the first extracted path.
    save_path = os.path.abspath(os.path.join(unique_paths[0], '..', '..'))
    global_steps = extract_global_steps(unique_paths)

    grouped_pass_results = group_pass_results(content, global_steps)
    grouped_pass_results[0] = dict(STEP_ZERO_PASS_RESULTS)

    df = pd.DataFrame.from_dict(grouped_pass_results, orient='index').sort_index()
    df.index.name = 'Step'
    csv_file = os.path.join(save_path, 'verilog-eval-v2.csv')
    df.to_csv(csv_file)

    print(f"数据已保存到 {csv_file}")