import openpyxl
import csv
import sys
import re
import os

# ========== Configuration =============
# Input workbook to annotate.
excel_path = '测试输入.xlsx'
# CSV listing the CCF-A venues; it is read through its "abbr" and "fullname" columns.
ccf_a_csv = 'CCF_A_list.csv'
# Sheets of the workbook to process.
sheet_names = ["Dadiannao"]  # e.g. ["Dadiannao", "Diannao"]
# =============================
# ========== Output file =============
# The result is saved as "<excel_path stem>_CCF_A判断<ext>",
# i.e. 测试输入_CCF_A判断.xlsx for the default excel_path above.
# =============================
def read_ccf_a_list(csv_path):
    """
    Load the CCF-A venue list from a CSV file.

    The file is expected to have a header row with the columns ``abbr`` and
    ``fullname``. Each value is stripped of surrounding whitespace; empty
    values are skipped.

    Args:
        csv_path: Path to the CSV file. It is read as UTF-8 and a leading
            BOM is tolerated.

    Returns:
        A tuple ``(abbr_set, fullname_set)`` holding the non-empty
        abbreviations and full names found in the file.

    Raises:
        KeyError: If a data row is read and the header has no ``abbr`` or
            ``fullname`` column.
    """
    abbr_set = set()
    fullname_set = set()
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to a reader.
    with open(csv_path, encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            # DictReader fills the missing trailing fields of a short row
            # with None, so treat None as an empty value instead of crashing.
            abbr = (row['abbr'] or '').strip()
            fullname = (row['fullname'] or '').strip()
            if abbr:
                abbr_set.add(abbr)
            if fullname:
                fullname_set.add(fullname)
    return abbr_set, fullname_set

def extract_bracket_content(s):
    """
    Return the text inside the first pair of brackets found in ``s``.

    Both ASCII and full-width brackets are recognised. The returned text is
    stripped of surrounding whitespace; ``None`` is returned when ``s``
    contains no bracket pair.
    """
    found = re.search(r'[(（](.*?)[)）]', s)
    if not found:
        return None
    inner = found.group(1)
    return inner.strip()

def extract_abbr_before_colon(s):
    """
    Return the ASCII letters found before the first colon in ``s``.

    Both the ASCII colon and the full-width colon are recognised. Every
    character other than A-Z / a-z is dropped from the text before the
    colon.

    Returns:
        The remaining letters, or ``None`` when ``s`` contains no colon or
        when no letters remain before it.
    """
    # maxsplit is passed by keyword: passing it positionally to re.split is
    # deprecated since Python 3.13.
    parts = re.split(r'[:：]', s, maxsplit=1)
    if len(parts) < 2:
        return None
    abbr = re.sub(r'[^A-Za-z]', '', parts[0])
    return abbr or None

def _is_ccf_a(name_str, abbr_set, fullname_set):
    """
    Decide whether a venue name refers to a CCF-A venue.

    If the name carries an abbreviation (letters inside the first bracket
    pair, or letters before the first colon), only the abbreviations are
    compared against ``abbr_set``. Otherwise the name matches when it
    contains any entry of ``fullname_set`` as a substring.

    Args:
        name_str: Venue name, already stripped and non-empty.
        abbr_set: Known CCF-A abbreviations.
        fullname_set: Known CCF-A full names.

    Returns:
        True when the name is recognised as CCF-A, False otherwise.
    """
    bracket = extract_bracket_content(name_str)
    # Keep only ASCII letters, e.g. "ISCA '21" -> "ISCA".
    bracket_abbr = re.sub(r'[^A-Za-z]', '', bracket) if bracket else None
    colon_abbr = extract_abbr_before_colon(name_str)

    if bracket_abbr or colon_abbr:
        # An abbreviation is present: it alone decides the result.
        return any(
            abbr and abbr in abbr_set for abbr in (bracket_abbr, colon_abbr)
        )

    # No abbreviation: fall back to a substring match on the full names.
    return any(
        fullname and fullname in name_str for fullname in fullname_set
    )


def main():
    """
    Annotate the configured sheets with a CCF-A yes/no column and save a copy.

    Reads the CCF-A list from ``ccf_a_csv`` and the workbook from
    ``excel_path``. For every sheet in ``sheet_names`` the header is
    expected on row 4, with the venue name in column 4 and the result in
    column 5. Every row below the header gets "是" or "否" written to the
    result column, or an empty string when the venue name is blank. The
    workbook is then saved next to the input with a "_CCF_A判断" suffix.

    A sheet missing from the workbook is reported and skipped; a sheet
    with an unexpected header terminates the program with exit status 1.
    """
    # 1. Load the CCF-A list.
    abbr_set, fullname_set = read_ccf_a_list(ccf_a_csv)

    # 2. Load the workbook.
    wb = openpyxl.load_workbook(excel_path)
    for sheetname in sheet_names:
        if sheetname not in wb.sheetnames:
            print(f"错误：Excel中不存在Sheet：{sheetname}")
            continue

        ws = wb[sheetname]

        # 3. The first three rows are skipped; row 4 is the header.
        header_row_idx = 4
        header = [cell.value if cell.value is not None else "" for cell in ws[header_row_idx]]

        # 4. Validate the titles of columns 4 and 5.
        if len(header) < 5:
            print(f"错误：{sheetname} 页标题列数不足5列")
            sys.exit(1)
        col4 = header[3]
        col5 = header[4]
        if not (str(col4).strip() == "期刊/会议名称" and str(col5).strip() == "是否是CCF-A"):
            print(f"错误：{sheetname} 页，标题栏应该在第四行！！第四列标题为【{col4}】，第五列标题为【{col5}】，不符合要求！")
            sys.exit(1)

        # 5. Classify every data row below the header.
        for row in ws.iter_rows(min_row=header_row_idx + 1):
            name = row[3].value   # column 4: venue name
            result_cell = row[4]  # column 5: result
            if not name or not str(name).strip():
                result_cell.value = ""
                continue
            matched = _is_ccf_a(str(name).strip(), abbr_set, fullname_set)
            result_cell.value = "是" if matched else "否"

    # 6. Save the annotated workbook under a new name.
    base, ext = os.path.splitext(excel_path)
    out_path = f"{base}_CCF_A判断{ext}"
    wb.save(out_path)
    print(f"已保存结果文件: {out_path}")

# Run the annotation only when this file is executed as a script.
if __name__ == '__main__':
    main()