'''
Author: zhengxinhan
Date: 2025-05-08 15:02:02
LastEditors: zhengxinhan
LastEditTime: 2025-05-11 12:42:43
FilePath: /papertools_niuren_ccfa/utils.py
Description: 

Copyright (c) 2025 by m13521952989@163.com, All Rights Reserved. 
'''
def standardized_name(name):
    """
    Convert a personal name written in natural order into "Last, First" form.

    Titles and suffixes such as "Dr." or "Jr." are dropped first; the
    remaining space-separated parts are then rearranged by count:

    * 1 part   -- returned as is.
    * 2 parts  -- "First Last" becomes "Last, First".
    * 3 parts  -- a surname prefix in the middle gives a two-word surname;
      otherwise a parenthesised or dot-terminated part decides which end
      holds the surname; otherwise the last part is the surname.
    * 4+ parts -- a surname prefix starts a compound surname; without one
      the last part is the surname.

    Examples::

        "Barbara De Salvo"       -> "De Salvo, Barbara"
        "Xinyan (Tracy) Cui"     -> "Cui, Xinyan (Tracy)"
        "Ye Fred (Ying)"         -> "Ye, Ying (Fred)"
        "Maria Del Carmen Gomez" -> "Del Carmen Gomez, Maria"

    :param name: the name to convert
    :return: the name in "Last, First" form; ``name`` itself when it already
             contains a comma or when nothing is left after removing titles
    """
    # A comma means the name is already in "Last, First" form.
    if "," in name:
        return name

    # Common surname prefixes (matched case-sensitively).
    surname_prefixes = ["De", "Del", "Della", "Di", "Da", "Van", "Von", "Le", "La", "O'", "Mc", "Mac", 
                        "Al", "El", "Bin", "Ben", "Ibn", "Ter", "Saint", "St.", "Dos", "Das", "Los", "Las", "San"]

    # Titles, suffixes and stray tokens that are not part of the name.
    special_str = [".", "Ms.", "Mr.", "Mrs.", "Dr.", "Prof.", "PhD", "MD", "Jr.", "Sr.", "The", "Honorable"]

    name_split = [item.strip() for item in name.split(" ") if item.strip() != ""]
    name_split = [item for item in name_split if item not in special_str]

    # Nothing left after filtering: hand back the original string.
    if not name_split:
        return name

    # A single word cannot be reordered.
    if len(name_split) == 1:
        return name_split[0]

    if len(name_split) == 2:
        first_name, last_name = name_split

    elif len(name_split) == 3:
        head, middle, tail = name_split
        if middle in surname_prefixes:
            # Barbara De Salvo --> De Salvo, Barbara
            first_name = head
            last_name = f"{middle} {tail}"
        elif head.startswith("(") and head.endswith(")"):
            # (Alexander) Philip Dawid --> Dawid, Philip (Alexander)
            first_name = f"{middle} {head}"
            last_name = tail
        elif middle.startswith("(") and middle.endswith(")"):
            # Xinyan (Tracy) Cui --> Cui, Xinyan (Tracy)
            first_name = f"{head} {middle}"
            last_name = tail
        elif tail.startswith("(") and tail.endswith(")"):
            # Ye Fred (Ying): surname Ye, given name Ying, English name Fred
            # --> Ye, Ying (Fred)
            first_name = f"{tail[1:-1]} ({middle})"
            last_name = head
        elif head.endswith("."):
            # M. Jane Smith --> Smith, Jane M.
            first_name = f"{middle} {head}"
            last_name = tail
        elif middle.endswith("."):
            # Jane M. Smith --> Smith, Jane M.
            first_name = f"{head} {middle}"
            last_name = tail
        elif tail.endswith("."):
            # Wimmer-Schweingruber Robert F. --> Wimmer-Schweingruber, Robert F.
            first_name = f"{middle} {tail}"
            last_name = head
        else:
            # William Nelson Joy --> Joy, William Nelson
            first_name = f"{head} {middle}"
            last_name = tail

    else:
        # Four or more parts.
        if name_split[-2] in surname_prefixes:
            # Prefix directly before the final word: two-word surname.
            last_name = f"{name_split[-2]} {name_split[-1]}"
            first_name = " ".join(name_split[:-2])
        else:
            # Look for a prefix that opens a longer compound surname, e.g.
            # "Del Carmen Gomez" in "Maria Del Carmen Gomez".  The scan
            # starts at index 1: a prefix-like first word ("Ben", "Al", ...)
            # is a given name, and treating it as the start of the surname
            # would leave the first name empty.
            for i in range(1, len(name_split) - 1):
                if name_split[i] not in surname_prefixes:
                    continue
                # Every word between the prefix and the final word must
                # itself look like part of a surname.
                between = name_split[i + 1:-1]
                if all(part in surname_prefixes or part[0].isupper() for part in between):
                    last_name = " ".join(name_split[i:])
                    first_name = " ".join(name_split[:i])
                    break
            else:
                # No compound surname found: the last word is the surname.
                last_name = name_split[-1]
                first_name = " ".join(name_split[:-1])

    return f"{last_name}, {first_name}"


def convert_to_lowercase(element):
    """
    Lower-case a string, or every string inside a (possibly nested) list.

    :param element: a string, a list (whose items are processed recursively),
                    or any other value
    :return: the lower-cased string, a new list of converted items, or the
             value itself when it is neither a string nor a list
    """
    if isinstance(element, str):
        return element.lower()
    if isinstance(element, list):
        # Recurse so nested lists are handled too.
        return list(map(convert_to_lowercase, element))
    return element
    
def name_in_niuren_list(name, niuren_name_list):
    """
    Find the position of a name in the niuren (notable people) list,
    ignoring case.

    Each entry of the list is either a single name string or a list of
    alternative names for one person; entries of any other type are skipped.
    An exact match anywhere in the list takes priority. Only when there is
    none is a prefix match tried, where one name equals the other followed
    by a space and more text.

    :param name: the name to look up
    :param niuren_name_list: list whose entries are a name or a list of names
    :return: index of the first matching entry, or -1 when nothing matches
    """
    def _extends_with_space(longer, shorter):
        # True when `longer` is `shorter` followed by a space and more text.
        return longer.startswith(shorter) and longer[len(shorter)] == " "

    target = convert_to_lowercase(name)
    entries = convert_to_lowercase(niuren_name_list)

    # Pass 1: exact matches.
    for position, entry in enumerate(entries):
        if isinstance(entry, str):
            if target == entry:
                return position
        elif isinstance(entry, list):
            if target in entry:
                return position

    # Pass 2: whole-word prefix matches, in either direction.
    for position, entry in enumerate(entries):
        if isinstance(entry, str):
            candidates = [entry]
        elif isinstance(entry, list):
            candidates = entry
        else:
            continue
        for candidate in candidates:
            if _extends_with_space(candidate, target) or _extends_with_space(target, candidate):
                return position

    return -1