def contains_chinese(s_str):
    """Return True if ``s_str`` contains at least one CJK ideograph.

    The following Unicode ranges are checked:

    * U+4E00..U+9FFF   - CJK Unified Ideographs
    * U+3400..U+4DBF   - CJK Unified Ideographs Extension A
    * U+20000..U+2A6DF - CJK Unified Ideographs Extension B

    :param s_str: text to inspect (normally a ``str``)
    :return: True if any character falls in one of the ranges above,
        otherwise False. Input that cannot be scanned character by
        character (e.g. ``None`` or a float NaN) yields False.
    """
    try:
        return any(
            '\u4e00' <= char <= '\u9fff'
            or '\u3400' <= char <= '\u4dbf'
            # Code points above U+FFFF need the 8-digit \U escape. The
            # 4-digit form '\u20000' is really the two characters
            # U+2000 + '0', which made this range match general
            # punctuation such as U+2019 instead of Extension B.
            or '\U00020000' <= char <= '\U0002a6df'
            for char in s_str
        )
    except TypeError:
        # Not iterable, or elements are not comparable with str.
        return False
        

from pypinyin import pinyin, Style
def chinese_to_english_name(chinese_name):
    """Romanise a Chinese name as ``"Surname, Givenname"`` using pypinyin.

    The first character is treated as the surname and everything after it
    as the given name, so multi-character surnames are not recognised.
    Each syllable is converted with ``Style.NORMAL`` and capitalised; the
    given-name syllables are concatenated without a separator.

    :param chinese_name: the Chinese name, at least two characters long
    :return: the romanised name in ``"Last, First"`` order
    :raises ValueError: if ``chinese_name`` is shorter than two characters
    """
    if len(chinese_name) < 2:
        raise ValueError("名字长度不足")

    surname_char = chinese_name[0]
    given_chars = chinese_name[1:]

    # pinyin() returns one list per character; the first entry of each
    # inner list is the reading that is used.
    surname_syllables = pinyin(surname_char, style=Style.NORMAL, heteronym=False)
    surname_latin = surname_syllables[0][0].capitalize()

    given_syllables = pinyin(given_chars, style=Style.NORMAL, heteronym=False)
    given_latin = ''.join(syllable[0].capitalize() for syllable in given_syllables)

    return f"{surname_latin}, {given_latin}"

def firstlast2lastfirst(name):
    """Convert a ``"First [Middle] Last"`` name to ``"Last, First [Middle]"``.

    Names that already contain a comma, or that contain Chinese characters,
    are returned unchanged. Titles and suffixes listed in ``special_str``
    are dropped before reordering.

    Examples::

        'John Doe'            -> 'Doe, John'
        'M. Jane Smith'       -> 'Smith, Jane M.'
        'Xinyan (Tracy) Cui'  -> 'Cui, Xinyan (Tracy)'
        'Ye Fred (Ying)'      -> 'Ye, Ying (Fred)'
        'Wilderer Peter A.'   -> 'Wilderer, Peter A.'

    :param name: the name to normalise
    :return: the name in ``"Last, First"`` order. If nothing is left after
        removing titles (empty input, or e.g. ``"Dr."``) the input is
        returned unchanged. A single remaining token ``X`` yields ``"X, "``,
        as before.
    """
    if "," in name or contains_chinese(name):
        return name

    # Titles, honorifics and suffixes (plus a stray ".") that are not part
    # of the name proper.
    special_str = [".", "Ms.", "Mr.", "Mrs.", "Dr.", "Prof.", "PhD", "MD", "Jr.", "Sr.", "The", "Honorable"]
    name_split = [item.strip() for item in name.split(" ")]
    name_split = [item for item in name_split if item != "" and item not in special_str]

    if not name_split:
        # Nothing to reorder; indexing name_split[-1] below would raise
        # IndexError, so hand the input back untouched.
        return name

    if len(name_split) != 3:
        # 1, 2 or 4+ tokens: the last token is taken as the surname and
        # everything before it as the given name(s).
        # e.g. 'John Doe' -> 'Doe, John'
        last_name = name_split[-1]
        first_name = " ".join(name_split[:-1])
        return f"{last_name}, {first_name}"

    def in_parens(token):
        return token.startswith("(") and token.endswith(")")

    head, middle, tail = name_split

    if in_parens(head):
        # (Alexander) Philip Dawid --> Dawid, Philip (Alexander)
        first_name = f"{middle} {head}"
        last_name = tail
    elif in_parens(middle):
        # Xinyan (Tracy) Cui --> Cui, Xinyan (Tracy)
        first_name = f"{head} {middle}"
        last_name = tail
    elif in_parens(tail):
        # Surname-first form with the given name in parentheses:
        # Ye Fred (Ying)     --> Ye, Ying (Fred)
        # Zhu Jesse (Jingxu) --> Zhu, Jingxu (Jesse)
        first_name = f"{tail[1:-1]} ({middle})"
        last_name = head
    elif head.endswith("."):
        # M. Jane Smith      --> Smith, Jane M.
        # K.W. Michael Siu   --> Siu, Michael K.W.
        first_name = f"{middle} {head}"
        last_name = tail
    elif middle.endswith("."):
        # Jane M. Smith             --> Smith, Jane M.
        # Pierre J.H. Richardson    --> Richardson, Pierre J.H.
        first_name = f"{head} {middle}"
        last_name = tail
    elif tail.endswith("."):
        # Trailing initial means the surname came first:
        # Wimmer-Schweingruber Robert F. --> Wimmer-Schweingruber, Robert F.
        # Wilderer Peter A.              --> Wilderer, Peter A.
        first_name = f"{middle} {tail}"
        last_name = head
    else:
        # William Nelson Joy --> Joy, William Nelson
        # Michael J Carey    --> Carey, Michael J
        first_name = f"{head} {middle}"
        last_name = tail

    return f"{last_name}, {first_name}"

import numpy as np
def rule_of_same_names(item, row_values):
    """Score whether a group of rows can be treated as the same person.

    ``row_values`` is summed along axis 0 and every resulting total is
    checked against 2.

    :param item: unused by the current implementation
    :param row_values: array-like of per-row values, summed with
        ``np.sum(..., axis=0)``
    :return: ``1.0`` when every total is below 2, otherwise ``0.0``.
        Per the original author's note, 1.0 means "same person" and 0.0
        means "not the same person".
    """
    column_totals = np.sum(row_values, axis=0)
    for total in column_totals:
        if not total < 2:
            return 0.0
    return 1.0
    
    
import json

# Read a JSON Lines (JSONL) file into a list of parsed objects.
def read_jsonl(file_path):
    """
    Read a JSON Lines file and return the parsed records.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped instead of being passed to the JSON parser.

    :param file_path: path to the JSONL file (read as UTF-8)
    :return: list with one parsed JSON value per non-blank line, in file order
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                # json.loads("") raises; an empty line carries no record.
                continue
            data.append(json.loads(line))

    return data


def name_in_niuren_list(name, niuren_name_list):
    """
    Find the position of a name in the "niuren" name list.

    An exact match anywhere in the list takes priority. Failing that, the
    first entry is returned for which one of the two names is the other
    followed by a space and more text (e.g. "Doe, Jane" vs "Doe, Jane M.").

    :param name: the name to look up
    :param niuren_name_list: sequence of names to search
    :return: index of the matching entry, or -1 if there is no match
    """
    # Pass 1: exact equality.
    exact_idx = next(
        (pos for pos, candidate in enumerate(niuren_name_list) if candidate == name),
        -1,
    )
    if exact_idx != -1:
        return exact_idx

    # Pass 2: one name extends the other at a word boundary.
    for pos, candidate in enumerate(niuren_name_list):
        if candidate.startswith(name + " ") or name.startswith(candidate + " "):
            return pos

    return -1


import pycountry
import pycountry_convert as pc

def get_continent(country_name):
    """Look up the continent name for a country via pycountry.

    The country is first looked up by exact name; if that finds nothing,
    the first pycountry entry whose name contains ``country_name``
    (case-insensitively) is used. "South Korea" is searched as "Korea".

    :param country_name: country name to resolve
    :return: the continent name from ``pycountry_convert``; the string
        "未知洲" when no country matches; or a string starting with
        "出错：" followed by the exception text if any step raises
    """
    lookup_name = "Korea" if country_name == "South Korea" else country_name
    try:
        found = pycountry.countries.get(name=lookup_name)
        if not found:
            # The name may be non-standard: fall back to a substring search.
            needle = lookup_name.lower()
            found = next(
                (entry for entry in pycountry.countries if needle in entry.name.lower()),
                None,
            )
        if not found:
            return "未知洲"

        alpha2 = found.alpha_2
        continent_code = pc.country_alpha2_to_continent_code(alpha2)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except Exception as e:
        return f"出错：{e}"

# # Examples
# print(get_continent("China"))       # Output: Asia
# print(get_continent("Germany"))     # Output: Europe
# print(get_continent("Brazil"))      # Output: South America


def _name_variants(last_name, first_name_1, first_name_2):
    """Return the spelling variants for one (first, second) given-name split."""
    base = f"{last_name}, {first_name_1}"
    if not first_name_2:
        # No second part (e.g. "Fei-"): there is no initial to derive.
        return [base]
    initial = first_name_2[0]
    return [
        base,
        f"{base}-{first_name_2}",
        f"{base} {first_name_2}",
        f"{base} {first_name_2}.",
        f"{base} {initial}",
        f"{base} {initial}.",
    ]


def parse_name(name):
    """Generate candidate spellings for a ``"Last, First[ Middle]"`` name.

    The text after the first comma is split into a first and a second
    given-name part, on "-" and/or on a space (e.g. "FeiFei-Q" or
    "FeiFei Q"). For each split the variants first only, hyphenated,
    spaced, spaced with a trailing dot, and second part abbreviated to its
    initial (with and without a dot) are produced. The stripped original
    spelling is always appended last. Duplicates are not removed.

    :param name: name in ``"Last, First"`` form
    :return: list of candidate names, or ``None`` if ``name`` has no comma
    """
    parts = name.split(',')
    if len(parts) < 2:
        return None  # Not in "Last, First" form; the caller should skip it.

    last_name = parts[0].strip()  # family name
    # Given name plus optional middle name. It can never contain a comma
    # because the name was split on ',' above, so no ", " case is needed.
    first_name_part = parts[1].strip()

    candidate_names = []

    if "-" in first_name_part:
        pieces = first_name_part.split('-')
        candidate_names.extend(
            _name_variants(last_name, pieces[0].strip(), pieces[1].strip())
        )

    if " " in first_name_part:
        # Drop empty tokens so consecutive spaces ("Fei  Q") do not yield an
        # empty second part; the string is stripped, so at least two
        # non-empty tokens remain.
        tokens = [tok.strip() for tok in first_name_part.split(' ')]
        tokens = [tok for tok in tokens if tok]
        candidate_names.extend(_name_variants(last_name, tokens[0], tokens[1]))

    candidate_names.append(f"{last_name}, {first_name_part}")

    return candidate_names


import pandas as pd

def csv_to_xlsx(csv_file_path, xlsx_file_path):
    """Convert a CSV file to an XLSX workbook (best effort).

    Progress and failures are reported with ``print``; any exception raised
    while reading or writing is caught and printed rather than propagated.

    :param csv_file_path: path of the CSV file to read
    :param xlsx_file_path: path of the XLSX file to write (openpyxl engine,
        without the index column)
    :return: None
    """
    try:
        frame = pd.read_csv(csv_file_path)
        print(f"成功读取 CSV 文件：{csv_file_path}")

        frame.to_excel(xlsx_file_path, index=False, engine='openpyxl')
        print(f"成功将数据写入 XLSX 文件：{xlsx_file_path}")
    except Exception as err:
        print(f"转换过程中发生错误：{err}")
