from copy import deepcopy
from utils import get_continent
import pandas as pd
# Load the reference list of well-known companies; the matching further down
# uses the English names from the "企业-英文名" column.
df = pd.read_excel("知名企业.xlsx")

# Keep only usable names. Blank cells are read by pandas as NaN (a float) and
# cannot be used in a substring test, while an empty or whitespace-only name
# would be a substring of (almost) every affiliation and tag every row.
qiye_name = [
    name
    for name in df["企业-英文名"].tolist()
    if isinstance(name, str) and name.strip()
]

# Load the paper table; the reader is chosen from the file extension.
file_name = "good_papers-niuren-task_10.csv"
if file_name.endswith(".xlsx"):
    final = pd.read_excel(file_name, engine='openpyxl')
elif file_name.endswith(".csv"):
    final = pd.read_csv(file_name, encoding='utf-8-sig')
else:
    # Fail here with a clear message instead of a NameError on `final` later.
    raise ValueError(f"Unsupported input file type: {file_name!r}")

# Add the output column that will hold the matched company names.
final["知名企业"] = ""

def _match_companies(affiliations, company_names):
    """Return the company names that occur as substrings of *affiliations*.

    Args:
        affiliations: Affiliation text of one row. Any non-string value
            (for example the NaN pandas uses for an empty cell, or None)
            yields no matches.
        company_names: Candidate company names; non-string entries are
            skipped.

    Returns:
        The matched names without duplicates, in the order in which they
        appear in *company_names*.
    """
    if not isinstance(affiliations, str):
        return []

    # "Intel" is a prefix of "Intelligence" and "Intelligent"; remove those
    # words once per row so they are not mistaken for the company.
    without_lookalikes = affiliations.replace("Intelligence", "")
    without_lookalikes = without_lookalikes.replace("Intelligent", "")

    matched = []
    for name in company_names:
        if not isinstance(name, str):
            continue
        haystack = without_lookalikes if name == "Intel" else affiliations
        if name in haystack:
            matched.append(name)

    # dict.fromkeys drops duplicates while keeping a stable order, so the
    # output is reproducible from run to run (a set would not guarantee that).
    return list(dict.fromkeys(matched))


# Fill the "知名企业" column with the "; "-joined matches of every row.
# row.get() returns None when the "affiliations" column is absent, which
# results in an empty string for that row rather than an error.
final["知名企业"] = [
    "; ".join(_match_companies(row.get("affiliations"), qiye_name))
    for _, row in final.iterrows()
]

# Save the result as a CSV file.
final.to_csv("good_papers-niuren-task_11.csv", index=False, encoding='utf-8-sig')