# Honorary titles / memberships tracked for each paper.
# The keys are used verbatim as CSV column names and as lookup keys into each
# paper record, and their order here fixes the order of those columns.
# The values are not read by the code visible in this file.
niuren_titles = {
    "ACM Fellow": "",
    "IEEE Fellow": "",
    "AAAS": "",
    # NOTE(review): this key carries a trailing space; it is kept unchanged
    # because it must match the record keys exactly -- confirm against the data.
    "NAS ": "",
    "NAE": "",
    # NOTE(review): the only non-empty value; intent unknown -- TODO confirm.
    "NAI": "12",
    "American Academy of Arts and Sciences": "",
    "European Academy of Sciences": "",
    "European Academy of Sciences and Arts": "",
    "Academia Europaea (AE)": "",
    "中国科学院": "",
    "中国工程院": "",
    "The Royal Society": "",
    "The Royal Society of Canada": "",
    "Turing Award": "",
}


from utils import read_jsonl
from copy import deepcopy

# Load the paper records that the export loop below turns into CSV rows.
papers_with_niuren_final = read_jsonl("good_papers-niuren.jsonl")

# Keys the export loop reads from each record: 'title' (or 'applicant' when
# 'title' is absent), 'authors', 'filename', 'niuren_authors', plus one key
# per entry of niuren_titles.

import pandas as pd
# Flatten the paper records into per-column lists and export them as a CSV.
# Columns are the five base fields below followed by one column per key of
# niuren_titles, in that dict's order.
df_data = {'title': [], 'authors': [], 'affiliations': [], 'filename': [], 'niuren_authors': []}
for key in niuren_titles:
    df_data.setdefault(key, [])

for item in papers_with_niuren_final:
    if "title" not in item:
        # Records that carry 'applicant' instead of 'title' use the applicant
        # as their title.
        item["title"] = item["applicant"]
        # Authors of such records most likely have no affiliations; default
        # the field to an empty list so the collection below works.
        for author in item['authors']:
            author.setdefault('affiliations', [])

    author_names = [author['name'] for author in item['authors']]

    # De-duplicate affiliations while keeping first-seen order. Going through
    # a set() would make the order arbitrary, and under string hash
    # randomization it could differ from one run to the next.
    author_affiliations = list(dict.fromkeys(
        affiliation
        for author in item['authors']
        for affiliation in author['affiliations']
    ))

    # NOTE(review): 'niuren_authors' appears to hold 1-based positions into
    # the author list -- confirm. A value of 0 would silently select the last
    # author rather than raise.
    niuren_authors = [author_names[idx - 1] for idx in item['niuren_authors']]

    # Author names are intentionally not de-duplicated: the positions in
    # 'niuren_authors' index into the full, ordered list.
    df_data['title'].append(item['title'])
    df_data['authors'].append("; ".join(author_names))
    df_data['affiliations'].append("; ".join(author_affiliations))
    # Record values are of unknown type here, so they are still copied
    # defensively; the joined strings above are immutable and need no copy.
    df_data['filename'].append(deepcopy(item['filename']))
    df_data['niuren_authors'].append("; ".join(niuren_authors))

    # Every record must provide every title key; a missing one raises
    # KeyError instead of silently producing a misaligned column.
    for key in niuren_titles:
        df_data[key].append(deepcopy(item[key]))

# Save to CSV. 'utf-8-sig' writes a byte-order mark, presumably so that
# spreadsheet tools detect UTF-8 for the non-ASCII column names -- confirm.
df = pd.DataFrame(df_data)
df.to_csv("good_papers-niuren.csv", index=False, encoding='utf-8-sig')