Merge branch 'master' of http://62.234.201.16/nzy/papertools

ebfcafa0 · hanhusheng · 4ac20bd3 · d6cdd8eb · ebfcafa0
Commit ebfcafa0 authored May 09, 2025 by hanhusheng
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 12 deletions

niurenshaixuan/main.py
+8 -12

No files found.
--- a/niurenshaixuan/main.py
+++ b/niurenshaixuan/main.py
@@ -73,14 +73,11 @@ def load_fake_niuren():
    return fake_niuren_names, fake_niuren_papers


-def check_niuren(authors, title):
+def check_niuren(authors, title, niuren_pool_names, true_niuren_names, true_niuren_papers, fake_niuren_names, fake_niuren_papers):
    niuren = []
    order = []
    suspected_niuren = []

-    niuren_pool_names = load_niuren_pool()
-    true_niuren_names, true_niuren_papers = load_true_niuren()
-    fake_niuren_names, fake_niuren_papers = load_fake_niuren()

    for author_idx, author in enumerate(authors):
        if author == "":
@@ -112,13 +109,13 @@ def check_niuren(authors, title):
    return niuren_str, order_str, suspected_niuren_str


-def process_row(index, row):
+def process_row(index, row, niuren_pool_names, true_niuren_names, true_niuren_papers, fake_niuren_names, fake_niuren_papers):
    authors = row.get('引文作者', '')
    title = row.get('引文名称', '')
    if not isinstance(authors, str):
        return index, "", "", ""
    authors = [i.strip() for i in authors.split(";") if i!=""]
-    niuren_true, niuren_true_order, suspected_niuren = check_niuren(authors, title)
+    niuren_true, niuren_true_order, suspected_niuren = check_niuren(authors, title, niuren_pool_names, true_niuren_names, true_niuren_papers, fake_niuren_names, fake_niuren_papers)
    return index, niuren_true, niuren_true_order, suspected_niuren


@@ -126,6 +123,10 @@ if __name__ == "__main__":
    # 读取原始 Excel 文件的前两行
    original_header = pd.read_excel(input_file_path, nrows=2, header=None)

+    niuren_pool_names = load_niuren_pool()
+    true_niuren_names, true_niuren_papers = load_true_niuren()
+    fake_niuren_names, fake_niuren_papers = load_fake_niuren()
+
    # 读取数据并跳过前两行
    input_df = pd.read_excel(input_file_path, skiprows=2)

@@ -135,17 +136,12 @@ if __name__ == "__main__":
    input_df['疑似牛人'] = None


-    results = Parallel(n_jobs=-1)(delayed(process_row)(index, row) for index, row in tqdm(input_df.iterrows()))
+    results = Parallel(n_jobs=-1)(delayed(process_row)(index, row, niuren_pool_names, true_niuren_names, true_niuren_papers, fake_niuren_names, fake_niuren_papers) for index, row in tqdm(input_df.iterrows()))
    for index, niuren_true, niuren_true_order, suspected_niuren in results:
        input_df.at[index, '牛人'] = niuren_true
        input_df.at[index, '牛人署名顺序'] = niuren_true_order
        input_df.at[index, '疑似牛人'] = suspected_niuren

-    # for index, row in tqdm(input_df.iterrows()):
-    #     index, niuren_true, niuren_true_order, suspected_niuren = process_row(index, row)
-    #     input_df.at[index, '牛人'] = niuren_true
-    #     input_df.at[index, '牛人署名顺序'] = niuren_true_order
-    #     input_df.at[index, '疑似牛人'] = suspected_niuren


    max_cols = max(original_header.shape[1], input_df.shape[1])