Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
papertools
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Yutong Wu
papertools
Commits
bc0ad88a
Commit
bc0ad88a
authored
May 09, 2025
by
Pengwei-Jin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
避免重复加载info文件夹
parent
91ab82b3
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
12 deletions
+8
-12
niurenshaixuan/main.py
+8
-12
No files found.
niurenshaixuan/main.py
View file @
bc0ad88a
...
...
@@ -73,14 +73,11 @@ def load_fake_niuren():
return
fake_niuren_names
,
fake_niuren_papers
def
check_niuren
(
authors
,
title
):
def
check_niuren
(
authors
,
title
,
niuren_pool_names
,
true_niuren_names
,
true_niuren_papers
,
fake_niuren_names
,
fake_niuren_papers
):
niuren
=
[]
order
=
[]
suspected_niuren
=
[]
niuren_pool_names
=
load_niuren_pool
()
true_niuren_names
,
true_niuren_papers
=
load_true_niuren
()
fake_niuren_names
,
fake_niuren_papers
=
load_fake_niuren
()
for
author_idx
,
author
in
enumerate
(
authors
):
if
author
==
""
:
...
...
@@ -112,13 +109,13 @@ def check_niuren(authors, title):
return
niuren_str
,
order_str
,
suspected_niuren_str
def
process_row
(
index
,
row
):
def
process_row
(
index
,
row
,
niuren_pool_names
,
true_niuren_names
,
true_niuren_papers
,
fake_niuren_names
,
fake_niuren_papers
):
authors
=
row
.
get
(
'引文作者'
,
''
)
title
=
row
.
get
(
'引文名称'
,
''
)
if
not
isinstance
(
authors
,
str
):
return
index
,
""
,
""
,
""
authors
=
[
i
.
strip
()
for
i
in
authors
.
split
(
";"
)
if
i
!=
""
]
niuren_true
,
niuren_true_order
,
suspected_niuren
=
check_niuren
(
authors
,
title
)
niuren_true
,
niuren_true_order
,
suspected_niuren
=
check_niuren
(
authors
,
title
,
niuren_pool_names
,
true_niuren_names
,
true_niuren_papers
,
fake_niuren_names
,
fake_niuren_papers
)
return
index
,
niuren_true
,
niuren_true_order
,
suspected_niuren
...
...
@@ -126,6 +123,10 @@ if __name__ == "__main__":
# 读取原始 Excel 文件的前两行
original_header
=
pd
.
read_excel
(
input_file_path
,
nrows
=
2
,
header
=
None
)
niuren_pool_names
=
load_niuren_pool
()
true_niuren_names
,
true_niuren_papers
=
load_true_niuren
()
fake_niuren_names
,
fake_niuren_papers
=
load_fake_niuren
()
# 读取数据并跳过前两行
input_df
=
pd
.
read_excel
(
input_file_path
,
skiprows
=
2
)
...
...
@@ -135,17 +136,12 @@ if __name__ == "__main__":
input_df
[
'疑似牛人'
]
=
None
results
=
Parallel
(
n_jobs
=-
1
)(
delayed
(
process_row
)(
index
,
row
)
for
index
,
row
in
tqdm
(
input_df
.
iterrows
()))
results
=
Parallel
(
n_jobs
=-
1
)(
delayed
(
process_row
)(
index
,
row
,
niuren_pool_names
,
true_niuren_names
,
true_niuren_papers
,
fake_niuren_names
,
fake_niuren_papers
)
for
index
,
row
in
tqdm
(
input_df
.
iterrows
()))
for
index
,
niuren_true
,
niuren_true_order
,
suspected_niuren
in
results
:
input_df
.
at
[
index
,
'牛人'
]
=
niuren_true
input_df
.
at
[
index
,
'牛人署名顺序'
]
=
niuren_true_order
input_df
.
at
[
index
,
'疑似牛人'
]
=
suspected_niuren
# for index, row in tqdm(input_df.iterrows()):
# index, niuren_true, niuren_true_order, suspected_niuren = process_row(index, row)
# input_df.at[index, '牛人'] = niuren_true
# input_df.at[index, '牛人署名顺序'] = niuren_true_order
# input_df.at[index, '疑似牛人'] = suspected_niuren
max_cols
=
max
(
original_header
.
shape
[
1
],
input_df
.
shape
[
1
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment