Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
papertools
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Yutong Wu
papertools
Commits
34bba471
Commit
34bba471
authored
May 09, 2025
by
Pengwei-Jin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
author变为全小写形式匹配
parent
2cb16c72
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
8 deletions
+9
-8
niurenshaixuan/main.py
+8
-8
niurenshaixuan/utils.py
+1
-0
No files found.
niurenshaixuan/main.py
View file @
34bba471
...
@@ -12,7 +12,7 @@ output_file_path = '测试输出3.xlsx'
...
@@ -12,7 +12,7 @@ output_file_path = '测试输出3.xlsx'
def
load_niuren_pool
():
def
load_niuren_pool
():
niuren_pool
=
pd
.
read_csv
(
"info/new_niuren_format-merged_turing.csv"
,
encoding
=
'utf-8-sig'
)
niuren_pool
=
pd
.
read_csv
(
"info/new_niuren_format-merged_turing.csv"
,
encoding
=
'utf-8-sig'
)
niuren_pool_names
=
niuren_pool
[
"name"
]
.
tolist
()
niuren_pool_names
=
niuren_pool
[
"name"
]
.
tolist
()
niuren_pool_names
=
[
name
.
replace
(
"
\xa0
"
,
" "
)
for
name
in
niuren_pool_names
]
# 去除空格
niuren_pool_names
=
[
name
.
replace
(
"
\xa0
"
,
" "
)
.
lower
()
for
name
in
niuren_pool_names
]
# 去除空格
return
niuren_pool_names
return
niuren_pool_names
...
@@ -22,7 +22,7 @@ def load_true_niuren():
...
@@ -22,7 +22,7 @@ def load_true_niuren():
true_niuren
[
"别名列表"
]
=
None
true_niuren
[
"别名列表"
]
=
None
true_niuren
[
"别名列表"
]
=
true_niuren
[
"别名列表(各种奇奇怪怪的名字格式,比如first name和second name的顺序,以;分隔)"
]
.
apply
(
true_niuren
[
"别名列表"
]
=
true_niuren
[
"别名列表(各种奇奇怪怪的名字格式,比如first name和second name的顺序,以;分隔)"
]
.
apply
(
lambda
x
:
[
firstlast2lastfirst
(
i
.
strip
())
for
i
in
x
.
split
(
";"
)
if
i
!=
""
]
if
isinstance
(
x
,
str
)
else
[]
lambda
x
:
[
firstlast2lastfirst
(
i
.
strip
())
.
lower
()
for
i
in
x
.
split
(
";"
)
if
i
!=
""
]
if
isinstance
(
x
,
str
)
else
[]
)
)
true_niuren_names
=
[]
true_niuren_names
=
[]
...
@@ -31,9 +31,9 @@ def load_true_niuren():
...
@@ -31,9 +31,9 @@ def load_true_niuren():
# 构建牛人姓名列表
# 构建牛人姓名列表
for
_
,
row
in
true_niuren
.
iterrows
():
for
_
,
row
in
true_niuren
.
iterrows
():
if
row
[
"别名列表"
]:
if
row
[
"别名列表"
]:
true_niuren_names
.
append
([
firstlast2lastfirst
(
row
[
"姓名"
])]
+
row
[
"别名列表"
])
true_niuren_names
.
append
([
firstlast2lastfirst
(
row
[
"姓名"
])
.
lower
()
]
+
row
[
"别名列表"
])
else
:
else
:
true_niuren_names
.
append
(
firstlast2lastfirst
(
row
[
"姓名"
]))
true_niuren_names
.
append
(
firstlast2lastfirst
(
row
[
"姓名"
])
.
lower
()
)
# 构建牛人论文列表
# 构建牛人论文列表
for
true_niuren_idx
,
_
in
enumerate
(
true_niuren_names
):
for
true_niuren_idx
,
_
in
enumerate
(
true_niuren_names
):
...
@@ -58,7 +58,7 @@ def load_fake_niuren():
...
@@ -58,7 +58,7 @@ def load_fake_niuren():
# 构建非牛人姓名列表
# 构建非牛人姓名列表
for
_
,
row
in
fake_niuren
.
iterrows
():
for
_
,
row
in
fake_niuren
.
iterrows
():
fake_niuren_names
.
append
(
firstlast2lastfirst
(
row
[
"姓名"
]))
fake_niuren_names
.
append
(
firstlast2lastfirst
(
row
[
"姓名"
])
.
lower
()
)
# 构建非牛人论文列表
# 构建非牛人论文列表
for
fake_niuren_idx
,
_
in
enumerate
(
fake_niuren_names
):
for
fake_niuren_idx
,
_
in
enumerate
(
fake_niuren_names
):
...
@@ -87,12 +87,12 @@ def check_niuren(authors, title):
...
@@ -87,12 +87,12 @@ def check_niuren(authors, title):
continue
continue
# 作者姓名能否在牛人池中找到
# 作者姓名能否在牛人池中找到
name_index
=
name_in_niuren_list
(
firstlast2lastfirst
(
author
),
niuren_pool_names
)
name_index
=
name_in_niuren_list
(
firstlast2lastfirst
(
author
)
.
lower
()
,
niuren_pool_names
)
if
name_index
==
-
1
:
if
name_index
==
-
1
:
continue
continue
# 作者姓名能否在全局牛人中找到
# 作者姓名能否在全局牛人中找到
true_name_index
=
name_in_niuren_list
(
firstlast2lastfirst
(
author
),
true_niuren_names
)
true_name_index
=
name_in_niuren_list
(
firstlast2lastfirst
(
author
)
.
lower
()
,
true_niuren_names
)
if
true_name_index
!=
-
1
:
if
true_name_index
!=
-
1
:
if
title
.
strip
()
.
lower
()
in
true_niuren_papers
[
true_name_index
]:
if
title
.
strip
()
.
lower
()
in
true_niuren_papers
[
true_name_index
]:
niuren
.
append
(
author
)
niuren
.
append
(
author
)
...
@@ -100,7 +100,7 @@ def check_niuren(authors, title):
...
@@ -100,7 +100,7 @@ def check_niuren(authors, title):
continue
continue
# 作者姓名能否在全局非牛人中找到
# 作者姓名能否在全局非牛人中找到
fake_name_index
=
name_in_niuren_list
(
firstlast2lastfirst
(
author
),
fake_niuren_names
)
fake_name_index
=
name_in_niuren_list
(
firstlast2lastfirst
(
author
)
.
lower
()
,
fake_niuren_names
)
if
fake_name_index
!=
-
1
:
if
fake_name_index
!=
-
1
:
if
title
.
strip
()
.
lower
()
in
fake_niuren_papers
[
fake_name_index
]:
if
title
.
strip
()
.
lower
()
in
fake_niuren_papers
[
fake_name_index
]:
continue
continue
...
...
niurenshaixuan/utils.py
View file @
34bba471
...
@@ -4,6 +4,7 @@ def firstlast2lastfirst(name):
...
@@ -4,6 +4,7 @@ def firstlast2lastfirst(name):
# 名人列表中存在一些名字是First Name Last Name的格式,将其转换为Last Name, First Name格式
# 名人列表中存在一些名字是First Name Last Name的格式,将其转换为Last Name, First Name格式
# 例如:'John Doe' -> 'Doe, John','M. Jane Smith' -> 'Smith, Jane M.'
# 例如:'John Doe' -> 'Doe, John','M. Jane Smith' -> 'Smith, Jane M.'
special_str
=
[
"."
,
"Ms."
,
"Mr."
,
"Mrs."
,
"Dr."
,
"Prof."
,
"PhD"
,
"MD"
,
"Jr."
,
"Sr."
,
"The"
,
"Honorable"
]
special_str
=
[
"."
,
"Ms."
,
"Mr."
,
"Mrs."
,
"Dr."
,
"Prof."
,
"PhD"
,
"MD"
,
"Jr."
,
"Sr."
,
"The"
,
"Honorable"
]
special_str
=
[
item
.
lower
()
for
item
in
special_str
]
name_split
=
name
.
split
(
" "
)
name_split
=
name
.
split
(
" "
)
name_split
=
[
item
.
strip
()
for
item
in
name_split
if
item
.
strip
()
!=
""
]
name_split
=
[
item
.
strip
()
for
item
in
name_split
if
item
.
strip
()
!=
""
]
name_split
=
[
item
for
item
in
name_split
if
item
not
in
special_str
]
name_split
=
[
item
for
item
in
name_split
if
item
not
in
special_str
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment