Commit ae59d2a2 by jiangdongchen

process excel

parent 763c9946
......@@ -22,7 +22,6 @@ if __name__ == "__main__":
pdf_dir = (cwd_dir / config["pdf_dir"]).resolve()
rst_dir = (cwd_dir / config["result_path"]).resolve()
excel_path = (cwd_dir / config["excel_path"]).resolve()
sheet_name = config["sheet_name"]
# print(excel_path)
......@@ -42,4 +41,4 @@ if __name__ == "__main__":
base_url=config["base_url"])
# RE.main( pdf_dir, rst_dir, config["model"], client)
RE.read_rough_nameIndex_from_excel(excel_path, sheet_name)
RE.read_rough_nameIndex_from_excel(excel_path)
......@@ -5,6 +5,5 @@
"pdf_dir": "./Papers",
"result_path": "./result.json",
"excel_path": "./others/reference.xlsx",
"sheet_name": "j24-DianNao family",
"logLevel": 20
}
\ No newline at end of file
......@@ -69,20 +69,24 @@ def extract_first_page_text(pdf_path):
logging.warning(f"PDF has no pages: {pdf_path.name}")
return None
def read_rough_nameIndex_from_excel(excel_path: Path, sheet_name: str):
# excel表格的第4行开始读取索引和论文名称
def read_rough_nameIndex_from_excel(excel_path: Path):
# 读取 Excel 文件中的某个工作表
df = pd.read_excel( excel_path, sheet_name)
# 显示前几行数据
print(df.head())
# 获取所有工作表的数据
# excel_data = pd.read_excel('example.xlsx', sheet_name=None)
# for sheet_name, data in excel_data.items():
# print(f"工作表: {sheet_name}")
# print(data.head())
# 当你读取多个工作表时,pandas.read_excel(sheet_name=None) 会返回一个字典,其中:
# 键 是工作表的名称(sheet_name);
# 值 是每个工作表对应的 DataFrame。
# 通过 items(),你可以在一个循环中轻松地访问这两个部分
# 获取工作表的数据
excel_data = pd.read_excel(excel_path, sheet_name=None)
for sname, data in excel_data.items():
df = data.iloc[2:]
for index, row in df.iterrows():
print(row.iloc[0])
print(row.iloc[1])
def main(pdf_directory: Path, result_path: Path, configModel: str, client):
with open(result_path, "w", encoding="utf-8") as f:
......
......@@ -4,8 +4,7 @@
{
"name": "Jia Si",
"affiliations": [
"Key Laboratory for the Physics and Chemistry of Nanodevices and Center for Carbon-based Electronics, School of Electronics, Peking University, Beijing, China",
"Beijing Institute of Carbon-based Integrated Circuits, Beijing, China"
"Key Laboratory for the Physics and Chemistry of Nanodevices and Center for Carbon-based Electronics, School of Electronics, Peking University, Beijing, China"
]
},
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment