README

a4679d73 · jiangdongchen · 2b8dc5d4 · a4679d73 · a4679d73 · a4679d73
Commit a4679d73 authored May 08, 2025 by jiangdongchen
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 75 additions and 6 deletions

README.md
+1 -6

logs/citation_process.log
+0 -0

psrc/checkCCFA.py
+74 -0

No files found.
--- a/README.md
+++ b/README.md
@@ -53,12 +53,7 @@
            2. **遍历**excel表格中的论文名称进行模糊匹配
                1. 匹配成功后
                    1. 用pdf文件中的论文名称和索引标准化重命名pdf文件和excel表格中的论文名称
-                    2. 将pdf文件中的关键信息写入json文件中进行保存, 包括
-                        - 标题
-                        - 会议名称
-                        - 作者姓名
-                        - 机构
-                        - 国家
+                    2. 将pdf文件中的关键信息写入json文件中进行保存, 包括: 标题、会议名称、作者姓名、机构、国家
                2. 匹配失败后，输出无法匹配的条目
                    1. 使用warning记录无法匹配的条目，方便后续处理


--- a/logs/citation_process.log
+++ b/logs/citation_process.log
--- a/psrc/checkCCFA.py
+++ b/psrc/checkCCFA.py
+from openai import OpenAI
+from pathlib import Path
+import json
+import openpyxl
+
+def chechCCFA(conferenceJournal, CCFA, configModel, client):
+    """Ask a chat model whether a venue name matches an entry of the CCF-A list.
+
+    Args:
+        conferenceJournal: Conference or journal name to classify; sent
+            verbatim as the user message.
+        CCFA: Comma-separated string of CCF-A abbreviations and full names;
+            embedded in the system prompt.
+        configModel: Model identifier forwarded to the chat-completions call.
+        client: Object exposing ``chat.completions.create`` (the script in
+            this file passes an ``openai.OpenAI`` instance).
+
+    Returns:
+        The raw message content of the first returned choice. JSON mode is
+        requested, so the content is expected to be a JSON document with the
+        keys ``IsCCFA``, ``MatchedName``, ``Confidence`` and ``Reason``; it is
+        returned unparsed.
+    """
+    # The field list and the example must agree with each other, otherwise the
+    # model gets contradictory instructions: IsCCFA is a JSON boolean,
+    # Confidence is on the 0-100 scale, and Reason is part of the contract.
+    system_prompt = f"""
+    You are an expert academic conference/journal classifier. Your task is to determine if the given conference/journal name matches any entry in the provided CCF-A list.
+    CCF-A List (comma-separated): {CCFA}
+    Analysis Guidelines:
+    1. Perform fuzzy matching considering:
+       - Abbreviations vs full names (e.g. 'PPoPP' vs 'ACM SIGPLAN Symposium on Principles & Practice of Parallel Programming')
+       - Common variations (e.g. 'IEEE Transactions' vs 'IEEE Trans.')
+       - Minor spelling differences
+    2. Return JSON with:
+       - "IsCCFA": true/false
+       - "MatchedName": the matched name from CCF-A list (empty string if no match)
+       - "Confidence": your confidence score (0-100)
+       - "Reason": a short explanation of the decision
+    Example Output:
+    {{
+        "IsCCFA": true,
+        "MatchedName": "IEEE International Symposium on High Performance Computer Architecture",
+        "Confidence": 95,
+        "Reason": "The input matches HPCA's full name"
+    }}
+    """
+
+    response = client.chat.completions.create(
+        model=configModel,
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": conferenceJournal},
+        ],
+        temperature=0.2,
+        max_tokens=4096,
+        # JSON mode: the reply is requested as a single JSON object.
+        response_format={"type": "json_object"},
+    )
+
+    return response.choices[0].message.content
+
+
+if __name__ == "__main__":
+    # Build the full path to config.json in the current working directory.
+    config_path = (Path.cwd() / "config.json").resolve()
+
+    # Read the configuration parameters from config.json.
+    with open(config_path, 'r', encoding='utf-8') as f:
+        config = json.load(f)
+
+    client = OpenAI(api_key=config["api_key"], base_url=config["base_url"])
+    configModel = config["model"]
+    excel_path2 = Path(config["excel_path2"])
+    wb = openpyxl.load_workbook(excel_path2)
+    sheetCCF = wb["CCF-A列表"]
+    # Sheet layout as noted by the author (header row, then one venue per row):
+    # No.	Abbreviation	Full name
+    # 1	PPoPP	ACM SIGPLAN Symposium on Principles & Practice of Parallel Programming
+    # 2	FAST	USENIX Conference on File and Storage Technologies
+    # 3	DAC	Design Automation Conference
+    # 4	HPCA	IEEE International Symposium on High Performance Computer Architecture
+    # 5	MICRO	IEEE/ACM International Symposium on Microarchitecture
+    CCFA_list = []
+    for row in sheetCCF.iter_rows(min_row=2, values_only=True):  # start at row 2 to skip the header
+        # Only use rows that carry both a sequence number and an abbreviation.
+        if len(row) >= 2 and row[0] and row[1]:
+            # Keep just the non-empty name cells: an empty full-name cell is
+            # None and would make ','.join() raise TypeError, and a sheet with
+            # fewer than three columns would raise IndexError on row[2].
+            CCFA_list.extend(str(name) for name in row[1:3] if name)
+    # Flatten the list into one long comma-separated string.
+    CCFA = ','.join(CCFA_list)
+    conferenceJournal = "IEEE Journal of Solid-State Circuits"
+    result = chechCCFA(conferenceJournal, CCFA, configModel, client)
+    print(result)
\ No newline at end of file