Upload New File

ad8a710f · matianyun · 9de60412 · ad8a710f
Commit ad8a710f authored Jun 05, 2025 by matianyun
Hide whitespace changes
Inline Side-by-side

Showing with 109 additions and 0 deletions

guojiaming_国家索引自动填充.py
+109 -0

No files found.
--- a/guojiaming_国家索引自动填充.py
+++ b/guojiaming_国家索引自动填充.py
+import openpyxl
+import openpyxl
+from openpyxl.utils import get_column_letter
+
+def main():
+    # 配置参数
+    input_file = "workspace2.xlsx"          # 输入Excel文件名
+    output_file = "output3.xlsx"        # 输出Excel文件名
+    data_sheet_name = "c43-diannao"         # 数据所在Sheet名称
+    mapping_sheet_name = "全局国家及地区"  # 国家映射表Sheet名称  要提前把全局国家及地区放到同一个excel
+    country_col_name = "引文机构所属国家"  # 国家列名称
+    index_col_name = "引文机构所属国家索引-check" # 索引列名称
+    
+    print("正在加载工作簿...")
+    wb = openpyxl.load_workbook(input_file)
+    
+    # 获取映射表
+    mapping_sheet = wb[mapping_sheet_name]
+    print(f"已加载映射表: {mapping_sheet_name}")
+    
+    # 创建国家到索引的映射字典（处理"中国（China）"格式）
+    country_map = {}
+    for row in mapping_sheet.iter_rows(min_row=2, values_only=True):
+        if row and len(row) >= 2:
+            index_val = str(row[0])  # 第一列是索引
+            country_str = str(row[1]).strip()  # 第二列是国家名称
+            
+            # 处理"中国（China）"格式的国家名称
+            if "（" in country_str and "）" in country_str:
+                # 提取中文名称（括号前的内容）
+                chinese_name = country_str.split("（")[0].strip()
+                # 提取英文名称（括号内的内容）
+                english_name = country_str.split("（")[1].split("）")[0].strip()
+                
+                # 将两种格式都映射到同一个索引
+                country_map[chinese_name] = index_val
+                country_map[english_name] = index_val
+            else:
+                # 没有括号的普通格式
+                country_map[country_str] = index_val
+    
+    print(f"已创建国家映射: {len(country_map)} 个条目")
+    
+    # 获取数据表
+    data_sheet = wb[data_sheet_name]
+    print(f"已加载数据表: {data_sheet_name}")
+    
+    # 查找国家列和索引列的位置
+    country_col_idx = None
+    index_col_idx = None
+    
+    for col in range(1, data_sheet.max_column + 1):
+        header = data_sheet.cell(row=4, column=col).value
+        if header == country_col_name:
+            country_col_idx = col
+        elif header == index_col_name:
+            index_col_idx = col
+            
+    if country_col_idx is None:
+        raise ValueError(f"未找到列: {country_col_name}")
+    if index_col_idx is None:
+        raise ValueError(f"未找到列: {index_col_name}")
+    
+    print(f"国家列位置: {get_column_letter(country_col_idx)}")
+    print(f"索引列位置: {get_column_letter(index_col_idx)}")
+    
+    # 处理每一行数据
+    updated_rows = 0
+    for row in range(5, data_sheet.max_row + 1):
+        country_cell = data_sheet.cell(row=row, column=country_col_idx)
+        
+        # 跳过空单元格
+        if country_cell.value is None:
+            continue
+            
+        # 分割国家字符串
+        countries = [c.strip() for c in str(country_cell.value).split(';')]
+        indexes = []
+        
+        # 查找每个国家的索引
+        for country in countries:
+            if country in country_map:
+                indexes.append(country_map[country])
+            else:
+                # 尝试在映射键中查找（不区分大小写）
+                found = False
+                for key in country_map.keys():
+                    if country.lower() == key.lower():
+                        indexes.append(country_map[key])
+                        found = True
+                        break
+                
+                if not found:
+                    print(f"警告: 行 {row} 国家未找到: '{country}'")
+                    indexes.append("?")  # 未找到的标记
+        
+        # 更新索引单元格
+        index_str = ";".join(indexes)
+        data_sheet.cell(row=row, column=index_col_idx).value = index_str
+        updated_rows += 1
+    
+    # 保存结果
+    wb.save(output_file)
+    print(f"\n处理完成! 更新了 {updated_rows} 行数据")
+    print(f"结果已保存到: {output_file}")
+
+# Run the fill-in job only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
\ No newline at end of file