"""Flag which words of the L2 vocabulary sheet also appear in the L1 sheet.

The first column of each workbook is treated as the word.  Words are compared
after stripping surrounding whitespace and lower-casing.  Four columns are
added to the L2 sheet (match flag, L1 Excel row number, L1 part of speech,
L1 meaning) and the L2 workbook is overwritten in place.
"""
import pandas as pd

# File paths
l2_file = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'
l1_file = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L1完整.xlsx'


def normalize_word(cell):
    """Return the lookup key for a word cell, or None if the cell is empty.

    Missing values (NaN/None) and whitespace-only cells yield None so that
    blank rows never match each other; ``str(NaN)`` would otherwise turn
    every blank cell into the bogus word "nan".
    """
    if pd.isna(cell):
        return None
    word = str(cell).strip().lower()
    return word or None


def build_l1_lookup(l1_df):
    """Map each normalized L1 word to its Excel row, part of speech and meaning.

    Columns are read by position: 0 = word, 1 = part of speech, 2 = meaning
    (the latter two default to "" when the sheet has fewer columns).  When a
    word occurs more than once, the last occurrence wins.
    """
    n_cols = l1_df.shape[1]
    lookup = {}
    # itertuples keeps each cell's own type; iterrows may upcast a row.
    for position, cells in enumerate(l1_df.itertuples(index=False, name=None)):
        word = normalize_word(cells[0])
        if word is None:
            continue
        lookup[word] = {
            # +2: one for the header row, one because Excel rows are 1-based.
            "row": position + 2,
            "pos": cells[1] if n_cols > 1 else "",
            "meaning": cells[2] if n_cols > 2 else "",
        }
    return lookup


def annotate_l2(l2_df, l1_lookup):
    """Return a copy of ``l2_df`` with the four L1 comparison columns added.

    Rows whose word is found in ``l1_lookup`` get "是" plus the L1 details;
    all other rows (including rows with an empty word cell) get "否" and
    empty strings.
    """
    is_l1 = []
    l1_rows = []
    l1_pos = []
    l1_meaning = []
    for cell in l2_df.iloc[:, 0]:
        # normalize_word returns None for blanks, which is never a key.
        info = l1_lookup.get(normalize_word(cell))
        if info is None:
            is_l1.append("否")
            l1_rows.append("")
            l1_pos.append("")
            l1_meaning.append("")
        else:
            is_l1.append("是")
            l1_rows.append(info["row"])
            l1_pos.append(info["pos"])
            l1_meaning.append(info["meaning"])

    annotated = l2_df.copy()
    # Add the new columns
    annotated["是否为L1单词"] = is_l1
    annotated["在L1中的行数"] = l1_rows
    annotated["L1词性"] = l1_pos
    annotated["L1词义"] = l1_meaning
    return annotated


def duplicate_rate(matched, total):
    """Return ``matched / total`` as a percentage, or 0.0 when ``total`` is 0."""
    if not total:
        return 0.0
    return matched / total * 100


def main():
    """Read both workbooks, annotate L2, overwrite it and print statistics."""
    # Read the files
    # NOTE(review): pandas.read_excel parses cell text such as "NA", "null"
    # or "nan" as missing by default, so such words would be treated as
    # blank here -- confirm whether keep_default_na=False is wanted.
    l2_df = pd.read_excel(l2_file)
    l1_df = pd.read_excel(l1_file)

    print("L2文件列名:", l2_df.columns.tolist())
    print(f"L2行数: {len(l2_df)}")
    print("\nL1文件列名:", l1_df.columns.tolist())
    print(f"L1行数: {len(l1_df)}")

    # Build the L1 lookup (lower-cased word as key) and process the L2 sheet
    l1_dict = build_l1_lookup(l1_df)
    l2_df = annotate_l2(l2_df, l1_dict)

    # Save the result (overwrites the L2 workbook)
    l2_df.to_excel(l2_file, index=False)

    # Statistics
    is_l1 = l2_df["是否为L1单词"].tolist()
    count_yes = is_l1.count("是")
    count_no = is_l1.count("否")

    print("\n" + "=" * 50)
    print("比对完成!")
    print(f"L1单词总数: {len(l1_df)}")
    print(f"L2单词总数: {len(l2_df)}")
    print(f"重复单词数: {count_yes}")
    print(f"不重复单词数: {count_no}")
    print(f"重复率: {duplicate_rate(count_yes, len(l2_df)):.2f}%")
    print("=" * 50)
    print("\n保存成功!已更新 L2知识库-三级+A2.xlsx")


if __name__ == "__main__":
    main()