"""Check whether every word of the 30-34 word list exists in the L2 knowledge base.

The script reads two Excel workbooks, compares the words case-insensitively
(ignoring surrounding whitespace) and prints the missing words, a preview of
the first rows and coverage statistics.
"""
import pandas as pd

# File paths
file3034 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/30-34.xlsx'
fileL2 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'

# Column of the 30-34 sheet that holds the word itself.
WORD_COLUMN = '单词'
# Number of leading rows shown in the detailed preview.
PREVIEW_ROWS = 30


def _normalize(cell):
    """Return *cell* as a stripped, lower-cased string, or None if it is blank.

    Missing cells (NaN/None) and whitespace-only cells are not words; without
    this guard ``str(NaN)`` would turn them into the literal string "nan".
    """
    if pd.isna(cell):
        return None
    text = str(cell).strip().lower()
    return text or None


def build_word_set(cells):
    """Return the set of normalized words in *cells*, skipping blank cells."""
    normalized = (_normalize(cell) for cell in cells)
    return {word for word in normalized if word is not None}


def find_missing_words(cells, known_words):
    """Return the entries of *cells* (original spelling) not in *known_words*.

    *known_words* must contain normalized (stripped, lower-cased) words.
    Blank cells are skipped rather than reported as missing.
    """
    missing = []
    for cell in cells:
        key = _normalize(cell)
        if key is not None and key not in known_words:
            missing.append(cell)
    return missing


def coverage_percent(found, total):
    """Return ``found / total`` as a percentage, or 0.0 when *total* is 0."""
    return found / total * 100 if total else 0.0


def main():
    """Load both workbooks and print the coverage report."""
    # Read the files
    df3034 = pd.read_excel(file3034)
    dfL2 = pd.read_excel(fileL2)
    # NOTE(review): with pandas' default NA parsing, cells such as "null" or
    # "NA" are read as missing values; pass keep_default_na=False to
    # read_excel if such words can occur in the sheets - confirm.

    print(f"30-34单词表: {len(df3034)} 个单词")
    print(f"L2知识库: {len(dfL2)} 个单词")

    # Build the L2 lookup set from the first column of the knowledge base.
    l2_words = build_word_set(dfL2.iloc[:, 0])

    # Check every word of the 30-34 sheet (column "单词").
    word_cells = df3034[WORD_COLUMN]
    missing_words = find_missing_words(word_cells, l2_words)

    print("\n" + "="*60)
    if not missing_words:
        print("✅ 所有30-34中的单词都在L2知识库中!")
    else:
        print(f"❌ 有 {len(missing_words)} 个单词不在L2知识库中:")
        for word in missing_words:
            print(f" - {word}")

    print(f"\n详细检查结果(前{PREVIEW_ROWS}个):")
    # Numbering follows the row position in the sheet; blank rows are skipped.
    for position, cell in enumerate(word_cells.head(PREVIEW_ROWS), start=1):
        key = _normalize(cell)
        if key is None:
            continue
        status = "✅" if key in l2_words else "❌"
        print(f"{position:3d}. {str(cell):30s} {status}")

    print("\n统计信息:")
    # Only non-blank cells count as words.
    total = sum(1 for cell in word_cells if _normalize(cell) is not None)
    found = total - len(missing_words)
    print(f"总单词数: {total}")
    print(f"已在知识库: {found}")
    print(f"缺失单词: {len(missing_words)}")
    # coverage_percent guards against an empty sheet (total == 0).
    print(f"覆盖度: {coverage_percent(found, total):.1f}%")


if __name__ == "__main__":
    main()