"""Report which words of the 30-34 word sheet are absent from the L2 knowledge base.

The script reads two Excel workbooks, compares the words case-insensitively
(ignoring surrounding whitespace), prints every word of the 30-34 sheet that
is not found in the first column of the L2 workbook, and finishes with
coverage statistics.
"""
import pandas as pd

# File paths
file3034 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/30-34.xlsx'
fileL2 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'


def normalize_word(value):
    """Return *value* as a stripped, lower-cased string, or ``None`` if blank.

    Missing cells (``None`` / NaN) and whitespace-only cells yield ``None`` so
    that they are never turned into the literal text ``"nan"`` and compared
    as if they were a real word.
    """
    if pd.isna(value):
        return None
    text = str(value).strip().lower()
    return text or None


def build_word_set(values):
    """Return the set of normalized, non-blank words found in *values*."""
    normalized = (normalize_word(value) for value in values)
    return {word for word in normalized if word is not None}


def check_words(rows, known_words, word_column='单词'):
    """Compare each row's word against *known_words*.

    Args:
        rows: Iterable of mapping-like rows (e.g. pandas ``Series``) that can
            be indexed with *word_column*.
        known_words: Set of normalized words to look up in.
        word_column: Key of the cell holding the word in each row.

    Returns:
        A ``(checked, missing_rows)`` tuple: the number of non-blank words
        that were checked, and the list of rows whose word is not in
        *known_words* (in input order).
    """
    checked = 0
    missing_rows = []
    for row in rows:
        word = normalize_word(row[word_column])
        if word is None:
            # A blank cell is not a word: neither "found" nor "missing".
            continue
        checked += 1
        if word not in known_words:
            missing_rows.append(row)
    return checked, missing_rows


def coverage_percent(found, total):
    """Return ``found / total`` as a percentage; 0.0 when *total* is zero."""
    if not total:
        # Avoids ZeroDivisionError when the checked sheet has no words.
        return 0.0
    return found / total * 100


def main():
    """Load both workbooks, print the missing words and the statistics."""
    # Read the workbooks.
    # NOTE(review): pd.read_excel by default parses cell text such as "NA",
    # "null" or "nan" as missing values, so such entries would be treated as
    # blank here - confirm whether keep_default_na=False is wanted.
    df3034 = pd.read_excel(file3034)
    dfL2 = pd.read_excel(fileL2)

    print(f"30-34单词表: {len(df3034)} 个单词")
    print(f"L2知识库: {len(dfL2)} 个单词")

    # Build the lookup set from the first column of the L2 workbook.
    l2_words = build_word_set(dfL2.iloc[:, 0])

    # Check every word of the 30-34 sheet.
    total, missing_words = check_words(
        (row for _, row in df3034.iterrows()), l2_words
    )

    print("\n" + "="*60)
    if not missing_words:
        print("✅ 所有30-34中的单词都在L2知识库中!")
    else:
        print(f"❌ 还有 {len(missing_words)} 个单词不在L2知识库中:")
        print("-"*60)
        print(f"{'单词':<30} {'词性':<10} {'中文释义'}")
        print("-"*60)
        for row in missing_words:
            print(f"{str(row['单词']):<30} {str(row['词性']):<10} {str(row['释义'])}")

    print("\n" + "="*60)
    print("统计信息:")
    found = total - len(missing_words)
    print(f"总单词数: {total}")
    print(f"已在知识库: {found}")
    print(f"缺失单词: {len(missing_words)}")
    print(f"覆盖度: {coverage_percent(found, total):.1f}%")


if __name__ == "__main__":
    main()