import pandas as pd

# Input workbooks: the 30-34 word list to check, and the L2 knowledge base
# it is checked against.
file3034 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/30-34.xlsx'
fileL2 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'

# Column of the 30-34 sheet that holds the word itself.
WORD_COLUMN = '单词'


def normalize_word(value):
    """Return the comparison key for a cell value, or None for a blank cell.

    The key is the stripped, lower-cased string form of the value. Missing
    cells (NaN/None) and whitespace-only cells yield None so that they are
    never turned into the literal word 'nan' and matched by accident.
    """
    if pd.isna(value):
        return None
    key = str(value).strip().lower()
    return key or None


def build_word_set(values):
    """Return the set of normalized, non-blank words found in *values*."""
    keys = (normalize_word(value) for value in values)
    return {key for key in keys if key is not None}


def drop_blank_words(df, column=WORD_COLUMN):
    """Return the rows of *df* whose *column* cell holds an actual word."""
    keep = pd.Series(
        [normalize_word(value) is not None for value in df[column]],
        index=df.index,
        dtype=bool,
    )
    return df.loc[keep]


def find_missing_rows(df, known_words, column=WORD_COLUMN):
    """Return the rows of *df* whose normalized word is not in *known_words*.

    Rows are returned as pandas Series in their original order so the caller
    can still read the other columns of each row.
    """
    return [
        row
        for _, row in df.iterrows()
        if normalize_word(row[column]) not in known_words
    ]


def coverage_percent(found, total):
    """Return found/total as a percentage; 0.0 when *total* is zero."""
    if not total:
        # An empty word list has no meaningful coverage; avoid dividing by 0.
        return 0.0
    return found / total * 100


def main():
    """Report which words of the 30-34 list are absent from the L2 knowledge base."""
    # Load both workbooks; blank word rows are dropped so every count below
    # refers to real words only.
    df3034 = drop_blank_words(pd.read_excel(file3034))
    dfL2 = pd.read_excel(fileL2)

    print(f"30-34单词表: {len(df3034)} 个单词")
    print(f"L2知识库: {len(dfL2)} 个单词")

    # The knowledge-base words are taken from the first column of its sheet.
    l2_words = build_word_set(dfL2.iloc[:, 0])

    missing_words = find_missing_rows(df3034, l2_words)

    print("\n" + "="*60)
    if not missing_words:
        print("✅ 所有30-34中的单词都在L2知识库中！")
    else:
        print(f"❌ 还有 {len(missing_words)} 个单词不在L2知识库中：")
        print("-"*60)
        print(f"{'单词':<30} {'词性':<10} {'中文释义'}")
        print("-"*60)
        for row in missing_words:
            print(f"{str(row['单词']):<30} {str(row['词性']):<10} {str(row['释义'])}")

    print("\n" + "="*60)
    print("统计信息：")
    total = len(df3034)
    found = total - len(missing_words)
    print(f"总单词数: {total}")
    print(f"已在知识库: {found}")
    print(f"缺失单词: {len(missing_words)}")
    print(f"覆盖度: {coverage_percent(found, total):.1f}%")


if __name__ == "__main__":
    main()