# 46 lines · 1.4 KiB · Python
"""Check that every word of the 30-34 word list exists in the L2 knowledge base.

Both workbooks are read with pandas; only the first column of each sheet is
used.  Words are compared after converting to text, trimming surrounding
whitespace and lower-casing.  The script prints the missing words, a
per-word check of the first 20 entries, and coverage statistics.

NOTE(review): ``pd.read_excel`` treats the first sheet row as a header by
default, so that row is never compared as a word -- confirm that both
workbooks really have a header row.
"""
from typing import Iterable, List, Set

import pandas as pd

# File paths
file3034 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/30-34.xlsx'
fileL2 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'


def normalize_word(value: object) -> str:
    """Return the comparison key of a cell: its text, trimmed and lower-cased."""
    return str(value).strip().lower()


def word_column(df: pd.DataFrame) -> pd.Series:
    """Return the first column of *df* without empty (NaN/None) cells.

    Blank spreadsheet cells are read as NaN; left in place they would be
    compared as the literal word "nan".  A frame without any column (an
    empty sheet) yields an empty Series instead of raising ``IndexError``.
    The original row labels are preserved.
    """
    if df.shape[1] == 0:
        return pd.Series(dtype=object)
    return df.iloc[:, 0].dropna()


def find_missing_words(words: Iterable[object], known: Set[str]) -> List[object]:
    """Return the entries of *words* whose normalized form is not in *known*.

    The original (un-normalized) cell values are returned, in input order.
    """
    return [word for word in words if normalize_word(word) not in known]


def format_result_line(number: int, raw: object, found: bool) -> str:
    """Format one line of the detailed listing.

    *raw* is converted with ``str`` before padding because the ``s`` format
    code raises for non-string cells such as numbers.
    """
    status = "✅" if found else "❌"
    return f"{number:3d}. {str(raw):20s} {status}"


def coverage_percent(found: int, total: int) -> float:
    """Return ``found / total`` as a percentage.

    An empty word list has nothing missing, so it is reported as 100.0
    instead of raising ``ZeroDivisionError``.
    """
    if total == 0:
        return 100.0
    return found / total * 100


def main() -> None:
    """Load both workbooks, compare the word lists and print the report."""
    # Read the files
    df3034 = pd.read_excel(file3034)
    dfL2 = pd.read_excel(fileL2)

    words3034 = word_column(df3034)
    words_l2 = word_column(dfL2)

    print(f"30-34单词表: {len(words3034)} 个单词")
    print(f"L2知识库: {len(words_l2)} 个单词")

    # Build the lookup set of L2 words
    l2_words = {normalize_word(word) for word in words_l2}

    # Check every word
    missing_words = find_missing_words(words3034, l2_words)

    print("\n" + "="*60)
    if not missing_words:
        print("✅ 所有30-34中的单词都在L2知识库中!")
    else:
        print(f"❌ 有 {len(missing_words)} 个单词不在L2知识库中:")
        for word in missing_words:
            print(f" - {word}")

    print("\n详细检查结果(前20个):")
    # idx is the 0-based row label assigned by read_excel, hence idx + 1.
    for idx, raw in words3034.head(20).items():
        print(format_result_line(idx + 1, raw, normalize_word(raw) in l2_words))

    print("\n统计信息:")
    total = len(words3034)
    found = total - len(missing_words)
    print(f"总单词数: {total}")
    print(f"已在知识库: {found}")
    print(f"缺失单词: {len(missing_words)}")
    print(f"覆盖度: {coverage_percent(found, total):.1f}%")


if __name__ == "__main__":
    main()