"""Report which words from the 30-34 word list are absent from the L2 knowledge base.

Both inputs are Excel sheets. Words are compared after stripping surrounding
whitespace and lower-casing. Blank cells are ignored on both sides, so an
empty cell is never treated as the literal word "nan".
"""
import pandas as pd

# File paths
file3034 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/30-34.xlsx'
fileL2 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'


def normalize_word(word):
    """Return *word* as a stripped, lower-cased string used for comparison."""
    return str(word).strip().lower()


def build_word_set(words):
    """Build the lookup set of normalized words from a pandas Series.

    Blank (NaN/None) cells are dropped first; otherwise ``str(nan)`` would add
    the bogus entry ``"nan"`` to the set.
    """
    return {normalize_word(word) for word in words.dropna()}


def check_coverage(df, known_words, column='单词'):
    """Find the rows of *df* whose word is not in *known_words*.

    Args:
        df: DataFrame holding the words to check in *column*.
        known_words: Set of already-normalized words to look up against.
        column: Name of the column in *df* that holds the word.

    Returns:
        A ``(total, missing)`` tuple: ``total`` is the number of non-blank
        words checked, ``missing`` is a DataFrame with the rows whose word was
        not found.

    Raises:
        KeyError: If *df* has rows but no column named *column*.
    """
    # A sheet without rows has nothing to check (and may lack the column).
    if len(df) == 0:
        return 0, df

    words = df[column]
    has_word = words.notna()
    is_known = words.map(normalize_word).isin(known_words)
    return int(has_word.sum()), df[has_word & ~is_known]


def coverage_percent(found, total):
    """Return ``found / total`` as a percentage; 0.0 when there are no words."""
    if total == 0:
        # Avoid ZeroDivisionError for an empty word list.
        return 0.0
    return found / total * 100


def main():
    """Read both Excel files and print the missing words plus summary statistics."""
    # Read the files
    df3034 = pd.read_excel(file3034)
    dfL2 = pd.read_excel(fileL2)

    print(f"30-34单词表: {len(df3034)} 个单词")
    print(f"L2知识库: {len(dfL2)} 个单词")

    # Build the L2 lookup set from the first column of the knowledge base
    l2_words = build_word_set(dfL2.iloc[:, 0])

    # Check every word
    total, missing_words = check_coverage(df3034, l2_words)

    print("\n" + "="*60)
    if len(missing_words) == 0:
        print("✅ 所有30-34中的单词都在L2知识库中!")
    else:
        print(f"❌ 还有 {len(missing_words)} 个单词不在L2知识库中:")
        print("-"*60)
        print(f"{'单词':<30} {'词性':<10} {'中文释义'}")
        print("-"*60)
        for _, row in missing_words.iterrows():
            print(f"{str(row['单词']):<30} {str(row['词性']):<10} {str(row['释义'])}")

    print("\n" + "="*60)
    print("统计信息:")
    found = total - len(missing_words)
    print(f"总单词数: {total}")
    print(f"已在知识库: {found}")
    print(f"缺失单词: {len(missing_words)}")
    print(f"覆盖度: {coverage_percent(found, total):.1f}%")


if __name__ == "__main__":
    main()