# ai_member_xiaoyan/verify_ket.py
#
# 74 lines
# 2.4 KiB
# Python

import pandas as pd
# Load the annotated result workbook and the reference KET vocabulary workbook.
fileResult = r'/root/.openclaw/workspace-xiaoyan/output/30-34_KET标注完成.xlsx'
fileKet = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/KET词汇表_完整版.xlsx'
dfResult = pd.read_excel(fileResult)
dfKet = pd.read_excel(fileKet)
print("=== 复核检查 ===")
print(f"结果文件总行数: {len(dfResult)}")
print(f"结果文件列名: {dfResult.columns.tolist()}")
# Build a set of normalised (stripped, lower-cased) KET words for fast
# membership tests. Empty cells are dropped first: str(NaN) is 'nan', which
# would otherwise be added to the set as if it were a vocabulary word.
ket_words = set(dfKet['单词'].dropna().astype(str).str.strip().str.lower())
# Spot-check the first 20 and the last 20 words of the result table.
def _review_rows(df, known_words, positions, index_width):
    """Print a per-row comparison of the annotation against the KET word set.

    Args:
        df: Result table; the columns '单词' and '是否KET' are read.
        known_words: Stripped, lower-cased KET words used for membership tests.
        positions: Zero-based row positions of ``df`` to review.
        index_width: Minimum width of the printed 1-based row number.

    Returns:
        A ``(correct, wrong)`` tuple: the number of reviewed rows whose
        annotation agrees / disagrees with membership in ``known_words``.
    """
    correct_count = 0
    wrong_count = 0
    for pos in positions:
        row = df.iloc[pos]
        # Convert once with str(): a non-string cell (NaN, a number) would make
        # the ':20s' format spec below raise ValueError.
        raw_word = str(row['单词'])
        is_ket = raw_word.strip().lower() in known_words
        marked_as_ket = '【是】' in str(row['是否KET'])
        matches = is_ket == marked_as_ket
        if matches:
            correct_count += 1
        else:
            wrong_count += 1
        # Both branches of the label/status conditionals used to be empty
        # strings, so the printed columns carried no information; distinct
        # markers make the line readable.
        marked_label = '是' if marked_as_ket else '否'
        actual_label = '是' if is_ket else '否'
        status = '✓' if matches else '✗'
        print(f"{pos + 1:{index_width}d}. {raw_word:20s} 标注: {marked_label:3s} 实际: {actual_label:3s} {status}")
    return correct_count, wrong_count


print("\n=== 手动复核前20个单词 ===")
correct, wrong = _review_rows(dfResult, ket_words, range(min(20, len(dfResult))), 2)
print(f"\n前20个 - 正确: {correct}, 错误: {wrong}")
# Review the last 20 rows (fewer when the table is shorter than 20 rows).
print("\n=== 手动复核后20个单词 ===")
correct2, wrong2 = _review_rows(
    dfResult, ket_words, range(max(0, len(dfResult) - 20), len(dfResult)), 3
)
print(f"\n后20个 - 正确: {correct2}, 错误: {wrong2}")
# Re-check every word annotated as 【否】: none of them may appear in the KET set.
print("\n=== 复核标注为【否】的所有单词 ===")
# Substring match rather than exact equality, so cells with stray whitespace
# or extra notes around the marker are still selected; this matches how the
# 【是】 marker is detected elsewhere in this script ('【是】' in str(...)).
no_mask = dfResult['是否KET'].astype(str).str.contains('【否】', regex=False)
no_words = dfResult[no_mask]
print(f"标注为【否】的单词共 {len(no_words)} 个:")
all_correct = True
for raw_word in no_words['单词']:
    # Same normalisation as the KET set: stripped and lower-cased.
    if str(raw_word).strip().lower() in ket_words:
        print(f"{raw_word} 应该是【是】,但标注为【否】")
        all_correct = False
if all_correct:
    print("✓ 所有标注为【否】的单词都是正确的!")