74 lines
2.4 KiB
Python
74 lines
2.4 KiB
Python
|
|
import pandas as pd
|
|
|
|
# Input workbooks: the annotated result sheet and the reference KET word list.
fileResult = r'/root/.openclaw/workspace-xiaoyan/output/30-34_KET标注完成.xlsx'
fileKet = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/KET词汇表_完整版.xlsx'


def _normalize(value):
    """Return *value* as a stripped, lower-cased string used for lookups."""
    return str(value).strip().lower()


def _is_ket(word, ket_words):
    """Return True when *word* is a real (non-missing) cell found in *ket_words*."""
    # A missing cell would stringify to 'nan'; never treat that as a word.
    return not pd.isna(word) and _normalize(word) in ket_words


def build_ket_words(df_ket):
    """Build the lookup set of normalized words from the '单词' column.

    Missing cells and cells that are blank after stripping are skipped so
    that the literal strings 'nan' / '' never end up in the lookup set.
    """
    normalized = (_normalize(cell) for cell in df_ket['单词'].dropna())
    return {word for word in normalized if word}


def review_rows(df_result, ket_words, start, stop, width=2):
    """Print a row-by-row comparison for positions ``start`` to ``stop - 1``.

    For each row the '是否KET' annotation (treated as positive when it
    contains '【是】') is compared with actual membership of '单词' in
    *ket_words*.  *width* is the minimum width of the printed row number.

    Returns a ``(correct, wrong)`` tuple of row counts.
    """
    correct = 0
    wrong = 0
    for pos in range(start, stop):
        row = df_result.iloc[pos]
        is_ket = _is_ket(row['单词'], ket_words)
        marked_as_ket = '【是】' in str(row['是否KET'])

        if is_ket == marked_as_ket:
            status = "✓"
            correct += 1
        else:
            status = "✗"
            wrong += 1

        marked_label = '是' if marked_as_ket else '否'
        actual_label = '是' if is_ket else '否'
        # str() keeps the 's' format spec valid for NaN / numeric cells.
        shown_word = str(row['单词'])
        print(f"{pos + 1:{width}d}. {shown_word:20s} 标注: {marked_label:3s} 实际: {actual_label:3s} {status}")
    return correct, wrong


def find_missed_ket_words(df_result, ket_words):
    """Find rows annotated '【否】' whose word is actually in *ket_words*.

    Returns ``(no_words, missed)``: the sub-frame of rows whose '是否KET'
    cell contains '【否】', and the list of original '单词' values among them
    that are present in *ket_words*.
    """
    # Substring match mirrors the '【是】' test used by review_rows, so cells
    # with stray whitespace around the marker are not silently skipped.
    is_marked_no = df_result['是否KET'].astype(str).str.contains('【否】', regex=False)
    no_words = df_result[is_marked_no]
    missed = [word for word in no_words['单词'] if _is_ket(word, ket_words)]
    return no_words, missed


def main():
    """Load both workbooks and print the full review report."""
    df_result = pd.read_excel(fileResult)
    df_ket = pd.read_excel(fileKet)

    print("=== 复核检查 ===")
    print(f"结果文件总行数: {len(df_result)}")
    print(f"结果文件列名: {df_result.columns.tolist()}")

    ket_words = build_ket_words(df_ket)
    total = len(df_result)

    # Spot-check the first 20 rows.
    print("\n=== 手动复核前20个单词 ===")
    correct, wrong = review_rows(df_result, ket_words, 0, min(20, total), width=2)
    print(f"\n前20个 - 正确: {correct}, 错误: {wrong}")

    # Spot-check the last 20 rows.
    print("\n=== 手动复核后20个单词 ===")
    correct2, wrong2 = review_rows(df_result, ket_words, max(0, total - 20), total, width=3)
    print(f"\n后20个 - 正确: {correct2}, 错误: {wrong2}")

    # Check every row annotated as not-KET.
    print("\n=== 复核标注为【否】的所有单词 ===")
    no_words, missed = find_missed_ket_words(df_result, ket_words)
    print(f"标注为【否】的单词共 {len(no_words)} 个:")
    for word in missed:
        print(f"✗ {word} 应该是【是】,但标注为【否】")

    if not missed:
        print("✓ 所有标注为【否】的单词都是正确的!")


if __name__ == "__main__":
    main()
|