"""Spot-check the KET annotations in a result workbook against the KET word list.

The script loads the annotated result file and the reference KET vocabulary,
then:

1. reviews the first 20 rows,
2. reviews the last 20 rows,
3. reviews every row annotated as "not KET" and reports any word that is
   actually present in the KET vocabulary.

A row counts as *correct* when "word is in the KET list" agrees with
"annotation contains the KET-yes marker".
"""

from typing import Iterable

import pandas as pd

# Input workbooks.
RESULT_FILE = r'/root/.openclaw/workspace-xiaoyan/output/30-34_KET标注完成.xlsx'
KET_FILE = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/KET词汇表_完整版.xlsx'

# Column names and annotation markers used by both workbooks.
WORD_COLUMN = '单词'
MARK_COLUMN = '是否KET'
MARK_YES = '【是】'
MARK_NO = '【否】'

# Number of rows reviewed at the head and at the tail of the result file.
SAMPLE_SIZE = 20


def normalize_word(value: object) -> str:
    """Return *value* as a stripped, lower-cased string for lookup.

    Missing cells (NaN / None) normalize to the empty string so that they
    never turn into the literal word ``'nan'`` / ``'none'``.
    """
    if pd.isna(value):
        return ''
    return str(value).strip().lower()


def build_ket_word_set(df_ket: pd.DataFrame) -> set:
    """Build the fast-lookup set of normalized words from the KET word list.

    Blank entries are skipped so an empty row in the word list cannot make
    blank result rows look like KET words.
    """
    normalized = (normalize_word(value) for value in df_ket[WORD_COLUMN])
    return {word for word in normalized if word}


def is_marked_ket(mark: object) -> bool:
    """Return True when the annotation cell contains the KET-yes marker."""
    return MARK_YES in str(mark)


def review_rows(
    df_result: pd.DataFrame,
    ket_words: set,
    positions: Iterable[int],
    index_width: int,
) -> tuple:
    """Print a per-row verdict for the given row positions.

    Args:
        df_result: The annotated result table.
        ket_words: Normalized KET vocabulary (see ``build_ket_word_set``).
        positions: Zero-based row positions to review.
        index_width: Minimum width used to print the 1-based row number.

    Returns:
        A ``(correct, wrong)`` tuple of row counts.
    """
    correct = 0
    wrong = 0
    for pos in positions:
        row = df_result.iloc[pos]
        is_ket = normalize_word(row[WORD_COLUMN]) in ket_words
        marked_as_ket = is_marked_ket(row[MARK_COLUMN])
        matches = is_ket == marked_as_ket
        if matches:
            correct += 1
        else:
            wrong += 1
        status = "✓" if matches else "✗"
        # str() is required: the ':20s' format spec raises ValueError for
        # non-string cells such as NaN or numbers.
        shown_word = str(row[WORD_COLUMN])
        marked_label = '是' if marked_as_ket else '否'
        actual_label = '是' if is_ket else '否'
        print(
            f"{pos + 1:{index_width}d}. {shown_word:20s} "
            f"标注: {marked_label:3s} 实际: {actual_label:3s} {status}"
        )
    return correct, wrong


def select_marked_not_ket(df_result: pd.DataFrame) -> pd.DataFrame:
    """Return the rows whose annotation is the KET-no marker.

    The cell is stripped before comparison so stray whitespace around the
    marker does not exclude a row from the review.
    """
    marks = df_result[MARK_COLUMN].astype(str).str.strip()
    return df_result[marks == MARK_NO]


def find_missed_ket_words(no_words: pd.DataFrame, ket_words: set) -> list:
    """Return the original words in *no_words* that are in the KET vocabulary."""
    return [
        value
        for value in no_words[WORD_COLUMN]
        if normalize_word(value) in ket_words
    ]


def main() -> None:
    """Load both workbooks and print the three review reports."""
    # Load the result file and the reference word list.
    df_result = pd.read_excel(RESULT_FILE)
    df_ket = pd.read_excel(KET_FILE)

    print("=== 复核检查 ===")
    print(f"结果文件总行数: {len(df_result)}")
    print(f"结果文件列名: {df_result.columns.tolist()}")

    ket_words = build_ket_word_set(df_ket)
    total = len(df_result)

    # Review the first SAMPLE_SIZE rows.
    print("\n=== 手动复核前20个单词 ===")
    correct, wrong = review_rows(
        df_result, ket_words, range(min(SAMPLE_SIZE, total)), 2
    )
    print(f"\n前20个 - 正确: {correct}, 错误: {wrong}")

    # Review the last SAMPLE_SIZE rows.
    print("\n=== 手动复核后20个单词 ===")
    correct2, wrong2 = review_rows(
        df_result, ket_words, range(max(0, total - SAMPLE_SIZE), total), 3
    )
    print(f"\n后20个 - 正确: {correct2}, 错误: {wrong2}")

    # Review every row annotated as "not KET".
    print("\n=== 复核标注为【否】的所有单词 ===")
    no_words = select_marked_not_ket(df_result)
    print(f"标注为【否】的单词共 {len(no_words)} 个:")
    missed = find_missed_ket_words(no_words, ket_words)
    for word in missed:
        print(f"✗ {word} 应该是【是】,但标注为【否】")
    if not missed:
        print("✓ 所有标注为【否】的单词都是正确的!")


if __name__ == "__main__":
    main()