"""Split the original word list into 上册 / 下册 sheets using the finalized library.

Each word in the finalized library carries a unit code in its ``占用情况``
column.  The code is turned into a category by :func:`get_category`, the
category is looked up for every word of the original (detailed) word list,
and the original rows are written to one Excel sheet per category.
"""

import pandas as pd

# File paths
origin_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx"
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx"
output_file = "/root/.openclaw/workspace-xiaoban/定稿版单词上下册分类结果.xlsx"

# Column names read from / added to the spreadsheets.
WORD_COLUMN = '单词'
UNIT_COLUMN = '占用情况'
CATEGORY_COLUMN = '分类'

# Category labels produced by get_category().
UPPER = '上册'
LOWER = '下册'
UNMATCHED = '不匹配'

# Highest S1 unit number that still belongs to the 上册 category.
LAST_UPPER_UNIT = 6


def get_category(unit: object) -> str:
    """Map a unit code from the finalized library to a category label.

    Args:
        unit: Raw cell value of the ``占用情况`` column.  Expected to be a
            string such as ``'S0-...'`` or ``'S1-U7'``; may also be NaN/None
            or any other cell type.

    Returns:
        ``'上册'`` for codes starting with ``'S0-'`` and for ``'S1-U<n>'``
        with ``n <= 6``; ``'下册'`` for ``'S1-U<n>'`` with ``n > 6``;
        ``'不匹配'`` for everything else (missing, blank, ``'不常见'``,
        non-string cells, or an ``S1-U`` code without a valid number).
    """
    # Non-string cells (NaN, None, numbers, dates) have no .strip() and can
    # never carry a unit code, so they are unmatched rather than an error.
    if not isinstance(unit, str):
        return UNMATCHED

    code = unit.strip()
    if code in ('', '不常见'):
        return UNMATCHED
    if code.startswith('S0-'):
        return UPPER
    if code.startswith('S1-U'):
        # 'S1-U7-...' -> second dash-separated field 'U7' -> 7
        try:
            unit_num = int(code.split('-')[1][1:])
        except ValueError:
            # e.g. 'S1-U' or 'S1-Uabc': no usable unit number.
            return UNMATCHED
        return UPPER if unit_num <= LAST_UPPER_UNIT else LOWER
    return UNMATCHED


def split_by_category(df_origin, df_final):
    """Split the original word list by the category of each word.

    Args:
        df_origin: Original word list; must contain a ``单词`` column.
        df_final: Finalized library; must contain ``单词`` and ``占用情况``.

    Returns:
        A ``(upper, lower, other)`` tuple of DataFrames holding the rows of
        ``df_origin`` categorized as 上册, as 下册, and everything else.
        The helper ``分类`` column is not part of the returned frames.

    Raises:
        KeyError: If a required column is missing from either frame.
    """
    # Work on copies so the caller's frames do not gain a '分类' column.
    final = df_final.copy()
    final[CATEGORY_COLUMN] = final[UNIT_COLUMN].apply(get_category)

    # Word -> category, restricted to library rows that got a real category.
    # For duplicated words the first such row wins.
    matched = final[final[CATEGORY_COLUMN] != UNMATCHED]
    word_category_map = (
        matched.drop_duplicates(WORD_COLUMN)
        .set_index(WORD_COLUMN)[CATEGORY_COLUMN]
        .to_dict()
    )

    origin = df_origin.copy()
    origin[CATEGORY_COLUMN] = origin[WORD_COLUMN].map(word_category_map)

    is_upper = origin[CATEGORY_COLUMN] == UPPER
    is_lower = origin[CATEGORY_COLUMN] == LOWER
    without_category = origin.drop(columns=[CATEGORY_COLUMN])
    return (
        without_category[is_upper],
        without_category[is_lower],
        without_category[~(is_upper | is_lower)],
    )


def main() -> None:
    """Read both workbooks, split the original list and write the result file."""
    # load_workbook is not used directly; the import is retained from the
    # original script and doubles as an early check that openpyxl (the
    # ExcelWriter engine used below) is installed before any data is read.
    from openpyxl import load_workbook  # noqa: F401

    # Original word list (with the detailed fields)
    df_origin = pd.read_excel(origin_file)
    # Finalized word library
    df_final = pd.read_excel(final_lib_file)

    df_upper, df_lower, df_other = split_by_category(df_origin, df_final)

    # Write the result; the "unmatched" sheet is only created when needed.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        df_upper.to_excel(writer, sheet_name='上册单词(定稿版)', index=False)
        df_lower.to_excel(writer, sheet_name='下册单词(定稿版)', index=False)
        if not df_other.empty:
            df_other.to_excel(writer, sheet_name='未匹配到定稿库的单词', index=False)

    print(f"处理完成!结果已保存到:{output_file}")
    print(f"上册匹配到单词数量:{len(df_upper)}")
    print(f"下册匹配到单词数量:{len(df_lower)}")
    print(f"未匹配到定稿库的单词数量:{len(df_other)}")


if __name__ == "__main__":
    main()