ai_member_xiaoban/makee_vala/final_reclassify.py
2026-03-18 08:00:08 +08:00

42 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# Reclassify the words of the difficulty table into upper-volume (上册) and
# lower-volume (下册) sheets, based on the finalized word library workbook.

# Input / output locations.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"  # finalized word library (expected to hold an upper and a lower sheet)
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"  # difficulty table
output_file = "/root/.openclaw/workspace-xiaoban/最终版单词上下册分类结果.xlsx"

# Sheet names inside the finalized word library.
# NOTE(review): the script reads the SAME sheet name for both volumes. With
# identical names the two word sets are equal, so no word can ever be
# classified as 下册 and the printed total is double-counted. The real
# lower-volume sheet name is not visible from this script -- set
# LOWER_SHEET_NAME to it. main() prints a warning while the names are equal.
UPPER_SHEET_NAME = '单词表-LV1'
LOWER_SHEET_NAME = '单词表-LV1'

# Column that holds the word in every sheet this script reads.
WORD_COLUMN = '单词'
# Name of the temporary label column used by earlier versions of this script;
# it is kept out of the written result sheets.
LABEL_COLUMN = '分类'


def load_words(workbook_path, sheet_name):
    """Return the set of non-null words found in one sheet of a workbook.

    Args:
        workbook_path: Path of the Excel workbook to read.
        sheet_name: Name of the sheet whose word column is loaded.

    Returns:
        A set with every non-null value of the word column.

    Raises:
        KeyError: If the sheet has no word column.
    """
    sheet = pd.read_excel(workbook_path, sheet_name=sheet_name)
    return set(sheet[WORD_COLUMN].dropna().tolist())


def split_by_volume(df_diff, upper_words, lower_words):
    """Split the difficulty table into upper / lower / unmatched rows.

    A word found in ``upper_words`` goes to the upper volume even when it is
    also present in ``lower_words`` (upper takes precedence). Rows whose word
    is in neither set, including rows with a missing word, are unmatched.
    The input frame is not modified.

    Args:
        df_diff: Difficulty table; must contain the word column.
        upper_words: Words of the upper volume.
        lower_words: Words of the lower volume.

    Returns:
        A ``(df_upper, df_lower, df_other)`` tuple of DataFrames, none of
        which contains the label column.
    """
    words = df_diff[WORD_COLUMN]
    in_upper = words.isin(upper_words)
    # Exclude rows already claimed by the upper volume so the three parts
    # never overlap.
    in_lower = words.isin(lower_words) & ~in_upper
    unmatched = ~(in_upper | in_lower)

    # The label column was always dropped from the results; keep doing so
    # even if the input table happens to carry a column of that name.
    base = df_diff.drop(columns=[LABEL_COLUMN], errors='ignore')
    return base[in_upper], base[in_lower], base[unmatched]


def main():
    """Load both workbooks, classify the words and write the result file."""
    # Read the two sheets of the finalized library and collect their words.
    upper_words = load_words(final_lib_file, UPPER_SHEET_NAME)
    lower_words = load_words(final_lib_file, LOWER_SHEET_NAME)
    print(f"定稿库上册单词数:{len(upper_words)}")
    print(f"定稿库下册单词数:{len(lower_words)}")
    print(f"合计:{len(upper_words) + len(lower_words)}")

    # Surface configuration problems instead of silently producing an empty
    # lower-volume sheet.
    if UPPER_SHEET_NAME == LOWER_SHEET_NAME:
        print(f"警告:上册与下册读取的是同一个sheet({UPPER_SHEET_NAME}),下册将匹配不到任何单词,请确认下册sheet名称")
    overlap = upper_words & lower_words
    if overlap:
        print(f"警告:有{len(overlap)}个单词同时出现在上册和下册词库中,已按上册归类")

    # Read the difficulty table and split it by volume.
    df_diff = pd.read_excel(difficulty_file)
    df_upper, df_lower, df_other = split_by_volume(df_diff, upper_words, lower_words)

    # Write the result workbook; the unmatched sheet is only created when
    # there is something to put in it.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        df_upper.to_excel(writer, sheet_name='上册单词(最终版)', index=False)
        df_lower.to_excel(writer, sheet_name='下册单词(最终版)', index=False)
        if not df_other.empty:
            df_other.to_excel(writer, sheet_name='未匹配单词', index=False)

    # Report only after the writer has been closed, i.e. the file is saved.
    print(f"\n处理完成!结果已保存到:{output_file}")
    print(f"上册匹配到单词数:{len(df_upper)}")
    print(f"下册匹配到单词数:{len(df_lower)}")
    print(f"未匹配到单词数:{len(df_other)}")


if __name__ == "__main__":
    main()