# File-viewer header (not part of the program): 42 lines, 2.0 KiB, Python
"""Split the difficulty table into upper-/lower-volume word sheets.

The finalized word library workbook has one sheet per volume.  Every row of
the difficulty table is labelled by looking its word up in those two sheets,
and the rows are written to a result workbook with one sheet per label.
"""
import pandas as pd

# Input / output file paths.
# Finalized word library (two sheets: upper volume / lower volume).
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"
# Difficulty table.
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"
output_file = "/root/.openclaw/workspace-xiaoban/最终版单词上下册分类结果.xlsx"

# Name of the column that holds the word in every sheet that is read.
WORD_COLUMN = '单词'

# Labels assigned to each row of the difficulty table.
LABEL_UPPER = '上册'
LABEL_LOWER = '下册'
LABEL_UNMATCHED = '未匹配'


def normalize_word(word):
    """Return *word* with surrounding whitespace removed.

    Non-string values (numbers, NaN, ...) are returned unchanged.  Case is
    deliberately preserved so that e.g. "May" and "may" stay distinct.
    """
    return word.strip() if isinstance(word, str) else word


def build_word_set(column):
    """Build the lookup set for one volume from a pandas Series of words.

    Missing cells are dropped, every remaining value is normalized with
    ``normalize_word``, and cells that are empty after trimming are ignored.
    """
    cleaned = (normalize_word(word) for word in column.dropna().tolist())
    return {word for word in cleaned if word != ""}


def classify_word(word, upper_words, lower_words):
    """Return the volume label for *word*.

    The upper volume is checked first, so a word present in both sets is
    labelled as upper volume.  The lookup uses the trimmed form of *word*
    so that stray spaces in a cell do not cause a false "unmatched".
    """
    key = normalize_word(word)
    if key in upper_words:
        return LABEL_UPPER
    if key in lower_words:
        return LABEL_LOWER
    return LABEL_UNMATCHED


def split_by_volume(df_diff, upper_words, lower_words):
    """Split *df_diff* into (upper, lower, unmatched) DataFrames.

    Rows keep their original columns and values; *df_diff* itself is not
    modified (the labels live in a separate Series instead of a temporary
    column).
    """
    labels = pd.Series(
        [classify_word(word, upper_words, lower_words)
         for word in df_diff[WORD_COLUMN]],
        index=df_diff.index,
        dtype=object,
    )
    return (
        df_diff[labels == LABEL_UPPER],
        df_diff[labels == LABEL_LOWER],
        df_diff[labels == LABEL_UNMATCHED],
    )


def main():
    """Read both workbooks, classify the difficulty table, write the result."""
    # Read the two sheets of the finalized library.
    df_upper_lib = pd.read_excel(final_lib_file, sheet_name='单词表-LV1(上)')
    df_lower_lib = pd.read_excel(final_lib_file, sheet_name='单词表-LV1(下)')

    # Extract the word list of each volume, without empty cells.
    upper_words = build_word_set(df_upper_lib[WORD_COLUMN])
    lower_words = build_word_set(df_lower_lib[WORD_COLUMN])

    print(f"定稿库上册单词数:{len(upper_words)}")
    print(f"定稿库下册单词数:{len(lower_words)}")
    print(f"合计:{len(upper_words)+len(lower_words)}")

    # Read the difficulty table and split it by matched volume.
    df_diff = pd.read_excel(difficulty_file)
    df_upper, df_lower, df_other = split_by_volume(
        df_diff, upper_words, lower_words
    )

    # Write the result; the "unmatched" sheet is only created when needed.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        df_upper.to_excel(writer, sheet_name='上册单词(最终版)', index=False)
        df_lower.to_excel(writer, sheet_name='下册单词(最终版)', index=False)
        if not df_other.empty:
            df_other.to_excel(writer, sheet_name='未匹配单词', index=False)

    print(f"\n处理完成!结果已保存到:{output_file}")
    print(f"上册匹配到单词数:{len(df_upper)}")
    print(f"下册匹配到单词数:{len(df_lower)}")
    print(f"未匹配到单词数:{len(df_other)}")


if __name__ == "__main__":
    main()