ai_member_xiaoban/makee_vala/reclassify_simple.py

29 lines
1.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# Split the finalized word list into an "upper volume" and a "lower volume" by
# row position only, then pull the matching rows out of the difficulty table
# and write each volume to its own sheet of a single workbook.

# File paths.
# NOTE(review): the inbound file names look mis-encoded; they are left exactly
# as-is because they have to match the names on disk -- confirm before renaming.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"  # finalized word list
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"  # difficulty table
output_file = "/root/.openclaw/workspace-xiaoban/极简版单词上下册分类结果.xlsx"

# Number of leading rows of the finalized list that go to the upper volume;
# every remaining row goes to the lower volume.
UPPER_VOLUME_SIZE = 250

# Load both spreadsheets.
df_final = pd.read_excel(final_lib_file)
df_diff = pd.read_excel(difficulty_file)

# Split strictly by original row order, ignoring the content of the rows:
# the first UPPER_VOLUME_SIZE words are the upper volume, the rest the lower.
final_words_all = df_final['单词'].tolist()
upper_words = final_words_all[:UPPER_VOLUME_SIZE]
lower_words = final_words_all[UPPER_VOLUME_SIZE:]

# Plain membership match with no de-duplication: a difficulty-table row is kept
# whenever its word occurs in the corresponding half, so a word present in both
# halves lands in both sheets, and rows keep the difficulty table's order.
upper_df = df_diff[df_diff['单词'].isin(upper_words)]
lower_df = df_diff[df_diff['单词'].isin(lower_words)]

# Write both sheets inside the writer's context so the workbook is saved and
# closed when the block exits.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    upper_df.to_excel(writer, sheet_name='上册单词', index=False)
    lower_df.to_excel(writer, sheet_name='下册单词', index=False)

# Report where the result went and how many rows each volume received.
print(f"处理完成!结果已保存到:{output_file}")
print(f"上册单词数量:{len(upper_df)}")
print(f"下册单词数量:{len(lower_df)}")