31 lines
1.0 KiB
Python
31 lines
1.0 KiB
Python
|
|
import pandas as pd
|
|
|
|
# Compare the new L2 word list with the existing L2 knowledge-base word list
# and export the rows of the new list whose word does not occur in the old one.
# Matching is case-insensitive and ignores surrounding whitespace.

file_new = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2新版_L1重复标记.xlsx'
file_old = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'

# NOTE(review): with its default NA handling, read_excel may load text cells
# such as "NA", "null" or "nan" as missing values. If the word lists can
# contain those as real words, consider keep_default_na=False together with
# an explicit na_values list -- confirm against the actual data.
df_new = pd.read_excel(file_new)
df_old = pd.read_excel(file_old)


def _normalized_words(column):
    """Return the usable cells of *column* as stripped, lower-cased strings.

    Missing cells are dropped before the string conversion, because
    ``str(NaN)`` is ``'nan'`` and would otherwise be compared as if it were a
    real word. Cells that are empty after stripping are dropped as well.
    The surviving values keep their original index labels so the caller can
    map them back to rows of the source frame.
    """
    words = column.dropna().map(lambda cell: str(cell).strip().lower())
    return words[words != '']


# The old workbook is matched by position (its first column); the new one is
# matched by its '单词' header.
old_words = set(_normalized_words(df_old.iloc[:, 0]))

new_words = _normalized_words(df_new['单词'])

# Index labels of the new-list rows whose normalized word is absent from the
# old list. Rows with a blank word cell were already removed above, so they
# are not reported as "new" words.
only_in_new_rows = new_words[~new_words.isin(old_words)].index

# Selecting from df_new (instead of building from a list of dicts) keeps the
# three headers in the output even when no row qualifies.
df_result = df_new.loc[only_in_new_rows, ['单词', '词性', '词义']].reset_index(drop=True)

output_path = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2新版独有单词.xlsx'
df_result.to_excel(output_path, index=False)

print(f"L2新版总单词数: {len(df_new)}")
print(f"L2知识库总单词数: {len(df_old)}")
print(f"L2新版中存在但L2知识库中不存在的单词: {len(df_result)} 个")
print(f"\n已保存至: {output_path}")
|