ai_member_xiaoyan/mark_l1_in_l2new.py

55 lines
1.7 KiB
Python

import pandas as pd
# Workbook locations. The L2 workbook is both an input and the output target:
# the script annotates it in place with the L1 lookup results.
file_l2 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2新版独有单词.xlsx'
file_l1 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L1完整.xlsx'
output_path = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2新版独有单词.xlsx'

# Values written to the '是否为L1单词' column. They must differ from each
# other, otherwise the column (and the yes/no counts printed at the end)
# carries no information.
MARK_IN_L1 = '是'
MARK_NOT_IN_L1 = '否'


def _normalize_word(value):
    """Return the case-insensitive lookup key for a word cell.

    Returns ``None`` for missing or blank cells so that empty rows are never
    turned into the literal key ``'nan'`` and matched against each other.
    """
    if pd.isna(value):
        return None
    key = str(value).strip().lower()
    return key or None


def _cell_text(value):
    """Render a cell for output, mapping missing values to an empty string."""
    return '' if pd.isna(value) else str(value)


def build_l1_index(df_l1):
    """Group the L1 sheet by normalized word.

    Args:
        df_l1: DataFrame with the columns '单词', '词性' and '词义'.

    Returns:
        dict mapping each normalized word to a list of
        ``{'row': int, '词性': str, '词义': str}`` entries, one per L1 row
        containing that word, in sheet order.

    Raises:
        KeyError: if one of the required columns is absent.
    """
    l1_words = {}
    columns = zip(df_l1['单词'], df_l1['词性'], df_l1['词义'])
    # Row numbers start at 2: this mirrors the original ``idx + 2`` and
    # presumes the header occupies spreadsheet row 1. Counting positionally
    # keeps the numbers right even if the frame's index is not 0..n-1.
    for sheet_row, (raw_word, pos, meaning) in enumerate(columns, start=2):
        key = _normalize_word(raw_word)
        if key is None:
            continue
        l1_words.setdefault(key, []).append({
            'row': sheet_row,
            '词性': _cell_text(pos),
            '词义': _cell_text(meaning),
        })
    return l1_words


def mark_l1_words(df_l2, l1_words):
    """Return a copy of ``df_l2`` annotated with L1 lookup results.

    Four columns are added (or overwritten if already present):
    '是否为L1单词' (``MARK_IN_L1`` / ``MARK_NOT_IN_L1``), 'L1行数', 'L1词性'
    and 'L1词义'. When a word occurs on several L1 rows the values are joined
    with ``', '`` in sheet order; unmatched rows get empty strings.

    Args:
        df_l2: DataFrame with a '单词' column. It is not modified.
        l1_words: index produced by :func:`build_l1_index`.

    Returns:
        A new DataFrame with the four annotation columns.

    Raises:
        KeyError: if ``df_l2`` has no '单词' column.
    """
    is_l1_list = []
    l1_row_list = []
    l1_pos_list = []
    l1_meaning_list = []
    for raw_word in df_l2['单词']:
        matches = l1_words.get(_normalize_word(raw_word), [])
        is_l1_list.append(MARK_IN_L1 if matches else MARK_NOT_IN_L1)
        # Joining an empty match list yields '', which is the unmatched value.
        l1_row_list.append(', '.join(str(m['row']) for m in matches))
        l1_pos_list.append(', '.join(str(m['词性']) for m in matches))
        l1_meaning_list.append(', '.join(str(m['词义']) for m in matches))

    marked = df_l2.copy()
    marked['是否为L1单词'] = is_l1_list
    marked['L1行数'] = l1_row_list
    marked['L1词性'] = l1_pos_list
    marked['L1词义'] = l1_meaning_list
    return marked


def main():
    """Annotate the L2 workbook with L1 matches and print a short summary."""
    # NOTE(review): pandas treats cell strings such as 'NA' or 'null' as
    # missing by default; such words are skipped here rather than matched.
    # Confirm whether the word lists can contain them.
    df_l2 = pd.read_excel(file_l2)
    df_l1 = pd.read_excel(file_l1)

    marked = mark_l1_words(df_l2, build_l1_index(df_l1))
    marked.to_excel(output_path, index=False)

    count_yes = int((marked['是否为L1单词'] == MARK_IN_L1).sum())
    count_no = len(marked) - count_yes
    print(f"L2新版独有单词总数: {len(marked)}")
    print(f"在L1中存在的: {count_yes}")
    print(f"不在L1中的: {count_no}")
    print(f"\n已保存至: {output_path}")


if __name__ == '__main__':
    main()