72 lines
1.9 KiB
Python
72 lines
1.9 KiB
Python
|
||
import pandas as pd
|
||
|
||
# Workbook locations: the L2 word list and the L1 word list.
l2_file = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'
l1_file = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L1完整.xlsx'

# Load each workbook into a DataFrame (L2 first, then L1).
l2_df = pd.read_excel(l2_file)
l1_df = pd.read_excel(l1_file)

# Show the column headers and the number of data rows of both sheets so the
# layout can be checked by eye before the comparison runs.
print("L2文件列名:", l2_df.columns.tolist())
print(f"L2行数: {len(l2_df.index)}")
print("\nL1文件列名:", l1_df.columns.tolist())
print(f"L1行数: {len(l1_df.index)}")
|
||
|
||
# Build the L1 lookup table. Keys are the first-column words, stripped and
# lowercased; each value describes the row the word came from:
#   "row"     - Excel row number of the word (the header is row 1, so the
#               first data row is row 2)
#   "pos"     - value of the second column, or "" if the sheet has no such column
#   "meaning" - value of the third column, or "" if the sheet has no such column
# A word that occurs more than once keeps the details of its last occurrence.
l1_dict = {}

# enumerate() counts physical row positions, so the reported Excel row does not
# depend on the DataFrame index being the default 0..n-1 range. itertuples()
# yields plain tuples instead of building a Series for every row.
for excel_row, cells in enumerate(l1_df.itertuples(index=False, name=None), start=2):
    raw_word = cells[0]
    # Blank cells are loaded as NaN and str(NaN) is "nan"; indexing that would
    # make every blank cell in the other sheet look like a matching word.
    if pd.isna(raw_word):
        continue
    word = str(raw_word).strip().lower()
    if not word:
        # Whitespace-only cell: nothing to index.
        continue
    l1_dict[word] = {
        "row": excel_row,
        "pos": cells[1] if len(cells) > 1 else "",
        "meaning": cells[2] if len(cells) > 2 else "",
    }
|
||
|
||
# Look up every L2 word (first column, stripped and lowercased) in l1_dict and
# collect one entry per L2 row for each of the four result lists:
#   is_l1      - "是" when the word is in L1, otherwise "否"
#   l1_rows    - the matching entry's "row" value, or "" when there is no match
#   l1_pos     - the matching entry's "pos" value, or "" when there is no match
#   l1_meaning - the matching entry's "meaning" value, or "" when there is no match
is_l1 = []
l1_rows = []
l1_pos = []
l1_meaning = []

# itertuples() yields plain tuples (no per-row Series) and simply produces
# nothing for a sheet without data rows.
for cells in l2_df.itertuples(index=False, name=None):
    raw_word = cells[0]
    # A blank cell is loaded as NaN; str(NaN) would turn it into the text "nan"
    # and could be reported as an L1 word, so blanks are never looked up.
    word = "" if pd.isna(raw_word) else str(raw_word).strip().lower()
    # Single dictionary lookup; entries are dicts, so None always means "absent".
    info = l1_dict.get(word) if word else None
    if info is None:
        is_l1.append("否")
        l1_rows.append("")
        l1_pos.append("")
        l1_meaning.append("")
    else:
        is_l1.append("是")
        l1_rows.append(info["row"])
        l1_pos.append(info["pos"])
        l1_meaning.append(info["meaning"])
|
||
|
||
# Attach the comparison results to the L2 sheet as four columns, in this order.
# Each list holds one entry per L2 row.
for column_name, column_values in (
    ("是否为L1单词", is_l1),
    ("在L1中的行数", l1_rows),
    ("L1词性", l1_pos),
    ("L1词义", l1_meaning),
):
    l2_df[column_name] = column_values

# Write the annotated sheet back to the L2 workbook path (the input file is
# overwritten); the DataFrame index is not written out.
l2_df.to_excel(l2_file, index=False)
|
||
|
||
# Summary statistics: number of L2 rows found in L1 and number not found.
count_yes = is_l1.count("是")
count_no = is_l1.count("否")

# An L2 sheet without data rows would make the percentage below divide by
# zero (after the workbook has already been written); report 0.00% instead.
l2_total = len(l2_df)
overlap_pct = count_yes / l2_total * 100 if l2_total else 0.0

print("\n" + "="*50)
print("比对完成!")
print(f"L1单词总数: {len(l1_df)}")
print(f"L2单词总数: {l2_total}")
print(f"重复单词数: {count_yes}")
print(f"不重复单词数: {count_no}")
print(f"重复率: {overlap_pct:.2f}%")
print("="*50)

print("\n保存成功!已更新 L2知识库-三级+A2.xlsx")
|