import pandas as pd

# Workbook locations: the L2 vocabulary table to annotate and the L1 word list
# it is compared against.
l2_file = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L2知识库-三级+A2.xlsx'
l1_file = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/L1完整.xlsx'

# Load both workbooks (L2 first, then L1) with pandas' default read settings.
l2_df, l1_df = pd.read_excel(l2_file), pd.read_excel(l1_file)

# Print each table's column names and row count as a quick sanity check.
l2_columns = l2_df.columns.tolist()
l2_row_count = len(l2_df)
print("L2文件列名:", l2_columns)
print(f"L2行数: {l2_row_count}")

l1_columns = l1_df.columns.tolist()
l1_row_count = len(l1_df)
print("\nL1文件列名:", l1_columns)
print(f"L1行数: {l1_row_count}")

# Build the L1 lookup table: normalised word (trimmed, lower-cased text of the
# first column) -> its Excel row number plus the values of the second and third
# columns (stored as "pos" and "meaning"). If the same word occurs more than
# once in L1, the last occurrence wins.
l1_dict = {}
l1_column_count = l1_df.shape[1]
# Numbering starts at 2 because Excel row 1 holds the header row.
for row_number, (_, row) in enumerate(l1_df.iterrows(), start=2):
    raw_word = row.iloc[0]
    # Empty cells are read as NaN and str(NaN) == "nan"; skip them so the
    # lookup never gains a bogus "nan" entry.
    if pd.isna(raw_word):
        continue
    word = str(raw_word).strip().lower()
    if not word:
        # A whitespace-only cell is not a word either.
        continue
    l1_dict[word] = {
        "row": row_number,
        # Fall back to "" when the sheet has fewer than two / three columns.
        "pos": row.iloc[1] if l1_column_count > 1 else "",
        "meaning": row.iloc[2] if l1_column_count > 2 else "",
    }

# Compare every L2 row against the L1 lookup. The four lists below are filled
# in L2 row order and become the new annotation columns.
is_l1 = []
l1_rows = []
l1_pos = []
l1_meaning = []

for _, row in l2_df.iterrows():
    raw_word = row.iloc[0]
    # An empty cell is read as NaN, and str(NaN) would be "nan", which could
    # match a bogus L1 key. Missing or blank cells are therefore always
    # reported as "not an L1 word".
    word = "" if pd.isna(raw_word) else str(raw_word).strip().lower()
    info = l1_dict.get(word) if word else None
    if info is None:
        is_l1.append("否")
        l1_rows.append("")
        l1_pos.append("")
        l1_meaning.append("")
    else:
        is_l1.append("是")
        l1_rows.append(info["row"])
        l1_pos.append(info["pos"])
        l1_meaning.append(info["meaning"])

# Attach the comparison results to the L2 table as four new columns.
l2_df["是否为L1单词"] = is_l1
l2_df["在L1中的行数"] = l1_rows
l2_df["L1词性"] = l1_pos
l2_df["L1词义"] = l1_meaning

# Save the annotated table back to the L2 path, without the DataFrame index.
# NOTE(review): this overwrites the input workbook in place; presumably any
# additional sheets or cell formatting in the original file are not preserved
# -- confirm that is acceptable or write to a separate output file.
l2_df.to_excel(l2_file, index=False)

# Summary statistics: how many L2 rows were / were not found in L1.
count_yes = is_l1.count("是")
count_no = is_l1.count("否")
l2_total = len(l2_df)
# An L2 sheet with no data rows would otherwise raise ZeroDivisionError here.
overlap_rate = count_yes / l2_total * 100 if l2_total else 0.0

print("\n" + "="*50)
print("比对完成！")
print(f"L1单词总数: {len(l1_df)}")
print(f"L2单词总数: {l2_total}")
print(f"重复单词数: {count_yes}")
print(f"不重复单词数: {count_no}")
print(f"重复率: {overlap_rate:.2f}%")
print("="*50)

print("\n保存成功！已更新 L2知识库-三级+A2.xlsx")