ai_member_xiaoban/makee_vala/match_columns.py
2026-03-18 08:00:08 +08:00

44 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# Input workbooks (table 1 and table 3) and the destination of the result.
table1_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx"
table3_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---e503b23c-829e-4367-b819-762856bd50b5.xlsx"
output_path = "/root/.openclaw/workspace-xiaoban/匹配完成的LV1词汇表.xlsx"

# Load both workbooks (table 1 first, then table 3) with read_excel defaults.
df1, df3 = (pd.read_excel(path) for path in (table1_path, table3_path))

# Print row counts and column names of both tables as a quick sanity check.
print(f"表一总条数:{len(df1)}")
print(f"表三总条数:{len(df3)}")
print(f"表一列名:{list(df1.columns)}")
print(f"表三列名:{list(df3.columns)}")
# Build the lookup table: each word of table 1 (converted to a stripped
# string) maps to the three fields that are copied over to table 3.
# When a word appears more than once in table 1, the last row wins.
word_map = {}
for _, row in df1.iterrows():
    raw_word = row['单词']
    # A blank cell is read as NaN; str(NaN) would create a bogus "nan" key
    # that blank cells of table 3 could then match, so skip such rows.
    if pd.isna(raw_word):
        continue
    word = str(raw_word).strip()
    # Whitespace-only cells carry no word either.
    if not word:
        continue
    word_map[word] = {
        '难度D': row['难度D'],
        '实现成本(T)': row['实现成本(T)'],
        '单词系数': row['单词系数'],
    }
# Lookup helper used to add the three columns to table 3.
def get_value(word, col):
    """Return the value stored in ``word_map`` for *word* under *col*.

    Args:
        word: Cell value from the word column; it is converted to a string
            and stripped of surrounding whitespace before the lookup.
        col: Name of the field to fetch from the matched entry.

    Returns:
        The stored value, or None when *word* is missing (NaN/None), is not
        a key of ``word_map``, or its entry has no field named *col*.
    """
    # A missing cell would otherwise be stringified to "nan"/"None" and could
    # falsely match an entry with that key, so treat it as "no match".
    if pd.isna(word):
        return None
    key = str(word).strip()
    return word_map.get(key, {}).get(col)
# Fill each of the three new table-3 columns by looking up every word of the
# 单词 column with get_value; rows without a match receive None.
for column in ('难度D', '实现成本(T)', '单词系数'):
    df3[column] = df3['单词'].apply(lambda x, column=column: get_value(x, column))
# Write the enriched table 3 to the output workbook, omitting the index.
df3.to_excel(output_path, index=False)

# Matching statistics: a row is counted as matched when its 难度D cell is
# non-null after the lookup.
match_count = df3['难度D'].count()
print(f"\n匹配完成!结果已保存到:{output_path}")
print(f"成功匹配条数:{match_count}")
print(f"未匹配条数:{len(df3) - match_count}")