ai_member_xiaoban/makee_vala/match_lower_final.py

41 lines
1.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# Input and output file locations.
# NOTE(review): the two input file names look mis-encoded; presumably they
# match the names actually present on disk — confirm before renaming.
difficulty_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx"  # difficulty / cost / word-coefficient table (v1.0)
lower_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---59ff96e7-d862-476b-be16-3162afcd818f.xlsx"  # latest second-volume (下册) word list
output_path = "/root/.openclaw/workspace-xiaoban/最终版_LV1下册词汇匹配系数结果.xlsx"
# Load both spreadsheets (difficulty table first, then the word list) and
# report how many rows the word list holds.
df_diff, df_lower = (pd.read_excel(path) for path in (difficulty_path, lower_path))
print(f"下册单词表总条数:{len(df_lower)}")
# Build the lookup table: stripped word text -> its three coefficient fields.
# Every word goes through str() so that numeric entries are matched as text.
# The columns are walked in lockstep with zip() instead of
# DataFrame.iterrows(): iterrows() packs each row into one Series and does not
# preserve per-column dtypes, so an integer word could be turned into a float
# and keyed as '1.0' instead of '1'. When a word appears more than once, the
# last row wins.
word_map = {
    str(word).strip(): {
        '难度D': difficulty,
        '实现成本(T)': cost,
        '单词系数': coefficient,
    }
    for word, difficulty, cost, coefficient in zip(
        df_diff['单词'],
        df_diff['难度D'],
        df_diff['实现成本(T)'],
        df_diff['单词系数'],
    )
}
# Field lookup
def match_field(word, field, mapping=None):
    """Look up one coefficient field for a word.

    The word is converted with ``str()`` and stripped of surrounding
    whitespace before the lookup.

    Args:
        word: Cell value from the word column (any type).
        field: Name of the field to fetch, e.g. ``'难度D'``.
        mapping: Optional lookup table shaped ``{word: {field: value}}``.
            When omitted, the module-level ``word_map`` is used.

    Returns:
        The stored value, or ``None`` when the word or the field is absent.
    """
    table = word_map if mapping is None else mapping
    entry = table.get(str(word).strip())
    if entry is None:
        return None
    return entry.get(field)
# Add the three coefficient columns to the word list, looking each value up
# by the row's word through match_field.
for field_name in ('难度D', '实现成本(T)', '单词系数'):
    df_lower[field_name] = df_lower['单词'].apply(match_field, args=(field_name,))
# Write the enriched word list to the output workbook, omitting the row index.
df_lower.to_excel(output_path, index=False)
# Summary: a row is counted as matched when its '难度D' cell is non-null.
# NOTE(review): a word that was found but has an empty '难度D' would be
# reported as unmatched — confirm that this is the intended meaning.
success_count = df_lower['难度D'].count()
print(f"\n匹配完成!结果已保存到:{output_path}")
print(f"成功匹配条数:{success_count}")
print(f"未匹配条数:{len(df_lower) - success_count}")