# ai_member_xiaoyan/regenerate_files.py
#
# 82 lines
# 2.7 KiB
# Python

import pandas as pd
import os
# ---------------------------
# Regenerate the annotated workbooks and save them to the correct location
# ---------------------------
# All three input workbooks live in the output directory, so the directory is
# spelled out once and the individual file paths are derived from it.
outputDir = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表'
file3034 = outputDir + '/30-34.xlsx'
fileLevel3 = outputDir + '/三级单词表原始版.xlsx'
fileKet = outputDir + '/KET词汇表_完整版.xlsx'
# 1. Generate the "是否三级" (is it a level-3 word?) version
df3034 = pd.read_excel(file3034)
dfLevel3 = pd.read_excel(fileLevel3)

# Build a lookup of the level-3 vocabulary keyed by the normalised word
# (stripped and lower-cased). If a word occurs more than once, the last
# occurrence wins because later assignments overwrite earlier ones.
level3_dict = {}
for idx, row in dfLevel3.iterrows():
    cell = row['单词']
    # Blank cells are read as NaN and str(NaN) is 'nan'; normalise them to ''
    # so they are never stored under a bogus 'nan' key.
    word = '' if pd.isna(cell) else str(cell).strip().lower()
    if not word:
        continue
    # Prefer the level-3 columns; fall back to the A2 columns when empty.
    pos = row['三级词性'] if pd.notna(row['三级词性']) else row['A2词性']
    meaning = row['三级中文释义'] if pd.notna(row['三级中文释义']) else row['A2中文释义']
    # idx + 2 turns the 0-based frame index into a 1-based sheet row number,
    # skipping the header row consumed by read_excel's default header=0.
    level3_dict[word] = {'row': idx + 2, 'pos': pos, 'meaning': meaning}

# Add the "是否三级" column: one annotation string per source row.
result_level3 = []
for idx, row in df3034.iterrows():
    cell = row['单词']
    # Same normalisation as above; '' is never a key, so blank rows get "【否】".
    word = '' if pd.isna(cell) else str(cell).strip().lower()
    info = level3_dict.get(word)
    if info is None:
        result_level3.append("【否】")
    else:
        result_level3.append(f"【是】- 第{info['row']}行 - {info['pos']} - {info['meaning']}")
df3034['是否三级'] = result_level3

# Save file 1
output1 = os.path.join(outputDir, '30-34_三级标注完成.xlsx')
df3034.to_excel(output1, index=False)
print(f"已保存: {output1}")
# 2. Generate the "是否KET" (is it a KET word?) version
df3034_ket = pd.read_excel(file3034)
dfKet = pd.read_excel(fileKet)

# Build a lookup of the KET vocabulary keyed by the normalised word
# (stripped and lower-cased). If a word occurs more than once, the last
# occurrence wins because later assignments overwrite earlier ones.
ket_dict = {}
for idx, row in dfKet.iterrows():
    cell = row['单词']
    # Blank cells are read as NaN and str(NaN) is 'nan'; normalise them to ''
    # so they are never stored under a bogus 'nan' key.
    word = '' if pd.isna(cell) else str(cell).strip().lower()
    if not word:
        continue
    # idx + 2 turns the 0-based frame index into a 1-based sheet row number,
    # skipping the header row consumed by read_excel's default header=0.
    ket_dict[word] = {'row': idx + 2, 'pos': row['词性']}

# Add the "是否KET" column: one annotation string per source row.
result_ket = []
for idx, row in df3034_ket.iterrows():
    cell = row['单词']
    # Same normalisation as above; '' is never a key, so blank rows get "【否】".
    word = '' if pd.isna(cell) else str(cell).strip().lower()
    info = ket_dict.get(word)
    if info is None:
        result_ket.append("【否】")
    else:
        result_ket.append(f"【是】- 第{info['row']}行 - {info['pos']}")
df3034_ket['是否KET'] = result_ket

# Save file 2
output2 = os.path.join(outputDir, '30-34_KET标注完成.xlsx')
df3034_ket.to_excel(output2, index=False)
print(f"已保存: {output2}")
# 3. Generate the version that carries both annotations
output3 = os.path.join(outputDir, '30-34_完整标注版.xlsx')
# Re-read the source sheet so the combined workbook starts from the original
# columns, then attach the two annotation lists computed earlier in the script.
df3034_both = pd.read_excel(file3034)
for column, values in (('是否三级', result_level3), ('是否KET', result_ket)):
    df3034_both[column] = values
df3034_both.to_excel(output3, index=False)
print(f"已保存: {output3}")

# Final summary: report the target directory and list everything in it.
print("\n所有文件已保存到: L2单词表目录")
print("\n目录内容:")
for entry in os.listdir(outputDir):
    print(f" - {entry}")