82 lines
2.7 KiB
Python
82 lines
2.7 KiB
Python
|
|
import pandas as pd
|
|
import os
|
|
|
|
# ---------------------------
# Regenerate the annotated workbooks and save them to the correct location
# ---------------------------

# Directory that receives every generated workbook.
outputDir = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表'

# Input workbooks.
# file3034   - the word list that gets annotated
# fileLevel3 - level-3 reference vocabulary
# fileKet    - KET reference vocabulary
file3034 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/30-34.xlsx'
fileLevel3 = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/三级单词表原始版.xlsx'
fileKet = r'/root/.openclaw/workspace-xiaoyan/business_knowledge/L2单词表/KET词汇表_完整版.xlsx'

# 1. Build the version annotated with the "是否三级" (is it a level-3 word) column
df3034 = pd.read_excel(file3034)
dfLevel3 = pd.read_excel(fileLevel3)

# Lookup table: normalized word -> Excel row number, part of speech, meaning.
# Row numbers start at 2 because row 1 of the sheet is the header.
# When a word occurs more than once, the last occurrence wins.
# NOTE(review): read_excel's default NA parsing presumably also turns cell
# text such as "NA" or "null" into missing values, so such words would be
# skipped here - confirm whether the lists contain any.
level3_dict = {}
level3_columns = zip(
    dfLevel3['单词'],
    dfLevel3['三级词性'],
    dfLevel3['A2词性'],
    dfLevel3['三级中文释义'],
    dfLevel3['A2中文释义'],
)
for excel_row, (raw_word, pos3, pos_a2, meaning3, meaning_a2) in enumerate(
    level3_columns, start=2
):
    # A blank cell must not be stringified: str(nan) == 'nan' would make
    # every blank word in one sheet "match" blank words in the other.
    if pd.isna(raw_word):
        continue
    word = str(raw_word).strip().lower()
    if not word:
        continue
    level3_dict[word] = {
        'row': excel_row,
        # Fall back to the A2 columns when the level-3 columns are empty.
        'pos': pos3 if pd.notna(pos3) else pos_a2,
        'meaning': meaning3 if pd.notna(meaning3) else meaning_a2,
    }

# Build the "是否三级" column: one label per row of the 30-34 list.
result_level3 = []
for raw_word in df3034['单词']:
    word = None if pd.isna(raw_word) else str(raw_word).strip().lower()
    info = level3_dict.get(word)  # blank words are never keys, so they yield None
    if info is None:
        result_level3.append("【否】")
    else:
        result_level3.append(
            f"【是】- 第{info['row']}行 - {info['pos']} - {info['meaning']}"
        )
df3034['是否三级'] = result_level3

# Save workbook 1
output1 = os.path.join(outputDir, '30-34_三级标注完成.xlsx')
df3034.to_excel(output1, index=False)
print(f"已保存: {output1}")

# 2. Build the version annotated with the "是否KET" (is it a KET word) column
df3034_ket = pd.read_excel(file3034)
dfKet = pd.read_excel(fileKet)

# Lookup table: normalized word -> Excel row number and part of speech.
# Row numbers start at 2 because row 1 of the sheet is the header.
# When a word occurs more than once, the last occurrence wins.
ket_dict = {}
for excel_row, (raw_word, pos) in enumerate(
    zip(dfKet['单词'], dfKet['词性']), start=2
):
    # A blank cell must not be stringified: str(nan) == 'nan' would make
    # every blank word in one sheet "match" blank words in the other.
    if pd.isna(raw_word):
        continue
    word = str(raw_word).strip().lower()
    if not word:
        continue
    ket_dict[word] = {'row': excel_row, 'pos': pos}

# Build the "是否KET" column: one label per row of the 30-34 list.
result_ket = []
for raw_word in df3034_ket['单词']:
    word = None if pd.isna(raw_word) else str(raw_word).strip().lower()
    info = ket_dict.get(word)  # blank words are never keys, so they yield None
    if info is None:
        result_ket.append("【否】")
    else:
        result_ket.append(f"【是】- 第{info['row']}行 - {info['pos']}")
df3034_ket['是否KET'] = result_ket

# Save workbook 2
output2 = os.path.join(outputDir, '30-34_KET标注完成.xlsx')
df3034_ket.to_excel(output2, index=False)
print(f"已保存: {output2}")

# 3. Build the version that carries both annotation columns
# Start from a fresh read of the source list and attach the two label
# lists computed in the earlier sections, level-3 first and KET second.
df3034_both = pd.read_excel(file3034).assign(
    **{'是否三级': result_level3, '是否KET': result_ket}
)
output3 = os.path.join(outputDir, '30-34_完整标注版.xlsx')
df3034_both.to_excel(output3, index=False)
print(f"已保存: {output3}")

# Final summary: list whatever is in the output directory now.
print("\n所有文件已保存到: L2单词表目录")
print("\n目录内容:")
for entry in os.listdir(outputDir):
    print(f" - {entry}")