每日总结更新 20260314

This commit is contained in:
xiaoban-ai 2026-03-14 08:00:01 +08:00
parent 0fea5554da
commit 268b849b3f
29 changed files with 598 additions and 1 deletions

Binary file not shown.

15
check_file_structure.py Normal file
View File

@ -0,0 +1,15 @@
import pandas as pd

# Inbound spreadsheet locations (the mojibake names come from the upload service).
first_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx"
second_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---286e16db-d460-460d-95a4-242f28a0429c.xlsx"

# Preview the first workbook: column names plus the first five rows.
print("===== 第一份表格结构 =====")
first_df = pd.read_excel(first_path)
print(f"列名:{list(first_df.columns)}")
print(f"前5行数据\n{first_df.head()}\n")

# Preview the second workbook in the same way (no trailing blank line here).
print("===== 第二份表格结构 =====")
second_df = pd.read_excel(second_path)
print(f"列名:{list(second_df.columns)}")
print(f"前5行数据\n{second_df.head()}")

8
check_new_lib.py Normal file
View File

@ -0,0 +1,8 @@
import pandas as pd

# Finalized word-library workbook dropped into the inbound folder.
library_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"

# Show the column layout and a ten-row preview of the library.
library_df = pd.read_excel(library_path)
print("新定稿单词库列名:", list(library_df.columns))
print("\n前10行预览")
print(library_df.head(10))

11
check_new_word_lib.py Normal file
View File

@ -0,0 +1,11 @@
import pandas as pd

# Inspect the structure of the newly finalized word library.
# NOTE(review): the original script also assigned the path of the detailed
# origin table (é_¾åº_...---8b762144) but never read it; that dead
# assignment was removed.
new_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx"

# Print the column names and a ten-row preview.
print("===== 新定稿单词库结构 =====")
df_new = pd.read_excel(new_file)
print(f"列名:{list(df_new.columns)}")
print(f"前10行数据预览\n{df_new.head(10)}")

14
check_sheets.py Normal file
View File

@ -0,0 +1,14 @@
import pandas as pd
from openpyxl import load_workbook

# Path of the latest finalized word-library workbook.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"

# List every sheet. FIX: a read_only workbook keeps the file handle open,
# so close it before pandas re-opens the same file below.
wb = load_workbook(final_lib_file, read_only=True)
try:
    sheet_names = wb.sheetnames
finally:
    wb.close()
print(f"文件包含的sheet{sheet_names}")

# Preview each sheet: its row count plus the first three rows.
for sheet_name in sheet_names:
    df = pd.read_excel(final_lib_file, sheet_name=sheet_name)
    print(f"\nsheet名称{sheet_name},行数:{len(df)}")
    print(f"前3行预览\n{df.head(3)}")

10
check_unit_info.py Normal file
View File

@ -0,0 +1,10 @@
import pandas as pd

# LV1 vocabulary workbook that carries the 占用情况 (unit occupancy) column.
sheet_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---286e16db-d460-460d-95a4-242f28a0429c.xlsx"

df2 = pd.read_excel(sheet_path)
print(f"第二份表格总单词数:{len(df2)}")
print("\n所有占用情况唯一值:")
# Drop NaN before deduplicating so only real unit labels are listed.
for unit_label in df2['占用情况'].dropna().unique():
    print(unit_label)

41
check_word_match.py Normal file
View File

@ -0,0 +1,41 @@
import pandas as pd

# Inputs: finalized word library and the difficulty table.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"  # finalized word library
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"  # difficulty table

df_final = pd.read_excel(final_lib_file)
df_diff = pd.read_excel(difficulty_file)

# Keep only non-null string cells, lower-cased for case-insensitive matching.
final_words = [w.lower() for w in df_final['单词'] if pd.notna(w) and isinstance(w, str)]
final_set = set(final_words)
print(f"定稿库有效单词(纯字符串,去空):{len(final_set)}")
print(f"定稿库原始总条目数:{len(df_final)}")
print(f"定稿库非字符串/空值条目数:{len(df_final) - len(final_words)}")

# Same cleaning for the difficulty table.
diff_words = [w.lower() for w in df_diff['单词'] if pd.notna(w) and isinstance(w, str)]
diff_set = set(diff_words)
print(f"\n难度表有效单词:{len(diff_set)}")
print(f"难度表原始总条目数:{len(df_diff)}")

# Set arithmetic gives the overlap / difference counts directly.
match_count = len(diff_set & final_set)
unmatch_count = len(diff_set - final_set)
print(f"\n匹配上的单词数量:{match_count}")
print(f"未匹配的单词数量:{unmatch_count}")

# Show the first library cell that is not a plain string (NaN, number, ...).
print("\n定稿库中不是有效单词的内容示例:")
for cell in df_final['单词']:
    if pd.isna(cell) or not isinstance(cell, str):
        print(cell, type(cell))
        break

33
confirm_category_rule.py Normal file
View File

@ -0,0 +1,33 @@
import pandas as pd

# Confirm the unit-based book split rule on the finalized LV1 word library.
new_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx"
df_new = pd.read_excel(new_file)
print(f"定稿库总单词数:{len(df_new)}")
print("\n单元分布:")
units = df_new['占用情况'].dropna().unique()
# Sort unit labels by their numeric parts; non-'S' labels sink to the end via
# the 999 sentinel.
# NOTE(review): assumes every 'S...' label looks like 'S1-U7' (letter + digits
# in each dash segment) — int() raises ValueError on any other shape; confirm
# the 占用情况 column never holds malformed labels.
units_sorted = sorted(units, key=lambda x: (int(x.split('-')[1][1:]) if x.startswith('S') else 999, int(x.split('-')[2][1:]) if len(x.split('-'))>2 else 999))
for unit in units_sorted:
    count = len(df_new[df_new['占用情况'] == unit])
    print(f"{unit}: {count}")
# Count book-1 words (S0 plus S1 U1-U6) versus book-2 words (S1 U7 and up).
upper_count = 0
lower_count = 0
for idx, row in df_new.iterrows():
    unit = row['占用情况']
    # Skip empty cells and the '不常见' ("uncommon") marker.
    if pd.isna(unit) or unit == '不常见':
        continue
    unit = unit.strip()
    if unit.startswith('S0-'):
        upper_count +=1
    elif unit.startswith('S1-U'):
        # 'S1-U<n>': units 1-6 belong to book 1, 7+ to book 2.
        unit_num = int(unit.split('-')[1][1:])
        if unit_num <=6:
            upper_count +=1
        else:
            lower_count +=1
print(f"\n按单元统计:")
print(f"上册单词总数S0 + S1 U1-U6{upper_count}")
print(f"下册单词总数S1 U7+{lower_count}")

View File

@ -56,3 +56,17 @@ cat: /root/.openclaw/workspace-xiaoban/.feishu_token: No such file or directory
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 261 100 18 100 243 375 5070 --:--:-- --:--:-- --:--:-- 5553
404 page not found/root/.openclaw/workspace-xiaoban/daily_summary.sh: line 42: /home/ubuntu/.nvm/versions/node/v24.14.0/bin/openclaw: No such file or directory
[master 0fea555] 每日总结更新 20260313
7 files changed, 3946 insertions(+), 1 deletion(-)
create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_V2\344\274\230\345\214\226\347\211\210.csv"
create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_\345\220\253\346\210\220\346\234\254\350\257\204\345\210\206.csv"
create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_\345\220\253\346\210\220\346\234\254\350\257\204\345\210\206_\346\234\200\346\226\260\347\211\210.csv"
create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_\346\234\200\347\273\210\344\274\230\345\214\226\347\211\210.csv"
create mode 100644 "\345\255\246\344\271\240\345\206\205\345\256\271\347\263\273\346\225\260\350\241\250_\350\256\241\347\256\227\345\256\214\346\210\220.xlsx"
error: src refspec main does not match any
error: failed to push some refs to 'https://git.valavala.com/ai_member_only/ai_member_xiaoban'
cat: /root/.openclaw/workspace-xiaoban/.feishu_token: No such file or directory
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 261 100 18 100 243 428 5780 --:--:-- --:--:-- --:--:-- 6365
404 page not found/root/.openclaw/workspace-xiaoban/daily_summary.sh: line 42: /home/ubuntu/.nvm/versions/node/v24.14.0/bin/openclaw: No such file or directory

41
final_reclassify.py Normal file
View File

@ -0,0 +1,41 @@
import pandas as pd

# Split the difficulty-table words into book 1 / book 2 according to the
# two sheets of the finalized word library.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"  # finalized library (two sheets: book 1 / book 2)
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"  # difficulty table
output_file = "/root/.openclaw/workspace-xiaoban/最终版单词上下册分类结果.xlsx"

# BUG FIX: the original read the SAME sheet ('单词表-LV1') for both books, so
# upper_words == lower_words and every word was classified as 上册. Read the
# two sheets positionally instead.
# NOTE(review): confirm sheet 0 is the book-1 list and sheet 1 the book-2 list.
df_upper_lib = pd.read_excel(final_lib_file, sheet_name=0)
df_lower_lib = pd.read_excel(final_lib_file, sheet_name=1)

# Per-book word sets, with empty cells dropped.
upper_words = set(df_upper_lib['单词'].dropna().tolist())
lower_words = set(df_lower_lib['单词'].dropna().tolist())
print(f"定稿库上册单词数:{len(upper_words)}")
print(f"定稿库下册单词数:{len(lower_words)}")
print(f"合计:{len(upper_words)+len(lower_words)}")

# Classify each difficulty-table word; book 1 wins when a word is in both sets.
df_diff = pd.read_excel(difficulty_file)
df_diff['分类'] = df_diff['单词'].apply(lambda x: '上册' if x in upper_words else '下册' if x in lower_words else '未匹配')

# Split by classification and drop the helper column.
df_upper = df_diff[df_diff['分类'] == '上册'].drop(columns=['分类'])
df_lower = df_diff[df_diff['分类'] == '下册'].drop(columns=['分类'])
df_other = df_diff[df_diff['分类'] == '未匹配'].drop(columns=['分类'])

# One sheet per bucket; the unmatched sheet is only written when non-empty.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_upper.to_excel(writer, sheet_name='上册单词(最终版)', index=False)
    df_lower.to_excel(writer, sheet_name='下册单词(最终版)', index=False)
    if len(df_other) >0:
        df_other.to_excel(writer, sheet_name='未匹配单词', index=False)
print(f"\n处理完成!结果已保存到:{output_file}")
print(f"上册匹配到单词数:{len(df_upper)}")
print(f"下册匹配到单词数:{len(df_lower)}")
print(f"未匹配到单词数:{len(df_other)}")

View File

@ -0,0 +1,72 @@
import pandas as pd
# 你提供的核心逻辑适配Excel输入输出
def process_vocabulary_system(file_path: str) -> str:
    """Annotate an LV1 vocabulary Excel sheet with teaching-scheme columns.

    Reads the sheet at *file_path*, derives three new columns (教学方案展示,
    实现理由, 联动建议) from the 实现成本(T) score and a set of special word
    lists, and writes the result to a fixed workspace path.

    Returns a human-readable "Success: ..." or "Error: ..." status string.
    """
    # 1. Load the Excel data.
    try:
        df = pd.read_excel(file_path)
    except FileNotFoundError:
        return "Error: File not found."
    df.columns = [c.strip() for c in df.columns]
    print(f"加载文件成功,共{len(df)}条单词记录")
    # 2. Special-case word lists that force T2 treatment regardless of score.
    t2_special_list = {
        'invisible': {'air', 'wind', 'smoke', 'gas'},
        'abstract': {'song', 'friend', 'hobby', 'art', 'pe', 'music', 'fun'},
        'generalized': {'child', 'children', 'father', 'mother', 'food', 'colour', 'animal', 'toy'},
        'identity': {'address', 'age', 'aunt', 'name'}
    }
    # Flatten the category sets into one membership set for the T2 test.
    all_t2_special = {item for sublist in t2_special_list.values() for item in sublist}
    # 3. Core per-row rule: pick scheme/reason/link from T score and word class.
    def apply_rules(row):
        # Normalize the word; missing cells become '' and never match a set.
        word = str(row.get('单词', '')).lower().strip()
        # Non-numeric or missing T scores fall back to 1 (the T1 branch).
        t_score = pd.to_numeric(row.get('实现成本(T)', 1), errors='coerce')
        if pd.isna(t_score):
            t_score = 1
        # Rule branches, highest cost first.
        if t_score >= 3:
            scheme = "逻辑交互 / UI 处理"
            reason = "英语骨架词。涉及空间位置、时序或数量的逻辑判定需系统重度UI引导。"
            link = "建议设计‘解谜指令’,如:利用 here/there 进行远近空间坐标对比任务。"
        elif t_score == 2 or word in all_t2_special:
            scheme = "动画 / 特效 / UI处理"
            if word in t2_special_list['invisible']:
                reason = "隐形名词。需环境联动(如风吹树叶)和特效辅助表现。"
                link = "联动关联实物wind 联动 tree/leaf 的动态表现。"
            elif word in t2_special_list['generalized']:
                reason = "泛化概念。无法用单一图片代表需UI组合展示或多模型联动。"
                link = f"联动具体成员,由 {word} 展示其下属的 T1 级具象单词集合。"
            elif word in t2_special_list['abstract'] or word in t2_special_list['identity']:
                reason = "抽象/身份信息。需通过情节演绎或特定 UI 界面(如家谱)界定。"
                link = "联动相关动作song 联动 singage 联动 numbers。"
            else:
                reason = "动作/状态词。需 Animator 动画、粒子特效或角色表情反馈。"
                link = "建议设计状态切换任务open vs closeddirty vs clean。"
        else:  # T1 case: plain concrete nouns.
            scheme = "静态模型展示"
            reason = "具象实物。在 Unity 中对应单一、静态的物理模型或材质资源。"
            link = "可作为背景或道具。建议联动颜色词或方位词增加任务厚度。"
        return pd.Series([scheme, reason, link])
    # Apply the rules row-wise to populate the three new columns.
    df[['教学方案展示', '实现理由', '联动建议']] = df.apply(apply_rules, axis=1)
    # 4. Export the annotated table to the workspace.
    output_file = "/root/.openclaw/workspace-xiaoban/LV1词汇教学方案生成结果.xlsx"
    df.to_excel(output_file, index=False)
    return f"Success: 处理完成,结果已保存到 {output_file}"
# Run the generator on the LV1 vocabulary workbook that just arrived.
source_workbook = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---d41d887f-5d65-4eab-928d-a717e5097e8c.xlsx"
print(process_vocabulary_system(source_workbook))

43
match_columns.py Normal file
View File

@ -0,0 +1,43 @@
import pandas as pd

# Input tables and output location.
table1_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx"
table3_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---e503b23c-829e-4367-b819-762856bd50b5.xlsx"
output_path = "/root/.openclaw/workspace-xiaoban/匹配完成的LV1词汇表.xlsx"

df1 = pd.read_excel(table1_path)
df3 = pd.read_excel(table3_path)
print(f"表一总条数:{len(df1)}")
print(f"表三总条数:{len(df3)}")
print(f"表一列名:{list(df1.columns)}")
print(f"表三列名:{list(df3.columns)}")

# Map each word (stringified + stripped so numeric cells match too) to its
# three score fields; later duplicates overwrite earlier ones, as before.
word_map = {
    str(word).strip(): {'难度D': d_val, '实现成本(T)': t_val, '单词系数': k_val}
    for word, d_val, t_val, k_val in zip(df1['单词'], df1['难度D'], df1['实现成本(T)'], df1['单词系数'])
}

def get_value(word, col):
    """Look up one score field for a word; None when the word is unknown."""
    return word_map.get(str(word).strip(), {}).get(col, None)

# Fill the three score columns on table 3.
for field in ('难度D', '实现成本(T)', '单词系数'):
    df3[field] = df3['单词'].apply(lambda x, f=field: get_value(x, f))

df3.to_excel(output_path, index=False)

# Report match statistics via the 难度D column.
match_count = df3['难度D'].notna().sum()
print(f"\n匹配完成!结果已保存到:{output_path}")
print(f"成功匹配条数:{match_count}")
print(f"未匹配条数:{len(df3) - match_count}")

40
match_lower_final.py Normal file
View File

@ -0,0 +1,40 @@
import pandas as pd

# Inputs: difficulty/cost/coefficient 1.0 table and the latest book-2 word list.
difficulty_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx"  # difficulty / cost / coefficient 1.0 table
lower_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---59ff96e7-d862-476b-be16-3162afcd818f.xlsx"  # latest book-2 word table
output_path = "/root/.openclaw/workspace-xiaoban/最终版_LV1下册词汇匹配系数结果.xlsx"

df_diff = pd.read_excel(difficulty_path)
df_lower = pd.read_excel(lower_path)
print(f"下册单词表总条数:{len(df_lower)}")

# Build the lookup keyed on the stringified, stripped word so numeric cells
# match as well; duplicates keep the last occurrence, as before.
word_map = {}
for word_cell, d_val, t_val, k_val in zip(df_diff['单词'], df_diff['难度D'], df_diff['实现成本(T)'], df_diff['单词系数']):
    word_map[str(word_cell).strip()] = {'难度D': d_val, '实现成本(T)': t_val, '单词系数': k_val}

def match_field(word, field):
    """Return one score field for *word*, or None when unmatched."""
    return word_map.get(str(word).strip(), {}).get(field, None)

df_lower['难度D'] = df_lower['单词'].apply(lambda x: match_field(x, '难度D'))
df_lower['实现成本(T)'] = df_lower['单词'].apply(lambda x: match_field(x, '实现成本(T)'))
df_lower['单词系数'] = df_lower['单词'].apply(lambda x: match_field(x, '单词系数'))

df_lower.to_excel(output_path, index=False)

# Statistics based on the 难度D column.
success_count = df_lower['难度D'].notna().sum()
print(f"\n匹配完成!结果已保存到:{output_path}")
print(f"成功匹配条数:{success_count}")
print(f"未匹配条数:{len(df_lower) - success_count}")

39
match_lv1_lower.py Normal file
View File

@ -0,0 +1,39 @@
import pandas as pd

# Inputs: the difficulty table and the LV1 book-2 vocabulary table.
difficulty_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx"  # difficulty table
lv1_lower_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---5b90d819-abf3-4882-8772-ed8f3e0b449f.xlsx"  # LV1 book-2 vocabulary
output_path = "/root/.openclaw/workspace-xiaoban/正确版_LV1下册词汇匹配结果.xlsx"

df_diff = pd.read_excel(difficulty_path)
df_lower = pd.read_excel(lv1_lower_path)
print(f"LV1下册词汇表总条数{len(df_lower)}")

# Lookup over the whole difficulty table (no book split) by word content;
# keys are stringified + stripped, later duplicates win as before.
word_map = {
    str(cell).strip(): {'难度D': dd, '实现成本(T)': tt, '单词系数': kk}
    for cell, dd, tt, kk in zip(df_diff['单词'], df_diff['难度D'], df_diff['实现成本(T)'], df_diff['单词系数'])
}

def get_value(word, col):
    """Return one score field for *word*, or None when unmatched."""
    return word_map.get(str(word).strip(), {}).get(col, None)

for column in ('难度D', '实现成本(T)', '单词系数'):
    df_lower[column] = df_lower['单词'].apply(lambda x, c=column: get_value(x, c))

df_lower.to_excel(output_path, index=False)
match_count = df_lower['难度D'].notna().sum()
print(f"\nLV1下册匹配完成结果已保存到{output_path}")
print(f"成功匹配条数:{match_count}")
print(f"未匹配条数:{len(df_lower) - match_count}")

41
match_remaining.py Normal file
View File

@ -0,0 +1,41 @@
import pandas as pd

# Inputs: the score table and the remaining 480-row book-2 table.
table1_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx"
table2_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---5b90d819-abf3-4882-8772-ed8f3e0b449f.xlsx"  # remaining 480 rows
output_path = "/root/.openclaw/workspace-xiaoban/匹配完成的LV1下册词汇表.xlsx"

df1 = pd.read_excel(table1_path)
df2 = pd.read_excel(table2_path)
print(f"表一总条数:{len(df1)}")
print(f"待处理的下册表总条数:{len(df2)}")

# Word -> score-fields lookup; stringified + stripped keys, last duplicate wins.
word_map = {
    str(entry).strip(): {'难度D': d_score, '实现成本(T)': t_score, '单词系数': coeff}
    for entry, d_score, t_score, coeff in zip(df1['单词'], df1['难度D'], df1['实现成本(T)'], df1['单词系数'])
}

def get_value(word, col):
    """Fetch one score field for *word*; None when the word is unknown."""
    return word_map.get(str(word).strip(), {}).get(col, None)

df2['难度D'] = df2['单词'].apply(lambda x: get_value(x, '难度D'))
df2['实现成本(T)'] = df2['单词'].apply(lambda x: get_value(x, '实现成本(T)'))
df2['单词系数'] = df2['单词'].apply(lambda x: get_value(x, '单词系数'))

df2.to_excel(output_path, index=False)

# Match statistics via the 难度D column.
match_count = df2['难度D'].notna().sum()
print(f"\n处理完成!结果已保存到:{output_path}")
print(f"成功匹配条数:{match_count}")
print(f"未匹配条数:{len(df2) - match_count}")

42
new_reclassify.py Normal file
View File

@ -0,0 +1,42 @@
import pandas as pd

# Inputs: finalized word library (word list only) and the difficulty table.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"  # finalized word library (list only)
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"  # difficulty table
output_file = "/root/.openclaw/workspace-xiaoban/最新定稿版单词上下册分类结果.xlsx"

df_final = pd.read_excel(final_lib_file)
df_diff = pd.read_excel(difficulty_file)

# Unique non-null words, preserving library order.
final_words = df_final['单词'].dropna().unique().tolist()
total = len(final_words)
print(f"定稿单词库总有效不重复单词数:{total}")

# Library order defines the split: first half book 1, second half book 2.
half = total//2
upper_words = set(final_words[:half])
lower_words = set(final_words[half:])
print(f"上册单词数:{len(upper_words)}")
print(f"下册单词数:{len(lower_words)}")

def _classify(word):
    # Book-1 membership is checked first, mirroring the original precedence.
    if word in upper_words:
        return '上册'
    if word in lower_words:
        return '下册'
    return '未匹配'

df_diff['分类'] = df_diff['单词'].apply(_classify)

# Split by classification and drop the helper column.
df_upper = df_diff[df_diff['分类'] == '上册'].drop(columns=['分类'])
df_lower = df_diff[df_diff['分类'] == '下册'].drop(columns=['分类'])
df_other = df_diff[df_diff['分类'] == '未匹配'].drop(columns=['分类'])

# One sheet per bucket; the unmatched sheet only when non-empty.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_upper.to_excel(writer, sheet_name='上册单词', index=False)
    df_lower.to_excel(writer, sheet_name='下册单词', index=False)
    if len(df_other) >0:
        df_other.to_excel(writer, sheet_name='未匹配单词', index=False)
print(f"\n处理完成!结果已保存到:{output_file}")
print(f"上册匹配到单词数:{len(df_upper)}")
print(f"下册匹配到单词数:{len(df_lower)}")
print(f"未匹配到单词数:{len(df_other)}")

53
process_word_list.py Normal file
View File

@ -0,0 +1,53 @@
import pandas as pd
from openpyxl import load_workbook

# Inputs: detailed word table (file1) and the LV1 vocabulary table with the
# 占用情况 unit column (file2); output: file1 split into book 1 / book 2.
file1 = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx"
file2 = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---286e16db-d460-460d-95a4-242f28a0429c.xlsx"
output_file = "/root/.openclaw/workspace-xiaoban/单词上下分类结果.xlsx"

df1 = pd.read_excel(file1)
df2 = pd.read_excel(file2)

# BUG FIX: the original returned '' from both the book-1 and book-2 branches,
# so the 上册 and 下册 filters below selected the exact same rows. Use
# distinct labels instead.
def get_category(unit):
    """Map a 占用情况 label to 上册 / 下册 / 其他."""
    if pd.isna(unit) or unit == '不常见':
        return '其他'
    unit = unit.strip()
    if unit.startswith('S0-'):
        return '上册'
    if unit.startswith('S1-U'):
        # S1 units 1-6 belong to book 1, 7 and up to book 2.
        unit_num = int(unit.split('-')[1][1:])
        return '上册' if unit_num <= 6 else '下册'
    return '其他'

df2['分类'] = df2['占用情况'].apply(get_category)
# Word -> category map (first occurrence wins for duplicated words).
word_category_map = df2.drop_duplicates('单词').set_index('单词')['分类'].to_dict()
# Tag the detailed table with the category of each word.
df1['分类'] = df1['单词'].map(word_category_map)
# Split by category and drop the helper column.
df_upper = df1[df1['分类'] == '上册'].drop(columns=['分类'])
df_lower = df1[df1['分类'] == '下册'].drop(columns=['分类'])
df_other = df1[df1['分类'] == '其他'].drop(columns=['分类'])
# Write one sheet per bucket; the 'other' sheet only when non-empty.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_upper.to_excel(writer, sheet_name='上册单词', index=False)
    df_lower.to_excel(writer, sheet_name='下册单词', index=False)
    if len(df_other) > 0:
        df_other.to_excel(writer, sheet_name='其他分类单词', index=False)
print(f"处理完成!结果已保存到:{output_file}")
print(f"上册单词数量:{len(df_upper)}")
print(f"下册单词数量:{len(df_lower)}")
print(f"其他分类单词数量:{len(df_other)}")

28
reclassify_simple.py Normal file
View File

@ -0,0 +1,28 @@
import pandas as pd

# Inputs: finalized word library and the difficulty table.
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx"  # finalized word library
difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx"  # difficulty table
output_file = "/root/.openclaw/workspace-xiaoban/极简版单词上下册分类结果.xlsx"

df_final = pd.read_excel(final_lib_file)
df_diff = pd.read_excel(difficulty_file)

# Pure positional split: the first 250 library rows are book 1, the rest
# book 2, regardless of content.
SPLIT_ROW = 250
all_words = df_final['单词'].tolist()
upper_words = all_words[:SPLIT_ROW]
lower_words = all_words[SPLIT_ROW:]

# Plain membership match; duplicates are deliberately ignored.
upper_df = df_diff[df_diff['单词'].isin(upper_words)]
lower_df = df_diff[df_diff['单词'].isin(lower_words)]

with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    upper_df.to_excel(writer, sheet_name='上册单词', index=False)
    lower_df.to_excel(writer, sheet_name='下册单词', index=False)
print(f"处理完成!结果已保存到:{output_file}")
print(f"上册单词数量:{len(upper_df)}")
print(f"下册单词数量:{len(lower_df)}")

52
reclassify_word.py Normal file
View File

@ -0,0 +1,52 @@
import pandas as pd
from openpyxl import load_workbook

# Inputs: original detailed word table and the finalized word library
# (which carries the 占用情况 unit column).
origin_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx"
final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx"
output_file = "/root/.openclaw/workspace-xiaoban/定稿版单词上下册分类结果.xlsx"

df_origin = pd.read_excel(origin_file)
df_final = pd.read_excel(final_lib_file)

def get_category(unit):
    """Map a 占用情况 label to 上册 / 下册 / 不匹配."""
    if pd.isna(unit):
        return '不匹配'
    label = unit.strip()
    if label in ('', '不常见'):
        return '不匹配'
    if label.startswith('S0-'):
        return '上册'
    if label.startswith('S1-U'):
        # S1 units 1-6 are book 1, 7 and up are book 2.
        number = int(label.split('-')[1][1:])
        return '上册' if number <= 6 else '下册'
    return '不匹配'

df_final['分类'] = df_final['占用情况'].apply(get_category)
# Word -> category map, restricted to words that actually classified
# (first occurrence wins for duplicated words).
classified = df_final[df_final['分类'] != '不匹配']
word_category_map = classified.drop_duplicates('单词').set_index('单词')['分类'].to_dict()
# Tag the original table with each word's category.
df_origin['分类'] = df_origin['单词'].map(word_category_map)
# Split into the two books plus everything that failed to match.
df_upper = df_origin[df_origin['分类'] == '上册'].drop(columns=['分类'])
df_lower = df_origin[df_origin['分类'] == '下册'].drop(columns=['分类'])
df_other = df_origin[~df_origin['分类'].isin(['上册', '下册'])].drop(columns=['分类'])
# One sheet per bucket; the unmatched sheet only when non-empty.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_upper.to_excel(writer, sheet_name='上册单词(定稿版)', index=False)
    df_lower.to_excel(writer, sheet_name='下册单词(定稿版)', index=False)
    if len(df_other) > 0:
        df_other.to_excel(writer, sheet_name='未匹配到定稿库的单词', index=False)
print(f"处理完成!结果已保存到:{output_file}")
print(f"上册匹配到单词数量:{len(df_upper)}")
print(f"下册匹配到单词数量:{len(df_lower)}")
print(f"未匹配到定稿库的单词数量:{len(df_other)}")

View File

@ -1,3 +1,3 @@
=== 每日总结 20260313 ===
=== 每日总结 20260314 ===
## 昨日关键进展
无昨日记忆记录

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.