diff --git a/LV1词汇教学方案生成结果.xlsx b/LV1词汇教学方案生成结果.xlsx new file mode 100644 index 0000000..e3db9c8 Binary files /dev/null and b/LV1词汇教学方案生成结果.xlsx differ diff --git a/check_file_structure.py b/check_file_structure.py new file mode 100644 index 0000000..532b8a4 --- /dev/null +++ b/check_file_structure.py @@ -0,0 +1,15 @@ +import pandas as pd + +# 文件路径 +file1 = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx" +file2 = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---286e16db-d460-460d-95a4-242f28a0429c.xlsx" + +print("===== 第一份表格结构 =====") +df1 = pd.read_excel(file1) +print(f"列名:{list(df1.columns)}") +print(f"前5行数据:\n{df1.head()}\n") + +print("===== 第二份表格结构 =====") +df2 = pd.read_excel(file2) +print(f"列名:{list(df2.columns)}") +print(f"前5行数据:\n{df2.head()}") diff --git a/check_new_lib.py b/check_new_lib.py new file mode 100644 index 0000000..9dcef5b --- /dev/null +++ b/check_new_lib.py @@ -0,0 +1,8 @@ +import pandas as pd + +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx" +df_final = pd.read_excel(final_lib_file) + +print("新定稿单词库列名:", list(df_final.columns)) +print("\n前10行预览:") +print(df_final.head(10)) diff --git a/check_new_word_lib.py b/check_new_word_lib.py new file mode 100644 index 0000000..d6aa64b --- /dev/null +++ b/check_new_word_lib.py @@ -0,0 +1,11 @@ +import pandas as pd + +# 新的定稿单词库路径 +new_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx" +# 原始带详细字段的单词表路径 +origin_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx" + +print("===== 新定稿单词库结构 =====") +df_new = pd.read_excel(new_file) +print(f"列名:{list(df_new.columns)}") +print(f"前10行数据预览:\n{df_new.head(10)}") diff --git a/check_sheets.py b/check_sheets.py new file mode 100644 index 0000000..f0d0eeb --- /dev/null +++ b/check_sheets.py @@ -0,0 +1,14 @@ +import pandas as pd +from openpyxl import load_workbook + +# 最新的定稿库文件路径 +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx" + +# 查看所有sheet +wb = load_workbook(final_lib_file, read_only=True) +print(f"文件包含的sheet:{wb.sheetnames}") + +for sheet_name in wb.sheetnames: + df = pd.read_excel(final_lib_file, sheet_name=sheet_name) + print(f"\nsheet名称:{sheet_name},行数:{len(df)}") + print(f"前3行预览:\n{df.head(3)}") diff --git a/check_unit_info.py b/check_unit_info.py new file mode 100644 index 0000000..32d5a02 --- /dev/null +++ b/check_unit_info.py @@ -0,0 +1,10 @@ +import pandas as pd + +file2 = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---286e16db-d460-460d-95a4-242f28a0429c.xlsx" +df2 = pd.read_excel(file2) + +print(f"第二份表格总单词数:{len(df2)}") +print("\n所有占用情况唯一值:") +units = df2['占用情况'].dropna().unique() +for unit in units: + print(unit) diff --git a/check_word_match.py b/check_word_match.py new file mode 100644 index 0000000..92aca13 --- /dev/null +++ b/check_word_match.py @@ -0,0 +1,41 @@ +import pandas as pd + +# 文件路径 +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx" # 定稿单词库 +difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx" # 难度表 + +# 读取 +df_final = pd.read_excel(final_lib_file) +df_diff = pd.read_excel(difficulty_file) + +# 处理定稿库单词:去空、去非字符串(比如数字)、转小写统一对比 +final_words = [] +for w in df_final['单词'].tolist(): + if pd.notna(w) and isinstance(w, str): + final_words.append(w.lower()) +final_set = set(final_words) +print(f"定稿库有效单词(纯字符串,去空):{len(final_set)}个") +print(f"定稿库原始总条目数:{len(df_final)}") +print(f"定稿库非字符串/空值条目数:{len(df_final) - len(final_words)}") + +# 处理难度表单词 +diff_words = [] +for w in df_diff['单词'].tolist(): + if pd.notna(w) and isinstance(w, str): + diff_words.append(w.lower()) +diff_set = set(diff_words) +print(f"\n难度表有效单词:{len(diff_set)}个") +print(f"难度表原始总条目数:{len(df_diff)}") + +# 差异统计 +match_count = len(diff_set & final_set) +unmatch_count = len(diff_set - final_set) +print(f"\n匹配上的单词数量:{match_count}") +print(f"未匹配的单词数量:{unmatch_count}") + +# 查看定稿库中不是单词的内容 +print("\n定稿库中不是有效单词的内容示例:") +for w in df_final['单词'].tolist(): + if pd.isna(w) or not isinstance(w, str): + print(w, type(w)) + break diff --git a/confirm_category_rule.py b/confirm_category_rule.py new file mode 100644 index 0000000..55691c8 --- /dev/null +++ b/confirm_category_rule.py @@ -0,0 +1,33 @@ +import pandas as pd + +new_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx" +df_new = pd.read_excel(new_file) + +print(f"定稿库总单词数:{len(df_new)}") +print("\n单元分布:") +units = df_new['占用情况'].dropna().unique() +units_sorted = sorted(units, key=lambda x: (int(x.split('-')[1][1:]) if x.startswith('S') else 999, int(x.split('-')[2][1:]) if len(x.split('-'))>2 else 999)) +for unit in units_sorted: + count = len(df_new[df_new['占用情况'] == unit]) + print(f"{unit}: {count}个") + +# 统计上册(S0 + S1 U1-U6)和下册(S1 U7+)的数量 +upper_count = 0 +lower_count = 0 +for idx, row in df_new.iterrows(): + unit = row['占用情况'] + if pd.isna(unit) or unit == '不常见': + continue + unit = unit.strip() + if unit.startswith('S0-'): + upper_count +=1 + elif unit.startswith('S1-U'): + unit_num = int(unit.split('-')[1][1:]) + if unit_num <=6: + upper_count +=1 + else: + lower_count +=1 + +print(f"\n按单元统计:") +print(f"上册单词总数(S0 + S1 U1-U6):{upper_count}") +print(f"下册单词总数(S1 U7+):{lower_count}") diff --git a/daily_summary.log b/daily_summary.log index 454e713..561b27f 100644 --- a/daily_summary.log +++ b/daily_summary.log @@ -56,3 +56,17 @@ cat: /root/.openclaw/workspace-xiaoban/.feishu_token: No such file or directory Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 261 100 18 100 243 375 5070 --:--:-- --:--:-- --:--:-- 5553 404 page not found/root/.openclaw/workspace-xiaoban/daily_summary.sh: line 42: /home/ubuntu/.nvm/versions/node/v24.14.0/bin/openclaw: No such file or directory +[master 0fea555] 每日总结更新 20260313 + 7 files changed, 3946 insertions(+), 1 deletion(-) + create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_V2\344\274\230\345\214\226\347\211\210.csv" + create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_\345\220\253\346\210\220\346\234\254\350\257\204\345\210\206.csv" + create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_\345\220\253\346\210\220\346\234\254\350\257\204\345\210\206_\346\234\200\346\226\260\347\211\210.csv" + create mode 100644 "\345\215\225\350\257\215\351\242\204\345\244\204\347\220\206\345\205\250\350\241\250_\346\234\200\347\273\210\344\274\230\345\214\226\347\211\210.csv" + create mode 100644 "\345\255\246\344\271\240\345\206\205\345\256\271\347\263\273\346\225\260\350\241\250_\350\256\241\347\256\227\345\256\214\346\210\220.xlsx" +error: src refspec main does not match any +error: failed to push some refs to 'https://git.valavala.com/ai_member_only/ai_member_xiaoban' +cat: /root/.openclaw/workspace-xiaoban/.feishu_token: No such file or directory + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed + 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 261 100 18 100 243 428 5780 --:--:-- --:--:-- --:--:-- 6365 +404 page not found/root/.openclaw/workspace-xiaoban/daily_summary.sh: line 42: /home/ubuntu/.nvm/versions/node/v24.14.0/bin/openclaw: No such file or directory diff --git a/final_reclassify.py b/final_reclassify.py new file mode 100644 index 0000000..e3feb1d --- /dev/null +++ b/final_reclassify.py @@ -0,0 +1,41 @@ +import pandas as pd + +# 文件路径 +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx" # 定稿单词库(两个sheet:上/下) +difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx" # 难度表 +output_file = "/root/.openclaw/workspace-xiaoban/最终版单词上下册分类结果.xlsx" + +# 读取定稿库的两个sheet +df_upper_lib = pd.read_excel(final_lib_file, sheet_name='单词表-LV1(上)') +df_lower_lib = pd.read_excel(final_lib_file, sheet_name='单词表-LV1(下)') + +# 提取上下册单词列表,去空值 +upper_words = set(df_upper_lib['单词'].dropna().tolist()) +lower_words = set(df_lower_lib['单词'].dropna().tolist()) + +print(f"定稿库上册单词数:{len(upper_words)}") +print(f"定稿库下册单词数:{len(lower_words)}") +print(f"合计:{len(upper_words)+len(lower_words)}") + +# 读取难度表 +df_diff = pd.read_excel(difficulty_file) + +# 匹配分类 +df_diff['分类'] = df_diff['单词'].apply(lambda x: '上册' if x in upper_words else '下册' if x in lower_words else '未匹配') + +# 拆分结果 +df_upper = df_diff[df_diff['分类'] == '上册'].drop(columns=['分类']) +df_lower = df_diff[df_diff['分类'] == '下册'].drop(columns=['分类']) +df_other = df_diff[df_diff['分类'] == '未匹配'].drop(columns=['分类']) + +# 写入结果 +with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + df_upper.to_excel(writer, sheet_name='上册单词(最终版)', index=False) + df_lower.to_excel(writer, sheet_name='下册单词(最终版)', index=False) + if len(df_other) >0: + df_other.to_excel(writer, sheet_name='未匹配单词', index=False) + +print(f"\n处理完成!结果已保存到:{output_file}") +print(f"上册匹配到单词数:{len(df_upper)}") +print(f"下册匹配到单词数:{len(df_lower)}") +print(f"未匹配到单词数:{len(df_other)}") diff --git a/generate_teaching_scheme.py b/generate_teaching_scheme.py new file mode 100644 index 0000000..6199bb8 --- /dev/null +++ b/generate_teaching_scheme.py @@ -0,0 +1,72 @@ +import pandas as pd + +# 你提供的核心逻辑,适配Excel输入输出 +def process_vocabulary_system(file_path): + # 1. 加载Excel数据 + try: + df = pd.read_excel(file_path) + except FileNotFoundError: + return "Error: File not found." + + df.columns = [c.strip() for c in df.columns] + print(f"加载文件成功,共{len(df)}条单词记录") + + # 2. 你定义的特殊规则 + t2_special_list = { + 'invisible': {'air', 'wind', 'smoke', 'gas'}, + 'abstract': {'song', 'friend', 'hobby', 'art', 'pe', 'music', 'fun'}, + 'generalized': {'child', 'children', 'father', 'mother', 'food', 'colour', 'animal', 'toy'}, + 'identity': {'address', 'age', 'aunt', 'name'} + } + + # 预展开T2特殊词集合 + all_t2_special = {item for sublist in t2_special_list.values() for item in sublist} + + # 3. 核心处理逻辑 + def apply_rules(row): + # 清洗输入 + word = str(row.get('单词', '')).lower().strip() + t_score = pd.to_numeric(row.get('实现成本(T)', 1), errors='coerce') + if pd.isna(t_score): + t_score = 1 + + # 规则分支 + if t_score >= 3: + scheme = "逻辑交互 / UI 处理" + reason = "英语骨架词。涉及空间位置、时序或数量的逻辑判定,需系统重度UI引导。" + link = "建议设计‘解谜指令’,如:利用 here/there 进行远近空间坐标对比任务。" + + elif t_score == 2 or word in all_t2_special: + scheme = "动画 / 特效 / UI处理" + if word in t2_special_list['invisible']: + reason = "隐形名词。需环境联动(如风吹树叶)和特效辅助表现。" + link = "联动关联实物,如:wind 联动 tree/leaf 的动态表现。" + elif word in t2_special_list['generalized']: + reason = "泛化概念。无法用单一图片代表,需UI组合展示或多模型联动。" + link = f"联动具体成员,由 {word} 展示其下属的 T1 级具象单词集合。" + elif word in t2_special_list['abstract'] or word in t2_special_list['identity']: + reason = "抽象/身份信息。需通过情节演绎或特定 UI 界面(如家谱)界定。" + link = "联动相关动作,如:song 联动 sing;age 联动 numbers。" + else: + reason = "动作/状态词。需 Animator 动画、粒子特效或角色表情反馈。" + link = "建议设计状态切换任务,如:open vs closed;dirty vs clean。" + + else: # T1 情况 + scheme = "静态模型展示" + reason = "具象实物。在 Unity 中对应单一、静态的物理模型或材质资源。" + link = "可作为背景或道具。建议联动颜色词或方位词增加任务厚度。" + + return pd.Series([scheme, reason, link]) + + # 执行规则生成新列 + df[['教学方案展示', '实现理由', '联动建议']] = df.apply(apply_rules, axis=1) + + # 4. 导出为Excel + output_file = "/root/.openclaw/workspace-xiaoban/LV1词汇教学方案生成结果.xlsx" + df.to_excel(output_file, index=False) + return f"Success: 处理完成,结果已保存到 {output_file}" + +# 处理刚收到的LV1词汇表 +input_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---d41d887f-5d65-4eab-928d-a717e5097e8c.xlsx" +result = process_vocabulary_system(input_path) +print(result) diff --git a/match_columns.py b/match_columns.py new file mode 100644 index 0000000..86b5535 --- /dev/null +++ b/match_columns.py @@ -0,0 +1,43 @@ +import pandas as pd + +# 文件路径 +table1_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx" +table3_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---e503b23c-829e-4367-b819-762856bd50b5.xlsx" +output_path = "/root/.openclaw/workspace-xiaoban/匹配完成的LV1词汇表.xlsx" + +# 读取两个表格 +df1 = pd.read_excel(table1_path) +df3 = pd.read_excel(table3_path) + +print(f"表一总条数:{len(df1)}") +print(f"表三总条数:{len(df3)}") +print(f"表一列名:{list(df1.columns)}") +print(f"表三列名:{list(df3.columns)}") + +# 创建映射:统一将单词转为字符串作为key,匹配三个字段 +word_map = {} +for _, row in df1.iterrows(): + word = str(row['单词']).strip() + word_map[word] = { + '难度(D)': row['难度(D)'], + '实现成本(T)': row['实现成本(T)'], + '单词系数': row['单词系数'] + } + +# 给表三添加三列 +def get_value(word, col): + key = str(word).strip() + return word_map.get(key, {}).get(col, None) + +df3['难度(D)'] = df3['单词'].apply(lambda x: get_value(x, '难度(D)')) +df3['实现成本(T)'] = df3['单词'].apply(lambda x: get_value(x, '实现成本(T)')) +df3['单词系数'] = df3['单词'].apply(lambda x: get_value(x, '单词系数')) + +# 保存结果 +df3.to_excel(output_path, index=False) + +# 统计匹配情况 +match_count = df3['难度(D)'].notna().sum() +print(f"\n匹配完成!结果已保存到:{output_path}") +print(f"成功匹配条数:{match_count}") +print(f"未匹配条数:{len(df3) - match_count}") diff --git a/match_lower_final.py b/match_lower_final.py new file mode 100644 index 0000000..21b8b59 --- /dev/null +++ b/match_lower_final.py @@ -0,0 +1,40 @@ +import pandas as pd + +# 文件路径 +difficulty_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx" # 难度_成本单词系数1.0表 +lower_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---59ff96e7-d862-476b-be16-3162afcd818f.xlsx" # 最新的下册单词表 +output_path = "/root/.openclaw/workspace-xiaoban/最终版_LV1下册词汇匹配系数结果.xlsx" + +# 读取表格 +df_diff = pd.read_excel(difficulty_path) +df_lower = pd.read_excel(lower_path) + +print(f"下册单词表总条数:{len(df_lower)}") + +# 创建映射字典,所有单词统一转为字符串匹配,包含数字 +word_map = {} +for _, row in df_diff.iterrows(): + word_key = str(row['单词']).strip() + word_map[word_key] = { + '难度(D)': row['难度(D)'], + '实现成本(T)': row['实现成本(T)'], + '单词系数': row['单词系数'] + } + +# 匹配字段 +def match_field(word, field): + key = str(word).strip() + return word_map.get(key, {}).get(field, None) + +df_lower['难度(D)'] = df_lower['单词'].apply(lambda x: match_field(x, '难度(D)')) +df_lower['实现成本(T)'] = df_lower['单词'].apply(lambda x: match_field(x, '实现成本(T)')) +df_lower['单词系数'] = df_lower['单词'].apply(lambda x: match_field(x, '单词系数')) + +# 保存结果 +df_lower.to_excel(output_path, index=False) + +# 统计 +success_count = df_lower['难度(D)'].notna().sum() +print(f"\n匹配完成!结果已保存到:{output_path}") +print(f"成功匹配条数:{success_count}") +print(f"未匹配条数:{len(df_lower) - success_count}") diff --git a/match_lv1_lower.py b/match_lv1_lower.py new file mode 100644 index 0000000..0dcde31 --- /dev/null +++ b/match_lv1_lower.py @@ -0,0 +1,39 @@ +import pandas as pd + +# 文件路径 +difficulty_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx" # 难度表 +lv1_lower_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---5b90d819-abf3-4882-8772-ed8f3e0b449f.xlsx" # LV1下册词汇表 +output_path = "/root/.openclaw/workspace-xiaoban/正确版_LV1下册词汇匹配结果.xlsx" + +# 读取表格 +df_diff = pd.read_excel(difficulty_path) +df_lower = pd.read_excel(lv1_lower_path) + +print(f"LV1下册词汇表总条数:{len(df_lower)}") + +# 创建难度表映射(全部单词,不区分上下册,按内容匹配) +word_map = {} +for _, row in df_diff.iterrows(): + word = str(row['单词']).strip() + word_map[word] = { + '难度(D)': row['难度(D)'], + '实现成本(T)': row['实现成本(T)'], + '单词系数': row['单词系数'] + } + +# 匹配字段 +def get_value(word, col): + key = str(word).strip() + return word_map.get(key, {}).get(col, None) + +df_lower['难度(D)'] = df_lower['单词'].apply(lambda x: get_value(x, '难度(D)')) +df_lower['实现成本(T)'] = df_lower['单词'].apply(lambda x: get_value(x, '实现成本(T)')) +df_lower['单词系数'] = df_lower['单词'].apply(lambda x: get_value(x, '单词系数')) + +# 保存结果 +df_lower.to_excel(output_path, index=False) + +match_count = df_lower['难度(D)'].notna().sum() +print(f"\nLV1下册匹配完成!结果已保存到:{output_path}") +print(f"成功匹配条数:{match_count}") +print(f"未匹配条数:{len(df_lower) - match_count}") diff --git a/match_remaining.py b/match_remaining.py new file mode 100644 index 0000000..dc892d9 --- /dev/null +++ b/match_remaining.py @@ -0,0 +1,41 @@ +import pandas as pd + +# 文件路径 +table1_path = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---4d1d9fe3-1e36-4df1-baf6-d826fcf7a05e.xlsx" +table2_path = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---5b90d819-abf3-4882-8772-ed8f3e0b449f.xlsx" # 剩下的480行 +output_path = "/root/.openclaw/workspace-xiaoban/匹配完成的LV1下册词汇表.xlsx" + +# 读取表格 +df1 = pd.read_excel(table1_path) +df2 = pd.read_excel(table2_path) + +print(f"表一总条数:{len(df1)}") +print(f"待处理的下册表总条数:{len(df2)}") + +# 创建映射 +word_map = {} +for _, row in df1.iterrows(): + word = str(row['单词']).strip() + word_map[word] = { + '难度(D)': row['难度(D)'], + '实现成本(T)': row['实现成本(T)'], + '单词系数': row['单词系数'] + } + +# 匹配字段 +def get_value(word, col): + key = str(word).strip() + return word_map.get(key, {}).get(col, None) + +df2['难度(D)'] = df2['单词'].apply(lambda x: get_value(x, '难度(D)')) +df2['实现成本(T)'] = df2['单词'].apply(lambda x: get_value(x, '实现成本(T)')) +df2['单词系数'] = df2['单词'].apply(lambda x: get_value(x, '单词系数')) + +# 保存 +df2.to_excel(output_path, index=False) + +# 统计 +match_count = df2['难度(D)'].notna().sum() +print(f"\n处理完成!结果已保存到:{output_path}") +print(f"成功匹配条数:{match_count}") +print(f"未匹配条数:{len(df2) - match_count}") diff --git a/new_reclassify.py b/new_reclassify.py new file mode 100644 index 0000000..627ca20 --- /dev/null +++ b/new_reclassify.py @@ -0,0 +1,42 @@ +import pandas as pd + +# 文件路径 +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx" # 第一份:定稿单词库(仅单词列表) +difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx" # 第二份:难度表 +output_file = "/root/.openclaw/workspace-xiaoban/最新定稿版单词上下册分类结果.xlsx" + +# 读取两个表格 +df_final = pd.read_excel(final_lib_file) +df_diff = pd.read_excel(difficulty_file) + +# 提取定稿单词列表,去空值,去重 +final_words = df_final['单词'].dropna().unique().tolist() +total = len(final_words) +print(f"定稿单词库总有效不重复单词数:{total}") + +# 按照定稿库顺序:前一半上册,后一半下册 +upper_words = set(final_words[:total//2]) +lower_words = set(final_words[total//2:]) + +print(f"上册单词数:{len(upper_words)}") +print(f"下册单词数:{len(lower_words)}") + +# 分类难度表单词匹配分类 +df_diff['分类'] = df_diff['单词'].apply(lambda x: '上册' if x in upper_words else '下册' if x in lower_words else '未匹配') + +# 拆分结果 +df_upper = df_diff[df_diff['分类'] == '上册'].drop(columns=['分类']) +df_lower = df_diff[df_diff['分类'] == '下册'].drop(columns=['分类']) +df_other = df_diff[df_diff['分类'] == '未匹配'].drop(columns=['分类']) + +# 写入结果 +with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + df_upper.to_excel(writer, sheet_name='上册单词', index=False) + df_lower.to_excel(writer, sheet_name='下册单词', index=False) + if len(df_other) >0: + df_other.to_excel(writer, sheet_name='未匹配单词', index=False) + +print(f"\n处理完成!结果已保存到:{output_file}") +print(f"上册匹配到单词数:{len(df_upper)}") +print(f"下册匹配到单词数:{len(df_lower)}") +print(f"未匹配到单词数:{len(df_other)}") diff --git a/process_word_list.py b/process_word_list.py new file mode 100644 index 0000000..871e546 --- /dev/null +++ b/process_word_list.py @@ -0,0 +1,53 @@ +import pandas as pd +from openpyxl import load_workbook + +# 文件路径 +file1 = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx" +file2 = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---286e16db-d460-460d-95a4-242f28a0429c.xlsx" +output_file = "/root/.openclaw/workspace-xiaoban/单词上下分类结果.xlsx" + +# 读取第一个表格(带详细字段的单词表) +df1 = pd.read_excel(file1) +# 读取第二个表格(LV1词汇表) +df2 = pd.read_excel(file2) + +# 给第二份表格添加上下分类 +def get_category(unit): + if pd.isna(unit) or unit == '不常见': + return '其他' + unit = unit.strip() + if unit.startswith('S0-'): + return '上' + if unit.startswith('S1-U'): + # 提取单元号 + unit_num = int(unit.split('-')[1][1:]) + if unit_num <= 6: + return '上' + else: + return '下' + return '其他' + +df2['分类'] = df2['占用情况'].apply(get_category) + +# 创建单词到分类的映射 +word_category_map = df2.drop_duplicates('单词').set_index('单词')['分类'].to_dict() + +# 给第一份表格添加分类列 +df1['分类'] = df1['单词'].map(word_category_map) + +# 拆分分类 +df_upper = df1[df1['分类'] == '上'].drop(columns=['分类']) +df_lower = df1[df1['分类'] == '下'].drop(columns=['分类']) +df_other = df1[df1['分类'] == '其他'].drop(columns=['分类']) + +# 写入结果到Excel,分三个sheet +with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + df_upper.to_excel(writer, sheet_name='上册单词', index=False) + df_lower.to_excel(writer, sheet_name='下册单词', index=False) + if len(df_other) > 0: + df_other.to_excel(writer, sheet_name='其他分类单词', index=False) + +print(f"处理完成!结果已保存到:{output_file}") +print(f"上册单词数量:{len(df_upper)}") +print(f"下册单词数量:{len(df_lower)}") +print(f"其他分类单词数量:{len(df_other)}") diff --git a/reclassify_simple.py b/reclassify_simple.py new file mode 100644 index 0000000..9d88f6f --- /dev/null +++ b/reclassify_simple.py @@ -0,0 +1,28 @@ +import pandas as pd + +# 文件路径 +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---1de9de11-1a6b-45c7-856a-4d69f9b26aa9.xlsx" # 定稿单词库 +difficulty_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---a5011ea1-5bef-47af-be44-633db83f822e.xlsx" # 难度表 +output_file = "/root/.openclaw/workspace-xiaoban/极简版单词上下册分类结果.xlsx" + +# 读取表格 +df_final = pd.read_excel(final_lib_file) +df_diff = pd.read_excel(difficulty_file) + +# 完全按原始顺序拆分:前250行上册,后250行下册,无视内容 +final_words_all = df_final['单词'].tolist() +upper_words = final_words_all[:250] +lower_words = final_words_all[250:] + +# 直接匹配,无视重复 +upper_df = df_diff[df_diff['单词'].isin(upper_words)] +lower_df = df_diff[df_diff['单词'].isin(lower_words)] + +# 写入结果 +with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + upper_df.to_excel(writer, sheet_name='上册单词', index=False) + lower_df.to_excel(writer, sheet_name='下册单词', index=False) + +print(f"处理完成!结果已保存到:{output_file}") +print(f"上册单词数量:{len(upper_df)}") +print(f"下册单词数量:{len(lower_df)}") diff --git a/reclassify_word.py b/reclassify_word.py new file mode 100644 index 0000000..d00ce0b --- /dev/null +++ b/reclassify_word.py @@ -0,0 +1,52 @@ +import pandas as pd +from openpyxl import load_workbook + +# 文件路径 +origin_file = "/root/.openclaw/media/inbound/é_¾åº_æ_æ_å_è_ç³_æ_1.0---8b762144-a4a3-481d-bdb8-b3b0dcbf875a.xlsx" +final_lib_file = "/root/.openclaw/media/inbound/â_¼ï_LV1-å_ç_å_è_åº_-ç¼_å_é_è_ç_è_é---23d539f8-33d6-4679-b9ae-91520114ae54.xlsx" +output_file = "/root/.openclaw/workspace-xiaoban/定稿版单词上下册分类结果.xlsx" + +# 读取原始单词表(带详细字段) +df_origin = pd.read_excel(origin_file) +# 读取定稿单词库 +df_final = pd.read_excel(final_lib_file) + +# 给定稿库单词添加上下册分类 +def get_category(unit): + if pd.isna(unit) or unit.strip() == '' or unit.strip() == '不常见': + return '不匹配' + unit = unit.strip() + if unit.startswith('S0-'): + return '上册' + if unit.startswith('S1-U'): + unit_num = int(unit.split('-')[1][1:]) + if unit_num <=6: + return '上册' + else: + return '下册' + return '不匹配' + +df_final['分类'] = df_final['占用情况'].apply(get_category) + +# 创建单词到分类的映射(仅包含定稿库中存在的单词) +word_category_map = df_final[df_final['分类'] != '不匹配'].drop_duplicates('单词').set_index('单词')['分类'].to_dict() + +# 给原始单词表匹配分类 +df_origin['分类'] = df_origin['单词'].map(word_category_map) + +# 拆分上下册 +df_upper = df_origin[df_origin['分类'] == '上册'].drop(columns=['分类']) +df_lower = df_origin[df_origin['分类'] == '下册'].drop(columns=['分类']) +df_other = df_origin[~df_origin['分类'].isin(['上册', '下册'])].drop(columns=['分类']) + +# 写入结果 +with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + df_upper.to_excel(writer, sheet_name='上册单词(定稿版)', index=False) + df_lower.to_excel(writer, sheet_name='下册单词(定稿版)', index=False) + if len(df_other) > 0: + df_other.to_excel(writer, sheet_name='未匹配到定稿库的单词', index=False) + +print(f"处理完成!结果已保存到:{output_file}") +print(f"上册匹配到单词数量:{len(df_upper)}") +print(f"下册匹配到单词数量:{len(df_lower)}") +print(f"未匹配到定稿库的单词数量:{len(df_other)}") diff --git a/tmp_daily_summary.md b/tmp_daily_summary.md index 009d472..e6e0aad 100644 --- a/tmp_daily_summary.md +++ b/tmp_daily_summary.md @@ -1,3 +1,3 @@ -=== 每日总结 20260313 === +=== 每日总结 20260314 === ## 昨日关键进展 无昨日记忆记录 diff --git a/匹配完成的LV1下册词汇表.xlsx b/匹配完成的LV1下册词汇表.xlsx new file mode 100644 index 0000000..c85342b Binary files /dev/null and b/匹配完成的LV1下册词汇表.xlsx differ diff --git a/匹配完成的LV1词汇表.xlsx b/匹配完成的LV1词汇表.xlsx new file mode 100644 index 0000000..4cf4328 Binary files /dev/null and b/匹配完成的LV1词汇表.xlsx differ diff --git a/单词上下分类结果.xlsx b/单词上下分类结果.xlsx new file mode 100644 index 0000000..587f7a0 Binary files /dev/null and b/单词上下分类结果.xlsx differ diff --git a/定稿版单词上下册分类结果.xlsx b/定稿版单词上下册分类结果.xlsx new file mode 100644 index 0000000..7aecb29 Binary files /dev/null and b/定稿版单词上下册分类结果.xlsx differ diff --git a/最新定稿版单词上下册分类结果.xlsx b/最新定稿版单词上下册分类结果.xlsx new file mode 100644 index 0000000..ba63b4d Binary files /dev/null and b/最新定稿版单词上下册分类结果.xlsx differ diff --git a/最终版_LV1下册词汇匹配系数结果.xlsx b/最终版_LV1下册词汇匹配系数结果.xlsx new file mode 100644 index 0000000..c635396 Binary files /dev/null and b/最终版_LV1下册词汇匹配系数结果.xlsx differ diff --git a/最终版单词上下册分类结果.xlsx b/最终版单词上下册分类结果.xlsx new file mode 100644 index 0000000..848dfb5 Binary files /dev/null and b/最终版单词上下册分类结果.xlsx differ diff --git a/极简版单词上下册分类结果.xlsx b/极简版单词上下册分类结果.xlsx new file mode 100644 index 0000000..2913147 Binary files /dev/null and b/极简版单词上下册分类结果.xlsx differ diff --git a/正确版_LV1下册词汇匹配结果.xlsx b/正确版_LV1下册词汇匹配结果.xlsx new file mode 100644 index 0000000..d99592a Binary files /dev/null and b/正确版_LV1下册词汇匹配结果.xlsx differ