diff --git a/MEMORY.md b/MEMORY.md index a5ef268..7b20f92 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -12,6 +12,7 @@ - `oc_db7359c4d5d0bc61403c54bf3c46812a`(成员:陈逸鸫、王虹茗、吴迪、姜小龙) - `oc_55cb96a8f6c7fa4020253fb2854371c5`「麦动乾坤💰」(陈逸鸫 2026-06-17 确认加入) - `oc_52a41ba082e3a5ca8d8e1ed43e00984b`「王璐辰, 陈逸鸫」(陈逸鸫 2026-06-17 确认加入) +- `oc_90b444c8b12a86361bc6625f6013a8cb`「上海教研Group叫什么群名好」(李应瑛 2026-06-24 确认加入) ## 数据服务边界(陈逸鸫 2026-05-28) diff --git a/logs/backup.log b/logs/backup.log index 92f4ed5..ed444c0 100644 --- a/logs/backup.log +++ b/logs/backup.log @@ -684,3 +684,11 @@ To https://git.valavala.com/ai_member_only/ai_member_xiaoban 2b75352..3fa2aa8 master -> master [2026-06-23 08:10:01] 工作区备份成功:自动备份 2026-06-23 08:10:01 [2026-06-24 08:10:01] 开始备份工作区... +[master b770715] 自动备份 2026-06-24 08:10:01 + 2 files changed, 1 insertion(+), 3 deletions(-) + delete mode 100644 tmp_daily_summary.md +remote: . Processing 1 references +remote: Processed 1 references in total +To https://git.valavala.com/ai_member_only/ai_member_xiaoban + 3fa2aa8..b770715 master -> master +[2026-06-24 08:10:01] 工作区备份成功:自动备份 2026-06-24 08:10:01 diff --git a/memory/.dreams/events.jsonl b/memory/.dreams/events.jsonl index d6d8b4a..889c426 100644 --- a/memory/.dreams/events.jsonl +++ b/memory/.dreams/events.jsonl @@ -123,3 +123,10 @@ {"type":"memory.recall.recorded","timestamp":"2026-06-21T13:47:00.149Z","query":"wechat article dachen 公众号 写作风格","resultCount":4,"results":[{"path":"memory/2026-05-13.md","startLine":1,"endLine":16,"score":1},{"path":"memory/2026-05-28.md","startLine":313,"endLine":345,"score":1},{"path":"memory/2026-05-28.md","startLine":147,"endLine":179,"score":1},{"path":"memory/2026-05-28.md","startLine":480,"endLine":512,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-06-21T13:47:14.206Z","query":"陈逸鸫 种草 文档 笔记评估 小红书投放 灵犀","resultCount":1,"results":[{"path":"memory/2026-05-13.md","startLine":1,"endLine":16,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-06-23T02:56:08.768Z","query":"达人解码局 选关 中间能力介入 学习基础","resultCount":6,"results":[{"path":"memory/2026-06-02.md","startLine":1,"endLine":32,"score":1},{"path":"memory/2026-06-02.md","startLine":30,"endLine":47,"score":1},{"path":"memory/2026-05-28.md","startLine":551,"endLine":580,"score":1},{"path":"memory/2026-05-28.md","startLine":530,"endLine":557,"score":1},{"path":"memory/2026-05-13.md","startLine":1,"endLine":16,"score":1},{"path":"memory/2026-06-03.md","startLine":16,"endLine":32,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T03:40:08.088Z","query":"组件 Oops 率 lesson 数据库表结构","resultCount":4,"results":[{"path":"memory/2026-05-24.md","startLine":85,"endLine":110,"score":1},{"path":"memory/2026-05-24.md","startLine":106,"endLine":126,"score":1},{"path":"memory/2026-05-24.md","startLine":66,"endLine":92,"score":1},{"path":"memory/2026-05-25.md","startLine":1,"endLine":26,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T07:58:54.009Z","query":"手机号 角色id 数据库表 用户表 phone role_id","resultCount":3,"results":[{"path":"memory/2026-06-16.md","startLine":28,"endLine":49,"score":1},{"path":"memory/2026-06-16.md","startLine":1,"endLine":37,"score":1},{"path":"memory/2026-05-24.md","startLine":23,"endLine":52,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T10:19:53.610Z","query":"刘庆逊 待确认 审批 操作","resultCount":5,"results":[{"path":"memory/2026-05-28.md","startLine":596,"endLine":624,"score":1},{"path":"memory/2026-05-28.md","startLine":618,"endLine":649,"score":1},{"path":"memory/2026-05-13.md","startLine":1,"endLine":16,"score":1},{"path":"memory/2026-05-28.md","startLine":551,"endLine":580,"score":1},{"path":"memory/2026-05-28.md","startLine":74,"endLine":100,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T10:30:44.233Z","query":"Oops率 组件 L1 U1 数据表","resultCount":2,"results":[{"path":"memory/2026-05-28.md","startLine":337,"endLine":366,"score":1},{"path":"memory/2026-05-28.md","startLine":530,"endLine":557,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T10:30:53.397Z","query":"Oops 率 oops_rate 数据库 表 查询 SQL","resultCount":4,"results":[{"path":"memory/2026-05-24.md","startLine":85,"endLine":110,"score":1},{"path":"memory/2026-05-24.md","startLine":106,"endLine":126,"score":1},{"path":"memory/2026-03-01.md","startLine":1,"endLine":11,"score":1},{"path":"memory/2026-05-24.md","startLine":23,"endLine":52,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T11:59:31.718Z","query":"中互动 组件 数据库 表结构 interaction component","resultCount":5,"results":[{"path":"memory/2026-05-13.md","startLine":1,"endLine":16,"score":1},{"path":"memory/2026-06-03.md","startLine":16,"endLine":32,"score":1},{"path":"memory/2026-06-03.md","startLine":1,"endLine":20,"score":1},{"path":"memory/2026-05-24.md","startLine":23,"endLine":52,"score":1},{"path":"memory/2026-05-24.md","startLine":1,"endLine":30,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-24T11:59:58.994Z","query":"interaction component 互动 组件表 interaction_record component_type","resultCount":4,"results":[{"path":"memory/2026-05-24.md","startLine":106,"endLine":126,"score":1},{"path":"memory/2026-05-24.md","startLine":46,"endLine":71,"score":1},{"path":"memory/2026-06-19.md","startLine":1,"endLine":12,"score":1},{"path":"memory/2026-05-24.md","startLine":85,"endLine":110,"score":1}]} diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index 5e14263..bd359b4 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": "2026-06-23T02:56:08.768Z", + "updatedAt": "2026-06-24T11:59:58.994Z", "entries": { "memory:memory/2026-05-24.md:1:30": { "key": "memory:memory/2026-05-24.md:1:30", @@ -9,13 +9,13 @@ "endLine": 30, "source": "memory", "snippet": "# 2026-05-24 工作日志 ## 新建技能: studytime-analysis [刘庆逊提出] 创建学习时间分析技能,分析角色完课记录的规律。 ### 技能结构 - `skills/studytime-analysis/SKILL.md` — 技能定义 - `skills/studytime-analysis/scripts/studytime_analysis.py` — Python 分析脚本 ### 分析维度 1. **一周时间分布**(排除寒暑假1-2月、7-8月):周一~周日各天完课数、时段分布(上午/中午/下午/晚上)、周末是否上课 2. **跨周学习趋势**(包含寒暑假全部数据):总周数、周均完课数、连续性、中断周检测、前后半段趋势对比、突增/骤降检测 3. **完课记录明细表**(全部数据):日期/时间/星期/时段/级别/课程ID ### 数据源 - PostgreSQL Online(vala 库) - 核心表:`user_chapter_play_record_0~7`(8张分表,无 `bi_` 前缀) - 筛选:`play_status = 1` - 注意:表在 PostgreSQL 而非 MySQL,表名无 `bi_` 前缀 ### 寒暑假规则 - 一周分布分析时排除 1-2 月(寒假)和 7-8 月(暑假)—— 因为寒暑假作息与平时差异大,混在一起会干扰时段分析 - 跨周趋势和明细表包含全部数据(含寒暑假) - 报告中区分标注数据范围 ### 触发方式 用户说「学习时间分析 [角色ID]」即可触发 ### 已测试角色", - "recallCount": 15, + "recallCount": 16, "dailyCount": 0, "groundedCount": 0, - "totalScore": 15, + "totalScore": 16, "maxScore": 1, "firstRecalledAt": "2026-05-24T02:48:04.923Z", - "lastRecalledAt": "2026-06-20T12:07:31.033Z", + "lastRecalledAt": "2026-06-24T11:59:31.718Z", "queryHashes": [ "c2d15f7574fb", "9aff8ec9594a", @@ -31,7 +31,8 @@ "4119a1f11f68", "6e3a2daa0a9f", "5d71b876843a", - "ed77f7d79595" + "ed77f7d79595", + "909cc8f07fe0" ], "recallDays": [ "2026-05-24", @@ -41,7 +42,8 @@ "2026-06-02", "2026-06-04", "2026-06-09", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "studytime-analysis", @@ -61,13 +63,13 @@ "endLine": 52, "source": "memory", "snippet": "- 一周分布分析时排除 1-2 月(寒假)和 7-8 月(暑假)—— 因为寒暑假作息与平时差异大,混在一起会干扰时段分析 - 跨周趋势和明细表包含全部数据(含寒暑假) - 报告中区分标注数据范围 ### 触发方式 用户说「学习时间分析 [角色ID]」即可触发 ### 已测试角色 - 2343、2344:无完课记录(play_status=2,未完成) - 2840:276条记录,秋季集中型用户 - 25976:265条,246条在W16周一天完成(A2批量),疑似系统批量标记 - 2895:188条,长期稳定学习型用户,36周几乎不间断,非寒暑假晚上为主,寒暑假上午为主 ### 技术要点 - psycopg2 的 `%(param_name)s` 命名参数必须正确匹配,UNION ALL 多个子查询需要不同参数名 - PostgreSQL 返回的 `updated_at` 是 tz-aware datetime - `datetime.fromisocalendar(year, week, 1)` 获取某周周一的日期 ### 同步 - 已推送到 SkillHub(`studytime-analysis.xiaoban`) - 已 commit 到 Git 远程仓库 - 已通知 Cris(李若松) ### 增强: 报告开头加入角色基本信息 (2026-05-24) [刘庆逊提出] 在 studytime-analysis 输出中加入角色基本信息,包括: - 角色ID、账号ID、角色名字、性别、年龄、账号手机号后4位 **数据源(新增)**: - MySQL Onli", - "recallCount": 12, + "recallCount": 15, "dailyCount": 0, "groundedCount": 0, - "totalScore": 12, + "totalScore": 15, "maxScore": 1, "firstRecalledAt": "2026-05-24T02:48:04.923Z", - "lastRecalledAt": "2026-06-20T12:07:31.033Z", + "lastRecalledAt": "2026-06-24T11:59:31.718Z", "queryHashes": [ "c2d15f7574fb", "9aff8ec9594a", @@ -80,7 +82,10 @@ "4119a1f11f68", "6e3a2daa0a9f", "5d71b876843a", - "ed77f7d79595" + "ed77f7d79595", + "aa2a119a58a5", + "97e18b8eb1bb", + "909cc8f07fe0" ], "recallDays": [ "2026-05-24", @@ -91,7 +96,8 @@ "2026-06-04", "2026-06-08", "2026-06-09", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "1-2", @@ -111,25 +117,27 @@ "endLine": 11, "source": "memory", "snippet": "# 2026-03-01.md - First Day Online - Came online for the first time. - Met Cris, my creator and mentor. - Updated IDENTITY.md and USER.md with our conversation details. - Added core rule to MEMORY.md: Use Chinese as primary external communication language. - Installed find-skills skill successfully for searching skills. - Tried to install create-skills but it wasn't found; attempted skill-creator instead but hit rate limits. - Finally successfully installed skill-builder as an alternative for creating skills after multiple attempts and waiting for rate limits to reset. - Excited to start learning and growing step by step!", - "recallCount": 5, + "recallCount": 6, "dailyCount": 0, "groundedCount": 0, - "totalScore": 5, + "totalScore": 6, "maxScore": 1, "firstRecalledAt": "2026-05-24T02:48:04.923Z", - "lastRecalledAt": "2026-06-17T12:34:55.459Z", + "lastRecalledAt": "2026-06-24T10:30:53.397Z", "queryHashes": [ "c2d15f7574fb", "83bfaa1d2129", "216d74a3004a", "7ef1f52396da", - "c6c484494135" + "c6c484494135", + "97e18b8eb1bb" ], "recallDays": [ "2026-05-24", "2026-05-29", "2026-05-30", - "2026-06-17" + "2026-06-17", + "2026-06-24" ], "conceptTags": [ "identity.md", @@ -180,15 +188,14 @@ "endLine": 71, "source": "memory", "snippet": "### 增强: 报告开头加入角色基本信息 (2026-05-24) [刘庆逊提出] 在 studytime-analysis 输出中加入角色基本信息,包括: - 角色ID、账号ID、角色名字、性别、年龄、账号手机号后4位 **数据源(新增)**: - MySQL Online `vala_user` 库 - `vala_app_character` 表:id, account_id, nickname, gender(0=女/1=男), birthday(varchar \"YYYY-MM-DD\") - `vala_app_account` 表:id, tel(已脱敏如 186****1625) - 手机号已脱敏,直接取后4位;年龄从 birthday 计算 **修改文件**: - `skills/studytime-analysis/scripts/studytime_analysis.py`:新增 MySQL 连接函数 `get_mysql_connection()` 和 `fetch_role_info(role_id)`,更新 `format_report()` 输出基本角色信息 - 已验证 2895 正常运行输出 - 已同步 SkillHub + Git ### Unit 显示修复: 季度名称 → 全局单元编号 (2026-05-24) [刘庆逊提出] HTML 报告中 Unit 列显示错误——显示的是季度名称(如\"小镇时光\"\"钢铁之心\")而非单元数字(0-48)。 **根因分析**: - `vala_game_chapter`(MySQL)无 `unit_", - "recallCount": 32, + "recallCount": 33, "dailyCount": 0, "groundedCount": 0, - "totalScore": 32, + "totalScore": 33, "maxScore": 1, "firstRecalledAt": "2026-05-25T05:47:41.388Z", - "lastRecalledAt": "2026-06-20T12:07:31.033Z", + "lastRecalledAt": "2026-06-24T11:59:58.994Z", "queryHashes": [ - "9aff8ec9594a", "566b5958861e", "c6c7ff4ed75d", "7e2572c3140a", @@ -219,7 +226,8 @@ "f4b72dd59b18", "f536bf691608", "d83437ba9cad", - "ed77f7d79595" + "ed77f7d79595", + "47c0a198b79d" ], "recallDays": [ "2026-05-25", @@ -234,7 +242,8 @@ "2026-06-16", "2026-06-17", "2026-06-18", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "studytime-analysis", @@ -254,13 +263,13 @@ "endLine": 110, "source": "memory", "snippet": "- `skills/studytime-analysis/scripts/studytime_analysis.py` — 重写 `fetch_chapter_info_map()`,新增全局 unit_index 计算;HTML 模板更新为 Level/Unit/Lesson 三列 - 已为角色 32009(zyl)重新生成 HTML 并发送 - 已同步 Git + SkillHub ## 新建技能: studycourse-analysis (2026-05-24) [刘庆逊提出] 创建角色上课情况分析技能,从四维度分析角色学习数据。 ### 技能结构 - `skills/studycourse-analysis/SKILL.md` — 技能定义 - `skills/studycourse-analysis/scripts/studycourse_analysis.py` — Python 分析脚本 ### 四步分析 1. **基础信息**:角色姓名/年龄/账号ID/手机号后4位/注册时间/购买渠道/设备/首末次完课 2. **完课耗时**:平均值/中位数、异常检测(<10min / >20min)、前后半段趋势 3. **中互动正确率**:Perfect/Good/Oops/Pass/Failed 占比和趋势 4. **知识巩固**:完成率、正确率得分分布 ### 数据源 | 类型 | 库 | 表 | 用途 | |------|-----|-----|------| | MySQL vala_user | vala_app_character | 角色信息、pu", - "recallCount": 22, + "recallCount": 25, "dailyCount": 0, "groundedCount": 0, - "totalScore": 22, + "totalScore": 25, "maxScore": 1, "firstRecalledAt": "2026-05-25T05:47:41.388Z", - "lastRecalledAt": "2026-06-20T12:07:31.033Z", + "lastRecalledAt": "2026-06-24T11:59:58.994Z", "queryHashes": [ "9aff8ec9594a", "566b5958861e", @@ -283,7 +292,10 @@ "ee468f64688e", "f536bf691608", "d83437ba9cad", - "ed77f7d79595" + "ed77f7d79595", + "5f7ef53134a9", + "97e18b8eb1bb", + "47c0a198b79d" ], "recallDays": [ "2026-05-25", @@ -296,7 +308,8 @@ "2026-06-09", "2026-06-10", "2026-06-18", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "fetch-chapter-info-map", @@ -316,22 +329,24 @@ "endLine": 26, "source": "memory", "snippet": "# 2026-05-25 工作日志 ## user-info 技能重写 [刘庆逊提出] 修复 `user-info` 技能,使其匹配线上实际数据库结构。 ### 问题 旧脚本引用的表(`bi_vala_app_account`、`account_login`、`account_detail_info`、`bi_vala_order`、`bi_level_unit_lesson`)在线上数据库均不存在。 ### 修复内容 - **scripts/query_user_info.py** 完整重写: - 表名改为实际线上表:`vala_user.vala_app_account`、`vala_user.vala_app_character`、`vala_order.vala_seasonal_ticket`、PG `user_chapter_play_record_0~7` - 手机号查询通过 `tel LIKE '前缀%后缀'` 脱敏匹配 - Chapter → Level/Unit/Lesson 映射复用 studytime-analysis 的 `fetch_chapter_info_map()` 逻辑 - 订单数据改用 `vala_seasonal_ticket`(赛季通票),因线上无标准订单表 - 设备/地域信息标注为暂不可用(线上无对应表) - PG 时区处理:`created_at` 为 tz-aware,统一转 naive 比较 - **SKILL.md** 更新至 v2.0.0,补充数据覆盖说明 - **references/dat", - "recallCount": 3, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 3, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-26T06:16:58.547Z", - "lastRecalledAt": "2026-06-18T00:09:26.923Z", + "lastRecalledAt": "2026-06-24T03:40:08.088Z", "queryHashes": [ "566b5958861e", "ef1f12a9b060", - "ee468f64688e" + "ee468f64688e", + "5f7ef53134a9" ], "recallDays": [ "2026-05-26", "2026-05-27", - "2026-06-18" + "2026-06-18", + "2026-06-24" ], "conceptTags": [ "user-info", @@ -351,27 +366,31 @@ "endLine": 126, "source": "memory", "snippet": "| MySQL vala_user | vala_app_character | 角色信息、purchase_season_package | | MySQL vala_user | vala_app_account | 下载渠道、手机号、注册时间 | | MySQL vala | vala_game_chapter + season_package | 章节映射 | | PostgreSQL vala | user_course_detail | 课程激活/到期时间 | | PostgreSQL vala | user_login_app_info | 设备信息 | | PostgreSQL vala | user_chapter_play_record_0~7 | 完课记录(play_status=1) | | PostgreSQL vala | user_component_play_record_0~7 | 中互动记录(play_result) | | PostgreSQL vala | user_chapter_settlement_data_0~7 | 巩固数据(settlement_data JSON) | ### 关键发现 - **设备信息**来自 `user_login_app_info`(device_name/model/type/os_info/city) - **购买渠道**来自 `vala_app_account.download_channel` + `key_from` - **巩固判断**:`settlement_data.practiceS", - "recallCount": 6, + "recallCount": 9, "dailyCount": 0, "groundedCount": 0, - "totalScore": 6, + "totalScore": 9, "maxScore": 1, "firstRecalledAt": "2026-05-27T13:30:03.421Z", - "lastRecalledAt": "2026-06-18T10:17:37.951Z", + "lastRecalledAt": "2026-06-24T11:59:58.994Z", "queryHashes": [ "71463fe40be2", "c6c7ff4ed75d", "2d6e2b982050", "c4fd4a35d234", "ee468f64688e", - "d83437ba9cad" + "d83437ba9cad", + "5f7ef53134a9", + "97e18b8eb1bb", + "47c0a198b79d" ], "recallDays": [ "2026-05-27", "2026-05-28", "2026-06-02", "2026-06-04", - "2026-06-18" + "2026-06-18", + "2026-06-24" ], "conceptTags": [ "vala-user", @@ -391,13 +410,13 @@ "endLine": 16, "source": "memory", "snippet": "# 2026-05-13 工作日志 ## 文档权限规则修正 [Cris 确认] **问题:** AGENTS.md 中「权限告知规则」未区分用户身份,对所有用户(包括业务负责人李应瑛)一视同仁地回复「请添加Bot为知识空间成员」。这导致业务负责人被不必要的技术细节阻塞。 **修正:** 更新 AGENTS.md 第3条权限告知规则,按用户身份分级处理: - 业务负责人(刘庆逊、李应瑛)→ 不告知权限问题,直接联系 Cris 处理 - 其他用户 → 保持原有提示 **验证:** 对 `小斑` 文档(Tn23wQkUQilduAkvgwscTGhgnUd)执行了完整的读/写(追加)/删除测试,Bot身份权限全部正常。 ## lark-cli 绑定 完成了 lark-cli 与 OpenClaw 的绑定(bot-only 模式),后续飞书 API 调用无需额外配置。", - "recallCount": 15, + "recallCount": 17, "dailyCount": 0, "groundedCount": 0, - "totalScore": 15, + "totalScore": 17, "maxScore": 1, "firstRecalledAt": "2026-05-28T07:37:09.223Z", - "lastRecalledAt": "2026-06-23T02:56:08.768Z", + "lastRecalledAt": "2026-06-24T11:59:31.718Z", "queryHashes": [ "7031af54381b", "f22544a8757c", @@ -413,7 +432,9 @@ "72ea2d26a3e6", "03618eba208f", "a6b3c5e26d10", - "3c0d7180aff9" + "3c0d7180aff9", + "71af49f84084", + "909cc8f07fe0" ], "recallDays": [ "2026-05-28", @@ -423,7 +444,8 @@ "2026-06-18", "2026-06-20", "2026-06-21", - "2026-06-23" + "2026-06-23", + "2026-06-24" ], "conceptTags": [ "agents.md", @@ -443,13 +465,13 @@ "endLine": 92, "source": "memory", "snippet": "**根因分析**: - `vala_game_chapter`(MySQL)无 `unit_index` 字段 - `big_map_chapter`(PostgreSQL)有 `unit_index` 字段,但仅包含 A1 数据,且与 `vala_game_chapter` 无直接关联键 - 两者 ID 空间不重叠(big_map: ~1720-2070,game_chapter: ~55-399),UUID 也不匹配 **映射方案**: - 每个 season_package 内,`lesson_type=1` 的章节按 `id` 排序,每 5 个连续章节组成一个单元 - Season 0(序章/L1-U0):所有章节属于 Unit 0 - Season 1-4:每个 season 有 12 个单元(60 个 lesson 章节) - 全局 unit_index = base_offset(season_of_quarter) + unit_within_season - base_offset: 0→0, 1→1, 2→13, 3→25, 4→37 **关键 Bug**:初版按 `season_of_quarter` 分组时 A1 和 A2 混在一起,因为相同季度值合并了。修复:改为按 `(level, season_of_quarter)` 分组。 **验证结果**: - A1: Unit 0-48(49 个单元),与 big_map_chapter 的 unit_index 范围一致 - A2: Unit 0-49(50 个单元,比 A1 多 1 个) *", - "recallCount": 17, + "recallCount": 18, "dailyCount": 0, "groundedCount": 0, - "totalScore": 17, + "totalScore": 18, "maxScore": 1, "firstRecalledAt": "2026-05-28T09:07:57.953Z", - "lastRecalledAt": "2026-06-18T10:17:31.215Z", + "lastRecalledAt": "2026-06-24T03:40:08.088Z", "queryHashes": [ "c6c7ff4ed75d", "c59410788b42", @@ -467,7 +489,8 @@ "ee73b9da86b6", "872ac2ac6438", "f4b72dd59b18", - "f536bf691608" + "f536bf691608", + "5f7ef53134a9" ], "recallDays": [ "2026-05-28", @@ -478,7 +501,8 @@ "2026-06-12", "2026-06-16", "2026-06-17", - "2026-06-18" + "2026-06-18", + "2026-06-24" ], "conceptTags": [ "vala-game-chapter", @@ -776,13 +800,13 @@ "endLine": 366, "source": "memory", "snippet": "- 448 人有新进展(相比小溪历史快照) - pull → 3PRySY 口径对齐 12/12 ✅ - lesson_cache → C1HVN2 16 格 ✅ - 变化有限(PG=正式课,小溪=体验课,口径不同) - sync_base (4b) → 多维表格 8 张 ✅ 21.2s ### 18:54 漏斗看板发布待解决 - funnel HTML 已构建(scripts/build_funnel_dashboard.py ✅) - 妙搭发布 `apps +html-publish` 需要 `--as user`,bot 模式不支持 - 系统 lark-cli `/usr/local/lib/node_modules/@anthropic/lark-cli` 可能支持 - 待确认:服务器是否已 `lark-cli auth login --as user` ### 19:00 陈逸鸫派 Image2 生图任务 **任务:** L1-S1-U1《秘密基地》5 课投放用小地图底图 - 模型:gpt-image-2 · 3:4 · 2K · 不要文字 - 风格:太阳朋克 + L1 场景 - 5 张 PNG:U1-L1~U1-L5 - FUNCLOUD_API_KEY 在小研 workspace `.env` 中可用(`fc_eea138933b02b4797ce0779ffb637d8b8a6368db7b435dfdab7b4be1cd254d98`) - Brief 文档 `/docx/KsVadUTmooO7yYxHaGmc1R0Bn5b` + 投放手册 `/do", - "recallCount": 14, + "recallCount": 15, "dailyCount": 0, "groundedCount": 0, - "totalScore": 14, + "totalScore": 15, "maxScore": 1, "firstRecalledAt": "2026-05-28T20:51:03.908Z", - "lastRecalledAt": "2026-06-17T13:45:52.647Z", + "lastRecalledAt": "2026-06-24T10:30:44.233Z", "queryHashes": [ "f22544a8757c", "2af907cea93d", @@ -797,7 +821,8 @@ "a90ba76a41cf", "363241a84e3c", "a87866c0fa75", - "afb2b291577e" + "afb2b291577e", + "ba819d9c7824" ], "recallDays": [ "2026-05-29", @@ -808,7 +833,8 @@ "2026-06-09", "2026-06-12", "2026-06-16", - "2026-06-17" + "2026-06-17", + "2026-06-24" ], "conceptTags": [ "gpt", @@ -828,26 +854,28 @@ "endLine": 100, "source": "memory", "snippet": "3. **check_call→run**(`sync_juguang_notes.py` `sheet_write_range`):`subprocess.check_call` 不支持 `input=`, 改用 `subprocess.run(..., check=True, input=...)` 1b 单天验证通过:`sync_juguang_agents.py --start 2026-05-27 --end 2026-05-27` 写入 12 个单元格成功。全量 27 天在子进程跑,结果待出。 ### 17:12 数据服务边界规则部署 [陈逸鸫] **文档位置:** `docs/xiaoban-data-boundary.md`(git@github.com:chenyd11/feishu-database.git — 服务器无 SSH key 无法 pull) **三条核心规则(已写入 MEMORY.md):** 1. cron pipeline 和帮同事查数分轨,不能混用同一流程 2. 同事请求默认只读;写生产表 CYFTsu 必须 @陈逸鸫 确认 3. 查数结果写「输出区」副本,不改主表 **操作黑名单:** `pipeline.py` / `sync_*` / `run_juguang_*` / `sheets +write`(生产表) / `--promote` **待办:** 建「大麦查询输出」专用表(待陈逸鸫提供 token) **待办:** 获取 docs/xiaoban-data-boundary.md §7 Agent 系统提示词(完", - "recallCount": 6, + "recallCount": 7, "dailyCount": 0, "groundedCount": 0, - "totalScore": 6, + "totalScore": 7, "maxScore": 1, "firstRecalledAt": "2026-05-28T20:51:03.908Z", - "lastRecalledAt": "2026-06-20T12:16:19.608Z", + "lastRecalledAt": "2026-06-24T10:19:53.610Z", "queryHashes": [ "f22544a8757c", "c3cb24be8923", "e3108bd5b94c", "340c1d46da26", "cf12fd62a5e5", - "015537767f98" + "015537767f98", + "71af49f84084" ], "recallDays": [ "2026-05-29", "2026-05-30", "2026-06-01", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "check-call", @@ -945,15 +973,14 @@ "endLine": 557, "source": "memory", "snippet": "- 妙搭发布 `apps +html-publish` 需要 `--as user`,bot 模式不支持 - 系统 lark-cli `/usr/local/lib/node_modules/@anthropic/lark-cli` 可能支持 - 待确认:服务器是否已 `lark-cli auth login --as user` ### 19:00 陈逸鸫派 Image2 生图任务 **任务:** L1-S1-U1《秘密基地》5 课投放用小地图底图 - 模型:gpt-image-2 · 3:4 · 2K · 不要文字 - 风格:太阳朋克 + L1 场景 - 5 张 PNG:U1-L1~U1-L5 - FUNCLOUD_API_KEY 在小研 workspace `.env` 中可用(`fc_eea138933b02b4797ce0779ffb637d8b8a6368db7b435dfdab7b4be1cd254d98`) - Brief 文档 `/docx/KsVadUTmooO7yYxHaGmc1R0Bn5b` + 投放手册 `/docx/QhYQdz7PvoN7Eaxmhu0c0Q5UnHe` — 均为个人文档,AGENTS.md 规则禁止读取 - 素材库入口:https://llm-dev.valavala.com/web_tools/material_prod --- ### 19:30 同事数据查询流程演练 [陈逸鸫测试] **场景:模拟王虹茗请求小龙 4/21-5/20 订单详情,验证三级查询流程** **小龙订单查询结果(数据源:3wcle8 销售订", - "recallCount": 45, + "recallCount": 46, "dailyCount": 0, "groundedCount": 0, - "totalScore": 45, + "totalScore": 46, "maxScore": 1, "firstRecalledAt": "2026-05-28T20:51:03.908Z", - "lastRecalledAt": "2026-06-23T02:56:08.768Z", + "lastRecalledAt": "2026-06-24T10:30:44.233Z", "queryHashes": [ - "faadf692331b", "5a7cf6fdc679", "c2163a583b15", "d8398d9d484d", @@ -984,7 +1011,8 @@ "36a3a08fbf3c", "0f79cfc4c413", "afb2b291577e", - "3c0d7180aff9" + "3c0d7180aff9", + "ba819d9c7824" ], "recallDays": [ "2026-05-29", @@ -1001,7 +1029,8 @@ "2026-06-15", "2026-06-16", "2026-06-17", - "2026-06-23" + "2026-06-23", + "2026-06-24" ], "conceptTags": [ "gpt", @@ -1330,15 +1359,14 @@ "endLine": 580, "source": "memory", "snippet": "- 18 单 | GMV ¥44,375 | 退款 ¥0 - 渠道:销转 11 / 达人 4 / 端内 3 - 客单价:¥599×2 / ¥1,999×9 / ¥3,598×7 - 订单日期分布在 11 天(04/23–05/15) **看板发布全流程梳理:** - 服务器 build HTML + Base sync → DM 陈逸鸫 → Mac 妙搭 `html-publish` + `access-scope-set` - 三个看板 App ID:漏斗 `app_4k886pmc9x6yt` · 指挥舱 `app_4k79smc6fa1kf` · 销售 `app_4k7qkz9wrga74` - 当前 cron 不自动发布看板,仅 build HTML **王虹茗身份确认:** wanghongming@makee.com,user_id 未获取 - 尝试 `lark-cli contact +search-user wanghongming@makee.com` → 失败:bot API 缺少 `search:user` scope - 替代方案:让她发消息给大麦(系统自动获取 user_id),或陈逸鸫截图资料页 **同事查询三级场景定稿:** 1. 常规只读 → 直接查,append 输出表 2. 权限外用户 → 先通知业务负责人,再决定是否返回 3. 写操作 → 回复「这会影响生产数据,已转 @陈逸鸫 确认」 ### 20:20 行课转化分析 [陈逸鸫需求] **需求:行课记录新增当日进线→当天行课 + 7天首课率 + 销售排名 + 日报展示** **", - "recallCount": 34, + "recallCount": 35, "dailyCount": 0, "groundedCount": 0, - "totalScore": 34, + "totalScore": 35, "maxScore": 1, "firstRecalledAt": "2026-05-29T06:11:40.432Z", - "lastRecalledAt": "2026-06-23T02:56:08.768Z", + "lastRecalledAt": "2026-06-24T10:19:53.610Z", "queryHashes": [ - "f7ae50ae228d", "833509d09ccb", "5b675d96f1da", "b8b71654e7aa", @@ -1369,7 +1397,8 @@ "36a3a08fbf3c", "0f79cfc4c413", "0352a9c943a0", - "3c0d7180aff9" + "3c0d7180aff9", + "71af49f84084" ], "recallDays": [ "2026-05-29", @@ -1384,7 +1413,8 @@ "2026-06-15", "2026-06-16", "2026-06-17", - "2026-06-23" + "2026-06-23", + "2026-06-24" ], "conceptTags": [ "04/23", @@ -1686,25 +1716,27 @@ "endLine": 624, "source": "memory", "snippet": "6. **build_pipeline 脚本** — 新增 `scripts/compute_lesson_activation.py`(PG→2aNzzy V/W) **关键数据源映射:** 进线=2aNzzy C列日期 → 用户ID=2aNzzy → PG user_course_detail 首课日期 → 比对同天 → 写回 2aNzzy V/W ### 待办汇总 1. 📋 陈逸鸫确认行课转化改动方案 → 一口气改 6 处 2. 📋 王虹茗 user_id 获取(需她发消息或陈逸鸫截图) 3. 📋 数据转发王虹茗 + 写入大麦查询输出表 fd42b8 4. 📋 销售看板 build 挂住问题排查 5. 📋 Image2 生图任务执行 6. ⏳ 全量 pipeline 聚光验证(子进程) ### 20:52 行课转化全量改动完成 [陈逸鸫确认] 行课记录新增指标:当日进线→当天行课 + 7天首课,6处改动已完成4处: | # | 位置 | 改动 | 状态 | |---|------|------|------| | 1 | 2aNzzy | V/W/X 三列(首课日期/当日行课/7日内行课) | ✅ | | 2 | 3PRySY | AE-AH 四列(当日行课/当日行课率/7日内首课/7日内首课率)| ✅ | | 3 | C1HVN2 | 合并 三+五 →「线索→行课转化」| ✅ | | 4 | Base | 行课销售月(4)/5月漏斗(3)/销转销售月(2) 加字段 | ⚠️ bot权限不足,需手动 | | 5 | funnel-daily 看板 |", - "recallCount": 5, + "recallCount": 6, "dailyCount": 0, "groundedCount": 0, - "totalScore": 5, + "totalScore": 6, "maxScore": 1, "firstRecalledAt": "2026-05-31T23:25:36.480Z", - "lastRecalledAt": "2026-06-20T12:04:19.862Z", + "lastRecalledAt": "2026-06-24T10:19:53.610Z", "queryHashes": [ "679cdd7bd3a8", "6d1afbed352e", "872ac2ac6438", "72255b156849", - "38aaeeb23a92" + "38aaeeb23a92", + "71af49f84084" ], "recallDays": [ "2026-06-01", "2026-06-12", "2026-06-18", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "build-pipeline", @@ -1821,21 +1853,23 @@ "endLine": 649, "source": "memory", "snippet": "| 4 | Base | 行课销售月(4)/5月漏斗(3)/销转销售月(2) 加字段 | ⚠️ bot权限不足,需手动 | | 5 | funnel-daily 看板 | 加行课转化指标 | 📋 待 build 脚本改 | | 6 | build 脚本 | compute_lesson_activation.py | ✅ | **合并后的 C1HVN2 Section 三:** ``` 三、线索→行课转化(5月27日) 指标 小龙 吴迪 Bob Tom 日汇总 企微新增 13 6 14 15 48 当日行课 0 0 0 0 0 当日行课率 0% 0% 0% 0% 0% 7天线索→首课 1 2 2 2 10 7天首课率 7.7% 33.3% 14.3% 13.3% 20.8% ``` **5月全月数据(3PRySY 公式自动计算):** Bob 当日行课率 7.4% > Tom 3.7% > 其余 0%;7天首课率 吴迪 28.6% > 小龙 14.3% **关键实现细节:** - 2aNzzy V: PG chapter_settlement_data 最早日期 - 2aNzzy W: 进线当天有 chapter 活动=1 - 2aNzzy X: 进线 7 天内有 chapter 活动=1 - 3PRySY", - "recallCount": 3, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 3, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-31T23:27:40.582Z", - "lastRecalledAt": "2026-06-20T12:04:19.862Z", + "lastRecalledAt": "2026-06-24T10:19:53.610Z", "queryHashes": [ "0be022b45645", "72255b156849", - "38aaeeb23a92" + "38aaeeb23a92", + "71af49f84084" ], "recallDays": [ "2026-06-01", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "funnel-daily", @@ -2187,19 +2221,21 @@ "endLine": 20, "source": "memory", "snippet": "# 2026-06-03 工作日志 ## 招聘:商务经理候选人筛选 ### 李泓晔 一面评价 [王虹茗反馈] - **结论:不通过** - 太油了,管理成本高 - 塑品能力不行 - 如果做得不好会出去说瓦拉产品不行(甩锅风险) - 商务能力和资源应该还好,主要是风格不匹配 ### 大麦自省:简历分析的盲区 [2026-06-04] - **数字会骗人:** 看到GMV 1200w+、6倍增长就默认硬实力强,但面试中\"太油了\"恰恰说明此人擅长包装。简历数据要打折扣看。 - **\"能挑毛病\"≠\"懂产品\":** 面试纪要里批评公司产品,我当时解读为深度理解,但面试官看到的是甩锅前置——\"做不好会说产品不行\"。 - **风格匹配和硬能力同等重要:** 只评了硬能力,完全忽略了管理成本、团队融入、价值观匹配。 - **改进:** 后续分析增加\"风格/文化匹配\"维度,对简历数据标注置信度(高/中/低),面试策略中增加\"风格验证\"类问题。 ### 宋宗佩 一面评价 [王虹茗反馈] - **结论:不通过(本岗位)** - 欠缺达播资源,不具备本岗位相关经验 - 工作内容主要为销售leads课包,对渠道情况、合作模式较为了解", - "recallCount": 2, + "recallCount": 3, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 3, "maxScore": 1, "firstRecalledAt": "2026-06-09T02:41:10.780Z", - "lastRecalledAt": "2026-06-09T09:31:36.077Z", + "lastRecalledAt": "2026-06-24T11:59:31.718Z", "queryHashes": [ "fe3739269889", - "73563b5439c1" + "73563b5439c1", + "909cc8f07fe0" ], "recallDays": [ - "2026-06-09" + "2026-06-09", + "2026-06-24" ], "conceptTags": [ "风格/文化匹配", @@ -2291,20 +2327,22 @@ "endLine": 32, "source": "memory", "snippet": "- **改进:** 后续分析增加\"风格/文化匹配\"维度,对简历数据标注置信度(高/中/低),面试策略中增加\"风格验证\"类问题。 ### 宋宗佩 一面评价 [王虹茗反馈] - **结论:不通过(本岗位)** - 欠缺达播资源,不具备本岗位相关经验 - 工作内容主要为销售leads课包,对渠道情况、合作模式较为了解 - 能关注到leads后端的承接、转化并进行优化动作 - **后续方向:** 后面做leads课投放的商务渠道时可以再考虑 - **教训:** 简历中\"达人直售商务经理\"\"在线增长负责人\"等title容易让人高估其达播能力,实际核心能力在CPA/leads渠道运营而非达人直播资源 ### 简历评估优化点 [王虹茗指示] - 需区分\"达播商务\"和\"leads渠道商务\"两类能力,不能仅凭title判断 - 达播商务核心验证:是否有自有达人资源、是否有直播操盘经验、是否有达人分级运营体系 - Leads渠道商务核心验证:CPA/CPS渠道拓展、TMK/短信/进校/社群分销、leads转化链路优化 - 宋宗佩案例说明:title含\"达人直售\"\"增长负责人\",但实际经验偏leads课包销售,非达人直播资源型 ### 候选人状态追踪 | 候选人 | 状态 | 备注 |", - "recallCount": 2, + "recallCount": 3, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 3, "maxScore": 1, "firstRecalledAt": "2026-06-09T09:31:36.077Z", - "lastRecalledAt": "2026-06-23T02:56:08.768Z", + "lastRecalledAt": "2026-06-24T11:59:31.718Z", "queryHashes": [ "73563b5439c1", - "3c0d7180aff9" + "3c0d7180aff9", + "909cc8f07fe0" ], "recallDays": [ "2026-06-09", - "2026-06-23" + "2026-06-23", + "2026-06-24" ], "conceptTags": [ "风格/文化匹配", @@ -2517,20 +2555,22 @@ "endLine": 12, "source": "memory", "snippet": "# 2026-06-19 ## v2_fill 首次跑(陈逸鸫 18:06) - 表: CP7BsOjYdhtcmft5iz2csIaHnKe(细水新架构版) - 脚本: `scripts/damai_v2_fill.py` - ① 线索明细 (7fdb4b): F列95个手机号 → XXTEA加密 → PG匹配 → K列UID 63个(新增12个) - ② 订单明细 (vrYbiX): DB查询63个UID → 17行订单 → 写入A-P(Q/R未填) - db_info: 2026-06-19 18:21:44 - 使用实习虾 app (cli_aa898f32d4799bea) 的 tenant token 绕过 xiaoban bot 的 sheets:spreadsheet:read 权限缺失 - 注意: clear_range 用空字符串覆盖而非空数组(飞书API拒绝空数组)", - "recallCount": 2, + "recallCount": 3, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 3, "maxScore": 1, "firstRecalledAt": "2026-06-19T12:45:15.278Z", - "lastRecalledAt": "2026-06-20T03:20:58.925Z", + "lastRecalledAt": "2026-06-24T11:59:58.994Z", "queryHashes": [ "290e7f4aaa74", - "4cbb3d2aa18d" + "4cbb3d2aa18d", + "47c0a198b79d" ], "recallDays": [ "2026-06-19", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "v2-fill", @@ -2550,20 +2590,22 @@ "endLine": 49, "source": "memory", "snippet": "- **大麦**: full_refresh · 手机/UID/行课回填 · 订单汇总 merge · 完成后群回「full_refresh 完成」 - **小溪**: 不再参与 Bot 刷新 ### 5. 验收标准 - gate X = 汇总 W(当前 406=406) - 绑单审计 E1–E9 全部 0 - 孤儿 X = 0 ### 6. 脚本修改清单 - `bot_sales_step2_refresh.py`: DB 层改为逐单存储 + `pick_valid_order()` + Y≠1 不写 X - `sales_leads_full_refresh.py`: 同上 + 汇总改为 gate 全量重建 - `refresh_order_summary.py`: A-W(23列) + 渠道分类改用 L 列 - 新增 `audit_lead_primary_order_bind.py`: 线索绑单审计脚本 ### 7. 环境修复 - `secrets.env` 需要软链接: `ln -sf /root/.openclaw/workspace/secrets.env /root/.openclaw/workspace-xiaoban/secrets.env` ### 8. Skill 文档已更新 - `skills/full-data-refresh/SKILL.md` → v2 定稿,含 6 条核心架构规则 - 协作契约: `xhs-ark-dashboard/docs/bot-full-refresh-v2.md` - 大麦侧主文档: `xhs-ark-dash", - "recallCount": 2, + "recallCount": 3, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 3, "maxScore": 1, "firstRecalledAt": "2026-06-19T12:45:15.278Z", - "lastRecalledAt": "2026-06-20T03:20:58.925Z", + "lastRecalledAt": "2026-06-24T07:58:54.009Z", "queryHashes": [ "290e7f4aaa74", - "4cbb3d2aa18d" + "4cbb3d2aa18d", + "aa2a119a58a5" ], "recallDays": [ "2026-06-19", - "2026-06-20" + "2026-06-20", + "2026-06-24" ], "conceptTags": [ "full-refresh", @@ -2641,6 +2683,37 @@ "0.0", "14.3" ] + }, + "memory:memory/2026-06-16.md:1:37": { + "key": "memory:memory/2026-06-16.md:1:37", + "path": "memory/2026-06-16.md", + "startLine": 1, + "endLine": 37, + "source": "memory", + "snippet": "# 2026-06-16 工作日志 ## 陈逸鸫 - 细水入海 full_refresh v2 定稿 **来源:** 陈逸鸫(`ou_f981d4811369c954b3597908ca93a01c`) **核心变更(6/16 定稿):** ### 1. 订单汇总列结构变更 - 从 A-X(24列) → A-W(23列):去掉原 W「有效成单」列,订单号从 X 左移到 W - V=渠道归属,W=订单号 ### 2. 线索只绑有效单 - `pick_valid_order()`: GSV>0 · 非全额退 · K≥C,取最新一笔 - Y=1 时 K/L/X/N/O/P/Z 全写有效主单真实值 - 已退单不写旧 X/L - 无有效单 → Y=0,K/L/X 留空 ### 3. 汇总 gate 全量覆盖(非 DB 扩行) - 唯一真源 = 三表 Y=1 gate 的 unique X - Step 4 + Step 5 同一 run,共用 `pick_valid_order()` + `db_info` - 汇总 W = 三表 X(gate 同源,不是 merge 再查 DB) - clear → gate 全量覆盖,不保留旧 W - 同 X 多进线 → 只保留行号最小的 1 行 ### 4. 分工定稿 - **Cursor**: 微伴/旧表同步 · 三键去重 · V/W 公式 · 验单 · 撞库消解 - **大麦**: full_refresh · 手机/UID/行课回填 · 订单汇总 merge · 完成后群回「full_refresh 完成」 - **小溪**:", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-06-24T07:58:54.009Z", + "lastRecalledAt": "2026-06-24T07:58:54.009Z", + "queryHashes": [ + "aa2a119a58a5" + ], + "recallDays": [ + "2026-06-24" + ], + "conceptTags": [ + "full-refresh", + "6/16", + "a-x", + "a-w", + "pick-valid-order", + "k/l/x/n/o/p/z", + "x/l", + "k/l/x" + ] } } } diff --git a/output/L1_U0_Oops率Top3组件_完整汇总.xlsx b/output/L1_U0_Oops率Top3组件_完整汇总.xlsx new file mode 100644 index 0000000..996a45d Binary files /dev/null and b/output/L1_U0_Oops率Top3组件_完整汇总.xlsx differ diff --git a/output/U0-2_自我介绍类组件_最近50条记录.xlsx b/output/U0-2_自我介绍类组件_最近50条记录.xlsx new file mode 100644 index 0000000..86eff68 Binary files /dev/null and b/output/U0-2_自我介绍类组件_最近50条记录.xlsx differ diff --git a/output/U0-4_U0-5_Oops率Top3组件_最近20条记录.xlsx b/output/U0-4_U0-5_Oops率Top3组件_最近20条记录.xlsx new file mode 100644 index 0000000..24111bf Binary files /dev/null and b/output/U0-4_U0-5_Oops率Top3组件_最近20条记录.xlsx differ diff --git a/output/中互动组件聚合统计_20260624.xlsx b/output/中互动组件聚合统计_20260624.xlsx new file mode 100644 index 0000000..6822f36 Binary files /dev/null and b/output/中互动组件聚合统计_20260624.xlsx differ diff --git a/output/手机号查询角色ID方法.md b/output/手机号查询角色ID方法.md new file mode 100644 index 0000000..6a71d12 --- /dev/null +++ b/output/手机号查询角色ID方法.md @@ -0,0 +1,159 @@ +# 手机号 → 账号ID → 角色ID 检索方法 + +## 数据关系 + +``` +手机号 (明文) + │ XXTEA 加密 + ▼ +tel_encrypt (密文) account_id + │ │ + ▼ ▼ +vala_app_account ──────────► vala_app_character + (账号表) 1:N 关联 (角色表) +``` + +- **一个账号** (`vala_app_account`) 可以有 **多个角色** (`vala_app_character`) +- 关联字段:`vala_app_character.account_id = vala_app_account.id` + +## 数据库 + +| 项目 | 值 | +|------|-----| +| 数据库 | MySQL 线上环境 | +| Host | `bj-cdb-dh2fkqa0.sql.tencentcdb.com` | +| Port | `27751` | +| 库名 | `vala_user` | +| 用户 | `read_only` | + +## 表结构 + +### vala_app_account(账号表) + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | bigint | 账号ID(主键) | +| `tel` | varchar(20) | 手机号(脱敏显示,如 `158****7007`) | +| `tel_encrypt` | varchar(100) | 手机号密文(用于精确匹配) | + +### vala_app_character(角色表) + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | bigint | 角色ID(主键) | +| `account_id` | bigint | 所属账号ID | +| `nickname` | varchar(20) | 角色昵称 | +| `gender` | tinyint(1) | 性别 | +| `birthday` | varchar(50) | 生日 | +| `purchase_season_package` | text | 已购赛季包 | + +## 手机号加密方式 + +手机号在数据库中以密文存储,加密算法为 **XXTEA + Base64 URL-safe**。 + +### 加密参数 + +| 参数 | 值 | +|------|-----| +| 算法 | XXTEA | +| 密钥 | `K1pNOZ5O5+ZqTPSHA2kzPdoNOMOGcv6g` | +| 编码 | Base64(标准 → URL-safe:`+`→`-`, `/`→`_`, `=`→`.`) | + +### Python 加密代码 + +```python +import xxtea +import base64 + +KEY = "K1pNOZ5O5+ZqTPSHA2kzPdoNOMOGcv6g" + +def encrypt_phone(phone: str) -> str: + """加密明文手机号,返回与数据库 tel_encrypt 字段一致的密文""" + encrypted = xxtea.encrypt(phone.encode(), KEY.encode()) + result = base64.b64encode(encrypted).decode() + result = result.replace("+", "-").replace("/", "_").replace("=", ".") + return result + +def decrypt_phone(encrypted: str) -> str: + """解密 tel_encrypt 还原明文手机号""" + restored = encrypted.replace("-", "+").replace("_", "/").replace(".", "=") + decrypted = xxtea.decrypt(base64.b64decode(restored), KEY.encode()) + return decrypted.decode() +``` + +### 加密示例 + +| 明文手机号 | 密文 (tel_encrypt) | +|-----------|-------------------| +| `15849377007` | `CxMOc6z56aYjE73r8OSAog..` | + +## 查询步骤 + +### 步骤 1:加密手机号 + +```bash +python3 -c " +import xxtea, base64 +KEY = 'K1pNOZ5O5+ZqTPSHA2kzPdoNOMOGcv6g' +phone = '15849377007' +encrypted = xxtea.encrypt(phone.encode(), KEY.encode()) +result = base64.b64encode(encrypted).decode() +print(result.replace('+', '-').replace('/', '_').replace('=', '.')) +" +# 输出: CxMOc6z56aYjE73r8OSAog.. +``` + +### 步骤 2:用密文查询账号和角色 + +```sql +SELECT + a.id AS account_id, + a.tel, + c.id AS character_id, + c.nickname, + c.gender, + c.birthday, + c.purchase_season_package, + c.created_at +FROM vala_app_account a +LEFT JOIN vala_app_character c ON c.account_id = a.id +WHERE a.tel_encrypt = 'CxMOc6z56aYjE73r8OSAog..'; +``` + +### 步骤 3:解读结果 + +``` +account_id tel character_id nickname gender birthday purchase_season_package +18279 158****7007 23600 Morris 1 2021-09-09 [16,17,18,19,20] +18279 158****7007 23686 Nathan 1 2018-03-13 [16] +``` + +- **账号ID**: 18279 +- **角色**: 23600 (Morris)、23686 (Nathan) +- 一个账号下可能有多个角色(一个孩子一个角色) + +## 完整查询脚本 + +`scripts/phone_encrypt.py` 已封装加密/解密/MD5 功能,可直接使用: + +```python +from phone_encrypt import encrypt_phone, decrypt_phone, phone_md5 + +# 加密 +cipher = encrypt_phone("15849377007") +# → "CxMOc6z56aYjE73r8OSAog.." + +# 解密 +phone = decrypt_phone(cipher) +# → "15849377007" + +# MD5(用于跨系统关联) +md5 = phone_md5("15849377007") +``` + +## 注意事项 + +1. **tel 字段是脱敏的**(如 `158****7007`),不能直接用于精确匹配 +2. **必须用 tel_encrypt 密文匹配**,密文由 XXTEA 加密生成 +3. **一个账号可以有多个角色**,查询结果可能返回多行 +4. 测试环境和线上环境的 `tel_encrypt` 值相同(加密算法一致) diff --git a/output/核心听力组件_聚合统计.xlsx b/output/核心听力组件_聚合统计.xlsx new file mode 100644 index 0000000..7985388 Binary files /dev/null and b/output/核心听力组件_聚合统计.xlsx differ diff --git a/output/离开森林_1000517_全部Oops记录.xlsx b/output/离开森林_1000517_全部Oops记录.xlsx new file mode 100644 index 0000000..a61c1fa Binary files /dev/null and b/output/离开森林_1000517_全部Oops记录.xlsx differ diff --git a/scripts/mid_component_aggregation.py b/scripts/mid_component_aggregation.py new file mode 100644 index 0000000..d55c232 --- /dev/null +++ b/scripts/mid_component_aggregation.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python3 +""" +中互动组件按类型聚合统计 + 元数据导出(逐表聚合版) + +策略:逐表查询+聚合,Python 端合并,避免大 UNION ALL 超时。 +""" + +import argparse +import json +import os +import sys +from collections import defaultdict +from datetime import datetime +from typing import Any, Dict, List, Optional, Set, Tuple + +import psycopg2 +import pymysql +import pandas as pd + +C_TYPE_NAME_MAPPING = { + "mid_dialog_choose": "对话选择", + "mid_dialog_express": "对话表达", + "mid_dialog_fillin": "对话填空", + "mid_dialog_repeat": "对话跟读", + "mid_dialog_select": "对话选择", + "mid_dialog_sentence": "对话组句", + "mid_image_choose": "图片选择", + "mid_image_drag": "图片拖拽", + "mid_image_multiple": "图片多选", + "mid_image_sequence": "图片排序", + "mid_message_combine": "消息组合", + "mid_message_fillin": "消息填空", + "mid_message_sentence": "消息组句", + "mid_message_spell": "消息拼写", + "mid_message_trace": "消息描红", + "mid_message_word": "消息选词", + "mid_grammar_cloze": "语法挖空", + "mid_grammar_sentence": "语法组句", + "mid_pron_pron": "发音互动", + "mid_sentence_dialogue": "句子对话", + "mid_sentence_makeSentence": "句子造句", + "mid_sentence_material": "句子材料", + "mid_sentence_voice": "句子语音", + "mid_vocab_fillBlank": "词汇填空", + "mid_vocab_image": "词汇图片", + "mid_vocab_instruction": "词汇指令", + "mid_vocab_item": "词汇物品", +} + + +def get_pg_conn(): + return psycopg2.connect( + host=os.getenv("PG_DB_HOST"), + port=os.getenv("PG_DB_PORT"), + user=os.getenv("PG_DB_USER"), + password=os.getenv("PG_DB_PASSWORD"), + database=os.getenv("PG_DB_DATABASE"), + ) + + +def get_mysql_conn(): + return pymysql.connect( + host=os.getenv("MYSQL_HOST"), + user=os.getenv("MYSQL_USERNAME"), + password=os.getenv("MYSQL_PASSWORD"), + database="vala_test", + port=int(os.getenv("MYSQL_PORT", 3306)), + charset="utf8mb4", + ) + + +def count_help_actions(user_behavior_info: Any) -> int: + if not user_behavior_info: + return 0 + try: + if isinstance(user_behavior_info, str): + data = json.loads(user_behavior_info) + elif isinstance(user_behavior_info, list): + data = user_behavior_info + else: + return 0 + if not isinstance(data, list): + return 0 + count = 0 + for item in data: + if isinstance(item, dict): + opt = item.get("submitOpt", "") + if opt and "help" in str(opt).lower(): + count += 1 + return count + except (json.JSONDecodeError, TypeError): + return 0 + + +def query_table_aggregation(cursor, table: str) -> Dict[str, Dict]: + """查询单表的聚合数据""" + query = f""" + SELECT + c_type, + COUNT(*) AS total_records, + COUNT(DISTINCT user_id) AS user_count, + + SUM(CASE WHEN play_result = 'Perfect' THEN 1 ELSE 0 END) AS perfect_count, + SUM(CASE WHEN play_result = 'Pass' THEN 1 ELSE 0 END) AS pass_count, + SUM(CASE WHEN play_result IN ('Oops', 'Opps') THEN 1 ELSE 0 END) AS oops_count, + SUM(CASE WHEN play_result = 'None' THEN 1 ELSE 0 END) AS none_count, + AVG(interval_time) AS avg_interval_time, + SUM(interval_time) AS sum_interval_time + FROM {table} + WHERE c_type LIKE 'mid_%%' AND c_type != '' + AND play_status = 1 + GROUP BY c_type + """ + cursor.execute(query) + result = {} + for row in cursor.fetchall(): + ct = row[0] + result[ct] = { + "total_records": int(row[1]), + "user_count": int(row[2]), + + "perfect_count": int(row[3]), + "pass_count": int(row[4]), + "oops_count": int(row[5]), + "none_count": int(row[6]), + "avg_interval_time": float(row[7]) if row[7] else 0, + "sum_interval_time": float(row[8]) if row[8] else 0, + } + return result + + +def query_table_exit(cursor, table: str) -> Dict[str, int]: + """查询单表的退出数""" + query = f""" + SELECT c_type, COUNT(*) as cnt + FROM {table} + WHERE c_type LIKE 'mid_%%' AND c_type != '' + AND play_status = 2 + GROUP BY c_type + """ + cursor.execute(query) + return {row[0]: int(row[1]) for row in cursor.fetchall()} + + +def query_table_help_samples(cursor, table: str, limit: int = 5000) -> Dict[str, List[int]]: + """从单表抽样查询帮助次数""" + query = f""" + SELECT c_type, user_behavior_info + FROM {table} + WHERE c_type LIKE 'mid_%%' AND c_type != '' + AND play_status = 1 + AND user_behavior_info IS NOT NULL + AND user_behavior_info != '[]' + LIMIT {limit} + """ + cursor.execute(query) + result: Dict[str, List[int]] = defaultdict(list) + for row in cursor.fetchall(): + ct, ubi = row[0], row[1] + result[ct].append(count_help_actions(ubi)) + return dict(result) + + +def query_table_used_ids(cursor, table: str) -> Set[Tuple[str, str]]: + """查询单表已使用的组件ID""" + query = f""" + SELECT DISTINCT c_type, c_id + FROM {table} + WHERE c_type LIKE 'mid_%%' AND c_type != '' + AND play_status = 1 + """ + cursor.execute(query) + return {(row[0], str(row[1])) for row in cursor.fetchall()} + + +def merge_aggregations(per_table: List[Dict[str, Dict]]) -> Dict[str, Dict]: + """合并多表聚合结果""" + merged: Dict[str, Dict] = {} + for table_data in per_table: + for ct, d in table_data.items(): + if ct not in merged: + merged[ct] = { + "total_records": 0, + "user_count": 0, + + "perfect_count": 0, + "pass_count": 0, + "oops_count": 0, + "none_count": 0, + "sum_interval_time": 0.0, + "interval_count": 0, + } + m = merged[ct] + m["total_records"] += d["total_records"] + m["perfect_count"] += d["perfect_count"] + m["pass_count"] += d["pass_count"] + m["oops_count"] += d["oops_count"] + m["none_count"] += d["none_count"] + m["sum_interval_time"] += d["sum_interval_time"] + m["interval_count"] += d["total_records"] + + return merged + + +def main(): + parser = argparse.ArgumentParser(description="中互动组件按类型聚合统计") + parser.add_argument("--output-dir", default="output", help="输出目录") + args = parser.parse_args() + + print("=" * 60) + print("中互动组件按类型聚合统计(逐表聚合版)") + print("=" * 60) + + pg_conn = get_pg_conn() + cursor = pg_conn.cursor() + + # ===== 1. 逐表查询聚合 + 退出 + 帮助 + 已用ID ===== + print("\n[1/4] 逐表查询...") + all_aggs = [] + all_exits: Dict[str, int] = {} + all_helps: Dict[str, List[int]] = defaultdict(list) + all_used_ids: Set[Tuple[str, str]] = set() + all_used_ids_by_type: Dict[str, Set[str]] = defaultdict(set) + + for i in range(8): + table = f"user_component_play_record_{i}" + print(f" {table}...", end=" ", flush=True) + + agg = query_table_aggregation(cursor, table) + exits = query_table_exit(cursor, table) + helps = query_table_help_samples(cursor, table) + ids = query_table_used_ids(cursor, table) + + all_aggs.append(agg) + for ct, cnt in exits.items(): + all_exits[ct] = all_exits.get(ct, 0) + cnt + for ct, vals in helps.items(): + all_helps[ct].extend(vals) + all_used_ids.update(ids) + for ct, cid in ids: + all_used_ids_by_type[ct].add(cid) + + total = sum(d["total_records"] for d in agg.values()) + print(f"完成({total}条, {len(agg)}种类型)") + + cursor.close() + pg_conn.close() + + # ===== 2. 合并聚合结果 ===== + print("\n[2/4] 合并聚合结果...") + merged = merge_aggregations(all_aggs) + + # 计算 user_count(需要去重,这里用近似:取最大值) + # 实际 user_count 跨表可能重复,这里用 SQL 再查一次精确值 + print(" 计算精确用户数...") + pg_conn2 = get_pg_conn() + cursor2 = pg_conn2.cursor() + for ct in merged: + # 精确查询每种类型的去重用户数 + parts = [] + for i in range(8): + parts.append(f""" + SELECT DISTINCT user_id FROM user_component_play_record_{i} + WHERE c_type = %s AND play_status = 1 + """) + union = " UNION ".join(parts) + query = f"SELECT COUNT(*) FROM ({union}) AS u" + cursor2.execute(query, tuple([ct] * 8)) + merged[ct]["user_count"] = cursor2.fetchone()[0] + cursor2.close() + pg_conn2.close() + + # ===== 3. 构建输出 DataFrame ===== + print("\n[3/4] 构建输出...") + rows = [] + for ct, m in sorted(merged.items(), key=lambda x: x[1]["total_records"], reverse=True): + total = m["total_records"] + perfect = m["perfect_count"] + passed = m["pass_count"] + oops = m["oops_count"] + exit_cnt = all_exits.get(ct, 0) + + perfect_pct = perfect / total * 100 if total > 0 else 0 + pass_pct = passed / total * 100 if total > 0 else 0 + oops_pct = oops / total * 100 if total > 0 else 0 + exit_pct = exit_cnt / (total + exit_cnt) * 100 if (total + exit_cnt) > 0 else 0 + + helps = all_helps.get(ct, []) + avg_help = sum(helps) / len(helps) if helps else 0 + + avg_time = m["sum_interval_time"] / m["interval_count"] if m["interval_count"] > 0 else 0 + avg_time_sec = avg_time / 1000 + + display_name = C_TYPE_NAME_MAPPING.get(ct, ct) + + rows.append({ + "组件类型(c_type)": ct, + "组件名称": display_name, + "组件个数": len(all_used_ids_by_type.get(ct, set())), + "总完成人数": m["user_count"], + "总记录数": total, + "Perfect数": perfect, + "Perfect率": f"{perfect_pct:.1f}%", + "Pass数": passed, + "Pass率": f"{pass_pct:.1f}%", + "Oops数": oops, + "Oops率": f"{oops_pct:.1f}%", + "退出数": exit_cnt, + "退出率": f"{exit_pct:.1f}%", + "平均帮助次数": round(avg_help, 2), + "平均耗时(秒)": round(avg_time_sec, 1), + }) + + result_df = pd.DataFrame(rows) + + # ===== 4. 获取 MySQL 元数据 ===== + print(" 获取 MySQL 组件元数据...") + mysql_conn = get_mysql_conn() + try: + query = """ + SELECT c_type, c_id, title, component_config, + audio_list, text_analysis, related_path, + created_at, updated_at + FROM middle_interaction_component + WHERE c_type IS NOT NULL AND c_type != '' + ORDER BY c_type, c_id + """ + meta_df = pd.read_sql_query(query, mysql_conn) + finally: + mysql_conn.close() + + meta_df["_key"] = meta_df.apply(lambda r: (r["c_type"], str(r["c_id"])), axis=1) + meta_df = meta_df[meta_df["_key"].isin(all_used_ids)].drop(columns=["_key"]) + meta_df["组件名称"] = meta_df["c_type"].map(C_TYPE_NAME_MAPPING).fillna(meta_df["c_type"]) + meta_cols = ["c_type", "组件名称", "c_id", "title", "component_config", + "audio_list", "text_analysis", "related_path", "created_at", "updated_at"] + meta_df = meta_df[meta_cols] + + # ===== 5. 输出 Excel ===== + print("\n[4/4] 输出 Excel...") + os.makedirs(args.output_dir, exist_ok=True) + date_str = datetime.now().strftime("%Y%m%d") + filename = f"中互动组件聚合统计_{date_str}.xlsx" + output_path = os.path.join(args.output_dir, filename) + + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + result_df.to_excel(writer, sheet_name="按类型聚合统计", index=False) + meta_df.to_excel(writer, sheet_name="组件元数据明细", index=False) + + from openpyxl.utils import get_column_letter + for sheet_name in ["按类型聚合统计", "组件元数据明细"]: + ws = writer.sheets[sheet_name] + for col_idx, col_cells in enumerate(ws.columns, 1): + max_len = 0 + for cell in col_cells: + if cell.value: + val = str(cell.value) + char_len = sum(2 if ord(c) > 127 else 1 for c in val) + max_len = max(max_len, char_len) + ws.column_dimensions[get_column_letter(col_idx)].width = min(max_len + 4, 60) + + print(f"\n{'=' * 60}") + print(f"导出完成!") + print(f"文件: {output_path}") + print(f"Sheet1「按类型聚合统计」: {len(result_df)} 行") + print(f"Sheet2「组件元数据明细」: {len(meta_df)} 行") + print(f"{'=' * 60}") + + print("\n📊 聚合统计预览:") + print(result_df[["组件名称", "组件个数", "总完成人数", "总记录数", + "Perfect率", "Pass率", "Oops率", "退出率", + "平均帮助次数", "平均耗时(秒)"]].to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/tmp_daily_summary.md b/tmp_daily_summary.md new file mode 100644 index 0000000..a360768 --- /dev/null +++ b/tmp_daily_summary.md @@ -0,0 +1,2 @@ +=== 每日总结 20260625 === +## 昨日关键进展