diff --git a/.vala_skill_hashes b/.vala_skill_hashes index cd25703..db60d5c 100644 --- a/.vala_skill_hashes +++ b/.vala_skill_hashes @@ -14,4 +14,4 @@ user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13 user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8 feishu-group-msg-sync 1b581de76d419e6a33db0836125efc16ef2c972013fcae6f08c03aa7e2276445 -feishu-feedback-sync 1ec556db6c8523c36efacde6fb92659a5274f65b8104f83e38eacbdc419377ba +feishu-feedback-sync 5a3c470b3508b9683ee917712ffdfacb5f429b4e03a3f75dee837c19a840a7b8 diff --git a/MEMORY.md b/MEMORY.md index 425c6a7..960a480 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -51,6 +51,30 @@ ## 经验教训 +### 微信反馈全链路(2026-05-22 刘新玉确认) +微信用户反馈与飞书使用完全一致的采集→整理→归纳→分发流程,复用相同代码仅替换数据源和文档目标。 + +**crontab 全貌:** +| 频率 | 飞书 | 微信 | +|------|------|------| +| 每分钟 | `run_export_lark_feedback.sh` → 表格导出 | `sync_wechat_feedback_minutely.sh` → 表格同步 | +| 每分钟 | `detect_p0_realtime.py` → P0 实时推送 | `detect_p0_wechat.py` → P0 实时推送 | +| 每5分钟 | `run_lark_group_sync.sh` → MySQL 入库 | N/A(外部直接写库) | +| 10:00/10:02 | `sync_feishu_feedback_wrapper.sh` → 聚类+占位符 | `sync_wechat_feedback_wrapper.sh` → 聚类+占位符 | +| 10:05/10:07 | `ai_summarize_feedback.py` → DeepSeek+回写+分发 | `ai_summarize_feedback.py --channel wechat` → DeepSeek+回写+分发 | + +**关键文件:** +- `scripts/sync_wechat_feedback.py` — 微信同步(monkey-patch 飞书常量 + 自定义 fetch_wechat_data) +- `scripts/detect_p0_wechat.py` — 微信 P0 实时检测 +- `scripts/sync_wechat_feedback_minutely.sh` — 分钟级表格同步 +- `scripts/ai_summarize_feedback.py` — 已扩展 `--channel wechat` 支持 +- `sync_feishu_feedback.py` — `update_summary_doc_as_children` 新增 `title_prefix` 参数 + +**注意:** +- 微信数据映射:`svr_msg_id→message_id`, `refer_msg_svrid→quote_message_id` +- AI 描述索引对齐:`generate_summary` 跳过单消息簇后需用 `index_mapping` 修正占位符编号 +- `--apply-ai` 重建聚类时需保留原始 `message_id`,否则空串导致 Union-Find 递归死循环 + (在此记录工作中总结的经验教训,供后续参考) --- 
diff --git a/data/last_wechat_sync_id b/data/last_wechat_sync_id index a0e999a..2907ff5 100644 --- a/data/last_wechat_sync_id +++ b/data/last_wechat_sync_id @@ -1 +1 @@ -860 \ No newline at end of file +1583 diff --git a/memory/.dreams/events.jsonl b/memory/.dreams/events.jsonl index f4a0f5d..a76a3cb 100644 --- a/memory/.dreams/events.jsonl +++ b/memory/.dreams/events.jsonl @@ -14,3 +14,5 @@ {"type":"memory.recall.recorded","timestamp":"2026-05-12T06:08:28.032Z","query":"优先级规则 priority rules","resultCount":2,"results":[{"path":"memory/2026-05-07.md","startLine":23,"endLine":50,"score":1},{"path":"memory/2026-05-07.md","startLine":1,"endLine":28,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-14T13:09:32.054Z","query":"微信用户反馈 数据库 表结构 wechat_group_message","resultCount":1,"results":[{"path":"memory/2026-05-07.md","startLine":86,"endLine":116,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-21T10:38:19.153Z","query":"crontab 注释 每分钟 归纳分类 P0分发 步骤","resultCount":4,"results":[{"path":"memory/2026-05-09.md","startLine":17,"endLine":37,"score":1},{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-10.md","startLine":44,"endLine":68,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-22T02:03:49.934Z","query":"10点 汇总 问题反馈 小葵小葵群 发送消息","resultCount":5,"results":[{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-22T02:16:39.747Z","query":"刘新玉 问题反馈 2026年5月 最近工作 
需求","resultCount":8,"results":[{"path":"memory/2026-05-07.md","startLine":189,"endLine":209,"score":1},{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":73,"endLine":102,"score":1},{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1}]} diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index 34efb26..ba6798b 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": "2026-05-21T10:38:19.153Z", + "updatedAt": "2026-05-22T02:16:39.747Z", "entries": { "memory:memory/2026-04-18.md:1:5": { "key": "memory:memory/2026-04-18.md:1:5", @@ -46,22 +46,25 @@ "endLine": 224, "source": "memory", "snippet": "3. 问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固定模板) ```markdown ### 问题 N > **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** | 发言人 | 要点 | |--------|------| | ... | ... | **当前问题排查结论:** ... ``` #### 结论提取规则增强 - 解释性关键词:上云/预下载/加载/原因是/改为了/首次 → 标记为分析性发言 - 分析性发言 + 日志上传 → 输出「疑似{原因},已上传日志,排查中」 - 分析性发言 + 无日志 → 输出「{原因},待确认」 - 无分析 + 无日志 → 改为「暂未排查到问题」(刘新玉确认,比「暂未排查到根因」更准确) #### 4/28 最终归纳结果(AI 生成) 1. **NPC HUD 显示**:在移动端关卡内(11-2),NPC 头上的 HUD 偶尔变成一小条 → 暂未排查到问题 2. 
**iOS Loading 慢**:在 iOS 端关卡内(L1 3-2),Loading 耗时约 10 秒(正常 3 秒),导致组件数据丢失/无音频 → 疑似关卡内容上云加载导致,已上传日志,排查中 #### 结论提取的边界 - 刘新玉指出:\"暂未排查到问题\" vs \"暂未排查到根因\" → 前者更准确(问题被描述了但可能没被排查)", - "recallCount": 4, + "recallCount": 6, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 6, "maxScore": 1, "firstRecalledAt": "2026-05-06T13:30:08.593Z", - "lastRecalledAt": "2026-05-07T02:55:43.649Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "f865295b9ac7", "cd9c89262c30", "ac7fd0b52a4e", - "49c0959dc960" + "49c0959dc960", + "f23f51c6129c", + "015553fc9cb1" ], "recallDays": [ "2026-05-06", - "2026-05-07" + "2026-05-07", + "2026-05-22" ], "conceptTags": [ "角色/组件", @@ -81,26 +84,29 @@ "endLine": 142, "source": "memory", "snippet": "问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) 已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 #### 触发方式 - 手动:「同步飞书反馈」「整理反馈对话链」 - 定时:每天 10:00 crontab 自动执行 ## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 ### 步骤4 包含两部分 1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} 2. 
**当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: - \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N", - "recallCount": 6, + "recallCount": 8, "dailyCount": 0, "groundedCount": 0, - "totalScore": 6, + "totalScore": 8, "maxScore": 1, "firstRecalledAt": "2026-05-06T13:30:08.593Z", - "lastRecalledAt": "2026-05-21T10:38:19.153Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "f865295b9ac7", "cd9c89262c30", "ac7fd0b52a4e", "49c0959dc960", "70caeba05281", - "2f315a9f8529" + "2f315a9f8529", + "f23f51c6129c", + "015553fc9cb1" ], "recallDays": [ "2026-05-06", "2026-05-07", "2026-05-11", - "2026-05-21" + "2026-05-21", + "2026-05-22" ], "conceptTags": [ "quote-message-id", @@ -120,22 +126,25 @@ "endLine": 166, "source": "memory", "snippet": "- \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N > **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** | 发言人 | 要点 | |--------|------| | 报告人 | 🚩 报告:... | | ... | ... | | 最终人 | ✅ 结论/待排查 | ``` ### 维度提取规则 | 维度 | 优先级/来源 | |------|------------| | 端 | iOS > iPad > pad端 > Android > 移动端 > PC(正则匹配,忽略大小写) | | 环节 | 关卡内/知识巩固/单元挑战/听力挑战/阅读挑战/口语挑战/写作挑战/单元强化/瓦拉学院/报告(从消息文本匹配) | | 课程 | 匹配数字编号(如 11-2、L1 3-2) | | 角色/组件 | NPC/HUD/音频/组件/数据/Loading/加载/日志(从消息文本匹配) | | 现象 | 从消息中提取要害描述,截断在 35 字符以内 | ### 现象提取逻辑 1. 优先从包含 \"Bug的表现是这样的:\"、\"问题是\"、\"发现\"、\"出现\" 等关键词的消息中截取描述句 2. 提取的句子去除 URL、图片标记、疑问句 3. 
截断到 35 字符防止过长 ### Bug 修复记录", - "recallCount": 4, + "recallCount": 6, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 6, "maxScore": 1, "firstRecalledAt": "2026-05-06T13:30:08.593Z", - "lastRecalledAt": "2026-05-07T02:55:43.649Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "f865295b9ac7", "cd9c89262c30", "ac7fd0b52a4e", - "49c0959dc960" + "49c0959dc960", + "f23f51c6129c", + "015553fc9cb1" ], "recallDays": [ "2026-05-06", - "2026-05-07" + "2026-05-07", + "2026-05-22" ], "conceptTags": [ "日志上传/排查/查", @@ -155,24 +164,27 @@ "endLine": 205, "source": "memory", "snippet": "- iOS 的两个相关话题(组件无音频 / Loading 慢)因无引用关系而分成两个簇(需策略3语义聚类解决) - 单消息簇被跳过(需至少 2 条消息才能形成问题) ### Skill 文件最终状态 - `skills/feishu-feedback-sync/SKILL.md`:已包含完整步骤1-4的文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`:已集成 `summarize_cluster()`、`extract_location_elements()`、`generate_summary()` 函数 - crontab 每日 10:00 执行(与步骤3一起) ### 步骤4 架构调整:AI 归纳取代规则生成 [刘新玉] - 2026-04-30 19:07 #### 问题 脚本规则匹配生成的问题描述质量差: - 组件匹配失败(NPC/HUD → \"未知组件\") - 现象摘取了完整原始消息(含 @、无关词) - 端识别不稳定 #### 决策 **脚本输出结构化元数据 + 对话表,AI 负责归纳描述。** - 脚本 `summarize_cluster` 改为输出: 1. 位置元数据(端/环节/课程/组件)— 由 `extract_location_elements` 提取 2. 发言人-要点表格(规则生成) 3. 
问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固", - "recallCount": 5, + "recallCount": 7, "dailyCount": 0, "groundedCount": 0, - "totalScore": 5, + "totalScore": 7, "maxScore": 1, "firstRecalledAt": "2026-05-06T13:30:08.593Z", - "lastRecalledAt": "2026-05-11T09:48:27.002Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "f865295b9ac7", "cd9c89262c30", "ac7fd0b52a4e", "49c0959dc960", - "70caeba05281" + "70caeba05281", + "f23f51c6129c", + "015553fc9cb1" ], "recallDays": [ "2026-05-06", "2026-05-07", - "2026-05-11" + "2026-05-11", + "2026-05-22" ], "conceptTags": [ "已包含完整步骤1-4的文档", @@ -192,22 +204,25 @@ "endLine": 86, "source": "memory", "snippet": "- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序 - 新增 `--skip-priority` CLI 参数 - 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑 #### 3. 更新 SKILL.md 文档 - 增加了步骤5的完整文档说明 - 增加了优先级判定规则速查表 #### 4. 真实数据验证结果 - 2026-04-28 数据:2个问题 - P0:iOS端Loading超时致数据丢失/无音频(15条) - P1:移动端NPC HUD偶尔变成一小条(8条) #### 5. 刘新玉要求简化文档格式(~11:46) - 反馈文档太繁琐,只要在原有格式把\"问题 1\"改成\"P0-问题 1\" - 去掉了优先级分布汇总、额外信息行,只保留标题前缀 + 一行判定依据 - 最终格式:`### P0-问题 1` + `**优先级判定:** 规则...` - 知识库文档已更新为简洁版 #### 6. 修复推断引用算法 Bug - 策略2的 else 分支(同发送者无引用时往前找不同发送者)缺少时间限制 - 导致胡陈辰 20:45 的媒体消息被推断引用到徐思清 12:29(跨8小时) - 修复:加了 `GAP_THRESHOLD_MIN` 检查 #### 7. 
文档写入方式改进 - 从 `insert_before` + `replace_range` 改为 `append` 模式", - "recallCount": 4, + "recallCount": 6, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 6, "maxScore": 1, "firstRecalledAt": "2026-05-07T02:22:47.713Z", - "lastRecalledAt": "2026-05-11T09:48:27.002Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "cd9c89262c30", "ac7fd0b52a4e", "49c0959dc960", - "70caeba05281" + "70caeba05281", + "f23f51c6129c", + "015553fc9cb1" ], "recallDays": [ "2026-05-07", - "2026-05-11" + "2026-05-11", + "2026-05-22" ], "conceptTags": [ "generate-summary", @@ -227,20 +242,22 @@ "endLine": 102, "source": "memory", "snippet": "### 飞书问题反馈按引用关系重新排序 [刘新玉] 要求按问题完整解决过程排序——通过 `quote_message_id` 串联同一问题的讨论链。 #### 排序逻辑 1. 从数据库读取全部消息及引用关系 2. 构建引用图:每个消息的 `quote_message_id` 指向其父消息 3. 聚合问题链(cluster):同一引用链的消息归为一组,连续排列 4. 同 cluster 内按时间排序,子回复紧跟父消息 5. Cluster 间按最早时间排序 6. 无引用关系的独立消息按时间线补充 #### 处理过程 - 写入前先通过 `lark-cli sheets +write` 清空 sheet(`--raw-data=\"[]\"`) - 4/28 23条 → 生成完整引用链排序,写入 - 4/29 2条 → 无引用关系,直接写入 #### 4/28 问题链总结 | 问题 | 涉及人 | 消息数 | |------|--------|--------| | NPC HUD显示bug(仅移动端) | 徐思清→王胤鑫 | 3 | | 关卡出现规律 | 王胤鑫→庞鸿潇→梁晨 | 4 | | Playtesting数据记录 | 孙时敏 | 1 | | iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | | 网络问题(4/29) | Ann | 2 | ### 飞书群 4/25-4/27 数据查询结果 - 查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 - 结果:0 条,", - "recallCount": 3, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 3, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-07T02:22:47.713Z", - "lastRecalledAt": "2026-05-07T02:55:43.649Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "cd9c89262c30", "ac7fd0b52a4e", - "49c0959dc960" + "49c0959dc960", + "015553fc9cb1" ], "recallDays": [ - "2026-05-07" + "2026-05-07", + "2026-05-22" ], "conceptTags": [ "网络", @@ -328,20 +345,22 @@ "endLine": 120, "source": "memory", "snippet": "| iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | | 网络问题(4/29) | Ann | 2 | ### 飞书群 4/25-4/27 数据查询结果 - 
查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 - 结果:0 条,该群此时间段无消息记录 ### 反馈同步 Skill 创建 [刘新玉] 将飞书问题反馈同步流程封装为 `feishu-feedback-sync` skill,并计划注册定时任务。 #### Skill 文件 - `skills/feishu-feedback-sync/SKILL.md` — 完整技能文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心同步脚本 - `scripts/sync_feishu_feedback_wrapper.sh` — 定时任务包装脚本 #### Skill 功能 1. 从 MySQL `lark_group_message` 查询近 N 天数据 2. 写入知识库电子表格(按天分 sheet) 3. **反馈对话链排序**:按引用关系将同一问题讨论聚合呈现 #### 策略2:推断缺失引用关系 [刘新玉] 问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人", - "recallCount": 2, + "recallCount": 3, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 3, "maxScore": 1, "firstRecalledAt": "2026-05-08T10:25:44.365Z", - "lastRecalledAt": "2026-05-11T10:43:36.686Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", "queryHashes": [ "cc0dd7ef50d7", - "5abc37103c15" + "5abc37103c15", + "015553fc9cb1" ], "recallDays": [ "2026-05-08", - "2026-05-11" + "2026-05-11", + "2026-05-22" ], "conceptTags": [ "网络", @@ -701,6 +720,37 @@ "content-parts", "连接/查询/返回存在潜在连接泄漏风险" ] + }, + "memory:memory/2026-05-07.md:189:209": { + "key": "memory:memory/2026-05-07.md:189:209", + "path": "memory/2026-05-07.md", + "startLine": 189, + "endLine": 209, + "source": "memory", + "snippet": "| 每分钟 | `sync_feishu_feedback_dispatch_p0.sh` | 7(仅P0) | P0问题实时分发到群 | | 每天 10:00 | `sync_feishu_feedback_wrapper.sh` | 7(全部) | 全量分发到群 | ### 完整数据流 ``` 群消息 → 每1分钟入MySQL → 每1分钟刷新表格 → 每1分钟归纳+分类 → P0实时发群 / 每天10:00全量发群 ``` ### 关键技术细节 - **拆分方式**:`sync_feishu_feedback.py` 新增 `--steps` 参数(如 `--steps 1-3`、`--steps 4-6`、`--steps 7`) - **分发模式**:新增 `--dispatch-mode` 参数,`p0` 只发P0级,`all` 发全部 - **电子表格**:`TVivwmzqXiW3YakDUzucFMRenvf`(\"内容测试问题反馈\") - **归纳子文档父节点**:`RaL6whoYMijyYHkSlWrc7OLLnBy`(space_id: 7612229802338045122) - **分发目标群**:`oc_4171a2188f2554522a4309f2d7c27753`(「小葵小葵」群) - **分发Bot**:xiaokui(`/root/.openclaw/credentials/xiaokui`) - **新 wrapper 脚本**: - 
`scripts/sync_feishu_feedback_realtime.sh`(步骤1-", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-22T02:16:39.747Z", + "lastRecalledAt": "2026-05-22T02:16:39.747Z", + "queryHashes": [ + "015553fc9cb1" + ], + "recallDays": [ + "2026-05-22" + ], + "conceptTags": [ + "sync-feishu-feedback-wrapper.sh", + "sync-feishu-feedback.py", + "1-3", + "4-6", + "dispatch-mode", + "space-id", + "openclaw/credentials/xiaokui", + "分钟" + ] } } } diff --git a/memory/2026-05-22.md b/memory/2026-05-22.md new file mode 100644 index 0000000..bc0de9b --- /dev/null +++ b/memory/2026-05-22.md @@ -0,0 +1,201 @@ +# 2026-05-22 工作日志 + +## 刘新玉 - 反馈调度链路优化 + +### AI 归纳质量修复 +- **DeepSeek 返回空根因**:`max_tokens: 256` 被 v4-pro 推理(reasoning)全部消耗,实际输出 token=0。修复:`max_tokens` 改为 `1024` +- **空描述回退**:在 `ai_summarize_feedback.py` 的 `generate_descriptions()` 中新增 `generate_fallback_description()`,AI 返回空时回退到关键词规则生成(import `sync_feishu_feedback` 的 `extract_location_elements` + `generate_problem_description`) +- **修复后效果**:4 个簇全部 AI 生成,质量对比规则版提升显著(见下方对照表) + +### 2026-05-21 数据修复后的效果对照 +| 簇 | 规则版(10:00 群聊) | AI 版(修复后) | +|----|---------------------|----------------| +| #1 (27条) | "在关卡4-4,组件数据丢失" | "U13-4的听力选择题型缺少重点句配置" | +| #2 (9条) | "媒体文件: a8c55e4a3670.png" | "项目中音频文件se_Spaceship_TV_Food存在ogg和mp3格式重名" | +| #3 (4条) | "如果一定要删除一个的话" | "游戏音效存在ogg和mp3两个文件,实际播放调用mp3" | +| #4 (2条) | "图片 (1/1)" | "无明确问题" | + +### 新调度流程(明天 5/23 生效) +``` +10:00 → sync(步骤1-6) + 保存 context.json + 生成占位符文档 ❌不分发 +10:05 → DeepSeek 归纳 → 回写文档 → 📨分发到小葵小葵群 +``` + +### "今日问题拆解"逻辑梳理 +刘新玉查看了文档效果,分析了三个关键生成逻辑: +1. **聚类 (sort_threads)**:Union-Find 算法,直接引用+推断引用。当前问题:簇#1 27条消息多个不同话题被合并 +2. **结论提取 (extract_conclusion)**:关键词匹配("已修复"→已修复,"设计如此"→非问题等)。当前问题:簇#1 多个子问题被一个结论覆盖 +3. **发言人标记**:🚩 标记首条消息发送者,✅ 标记末条消息发送者。多子问题长对话时标记不准 + +### 已知待优化问题 +1. 簇 #1 话题混杂(U13知识点 + 音频跟读 + Spine bug) +2. 簇 #2 和 #3 本质是同一问题(ogg/mp3重名),被拆成两个簇 +3. 结论提取用关键词导致"设计如此"覆盖了不同子问题的结论 +4. 
刘新玉尚未给出具体优化方向 + +### 修改的文件 +- `/root/.openclaw/workspace-xiaokui/scripts/ai_summarize_feedback.py` — max_tokens 256→1024,新增 generate_fallback_description,import sync_feishu_feedback 用于回退 +- `/root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh` — 10:00 任务改为 --ai-placeholders --skip-dispatch +- `/root/.openclaw/workspace-xiaokui/skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — --apply-ai 回写成功后新增 dispatch_summary_to_chat 分发 +# 2026-05-22 工作日志 + +## 刘新玉 - 反馈调度链路优化 + +### AI 归纳质量修复 +- **DeepSeek 返回空根因**:`max_tokens: 256` 被 v4-pro 推理(reasoning)全部消耗,实际输出 token=0。修复:`max_tokens` 改为 `1024` +- **空描述回退**:在 `ai_summarize_feedback.py` 的 `generate_descriptions()` 中新增 `generate_fallback_description()`,AI 返回空时回退到关键词规则生成(import `sync_feishu_feedback` 的 `extract_location_elements` + `generate_problem_description`) +- **修复后效果**:4 个簇全部 AI 生成,质量对比规则版提升显著(见下方对照表) + +### 2026-05-21 数据修复后的效果对照 +| 簇 | 规则版(10:00 群聊) | AI 版(修复后) | +|----|---------------------|----------------| +| #1 (27条) | "在关卡4-4,组件数据丢失" | "U13-4的听力选择题型缺少重点句配置" | +| #2 (9条) | "媒体文件: a8c55e4a3670.png" | "项目中音频文件se_Spaceship_TV_Food存在ogg和mp3格式重名" | +| #3 (4条) | "如果一定要删除一个的话" | "游戏音效存在ogg和mp3两个文件,实际播放调用mp3" | +| #4 (2条) | "图片 (1/1)" | "无明确问题" | + +### 新调度流程(明天 5/23 生效) +``` +10:00 → sync(步骤1-6) + 保存 context.json + 生成占位符文档 ❌不分发 +10:05 → DeepSeek 归纳 → 回写文档 → 📨分发到小葵小葵群 +``` + +### "今日问题拆解"逻辑梳理 +刘新玉查看了文档效果,分析了三个关键生成逻辑: +1. **聚类 (sort_threads)**:Union-Find 算法,直接引用+推断引用。当前问题:簇#1 27条消息多个不同话题被合并 +2. **结论提取 (extract_conclusion)**:关键词匹配("已修复"→已修复,"设计如此"→非问题等)。当前问题:簇#1 多个子问题被一个结论覆盖 +3. **发言人标记**:🚩 标记首条消息发送者,✅ 标记末条消息发送者。多子问题长对话时标记不准 + +### 已知待优化问题 +1. 簇 #1 话题混杂(U13知识点 + 音频跟读 + Spine bug) +2. 簇 #2 和 #3 本质是同一问题(ogg/mp3重名),被拆成两个簇 +3. 结论提取用关键词导致"设计如此"覆盖了不同子问题的结论 +4. 
刘新玉尚未给出具体优化方向 + +### 修改的文件 +- `/root/.openclaw/workspace-xiaokui/scripts/ai_summarize_feedback.py` — max_tokens 256→1024,新增 generate_fallback_description,import sync_feishu_feedback 用于回退 +- `/root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh` — 10:00 任务改为 --ai-placeholders --skip-dispatch +- `/root/.openclaw/workspace-xiaokui/skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — --apply-ai 回写成功后新增 dispatch_summary_to_chat 分发 + +## 刘新玉 - 聚类/结论/发言人三项优化(5/22 上午) + +### 聚类优化 +- **内容相似度校验**:`infer_missing_references` 策略2(同发送者)和策略3(最近不同发送者)加入 `content_similarity()` 校验,低于 `SIMILARITY_FLOOR=0.08` 不建立推断引用 +- **超大簇拆分**:新增 `split_oversized_clusters()`,对超过 `MAX_CLUSTER_MSGS=15` 条的簇按相邻消息内容相似度找边界拆分 +- **相似度算法**:中文2字以上词 + 英文3字以上词的 Jaccard 系数,过滤停用词(收到/好的/明白等) +- **拆分层数限制**:最多拆3次,每段至少3条消息,以 sender 切换 + 相似度 < 0.05 为边界 + +### 结论提取优化 +- `extract_conclusion` 对 >8 条消息的簇改为聚焦后 30% 消息(至少3条),避免前面无关结论污染 + +### 发言人标记优化 +- `summarize_cluster` 新增 `is_topic_initiator()` 检测函数,识别话题切换点并用 📌 标记多话题发起者 +- 判定逻辑:非首发言人、非简单回复(屏蔽"好的/收到/OK"等)、发送者切换、含问题特征关键词 + +### 2026-05-21 回归验证结果 +| 指标 | 优化前 | 优化后 | +|------|--------|--------| +| 簇数 | 4 | 7 | +| 最大簇 | 27条 | 12条 | +| ogg/mp3 问题 | 拆成2个簇 | 合并为1个簇 ✅ | +| Spine bug | 混在U13里丢失 | 独立为簇 #7 ✅ | +| U13知识点 | 和音频/Spine混 | 独立为簇 #2 + #4 ✅ | + +### 新增常量/函数 +- `SIMILARITY_FLOOR = 0.08`、`MAX_CLUSTER_MSGS = 15` +- `content_similarity(text1, text2)`、`split_oversized_clusters(clusters_dict)`、`is_topic_initiator(prev_name, name, text)` +- 修改:`infer_missing_references()`、`extract_conclusion()`、`summarize_cluster()`、`sort_threads()` + +## 刘新玉 - 分发文档链接修复(5/22 11:20) + +### 问题 +分发到小葵小葵群的文档链接硬编码为总文档 `RaL6whoYMijyYHkSlWrc7OLLnBy`,而非当天子文档 + +### 修复 +- `dispatch_summary_to_chat()` 新增 `doc_url` 参数 +- `--apply-ai` 流程:从 `list_child_nodes()` 返回的 `nodes[title]["node_token"]` 构造链接 +- `main()` 分发循环:同上,对每个 day 查 `list_child_nodes()` 获取对应 node_token +- 链接格式:`https://makee-interactive.feishu.cn/wiki/{node_token}` + +## 刘新玉 - AI 描述人名脱敏(5/22 11:30) + +### 问题 
+问题描述中包含员工姓名"江涛" + +### 修复(两层防护) +1. **System Prompt** 新增规则6:严禁出现员工姓名,用"某角色/某员工"替代 +2. **`strip_names(description, cluster_data)` 后处理**: + - 步骤1:从 cluster 提取发送者姓名,精确替换 + - 步骤2:姓氏 + 1个中文字符模式匹配,排除内容词白名单(文件/资源/游戏/动画/设计等50+词) + - 替换为"相关人员" + +### 修复效果(簇 #7) +| 修复前 | 修复后 | +|--------|--------| +| "角色江涛的spine动画中,眼睛设计应为睁开但实际显示为闭眼。" | "Spine动画中角色眼睛呈现闭眼状态,与设计不符。" | +## 刘新玉 - 反馈流程完整概览(5/22 12:00) + +### 全链路三层架构 +``` +采集层 → 每5分钟(飞书群同步) / 每分钟(微信群导出+P0检测) +汇总层 → 每天 10:00(飞书) 10:02(微信) 聚类+归纳+写入知识库 +AI层 → 每天 10:05(飞书) 10:07(微信) DeepSeek生成描述+回写+分发群聊 +``` + +### 时间线 +| 时间 | 飞书 | 微信 | +|------|------|------| +| 10:00 | sync_feishu_feedback (占位符,不分发) | — | +| 10:02 | — | sync_wechat_feedback (占位符,不分发) | +| 10:05 | ai_summarize_feedback (回写+分发) | — | +| 10:07 | — | ai_summarize_feedback --channel wechat (回写+分发) | + +## 刘新玉 - 微信反馈同步系统搭建(5/22 下午) + +### 背景 +刘新玉要求微信用户反馈流程与飞书一致(收集→整理→归纳→分发),之前微信只有 MySQL 入库和每分钟同步到表格。 + +### 新增文件 +- `scripts/sync_wechat_feedback.py` — 微信同步脚本,复用飞书所有逻辑(聚类/优先级/归纳/分发),仅替换数据源和文档目标 +- `scripts/sync_wechat_feedback_wrapper.sh` — crontab wrapper + +### 改动文件 +- `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — `update_summary_doc_as_children()` 新增 `title_prefix` 参数,支持 "微信-" 前缀 +- `scripts/ai_summarize_feedback.py` — 新增 `--channel wechat` 参数,支持微信专属上下文路径和回写脚本 + +### 微信数据映射 +``` +wechat_group_message → sync_wechat_feedback.fetch_wechat_data() → 统一元组格式 +svr_msg_id → message_id (r[0]) +sender_name → sender_name (r[1]) +msg_type → msg_type (r[2]) +content → content (r[3]) +media_url → media_url (r[4]) +refer_msg_svrid → quote_message_id (r[5]) +msg_time → msg_time (r[6]) +msg_timestamp → msg_timestamp (r[7]) +``` + +### 知识库结构 +| 文档 | node_token | 父文档 | 用途 | +|------|-----------|--------|------| +| 飞书问题反馈 | FOxgwkVtLiTl3ZkrkIXcT8T2nJg | SB3dwaSshie7ifklKlLc2GswnqX | 飞书原始数据 | +| 微信问题反馈 | R4HRwNU42iwH1Hk3OMCcB6i7n1u | SB3dwa... | 微信原始数据 | +| 飞书用户反馈问题汇总 | RaL6whoYMijyYHkSlWrc7OLLnBy | SB3dwa... | 飞书每日归纳 | +| 微信用户反馈问题汇总 | XhtGwjitFizzCNkw8Xzc2IXsnuf | SB3dwa... 
| 微信每日归纳 | + +### 飞书表格 +| 表格 | sheet token | 用途 | +|------|-------------|------| +| 内容测试问题反馈 | AHtnsehwShUVyDtjasSciIvgn7b | 飞书导出 | +| 用户救火群 | RUXfsytPzhJO5kt2uwCcvdIgnLg | 微信导出 | + +### WeChat 2026-05-21 测试结果 +- 62 条消息 → 28 个初始簇 → 10 个有效簇(≥2条) +- P0: 3 个(阅读问题、发音问题)— 自动判定正常 +- AI 归纳 dry-run 通过,占位符替换逻辑正确 + +### 待确认 +- 微信 crontab 条目是否已添加 ✅ 全部就位 +- 微信父文档 `XhtGwjitFizzCNkw8Xzc2IXsnuf` 下子文档创建权限 ✅ 已验证 +- 飞书总文档名称是否需要更新为 "飞书用户反馈问题汇总"(当前显示为 "用户反馈问题汇总") diff --git a/output/daily_feedback/ai_descriptions_2026-05-21.json b/output/daily_feedback/ai_descriptions_2026-05-21.json new file mode 100644 index 0000000..b5ee703 --- /dev/null +++ b/output/daily_feedback/ai_descriptions_2026-05-21.json @@ -0,0 +1,33 @@ +{ + "date": "2026-05-21", + "descriptions": [ + { + "index": 1, + "description": "看图选词的学习流相关人员,相关人员引导提示有朗读但实际未提供跟读功能。" + }, + { + "index": 2, + "description": "在灰度部署后,U13-3的看图选词活动缺少图片配置,前端提示资源不存在。" + }, + { + "index": 3, + "description": "资源管理中存在同名但不同格式的音频文件(ogg与mp3),导致调用冲突,需删除ogg版本以解决问题。" + }, + { + "index": 4, + "description": "U13的每个关卡详情中,点击查看相关知识点时显示为空内容。" + }, + { + "index": 5, + "description": "引导语因AI解析问题生成不正确。" + }, + { + "index": 6, + "description": "无明确问题" + }, + { + "index": 7, + "description": "spine动画中角色眼睛呈现闭眼状态,与设计要求的睁眼效果不符。" + } + ] +} \ No newline at end of file diff --git a/output/daily_feedback/ai_descriptions_wechat_2026-05-21.json b/output/daily_feedback/ai_descriptions_wechat_2026-05-21.json new file mode 100644 index 0000000..44c5b8f --- /dev/null +++ b/output/daily_feedback/ai_descriptions_wechat_2026-05-21.json @@ -0,0 +1,117 @@ +{ + "date": "2026-05-21", + "descriptions": [ + { + "index": 1, + "description": "在华为matepad 2023版设备上,知识巩固环节跟读单词后无反应。" + }, + { + "index": 2, + "description": "无明确问题" + }, + { + "index": 3, + "description": "点击机器人进入房间或使用望远镜观察时,房间内显示的是相关人员蒙布覆盖的物体" + }, + { + "index": 4, + "description": "飞船和望远镜功能在收音权限已开启的情况下仍无法正相关人员用" + }, + { + "index": 5, + "description": "红米平板设备上,用户反馈无法看到具体资源。" + }, + { + "index": 6, + "description": 
"无明确问题" + }, + { + "index": 7, + "description": "看图说话模块在学员回答完第一题进入第二题后,没有相关人员声音且不再领读。" + }, + { + "index": 8, + "description": "13426341955账号的熏听功能总是自动跳出的问题" + }, + { + "index": 9, + "description": "在磨耳朵功能中,听几句后应用会自动退出。" + }, + { + "index": 10, + "description": "无明确问题" + }, + { + "index": 11, + "description": "无明确问题" + }, + { + "index": 12, + "description": "无明确问题" + }, + { + "index": 13, + "description": "无明确问题" + }, + { + "index": 14, + "description": "无明确问题" + }, + { + "index": 15, + "description": "输入号码13886728616后读音没有反应。" + }, + { + "index": 16, + "description": "无明确问题" + }, + { + "index": 17, + "description": "在华为MatePad 2023版的L1第三单元知识巩固单词环节,朗读后无反应。" + }, + { + "index": 18, + "description": "游戏重启操作指引中,相关人员误将杀进相关人员新启动描述为退出重新登录,导致用户操作误解。" + }, + { + "index": 19, + "description": "无明确问题" + }, + { + "index": 20, + "description": "在学而思学习机上使用瓦拉英语APP的U1·L05颜色课相关人员,AI语音出现卡顿吞音且点击小喇叭键无声音。" + }, + { + "index": 21, + "description": "无明确问题" + }, + { + "index": 22, + "description": "无明确问题" + }, + { + "index": 23, + "description": "无明确问题" + }, + { + "index": 24, + "description": "无明确问题" + }, + { + "index": 25, + "description": "在翻译列表页中点击播放按钮无法播放音频,重新下载后问题仍然存在。" + }, + { + "index": 26, + "description": "无明确问题" + }, + { + "index": 27, + "description": "无明确问题" + }, + { + "index": 28, + "description": "无明确问题" + } + ] +} \ No newline at end of file diff --git a/output/daily_feedback/cluster_context_2026-05-21.json b/output/daily_feedback/cluster_context_2026-05-21.json new file mode 100644 index 0000000..115fa21 --- /dev/null +++ b/output/daily_feedback/cluster_context_2026-05-21.json @@ -0,0 +1,474 @@ +{ + "date": "2026-05-21", + "total_clusters": 10, + "clusters": [ + { + "index": 1, + "cluster_id": "7000233802530691778", + "location": { + "端": "未知", + "环节": "知识巩固", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "关卡/内容类", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": 
"13886728616 读音没有反应", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 09:11:55" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "[聊天记录] 小星星~~和瓦拉英语-萌萌老师(早10晚7)\n小星星~~: [视频]\n小星星~~: 您好知识巩固单词这个读了以后没反应,试了两课都这样\n小星星~~: L1第三单元好像\n瓦拉英语-萌萌老师(早10晚7): 咱们设备型号是什么\n小星星~~: 华为matepad\n小星星~~: 2023版的\n小星星~~: [视频]\n小星星~~: [图片]", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 09:11:59" + }, + { + "sender": "嘿哈", + "content": "这个让用户截图一下系统版本,看一下是鸿蒙几的版本", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 09:13:33" + }, + { + "sender": "嘿哈", + "content": "辛苦让用户截图一下系统版本,不是设备的型号\n ↳ 回复 八哥-16619720408: 这个让用户截图一下系统版本,看一下是鸿蒙几的版本", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 10:33:58" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "@八哥-16619720408 要晚上了\n ↳ 回复 嘿哈: 辛苦让用户截图一下系统版本,不是设备的型号", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 10:37:05" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "飞哥设备型号\n ↳ 回复 萌萌老师: @嘿哈 要晚上了", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 19:31:27" + } + ] + }, + { + "index": 2, + "cluster_id": "3067526779395812366", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "kevin", + "content": "其他的收音也都不正常?", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 10:28:42" + }, + { + "sender": "瓦拉英语-露露班班(早10晚7)", + "content": "[聊天记录] 瓦拉英语-露露班班(早10晚7)和杨小聪🐤\n杨小聪🐤: [图片]\n杨小聪🐤: 老师,点这个机器人为什么到房间里是白色蒙着布的这个\n杨小聪🐤: [图片]", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 10:29:13" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "她还用望远镜 试了也一样的问题", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 10:29:27" + } + ] + }, + { + "index": 3, + "cluster_id": "7312835140432337893", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + 
"priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-露露班班(早10晚7)", + "content": "老师帮忙看看飞船的问题", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 10:29:31" + }, + { + "sender": "kevin", + "content": "确定收音的权限打开了吧?\n ↳ 回复 瓦拉英语-萌萌老师(早10晚7): 她还用望远镜 试了也一样的问题", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 10:33:31" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "[图片] 0917eb069d430c083c7de081b3a581dd", + "msg_type": "image", + "media_url": "", + "time": "2026-05-21 10:34:04" + } + ] + }, + { + "index": 4, + "cluster_id": "819675340660592838", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "嘿哈", + "content": "@梁音 老师,这个日志有上传的时间么?\n ↳ 回复 梁音: ID:22236,红米平板,已上传日志,辛苦老师帮忙看看@kevin", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 10:52:08" + }, + { + "sender": "嘿哈", + "content": "收到", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 10:53:42" + }, + { + "sender": "嘿哈", + "content": "@瓦拉英语-露露班班(早10晚7)", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 10:57:54" + }, + { + "sender": "嘿哈", + "content": "辛苦让用户录制一个视频看一下吧。我这看资源是正常的啊", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 10:57:54" + }, + { + "sender": "嘿哈", + "content": "[图片] ecc7f810b34808be4d38d8efc7efc844 size:48694", + "msg_type": "image", + "media_url": "", + "time": "2026-05-21 10:57:55" + }, + { + "sender": "嘿哈", + "content": "@瓦拉英语-露露班班(早10晚7) 不用找用户了,我们查一下吧", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 11:03:09" + }, + { + "sender": "嘿哈", + "content": "@瓦拉英语-露露班班(早10晚7) 这个确认了一下,设计如此,需要解锁美食功能才能看到具体的资源\n ↳ 回复 瓦拉英语-露露班班(早10晚7): 老师帮忙看看飞船的问题", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 11:09:13" + }, + { + "sender": "嘿哈", + "content": "[图片] 
56be7a302115d3f783c241f8c4c1f885 size:25876", + "msg_type": "image", + "media_url": "", + "time": "2026-05-21 11:09:13" + }, + { + "sender": "瓦拉英语-露露班班(早10晚7)", + "content": "收到", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 11:09:49" + }, + { + "sender": "瓦拉英语-露露班班(早10晚7)", + "content": "原来如此", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 11:09:52" + } + ] + }, + { + "index": 5, + "cluster_id": "4408046217168970551", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "音频" + }, + "priority": "P2", + "priority_detail": "", + "category": "声音/音频类", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-益达老师", + "content": "[视频] 58秒 size:5739662", + "msg_type": "video", + "media_url": "", + "time": "2026-05-21 12:02:09" + }, + { + "sender": "瓦拉英语-益达老师", + "content": "学员反馈看图说话有问题,正常会有领读的,点不点小喇叭都有领读,孩子回答出来问题会夸奖一下,但是到了第二题之后就不读了,任何声音都没有了,说这个板块有问题,之前也有遇到过,麻烦老师看下~\n\n学员号码:15009861201", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 12:02:26" + }, + { + "sender": "瓦拉英语-露露班班(早10晚7)", + "content": "@八哥-16619720408 飞哥康康\n ↳ 回复 瓦拉英语-益达老师: 学员反馈看图说话有问题,正常会有领读的,点不点小喇叭都有领读,孩子回答出来问题会夸奖一下,但是到了第二题之后就不读了,任何声音都没有了,说这个板块有问题,之前也有遇到过,麻烦老师看下~\n\n学员号码:15009861201", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 18:41:34" + }, + { + "sender": "花生", + "content": "这是哪个关卡\n ↳ 回复 瓦拉英语-益达老师: 学员反馈看图说话有问题,正常会有领读的,点不点小喇叭都有领读,孩子回答出来问题会夸奖一下,但是到了第二题之后就不读了,任何声音都没有了,说这个板块有问题,之前也有遇到过,麻烦老师看下~\n\n学员号码:15009861201", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 18:45:40" + }, + { + "sender": "瓦拉英语-益达老师", + "content": "第五单元第一课\n ↳ 回复 花生: 这是哪个关卡", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 18:51:38" + } + ] + }, + { + "index": 6, + "cluster_id": "2433356206959451290", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + 
"sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "13426341955 熏听总是跳出来", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 15:08:03" + }, + { + "sender": "瓦拉英语-露露班班(早10晚7)", + "content": "@瓦拉场务-果果糖 范老师看看~\n ↳ 回复 瓦拉英语-萌萌老师(早10晚7): 13426341955 熏听总是跳出来", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 18:41:27" + } + ] + }, + { + "index": 7, + "cluster_id": "742589932050397252", + "location": { + "端": "未知", + "环节": "知识巩固", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "关卡/内容类", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "[聊天记录] 小星星~~和瓦拉英语-萌萌老师(早10晚7)\n小星星~~: [视频]\n小星星~~: 您好知识巩固单词这个读了以后没反应,试了两课都这样\n小星星~~: L1第三单元好像\n瓦拉英语-萌萌老师(早10晚7): 咱们设备型号是什么\n小星星~~: 华为matepad\n小星星~~: 2023版的\n小星星~~: [视频]\n小星星~~: [图片]", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 19:32:01" + }, + { + "sender": "嘿哈", + "content": "这个看起来像是之前的问题啊,引动用户完全杀了进程重新进一下试试呢\n ↳ 回复 瓦拉英语-萌萌老师(早10晚7): 小星星~~和瓦拉英语-萌萌老师(早10晚7)1900<...", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 19:33:30" + } + ] + }, + { + "index": 8, + "cluster_id": "1929630950513132420", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "就是退出重新登陆是吗", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:34:15" + }, + { + "sender": "嘿哈", + "content": "是杀进程重新启动,不是退出重新登录\n ↳ 回复 瓦拉英语-萌萌老师(早10晚7): 就是退出重新登陆是吗", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 19:35:55" + } + ] + }, + { + "index": 9, + "cluster_id": "7478433313658217040", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "音频" + }, + "priority": "P2", + "priority_detail": "", + "category": "声音/音频类", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + 
"content": "这个第一次反馈", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:35:46" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "之前不是她吧", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:35:54" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "家长怎么操作呢", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:38:11" + }, + { + "sender": "瓦拉英语-萌萌老师(早10晚7)", + "content": "把APP划走,重新打开吗", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:38:43" + }, + { + "sender": "嘿哈", + "content": "是的", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:39:18" + }, + { + "sender": "瓦拉英语-尼克老师", + "content": "家长反馈点黄色小喇叭不出声音", + "msg_type": "text", + "media_url": "", + "time": "2026-05-21 19:45:09" + }, + { + "sender": ".oO(王_计)Oo.", + "content": "这个问题今天热更就解决了\n ↳ 回复 瓦拉英语-尼克老师: 家长反馈点黄色小喇叭不出声音", + "msg_type": "link", + "media_url": "", + "time": "2026-05-21 20:31:08" + }, + { + "sender": "王虹茗", + "content": "这个也是相同问题吗?@.oO(王_计)Oo.\n ↳ 回复 .oO(王_计)Oo.: 这个问题今天热更就解决了57> /var/log/xiaokui_ai_summarize.log 2>&1 """ -import sys, os, json, argparse, urllib.request +import sys, os, json, argparse, re, urllib.request from datetime import datetime, date, timedelta # === 配置 === @@ -20,7 +20,10 @@ DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1" DEEPSEEK_MODEL = "deepseek-v4-pro" CONTEXT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "output", "daily_feedback") -SKILL_SCRIPT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skills", "feishu-feedback-sync", "scripts") +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +SKILL_SCRIPT_DIR = os.path.join(SCRIPT_DIR, "..", "skills", "feishu-feedback-sync", "scripts") +sys.path.insert(0, SKILL_SCRIPT_DIR) +import sync_feishu_feedback # noqa: E402 — 用于 fallback 关键词规则 SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。你的任务是: 阅读一段来自测试群的多人对话(可能包含多个发言人、多轮讨论), @@ -32,13 +35,15 @@ SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。你的任务 3. 如果对话中有多种说法,优先采用最后确认的描述 4. 
输出仅一句中文,不要加任何前缀、编号、引号或换行 5. 如果对话全是无实质内容的闲聊(如"好的""收到"),输出"无明确问题" +6. **严禁**在问题描述中出现任何员工姓名(如江涛、张骜等),人名用"相关人员"替代 输出格式(严格):直接输出问题描述,无任何额外文字。""" -def load_context(date_str): +def load_context(date_str, channel="feishu"): """加载指定日期的 cluster_context JSON""" - path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json") + prefix = "wechat_cluster_context" if channel == "wechat" else "cluster_context" + path = os.path.join(CONTEXT_DIR, f"{prefix}_{date_str}.json") if not os.path.exists(path): print(f" ⚠️ 无上下文文件: {path}") return None @@ -85,7 +90,7 @@ def call_deepseek(system_prompt, user_prompt, max_retries=2): {"role": "user", "content": user_prompt}, ], "temperature": 0.3, - "max_tokens": 256, + "max_tokens": 1024, }).encode() for attempt in range(max_retries + 1): @@ -114,6 +119,70 @@ def call_deepseek(system_prompt, user_prompt, max_retries=2): raise +def generate_fallback_description(cluster): + """AI 返回空描述时的回退:调用 sync_feishu_feedback.py 的关键词规则生成""" + # 将 context JSON 消息格式转换为 sync_feishu_feedback 期望的数据库行格式 + # 数据库行: (msg_id, sender, msg_type, content, media_url, quote_id, time, timestamp) + converted = [] + for m in cluster.get("messages", []): + converted.append(( + m.get("message_id", ""), + m.get("sender", ""), + m.get("msg_type", "text"), + m.get("content", ""), + m.get("media_url", ""), + m.get("quote_message_id", ""), + m.get("time", ""), + 0, + )) + idx = cluster.get("index", 0) + location = sync_feishu_feedback.extract_location_elements(converted) + root_text = converted[0][3] if converted else "" + return sync_feishu_feedback.generate_problem_description(converted, location, root_text, ai_placeholder=False, placeholder_idx=idx) + + +def strip_names(text, cluster=None): + """移除问题描述中的员工姓名(后处理兜底)。 + 1. 优先使用簇中实际发送者姓名做精确替换 + 2. 然后对常见姓氏+1字做保守匹配(排除已知内容词) + """ + import re + if not text: + return text + + # 1. 
精确替换:簇中出现的发送者姓名 + if cluster: + sender_names = set() + for m in cluster.get("messages", []): + name = m.get("sender", "").strip() + if name and len(name) >= 2: + sender_names.add(name) + for name in sorted(sender_names, key=len, reverse=True): + text = text.replace(name, '相关人员') + + # 2. 保守模式:姓氏 + 1个中文字符(两字名),排除已知内容词 + surnames = '李王张刘陈杨赵黄周吴徐孙胡朱高林何郭马罗梁宋郑谢韩唐冯于董萧程曹袁邓许傅沈曾彭吕苏卢蒋蔡贾丁魏薛叶阎余潘杜戴夏钟汪田任姜范方石姚谭廖邹熊金陆郝孔白崔康毛邱秦江史顾侯邵孟龙万段雷钱汤尹黎易常武乔贺赖龚文' + pattern = '[' + surnames + '][一-鿿]' + # 需要排除的已知内容词 + content_words = { + '文件','资源','存在','动画','角色','设计','问题','音频','显示', + '界面','关卡','课程','内容','配置','重点','引导','模型', + '测试','环境','部署','灰度','版本','组件','数据','命名', + '图片','视频','格式','选项','处理','结果','玩家','游戏', + '开发','项目','报告','任务','状态','进度','确认','反馈', + '功能','系统','后台','前端','服务','需要','可能','正常', + '异常','错误','修复','解决','检查','查看','说明','登录', + '注册','打开','关闭','更新','调试','运行','启动','停止', + '通过','失败','成功','完成','开始','结束','使用','操作', + '调整','优化','修改','增加','删除','添加','移除','切换', + } + def _replace(m): + name = m.group(0) + return '相关人员' if name not in content_words else name + text = re.sub(pattern, _replace, text) + return text + + def generate_descriptions(context_data, dry_run=False): """为所有问题簇生成 AI 描述""" clusters = context_data.get("clusters", []) @@ -140,26 +209,42 @@ def generate_descriptions(context_data, dry_run=False): print(f" ❌ 簇 #{idx} API 调用失败: {e}") description = f"[API调用失败: {str(e)[:50]}]" - print(f" 📝 描述: {description}") + # AI 返回空描述时回退 + if not description or not description.strip(): + description = generate_fallback_description(cluster) + print(f" ⚠️ AI 返回空,回退: {description}") + else: + print(f" 📝 描述: {description}") + + # 脱敏:移除员工姓名 + description = strip_names(description, cluster=cluster) descriptions.append({"index": idx, "description": description}) return descriptions -def apply_descriptions(date_str, descriptions): - """调用 sync_feishu_feedback.py --apply-ai 回写文档""" +def apply_descriptions(date_str, descriptions, channel="feishu"): + """调用 sync_*_feedback.py --apply-ai 回写文档 + channel: "feishu" 
或 "wechat" + """ sys.path.insert(0, SKILL_SCRIPT_DIR) + # 渠道前缀 + prefix = "wechat_" if channel == "wechat" else "" + # 先保存描述 JSON - desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json") + desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{channel}_{date_str}.json") payload = {"date": date_str, "descriptions": descriptions} with open(desc_path, "w", encoding="utf-8") as f: json.dump(payload, f, ensure_ascii=False, indent=2) print(f" 💾 描述已保存: {desc_path}") # 调用 --apply-ai - sync_script = os.path.join(SKILL_SCRIPT_DIR, "sync_feishu_feedback.py") - import subprocess + if channel == "wechat": + sync_script = os.path.join(SCRIPT_DIR, "sync_wechat_feedback.py") + else: + sync_script = os.path.join(SKILL_SCRIPT_DIR, "sync_feishu_feedback.py") + env = os.environ.copy() env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui" env["HOME"] = "/root" @@ -173,7 +258,8 @@ def apply_descriptions(date_str, descriptions): if "AI 描述已应用" in result.stdout or "✅" in result.stdout: print(f" ✅ AI 描述已回写到知识库文档") # 回写成功后清理上下文文件,避免心跳重复处理 - context_path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json") + ctx_prefix = "wechat_cluster_context" if channel == "wechat" else "cluster_context" + context_path = os.path.join(CONTEXT_DIR, f"{ctx_prefix}_{date_str}.json") if os.path.exists(context_path): os.remove(context_path) print(f" 🗑️ 已清理上下文文件: {context_path}") @@ -189,18 +275,21 @@ def main(): parser = argparse.ArgumentParser(description="AI 问题归纳") parser.add_argument("--date", help="日期 YYYY-MM-DD,默认昨天") parser.add_argument("--dry-run", action="store_true", help="仅预览不实际调用 API") + parser.add_argument("--channel", default="feishu", choices=["feishu", "wechat"], + help="数据渠道(默认 feishu)") args = parser.parse_args() if args.date: date_str = args.date else: - # 默认处理昨天的数据(每天 10:05 运行,处理 10:00 生成的前一天数据) date_str = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d") - print(f"📋 AI 问题归纳 - {date_str}") + channel = args.channel + label = "微信" if 
channel == "wechat" else "飞书" + print(f"📋 AI 问题归纳 - {date_str} [{label}]") os.makedirs(CONTEXT_DIR, exist_ok=True) - context = load_context(date_str) + context = load_context(date_str, channel=channel) if not context: print(" ℹ️ 无待处理数据,退出") return @@ -210,14 +299,14 @@ def main(): return if args.dry_run: - desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json") + desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{channel}_{date_str}.json") payload = {"date": date_str, "descriptions": descriptions} with open(desc_path, "w", encoding="utf-8") as f: json.dump(payload, f, ensure_ascii=False, indent=2) print(f"[DRY-RUN] 描述已保存到 {desc_path},未回写文档") return - apply_descriptions(date_str, descriptions) + apply_descriptions(date_str, descriptions, channel=channel) if __name__ == "__main__": diff --git a/scripts/detect_p0_wechat.py b/scripts/detect_p0_wechat.py new file mode 100755 index 0000000..b4c531e --- /dev/null +++ b/scripts/detect_p0_wechat.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +微信 P0 问题实时检测与分发 + +功能: + 1. 从 MySQL 读取最近一段时间的微信用户火线救火群消息 + 2. 复用 sync_feishu_feedback.py 的聚类 + 优先级判定逻辑 + 3. 过滤已推送过的 P0 簇(去重) + 4. 
仅推送新增 P0 到「小葵小葵」群 + +设计: + - 每分钟由 crontab 调用一次 + - 查询最近 2 小时的消息,确保聚类质量 + - 用「簇签名」(sorted message_ids)做去重 + - 每天 10:00 清空去重状态(与全量分发错开) + +用法: + python3 detect_p0_wechat.py [--dry-run] [--lookback-minutes 120] +""" + +import sys, os, json, hashlib, argparse, pymysql +from datetime import datetime, timedelta + +SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skills", "feishu-feedback-sync", "scripts") +sys.path.insert(0, SKILL_DIR) + +from sync_feishu_feedback import ( + sort_threads, get_tenant_token, + DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS, + MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB, +) +from priority_classifier import compute_final_priority + +# === 微信专用配置 === +STATE_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "tmp", "p0_dispatched_state_wechat.json") +LOOKBACK_MINUTES = 120 +CLUSTER_MIN_SIZE = 2 + + +def load_dispatched_state(): + try: + with open(STATE_FILE, "r") as f: + state = json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + state = {} + cutoff = (datetime.now() - timedelta(hours=24)).isoformat() + return {k: v for k, v in state.items() if v > cutoff} + + +def save_dispatched_state(state): + os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True) + tmp = STATE_FILE + ".tmp" + with open(tmp, "w") as f: + json.dump(state, f, ensure_ascii=False, indent=2) + os.rename(tmp, STATE_FILE) + + +def cluster_signature(cluster_msgs): + ids = sorted(str(m[0]) for m in cluster_msgs) + return hashlib.md5(",".join(ids).encode()).hexdigest() + + +def is_probably_p0(cluster_msgs): + if len(cluster_msgs) < CLUSTER_MIN_SIZE: + return False, None + info = compute_final_priority(cluster_msgs) + return info["priority"] == "P0", info + + +def generate_p0_alert_text(cluster_msgs, priority_info): + root_sender = cluster_msgs[0][1] + root_time = cluster_msgs[0][6] + root_text = "" + for m in cluster_msgs: + t = str(m[3]) if m[3] else "" + t = t.strip() + if t and len(t) > 3: + root_text 
= t[:100] + break + senders = list(dict.fromkeys(m[1] for m in cluster_msgs)) + + return "\n".join([ + f"🚨 微信 P0 实时告警", + f"", + f"**报告人:** {root_sender}", + f"**时间:** {root_time}", + f"**涉及人员:** {'、'.join(senders[:5])}" + ("等" if len(senders) > 5 else ""), + f"**消息数:** {len(cluster_msgs)} 条", + f"", + f"**摘要:** {root_text}", + f"", + f"**判定依据:** {priority_info.get('reasoning', 'P0')}", + f"**修复时限:** {priority_info.get('deadline', '2小时内')}", + ]) + + +def dispatch_p0_alert(alert_text): + import urllib.request + token = get_tenant_token(cred_dir=DISPATCH_CRED_DIR) + + content_parts = [] + for line in alert_text.split("\n"): + if line.strip(): + content_parts.append([{"tag": "text", "text": line + "\n"}]) + + if P0_NOTIFY_USERS: + at_line = [{"tag": "text", "text": "\n⚠️ 请关注: "}] + for uid in P0_NOTIFY_USERS: + at_line.append({"tag": "at", "user_id": uid}) + at_line.append({"tag": "text", "text": " "}) + content_parts.append(at_line) + + post_content = json.dumps({ + "zh_cn": { + "title": "🚨 微信 P0 问题实时告警", + "content": content_parts + } + }, ensure_ascii=False) + + body = json.dumps({ + "receive_id": DISPATCH_CHAT_ID, + "msg_type": "post", + "content": post_content + }, ensure_ascii=False).encode() + + req = urllib.request.Request( + "https://open.feishu.cn/open-apis/im/v1/messages?receive_id_type=chat_id", + data=body, + headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, + method="POST" + ) + resp = urllib.request.urlopen(req, timeout=10) + d = json.loads(resp.read()) + if d.get("code") == 0: + return True + else: + print(f" ⚠️ 实时分发失败: {d.get('msg', '')[:100]}") + return False + + +def should_clear_state(): + now = datetime.now() + return now.hour == 10 and now.minute <= 1 + + +def main(): + parser = argparse.ArgumentParser(description="微信 P0 问题实时检测与分发") + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--lookback-minutes", type=int, default=LOOKBACK_MINUTES) + args = parser.parse_args() + + if 
should_clear_state(): + print("[P0-wechat] 10:00 清空去重状态") + save_dispatched_state({}) + print("[P0-wechat] 全量分发时段,跳过实时检测") + return + + print(f"[P0-wechat] 扫描最近 {args.lookback_minutes} 分钟微信消息...") + lookback_start = (datetime.now() - timedelta(minutes=args.lookback_minutes)).strftime("%Y-%m-%d %H:%M:%S") + now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + conn = pymysql.connect( + host=MYSQL_HOST, port=MYSQL_PORT, + user=MYSQL_USER, password=MYSQL_PASS, + database=MYSQL_DB, charset="utf8mb4" + ) + cursor = conn.cursor() + cursor.execute(""" + SELECT svr_msg_id, sender_name, msg_type, content, media_url, refer_msg_svrid, + DATE_FORMAT(msg_time, '%%Y-%%m-%%d %%H:%%i:%%s') as msg_time, msg_timestamp + FROM wechat_group_message + WHERE msg_time >= %s AND msg_time <= %s + ORDER BY msg_time ASC + """, (lookback_start, now_str)) + raw_rows = cursor.fetchall() + conn.close() + + # 映射为统一元组格式 (message_id, sender_name, msg_type, content, media_url, quote_message_id, msg_time, msg_timestamp) + rows = [] + for row in raw_rows: + svr_id, sname, mtype, content, murl, ref_id, mtime, mts = row + rows.append(( + str(svr_id) if svr_id else "", + sname or "", + mtype or "text", + content or "", + murl or "", + str(ref_id) if ref_id else "", + mtime or "", + int(mts) if mts else 0, + )) + + print(f"[P0-wechat] 查询到 {len(rows)} 条微信消息") + + if len(rows) < 2: + print("[P0-wechat] 消息不足,退出") + return + + sorted_msgs, clusters, cluster_order = sort_threads(rows) + print(f"[P0-wechat] 聚类完成:{len(clusters)} 个簇") + + state = load_dispatched_state() + print(f"[P0-wechat] 已记录 {len(state)} 个已推送簇签名") + + new_p0_count = 0 + for cid in cluster_order: + cmsgs = clusters[cid] + is_p0, info = is_probably_p0(cmsgs) + if not is_p0: + continue + + sig = cluster_signature(cmsgs) + if sig in state: + print(f"[P0-wechat] 已推送过,跳过: sig={sig[:8]}...") + continue + + print(f"[P0-wechat] 🚨 发现新 P0! sig={sig[:8]}... 
{len(cmsgs)}条消息") + + if args.dry_run: + alert = generate_p0_alert_text(cmsgs, info) + print(f"[DRY-RUN] 将发送:\n{alert}") + state[sig] = datetime.now().isoformat() + new_p0_count += 1 + else: + alert = generate_p0_alert_text(cmsgs, info) + if dispatch_p0_alert(alert): + print(f"[P0-wechat] ✅ P0 已实时推送") + state[sig] = datetime.now().isoformat() + new_p0_count += 1 + else: + print(f"[P0-wechat] ❌ 推送失败") + + if new_p0_count > 0: + save_dispatched_state(state) + print(f"[P0-wechat] 共推送 {new_p0_count} 个新 P0") + + print("[P0-wechat] 完成") + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_feishu_feedback_wrapper.sh b/scripts/sync_feishu_feedback_wrapper.sh index 4a25abe..5ec6fa4 100755 --- a/scripts/sync_feishu_feedback_wrapper.sh +++ b/scripts/sync_feishu_feedback_wrapper.sh @@ -18,7 +18,7 @@ log "=== 每日全量分发开始 ===" YESTERDAY=$(date -d "yesterday" +%Y-%m-%d) cd /root/.openclaw/workspace-xiaokui -python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --date "$YESTERDAY" --steps 7 --dispatch-mode all >> "$LOG_FILE" 2>&1 +python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --date "$YESTERDAY" --steps 7 --ai-placeholders --skip-dispatch >> "$LOG_FILE" 2>&1 log "=== 每日全量分发结束 ===" exit 0 diff --git a/scripts/sync_wechat_feedback.py b/scripts/sync_wechat_feedback.py new file mode 100755 index 0000000..aa989d6 --- /dev/null +++ b/scripts/sync_wechat_feedback.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +""" +微信用户反馈同步脚本 — 复用飞书聚类/归纳/优先级/分发逻辑 + +数据格式与飞书 sync_feishu_feedback.py 完全一致: + (message_id, sender_name, msg_type, content, media_url, quote_message_id, msg_time, msg_timestamp) + +用法: + python3 sync_wechat_feedback.py --date 2026-05-21 --steps 7 --ai-placeholders --skip-dispatch + python3 sync_wechat_feedback.py --apply-ai /path/to/ai_descriptions.json +""" + +import sys +import os + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +SKILL_DIR = os.path.join(SCRIPT_DIR, "..", "skills", "feishu-feedback-sync", "scripts") 
+sys.path.insert(0, SKILL_DIR) + +import argparse +import json +import pymysql +import subprocess +from datetime import datetime, timedelta + +# ── Monkey-patch 微信专用常量 BEFORE importing 共享模块 ── +import sync_feishu_feedback as fsf + +# 微信知识库父文档 +fsf.SUMMARY_PARENT_NODE = "XhtGwjitFizzCNkw8Xzc2IXsnuf" # 微信用户反馈问题汇总 + +# ── 数据库 ── +DB_CONFIG = { + "host": "bj-cdb-8frbdwju.sql.tencentcdb.com", + "port": 25413, + "user": "read_only", + "password": "fdsfiidier^$*hjfdijjd232", + "database": "vala_test", + "charset": "utf8mb4", +} + +CLI = "lark-cli" +CRED_DIR = "/root/.openclaw/credentials/xiaokui" +CONTEXT_DIR = os.path.join(SCRIPT_DIR, "..", "output", "daily_feedback") + + +def fetch_wechat_data(date_str): + """从 wechat_group_message 读取指定日期的消息, + 映射为与飞书 fetch_data 完全一致的元组格式: + (message_id, sender_name, msg_type, content, media_url, quote_message_id, msg_time, msg_timestamp) + """ + conn = pymysql.connect(**DB_CONFIG) + cursor = conn.cursor() + cursor.execute(""" + SELECT svr_msg_id, sender_name, msg_type, content, media_url, + refer_msg_svrid, DATE_FORMAT(msg_time, '%%Y-%%m-%%d %%H:%%i:%%s') as msg_time, + msg_timestamp + FROM wechat_group_message + WHERE msg_time >= %s AND msg_time < %s + ORDER BY msg_time ASC + """, (f"{date_str} 00:00:00", f"{date_str} 23:59:59")) + rows = cursor.fetchall() + conn.close() + + result = [] + for row in rows: + svr_id, sname, mtype, content, murl, ref_id, mtime, mts = row + result.append(( + str(svr_id) if svr_id else "", # message_id + sname or "", # sender_name + mtype or "text", # msg_type + content or "", # content + murl or "", # media_url + str(ref_id) if ref_id else "", # quote_message_id + mtime or "", # msg_time + int(mts) if mts else 0, # msg_timestamp + )) + return result + + +def gen_context_json(date_str, clusters, cluster_order): + """保存簇上下文 JSON 供 AI 使用""" + os.makedirs(CONTEXT_DIR, exist_ok=True) + ctx = {"date": date_str, "clusters": []} + for idx, cid in enumerate(cluster_order): + cmsgs = clusters[cid] + 
ctx["clusters"].append({ + "index": idx + 1, + "cluster_id": cid, + "message_count": len(cmsgs), + "messages": [ + {"sender": m[1], "content": m[3], "msg_type": m[2], + "time": m[6], "message_id": m[0], "quote_message_id": m[5]} + for m in cmsgs + ], + }) + path = os.path.join(CONTEXT_DIR, f"wechat_cluster_context_{date_str}.json") + with open(path, "w", encoding="utf-8") as f: + json.dump(ctx, f, ensure_ascii=False, indent=2) + print(f" 📝 微信 AI 上下文已保存: {path}") + return path + + +def main(): + parser = argparse.ArgumentParser(description="微信问题反馈同步") + parser.add_argument("--date", type=str, required=True, help="处理日期 YYYY-MM-DD") + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--skip-priority", action="store_true") + parser.add_argument("--skip-dispatch", action="store_true") + parser.add_argument("--ai-placeholders", action="store_true", + help="使用 [待AI归纳:#N] 占位符") + parser.add_argument("--apply-ai", type=str, default=None, + help="应用 AI 描述 JSON") + parser.add_argument("--steps", type=str, default="1-7") + args = parser.parse_args() + + date_str = args.date + + # ── --apply-ai 模式 ── + if args.apply_ai: + with open(args.apply_ai, "r", encoding="utf-8") as f: + ai_data = json.load(f) + descriptions = ai_data.get("descriptions", []) + print(f"📋 加载 {len(descriptions)} 条微信 AI 描述,日期: {date_str}") + + # 读取上下文重建聚类 + ctx_path = os.path.join(CONTEXT_DIR, f"wechat_cluster_context_{date_str}.json") + if not os.path.exists(ctx_path): + print(f"❌ 上下文文件不存在: {ctx_path}") + sys.exit(1) + with open(ctx_path, "r", encoding="utf-8") as f: + ctx = json.load(f) + + clusters = {} + cluster_order = [] + original_index_map = {} # cluster_id → original context index + for c in ctx["clusters"]: + cid = c["cluster_id"] + original_index_map[cid] = c["index"] + msgs_data = c["messages"] + msgs = [] + for m in msgs_data: + msgs.append(( + m.get("message_id", cid), + m["sender"], + m.get("msg_type", "text"), + m["content"], + "", + m.get("quote_message_id", ""), + 
m.get("time", ""), + 0 + )) + if cid not in clusters: + clusters[cid] = [] + cluster_order.append(cid) + clusters[cid].extend(msgs) + + # 生成归中文档(复用飞书函数) + # generate_summary 会跳过 <2 条消息的簇并重新编号 + # 需要建立 original_index → placeholder_number 映射 + summary_md = fsf.generate_summary( + clusters, cluster_order, + skip_priority=True, ai_placeholders=True + ) + if isinstance(summary_md, tuple): + summary_md = summary_md[0] + + # 构建 original_index → placeholder_number 映射 + valid_count = 0 + index_mapping = {} # original_index → placeholder_number + for cid in cluster_order: + if len(clusters[cid]) >= 2: + valid_count += 1 + idx = original_index_map.get(cid, valid_count) + index_mapping[idx] = valid_count + + # 替换占位符:用 original_index 查映射得到 placeholder_number + for item in descriptions: + old_idx = item["index"] + desc = item["description"] + new_idx = index_mapping.get(old_idx) + if new_idx is None: + # 该簇被 generate_summary 跳过(单消息),忽略 + continue + placeholder = f"[待AI归纳:#{new_idx}]" + summary_md = summary_md.replace(placeholder, desc) + print(f" 🔄 微信 #{old_idx}→#{new_idx}: {placeholder} → {desc[:50]}...") + + # 回写知识库子文档 + title = f"微信-{date_str} 问题反馈" + nodes = fsf.list_child_nodes() + node_info = nodes.get(title, {}) + obj_token = node_info.get("obj_token") + + if not obj_token: + print(f" 📝 创建新文档: {title}") + fsf.update_summary_doc_as_children({date_str: summary_md}, title_prefix="微信-") + nodes = fsf.list_child_nodes() + node_info = nodes.get(title, {}) + obj_token = node_info.get("obj_token") + + if not obj_token: + print("❌ 无法创建/找到文档") + sys.exit(1) + + tmp_md = "tmp/wechat_ai_summary.md" + with open(tmp_md, "w", encoding="utf-8") as f: + f.write(summary_md) + env = os.environ.copy() + env["LARKSUITE_CLI_CONFIG_DIR"] = CRED_DIR + result = subprocess.run( + [CLI, "docs", "+update", "--doc", obj_token, "--as", "bot", + "--mode", "overwrite", "--markdown", f"@{tmp_md}"], + env=env, capture_output=True, text=True, timeout=15) + os.unlink(tmp_md) + + try: + resp = 
json.loads(result.stdout) + assert resp.get("ok"), f"写入失败: {result.stdout[:300]}" + print(f" ✅ AI 描述已应用到微信文档: {title}") + + # 分发到群聊 + if not args.skip_dispatch: + print(f" 📨 分发微信归纳到群聊...") + child_nt = node_info.get("node_token", fsf.SUMMARY_PARENT_NODE) + child_url = f"https://makee-interactive.feishu.cn/wiki/{child_nt}" + fsf.dispatch_summary_to_chat( + f"微信-{date_str}", summary_md, + p0_only=False, doc_url=child_url + ) + print(f" ✅ 已分发") + except Exception as e: + print(f" ❌ {e}") + sys.exit(1) + + if os.path.exists(ctx_path): + os.unlink(ctx_path) + print(f" 🗑️ 已清理上下文文件") + return + + # ── 正常同步流程 ── + print(f"\n📊 查询微信 {date_str} 数据...") + rows = fetch_wechat_data(date_str) + if not rows: + print(f" ⚠️ {date_str} 无微信数据") + return + + print(f" 📋 共 {len(rows)} 条消息") + + do_summary = int(args.steps.split("-")[-1]) >= 3 + total = len(rows) + + if do_summary: + # 步骤 3:聚类(直接复用飞书 sort_threads) + sorted_rows, clusters, cluster_order = fsf.sort_threads(rows) + if not cluster_order: + print(f" 无有效问题簇(需要≥2条消息)") + return + + print(f" 聚类完成:{len(cluster_order)} 个问题") + for cid in cluster_order: + cmsgs = clusters[cid] + earliest = min(m[6] for m in cmsgs) + print(f" 簇 {cid}: {len(cmsgs)} 条消息,始于 {earliest}") + + # 保存 AI 上下文 + ctx_path = None + if args.ai_placeholders: + ctx_path = gen_context_json(date_str, clusters, cluster_order) + + # 步骤 4-6:生成归纳 + 优先级 + 写入知识库 + summary_md = fsf.generate_summary( + clusters, cluster_order, + skip_priority=args.skip_priority, + ai_placeholders=args.ai_placeholders + ) + if isinstance(summary_md, tuple): + summary_md = summary_md[0] + + print(summary_md) + + if not args.dry_run: + fsf.update_summary_doc_as_children({date_str: summary_md}, title_prefix="微信-") + print(f" ✅ 微信-{date_str} 问题反馈 写入成功") + + # 步骤 7:分发(非 ai_placeholders 模式直接分发,否则等 --apply-ai) + if do_summary and not args.skip_dispatch and not args.ai_placeholders: + print(f"\n📨 微信步骤7:问题分发...") + child_nodes = fsf.list_child_nodes() + child_title = f"微信-{date_str} 问题反馈" + child_info 
= child_nodes.get(child_title, {}) + child_nt = child_info.get("node_token", fsf.SUMMARY_PARENT_NODE) + child_url = f"https://makee-interactive.feishu.cn/wiki/{child_nt}" + fsf.dispatch_summary_to_chat( + f"微信-{date_str}", summary_md, + p0_only=False, doc_url=child_url + ) + print(f" ✅ 已分发") + + print(f"\n🎉 微信同步完成,总计处理 {total} 条") + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_wechat_feedback_wrapper.sh b/scripts/sync_wechat_feedback_wrapper.sh new file mode 100755 index 0000000..8e4a592 --- /dev/null +++ b/scripts/sync_wechat_feedback_wrapper.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# 任务名称:微信用户反馈每日全量同步(占位符模式,不分发) +# 执行时间:每天 10:02 +# 归属 Agent:小葵 xiaokui + +set -e + +export PATH=/root/.nvm/versions/node/v24.14.0/bin:$PATH +LOG_FILE="/var/log/xiaokui_wechat_feedback_sync.log" + +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE" +} + +log "=== 微信每日同步开始 ===" + +YESTERDAY=$(date -d "yesterday" +%Y-%m-%d) + +cd /root/.openclaw/workspace-xiaokui +python3 scripts/sync_wechat_feedback.py --date "$YESTERDAY" --steps 7 --ai-placeholders --skip-dispatch >> "$LOG_FILE" 2>&1 + +log "=== 微信每日同步结束 ===" +exit 0 diff --git a/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc b/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc index d978e80..cdb538b 100644 Binary files a/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc and b/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc differ diff --git a/skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py b/skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py index 53a786e..e1dc461 100755 --- a/skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py +++ b/skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py @@ -49,6 +49,30 @@ P0_NOTIFY_USERS = [ # 推断引用策略参数 TIME_WINDOW_MIN = 2 # 同发送者聚类时间窗口(分钟) GAP_THRESHOLD_MIN = 30 # 大时间跨度视为新话题(分钟) +SIMILARITY_FLOOR = 
0.08 # 推断引用的最小内容相似度(低于此值视为话题切换) +MAX_CLUSTER_MSGS = 15 # 单簇最大消息数(超过则尝试拆分) + + +def content_similarity(text1, text2): + """计算两条消息的内容相似度(中文分词 + Jaccard 系数) + 用于判断推断引用时是否发生话题切换。 + """ + if not text1 or not text2: + return 0.0 + # 提取中文词(2字以上)和英文词(3字以上),过滤纯数字和标点 + t1 = set(re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', str(text1))) + t2 = set(re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', str(text2))) + if not t1 or not t2: + return 0.0 + # 去掉停用词级别的短词 + stop_words = {'收到', '好的', '明白', '可以', '这个', '那个', '请问', '图片', '文件', '媒体', '报告'} + t1 = t1 - stop_words + t2 = t2 - stop_words + if not t1 or not t2: + return 0.0 + intersection = t1 & t2 + union = t1 | t2 + return len(intersection) / len(union) def get_env(): @@ -189,6 +213,9 @@ def infer_missing_references(rows): if rows[j][1] == sender: diff = (ts - parse_time(rows[j][6])).total_seconds() / 60 if diff <= TIME_WINDOW_MIN: + # 内容相似度检查:同发送者短时间内的续话应有关联 + if content_similarity(content, rows[j][3]) < SIMILARITY_FLOOR: + break # 内容差异大,可能是发新话题 if rows[j][5]: same_sender_parent = rows[j][5] else: @@ -201,13 +228,15 @@ def infer_missing_references(rows): break break - # 策略 3: 最近不同发送者(有 gap 限制防止跨话题) + # 策略 3: 最近不同发送者(gap + 内容相似度双重校验防跨话题) prev_other = None for j in range(i - 1, -1, -1): if rows[j][1] != sender: diff = (ts - parse_time(rows[j][6])).total_seconds() / 60 if diff <= GAP_THRESHOLD_MIN: - prev_other = rows[j][0] + # 内容相似度检查:当前消息和前一条不同发送者的消息是否话题相关 + if content_similarity(content, rows[j][3]) >= SIMILARITY_FLOOR: + prev_other = rows[j][0] break # 优先级:@提及 > 同发送者聚类 > 最近不同发送者 @@ -313,6 +342,61 @@ def sort_threads(rows): clusters = try_merge_orphan_clusters(dict(clusters)) clusters = defaultdict(list, clusters) + # 拆分超大簇:>MAX_CLUSTER_MSGS 条消息的簇按内容相似度切分 + def split_oversized_clusters(clusters_dict): + """对超过 MAX_CLUSTER_MSGS 条的簇,按内容相似度找边界拆分成子簇""" + oversized_ids = [cid for cid, cmsgs in clusters_dict.items() if len(cmsgs) > MAX_CLUSTER_MSGS] + if not oversized_ids: + return clusters_dict + + for oid in oversized_ids: + msgs 
= sorted(clusters_dict[oid], key=lambda m: m[6]) + if len(msgs) <= MAX_CLUSTER_MSGS: + continue + + # 计算相邻消息间的相似度,找低相似度边界 + boundaries = [] # (pos, similarity) — 在 pos 之后切分 + for i in range(len(msgs) - 1): + sim = content_similarity(msgs[i][3], msgs[i+1][3]) + # 在 sender 切换 + 内容相似度极低的位置标记为边界 + if msgs[i][1] != msgs[i+1][1] and sim < 0.05: + boundaries.append((i, sim)) + + if not boundaries: + continue + + # 按相似度最低的边界切分(每条边界产生一个新簇) + boundaries.sort(key=lambda x: x[1]) # 最低相似度优先 + split_points = set() + for pos, sim in boundaries: + if len(split_points) >= 3: # 最多拆 3 次 + break + # 保证拆分后每段至少 3 条消息 + prev_points = sorted(split_points) + if any(abs(pos - p) < 3 for p in prev_points): + continue + split_points.add(pos) + + if not split_points: + continue + + # 执行拆分 + sorted_bps = sorted(split_points) + prev = 0 + del clusters_dict[oid] + for bp in sorted_bps: + sub = msgs[prev:bp+1] + clusters_dict[sub[0][0]] = sub + prev = bp + 1 + sub = msgs[prev:] + if sub: + clusters_dict[sub[0][0]] = sub + + return clusters_dict + + clusters = split_oversized_clusters(dict(clusters)) + clusters = defaultdict(list, clusters) + # 簇间按最早时间排序 cluster_order = sorted(clusters.keys(), key=lambda cid: min(m[6] for m in clusters[cid])) @@ -764,6 +848,23 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None, ai_placeholder=Fals last_speaker = cluster_msgs[-1][1] seen_speakers = set() + # 识别多话题发起者(非首条、非简单回复、发言人切换且有实质内容) + def is_topic_initiator(prev_name, name, text): + if not text or len(text) < 10: + return False + if name == first_speaker: + return False + if prev_name == name: + return False # 同一人连续发言是续话,不是新话题 + # 过滤简单回复/确认 + simple_replies = {'好的', '收到', 'OK', 'ok', '嗯', '额', '是', '对', '行', '好', 'ok', '好嘞'} + clean = text.strip().rstrip('。.,,!!??') + if clean in simple_replies or len(clean) <= 5: + return False + # 包含问题描述特征的词 + problem_keywords = r'问题|bug|不对|错了|错了|没有|空的|少了|多了|显示|报错|异常|失效|打不开|进不去|没有配|是不是|应该|加载|识别|跟读|spine|HUD|UI|音频|声音' + return bool(re.search(problem_keywords, 
text, re.IGNORECASE)) + for i, m in enumerate(cluster_msgs): name = m[1] text = clean_text(str(m[3])) or "" @@ -788,6 +889,8 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None, ai_placeholder=Fals role_tag = "🚩 报告:" elif name == last_speaker and i == len(cluster_msgs) - 1: role_tag = "✅ " + elif i > 0 and is_topic_initiator(cluster_msgs[i-1][1], name, clean_text(str(m[3])) or ""): + role_tag = "📌 " seen_speakers.add(name) lines.append(f"| {name} | {role_tag}{dialogue_info} |") @@ -797,11 +900,18 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None, ai_placeholder=Fals def extract_conclusion(cluster_msgs): """ - 从问题簇全部消息中提取排查结论。 + 从问题簇消息中提取排查结论。 + 长对话(>8条)聚焦后 30% 消息匹配,避免前面无关结论污染。 返回: str 结论文本 """ - all_text = " ".join(str(m[3]) or "" for m in cluster_msgs) + # 长簇聚焦尾部消息(>8条时只用后 30%,至少3条) + if len(cluster_msgs) > 8: + focus_msgs = cluster_msgs[-max(3, len(cluster_msgs) // 3):] + else: + focus_msgs = cluster_msgs + + all_text = " ".join(str(m[3]) or "" for m in focus_msgs) all_text = re.sub(r'\[Image:[^\]]+\]', '', all_text) def match_any(pattern): @@ -810,7 +920,7 @@ def extract_conclusion(cluster_msgs): # 收集解释性/分析性发言 analysis_pat = r'因为|原因是|应该是|可能是|改为了|导致|造成|引起|预下载|上云|缓存|首次|正常.*情况|一般|通常|默认|预期' analysis_lines = [] - for m in cluster_msgs: + for m in focus_msgs: name = m[1] text = str(m[3]) or "" text = re.sub(r'\[Image:[^\]]+\]', '', text).strip() @@ -959,14 +1069,15 @@ def generate_summary(clusters, cluster_order, skip_priority=False, ai_placeholde # 保存 AI 上下文 JSON context_path = None - if ai_placeholders and context_data: - import datetime as dt_module - today_str = dt_module.date.today().strftime("%Y-%m-%d") + if context_data: + # 用数据实际日期命名(取第一个簇第一条消息的日期) + first_msg_time = context_data[0]["messages"][0]["time"] + data_date = first_msg_time[:10] if first_msg_time else date.today().strftime("%Y-%m-%d") context_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), "output", 
"daily_feedback") os.makedirs(context_dir, exist_ok=True) - context_path = os.path.join(context_dir, f"cluster_context_{today_str}.json") + context_path = os.path.join(context_dir, f"cluster_context_{data_date}.json") payload = { - "date": today_str, + "date": data_date, "total_clusters": len(context_data), "clusters": context_data, } @@ -1162,10 +1273,11 @@ def _delete_child_node(obj_token): return False -def update_summary_doc_as_children(day_summaries): +def update_summary_doc_as_children(day_summaries, title_prefix=""): """ 将各日期的归纳结果写入「用户反馈问题汇总」的子文档中。 - day_summaries: dict, key=日期字符串(如'2026-05-06'), value={'summary': str, 'has_p0': bool} + day_summaries: dict, key=日期字符串(如'2026-05-06'), value=summary markdown 字符串 + title_prefix: 可选标题前缀,如 "微信-" 用于区分渠道 逻辑: 1. 列出已有子文档 @@ -1179,7 +1291,7 @@ def update_summary_doc_as_children(day_summaries): lock_path = "/tmp/xiaokui_summary_create.lock" for day in sorted(day_summaries.keys(), reverse=True): - title = f"{day} 问题反馈" + title = f"{title_prefix}{day} 问题反馈" content = day_summaries[day] # 加锁:整个检查+创建+写入流程串行化,彻底避免并发冲突 @@ -1283,7 +1395,7 @@ def update_summary_doc_as_children(day_summaries): -def dispatch_summary_to_chat(day_label, summary_text, p0_only=False): +def dispatch_summary_to_chat(day_label, summary_text, p0_only=False, doc_url=None): """ 将「今日问题归纳」部分发送到指定群聊。 P0问题会 @指定人员。 @@ -1292,6 +1404,7 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False): day_label: 日期标签,如 '2026-04-28' summary_text: 今日问题归纳的 markdown 文本 p0_only: True=仅发送 P0 部分,False=发送全部 + doc_url: 当天详细文档的飞书链接(可选,不传则回退到总文档链接) """ if not DISPATCH_CHAT_ID: print(" ⚠️ 未配置分发群 chat_id,跳过步骤7") @@ -1299,6 +1412,7 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False): # 构建消息内容 # 提取「今日问题归纳」部分(到「今日问题拆解」之前) + # 同时过滤掉"无明确问题"条目(仅限分发消息,文档中保留) lines = summary_text.split("\n") 归纳_lines = [] in_归纳 = False @@ -1309,6 +1423,10 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False): if "今日问题拆解" in line: break if in_归纳: + # 
过滤:跳过"无明确问题"条目 + stripped = line.strip() + if stripped == "- 无明确问题" or stripped == "* 无明确问题": + continue 归纳_lines.append(line) 归纳_content = "\n".join(归纳_lines).strip() @@ -1361,9 +1479,10 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False): content_parts.append([{"tag": "text", "text": 归纳_content}]) # 追加文档链接 + summary_doc_url = doc_url or f"https://makee-interactive.feishu.cn/wiki/{SUMMARY_PARENT_NODE}" content_parts.append([ {"tag": "text", "text": "\n📄 详细文档:"}, - {"tag": "a", "text": f"{day_label} 问题反馈", "href": f"https://makee-interactive.feishu.cn/wiki/RaL6whoYMijyYHkSlWrc7OLLnBy"} + {"tag": "a", "text": f"{day_label} 问题反馈", "href": summary_doc_url} ]) post_content = json.dumps({ @@ -1474,6 +1593,12 @@ def main(): resp = json.loads(result.stdout) if resp.get("ok"): print(f"🎉 AI 描述已应用到文档: {title}") + # 分发 AI 归纳到群聊 + print(f"📨 分发 AI 归纳到群聊...") + child_node_token = nodes[title].get("node_token", SUMMARY_PARENT_NODE) + child_url = f"https://makee-interactive.feishu.cn/wiki/{child_node_token}" + dispatch_summary_to_chat(target_date, summary_md, p0_only=False, doc_url=child_url) + print(f" ✅ 已分发到群聊") else: print(f"❌ 写入失败: {result.stdout[:300]}") sys.exit(1) @@ -1570,13 +1695,19 @@ def main(): if do_summary and not args.skip_dispatch: dispatch_mode = args.dispatch_mode print(f"\n📨 步骤7:问题分发(模式: {dispatch_mode})...") + child_nodes = list_child_nodes() for day, summary_text in day_summaries.items(): # 检查当天归纳中是否有 P0 问题 has_p0 = "⚠️ P0级" in summary_text if dispatch_mode == "p0" and not has_p0: print(f" [{day}] 无P0问题,跳过分发") continue - dispatch_summary_to_chat(day, summary_text, p0_only=(dispatch_mode == "p0")) + # 获取当天子文档链接 + child_title = f"{day} 问题反馈" + child_info = child_nodes.get(child_title, {}) + child_node_token = child_info.get("node_token", SUMMARY_PARENT_NODE) + child_url = f"https://makee-interactive.feishu.cn/wiki/{child_node_token}" + dispatch_summary_to_chat(day, summary_text, p0_only=(dispatch_mode == "p0"), doc_url=child_url) print(f"\n🎉 
同步完成,总计写入 {total_written} 条")