From 7931b77dfb4d602eb23089bd8b20d5f461aab721 Mon Sep 17 00:00:00 2001 From: --git_token Date: Tue, 12 May 2026 08:10:01 +0800 Subject: [PATCH] auto backup 2026-05-12 08:10:01 --- .vala_skill_hashes | 4 +- AGENTS.md | 10 - HEARTBEAT.md | 16 +- memory/.dreams/events.jsonl | 4 + memory/.dreams/short-term-recall.json | 304 +++++++++++++++--- memory/2026-05-11-1838.md | 10 + memory/2026-05-11-1925.md | 264 +++++++++++++++ memory/2026-05-11-1929.md | 29 ++ memory/2026-05-11.md | 67 ++++ .../ai_descriptions_2026-05-11.json | 9 + .../cluster_context_2026-05-11.json | 85 +++++ .../飞书反馈_2026-05-11.xlsx | Bin 0 -> 9659 bytes skills/feishu-feedback-sync/SKILL.md | 44 ++- .../sync_feishu_feedback.cpython-312.pyc | Bin 61730 -> 71645 bytes .../scripts/sync_feishu_feedback.py | 248 ++++++++++++-- skills/feishu-group-msg-sync/SKILL.md | 4 +- .../sync_group_to_mysql.cpython-312.pyc | Bin 20674 -> 31296 bytes .../scripts/fix_merge_forward_216.py | 120 +++++++ .../scripts/sync_group_to_mysql.py | 232 ++++++++++++- 19 files changed, 1359 insertions(+), 91 deletions(-) create mode 100644 memory/2026-05-11-1838.md create mode 100644 memory/2026-05-11-1925.md create mode 100644 memory/2026-05-11-1929.md create mode 100644 memory/2026-05-11.md create mode 100644 output/daily_feedback/ai_descriptions_2026-05-11.json create mode 100644 output/daily_feedback/cluster_context_2026-05-11.json create mode 100644 output/daily_feedback/飞书反馈_2026-05-11.xlsx create mode 100644 skills/feishu-group-msg-sync/scripts/fix_merge_forward_216.py diff --git a/.vala_skill_hashes b/.vala_skill_hashes index ab8e852..a1dd954 100644 --- a/.vala_skill_hashes +++ b/.vala_skill_hashes @@ -12,6 +12,6 @@ vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06 user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81 user-feedback-data-source 
a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13 -feishu-group-msg-sync 085f95a5b89fec3b6a627da25d66ffeeb0be430098387739a64f7903f0ee88d4 user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8 -feishu-feedback-sync 7bd0ccd7112156e8fcfca7512b669f1c956cc085c6de9530e02838fa27d583cc +feishu-feedback-sync e51dd6d0c4f26898ebcc069ab19272d17e86c01a767c36c1d65ab576da38ba11 +feishu-group-msg-sync 1b581de76d419e6a33db0836125efc16ef2c972013fcae6f08c03aa7e2276445 diff --git a/AGENTS.md b/AGENTS.md index 97598aa..ceb1f8c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -127,16 +127,6 @@ System: [...] Feishu[xiaobian] group oc_xxx | 发送者 (ou_xxx) [msg:om_xxx] **简言之:群聊中未被 @ → 可以做事(静默任务),但绝不说话。** -#### 何时发言(仅在被 @ 的前提下) - -即使被 @,以下情况仍应保持沉默(NO_REPLY): - -- 同事之间的闲聊,@ 你只是无意的 -- 已经有人回答了问题 -- 你的回复只是"是的"或"收到",没有实质价值 -- 对话在没有你的情况下进展顺利 - -参与,而非主导。质量 > 数量。 ## 工作区目录规范(强制执行) diff --git a/HEARTBEAT.md b/HEARTBEAT.md index 936f24b..26b4188 100644 --- a/HEARTBEAT.md +++ b/HEARTBEAT.md @@ -1,13 +1,11 @@ # HEARTBEAT.md - -# 保持此文件为空(或仅包含注释)以跳过心跳 API 调用。 -# 当你希望定期检查某些内容时,在下方添加任务。 - -# 示例任务: -# - 检查是否有未处理的同事消息 -# - 检查日历中即将到来的会议 -# - 整理近期记忆文件 - # 自动同步Skill到公司SkillHub - 查看是否有需要推送的skill: 执行脚本 /root/.openclaw/workspace-xiaokui/scripts/sync_skill_to_skillhub.sh,如果推送成功,给李若松发送通知消息,无需推送则静默 + +# AI 问题归纳(每日) +- 检查 output/daily_feedback/ 下是否有当天日期的 cluster_context_YYYY-MM-DD.json 文件 +- 如果有,读取上下文 JSON,为每个问题簇生成精炼的问题描述 +- 将 AI 描述写入 output/daily_feedback/ai_descriptions_YYYY-MM-DD.json +- 执行 python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --apply-ai output/daily_feedback/ai_descriptions_YYYY-MM-DD.json +- 完成后删除 cluster_context 文件(避免重复处理) diff --git a/memory/.dreams/events.jsonl b/memory/.dreams/events.jsonl index f5c2543..e38914d 100644 --- a/memory/.dreams/events.jsonl +++ b/memory/.dreams/events.jsonl @@ -7,3 +7,7 @@ {"type":"memory.recall.recorded","timestamp":"2026-05-07T02:55:43.649Z","query":"问题分类 分类规则 版本更新 声音音频 UI显示 启动运行 语音识别 关卡内容 
priority_classifier","resultCount":6,"results":[{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":73,"endLine":102,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-07T03:19:39.321Z","query":"小葵小葵群 chat_id 发消息 bot","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-08T10:25:44.365Z","query":"MySQL database connection credentials lark_group_message","resultCount":4,"results":[{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-23.md","startLine":28,"endLine":55,"score":1},{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1},{"path":"memory/2026-04-23.md","startLine":47,"endLine":72,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-11T09:48:27.002Z","query":"今日反馈汇总 问题反馈报告 每日汇总","resultCount":5,"results":[{"path":"memory/2026-05-07.md","startLine":86,"endLine":116,"score":1},{"path":"memory/2026-05-07.md","startLine":73,"endLine":91,"score":1},{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-11T10:43:36.686Z","query":"feishu_group_message 表结构 多条转发 
message_type","resultCount":5,"results":[{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1},{"path":"memory/2026-04-23.md","startLine":28,"endLine":55,"score":1},{"path":"memory/2026-04-23.md","startLine":47,"endLine":72,"score":1},{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-10.md","startLine":44,"endLine":68,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-11T11:26:09.200Z","query":"sort_tag 飞书 API 创建节点 权限","resultCount":2,"results":[{"path":"memory/2026-05-09.md","startLine":46,"endLine":65,"score":1},{"path":"memory/2026-05-09.md","startLine":1,"endLine":19,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-11T11:26:09.201Z","query":"知识库 sort_tag 空 文档排序","resultCount":3,"results":[{"path":"memory/2026-05-09.md","startLine":46,"endLine":65,"score":1},{"path":"memory/2026-05-09.md","startLine":78,"endLine":95,"score":1},{"path":"memory/2026-05-06.md","startLine":20,"endLine":47,"score":1}]} diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index 0797c41..fd28acf 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": "2026-05-08T10:25:44.365Z", + "updatedAt": "2026-05-11T11:26:09.201Z", "entries": { "memory:memory/2026-04-18.md:1:5": { "key": "memory:memory/2026-04-18.md:1:5", @@ -81,22 +81,24 @@ "endLine": 142, "source": "memory", "snippet": "问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) 已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 #### 触发方式 - 手动:「同步飞书反馈」「整理反馈对话链」 - 定时:每天 10:00 crontab 自动执行 ## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 ### 步骤4 包含两部分 1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} 2. 
**当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: - \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N", - "recallCount": 4, + "recallCount": 5, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 5, "maxScore": 1, "firstRecalledAt": "2026-05-06T13:30:08.593Z", - "lastRecalledAt": "2026-05-07T02:55:43.649Z", + "lastRecalledAt": "2026-05-11T09:48:27.002Z", "queryHashes": [ "f865295b9ac7", "cd9c89262c30", "ac7fd0b52a4e", - "49c0959dc960" + "49c0959dc960", + "70caeba05281" ], "recallDays": [ "2026-05-06", - "2026-05-07" + "2026-05-07", + "2026-05-11" ], "conceptTags": [ "quote-message-id", @@ -151,22 +153,24 @@ "endLine": 205, "source": "memory", "snippet": "- iOS 的两个相关话题(组件无音频 / Loading 慢)因无引用关系而分成两个簇(需策略3语义聚类解决) - 单消息簇被跳过(需至少 2 条消息才能形成问题) ### Skill 文件最终状态 - `skills/feishu-feedback-sync/SKILL.md`:已包含完整步骤1-4的文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`:已集成 `summarize_cluster()`、`extract_location_elements()`、`generate_summary()` 函数 - crontab 每日 10:00 执行(与步骤3一起) ### 步骤4 架构调整:AI 归纳取代规则生成 [刘新玉] - 2026-04-30 19:07 #### 问题 脚本规则匹配生成的问题描述质量差: - 组件匹配失败(NPC/HUD → \"未知组件\") - 现象摘取了完整原始消息(含 @、无关词) - 端识别不稳定 #### 决策 **脚本输出结构化元数据 + 对话表,AI 负责归纳描述。** - 脚本 `summarize_cluster` 改为输出: 1. 位置元数据(端/环节/课程/组件)— 由 `extract_location_elements` 提取 2. 发言人-要点表格(规则生成) 3. 
问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固", - "recallCount": 4, + "recallCount": 5, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 5, "maxScore": 1, "firstRecalledAt": "2026-05-06T13:30:08.593Z", - "lastRecalledAt": "2026-05-07T02:55:43.649Z", + "lastRecalledAt": "2026-05-11T09:48:27.002Z", "queryHashes": [ "f865295b9ac7", "cd9c89262c30", "ac7fd0b52a4e", - "49c0959dc960" + "49c0959dc960", + "70caeba05281" ], "recallDays": [ "2026-05-06", - "2026-05-07" + "2026-05-07", + "2026-05-11" ], "conceptTags": [ "已包含完整步骤1-4的文档", @@ -186,20 +190,22 @@ "endLine": 86, "source": "memory", "snippet": "- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序 - 新增 `--skip-priority` CLI 参数 - 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑 #### 3. 更新 SKILL.md 文档 - 增加了步骤5的完整文档说明 - 增加了优先级判定规则速查表 #### 4. 真实数据验证结果 - 2026-04-28 数据:2个问题 - P0:iOS端Loading超时致数据丢失/无音频(15条) - P1:移动端NPC HUD偶尔变成一小条(8条) #### 5. 刘新玉要求简化文档格式(~11:46) - 反馈文档太繁琐,只要在原有格式把\"问题 1\"改成\"P0-问题 1\" - 去掉了优先级分布汇总、额外信息行,只保留标题前缀 + 一行判定依据 - 最终格式:`### P0-问题 1` + `**优先级判定:** 规则...` - 知识库文档已更新为简洁版 #### 6. 修复推断引用算法 Bug - 策略2的 else 分支(同发送者无引用时往前找不同发送者)缺少时间限制 - 导致胡陈辰 20:45 的媒体消息被推断引用到徐思清 12:29(跨8小时) - 修复:加了 `GAP_THRESHOLD_MIN` 检查 #### 7. 
文档写入方式改进 - 从 `insert_before` + `replace_range` 改为 `append` 模式", - "recallCount": 3, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 3, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-07T02:22:47.713Z", - "lastRecalledAt": "2026-05-07T02:55:43.649Z", + "lastRecalledAt": "2026-05-11T09:48:27.002Z", "queryHashes": [ "cd9c89262c30", "ac7fd0b52a4e", - "49c0959dc960" + "49c0959dc960", + "70caeba05281" ], "recallDays": [ - "2026-05-07" + "2026-05-07", + "2026-05-11" ], "conceptTags": [ "generate-summary", @@ -252,18 +258,20 @@ "endLine": 23, "source": "memory", "snippet": "# 2026-04-17 工作日志 ## 飞书群消息同步改造(李若松要求) ### 变更内容 - **存储从飞书表格改为 MySQL**:新建 `vala_test.lark_group_message` 表,结构参考 `wechat_group_message` - **同步频率**:从每6小时改为每4小时 - **数据范围**:2026.4.1 起的「内容测试问题反馈」群消息 - **数据库账户**:chatbot(test环境,仅对 lark_group_message 有写入权限) ### 完成事项 1. ✅ 创建 `lark_group_message` 表(唯一键 message_id 防重复) 2. ✅ 编写新同步脚本 `scripts/sync_lark_group_to_mysql.py`(基于原有 sync_group_to_sheet.py 改造) 3. ✅ 首次全量同步完成:172 条记录(2026-04-01 ~ 2026-04-17),含文本134条、图片17条、视频10条、富文本9条、表情2条 4. ✅ crontab 定时任务已替换:旧的每6小时飞书表格同步 → 新的每4小时MySQL同步 5. ✅ 更新 secrets.md 记录 chatbot 账户 6. ✅ 更新 user-feedback-collector SKILL.md 反馈数据源信息 ### 文件变更 - 新增:`scripts/sync_lark_group_to_mysql.py`(核心同步脚本) - 新增:`scripts/run_lark_group_sync.s", - "recallCount": 1, + "recallCount": 2, "dailyCount": 0, "groundedCount": 0, - "totalScore": 1, + "totalScore": 2, "maxScore": 1, "firstRecalledAt": "2026-05-08T10:25:44.365Z", - "lastRecalledAt": "2026-05-08T10:25:44.365Z", + "lastRecalledAt": "2026-05-11T10:43:36.686Z", "queryHashes": [ - "cc0dd7ef50d7" + "cc0dd7ef50d7", + "5abc37103c15" ], "recallDays": [ - "2026-05-08" + "2026-05-08", + "2026-05-11" ], "conceptTags": [ "vala-test.lark-group-message", @@ -283,18 +291,20 @@ "endLine": 55, "source": "memory", "snippet": "- 创建脚本:`scripts/sync_feishu_full_history.py` - 同步时间范围:2026-04-10 至今 - 成功处理 57 条消息(包含文本、图片、视频等多种类型) - 所有媒体文件已上传到 COS - 数据已全部写入数据库 4. 
**定时任务配置** - 每 4 小时自动同步「内容测试问题反馈」群消息 - crontab: `0 */4 * * * cd /root/.openclaw/workspace-xiaokui/skills/feishu-group-msg-sync/scripts && python3 sync_group_to_mysql.py` 5. **文档更新** - `feishu-group-msg-sync/SKILL.md`: 说明数据库存储方案和引用回复支持 - `user-feedback-collector/SKILL.md`: 更新飞书反馈数据源为 MySQL - `feishu-group-msg-sync/references/query_examples.md`: 提供引用回复查询示例 ### 技术要点 - **引用回复识别**: 从消息 `body.quote` 字段提取被引用消息的 `message_id` - **去重机制**: 使用 `UNIQUE KEY (chat_id, message_id)` 避免重复记录 - **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文", - "recallCount": 1, + "recallCount": 2, "dailyCount": 0, "groundedCount": 0, - "totalScore": 1, + "totalScore": 2, "maxScore": 1, "firstRecalledAt": "2026-05-08T10:25:44.365Z", - "lastRecalledAt": "2026-05-08T10:25:44.365Z", + "lastRecalledAt": "2026-05-11T10:43:36.686Z", "queryHashes": [ - "cc0dd7ef50d7" + "cc0dd7ef50d7", + "5abc37103c15" ], "recallDays": [ - "2026-05-08" + "2026-05-08", + "2026-05-11" ], "conceptTags": [ "sync-group-to-mysql.py", @@ -314,18 +324,20 @@ "endLine": 120, "source": "memory", "snippet": "| iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | | 网络问题(4/29) | Ann | 2 | ### 飞书群 4/25-4/27 数据查询结果 - 查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 - 结果:0 条,该群此时间段无消息记录 ### 反馈同步 Skill 创建 [刘新玉] 将飞书问题反馈同步流程封装为 `feishu-feedback-sync` skill,并计划注册定时任务。 #### Skill 文件 - `skills/feishu-feedback-sync/SKILL.md` — 完整技能文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心同步脚本 - `scripts/sync_feishu_feedback_wrapper.sh` — 定时任务包装脚本 #### Skill 功能 1. 从 MySQL `lark_group_message` 查询近 N 天数据 2. 写入知识库电子表格(按天分 sheet) 3. **反馈对话链排序**:按引用关系将同一问题讨论聚合呈现 #### 策略2:推断缺失引用关系 [刘新玉] 问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. 
**@提及匹配**:消息中 @了某人", - "recallCount": 1, + "recallCount": 2, "dailyCount": 0, "groundedCount": 0, - "totalScore": 1, + "totalScore": 2, "maxScore": 1, "firstRecalledAt": "2026-05-08T10:25:44.365Z", - "lastRecalledAt": "2026-05-08T10:25:44.365Z", + "lastRecalledAt": "2026-05-11T10:43:36.686Z", "queryHashes": [ - "cc0dd7ef50d7" + "cc0dd7ef50d7", + "5abc37103c15" ], "recallDays": [ - "2026-05-08" + "2026-05-08", + "2026-05-11" ], "conceptTags": [ "网络", @@ -345,18 +357,20 @@ "endLine": 72, "source": "memory", "snippet": "- **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文件处理**: 下载飞书媒体文件并上传到腾讯 COS,记录可访问 URL ### 验证结果 - ✅ 数据库表结构已更新(3 个新字段) - ✅ 历史数据已同步(57 条消息) - ✅ 定时任务已配置 - ✅ 文档已更新 - ℹ️ 当前数据中无引用回复消息(quote_message_id 都是 NULL),说明这段时间内群消息都是直接发送的 ### 查询能力 支持以下查询场景: - 查询某条消息的所有回复 - 查询完整对话链(原消息 + 所有回复) - 统计热门话题(被回复最多的消息) - 按时间、发送人、关键词筛选 ### 文件清单 - `scripts/alter_lark_group_message.sql` - 数据库表结构升级脚本 - `scripts/sync_feishu_full_history.py` - 完整历史数据同步脚本 - `skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py` - 增量同步脚本 - `skills/feishu-group-msg-sync/references/query_examples.md` - 查询示例文档", - "recallCount": 1, + "recallCount": 2, "dailyCount": 0, "groundedCount": 0, - "totalScore": 1, + "totalScore": 2, "maxScore": 1, "firstRecalledAt": "2026-05-08T10:25:44.365Z", - "lastRecalledAt": "2026-05-08T10:25:44.365Z", + "lastRecalledAt": "2026-05-11T10:43:36.686Z", "queryHashes": [ - "cc0dd7ef50d7" + "cc0dd7ef50d7", + "5abc37103c15" ], "recallDays": [ - "2026-05-08" + "2026-05-08", + "2026-05-11" ], "conceptTags": [ "quote-message-id", @@ -368,6 +382,224 @@ "update", "处理" ] + }, + "memory:memory/2026-05-07.md:86:116": { + "key": "memory:memory/2026-05-07.md:86:116", + "path": "memory/2026-05-07.md", + "startLine": 86, + "endLine": 116, + "source": "memory", + "snippet": "| 9 | 用户反馈问题 | 建议、反馈、体验、优化 | | 10 | 其他问题 | (默认兜底) | ### 脚本修改 - `priority_classifier.py` 新增 `proble m_category(keyword)` 函数和 `CATEGORY_KEYWORDS` 规则 - `sync_feishu_feedback.py` 中 `summarize_cluster()` 新增 
`category` 字段 - `generate_summary()` 输出格式改为按优先级+分类分组 --- ## 新增步骤7:问题分发 [刘新玉] - 2026-05-07 11:09 ### 需求 将每日问题归纳同步到「小葵小葵」群,P0问题@相关负责人跟进。 ### 分发规则 1. 将\"今日问题归纳\"内容发送到指定群(格式:标题+归纳正文+文档链接) 2. P0问题 @相关负责人的 @列表(从 `summarize_cluster` 的 `operators` 字段提取发言人) ### Bot身份问题 - Bot应用名:**小研**(App ID: `cli_a931175d41799cc7`) - Bot发消息需要已经加入目标群 - 小葵Bot在「小葵小葵」群中发消息报 500 \"chat does not exist\" → 可能是不同应用 - **待解决**:需要确认\"小葵小葵\"群里加的是哪个应用,或者把小研Bot加进去 ### 脚本修改 - `sync_feishu_feedback.py` 新增 `dispatch_to_group()` 函数 -", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-11T09:48:27.002Z", + "lastRecalledAt": "2026-05-11T09:48:27.002Z", + "queryHashes": [ + "70caeba05281" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "priority-classifier.py", + "m-category", + "category-keywords", + "sync-feishu-feedback.py", + "summarize-cluster", + "generate-summary", + "cli-a931175d41799cc7", + "dispatch-to-group" + ] + }, + "memory:memory/2026-05-07.md:73:91": { + "key": "memory:memory/2026-05-07.md:73:91", + "path": "memory/2026-05-07.md", + "startLine": 73, + "endLine": 91, + "source": "memory", + "snippet": "``` ### 问题分类规则(按匹配优先级从上到下) | 优先级 | 分类 | 匹配关键词 | |--------|------|-----------| | 1 | 启动/运行异常 | 闪退、崩溃、crash、进不去、卡死、黑屏、重启、服务器宕机、服务器挂了 | | 2 | 版本/更新类 | 更新、升级、新版、版本、安装失败、OTA、热更新、无法更新、下载更新 | | 3 | Loading/加载类 | 加载、loading、loading卡死、转星星、加载慢、加载超时、loading超时、一直加载 | | 4 | 数据/进度类 | 数据丢失、数据异常、进度丢失、数据错误、分数异常、进度不对、记录丢失、数据对不上 | | 5 | 声音/音频类 | 无声、声音、音频、没声音、音效 | | 6 | 语音识别/判分类 | 判分、识别、语音识别、发音、评分、打分、AI判分 | | 7 | 关卡/内容类 | 关卡、题目、答案、内容 | | 8 | UI显示类 | 显示、界面、UI、HUD、动画、层级、图层、渲染 | | 9 | 用户反馈问题 | 建议、反馈、体验、优化 | | 10 | 其他问题 | (默认兜底) | ### 脚本修改 - `priority_classifier.py` 新增 `proble m_category(keyword)` 函数和 `CATEGORY_KEYWORDS` 规则 - `sync_feishu_feedback.py` 中 `summarize_cluster()` 新增 `category` 字段", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": 
"2026-05-11T09:48:27.002Z", + "lastRecalledAt": "2026-05-11T09:48:27.002Z", + "queryHashes": [ + "70caeba05281" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "启动/运行异常", + "版本/更新类", + "loading/加载类", + "数据/进度类", + "声音/音频类", + "语音识别/判分类", + "关卡/内容类", + "priority-classifier.py" + ] + }, + "memory:memory/2026-04-10.md:44:68": { + "key": "memory:memory/2026-04-10.md:44:68", + "path": "memory/2026-04-10.md", + "startLine": 44, + "endLine": 68, + "source": "memory", + "snippet": "### 验证结果 - 全量同步成功:47条记录写入表格,5张图片+4个视频上传COS - crontab 每小时整点自动执行:`0 * * * *` - 群ID:oc_fabff7672e62a9ced7b326ee4a286c26 ## 封装两个通用Skill **来源:** [李若松] 要求将功能封装为可复用skill ### 1. tencent-cos-upload - 路径:`/root/.openclaw/skills/tencent-cos-upload/` - 功能:上传文件到腾讯COS并生成可访问URL - 提供命令行调用和Python模块两种方式 - 核心文件:`scripts/cos_upload.py`(CosUploader类) ### 2. feishu-group-msg-sync - 路径:`/root/.openclaw/skills/feishu-group-msg-sync/` - 功能:定期同步飞书群聊消息到电子表格,媒体上传COS - 依赖 tencent-cos-upload skill - 核心文件:`scripts/sync_group_to_sheet.py`(模板脚本,修改顶部配置即可复用) - 参考文件:`references/lark-cli-cheatsheet.md` ### 项目脚本也改为引用skill - `scripts/sync_feedback_group.py` 现在只做配置覆盖,逻辑全部引用自skill", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-11T10:43:36.686Z", + "lastRecalledAt": "2026-05-11T10:43:36.686Z", + "queryHashes": [ + "5abc37103c15" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "tencent-cos-upload", + "scripts/cos-upload.py", + "feishu-group-msg-sync", + "scripts/sync-group-to-sheet.py", + "scripts/sync-feedback-group.py", + "验证", + "结果", + "全量" + ] + }, + "memory:memory/2026-05-09.md:46:65": { + "key": "memory:memory/2026-05-09.md:46:65", + "path": "memory/2026-05-09.md", + "startLine": 46, + "endLine": 65, + "source": "memory", + "snippet": "- 注意:xiaokui 无法删除 xiaoyan 创建的文档(跨应用权限隔离),需回退到 xiaoyan 凭证删除 **问题三:子文档排序不规则** - 根因:旧 `sort_tag = dt.timestamp()` 升序导致旧日期在前 - 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 
不足:飞书 Wiki V2 API 创建节点时 `sort_tag` 参数可能被忽略(API 返回均为 null) - 兜底方案:按日期由近到远的顺序依次创建子文档,利用 `node_create_time` 自然排序 - 所有旧文档已删除并按正确顺序重建(5月8日→5月7日→4月28日),5月6日需手动创建 ### 飞书分发消息 `` 标签修复 - 根因:`dispatch_summary_to_chat` 中两步打架——第一步 `re.sub` 注入 HTML `` 文本,第二步 `content_parts` 用正确 `{\"tag\":\"at\"}` 格式插入 - 修复:删除 `re.sub` 注入原始 HTML 标签的代码,仅保留富文本 at tag ### 5月9日补跑问题 - 5月9日10:00定时任务因 `IndentationError` 失败(凌晨08:10自动备份 `c3c8dbb` 损坏了脚本) - 修复:从上游版本恢复被清空的步骤4-7逻辑 + 模块常量 - 手动补跑5月8日数据(8条反馈,1个P0)成功 ### 待验证 - [ ] 确认「用户反馈问题汇总」下子文档排", + "recallCount": 2, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 2, + "maxScore": 1, + "firstRecalledAt": "2026-05-11T11:26:09.200Z", + "lastRecalledAt": "2026-05-11T11:26:09.201Z", + "queryHashes": [ + "cd203fdc056f", + "dbd60c484e39" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "备份", + "sort-tag", + "dt.timestamp", + "node-create-time", + "dispatch-summary-to-chat", + "re.sub", + "content-parts", + "从上游版本恢复被清空的步骤4-7逻辑" + ] + }, + "memory:memory/2026-05-09.md:1:19": { + "key": "memory:memory/2026-05-09.md:1:19", + "path": "memory/2026-05-09.md", + "startLine": 1, + "endLine": 19, + "source": "memory", + "snippet": "## 2026-05-09 工作日志 ### 飞书反馈同步 - 三个文档问题修复 **问题一:图片无法点击查看** - 根因:Markdown表格中 `|` 被 `dialogue_info` 内部的 ` | ` 分隔符破坏 + `![图片]()` 格式在飞书导入时不可靠 - 修复:`info_parts` 分隔符从 `\" | \"` 改为 `
`;图片格式从 `![图片](url)` 改为 `📎 [图片](url)` 可点击链接 **问题二:子文档作者显示\"小研\"** - 根因:现有子文档由 xiaoyan bot (`ou_3e97d43a66639a457f0020a0d7f2bd74`) 创建,xiaokui 无法直接覆盖 - 修复:在 `update_summary_doc_as_children` 中添加 creator 校验,非 xiaokui 创建则先通过 xiaoyan 凭证删除再重建 - 关键常量:`XIAOKUI_BOT_OPEN_ID = \"ou_fdbf5fdafd91670db34b6ac887f30fb7\"` - 注意:xiaokui 无法删除 xiaoyan 创建的文档(跨应用权限隔离),需回退到 xiaoyan 凭证删除 **问题三:子文档排序不规则** - 根因:旧 `sort_tag = dt.timestamp()` 升序导致旧日期在前 - 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 不足:飞书 Wiki V2 API 创建节点时 `sort", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-11T11:26:09.200Z", + "lastRecalledAt": "2026-05-11T11:26:09.200Z", + "queryHashes": [ + "cd203fdc056f" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "dialogue-info", + "info-parts", + "update-summary-doc-as-children", + "xiaokui-bot-open-id", + "sort-tag", + "dt.timestamp", + "工作", + "日志" + ] + }, + "memory:memory/2026-05-09.md:78:95": { + "key": "memory:memory/2026-05-09.md:78:95", + "path": "memory/2026-05-09.md", + "startLine": 78, + "endLine": 95, + "source": "memory", + "snippet": "- 修复:在 `update_summary_doc_as_children` 中添加 creator 校验,非 xiaokui 创建则先通过 xiaoyan 凭证删除再重建 - 关键常量:`XIAOKUI_BOT_OPEN_ID = \"ou_fdbf5fdafd91670db34b6ac887f30fb7\"` - 注意:xiaokui 无法删除 xiaoyan 创建的文档(跨应用权限隔离),需回退到 xiaoyan 凭证删除 **问题三:子文档排序不规则** - 根因:旧 `sort_tag = dt.timestamp()` 升序导致旧日期在前 - 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 不足:飞书 Wiki V2 API 创建节点时 `sort_tag` 参数可能被忽略(API 返回均为 null) - 兜底方案:按日期由近到远的顺序依次创建子文档,利用 `node_create_time` 自然排序 - 所有旧文档已删除并按正确顺序重建(5月8日→5月7日→4月28日),5月6日需手动创建 ### 飞书分发消息 `` 标签修复 - 根因:`dispatch_summary_to_chat` 中两步打架——第一步 `re.sub` 注入 HTML `` 文本,第二步 `content_parts` 用正确 `{\"tag\":\"at\"}` 格式插入 - 修复:删除 `re.sub` 注入原始 HTML 标签的代码,仅保留富文本 at tag ### 5月9日补跑问题", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": 
"2026-05-11T11:26:09.201Z", + "lastRecalledAt": "2026-05-11T11:26:09.201Z", + "queryHashes": [ + "dbd60c484e39" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "update-summary-doc-as-children", + "xiaokui-bot-open-id", + "sort-tag", + "dt.timestamp", + "node-create-time", + "dispatch-summary-to-chat", + "re.sub", + "content-parts" + ] + }, + "memory:memory/2026-05-06.md:20:47": { + "key": "memory:memory/2026-05-06.md:20:47", + "path": "memory/2026-05-06.md", + "startLine": 20, + "endLine": 47, + "source": "memory", + "snippet": "- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序 - 新增 `--skip-priority` CLI 参数 - 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑 #### 3. 更新 SKILL.md 文档 - 增加了步骤5的完整文档说明 - 增加了优先级判定规则速查表 #### 4. 真实数据验证结果 - 2026-04-28 数据:2个问题 - P0:iOS端Loading超时致数据丢失/无音频(15条) - P1:移动端NPC HUD偶尔变成一小条(8条) ### 待确认 - 步骤5的优先级标注目前仅在脚本输出中展示,尚未写入「用户反馈问题汇总」文档(RaL6whoYMijyYHkSlWrc7OLLnBy) - 刘新玉需确认是否需要增加文档自动写入能力 ## 刘新玉:PDF文件处理 - 刘新玉于11:02发送PDF(用户反馈问题优先级判断文档),8页A4 - 已用pdftotext解析并完整阅读 - 内容:P0-P3四级优先级定义、动态调整规则、三大判断问题 - 已基于此文档实现优先级判定规则 # 2026-05-06 工作日志 ## 刘新玉:飞书反馈优先级判定(步骤5)落地 ### 需求背景 - 刘新玉发来「用户反馈问题优先级判断文档」PDF,要求将优先级判定集成到飞书反馈同步流程的第5步", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-11T11:26:09.201Z", + "lastRecalledAt": "2026-05-11T11:26:09.201Z", + "queryHashes": [ + "dbd60c484e39" + ], + "recallDays": [ + "2026-05-11" + ], + "conceptTags": [ + "generate-summary", + "compute-final-priority", + "sort-by-priority", + "skip-priority", + "sort-threads", + "union-find", + "skill.md", + "ios端loading超时致数据丢失/无音频" + ] } } } diff --git a/memory/2026-05-11-1838.md b/memory/2026-05-11-1838.md new file mode 100644 index 0000000..228803b --- /dev/null +++ b/memory/2026-05-11-1838.md @@ -0,0 +1,10 @@ +# Session: 2026-05-11 18:38:20 GMT+8 + +- **Session Key**: agent:xiaokui:feishu:direct:ou_088ee79216826be4a24af44f7268f880 +- **Session ID**: 
9f44defe-353f-4d66-a103-a1529633a9ce +- **Source**: feishu + +## Conversation Summary + +user: hi +assistant: 若松,我在。有什么需要处理的? diff --git a/memory/2026-05-11-1925.md b/memory/2026-05-11-1925.md new file mode 100644 index 0000000..58c520f --- /dev/null +++ b/memory/2026-05-11-1925.md @@ -0,0 +1,264 @@ +# Session: 2026-05-11 19:25:44 GMT+8 + +- **Session Key**: agent:xiaokui:feishu:direct:ou_9d4df593d0419d705274947c5cec5ada +- **Session ID**: e5e30039-7ae4-4c7d-acfc-1767daffec51 +- **Source**: feishu + +## Conversation Summary + +assistant: 测试完整的 `--apply-ai` 端到端流程: +assistant: 全部完成。以下是交付摘要: + +--- + +## AI 归纳已完整接入飞书反馈同步流水线 + +### 改了什么 + +**`sync_feishu_feedback.py`** — 新增两种运行模式: + +| 模式 | 命令 | 产出 | +|------|------|------| +| 占位符模式 | `--ai-placeholders` | 文档写入 `[待AI归纳:#N]` 占位符 + 保存上下文 JSON | +| 回写模式 | `--apply-ai ` | 替换占位符为 AI 描述 → 覆盖写入文档 | + +### 端到端验证通过 + +今天 2026-05-11 的数据(1 个问题簇、9 条消息): + +| 阶段 | 结果 | +|------|------| +| 脚本规则引擎 | `图片 (1/1) 我怎么记着已经出了...`(原文拼贴) | +| AI 归纳 → 回写后 | `在 132 版本中,L1 级别 U 单元名称显示为数字编号 — U 级别单元名称未命名(季度级别已有名称),新版本发布后用户将全部可见` | + +文档已确认:无重复、无占位符残留、AI 描述正确写入。 + +### 伴随修复 + +- **markdown 转义 Bug**:`--markdown` 内联传递到 `subprocess` 会导致飞书 rich text 被错误转义,改为 `@file` 相对路径方式 +- **清理 crontab**:移除两个浪费资源的每分钟任务 + +### 自动化链路 + +``` +每日 10:00 crontab → --ai-placeholders(占位符文档) + ↓ +心跳检测到 cluster_context_YYYY-MM-DD.json + ↓ +AI 读取上下文 → 生成描述 JSON + ↓ +--apply-ai → 替换占位符 → 覆盖写入 +``` +user: 好的 小葵辛苦了 我还想知道目前“用户反馈问题汇总”下的子文档的排序规则 +assistant: 子文档的排序由 `create_child_doc()` 中的 `sort_tag` 控制: + +**当前规则:按日期倒序(最新日期排最前)** + +```python +sort_tag = 9999999999 - int(dt.timestamp()) # 日期越新 → 时间戳越大 → sort_tag越小 → 排在前面 +``` + +飞书 Wiki 按 `sort_tag` **升序**排列子节点,所以: + +| 日期 | 时间戳 | sort_tag | 位置 | +|------|--------|----------|------| +| 2026-05-11 | 1766966400 | 8233033599 | 最小 → **最前** | +| 2026-05-10 | 1766880000 | 8233119999 | | +| ... | ... | ... | | +| 2026-01-01 | 1767196800 | 8232803199 | 最大 → **最后** | + +**但有两点注意:** + +1. 
**仅创建时生效** — 首次 `create_child_doc()` 时写入 `sort_tag`,后续 `overwrite` 更新内容不修改排序值。如果某文档是早期创建的(sort_tag 逻辑加入之前),它保留原来的默认值。 + +2. **排序不一致场景** — 如果某个旧文档需要调整排序位置,需要手动修改其 `sort_tag`(目前脚本没有做这个)。 +user: 理解了,我看现在5月11日的文档排在最下面,按日期倒序逻辑,不是应该排期最上面吗 +assistant: 所有文档的 `sort_tag` 都是空 — 飞书 API 创建节点时 `sort_tag` 没生效。让我确认并修复: +assistant: 根因找到了。 + +**飞书 Wiki 子文档排序由 `sort_tag` 控制,但 Bot 应用缺少 `wiki:node:update` 权限,API 不认 `sort_tag` 字段** — 创建时传了也被静默忽略,更新接口直接 404。 + +所以实际排序退化为按 **`node_create_time`(文档创建时间)升序排列**: + +| # | 文档 | 创建时间 | +|---|------|----------| +| 1 | 2026-05-08 | 5月9日 11:17 | +| 2 | 2026-05-07 | 5月9日 11:17 | +| 3 | 2026-05-06 | 5月9日 11:17 | +| 4 | 2026-04-28 | 5月9日 14:05 | +| 5 | **2026-05-11** | **5月11日 18:18** ← 今天被删掉重建,创建时间最新,所以排最后 | + +**要修复的话,需要在飞书开放平台给 Bot 应用(`cli_a9311791083adcc1`)添加 `wiki:node:update` 权限,然后我再通过 API 给所有文档设 `sort_tag`。** + +这需要技术负责人(李若松或张昆鹏)去操作。要我通知他们吗? +user: 通知 +assistant: 已通知李若松和张昆鹏,内容摘要: + +- **问题**:Bot 应用缺少 `wiki:node:update` 权限,无法通过 API 设置 `sort_tag`,导致子文档按创建时间而非日期排序 +- **需操作**:飞书开放平台 → 给小葵应用添加 `wiki:node:update` 权限 +- **后续**:权限开通后告知我,我来批量修正所有子文档排序 +user: 好的 那等权限开通后我告诉你 我现在想知道飞书问题反馈整个收集、归纳和分发流程;想要知道的原因是:我现在还要收集微信的问题反馈问题,想评估下要怎么复用 +assistant: 整个流程我按三段拆开讲,每段标注了「哪些可以直接复用微信」: + +--- + +## 飞书问题反馈完整流程 + +``` +┌─────────────── 采集层 ───────────────┐ +│ 飞书群消息 → MySQL lark_group_message │ feishu-group-msg-sync 技能 +└──────────────┬────────────────────────┘ + ↓ +┌─────────────── 处理层 ───────────────┐ +│ 步骤1: 查询数据库(按日期范围) │ +│ 步骤2: 写入飞书表格(按天分sheet) │ +│ 步骤3: 对话链排序(Union-Find聚类) │ +│ 步骤4: 问题归纳(规则/AI生成描述) │ +│ 步骤5: 优先级排序(P0→P3) │ +│ 步骤6: 问题分类(按类型归类) │ +└──────────────┬────────────────────────┘ + ↓ +┌─────────────── 分发层 ───────────────┐ +│ 步骤7: P0 @通知到群 │ +│ 步骤8: AI 精炼描述(占位符替换) │ +└──────────────────────────────────────┘ +``` + +--- + +## 各步骤详解 & 微信复用评估 + +### 采集层 + +| | 飞书 | 微信(已有) | +|---|---|---| +| 数据来源 | `lark_group_message` 表 | `wechat_group_message` 表,1089条 | +| 采集方式 | 定时拉群消息 | 实时每分钟同步 | +| 引用关系 | `quote_message_id` + 推断 | `refer_msg_svrid`(较少) | + 
+微信已有数据在库,采集层 **现成可用**。 + +### 处理层(核心逻辑在 `sync_feishu_feedback.py`) + +**步骤 1-2:查询 + 写表格** — 微信可复用,只改表名和字段映射。 + +**步骤 3:对话链排序** — 这是最重要的复用点,也是最难点: + +飞书排序依赖三个推断策略(@提及、同发送者聚类、最近不同发送者),微信消息面临两个额外问题: + +1. **转发消息多** — 微信群大量用「聊天记录」转发,消息内容嵌套在 `content` 里的 XML/文本中,非原生消息 +2. **无显式引用** — 微信没有飞书那种消息级引用回复能力,只有少数消息有 `refer_msg_svrid` + +→ 排序逻辑需要针对微信场景调优,但整体架构(Union-Find 聚类 + 递归展开)可直接复用。 + +**步骤 4-6:归纳 + 优先级 + 分类** — **全部可直接复用**,只改入参数据源。`generate_problem_description()` 的规则引擎、优先级判定、分类映射都不依赖飞书特有字段。 + +### 分发层 + +| 步骤 | 飞书 | 微信复用 | +|---|---|---| +| 步骤 7 (P0通知) | 飞书群 @通知 | 暂不适用(无微信群 Bot) | +| 步骤 8 (AI归纳) | AI 描述 → 知识库文档 | 同样走知识库 | + +--- + +## 复用建议:两种路径 + +**路径 A:合流(推荐)** + +微信和飞书数据在「步骤 1 查询」阶段合并,后续处理统一走一套逻辑。最终知识库文档里标注来源渠道。 + +``` +lark_group_message ─┐ + ├→ 统一排序 → 归纳 → 分类 → 文档 +wechat_group_message─┘ +``` + +- 改动量小:`query_messages()` 加一个 UNION 或双表查 +- 字段对齐:微信没有 `quote_message_id`,用 `refer_msg_svrid` 替代;没有 `sender_id`,用 `sender_username` 替代 + +**路径 B:分治** + +两套独立流水线,各自产出文档,在最终展示层合并。 + +- 改动量大但隔离性好 +- 可逐步接入,先跑微信、观察效果再合 + +--- + +要我按路径 A 先做一个方案详细设计吗?或者你倾向哪条路? +user: 我更倾向路径B,也就是方案B;原因是:初期我需要分别校验飞书和微信的信息收集、归纳是否准确;若采用方案B,我要怎么做? +assistant: 方案B的核心思路:**新建一套独立脚本,复用 80% 的逻辑,只改数据入口和输出目标**。 + +--- + +## 要做的事(按顺序) + +### 第一步:新建微信专属脚本 + +从飞书脚本复制一份,修改三个入口: + +| 改什么 | 飞书 | 微信 | +|--------|------|------| +| 源表 | `lark_group_message` | `wechat_group_message` | +| 目标表格 | `AHtnsehwShUVyDtjasSciIvgn7b` | 新建一张微信专属表格 | +| 知识库输出 | 「用户反馈问题汇总」父节点 | 同一父节点下,标题改为 `微信-YYYY-MM-DD 问题反馈` | + +### 第二步:字段映射 + +| 飞书字段 | 微信对应字段 | 说明 | +|----------|-------------|------| +| `message_id` | `svr_msg_id`(优先)或 `local_id` | 消息唯一标识 | +| `quote_message_id` | `refer_msg_svrid` | 引用关系 | +| `sender_name` | `sender_name` | 一致 | +| `msg_type` | `msg_type` | 一致 | +| `content` | `content` | 一致,但微信多了转发嵌套格式需处理 | +| `media_url` | `media_url` | 一致 | +| `msg_time` | `msg_time` | 一致 | +| `msg_timestamp` | `msg_timestamp` | 不一致(飞书是毫秒,微信是秒) | + +### 第三步:适配微信特有的消息格式 + +微信有两个飞书没有的复杂情况,需要新增预处理逻辑: + +1. 
**转发消息** — `content` 里是嵌套的 XML/聊天记录文本,需要先提取还原成可读对话 +2. **引用链弱** — 微信没有飞书的消息级引用,`refer_msg_svrid` 覆盖率远低于飞书的 `quote_message_id` + +### 第四步:crontab 定时任务 + +飞书保持现有调度,微信另开一条: + +``` +0 10 * * * sync_feishu_feedback_wrapper.sh → 飞书 +5 10 * * * sync_wechat_feedback_wrapper.sh --ai-placeholders → 微信 +``` + +### 第五步:校验调试 + +跑几天观察: +- 对话链排序是否准确(微信引用少,需要看聚类效果) +- 问题描述归纳是否合理 +- 优先级分类是否正常 + +--- + +## 总结:需要新建/修改的文件 + +| 文件 | 动作 | +|------|------| +| `scripts/sync_wechat_feedback.py` | **新建**(从 feishu 版复制修改) | +| `scripts/sync_wechat_feedback_wrapper.sh` | **新建**(定时任务 wrapper) | +| 新飞书表格 | **新建**(微信专属数据 sheet) | +| `skills/feishu-feedback-sync/SKILL.md` | **修改**(增加微信流程说明) | + +其他文件(`priority_classifier.py`、`HEARTBEAT.md` 等)不需要改。 + +--- + +要我现在开始写 `sync_wechat_feedback.py` 吗? +user: 先这样吧 我再想想 若有需要告诉你 “所有文档的 sort_tag 都是空 — 飞书 API 创建节点时 sort_tag 没生效”这个问题你看看 现在是不是权限已经开通了 +assistant: 结论:**权限尚未开通。** + +测试过程:尝试创建一个带 `sort_tag` 的节点 → 创建成功但 `sort_tag` 被静默忽略(返回 `NOT_PRESENT`),PATCH 接口仍然 404。 + +这说明飞书开放平台侧还没有给小葵应用添加 `wiki:node:update` 权限,李若松/张昆鹏那边可能还没处理。要我再次提醒他们吗? 
diff --git a/memory/2026-05-11-1929.md b/memory/2026-05-11-1929.md new file mode 100644 index 0000000..785567a --- /dev/null +++ b/memory/2026-05-11-1929.md @@ -0,0 +1,29 @@ +# Session: 2026-05-11 19:29:51 GMT+8 + +- **Session Key**: agent:xiaokui:feishu:direct:ou_9d4df593d0419d705274947c5cec5ada +- **Session ID**: b3a3fe48-81c8-405d-a393-dfa358eaeb4b +- **Source**: feishu + +## Conversation Summary + +user: “所有文档的 sort_tag 都是空 — 飞书 API 创建节点时 sort_tag 没生效”这个问题你看看 现在是不是权限已经开通了 +assistant: 从记忆看,上次检查结论是**权限尚未开通**——创建节点时 `sort_tag` 被静默忽略,PATCH 接口 404。让我现在重新测试,确认当前状态。 +assistant: 让我直接测试当前权限状态。先获取「用户反馈问题汇总」知识库的节点信息,然后尝试 PATCH 一个子文档的 sort_tag。 +assistant: 让我先确认凭证目录,然后直接测试 sort_tag 是否现在生效。 +assistant: 这是表格节点,让我找「用户反馈问题汇总」文档。 +assistant: 找到「用户反馈问题汇总」,`has_child: true`。现在获取子节点列表,看 sort_tag 状态,并测试 PATCH 权限。 +assistant: 所有子文档 `sort_tag` 仍然是 `null`(还有一个之前测试留下的 `TEST-192420`)。现在直接测试 PATCH 接口,看权限是否已开通: +assistant: PATCH 接口仍然是 404。让我看原始返回确认: +assistant: 创建成功但返回里没有 `sort_tag`。让我查一下新节点的 sort_tag 实际值: +assistant: 结论确认。清理测试节点: +assistant: **权限尚未开通。** + +测试结果: + +| 测试项 | 结果 | +|--------|------| +| PATCH `/nodes/{token}` 更新 `sort_tag` | **404** — `wiki:node:update` 权限仍未添加 | +| POST 创建节点时传 `sort_tag` | 创建成功,但 `sort_tag` 仍为 **null**(被静默忽略) | +| 所有子文档 sort_tag | 全部为 `null`,排序退化为按创建时间升序 | + +需要在飞书开放平台给 Bot 应用(`cli_a9311791083adcc1`)添加 **`wiki:node:update`** 权限,这个只能由技术负责人李若松或张昆鹏操作。要我再次提醒他们吗? 
diff --git a/memory/2026-05-11.md b/memory/2026-05-11.md new file mode 100644 index 0000000..aba410b --- /dev/null +++ b/memory/2026-05-11.md @@ -0,0 +1,67 @@ +# 2026-05-11 工作日志 + +## AI 问题归纳接入脚本流程 [刘新玉] + +### 背景 +脚本 `generate_problem_description()` 是纯规则关键词匹配引擎,当问题类型不在预定义关键词列表中时(如今天 L1 单元名称显示问题),会退化为原文拼贴兜底,描述质量差。 + +虽然 4 月 30 日已有架构决策「脚本输出元数据,AI 负责归纳描述」,但一直未落地到代码。 + +### 实现方案 + +**新增 `--ai-placeholders` 标志:** +- `generate_problem_description()` 返回 `[待AI归纳:#N]` 编号占位符 +- `generate_summary()` 保存 `cluster_context_{date}.json` 到 `output/daily_feedback/` +- 占位符编号确保多问题簇时精确匹配 + +**新增 `--apply-ai <json_path>` 模式:** +- 读取 AI 描述 JSON → 重新生成 markdown → 替换占位符 → 覆盖写入知识库文档 +- AI 描述 JSON 格式:`{"date": "...", "descriptions": [{"index": 1, "description": "..."}]}` + +**自动化链路:** +1. 定时任务 `--ai-placeholders` → 生成占位符文档 + 上下文 JSON +2. 心跳检测到上下文 JSON → AI 读取并生成描述 → `--apply-ai` 回写 + +### 修复的 Bug +- **markdown 内联传递转义问题**:`subprocess.run([..., "--markdown", content])` 内联传递会导致飞书 rich text 元素(如 ``)被错误转义。改用 `@file` 相对路径方式传递。 +- **`update_summary_doc_as_children()`** 同样修复为 `@file` 方式 + +### 清理 +- 移除 crontab 中两个无用的每分钟任务:`sync_feishu_feedback_summary.sh` 和 `sync_feishu_feedback_dispatch_p0.sh` + +### 文件变更 +- `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心修改 +- `skills/feishu-feedback-sync/SKILL.md` — 新增步骤 8 文档 +- `HEARTBEAT.md` — 新增 AI 归纳心跳任务 +- `output/daily_feedback/ai_descriptions_2026-05-11.json` — 示例 AI 描述 + +## merge_forward(多条转发)消息处理 [李若松] + +### 需求 +飞书群消息同步中,`merge_forward`(多条转发)类型消息原本无法处理(content 显示"无法提取资源key"),需要完整记录转发内容。 + +### 实现方案(方案B+) +1. **新增字段 `forward_detail` (LONGTEXT)**:存储转发详情,媒体已替换为COS链接 +2. **新增函数 `process_merge_forward_message()`**:解析 `` XML → 下载子消息中的图片/视频/封面 → 上传COS → 生成含URL的完整内容 +3. 
**`content` 字段**存储可读摘要:`[转发N条] 发送者: 首行文本 | ...` + +### 媒体下载验证 +- ✅ 转发消息中引用的图片 key(`img_v3_xxx`)可通过 merge_forward 的 message_id 正常下载 +- ✅ 视频 key(`file_v3_xxx`)也可正常下载 +- ✅ 视频封面图支持 + +### 产出 +- DDL: `ALTER TABLE lark_group_message ADD COLUMN forward_detail LONGTEXT` +- 脚本改动: `skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py` +- 文档更新: `skills/feishu-group-msg-sync/SKILL.md` +- 历史数据修复: id=216 记录已重新处理,6个媒体全部上传COS + +### forward_detail 格式 +``` +[2026-04-20 17:14] 黄敏: + [图片1](https://cos.xxx/img1.png) + L1 9-5,关卡图标显示异常... +[2026-04-23 19:08] 胡陈辰: + [视频1 时长77s](https://cos.xxx/video.mov) + [封面](https://cos.xxx/cover.png) +``` diff --git a/output/daily_feedback/ai_descriptions_2026-05-11.json b/output/daily_feedback/ai_descriptions_2026-05-11.json new file mode 100644 index 0000000..338df24 --- /dev/null +++ b/output/daily_feedback/ai_descriptions_2026-05-11.json @@ -0,0 +1,9 @@ +{ + "date": "2026-05-11", + "descriptions": [ + { + "index": 1, + "description": "在 132 版本中,L1 级别 U 单元名称显示为数字编号 — U 级别单元名称未命名(季度级别已有名称),新版本发布后用户将全部可见" + } + ] +} diff --git a/output/daily_feedback/cluster_context_2026-05-11.json b/output/daily_feedback/cluster_context_2026-05-11.json new file mode 100644 index 0000000..545413e --- /dev/null +++ b/output/daily_feedback/cluster_context_2026-05-11.json @@ -0,0 +1,85 @@ +{ + "date": "2026-05-11", + "total_clusters": 1, + "clusters": [ + { + "index": 1, + "cluster_id": "om_x100b6f23a6f94ca8b39beec3f1daab1_img0", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "毋益飞", + "content": "图片 (1/1)", + "msg_type": "post_image", + "media_url": "https://static.valavala.com/vala_llm/user_feedback/image/2026-05-11/beec3f1daab1_img0.png", + "time": "2026-05-11 15:00:00" + }, + { + "sender": "王胤鑫", + "content": "我怎么记着已经出了,之前好像是ok的", + "msg_type": "text", + "media_url": "", + "time": "2026-05-11 
15:00:00" + }, + { + "sender": "梁晨", + "content": "对,之前名字给过", + "msg_type": "text", + "media_url": "", + "time": "2026-05-11 15:01:00" + }, + { + "sender": "梁晨", + "content": "S2是对的吧,得鑫哥再看看了", + "msg_type": "text", + "media_url": "", + "time": "2026-05-11 15:02:00" + }, + { + "sender": "毋益飞", + "content": "我132分支看U17开始全是数字了", + "msg_type": "post_text", + "media_url": "", + "time": "2026-05-11 15:02:00" + }, + { + "sender": "梁晨", + "content": "@毋益飞 U级别的确实还有未命名的;季度是有的", + "msg_type": "text", + "media_url": "", + "time": "2026-05-11 15:03:00" + }, + { + "sender": "毋益飞", + "content": "目前如果新版本发布的话用户是都能看到了,你们评估一下是不是都先添加上", + "msg_type": "text", + "media_url": "", + "time": "2026-05-11 15:04:00" + }, + { + "sender": "毋益飞", + "content": "图片 (1/1)", + "msg_type": "post_image", + "media_url": "https://static.valavala.com/vala_llm/user_feedback/image/2026-05-11/5afc3865073e_img0.png", + "time": "2026-05-11 15:02:00" + }, + { + "sender": "毋益飞", + "content": "132版本新作了切换,是不是需要把L1的名字都出了,现在全是数字@梁晨 @庞鸿潇", + "msg_type": "post_text", + "media_url": "", + "time": "2026-05-11 15:00:00" + } + ] + } + ] +} \ No newline at end of file diff --git a/output/daily_feedback/飞书反馈_2026-05-11.xlsx b/output/daily_feedback/飞书反馈_2026-05-11.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..72e59c489364efe7cba31cace1f95e28d57a4f54 GIT binary patch literal 9659 zcmZ{K1ymecvo$UW1b4R~xI4iixVyW%YaloS1a}J-+``~KxLc468Z@{Cx5vHtKe_qe zo0;xjy;kp6Dkd@s|Vr0RJ4VHj1N`yEEeSU=H7f*ovq8 z4-=e-{d13BRA+#Jg2MQ36U>}kE&j4$Jh8{NmlY-4$l@mVt)h$x;L^gZ1Q&t9{?O9C zfh*d~%#J$Xo=+!>y%BM7dgeqaPo(ON*nQ;WG-8Z7+e&QpLDG~-ucCw~6m@`DiE0K` zfFr5DJMf%?l;Z7U{wE|4jjA3BxCf=Ussv87^18MB{iqruZraj8+2`;dU*qeu`qj=gQJn;ys<~; z_1EQXZ?OGS62zeH0+Zn zx33Nq1+B<5Q)>+-C%1BAT&dbtuCxQLl-@g1EtGZFQ!XS+QA_3xF|}@C@HWVxvSSG& z<%Fl(DigyLA+x3Chu?q(Dlyiv0-8FJhSp*uuPdA~-)ZG*%RoYJ(4`1BvfXyvrtR+$ zi>0QoG}@O7)3kA*)9}|g%eff)=e<+9&#JNoNw6@-lV2ljFw6KpC`e;HF%(0d zvS&q3lUbZwZH0pmY82z#Vzh&&>!X9Rtp;BT|JUJ)k{k-%6H)k{%?+O(aL 
z)=+=StO8>&hzXeza1**V;C^g$DGN2$#T=OSR-6H0Jm^$%B?%c19gZh>Ow1>|!ALaZ zd3kmsIv%5oG@Ms9^Qgc@8}2dJuvez%XYi=yWAlgbISzZqPuK6i;mK~H0ZN4T*f|jk zGj)(VLN#sk$|E?1N=eyY!JoB|3T^v(zv4+9XjiTd_Je+&hVXzZZi2h{0+|CQi%(j9 zz003z;-Lz#>Y0t!Ta|Lu>$|UJgDh||zctfx9UmH0uSud?)?Q7% zyAb2#t)S5^X#6#Yc$2H#eq(ZH1AtfdSKXMiBIziKfR$h(Nu$?i60Z9z1J5n0Vpg>@ z@n%A?PFRQ5oMnFA-1LN&SF@_|PQAB#{oaHdeBa?(0@Ae{b{D6o?{|AtY70*UQ%MFR z&kHDy^TX38d&MJs{EX7AQU;Vcu^sFysTrl3jGGtE7_ryQjhQ|phSaFQoU3XWv*``M zGQ=17%g0Fmbly$VH`<&|++6x%(coA^=SAQ^smceoNyoCP7+JNb>(L;Y)X+Px?X-Od z!cm-j(y1ECL%~qQeT^w?mW=mBw%~$5t#g+_rcZ;nOOm4zGY!9eb7#;+QLP!%vk?u3 zy`_z>%-4d=>dW~W6ACh;UZf7@hJwM-+-_EOwJ)XTU7A;q@%f|k@O!jvfnB3t2IAEp zu2weh9Fqj3<+FYIF`(TgVW?7w5peO?x0H_);}Ni+$CHO8PGN%0zYqtd-@WcQM`@wM znWz%t3zTFG30E|np~1uk)}p{u;JI#|oFv(L#Vp)0esl;`XE&e28xfD{dB@=@zNk)f zQ4ImS4qMt#A8#Y<1Q^Tp`a0$?(LOI3etQSPdna{Kn6XY;AK{ke+YFYheAb>Py=7nH zb6(&=UXz24Pf~cDB|i-O^+Wfc(xrl4m{4FBM&_tfTmUtI#brXTT7a zL-Z$0L@p|9Qa653l2tZpNyDyX zhpuYR@y7|ni{h~Ci0g{zrHF8!r{xp<{i@NXz>cTw?d{d?Hr340@w+UWJ|7-Faum;L zT-d<1$wF>?a%0H?jn!3G^wp&1G{74y>5@`m_DpXJ&FysTa<7gTF6>*nIPA82mO{6A zN-f0UY#5O&+#zjErKqFLEt%#AHKk0&C^?pPu@FpsP-A6-edhRh`;~1jTA}+AR`85E z+5?eo0=@;&YyI@1Ft5_Efv5qP9}-3#&zlC|q040;o21OF^|G^>s^cT-$w()>7ZY6* zom`iKHRb9ZHGXlVQ`#rjH|@PIKM19^o&?sjzt1?YpU-XaB*&v)xTAe^7XRphF2gu| zKf9iK@dSyJddg6`3cMQK1_2MDSJ(YFO`Zk)R+juv4hBpr8zXAaPBnZOGNxkXXSV=` zQez9nhItJ@SSkU9hK8z+iwQD=8cLX^_-JZWwpudiSS8o}1a)Keahyk1y4rStLqku+ z9I(q>Qy#nk8vB~Py1W`lPL~V3zZv2;9wC(f9&>-&8~_n`ug{we7KG@=?BUv{%J?+g zS^3I5(}X>^2q-)}e?x)Nhw|4Gd`?`k7!RC`K6#-bTLUs8knVFesO|? 
zbqb4h3Ws$XiF|`8&+6!o3=k>*ksef^$X-T2(!d0Q%_&yRVmFgep^wJK`aD-V({^Oo z<{(j=g-_ED7wH!OKrbId!GkZ6%%w9h7pyS{maY!0_)$Z%TA+ThA23+fTDzjVDO8}Q zv+3ogaFB!T>+&;Ds<=v$p6_IXPdIAuqigy>TK7ZpWGzsZoouoB!GWLV3E4Cil(%)t zoOWe$EFo4u*-#r0D=X7UCdg(T;3uzJ@s97h~QWnJOT{q!dZy};`s zZ-IOXKovWppD-74YXEPCt<*$C9;7yhONS)u_H^Zk5T9-UqUYeHca|b`J_U44!c-+C z24*56W+E~sCI<2_jkNXhPfY4i_05aek>!i>sJT#ls%BMq4c&Bb=~%jK$=#_Ih>$kU z2e`e^QlnnCRBVgxJgV(dTIhEKwIQ9ofpF@-;=|?c`_?TXFxrN5nTPOJgqS1N=Qx;VYlK#PUJWplVlgyR=mB0%FeBfOztaCy`6u;yb)im^uTn9-3 z&8!Mw;3jO$s(T+PKe}2HI9!5`I)CQ7yo6+kBJvAe0eU)>2K`-yG~D!Ekadm-2#8*l zCi1I!ng%^qeY4#JN;h`67pkuV%Upm6K+9LpUJ(r#qsc7US^X=HB)HSRBB*h1GJkpl zJLPF>1^wXM9d`QxxQzLsd(W?YNfJfsA$>J&`~Bt#n{`M^JL$DMB`BhnFy!jRR_iQS zkOB;ZKe$l`2J&KS5ci9%pJO%6*g+WA&ekFI4=!Y6XQ;m;ByY3+!P-tCdz-B3;+^F! zBI@h$^SCzl1my6y#Ad-dl9cksX!6Eb^2U^aS~=F97yZ+r$_9K)1awh3?cXo&vx(#m zw^9?(Nu)%z582YVrW|13Y&yORCX?4u@-p{Lt|Q=i6$ZE2@*x<>^Rkm9rk+)M_c0%e z)-iiCkCyJiIHzGUc0)IUKuXwLxl(xOqUj#w~rYUtg+R;4pTF<^%KUeR*89D zSCY7nYrqz`COM-bqrv-3sty5yQ^#b`qcn!?oIAQ*2@2%g@-0jlUK zMae`Y1El3`+%aT{np%;HDRB6bW~PAhC7gXdRI%zhsTg+h2^R(E%%3U_%CP}*s=CX= z?_^<<=ACB{T-@Bn;GCFbIwnMs=M&>JnUHx^l^>{gD5`{li2@>h9zcM63eV}IHG|4; zabbkFm1#v&eP+qju(dSR36NhdHioS6DB*AIhr35O3C8F_QRM?e>u=-iv3FqEBO5Si zc>;vl@n4i8Ab(K`QP%w6ZbQ(5<%|WAxZ&;f4B%fM1D}nu@cMa7mGKlIhSX^0^!t!CG)R3O&RE>|zE; zhPB{$Fsg_AHuy#?RVPIsp2XcT4#l`-%iYK_l%A-M9$!7iDr8q3(*zdBB&t6G@%V%> z6w5HCAG9{K&j0K_2Cn2K2nl`12s!=Sbb1?^Ae5aCkS;S90PJ7IHh^o zLPt(3CnDzD6KEUiX}m%=<1LOLJ^SXfU;}@dV2;*X+)5&soVuH2<^mzfN@g*E z(x+dHkRFZHBpa)*m~+#gUKaw#`rTDoa}LVa)#vu0Nf{->en%u$5eX=N!Roi(|9~~pVq--=ftkh%B;kWP{GB(9s?2+2i=eL5z$lGO+DJAY0dvvBA7}*6I zYAN4mBxf3{)e|&)QuI_b)l~*B=tG==n&$hKg*@pY@3LFtyVj{W_$tB}TbPQ=J#uXK znnDzhe_-Q&o;bDOx3WjC*o@}d^a%{ccY$yJtkt1lBRaks%h8JWZ0x`T2eYg!jA$Qo znAZX=r@EfnDYef9=as1f)lk7jdApOR@%Cy*c;CQA?3CWLhrvMN45)1os-ir8pc50U zY0h6Luh}6M1H-b^{Z)(C=3Q?-AL#rUV|-)$okzcw&-cU6R^2#xdzABRs?eeJ=w&Z7 zqe>}sPX&PLDd#6U=LXeJGxGMUSjEyBPaz{{r7wJ$AcscK%Z4(Vx$pO3ATOTZ29 zmeu98L8Wy-=jmaE#Un%%#e4V~0bd$dq&d$J{uQ!f&Dm3%hNT&# 
zJJ-RF+V6$*-487uwigBjtU?qa)T# zXZ8c2C(#`B;ul2aA3J;}R_&BYYm@0A0i@1xtQZZ^bt&ZK>IKY9Rf*9NFE8c{ig7tK zi90fj9L*RNhlRBDASr;l{d?9293|ww;G;oVJGJJA+~r@7 zi{{Jwl=i{uTVtg#RYSi@VO(kl0bfdiz%A+$ZmR|^4?|pC+NN<-{+2<2@r-Xc%WQ^2 zh&FtbwIGoo*>^$QC-kZxq*z@%a404)x9;j5%fHz%CsfqW4Nj=S16FXGX+!vcu2WvkU`^STwB^|qhN3MnrGe;&VYR{ee z|BHh6S`t(98=S-`?^}ceN<`gimfAWT&-f#Di!E1*nha;uRQViFcapWVF$jNEwiePJ zrD#o;<>@B~1$ab6vkr&IgVZc9Y4!csQdFPHNaobIlsINN1&?S4qoZnP7sE+L9h1d; zx5>bumjBXSPh3`*@3b`0Y+Wj@H!MVsL zBUYM(w`0&1^g!S)-{Yx~OlK%fiF8DsY8uq0ptP&42Kz*oMk%q8zvcU3Iyq@Y3r0SR zXlccl1bn(pnW)xIIeY8+b?X}{;&Sv{dI7rrt}OC)CywMDoV&K`E840bMW`D9=@e^m zNy3Rw6&obJHX}vluj}6Ok#KxXVl!PwWlk4H{wO!MStg!gV$VC3pn7b^BAT!LdOntt z3B3oOUxo%6(oMqekv#GezsJ1$QaWNh^kyq>^(1)y;mVylcp=+<)cyTER*Wjy7AffnGOLwrupPt`1F4`4^ zeQ|wFZ3y3Np_z~0MBHD&NMV>#=bgetY52lqFjI7-)(ek!Q-?P+sH7+MEg53$;F7o^ z;IuNBXI+pP4gZzYw^GYCG`i-B@dQGv>=9)$X~5UK7=w9+cWe;&iydL#7^*7N=`dmE zey-1PLc9Vih_fUkhdc`;jGl;Jnmd1G-Qf%HVfC;cPueJ*)VoZ_@vD4S69wNYk6az1 z;B%Q)_i(I9TCbQd4rA2kklioc?@z?pwv9)Tgz$qeV}^&%K4(x>HvRSs)849Mux5k8|IQOs1*mg5RPW!uelI)vRBBu6(;^&(lQiSJ>q;Kt&4A9)+Lg_- zm|t2Jh>}uEtENaRfGzOlwfq*o0?NV@1gxE2t+rA#R*XNTbWfXsoJvG>BtRX-$sy`$ zW|uS)!WSkRJk~y=AejZ+uR8LH+n zp$jXxDfkqXE-Vi(^tvlu9rG++7Wqz$ER z8(I^ZQbk+2r{%C46N{$6v-tIsX;*-KU>DEY@%7UE)xjV=Lxq7`h+Iw6Qh?X=gW}D? z(XpD|z`--@KlkXm?n%hjUbbM&ul!J%DiQPrI_9tR}m zwC)=<55`>#Md?gLYlLH}s%j@0r+rGuRvD(fx~d!>Ks%`TZxZFUZK%QkPlSv|(P0ao z2=woBGm*=>@fVSPr1 zq$G^{7?^^y++a@*{H8l^8PB0AD;gf7L@KA`^z%#KLdsq^)_C!>vtg!vklX>bDKY*w z;>ayk<(-E7`GV93Bk#t`&h04=(r?F{oP2TOa%QyMUFP-6uOX)fAj96=;wMOke;@1f zii`poHj}pS*u5z*uCB)FYCLgeQD*jZ6K_IRMJ?-o9Qs=Z8G?26>pE7NHfFGfQjG@|pekU`58j(Wx-we$b<(}H9(Gs;D? 
zPOxO7+z6!ukm2K)dzN*{$0R*?ISU7P5_}SMo1Z%a?L&K-=ySz&^=9b2pWPhkGo>tf zH$LlP`{w?8S<`~^Ds%iThF7ey5Uv|lkckT`e0tm^c;i9o)_&TbG2CW}KY?k#UNWFE z?X^q<8t&i`R@P{a4RM4CwuwOI7e#owMkf_GBOanJsDib;=z9)+v06kY@eB7*6?=9h z!4^O-%2c|}g`nnZRKbl!0|&-8-o_f;zK9!bn<7dr0bulUpiQqL>!&`pa1oYPKTcr$ zO4RyhHJYa^nDx^NlCdMaNhF0cp$nWE6d@}$O2qYg|NUFuj*{Tp$-XC+e=;Er*hbv_ z!T_unHoShh8o657yRk6;bn0C3CuJ zAjK~6uL*qK31Tjj+K3$S7Bxnf;Xk6Q+UU+TLrXW(#Mw+Fh*f`MB;|^j5s_Fj6R*`} z6cS-3ux#ou$1QOy(u&ky$SHoe#G)kX6jZ}6?;>U+EywZZbbKa|{rUohOLAM5Dn*ab zdv^QDb6sXSpHdq)Q*wB=fTTE(Wbi6}cCW^a66CA*^VY>${ne*sGnyC|Q_vGv@G#op z2dIA0?z!WqU=iA5&P#j8&(Di}wZ!a3!Ks zj!T9W-1Y>)Ia!+b@Qy%+CdW|b{;KD~u&1!3P~)E1iVU~o?J+*9I|jMvjE3lSQHD*^ z1P;6oR(F{6DerC+)#Km?RU*6v6`qoebz7r9uc7;s+=(KaL*lk6>0;nUn#Lb&5@=Z4lJR2e#1O zmC3J;W>gTQ)(h;#_7xEM^q>s8bf_H2owy)>Xc47ZdaaJanT(QVJ_a9b2BZAt{l!)&pSI%yL&TxA0eyPVNB}Dc#IjFYkcxxBq{~+JC==zvHeU;oX}MR+O+B%xx$x`?* zW_0)qDoo7&>Q_xHVbJmX?j~PznF3FS74xjIy&OL`UQkQfA&i)h9-crI>4AXE6Pv6` zws@5rI^hWIGaa_*iEmp)7dgyxk24Cg1)s#-JyvY|c^|*>o}#g~8~iMjG^dlrA>5J& zTQ_p|sBVnPrxuj569DN`ops4KhRGdlGy`gt6hXT_cs$pV)#IDU)oUou-tJF-^nNVj zeG&;?44j1hJv^W%?&Roh;plFp?&ECXX7E>_RV67X_OhbCSZAR}!_27t`3sTpYXfI! zGbO@`rz9)gnI#TIi0o%foG^t-@74Cn+_cKoIE>n#IhTvcBr5*cFdB%8rX-Z!)gTH! 
znTO0qO4)`aJ@o6*?GWW2EbsESe#r-O93WSxGVeVfB85|hjBZmM!C3H^tt>$ymn`N& zf6oC~^1eiAi`$g36(zH_GO!|4|CYpg-%mh9R!Y1sA)PiP2?KYce+U}}zIYNE z1{?PO9<+IhnZG|lFaG`i$8P>a|2gCG8w&*$1l{}J=zmST{0aYa3gkcV^_Rc@UuHu7 z6!2#k@ZSPHQXu^;;D7B2{wd|pb;`e`K=%{P1>P2>P3 排序的问题列表 | | 步骤 6 | 问题分类 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | 按优先级+分类展示的问题列表 | | 步骤 7 | 问题分发 | 「小葵小葵」群 `oc_4171a2188f2554522a4309f2d7c27753` | P0问题@通知到群 | +| 步骤 8 | AI 问题归纳 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | AI 生成精炼问题描述,替换占位符 | ## 关键标识 @@ -425,11 +426,48 @@ python3 sync_feishu_feedback.py --days 3 --skip-dispatch 建议每日执行一次,在飞书群消息同步完毕后(`feishu-group-msg-sync` 之后)运行。 -### 创建定时任务(crontab) +### 两阶段执行(含 AI 归纳) ```bash -# 每天 10:00 执行(确保消息已同步入库) -0 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh >> /var/log/xiaokui_feedback_sync.log 2>&1 +# 阶段一:每天 10:00 执行(生成占位符 + 保存上下文 JSON) +0 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh --ai-placeholders >> /var/log/xiaokui_feedback_sync.log 2>&1 + +# 阶段二:每天 10:05 AI 归纳(读取上下文 JSON → AI 生成描述 → 回写文档) +# 由心跳任务或手动触发:python3 sync_feishu_feedback.py --apply-ai +5 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/ai_summarize_feedback.sh >> /var/log/xiaokui_ai_summarize.log 2>&1 +``` + +### 步骤 8:AI 问题归纳(新增) + +脚本的规则匹配 `generate_problem_description()` 只能处理预定义的症状模式,无法理解对话语义。当问题类型不在关键词列表内时会退化为原文拼贴。 + +**AI 归纳流程:** + +1. **脚本阶段**:`--ai-placeholders` 模式下,问题描述使用 `[待AI归纳:#N]` 占位符,同时保存 `cluster_context_{date}.json` 到 `output/daily_feedback/` +2. **AI 阶段**:AI 读取上下文 JSON,理解完整对话语义,生成精炼的问题描述,写入 `ai_descriptions_{date}.json` +3. 
**回写阶段**:`--apply-ai ` 重新生成文档内容,替换占位符为 AI 描述,覆盖写入知识库 + +**上下文 JSON 结构:** +```json +{ + "date": "2026-05-11", + "clusters": [{ + "index": 1, + "location": {"端": "", "环节": "", "课程": "", "角色/组件": ""}, + "priority": "P2", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** ...", + "messages": [{"sender": "...", "content": "...", "time": "..."}] + }] +} +``` + +**AI 描述 JSON 格式:** +```json +{ + "date": "2026-05-11", + "descriptions": [{"index": 1, "description": "AI 生成的问题描述"}] +} ``` ## 权限说明 diff --git a/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc b/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc index 676abc84dc858c2f177a1e14dfb71fbe45177bbd..25bad64c898177ae4a9a8b9b136648d7033b1720 100644 GIT binary patch delta 21003 zcmd6P33yahmhgM8_SBlxlFDA$NytjrK_C#82!tiLfI=Daicld7-l~9sR}{8O2So{U zRf2#NYHL)`IB|(P+HNy#j}?<{VoA5OO#*b!v{A90rhCRY=T)le1<>yKzJI>&e|(&} z?=I)wd+xpGp6%t)FJ!-YPHy;xULVK6^Te}k9G`usd|d@Uh0lSq$$aje%(^N3R6Y;hrt;jnX?#B9Oy?&6oWU0WoXHoi zo5fFDH=Ccv7eTFS_+rSN!ZigmB zK(L8ElyRNi4atzK+0B!km{NLNExe16ZB*vyWFP8%z_C)Jya%hh=^qOG>@M0^*v3BC zd!}#+YdnfARL!0Sr<=cxJlH#}=)A08H)Pg4#S-l3DUKY(I&8CtgI|Y(B*70k1YpN} z`tej7Eh{gg_ms6K;%t7n&NG=$VY{ah{CLeGI%iV0<^)#!2>{wWwKDTLd`tXC+8Qd- zEj>{2090R8cES6-V*|*yz7bU1LN91uS_9e>n0+Z}GH7 z7U=eUWkKFkTn*U``Qm7RMvp++!vYv1Og|)dOce?9U(;vI`~<5a;hvI_4)}r9X(e74 zA5w0sYie_O3e@CY%$B)%Pe{?syX(E=T5NNiZk;hr`8gzas8s9Fw0RIE~E|q_x$H?Od*wkQ`sg!&n$ZfLn|F48em4 zK4<6~=ToU|b~1ahw|w?56(T_k9X~Q3XtpDpezYKk^7Av#GVyLX58`CsUUs+Z>uW387Fj37Yj z6==vPrehTO3SYlQ@FxU#Dn-oXFLceqLiSRxf8i#1jvuqv1Hc33!j~&gL$BuA)vQtt zyNal3-O^N+rMs3kusS-jwA8PK>;mpv9W*_6^k?UvyL{+KX+oR zAR$`74Bvh5>>G#2^39TbvE$*_Umf225V6Ga&(SAF?%gwd;)x3fA0<}ECmbDJHj$Ol z)a7@vcKYb@cT!RyyG;gD`u*X|y zG%YJ=-RevjddBtnDj9EpoP4O4Au|eMULMDrd>TRH(F#hU5|n)WmT5>SLD|A0&IZO8 zCuj)0Wo=@*pm_!;gb&LIDv}|nNt&PKVik0adRkd(PE3G0Y0^2g)JMuvuq=ICSq+RJS1iAEjoF+L=?7?R zBr_uUR!1zqB_B!>u3BP-nZ!p*=!WZcGx#sK8(lbh^SUBql=fo_UH;otnz$}A zX;y4JuoZ0{vtWNG9jJT-JCl& 
zT}b!H_(i_-7LV5&W>E$^KmPK-&k%#g59D;*iF7~GpGWaEeBbniAc0eJFPK`PI3dmU*&WY?IeRQnI z;*psZ>mrzanSz#R_*H@yc3`!T2@FN<%WC0-Okb9eMr=Y>V+zh7ZFQuDM+z41H*?N zaojZg=3SN5!*Bf4xmSB`!Ro=9=lLSlSK< zLo!d>Msfq9PQ$!(1SmF%8vPmXHPoN~aej z*N|a6!9qp}^-y!^{dlv1WslR}x>o}CeEWPl?Yu)xKR>UeYa7$^*FjGM@ZVDc!tZY8 zD+8mp4aH{!jKPeu9yVy46jV<-V@&A1x6!9%{{hn6v2B8%!WBkJaSCwTGNcRvign~B zNEIkaloVtdWQ5`uRIXXLc*W|q&XtvG)+}6IQyU{pfjlfq&cTX$n7pf@m9#rs>Y807 z2l70)wQ=W&B5BA}-&E(Jh1>OtIw-uK&feYxY$%XqVgEqi+@74P;MqGFpOWW%DuJnD z)|bH62X#|WddFlPIGSquU)vM?C|9t1lf4SSFFr8|f|uhFg3ZeC zZeV>z!APH9Wu^aio877qRDw*9A2&TSHee6|#L}dAfrYjSf(g%N=Max0y+wGaG*k+{Sf?N>mMa-q^p@sr;=tnhZ2J68t1Ft{A{Uav zboQmd>{11rkRV!0RFJZ4V?kWb@ujxd;ms`On+2-`f0UxUZ3k%nVhxVZ1w59!dn<9b61TVA>2BH7>KNYl+=bnHzI>fN z;+xp}*{)Q!_;0Xomsow6>6F8glSu#y-Wy@=Awzde@D<1yrl*7{Yq{n zcadSttGM2Scdk?ZBbPb&I~l;JXwOK(Tq}~BF@b`!%^eTlv#S-5a|ZE zuED31$kJC1--x{mT0w`DL~<3yZf};^yF$+JCO)1|;LW_{u-vETEztcL=-wb`p=)4? zRzm}H9UdrNv~b(o1v#Ly8m)p}Fd$-Jkk%tI&_}J=S5678U%&~~U`i&SsF9XDl$I#5 zLhMa}QP_O(f(e-Lv7zK-sJ>y0dQtCJI>5)8dW^88B z*(6pm`;zOq2Bw}{zZ9qh3Vv7`gx?Gt2M9aa(RO2bk=Ae&?Bhx~Y!N913=aQQP&lNO zu0W9~E#*;qm!I-2K~W<-i&zm4UwbR_lvv<2QO z|L2`xA8f4q|!W=S|RLZH9*%jfcWG4qADID)cqD)O_GbUy$1gvd^087ZZZ*c-t9 zP#~lMb14LCPlWNM2x*dAC@LiiHh$t^8CWW2L^=^t_#(decu8acQy8DaJ1bJ!I2(sB zCj+bxDxN!D8nH)!Jwc8CBK^Uz=0@7^Whi$_SP(7kvg4&z3Q}nbUT5_nnBlJ6&SW#? 
zppI>a_q8Ci1G6f-O}3WV&K8unwSs=$HA@!faM15LJAJWh?hhFnJW@-;-w{~Hn;ZZ+ zST0_9X}IU^b3H$eS{u&3b!7NN*KkLVFYn^wqvz0h&T3cK+<6y&&`{o`!IDwr`e{nEvLJ!ce zKo*KaVbbd&PH=u^f-DizyXr`j+vNeffg99ko|+z>Xs;2=NE=ijboJnBS8J0;JG2qnA#1g0tMNN89M=l z)S~d&zSTu;hw_jd$5EgLKO{J1Zwup@i47aE)EE_Md6&1&-9);uNWda>v&-YDYjAlk zsdN2ni*uWcbHQua+UDKb2KEphDz`f~xm^6ly810^3RsYyZv#)Cvvms^2*N~(POxqLxP-4Lj1%HVDC!j`X@+N^@V4@G?IE?tOEw{mLtHC(B5^;3RJ@y@^(3UK+vpK< z)K2FncazKM1hUp`aX~rMZQbf>A&UWFNY1x4Zxx-U1({*z<$qwd-m}HM)d@?qL{!H@ z#_?7t57`0Z3K_tY>*izRd;^F0Cbm~cHe-m}R!{DMEc(`=3G9pX{Gln$qDrHSED>Ml ztqW17=e!L8ir=AVO>sUN-s@~`<=dKEA&p1}Cy*hejbd|?kZNmPdsAy2 zA7VFoGDQW)G+QZ}#MT^6i2o%Z&Vk+FNd#NUZf4X@w;$fAUUgm2cwJC^9sTa`^qhp0 zt~uT11M$UQGpr(UkD`O^m^5Uyb#NV1?};0gLEc4mZ^@CxY(lVhW6--TxZT(9-E~pM z+UId}=Oc@BX6x={Lx$9Em=uMUe)z~le{e%HXchCg`5gQ#;Ho)rna<;`6*a1*T!(5% zt>35HqwBn3P@NV@uO3v}FDe=1gl?{T>Qiw8hKdf^klJ!eozkyP8M4}kl2f~G?oTcp zvZoB$laDG7D!cqW+N+XAgUWFTYasf;=O8FTt> z`!Hc3V_Brxj_x@F_L--Xi@I+bNS-m8z$EAXIbM@w7&S3g+o+z&a0J+c{7_MGciq!f zhdqJvz^Y@D54ZPhJnhI1R0isfRXwH|Dk<$@pXMJo2UZ1cJ+|iYq(1I+YDRz!WFC_r z@>hSORmLZLEd!)q#V4f2{h3M7Xh+RJp3&Ki#lCx4NM(C?_GjvxAxqkUn*BAWEED=I z69z1Wrz{oymWrNh2P|_tG^cI$0}cBd4m9m=dVJE8(~eDha_+IYFHCxQ+Vj(1p8NdV zcPD)??d@qF%zb8~e)o}B~r!a%KW zV8O0|3ja5-Au7WQvlM@pbL%XS= z(qP4clhr3@edr37uNg?Zu0waml6E>hGf*^;UOHsY`AQ`>$91SK#xaHje`irZ*4;3u zpE}C0nt5zr=4qX2U-h2q&W5gAyW9F!{Yp3YI~kUoRqKPM(jHy+uApJgpnC2Zt#RLs zJu^D58_=e7tqIHyYDeRs@4qIwRw)}$Dr$|Y zdf4s^t~VmEi~6}ND^eC!rhwCw@J zXJ5NtdJzua6FC3I8^|cbR{3)e-x;#X5KrTnx!Ho1oxu5|l}SI;Ly~z=umdBBnu#+64H*G* z_x3@}G*}HqYG3(jqj_KZp7yTzdj(imMe28$joc*lk_YdDeby4$vx;dd`fP$tg(es9 z+tarf+5D_TF$E8nM;Eh&@)CuuT_$j#7=8~bg-np~a+Km?*PRq!l zw?^)J6okyNc$Zfx$eAO@9y{B2B65(1pL=EGxqWB*o(>DO;3EV3>+rLWTsU}F_{4>A zphedI1Aes#s*p~)(c4gq|Kj1mmwo%dK^MT$g%N^PcKFo?pqEADC?2ex2#OFC1Nhbi z%|t=zfjb>zeP@$;$o$ri&I`vEcmI)#=xEQW>7nm}` z##|?QR9@0jQa;~4I| z^K9R4P#T~w(YEOre*GuIeSIKUYQwVPW@t_pB6toc9a6bkwz&zo3+45#TieM3ET{qy ziX$#>8$n6d1yOfi42dK+GK?P#)gevm#zv>Nbqj>8jc000JV04ic2p$h+8+ 
zR^oIF$?BW=FafW{&cO2SX>0OM3b8Ja4l;n4(I@P-ORd|w6-4$r|9DOcqj18ilk?C+ zFEZ4JAprte9HUP^rOWKsWd=%mvIcZhb}#%?YdQ@VqoH_ET{2{}oH1FyW*my74*8HF zp>x`vW)O^w=Fa8)#w^IP=LI|gS9f{PR@|Wl_N*}kO^$#$pzsG)cWVQ_o`romeYT)s z-k`b?yTYEF~<0Pje8pR zweD#hFl0cFg)2H_L0w+II)9YYYm!b|GY)Lnzag-oPkwT8a8+$^?ahPM^&N|bjMh^| zN59b#@K=6f%sp+gb{V^6fwo}LqLbS`@V)H|Ubi-Q{o3H-b-_h91#LGEn$~}sn0nUc zIMA}cB~aUEIaw84y(YMB{h)0_$KoNQ?UXUI-55B!ci3VY4}Em&X~N0@OA@4N596CLCXM@Rl0u_BPmdoOgS%2TvhR|jB+ zvNT-zZ`cX0KFvK*JIjFO{|Rs4@zZ1aJH)UW>-+^%u{qab-aG&hQ4AY^oT0rZiu~e~ z9>JPOJYoR=0nII5axdoLCL(RvMuU2~$6E)wwm8-6vBZi^SOk|*V>}_k+PRKYVjCn0 z5jzC`I+IHh@!bI+q{f?6RE9a;OV(oLF>=v6pDj~DFbk6^?|t=IuY9gZl{9Rbf9XO+UuWTBqqoL`(<@74-`NNuoFt*K6jcIO%uN7c zhJ3CVCULuDUU`BU<^{2(d@g!CAw5AFg25N~ERt2V{GY8VPF}LAq7&_Es|vbQcva^J zT2sp_-l#7FW?VrlKg(Lp7jD6Qy-PC5iQ}dxUBdsLr+0HphGY)m4Ok?daO94MMJpb+ zXq*p1^jpBUwD&P^F&aE25YIc>9hr%VBuP9>owp_i@-ZPPWbohiHP#kl8 zR>@FHywf6Of;B=cU_GU@^L1;Ab(a+ewT##JAQjUbK^2`$Pre=@&SW6YbR6GuC71cSX^(MdZ6NsSs1G3z11ZIxD@Mal%h7P~kx^jov+q|{Z>1k^Qy{g^W%$Il} zm?UaVA-~2Jrw|Mwi0NFP!Ye2j+gxNj1n{`XXYhWBok!_gvq2^azU52Xk#reqC#Wf; zUF^V3SDbv1ZK1;_k0hoM)W5=a`KOpVi{M3i@a-vnQ3FC&Pw+kqDZ+g%zzS6eV#<-S zt)>;5~%9-_N~6CQ6$85C@<<5jdq`AkEV0t zpgMh2KA9eSx021Lzkj!k&7zj~ZYdm^u%N5@bY^bA-DB?CKA5=}dQ2<-uzbjo`K0QY zs#`VaD1#o;%Foiz-%Bs+iSL=wGqKwb{bhm$Qpw~_7}I*b)hN=^p}(|@qtyqi1MXj? 
zmYq&>(4O~7{!jZjo0>_J-_KJoW%J`#vtO;Gi{GCSS1H@bDweSqm&oWn@28bi%91si zUoikKeK!Y)_Z1TL(s#3AR*+09zx16Qpg8frUQDJhzQ57m2|MI%UFFOb8H%49D{b)e zo-$!Yk>dTV%Bibl+=oe(St}Fd|1v9K#SGQI>gTKBxC|1g}AJ4(6L8W4)Q5)2%FrA=T#i)WQ$t!26f>V^xDmY7FfvkQ; z1~2`x{8c52{$h4jNpI5!J5(%B&;8p6>_Ph6$H_VrX+vC{7kpOL^j9C}E05vE{sZNL z%h+nVBA8ZyTb4Kxpy&s_qE?R=eTeYkm&?7)g+9b1cn)^bhl9&A#!G-#NW)aw-^#gJ zy0YI+_w}bHi)T`lcRR2L(dQ+es-yJ1{`~1HE?|L@f0jG zrb^1v_;k8|Xuh(OBTqsA>>r2H>F6sO8JKKQLNjOZj)>@p23n89Yo`94R@O=n2U2r* zX{0FH?Ze~9mW23-Xr|@Ftih410dBEeK{npX$mj9-P5NfN4?-9Pd1D$a|Cu!>8V&&Q zXNp+Z4WD&9`d!Hm`lnym>FXb9{9q-{j*J6BaH1c0K_ImXLRi#eAuJ$RixIwsq8P{U z6O-X{GmJQe04k%PhFFpWIfN=f9gQc)8=F}Xju`HLiBuE20vl}%eKHX5A4e6ml!;F) zwCiRN=^^w?3(;XwAsM5<_!5XYKye=)v5wwnfS@jp&%sfGl^l$H_{kJsCW@U9lEs&c zsRVwK7M?r7FPMdd=+fyWbC@jFog#{&Q-uWBHPcWI;in&wd@iWmpA^V1=08YzgL>l5i{)R0i=;+$S5P6oJqsi%9iZ;@EAH_OmgX zR}xFX@Yg_~RWw8d0-@*dbC1u9&Sk-5Vl$|9q0oD@mL zejBGiSX)YK5?=|?%k$Bm8WUt1Q7pLZ@y?87@C*1Vej!AeEaDf#H!0Qpwfs^caZE!> z;!EJ)vg6C68d7phmV-!>w{J{0fSQ3uT=8j~Cu&K_jq{^gQi_m*XK+SMWNO4HA=U~B zu>k`Z<{3%pN&zu}BxPmzRicDE`$+5@VQ9DH;KG|PRWj~c4CUt6h#@}L zNyTg_wyPd}!?>fLO27Jr6*)qUlm~d1M+W2tn^pLU5Z9NE9AZ7_#FujhUxtt_h(T_R zt8kI1|3Td~4mcm_;MI!phU+7kE?bHlyl8R}AO6NWp;hB`7)VKnL)kOXd~(@Cc1m!r zjp6jJjZ_pdRDM?M&w%KqSpVyU3?SW265i{Gw1)hf#}U&3XGETiJOxg+FH?eLy^zU^ zToKxAfV8OSVp^h@W`_NA(a)=ph0wozmQs9EWQb8VktIRx^sb1M!v|OEQGcZ0g$y?M z>@nNT_;SY05Q}sC5RV#Yv@J8u`TP8?q z)r+uSe(Y4@xLiXibxSd%y7VD_KGS(4P z_kWKX5zu`pK8>jBUiN@tp+dUs{OZ4dl_g8BvbAuPMXdz8=!<7&7|^r31Dvv;410*5 z{_V_%Y$JW|cP@W?adDlySmG4)w4qNl>`D1wd zp^+Z@qS=qqs^}{bJ;LaFj(XR_fu!f)Z$|X&cc4k%F?`~{$g?ks;hSyfV1*FGdmUo| zHUvNJY(t6AF>>Ts3=7Uj^n=ZGh;DB9lP+Lq2s=IsSU>pRaY=V$7P>gvun>nKerJR3 z{*ecJ;M-b=t|Wb^7+%HEPX@7|9#Q-XQWsE0BvUXCO-pTPj1!&eho2tV^CpBaguQSu zo--`4lr}sQFq1nS=MFtP68LGPbcYpuf#_co zJ&04WLySTYos53+8%!?1+Q^&2cDQL+2jyF`1OeLLLPk8PRW1xoSSUJq#G`NmTjNFz z|BasCVf79Kzen&QHdOIojwEd@KP*vBAbrLJXm27?LU|0 zN1H%FX-FwjhlJ!vA2JWyYQ2yKKP{d$@*%Vhab7;8#y=YbdF`YEk>IU}9yYLuVub4Z 
z*hLTv{sRl*Vw*Lj-CXCvZ~eRwG#p}`qH~h`QS|p=CV|KXjHY5pZ#$*W=+|ciwB73l z^wT@IA-~0TIH{W*Oe*Y$I8yWEe)Hs>bpz&k9qQA@w5~1P(+7-`VKf=pzc$b7$_(7v z9e>Q*y`Xz_puK15K>pl+stjUX0G}EYF)*U4E2T>xSQktv8`PCc270!1R^O6=(uMtM zu-bOs*yTH&JEdpyNxeVltRKwf`_)+;ah*#8@jwo1Qdc^}wSLVgG&O8T)lh=1bNl|{ zjs-*J#4dfmIS;Z@3%e`2bGvU1CYN_C9ZJeRl{B$GX=3-%KFdJToQ`XeQkI-RZD3({ zLif_{Yx)#@{K<-w^ZVR=H+?7zCM^4vZuzJd(U1B~j3uM9xXTwb6?Z63n;ZkCtPaJH z*3_lw*QSYx3ZSRNU_$wzZjxj|Lzbkj-2KY}DgBmW2)f0H)a*cYU{=sjI;bv#X(SYM zDA7Ef*Od*HVSO2zryV_&5%cu5gSreb7W*|03}61isF~C8DF@Q`r=NxZoV>1rzzyB* zKHEv_klxz4Gq9|uvS(AzjXgyWZLOcjo-vun4B9GLLM00pZ3ycLTeR~A)k&kAMq@v1 zPCPJk|IDsUN1G2e515NOR#ZVy(7xOD+}0)Ey9**^g7%qxQ~L6Qw)umm1)~N=W4x$k z;%z~DVfWPMu6gR3K1*MAaN?X`;oP8Y-k_=Sw8?xxy1xa>|(9Z_LK{8@sqDMcvmwxBjX1eUtkZ1dHbdiz82(jm2BR0Y*9d5|%@KgYD8LjHkK4)9-RCv2E5`)Jl2r-J$A%rbz#ovv_ls?Qab z>t(lSWoP18fFbxOpt;W7;&c{pi^B4OtY$31Em}mv*VUqeg&~zwj9hUNKXMF_--`@g zaS%>vamPl?yWQqp4apIZ*!%13%xSu4dWPuN%2C zb^-g%CUzdjmW?ivGme}fllrw%0cEBl*7WrTR>hh|ZS<4T?BX_-HGM;5x3O&e*V|-G g9J`Pm-JoFPx{e*6$dW#ltM6JgDr4ly1SOLH0g65eOjp64n8S@THDvpt6Hef+K$v}%O}{nh@yR3-_y7Exw$hpN_}1Lve-Fy z?|t^!XPt`7YI!}p4(5z2G*PR z4%w1u&d)bHHh@&R1wokgmMqI_hGx>x(IU|h!m!@lYL&Yi|kF@9av~H??Cou z_Fk}?#Mzoron%kqx1+8lS&v{3OGm6J)FN%7dlFTn`whdp0a^bvp*YafBHf2Pq$4K6 z-#~!o!yoz(`*7SftY_@zobSQw6-bN6br(K`oKGXTB2%mR2-0o?eLVLc99#GdzWM$E z`^oX)>Suv=5!W|pOPX5ba7T=;gEyReBx#6+<)mp{d$d7L8XDHd=(fb72}=yB68}i* zE0dOHq0}Iu&*LAFo=B8UZY5x(`=(S4Lql+lz!(Rh9U}k+EpQ5&U6WY$xLo$5noLG& zDug38yea>!?B>1-pv-)c*$Y5u4XcMmpxMRm%v-tiR<@U zXnkXZVm`vlQ_EKGMaCuseA34DmYAG0M#7t9dOKpx2>7r@sije-zk~Oru`RqgmNcRs z^xqI+Xpp2}0iA$tYfEGuY?QDBJ7eawNe;eAo9@>_O`Tm!PP11==d-=D|MbsvI5}t04Jqr= zw_(hnh2D)rUypz*dIno92;KpZG|+Hk%mS?dK0Q6^zWodkUBztX>;fjw?VEWP+0ica z90Ign&_*v|J3aXUeEkT)hY0X<<3rHT*!A;9k3xN7afIjhzx7xp~gzioF?TsVgcOunIsIM|sRs^{A|0eDCh_+xO8-wy4gRpOvOw+;>O+ zvo8)(v)OHRK9?&^J$L7WXHV{DyX)NSL|sO)J59O3o;-igj&t9C^5TP!Q4ioLVdv{A ziO6ym+(CS7--1^Q{DAH>fL?#@xs4jIuO(@0jkZMSCe~R$lHA4i*Y796M9qyIgcw-h zWsf+^$?ol(053o8EOQtRV`q5c21XW@W*HS7Mj`A64|(z=vt&GspBg-yNK7PxWQ-d% 
zqU8!ilS=eU)+-P}THL};FDY@`V6Z{C)F9d8mdK5WH#ro8EV5m96|y((vRZ+Z0i<$H za_UK=s3>K|Es7=PP>j@~7^SSX$zUc?F-8cE(j>&KiiNrqD;s#v%hoJ$coY*YQOvYR zF%2^3sl=e^VL{4P%m7^njEBLJ_z)pd4Ly7*?i`idZPIb%f;F5PexinwmraNi~Rb6{m5La=_+%1M3E*U5F(m?DJ(oQ$TMHQ6;g@ zmSwQDODnv?((}>;$6aWX{IV-ha-`vLHyd|TFduBJ?X^@tk2E6gMXCwxnci`@0noXjSjXw3K+Bk_S60Pqk@Qxh54%S`p7j?t-9>+pqX(o{|q%9i{lw z>yW4Jx417dg%c}b6?~ELN)BuIPN{FSP5=&gGpXw!h5&)38~A&pUTsT)M9Yc-Lt)m3`K zX>0Y=^3iaxXkRvGXPyYkR?7e4>#@q)-KoQM1B zA@~`9SC566dNuOkSeO+rQ0%)%EL#!bI1nR5F~keuge%Fam5fSZ>eNGAsdFd>&&8n} zC0Vn-(W}HVM5{wPJQ`S@)*uhFM&3Rm8^)7767JR#o$iKOlyayI-XLMc5S?PiDIKR=|;r{TjLs~1da$}JP;9; zl6XKVpt(wby}#O9v{2)ZnUn%0z=a2tz!D+2sPh))zInYFg97>mJ6WB7|2^{x{|EG; zUn2OX(`I*uCX(N=7enRZR{}d5s&K#&41N!R$b4&VA{G{1Gn&{E`_~w(t#g1jr`AzsAD=;&%sVWg^TkatL+{9*YtmwWEJ_$M+ z75EAjsKORTG4x-c6*SY-MCe37BwcfAm(IK9#>LA+3u~7yowvAtNqSmAqZ&EYAfvOf zK@K-X>E=+Rp(9Mo02jk8!R?MIL^2!FpBTHi(P>%*#2xI5jqMNs^xWwoXIXrczuO>@ zZ9?2AiE)!6%oSFTfh}Ve1;r>2h6k{Z&5{9SX{)XM<^GrS`ehRk`J-4gnqU>u-w27Kc-383Ue z8f?}~I1e+VEF}ZIS`&#o6(>eAtao#MmO(M4zWz)V?+3?Epqop{#67NQk4}tt`|r)A zhAe2i4tQB8ZZoC`y_+*cl_yim`9`-*UCs%D&EY8+A?KQF9` z7ZxZVP6t2#fW3+jr|VAf!JKpHG$j{Y*ZV&#VDD`4uovI3Wx!Ee2!j-XH}s*c0#dPp zKCMK_1sFJ>i<1;z%%w81|61lONpBZlN*6EJ#U%}E^|J3SakKY6wXKR66`yLvcs@+V zuRv_VGk*)TayAg~4`p$GBoE$P9Pd`#x{0RttK{vn3j*g5Au8}IIjue%#Ocw7*Os+H z8liYV`;iJnN!%-!A26qxc9GsnqaanNI%YdH3LJRB&T5fGy%At@FF>D6{`1cJ8j@=N~ z_gf2CAZcdXx4K>3z}^RbDE3nTJ;Eo$*paQL3~%NvO!RJBVg4tbS;IY}vcLG6Gh+|m zv$bqL&eEv5TlB~}hb3~q1W7?{Z!P(fZSDPO5Lc)ZK1yNbpL=euGah{k<~?=}A0$-!)?GP%o*j2KVYIpPw|v*0;CFs+&*we^@?l{jF^B z?qOzb)-D#>Jz)g;&=}_Y_=OEVv(yqsJ6apTr->JKpCj{9-VnG^aeTkL2X;)KAQ5mh z1~_UVGX@WOH5~WgOBEA8_{}oNP#?w2-e;xkZ&e_JZ9v&PI-fh$2z1M3LWQWQF zzC7jQ;>NTKk~1~B)*D=~F&hSf4*~qM7#pTo)zrmgYY`5MZ2I&5DUw}sXt6$c0!Q2~ zgHGym1#L1=8^xyB*}Z%7D^nRU5qfWgL)9r|N?B62{-B2O`}Mf;KzZ%z;^HTu7q4#^71@6XGk!C(;1IL>B9cO z<&sfz{(RkJQf?|*$x)|T;8P%1(`Vql6OBUe3Jj{y6s}xt`M{`W2C;$pFatN=F-!Pt z-GfTIWjrFTICR`27o}((#jP#A*QO1SQVsHFoVrsVE^uWs^~x+WvdjZ{p7e&!$E_Un 
zcm6_PHy@a$dd&w9tWXiN@AZQM;;|D7!>vaf0CXA0fOCc5oxS+z@b0C-EcL8;9_jqR;fItJU-8pa>&5S3 zi)SYogwvz=iW!CKRX)d8Je`BNYTfs*fJi!HZ7u6Vnya9Aj3hIm!rdZ;1}7Awz7yE> zB(htzNhQS%jjZhP3UZXyJ$_XI_c#ur-!mkv!y6g)AXm@gYh-dw_BPtLbw7Je+4O!@5JpR5EQQqMB~ z)mQwMHE+NH$bn2DBYS7fj+$M|-gFe~TYAb-ddgUuSba2RAPIK*g>jWhF&ax+!jTOv zG#XJYa}CbI2p0=d(umhKN#ptkxtSKQ@?%SQ3`UQzo@3)pKLtf^OT2LGxWW7~H2*%9 zeb`^h_MMpLeGXn}0xj}GY|X;fQTFkP@ij?XbZu)$j<$hVOCt?8$WhAkFYs8eBkvms zevRNY06_;mitV2gcGJmGi;iRaMFb}iprZ6z1TWz$E^E+EUx9Ygqz+-|r0w)oB>n`! zEZ|6*B2g&}cLaE$!cLzYo}bLdtA$Wwb4$Av;zas3aPKiB$V-LBz|XMf&jECqt5qtG zhO8r&SB>bX8X>VMuPzz+OJsTm0Nfw_0^8q5uo!iF7F((@*biRykT=-RULDuxAnh zH;Y9zOEpayfxd@g&2#6~&09Kehyl8P(ZZ(=D+=BD0`dv*xu6aHi3w3S_|*sywg=xY ze97&QUdti4-+>*4`w^!psI%cy)wY3ca37E`qrV(w{t&oM8xoKGGDaL(Aee^}b_)yq zs>vX({*T1nzcPyK!rzK#%f&$DkLn`3ZRjMW90`tb#oXjjAkTw;-Xpn(2lJ}9-6qJT zTCdh&;66QK>u~n|ayN^6tWaZjU@AA@snZVgU4+)B%T`jJf)#P75o>fP5c%U7a=9i` zD$po%ggGv_Zl+65^lBf6Tm2_u;Z!JdGD3u_D{5hMw zf7!!+z05ud9Is0%PJM0grLBi%5;RnmZ%A`KS1v(<0qe-uW^$yD8 zG@_EKtVlL#6jw}ly|P@8Z1J31b3pK8YHK6eD5QsAN8Z#XSPc^#j}t7r!UUHKy9~WI zYCVJF6XyTCo#cIr^_juU`P|NXFFv_FNj8FuV0+&hi-+l3(V0o36m5)kSt>i%<5AXS zs;UA6odO+c4S+6lRaFO^`rv&wgy}}e_`+RwKsC@dDY`Mj?)dF=aEGt{HZD?$t$KSe zX=We4eZc3Zew2XDfZs0DKjZ5$_T=xbax`HR&klN_?-myQ)%kTf+$JK;p< zd1?vrrcN%nDwpv`3qd!V{rjm!_?aZlkbbs>*3tKolb>tXqoiBd;oo0xr+)=(45NAo z7TgdljI={>P#C~Kfv|#3@hOx|dM_Q(pxE7O-X0hq-*4UfGWyDlRK8c8|``x0+(%M$*RqTKmQ)6owUFw{@gcbT-`|HudG+W%WL_j zx-#2qBW(*jme;EN3$iS)+suG}J<9}`*F6So`^xJ@(;I};i;0*2*kvIhHub{K%kM{% zHlQ_U00@dn10?3WP0^Vr#ReIE3t?jw{pIFIk!^r2>#wV`;mLwMlE!8lUY9g=HHR7_ zz*&ntxM(zl0KFnaNzs^$Q7&A>hg&v489+)}c?n_j`Y`Qc{rw9{hI+_v#s^C;Mn#FR zM=s{EKU^&I^G(L%fxB=R9?0bJ%tO+L|Kc~5gB?~Sj{oI3g9ATC?jrBDvC}^smgu`A zk~|rsZ2rIj4i2$NlsreghfRzOlw{@Vs6Hu=P5WrJNXN16<0S^tnt^A5EniD0;fmOU z_c-=ATo}a-k+HIqt@&9YqH^K}xL`^hehPt*8Tu~sV@dHy$`9gOTukS3QlTQkkyxY{ z02b@dX1z4Yuaf{1!AliizILiZf}T#oM5Nu6qCpxD#=$3Ul1tQC9z zYAweMnf=#iW$czW+#~509rN(C5S{bDF5pYMK|#n&c~T9fe_gi`iWST^Qm)cQQd0RR 
z;LC@Y)dtft`tb$>oHbM;FlykBRH4|_i9??twEalJ_F(MK7IgQxXs{6tGqcgs+l8XeEo&1ag% z(>+~Va4_!G`uOD^NYBzqnZYkQX6k$CsIxT5MEBJw`Wpw2;_OzeSQLxs_f)N}<4ny8 zP@>98eepiUM=O*J{i0}ncr{A@`Odx)7ff~{SDW3u+|mv4Q>#w#IhrgUvTcL|!AYIn zZt9TH{qRRg-Fy%MC~aV@*%~eVHx>f?-1NKFis$J@o2z*7(rBLEii%S`OYmTTSa9%Q z$cJ0)eAqXBB~Nnm%bXm=AIsLsx`uxP*Xl*}mdY3*@dv)Le68=1>$E)um9hyC-lmU> z>+$Z8+XsJ;g%6&5!5iv3$3mN}5%XB$tAfl-Xpd8$eQ!K>P@u`z{;F2s`Zt6wlZD@=RopXFuoQ zF3hXQy!MBH(&g)DXo;}W@ow>N#=cKXBwF4EaxX%SQRu{a*uX)yJ83z;!;t}Ba&A9>1GHu#p5t(Zlcjn5ZH_iU!tr4g2R2pL5iBb!1in;J&vtk zV~ZE#Y@jNXDnON_U5bVV@4?XiwiD?@Lu>^Qx$ z<3GzHr!xZEui3Tog`OO;j?AHXI5TS~gpxubs_vcn!P=xH6oQN-6oQ{ez%G3r=Y*dF zor9nTmDz-?Pe~tCh=LhE0*0iyB@zo$neBM2sL$dhvr8J0B?p2;73KvdUXS608lD1S9Y{@i zF!P|e+_b*0G?$dymqQz str: return "其他问题" -def summarize_cluster(cluster_msgs, idx, priority_info=None): +def summarize_cluster(cluster_msgs, idx, priority_info=None, ai_placeholder=False, placeholder_idx=0): """ 生成单个问题簇的结构化归纳。 - + 脚本负责:提取位置要素元数据 + 输出对话要点表格 + 提取结论 - AI 负责:基于元数据和上下文,生成精炼的问题描述 + 当 ai_placeholder=True 时,问题描述使用 [待AI归纳:#N] 占位符,后续由 AI 填充。 priority_info 可选,包含步骤5的优先级判定结果 - - 输出格式: - ### 问题 N 🔴 P0 - > **[AI归纳: 问题描述]** -
位置要素(脚本提取){元数据}
- | 发言人 | 要点 | - |--------|------| - ... - **当前问题排查结论:** ... """ loc = extract_location_elements(cluster_msgs) @@ -710,7 +706,7 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None): lines = [f"### {priority_prefix}"] lines.append("") - description = generate_problem_description(cluster_msgs, loc, root_msg_text) + description = generate_problem_description(cluster_msgs, loc, root_msg_text, ai_placeholder=ai_placeholder, placeholder_idx=placeholder_idx) lines.append(f"**{idx},问题描述:** {description}") lines.append("") conclusion = extract_conclusion(cluster_msgs) @@ -805,14 +801,17 @@ def extract_conclusion(cluster_msgs): return "**当前问题排查结论:** 暂无结论排查中" -def generate_summary(clusters, cluster_order, skip_priority=False): +def generate_summary(clusters, cluster_order, skip_priority=False, ai_placeholders=False): """ 生成当日问题归纳 Markdown(步骤4+5)。 - + 步骤4:问题归纳 步骤5:优先级判定与排序(P0 > P1 > P2 > P3) - - 返回: (markdown_str, has_content) + + 当 ai_placeholders=True 时,问题描述使用 [待AI归纳] 占位符, + 并将簇上下文保存到 JSON 文件供后续 AI 处理。 + + 返回: (markdown_str, has_content, context_json_path_or_None) """ lines = ["## 今日问题归纳\n"] @@ -830,7 +829,7 @@ def generate_summary(clusters, cluster_order, skip_priority=False): }) if not valid_clusters: - return "\n".join(lines) + "\n*(无可归纳的问题簇)*\n", False + return "\n".join(lines) + "\n*(无可归纳的问题簇)*\n", False, None # 步骤5:优先级判定 if not skip_priority: @@ -839,7 +838,11 @@ def generate_summary(clusters, cluster_order, skip_priority=False): # 按优先级排序:P0 > P1 > P2 > P3,同优先级按时间 valid_clusters = sort_by_priority(valid_clusters) - + + # 分配问题索引(用于 AI 占位符编号) + for i, vc in enumerate(valid_clusters, 1): + vc["_idx"] = i + # 生成层汇总索引(按优先级 + 分类) grouped = defaultdict(list) for vc in valid_clusters: @@ -871,7 +874,7 @@ def generate_summary(clusters, cluster_order, skip_priority=False): lines.append(f"{cat_idx}. 
**{cat_name}**") for vc in cat_items: desc = generate_problem_description(vc["msgs"], - extract_location_elements(vc["msgs"]), "") + extract_location_elements(vc["msgs"]), "", ai_placeholder=ai_placeholders, placeholder_idx=vc["_idx"]) lines.append(f" - {desc}") lines.append("") @@ -879,14 +882,123 @@ def generate_summary(clusters, cluster_order, skip_priority=False): lines.append("## 今日问题拆解\n") idx = 0 + context_data = [] # 收集簇上下文供 AI 处理 for vc in valid_clusters: idx += 1 sorted_msgs = sort_cluster_msgs(vc["msgs"]) - summary = summarize_cluster(sorted_msgs, idx, vc.get("priority_info")) + summary = summarize_cluster(sorted_msgs, idx, vc.get("priority_info"), ai_placeholder=ai_placeholders, placeholder_idx=vc["_idx"]) lines.append(summary) lines.append("") - - return "\n".join(lines), True + + # 收集 AI 上下文数据 + loc = extract_location_elements(vc["msgs"]) + context_data.append({ + "index": idx, + "cluster_id": vc["cluster_id"], + "location": loc, + "priority": vc.get("priority_info", {}).get("priority", "P2"), + "priority_detail": vc.get("priority_info", {}).get("detail", ""), + "category": vc.get("category", "其他问题"), + "conclusion": extract_conclusion(sorted_msgs), + "messages": [ + { + "sender": m[1], + "content": str(m[3]) if m[3] else "", + "msg_type": str(m[2]), + "media_url": str(m[4]) if m[4] else "", + "time": str(m[6]), + } + for m in sorted_msgs + ], + }) + + # 保存 AI 上下文 JSON + context_path = None + if ai_placeholders and context_data: + import datetime as dt_module + today_str = dt_module.date.today().strftime("%Y-%m-%d") + context_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), "output", "daily_feedback") + os.makedirs(context_dir, exist_ok=True) + context_path = os.path.join(context_dir, f"cluster_context_{today_str}.json") + payload = { + "date": today_str, + "total_clusters": len(context_data), + "clusters": context_data, + } + with open(context_path, "w", encoding="utf-8") as f: + 
json.dump(payload, f, ensure_ascii=False, indent=2) + print(f" 📝 AI 上下文已保存: {context_path}") + + return "\n".join(lines), True, context_path + + +# === AI 描述回写 === +DEFAULT_AI_DESCRIPTIONS_FILE = None # 运行时设置 + + +def load_ai_descriptions(descriptions_path): + """从 JSON 文件加载 AI 生成的描述。 + 格式: {"date": "2026-05-11", "descriptions": [{"index": 1, "description": "..."}]} + """ + with open(descriptions_path, "r", encoding="utf-8") as f: + return json.load(f) + + +def apply_ai_descriptions_to_doc(obj_token, descriptions_data): + """ + 将 AI 生成的描述替换到知识库文档中。 + 对每个问题,用实际描述替换 [待AI归纳:#N] 占位符。 + """ + descs = descriptions_data.get("descriptions", []) + if not descs: + print("⚠️ 无 AI 描述可应用") + return False + + for item in descs: + idx = item["index"] + desc = item["description"] + placeholder = f"[待AI归纳:#{idx}]" + + # 使用 replace_range 替换该编号的占位符("今日问题归纳"和"今日问题拆解"各一处) + # replace_range 每次只替换一处,所以需要多次调用 + print(f" 🔄 替换 #{idx}: {placeholder} → {desc}") + # 先替换"今日问题归纳"中的占位符 + cmd = [ + CLI, "docs", "+update", + "--doc", obj_token, + "--mode", "replace_range", + "--selection-with-ellipsis", placeholder, + "--markdown", desc, + "--as", "bot", + ] + env = os.environ.copy() + env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui" + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env) + if result.returncode != 0: + print(f" ❌ 替换 #{idx}(归纳部分)失败: {result.stderr[:200]}") + return False + # 再替换"今日问题拆解"中的占位符 + result2 = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env) + if result2.returncode != 0: + print(f" ❌ 替换 #{idx}(拆解部分)失败: {result2.stderr[:200]}") + return False + print(f" ✅ 替换 #{idx} 成功(归纳 + 拆解各一处)") + except Exception as e: + print(f" ❌ 替换 #{idx} 异常: {e}") + return False + + return True + + +def get_today_doc_obj_token(): + """获取今天日期对应的子文档 obj_token。""" + today_str = datetime.now().strftime("%Y-%m-%d") + doc_title = f"{today_str} 问题反馈" + children = list_child_nodes() + if doc_title in children: + return 
children[doc_title]["obj_token"] + return None # === 步骤5写入目标文档配置 === @@ -1055,12 +1167,17 @@ def update_summary_doc_as_children(day_summaries): continue # 写入内容(锁内已保证obj_token唯一,不会并发写入) + # 通过临时文件传递 markdown,避免 shell 转义问题 + tmp_md_path = "tmp/_summary_md.txt" + with open(tmp_md_path, "w", encoding="utf-8") as f: + f.write(content) result = subprocess.run( [CLI, "docs", "+update", "--doc", obj_token, "--as", "bot", "--mode", "overwrite", - "--markdown", content], + "--markdown", f"@{tmp_md_path}"], env=env, capture_output=True, text=True, timeout=15 ) + os.unlink(tmp_md_path) try: d = json.loads(result.stdout) @@ -1081,20 +1198,28 @@ def update_summary_doc_as_children(day_summaries): if dup_count > 1: print(f" ⚠️ 检测到内容重复(共{dup_count}次),第{retry+1}次重试写入") + tmp_retry = "tmp/_summary_md_retry.txt" + with open(tmp_retry, "w", encoding="utf-8") as f: + f.write(content) subprocess.run( [CLI, "docs", "+update", "--doc", obj_token, "--as", "bot", "--mode", "overwrite", - "--markdown", content], + "--markdown", f"@{tmp_retry}"], env=env, capture_output=True, text=True, timeout=15 ) + os.unlink(tmp_retry) elif len(saved_content.strip()) < 100: print(f" ⚠️ 检测到文档为空/内容过短,第{retry+1}次重试写入") + tmp_retry = "tmp/_summary_md_retry.txt" + with open(tmp_retry, "w", encoding="utf-8") as f: + f.write(content) subprocess.run( [CLI, "docs", "+update", "--doc", obj_token, "--as", "bot", "--mode", "overwrite", - "--markdown", content], + "--markdown", f"@{tmp_retry}"], env=env, capture_output=True, text=True, timeout=15 ) + os.unlink(tmp_retry) else: print(f" ✅ {title} 校验通过:长度{len(saved_content)},无内容重复") check_pass = True @@ -1234,6 +1359,8 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False): print(f" ⚠️ 分发异常: {e}") return False + + def main(): parser = argparse.ArgumentParser(description="飞书问题反馈同步(五步完整流程)") parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3)") @@ -1241,10 +1368,75 @@ def main(): parser.add_argument("--dry-run", action="store_true", 
help="仅预览不写入") parser.add_argument("--skip-priority", action="store_true", help="跳过步骤5优先级判定") parser.add_argument("--skip-dispatch", action="store_true", help="跳过步骤7问题分发") + parser.add_argument("--ai-placeholders", action="store_true", help="问题描述使用 [待AI归纳:#N] 占位符,并保存簇上下文 JSON 供后续 AI 处理") + parser.add_argument("--apply-ai", type=str, default=None, metavar="DESCRIPTIONS_JSON", help="应用 AI 描述:读取指定 JSON 文件中的描述,替换知识库文档中 [待AI归纳:#N] 占位符") parser.add_argument("--dispatch-mode", type=str, default="all", choices=["all", "p0"], help="分发模式:all=发全部, p0=仅发P0级问题(默认 all)") parser.add_argument("--steps", type=str, default="1-7", help="执行步骤范围,如 '1-3'(仅写入表格)或 '4-7'(归纳+分类+分发)") args = parser.parse_args() + # --apply-ai 模式:加载 AI 描述,替换占位符并覆盖写入文档 + if args.apply_ai: + data = load_ai_descriptions(args.apply_ai) + target_date = data.get("date", "") + if not target_date: + print("❌ AI 描述文件缺少 date 字段") + sys.exit(1) + print(f"📋 加载 {len(data.get('descriptions', []))} 条 AI 描述,日期: {target_date}") + + # 重新查询数据并生成带占位符的 markdown + from datetime import datetime as dt_module + td = dt_module.strptime(target_date, "%Y-%m-%d") + next_day = (td + timedelta(days=1)).strftime("%Y-%m-%d") + rows = query_messages(target_date, next_day) + day_msgs = [r for r in rows if r[6][:10] == target_date] + if not day_msgs: + print(f"❌ {target_date} 无数据") + sys.exit(1) + + sorted_msgs, clusters, cluster_order = sort_threads(day_msgs) + summary_md, has_content, _ = generate_summary(clusters, cluster_order, ai_placeholders=True) + + # 替换占位符 + for item in data["descriptions"]: + idx = item["index"] + desc = item["description"] + placeholder = f"[待AI归纳:#{idx}]" + summary_md = summary_md.replace(placeholder, desc) + print(f" 🔄 替换 #{idx}: {placeholder} → {desc[:50]}...") + + # 获取文档并覆盖写入(通过文件避免 shell 转义问题) + title = f"{target_date} 问题反馈" + nodes = list_child_nodes() + if title not in nodes: + print(f"❌ 未找到文档: {title}") + sys.exit(1) + obj_token = nodes[title]["obj_token"] + print(f"📄 目标文档: {obj_token}") + + # 写入临时文件(lark-cli 
@file 要求相对路径) + tmp_md = "tmp/ai_summary_md.txt" + with open(tmp_md, "w", encoding="utf-8") as f: + f.write(summary_md) + + env = os.environ.copy() + env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui" + result = subprocess.run( + [CLI, "docs", "+update", "--doc", obj_token, "--as", "bot", + "--mode", "overwrite", "--markdown", f"@{tmp_md}"], + env=env, capture_output=True, text=True, timeout=15) + os.unlink(tmp_md) # 清理临时文件 + try: + resp = json.loads(result.stdout) + if resp.get("ok"): + print(f"🎉 AI 描述已应用到文档: {title}") + else: + print(f"❌ 写入失败: {result.stdout[:300]}") + sys.exit(1) + except json.JSONDecodeError: + print(f"❌ 解析响应失败: {result.stdout[:300]}") + sys.exit(1) + return + if args.date: start_date = datetime.strptime(args.date, "%Y-%m-%d").date() end_date = start_date @@ -1305,7 +1497,7 @@ def main(): if do_summary: # 步骤 4:问题归纳 # 步骤 5:优先级判定(默认启用,--skip-priority 可跳过) - summary, has_content = generate_summary(clusters, cluster_order, skip_priority=args.skip_priority) + summary, has_content, context_path = generate_summary(clusters, cluster_order, skip_priority=args.skip_priority, ai_placeholders=args.ai_placeholders) if has_content: priority_count = summary.count('优先级:') if not args.skip_priority else 0 print(f" 归纳完成:{summary.count('### 问题')} 个问题") diff --git a/skills/feishu-group-msg-sync/SKILL.md b/skills/feishu-group-msg-sync/SKILL.md index 073c441..bb71efb 100644 --- a/skills/feishu-group-msg-sync/SKILL.md +++ b/skills/feishu-group-msg-sync/SKILL.md @@ -40,8 +40,9 @@ description: 定期同步飞书群聊消息到MySQL数据库或飞书电子表 | chat_name | varchar(255) | 群聊名称 | | sender_id | varchar(128) | 发送者ID(open_id或user_id) | | sender_name | varchar(255) | 发送者显示名称 | -| msg_type | varchar(32) | 消息类型(text/post/image/media/audio/file/sticker/system) | +| msg_type | varchar(32) | 消息类型(text/post/image/media/audio/file/sticker/system/merge_forward) | | content | text | 消息文本内容或描述 | +| forward_detail | longtext | merge_forward转发详情(媒体已替换为COS链接) | | media_url | varchar(1024) | 
媒体文件COS URL | | message_id | varchar(128) | 飞书消息ID(om_xxx) | | parent_id | varchar(128) | 父消息ID(thread根消息) | @@ -186,6 +187,7 @@ LARKSUITE_CLI_CONFIG_DIR= lark-cli sheets +info \ | audio | 下载 → 上传COS → 记录URL | 如有引用则记录 | | file | 下载 → 上传COS → 记录URL | 如有引用则记录 | | sticker | 记录为"表情包" | 如有引用则记录 | +| merge_forward | 解析子消息、下载媒体→COS、生成forward_detail | 如有引用则记录 | | system | 跳过(入群退群等系统消息) | - | | 已删除 | 跳过 | - | diff --git a/skills/feishu-group-msg-sync/scripts/__pycache__/sync_group_to_mysql.cpython-312.pyc b/skills/feishu-group-msg-sync/scripts/__pycache__/sync_group_to_mysql.cpython-312.pyc index f66c2d6525300244a3bd18b03c59eea0acc4df22..7c495a46cf997e41dbb11e514edeef1d69934472 100644 GIT binary patch delta 11005 zcmb_?2~-o-N+iwjAuGH@i{ZoN}QR_ae9_}OGtp1 zzh};wI&j~8_uaSMck8{c?$fW&l4IYJ%Fh%E83B*#7i8PI{U?<=@<;Dv@8qq|ZqBEi z&DoTxIlo0jEjTXNy@<+dcC_%QeBc*T1;>fqOPbBp!UGP1pd~z_dC56!2Og3*#yiGy z7M_2LHMu)iEd#(NqLk!aK0?{2w&Tdl1?ODNDd-7 z#0Cs`|`qYXxb{Rj_W@v*K@p>GO!oZYK%rk zO-WZnON_dfVEB}YQW*$OD%3Z#%Pn>?jooY6m25djf(ksbl(Mj&S~5s0`>&QqwCNP@ z9Ph63N3Ba0HYVvsg0e9@r0ke=lQDzg1E0x=f!a?CW9^XymBrf9Yt7k==|$o+Z&uEv z!3Z5VLdtpmx%32*T+jZ*R>C*+A{uJZ&Ft41>&Py4 zN#?EuPQV$@%MN9(SA`WF&Gf$JhL$e+NFzX7^Zi4nP!U)^;y0OIY62gM)F62jNpvX4 z^#cj>C^!8K<_0h)qMMt#Xo}WhMuzRXx*w)mX}S#s&|#;$xzpXiR%h$UR(5}OtL$wk zcoY6zMd$x6yNZ;STo&6xs^p01e5NyzM;>CAa0@uihKl&>a{FXjs&)5K@j$~5PHzJL~2Mpp`k zP9HxY{ZFc{XvXwssE-XFR<#IQ;n~%Dy>El>_ z0!Zt(;LG$$EEhDicI>0;fxRs*Xzgg+*IYn9$KF^{O+L>qU0RXELWwhtE`fwTjD*jQ zEY%eaBl}|{FCy8A^VJDp~ld+%RC>3#hwZk#!`L6@RmMKbGC?BQi=EI&utMkH?_sYLQL z67(JVEjGBUD&ZqwEnp~@pU+NZH=KXE=n6lxi4W4)qXGu=w;qxb#8c!`yr&#bIiJd5 zea1}Q(@s`xPGt-7tgON$XRjKUvo(2gR&UZN#Iu`)N67Z(Ww4K#bbRSKlKqLv!WxxQ zok!x1_rt_@9FLUE$xkPR>_b|Ofl8p{cWyZz86#^;U{9_}W}nT^(ko(nf+xY97#lNa zB(ozzgPKzI2xoUQkDUGYe2Y-Uh}=5XCo&n%%?!Y!U=%LBJ1thfEuHR2M9Xv^TrlH&a;-THXFOeoWmI(Dtav^6$xk7^&`Q8TJGXH0E2Wt6dI%82@H1mQ_! 
z)NQ%5nu8&3A&5P21bCA72!LulT2BfbBFR(|zRB*zch@-p+H?Y=j_IJOs1&4~GEU#i z)tR+}N5?1`Eu(|eNzWwR(-tXADx;@TU*g9GMiK*60%&$0`Lu(hIh4-5BsLgIKZiHW z;f=^g`zKl?ePkc6&(Y`X%V`l(rsHytfimBz9Br~iK&2fQc#L2t6a1T*q%vZVtX~Qi zN#hZWkx8R0IK$ZRJQk1|Kx##4tQ1l{IUFF$O zZo7YZvSWTZV~b8kL@$o9ELEsYj1M#F22mw1l}I@aL?=n{JQ+*|ZDh>%T>4oXTcLRJyJXv$vGRJzL3Mvnj!FwWQvZ#ezqlKsWJqk`8&lK))ubp84zA=J+ zCnIJgjFei$WHFgkB9p+#sl_iTJ=sh)dog`i!km4PWfnc?y<;QXMGW!>hK~?O`MZcC zq;pC4KSAr$KRf>A;FH#QSm_V%_f7xo$2U%&{py1^rca#w!~6X=e$jX1vA1PbcsTOa zYcJpU$)I)h47h5h8akR?E;wXeYpgq~D*y2Qz!&~!ykC9r!StJh(Yjyw-~95&$ESb( z+!r6PU-+N?;{B27KHvJ9T2SR${POuTH~I#xGYvLS&F;q5b{xpyIncVv{_4gLU%>|6 z8o2SoyVFlTG5ywiFs|w62COq+33X4r{ngvQym4~qt5+X`LhtW;ALrVp|ECb73k*Wz z?m$0=ybK zK>~tg_`g*R+zUcL2ZpqBfGhCv@L`t}Fnri0dG&trNVWr{+I-`miY_^wy|3Yh~P?|Z`R5R*9l75C!$LuLU zh=Ca;rHxhfB*jE2aZz$qO5Ll)LpSv)uZ(oEtp#1Ntw5VJ`6K=1nN( zs5)*!`gx9;ssT^c#b;z-4D=T8Qf@X`05bG*oYcsu;0xTyjVOAYsAlJ9XY}`STQ6gX ztv9&sye}c;wL}Kt*2Ky);hV{$ zbgS9@BC}J8t3s(5CHlc^!@CH0W0j98VHjJZ9J?SRO%2)V*b$dOMBhsuBWHUu7P7C} zRKnSI)Ba4m|PXyyOUc*%hs^V{8ice@4{Z=pB4djnV`dO}Y>`vM%u z@l70#!W%z&>c&fNOrJglN9eTg$>`BK{r(GIK0kWn#Sd>h`_nJ{e;q$MZ<8|m9cYX@ zZn*uQ`%JD5OBz~^Q1NeN`d!p{4#_qm3Fuy6yK#?-LIL(qOLVe1S<_OT_jyzi1F4&S z@mF8=Kh?vlchLBG-Ho4-^$tJ+2n4tT&)LJQ=Hg&*Y#nGmR%~;$w6-@l)HUYyl;-VT zn75)KuRhmlgVN5%j%FyX+iR=O;RIAS-RN%Z>IC5)D%a_-63KYeDU)q8)H*dZUaPP9J9!+HFRn=gFW!*|s?y5UqIAf0a}Swj=fEB|2U zzHUSyGK5}r)4s=Up8hG+!7VkqB42sAVMR;s8r)pPd+EhS=u8d^T?gA+-O->120Nkw zu?LYGL2|Fa28KtxcZN6Mv?u5vA&CakkShn`Os79V{%?@{Es(IVy|uI16;>UjyP5z# z0*Z_Z1ek;+E;rqJ@L)3)mg0(bG`gGi(;s5xUm=MFIlR#*M_7zX-E=$s7B-zgHy>cb#+6ukLF^E;RGDajKq}=I_%kOy%*NL z7auwwYiVcfg)zg14%faYX6soU6VJn&F<_p*nj42}O@2&mUhM$jHrMm;PJGu(TvzK) zsZOd!1VOdEPZ%PxYMaVVS8nA5~mjrd& z`pU1VbRk2=nS$Yh(e$8U!Js&#HV&1t%8~R5bq1>prKFFfpX?muM?{3hK2kffW2k*- z@91*hdf!sNAYd#WluVg(hH6HOe9L_~{!;&b|L%a-uz65CWzHWx>^m|h9@hoU+XrPK zL)xJDTCzF9lWNmLw){!k(h1vAU)xyr_^P1o!6EUK&Ui*LED2<+9ZMaP2GT38=(a>8 ze0}`HFl_e__B@d~(b68JQ|zTk9R%a$Rki zRA)`7vqp|iW*1Fl7x|?hs@_wLlfmT`<6DB+JFlpBh19yZK_WaNaVrlbw}gDXvakFb 
z38B{CVHjiPnUdj>(ZZlHZ%`7lIL>&7y`zVMmcl`K$YeiLGh8!T8Z_k(O1<~7A@pW6 zpUEE1o;2i47;;9J2MtSn+a?S}mkp)EeGGqPYLZS<(0@9XwI=r0dgHV(?? z+qm7oH(;ooHL+-19yC8NYhv&Jp^e3Ux$n`j+VRwJX<*y-fUS1c)Lc$gyP7v1-Dx%D-jY9N5_ubWlU(Ayf8fgMXKQ z%h;hnZux|%d}zy*$#Q1%@a8~v#dz^}LBPKIim5Il@$${~DXZ-@=~?M$?iK5@h=fSA z&bY%PSJaEI!7_^owJtR4B)?NHjCgqX!bH~c3AN_LvHoL2mY~WRkT}2lwvI=b?B5Wi zww6RM#TO{EbS!&JKb|q(I9?mDZJX3>zpUH-eT0uX-tWJa5mqNu(5Ht@RuJmbzmI@p z&ARecQ{i`076+$tZARrkzbzyGSN+_t8}Fa%Cv#?WO(Yr+-mQRR%XrmzNnm$@0jJ`Bpsgl(zcnwZUd8(~Nn4Z0`z%9RnI!ydfw58{ydV}; z+IbfgLXccY;v=t>R@#IY(n+LtvPv$zkgcsO7F}4)@x{_Asq}LZ59FUq$SSE<`ni;k z8I`nZjqr1wv1*0z^Tnd-MBe8sgeY0V2mT@{tyT&zDoCV>Wc704#bj-@Q*_b6@lI*= zQt8D-Jd`gXkzdM3ewDN)M|eqKtg#C(SwuC(yi0Z=N^(Z+gVh>6%tS36aE;^Y$(uQryl`OAv)6WaA74@Z<uqu+{aeWP2;c~|c0-7j*fRrj0fa*I z1punEK4*`!8?Sfv)I~8K6xHpm2l1S8v_4ej0Nfn;O$=%D@K##8F>JBYTIY@8`1%>l z$OYiKg-{0Qzuwxzhk)@-%yRnH{QP{3dp+?^H!c*YK-i0Z7Fef}egYEuNhGrumAL$O zUL#eb3@@3&N(f;&@UnC!{1&}Air+8+fGa`W!$`R6GB3Bj_q#6R5M)?XR)yx*KVCm=ED6+K(0AL*hnq z2FXPX;YP3G5MRz*#R2Ecz4Kme)&d?V2XqLLu9>*uo#sFfopY-u#m^%Sd&2O5eYlG{ z7FNw*CvU^SMmLPNGc0=;P#o1rw*sCeJHm2KVJ0LRCR^#RvDI&o;A;R=UZhtBo* z3qLG+uV}3Bmn#E?jaMa`VBdhc08Qj6_Zb?%T1`rCCJH+auCT+S;S+&w~$l%5q%XtPERUDi4;WQ_0sEU!-% z(wK&7M+^bYf<9?TqM4K!CnUyEn_oEAI$_#+RkAH2#r7jgLZcnb=-m=Z)?gA5G?T)d zHbuvk1XdszOS`EkY$+ z2Ulg+5>sLLtA2z zodKTW1`ZEW?(dK4((Da6<20=+gxC%Y26>^eZOyB@_CRpuTi0 zV{Flceq*4qsjuRiN*_|2C)Jq~>daBz=vt9I4%pZJPF)(Y zpwS{WBE@iO*U4Q&Oi;U^w;HCPP!Gz3a&zy7kUDk9D;zvB)ErP|j)bGFBqH^v)F z3@ln7%r2Wymi2C$63hCEgJS(q>1DAwBvFB<2#1c0v;>m#CM0=%VrWleoYJO+;4jq{ zN--gmYzUbagftn*7#tz39{ebz&-&V|Qi^&vMy!N9skiF8Z#RG+vnz5pFT3s1z&roz z{8IZiAu%B=&D@^Hzr271O1}>U^N<8;;`x5-4Yp_1tK?2LeRU_VO^eWBas1Yp5m1R3 z)q^x?CQZh)MH!1=!}aZMxSS^0gCY&vYm>m%fl*}kA2t{al*S#8iqAq6h^c8=MX`mn z<8>*!sn}vngC@BLz{w(ya84MSyUUlRFv4iOlT9ynu%E>1{#S7V{CFy1O%M6+?2~JT z@YtJk>anJhgY3Z)!TEubqoi|wnYGyZgVM?eHq<)i%bg2kuwQCeL22%~|Mj`cZOwDf zTDHA(J(LWy^bOSlm65)}l<|(sds47AzaugB1 zGFjcX9(=@T`-V`n->qw0e5&GP1p+C}!jQ%=v}!_=jp@n>O%|pl6PnzJS!GZ9H$s(| 
z9O0`K$&pOfyFSxaPGT34?Ia;poRIa)o|XTxHY(!!{{Gtv_LKGN9xUS%9|_hO))(=A zlO}=8Z;FIvg!nhB#lU~emzJ&Je=K*Dt>AyWM7V(@K3*Y2$r?WJpO8r#lKD+hABa+f zL7aQ;IKQ{d!I$>J)ehV)MX%M_XS50Io0}?RaQCC)ekx~U|G3FQRfuKzhQ$*Lc~QgKWIZh!QpDe%dsa4 z9t0hYt(|lMH2*rfq@8!ZddmWmRI-m$F0+Re+9Atjh2sm@i(UT8aYs;J+gB0B%{TRw z@}zR8=c+n~eWx-_WL65ZzqYYgE9=Ncwy;XGwu~f&<_MhX=G(4J=>8+6r9#O6*pgI{ z#Q$whQbij7Q=zazCH^!?SYZ@@nkKBs7DLPzKwWERLxXcEd%S7^PffCaQDt9#8Yhd} z3Z02ou_2d)Trv_35-pMxBswG`s4~Q=s@vh$cTBa#egFmCNI0C#+4LjKa&|`Rf(_VT zR_kRYz{4Nm`u_aiS2JW<7fB`!X(9xsJ~*r~^}BCzX@p{rZnc*GfKMikxFZCnW3|zA ve8(*=AE8CdN$n8iAo=Dnxr{VNXp#`f`+6?(_16TFCpJd-gup~Ujx>F^QmZMNl9ElOB(O~C)>>87Kid8=X_M11MWWKSdp<%y z+b;6&o_o%B&;7o0&pBLOp&!0Njf)0@p71fBawp(vJ7dhH$KEMBDAZK?%lieX>bUBW zZBq4d@<^@UCe;j<6T(si;;)^QL8i*gv851o7s0kuMh%jOZzRItnF)&##3#(cf$}Nb0VFdxE>`s~u7wpZJ z9OY~}kuGbcTt(JOdGHUr*P1V9NV=Wei=45a$f9JG((}nw4u7ry4mw=a22VN;neB6w zKf=fQEZO02j$-Nnqw@)iQxfKc|FK_i)*A|C(>Wp)$^sI;bbilTB#T@wmbIKE=hf@} zD3M&yTDadWW{Ym2jhYwD3st$!3QO6pmz<@$6?fUfKJ1r>-#*81*N0; z0o>Mu_#wiIco5MKmr5Txir0`x^9@CNM3Od9Tu-hZe+BSbZr-J zHd17BYPVio(ov*A=OyA&wzIogTxw8z&19)r%{9wJt@H$KEn}M;Tg-GN4u&w5IW2r<>I8aCJ^d7?5|;` zps2o^o`K=|&cf5&;^vH~5HBNKh>M6)gpyvbpmc#FAqM?nw~Z;a{wvgu zkY>*Rmt6PbwJBVfSM1k_wZwwU4Xuvf;JO|024Wv#8u44i9HcjPsjqO+8t-e|p(bXdWng1{{cV^(4?{yt7hAEXS44C1aeP(!C+Am5Od2m}YXZ0J_y8$ILohl;J z0Sh?&w$yBNgJU2MuH~9{m}B_e%8>7O7GBU<=fzFMOeOxe;gNeQ=H3e6ZNDwEV10iH zVcVwEv23_9Y%^PA^?&sc3Kw^0fzzG>75#-&*{>fXJ-;#9r-C$qoZ1b(GvQM+}XPa(imEyen zR+cAeHM&gyB*sj0QcO3PlSR-SEGgPc(QC4s5}o0c{-pk=X?MMAD&@HER@&i(VC%ze zwd9(rRo_-7&KpzsV!lk>wnaN%sYSU~x2Ia1-&wxLEnY8C@7YSOyVYECy;?+ht7)$& z`uKO<9~v$xd8LV0g~~w0)TX;^iv9v&O6 z%}bAAAt^)Ftmh2RpbWS)Ttzb>F&w48hi=x$Z{itt!hr`X?O_BP$=9c6zIpStuP@Jh zGkv_SW(_?LZiBqXcH;Dy`jE9xVP6`2Vaa< z3V$S!Hs;!n?Ty(H2}BX%Bg6vYW5goj24V@JG`_PC7zeUMVqY*v!s{C0G0{vdWoStox4 sf1PkvG*HXzfjfjx-!i(D+LA{oQKd~!{#nfXOqFt?Jt-1ZzH-=q0O9d;x&QzG diff --git a/skills/feishu-group-msg-sync/scripts/fix_merge_forward_216.py b/skills/feishu-group-msg-sync/scripts/fix_merge_forward_216.py new file mode 
100644 index 0000000..15e41a1 --- /dev/null +++ b/skills/feishu-group-msg-sync/scripts/fix_merge_forward_216.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +"""一次性修复 id=216 的 merge_forward 记录,重新解析媒体并生成 forward_detail""" +import os +import sys +import json +import subprocess +import logging +import re +import pymysql +from datetime import datetime, timezone, timedelta + +logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +log = logging.getLogger(__name__) + +# 引入同步脚本的函数(复用 process_merge_forward_message) +sys.path.insert(0, os.path.dirname(__file__)) +from sync_group_to_mysql import ( + process_merge_forward_message, CosUploader, + LARK_CLI_CONFIG, CHAT_ID, CHAT_NAME, WORK_DIR, + COS_BASE_PATH, TZ +) + +MYSQL_HOST = "bj-cdb-8frbdwju.sql.tencentcdb.com" +MYSQL_PORT = 25413 +MYSQL_USER = "chatbot" +MYSQL_PASS = "xhuBx7d@uT2gUVv" +MYSQL_DB = "vala_test" +MYSQL_TABLE = "lark_group_message" + +TARGET_MSG_ID = "om_x100b50f7871e74a8c39cb1549d9052f" + + +def fetch_message_by_id(msg_id: str) -> dict: + """通过 message_id 拉取单条消息""" + env = os.environ.copy() + env['LARKSUITE_CLI_CONFIG_DIR'] = LARK_CLI_CONFIG + # 用一个大时间窗口保证拉到 + cmd = [ + 'lark-cli', 'im', '+chat-messages-list', + '--chat-id', CHAT_ID, + '--start', '2026-05-07T15:00:00+08:00', + '--end', '2026-05-07T15:30:00+08:00', + '--sort', 'asc', '--page-size', '50', + '--as', 'bot' + ] + result = subprocess.run(cmd, capture_output=True, text=True, env=env) + if result.returncode != 0: + log.error(f"lark-cli error: {result.stderr}") + return None + try: + data = json.loads(result.stdout) + except json.JSONDecodeError: + log.error(f"lark-cli JSON parse error") + return None + + messages = data.get('data', {}).get('messages', []) + for m in messages: + if m.get('message_id') == msg_id: + return m + return None + + +def main(): + os.makedirs(WORK_DIR, exist_ok=True) + + log.info(f"拉取消息: {TARGET_MSG_ID}") + msg = fetch_message_by_id(TARGET_MSG_ID) + if not msg: + 
log.error("未找到目标消息") + return + + log.info(f"消息类型: {msg.get('msg_type')}, 发送者: {msg.get('sender', {}).get('name')}") + + cos_uploader = CosUploader() + date_str = datetime.now(TZ).strftime('%Y-%m-%d') + + log.info("开始处理 merge_forward...") + record = process_merge_forward_message(msg, cos_uploader, date_str) + if not record: + log.error("处理失败") + return + + log.info(f"处理完成, forward_detail 长度: {len(record.get('forward_detail', ''))}") + log.info(f"content: {record['content'][:200]}...") + + # 更新数据库 + conn = pymysql.connect( + host=MYSQL_HOST, port=MYSQL_PORT, + user=MYSQL_USER, password=MYSQL_PASS, + database=MYSQL_DB, charset='utf8mb4' + ) + try: + with conn.cursor() as cursor: + sql = f""" + UPDATE {MYSQL_TABLE} + SET content = %s, forward_detail = %s + WHERE message_id = %s + """ + cursor.execute(sql, ( + record['content'], + record['forward_detail'], + TARGET_MSG_ID + )) + conn.commit() + log.info(f"数据库更新成功, affected rows: {cursor.rowcount}") + except Exception as e: + log.error(f"数据库更新失败: {e}") + conn.rollback() + finally: + conn.close() + + # 打印 forward_detail 前几行 + log.info("=== forward_detail preview ===") + for line in record['forward_detail'].split('\n')[:15]: + print(line) + print("...") + + +if __name__ == '__main__': + main() diff --git a/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py b/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py index f89cf7d..e19d53a 100755 --- a/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py +++ b/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py @@ -214,6 +214,10 @@ def process_message(msg: dict, cos_uploader, date_str: str) -> dict: 'msg_timestamp': msg_timestamp } + # 多条转发消息 + if msg_type == 'merge_forward': + return None # 由 process_merge_forward_message 单独处理 + # 纯文本消息 if msg_type == 'text': text = re.sub(r'<[^>]*>', '', content).strip() @@ -398,6 +402,223 @@ def process_post_message(msg: dict, cos_uploader, date_str: str) -> list: return records +def 
process_merge_forward_message(msg: dict, cos_uploader, date_str: str) -> dict: + """处理 merge_forward(多条转发)消息 + + 解析 XML,下载子消息中的媒体上传到COS, + 生成 forward_detail(含COS链接的完整内容)和 content(可读摘要)。 + """ + msg_id = msg.get('message_id', '') + sender_id = msg.get('sender', {}).get('id', '') + sender_name = msg.get('sender', {}).get('name', '未知') + create_time = msg.get('create_time', '') + raw_content = msg.get('content', '') + parent_id = msg.get('parent_id') + root_id = msg.get('root_id') + quote_message_id = extract_quote_message_id(msg) + + # 解析时间戳 + try: + dt = datetime.fromisoformat(create_time.replace('Z', '+00:00')) + msg_time = dt.astimezone(TZ).strftime('%Y-%m-%d %H:%M:%S') + msg_timestamp = int(dt.timestamp() * 1000) + except Exception: + msg_time = datetime.now(TZ).strftime('%Y-%m-%d %H:%M:%S') + msg_timestamp = int(datetime.now(TZ).timestamp() * 1000) + + # 提取 内容 + fwd_match = re.search(r'\n(.*)\n', raw_content, re.DOTALL) + if not fwd_match: + log.warning(f"merge_forward 解析失败: {msg_id}") + return None + + fwd_body = fwd_match.group(1) + + # 解析子消息:每条以 [时间戳] 开头 + sub_msg_pattern = re.compile( + r'^\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2})\]\s+(.*?):\s*$', + re.MULTILINE + ) + + # 先找到所有子消息的边界 + boundaries = [] + for m in sub_msg_pattern.finditer(fwd_body): + boundaries.append((m.start(), m.end(), m.group(1), m.group(2))) + + if not boundaries: + log.warning(f"merge_forward 无子消息: {msg_id}") + return None + + # 提取每条子消息的完整文本块 + sub_messages = [] + for i, (start, end, ts, sname) in enumerate(boundaries): + # 内容从当前边界结束到下一个边界开始(或字符串末尾) + next_start = boundaries[i + 1][0] if i + 1 < len(boundaries) else len(fwd_body) + raw_block = fwd_body[end:next_start] + sub_messages.append({'timestamp': ts, 'sender': sname, 'raw': raw_block}) + + # 全局媒体计数器 + img_counter = [0] + vid_counter = [0] + + def process_sub_msg_block(raw_block: str, sub_ts: str, sub_sender: str) -> str: + """处理单条子消息的文本块,将媒体引用替换为COS链接""" + lines = raw_block.split('\n') + processed_lines = [] + + for 
line in lines: + stripped = line.strip() + + # 处理图片引用: [Image: img_xxx] + img_match = re.search(r'\[Image:\s*(img_[a-zA-Z0-9_-]+)\]', stripped) + if img_match: + img_key = img_match.group(1) + img_counter[0] += 1 + idx = img_counter[0] + + # 下载图片 + filename = f'{msg_id[-12:]}_fwd_img{idx}.png' + cos_key = f'{COS_BASE_PATH}/image/{date_str}/{filename}' + local_path = download_resource(msg_id, img_key, 'image', filename) + img_url = None + if local_path: + try: + img_url = cos_uploader.upload(local_path, cos_key, 'image/png') + log.info(f"转发图片上传COS成功: {cos_key}") + except Exception as e: + log.error(f"转发图片上传COS失败: {cos_key} - {e}") + finally: + try: + os.remove(local_path) + except Exception: + pass + + if img_url: + replacement = f'[图片{idx}]({img_url})' + else: + replacement = f'[图片{idx}](下载失败)' + + processed_lines.append(stripped.replace(img_match.group(0), replacement)) + continue + + # 处理视频引用: