auto backup 2026-05-12 08:10:01
This commit is contained in:
parent
d0b44f5a8c
commit
7931b77dfb
@ -12,6 +12,6 @@ vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf
|
|||||||
tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06
|
tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06
|
||||||
user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81
|
user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81
|
||||||
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
|
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
|
||||||
feishu-group-msg-sync 085f95a5b89fec3b6a627da25d66ffeeb0be430098387739a64f7903f0ee88d4
|
|
||||||
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
|
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
|
||||||
feishu-feedback-sync 7bd0ccd7112156e8fcfca7512b669f1c956cc085c6de9530e02838fa27d583cc
|
feishu-feedback-sync e51dd6d0c4f26898ebcc069ab19272d17e86c01a767c36c1d65ab576da38ba11
|
||||||
|
feishu-group-msg-sync 1b581de76d419e6a33db0836125efc16ef2c972013fcae6f08c03aa7e2276445
|
||||||
|
|||||||
10
AGENTS.md
10
AGENTS.md
@ -127,16 +127,6 @@ System: [...] Feishu[xiaobian] group oc_xxx | 发送者 (ou_xxx) [msg:om_xxx]
|
|||||||
|
|
||||||
**简言之:群聊中未被 @ → 可以做事(静默任务),但绝不说话。**
|
**简言之:群聊中未被 @ → 可以做事(静默任务),但绝不说话。**
|
||||||
|
|
||||||
#### 何时发言(仅在被 @ 的前提下)
|
|
||||||
|
|
||||||
即使被 @,以下情况仍应保持沉默(NO_REPLY):
|
|
||||||
|
|
||||||
- 同事之间的闲聊,@ 你只是无意的
|
|
||||||
- 已经有人回答了问题
|
|
||||||
- 你的回复只是"是的"或"收到",没有实质价值
|
|
||||||
- 对话在没有你的情况下进展顺利
|
|
||||||
|
|
||||||
参与,而非主导。质量 > 数量。
|
|
||||||
|
|
||||||
## 工作区目录规范(强制执行)
|
## 工作区目录规范(强制执行)
|
||||||
|
|
||||||
|
|||||||
16
HEARTBEAT.md
16
HEARTBEAT.md
@ -1,13 +1,11 @@
|
|||||||
# HEARTBEAT.md
|
# HEARTBEAT.md
|
||||||
|
|
||||||
|
|
||||||
# 保持此文件为空(或仅包含注释)以跳过心跳 API 调用。
|
|
||||||
# 当你希望定期检查某些内容时,在下方添加任务。
|
|
||||||
|
|
||||||
# 示例任务:
|
|
||||||
# - 检查是否有未处理的同事消息
|
|
||||||
# - 检查日历中即将到来的会议
|
|
||||||
# - 整理近期记忆文件
|
|
||||||
|
|
||||||
# 自动同步Skill到公司SkillHub
|
# 自动同步Skill到公司SkillHub
|
||||||
- 查看是否有需要推送的skill: 执行脚本 /root/.openclaw/workspace-xiaokui/scripts/sync_skill_to_skillhub.sh,如果推送成功,给李若松发送通知消息,无需推送则静默
|
- 查看是否有需要推送的skill: 执行脚本 /root/.openclaw/workspace-xiaokui/scripts/sync_skill_to_skillhub.sh,如果推送成功,给李若松发送通知消息,无需推送则静默
|
||||||
|
|
||||||
|
# AI 问题归纳(每日)
|
||||||
|
- 检查 output/daily_feedback/ 下是否有当天日期的 cluster_context_YYYY-MM-DD.json 文件
|
||||||
|
- 如果有,读取上下文 JSON,为每个问题簇生成精炼的问题描述
|
||||||
|
- 将 AI 描述写入 output/daily_feedback/ai_descriptions_YYYY-MM-DD.json
|
||||||
|
- 执行 python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --apply-ai output/daily_feedback/ai_descriptions_YYYY-MM-DD.json
|
||||||
|
- 完成后删除 cluster_context 文件(避免重复处理)
|
||||||
|
|||||||
@ -7,3 +7,7 @@
|
|||||||
{"type":"memory.recall.recorded","timestamp":"2026-05-07T02:55:43.649Z","query":"问题分类 分类规则 版本更新 声音音频 UI显示 启动运行 语音识别 关卡内容 priority_classifier","resultCount":6,"results":[{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":73,"endLine":102,"score":1}]}
|
{"type":"memory.recall.recorded","timestamp":"2026-05-07T02:55:43.649Z","query":"问题分类 分类规则 版本更新 声音音频 UI显示 启动运行 语音识别 关卡内容 priority_classifier","resultCount":6,"results":[{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":73,"endLine":102,"score":1}]}
|
||||||
{"type":"memory.recall.recorded","timestamp":"2026-05-07T03:19:39.321Z","query":"小葵小葵群 chat_id 发消息 bot","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
{"type":"memory.recall.recorded","timestamp":"2026-05-07T03:19:39.321Z","query":"小葵小葵群 chat_id 发消息 bot","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||||
{"type":"memory.recall.recorded","timestamp":"2026-05-08T10:25:44.365Z","query":"MySQL database connection credentials lark_group_message","resultCount":4,"results":[{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-23.md","startLine":28,"endLine":55,"score":1},{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1},{"path":"memory/2026-04-23.md","startLine":47,"endLine":72,"score":1}]}
|
{"type":"memory.recall.recorded","timestamp":"2026-05-08T10:25:44.365Z","query":"MySQL database connection credentials lark_group_message","resultCount":4,"results":[{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-23.md","startLine":28,"endLine":55,"score":1},{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1},{"path":"memory/2026-04-23.md","startLine":47,"endLine":72,"score":1}]}
|
||||||
|
{"type":"memory.recall.recorded","timestamp":"2026-05-11T09:48:27.002Z","query":"今日反馈汇总 问题反馈报告 每日汇总","resultCount":5,"results":[{"path":"memory/2026-05-07.md","startLine":86,"endLine":116,"score":1},{"path":"memory/2026-05-07.md","startLine":73,"endLine":91,"score":1},{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1}]}
|
||||||
|
{"type":"memory.recall.recorded","timestamp":"2026-05-11T10:43:36.686Z","query":"feishu_group_message 表结构 多条转发 message_type","resultCount":5,"results":[{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1},{"path":"memory/2026-04-23.md","startLine":28,"endLine":55,"score":1},{"path":"memory/2026-04-23.md","startLine":47,"endLine":72,"score":1},{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-10.md","startLine":44,"endLine":68,"score":1}]}
|
||||||
|
{"type":"memory.recall.recorded","timestamp":"2026-05-11T11:26:09.200Z","query":"sort_tag 飞书 API 创建节点 权限","resultCount":2,"results":[{"path":"memory/2026-05-09.md","startLine":46,"endLine":65,"score":1},{"path":"memory/2026-05-09.md","startLine":1,"endLine":19,"score":1}]}
|
||||||
|
{"type":"memory.recall.recorded","timestamp":"2026-05-11T11:26:09.201Z","query":"知识库 sort_tag 空 文档排序","resultCount":3,"results":[{"path":"memory/2026-05-09.md","startLine":46,"endLine":65,"score":1},{"path":"memory/2026-05-09.md","startLine":78,"endLine":95,"score":1},{"path":"memory/2026-05-06.md","startLine":20,"endLine":47,"score":1}]}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"updatedAt": "2026-05-08T10:25:44.365Z",
|
"updatedAt": "2026-05-11T11:26:09.201Z",
|
||||||
"entries": {
|
"entries": {
|
||||||
"memory:memory/2026-04-18.md:1:5": {
|
"memory:memory/2026-04-18.md:1:5": {
|
||||||
"key": "memory:memory/2026-04-18.md:1:5",
|
"key": "memory:memory/2026-04-18.md:1:5",
|
||||||
@ -81,22 +81,24 @@
|
|||||||
"endLine": 142,
|
"endLine": 142,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) 已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 #### 触发方式 - 手动:「同步飞书反馈」「整理反馈对话链」 - 定时:每天 10:00 crontab 自动执行 ## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 ### 步骤4 包含两部分 1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} 2. **当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: - \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N",
|
"snippet": "问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) 已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 #### 触发方式 - 手动:「同步飞书反馈」「整理反馈对话链」 - 定时:每天 10:00 crontab 自动执行 ## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 ### 步骤4 包含两部分 1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} 2. **当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: - \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N",
|
||||||
"recallCount": 4,
|
"recallCount": 5,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 4,
|
"totalScore": 5,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||||
"lastRecalledAt": "2026-05-07T02:55:43.649Z",
|
"lastRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"f865295b9ac7",
|
"f865295b9ac7",
|
||||||
"cd9c89262c30",
|
"cd9c89262c30",
|
||||||
"ac7fd0b52a4e",
|
"ac7fd0b52a4e",
|
||||||
"49c0959dc960"
|
"49c0959dc960",
|
||||||
|
"70caeba05281"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-06",
|
"2026-05-06",
|
||||||
"2026-05-07"
|
"2026-05-07",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"quote-message-id",
|
"quote-message-id",
|
||||||
@ -151,22 +153,24 @@
|
|||||||
"endLine": 205,
|
"endLine": 205,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "- iOS 的两个相关话题(组件无音频 / Loading 慢)因无引用关系而分成两个簇(需策略3语义聚类解决) - 单消息簇被跳过(需至少 2 条消息才能形成问题) ### Skill 文件最终状态 - `skills/feishu-feedback-sync/SKILL.md`:已包含完整步骤1-4的文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`:已集成 `summarize_cluster()`、`extract_location_elements()`、`generate_summary()` 函数 - crontab 每日 10:00 执行(与步骤3一起) ### 步骤4 架构调整:AI 归纳取代规则生成 [刘新玉] - 2026-04-30 19:07 #### 问题 脚本规则匹配生成的问题描述质量差: - 组件匹配失败(NPC/HUD → \"未知组件\") - 现象摘取了完整原始消息(含 @、无关词) - 端识别不稳定 #### 决策 **脚本输出结构化元数据 + 对话表,AI 负责归纳描述。** - 脚本 `summarize_cluster` 改为输出: 1. 位置元数据(端/环节/课程/组件)— 由 `extract_location_elements` 提取 2. 发言人-要点表格(规则生成) 3. 问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固",
|
"snippet": "- iOS 的两个相关话题(组件无音频 / Loading 慢)因无引用关系而分成两个簇(需策略3语义聚类解决) - 单消息簇被跳过(需至少 2 条消息才能形成问题) ### Skill 文件最终状态 - `skills/feishu-feedback-sync/SKILL.md`:已包含完整步骤1-4的文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`:已集成 `summarize_cluster()`、`extract_location_elements()`、`generate_summary()` 函数 - crontab 每日 10:00 执行(与步骤3一起) ### 步骤4 架构调整:AI 归纳取代规则生成 [刘新玉] - 2026-04-30 19:07 #### 问题 脚本规则匹配生成的问题描述质量差: - 组件匹配失败(NPC/HUD → \"未知组件\") - 现象摘取了完整原始消息(含 @、无关词) - 端识别不稳定 #### 决策 **脚本输出结构化元数据 + 对话表,AI 负责归纳描述。** - 脚本 `summarize_cluster` 改为输出: 1. 位置元数据(端/环节/课程/组件)— 由 `extract_location_elements` 提取 2. 发言人-要点表格(规则生成) 3. 问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固",
|
||||||
"recallCount": 4,
|
"recallCount": 5,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 4,
|
"totalScore": 5,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||||
"lastRecalledAt": "2026-05-07T02:55:43.649Z",
|
"lastRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"f865295b9ac7",
|
"f865295b9ac7",
|
||||||
"cd9c89262c30",
|
"cd9c89262c30",
|
||||||
"ac7fd0b52a4e",
|
"ac7fd0b52a4e",
|
||||||
"49c0959dc960"
|
"49c0959dc960",
|
||||||
|
"70caeba05281"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-06",
|
"2026-05-06",
|
||||||
"2026-05-07"
|
"2026-05-07",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"已包含完整步骤1-4的文档",
|
"已包含完整步骤1-4的文档",
|
||||||
@ -186,20 +190,22 @@
|
|||||||
"endLine": 86,
|
"endLine": 86,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序 - 新增 `--skip-priority` CLI 参数 - 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑 #### 3. 更新 SKILL.md 文档 - 增加了步骤5的完整文档说明 - 增加了优先级判定规则速查表 #### 4. 真实数据验证结果 - 2026-04-28 数据:2个问题 - P0:iOS端Loading超时致数据丢失/无音频(15条) - P1:移动端NPC HUD偶尔变成一小条(8条) #### 5. 刘新玉要求简化文档格式(~11:46) - 反馈文档太繁琐,只要在原有格式把\"问题 1\"改成\"P0-问题 1\" - 去掉了优先级分布汇总、额外信息行,只保留标题前缀 + 一行判定依据 - 最终格式:`### P0-问题 1` + `**优先级判定:** 规则...` - 知识库文档已更新为简洁版 #### 6. 修复推断引用算法 Bug - 策略2的 else 分支(同发送者无引用时往前找不同发送者)缺少时间限制 - 导致胡陈辰 20:45 的媒体消息被推断引用到徐思清 12:29(跨8小时) - 修复:加了 `GAP_THRESHOLD_MIN` 检查 #### 7. 文档写入方式改进 - 从 `insert_before` + `replace_range` 改为 `append` 模式",
|
"snippet": "- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序 - 新增 `--skip-priority` CLI 参数 - 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑 #### 3. 更新 SKILL.md 文档 - 增加了步骤5的完整文档说明 - 增加了优先级判定规则速查表 #### 4. 真实数据验证结果 - 2026-04-28 数据:2个问题 - P0:iOS端Loading超时致数据丢失/无音频(15条) - P1:移动端NPC HUD偶尔变成一小条(8条) #### 5. 刘新玉要求简化文档格式(~11:46) - 反馈文档太繁琐,只要在原有格式把\"问题 1\"改成\"P0-问题 1\" - 去掉了优先级分布汇总、额外信息行,只保留标题前缀 + 一行判定依据 - 最终格式:`### P0-问题 1` + `**优先级判定:** 规则...` - 知识库文档已更新为简洁版 #### 6. 修复推断引用算法 Bug - 策略2的 else 分支(同发送者无引用时往前找不同发送者)缺少时间限制 - 导致胡陈辰 20:45 的媒体消息被推断引用到徐思清 12:29(跨8小时) - 修复:加了 `GAP_THRESHOLD_MIN` 检查 #### 7. 文档写入方式改进 - 从 `insert_before` + `replace_range` 改为 `append` 模式",
|
||||||
"recallCount": 3,
|
"recallCount": 4,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 3,
|
"totalScore": 4,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-07T02:22:47.713Z",
|
"firstRecalledAt": "2026-05-07T02:22:47.713Z",
|
||||||
"lastRecalledAt": "2026-05-07T02:55:43.649Z",
|
"lastRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"cd9c89262c30",
|
"cd9c89262c30",
|
||||||
"ac7fd0b52a4e",
|
"ac7fd0b52a4e",
|
||||||
"49c0959dc960"
|
"49c0959dc960",
|
||||||
|
"70caeba05281"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-07"
|
"2026-05-07",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"generate-summary",
|
"generate-summary",
|
||||||
@ -252,18 +258,20 @@
|
|||||||
"endLine": 23,
|
"endLine": 23,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "# 2026-04-17 工作日志 ## 飞书群消息同步改造(李若松要求) ### 变更内容 - **存储从飞书表格改为 MySQL**:新建 `vala_test.lark_group_message` 表,结构参考 `wechat_group_message` - **同步频率**:从每6小时改为每4小时 - **数据范围**:2026.4.1 起的「内容测试问题反馈」群消息 - **数据库账户**:chatbot(test环境,仅对 lark_group_message 有写入权限) ### 完成事项 1. ✅ 创建 `lark_group_message` 表(唯一键 message_id 防重复) 2. ✅ 编写新同步脚本 `scripts/sync_lark_group_to_mysql.py`(基于原有 sync_group_to_sheet.py 改造) 3. ✅ 首次全量同步完成:172 条记录(2026-04-01 ~ 2026-04-17),含文本134条、图片17条、视频10条、富文本9条、表情2条 4. ✅ crontab 定时任务已替换:旧的每6小时飞书表格同步 → 新的每4小时MySQL同步 5. ✅ 更新 secrets.md 记录 chatbot 账户 6. ✅ 更新 user-feedback-collector SKILL.md 反馈数据源信息 ### 文件变更 - 新增:`scripts/sync_lark_group_to_mysql.py`(核心同步脚本) - 新增:`scripts/run_lark_group_sync.s",
|
"snippet": "# 2026-04-17 工作日志 ## 飞书群消息同步改造(李若松要求) ### 变更内容 - **存储从飞书表格改为 MySQL**:新建 `vala_test.lark_group_message` 表,结构参考 `wechat_group_message` - **同步频率**:从每6小时改为每4小时 - **数据范围**:2026.4.1 起的「内容测试问题反馈」群消息 - **数据库账户**:chatbot(test环境,仅对 lark_group_message 有写入权限) ### 完成事项 1. ✅ 创建 `lark_group_message` 表(唯一键 message_id 防重复) 2. ✅ 编写新同步脚本 `scripts/sync_lark_group_to_mysql.py`(基于原有 sync_group_to_sheet.py 改造) 3. ✅ 首次全量同步完成:172 条记录(2026-04-01 ~ 2026-04-17),含文本134条、图片17条、视频10条、富文本9条、表情2条 4. ✅ crontab 定时任务已替换:旧的每6小时飞书表格同步 → 新的每4小时MySQL同步 5. ✅ 更新 secrets.md 记录 chatbot 账户 6. ✅ 更新 user-feedback-collector SKILL.md 反馈数据源信息 ### 文件变更 - 新增:`scripts/sync_lark_group_to_mysql.py`(核心同步脚本) - 新增:`scripts/run_lark_group_sync.s",
|
||||||
"recallCount": 1,
|
"recallCount": 2,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 1,
|
"totalScore": 2,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
||||||
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
|
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"cc0dd7ef50d7"
|
"cc0dd7ef50d7",
|
||||||
|
"5abc37103c15"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-08"
|
"2026-05-08",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"vala-test.lark-group-message",
|
"vala-test.lark-group-message",
|
||||||
@ -283,18 +291,20 @@
|
|||||||
"endLine": 55,
|
"endLine": 55,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "- 创建脚本:`scripts/sync_feishu_full_history.py` - 同步时间范围:2026-04-10 至今 - 成功处理 57 条消息(包含文本、图片、视频等多种类型) - 所有媒体文件已上传到 COS - 数据已全部写入数据库 4. **定时任务配置** - 每 4 小时自动同步「内容测试问题反馈」群消息 - crontab: `0 */4 * * * cd /root/.openclaw/workspace-xiaokui/skills/feishu-group-msg-sync/scripts && python3 sync_group_to_mysql.py` 5. **文档更新** - `feishu-group-msg-sync/SKILL.md`: 说明数据库存储方案和引用回复支持 - `user-feedback-collector/SKILL.md`: 更新飞书反馈数据源为 MySQL - `feishu-group-msg-sync/references/query_examples.md`: 提供引用回复查询示例 ### 技术要点 - **引用回复识别**: 从消息 `body.quote` 字段提取被引用消息的 `message_id` - **去重机制**: 使用 `UNIQUE KEY (chat_id, message_id)` 避免重复记录 - **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文",
|
"snippet": "- 创建脚本:`scripts/sync_feishu_full_history.py` - 同步时间范围:2026-04-10 至今 - 成功处理 57 条消息(包含文本、图片、视频等多种类型) - 所有媒体文件已上传到 COS - 数据已全部写入数据库 4. **定时任务配置** - 每 4 小时自动同步「内容测试问题反馈」群消息 - crontab: `0 */4 * * * cd /root/.openclaw/workspace-xiaokui/skills/feishu-group-msg-sync/scripts && python3 sync_group_to_mysql.py` 5. **文档更新** - `feishu-group-msg-sync/SKILL.md`: 说明数据库存储方案和引用回复支持 - `user-feedback-collector/SKILL.md`: 更新飞书反馈数据源为 MySQL - `feishu-group-msg-sync/references/query_examples.md`: 提供引用回复查询示例 ### 技术要点 - **引用回复识别**: 从消息 `body.quote` 字段提取被引用消息的 `message_id` - **去重机制**: 使用 `UNIQUE KEY (chat_id, message_id)` 避免重复记录 - **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文",
|
||||||
"recallCount": 1,
|
"recallCount": 2,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 1,
|
"totalScore": 2,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
||||||
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
|
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"cc0dd7ef50d7"
|
"cc0dd7ef50d7",
|
||||||
|
"5abc37103c15"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-08"
|
"2026-05-08",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"sync-group-to-mysql.py",
|
"sync-group-to-mysql.py",
|
||||||
@ -314,18 +324,20 @@
|
|||||||
"endLine": 120,
|
"endLine": 120,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "| iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | | 网络问题(4/29) | Ann | 2 | ### 飞书群 4/25-4/27 数据查询结果 - 查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 - 结果:0 条,该群此时间段无消息记录 ### 反馈同步 Skill 创建 [刘新玉] 将飞书问题反馈同步流程封装为 `feishu-feedback-sync` skill,并计划注册定时任务。 #### Skill 文件 - `skills/feishu-feedback-sync/SKILL.md` — 完整技能文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心同步脚本 - `scripts/sync_feishu_feedback_wrapper.sh` — 定时任务包装脚本 #### Skill 功能 1. 从 MySQL `lark_group_message` 查询近 N 天数据 2. 写入知识库电子表格(按天分 sheet) 3. **反馈对话链排序**:按引用关系将同一问题讨论聚合呈现 #### 策略2:推断缺失引用关系 [刘新玉] 问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人",
|
"snippet": "| iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | | 网络问题(4/29) | Ann | 2 | ### 飞书群 4/25-4/27 数据查询结果 - 查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 - 结果:0 条,该群此时间段无消息记录 ### 反馈同步 Skill 创建 [刘新玉] 将飞书问题反馈同步流程封装为 `feishu-feedback-sync` skill,并计划注册定时任务。 #### Skill 文件 - `skills/feishu-feedback-sync/SKILL.md` — 完整技能文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心同步脚本 - `scripts/sync_feishu_feedback_wrapper.sh` — 定时任务包装脚本 #### Skill 功能 1. 从 MySQL `lark_group_message` 查询近 N 天数据 2. 写入知识库电子表格(按天分 sheet) 3. **反馈对话链排序**:按引用关系将同一问题讨论聚合呈现 #### 策略2:推断缺失引用关系 [刘新玉] 问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人",
|
||||||
"recallCount": 1,
|
"recallCount": 2,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 1,
|
"totalScore": 2,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
||||||
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
|
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"cc0dd7ef50d7"
|
"cc0dd7ef50d7",
|
||||||
|
"5abc37103c15"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-08"
|
"2026-05-08",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"网络",
|
"网络",
|
||||||
@ -345,18 +357,20 @@
|
|||||||
"endLine": 72,
|
"endLine": 72,
|
||||||
"source": "memory",
|
"source": "memory",
|
||||||
"snippet": "- **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文件处理**: 下载飞书媒体文件并上传到腾讯 COS,记录可访问 URL ### 验证结果 - ✅ 数据库表结构已更新(3 个新字段) - ✅ 历史数据已同步(57 条消息) - ✅ 定时任务已配置 - ✅ 文档已更新 - ℹ️ 当前数据中无引用回复消息(quote_message_id 都是 NULL),说明这段时间内群消息都是直接发送的 ### 查询能力 支持以下查询场景: - 查询某条消息的所有回复 - 查询完整对话链(原消息 + 所有回复) - 统计热门话题(被回复最多的消息) - 按时间、发送人、关键词筛选 ### 文件清单 - `scripts/alter_lark_group_message.sql` - 数据库表结构升级脚本 - `scripts/sync_feishu_full_history.py` - 完整历史数据同步脚本 - `skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py` - 增量同步脚本 - `skills/feishu-group-msg-sync/references/query_examples.md` - 查询示例文档",
|
"snippet": "- **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文件处理**: 下载飞书媒体文件并上传到腾讯 COS,记录可访问 URL ### 验证结果 - ✅ 数据库表结构已更新(3 个新字段) - ✅ 历史数据已同步(57 条消息) - ✅ 定时任务已配置 - ✅ 文档已更新 - ℹ️ 当前数据中无引用回复消息(quote_message_id 都是 NULL),说明这段时间内群消息都是直接发送的 ### 查询能力 支持以下查询场景: - 查询某条消息的所有回复 - 查询完整对话链(原消息 + 所有回复) - 统计热门话题(被回复最多的消息) - 按时间、发送人、关键词筛选 ### 文件清单 - `scripts/alter_lark_group_message.sql` - 数据库表结构升级脚本 - `scripts/sync_feishu_full_history.py` - 完整历史数据同步脚本 - `skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py` - 增量同步脚本 - `skills/feishu-group-msg-sync/references/query_examples.md` - 查询示例文档",
|
||||||
"recallCount": 1,
|
"recallCount": 2,
|
||||||
"dailyCount": 0,
|
"dailyCount": 0,
|
||||||
"groundedCount": 0,
|
"groundedCount": 0,
|
||||||
"totalScore": 1,
|
"totalScore": 2,
|
||||||
"maxScore": 1,
|
"maxScore": 1,
|
||||||
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
||||||
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
|
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||||
"queryHashes": [
|
"queryHashes": [
|
||||||
"cc0dd7ef50d7"
|
"cc0dd7ef50d7",
|
||||||
|
"5abc37103c15"
|
||||||
],
|
],
|
||||||
"recallDays": [
|
"recallDays": [
|
||||||
"2026-05-08"
|
"2026-05-08",
|
||||||
|
"2026-05-11"
|
||||||
],
|
],
|
||||||
"conceptTags": [
|
"conceptTags": [
|
||||||
"quote-message-id",
|
"quote-message-id",
|
||||||
@ -368,6 +382,224 @@
|
|||||||
"update",
|
"update",
|
||||||
"处理"
|
"处理"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-05-07.md:86:116": {
|
||||||
|
"key": "memory:memory/2026-05-07.md:86:116",
|
||||||
|
"path": "memory/2026-05-07.md",
|
||||||
|
"startLine": 86,
|
||||||
|
"endLine": 116,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "| 9 | 用户反馈问题 | 建议、反馈、体验、优化 | | 10 | 其他问题 | (默认兜底) | ### 脚本修改 - `priority_classifier.py` 新增 `proble m_category(keyword)` 函数和 `CATEGORY_KEYWORDS` 规则 - `sync_feishu_feedback.py` 中 `summarize_cluster()` 新增 `category` 字段 - `generate_summary()` 输出格式改为按优先级+分类分组 --- ## 新增步骤7:问题分发 [刘新玉] - 2026-05-07 11:09 ### 需求 将每日问题归纳同步到「小葵小葵」群,P0问题@相关负责人跟进。 ### 分发规则 1. 将\"今日问题归纳\"内容发送到指定群(格式:标题+归纳正文+文档链接) 2. P0问题 @相关负责人的 @列表(从 `summarize_cluster` 的 `operators` 字段提取发言人) ### Bot身份问题 - Bot应用名:**小研**(App ID: `cli_a931175d41799cc7`) - Bot发消息需要已经加入目标群 - 小葵Bot在「小葵小葵」群中发消息报 500 \"chat does not exist\" → 可能是不同应用 - **待解决**:需要确认\"小葵小葵\"群里加的是哪个应用,或者把小研Bot加进去 ### 脚本修改 - `sync_feishu_feedback.py` 新增 `dispatch_to_group()` 函数 -",
|
||||||
|
"recallCount": 1,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 1,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"70caeba05281"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"priority-classifier.py",
|
||||||
|
"m-category",
|
||||||
|
"category-keywords",
|
||||||
|
"sync-feishu-feedback.py",
|
||||||
|
"summarize-cluster",
|
||||||
|
"generate-summary",
|
||||||
|
"cli-a931175d41799cc7",
|
||||||
|
"dispatch-to-group"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-05-07.md:73:91": {
|
||||||
|
"key": "memory:memory/2026-05-07.md:73:91",
|
||||||
|
"path": "memory/2026-05-07.md",
|
||||||
|
"startLine": 73,
|
||||||
|
"endLine": 91,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "``` ### 问题分类规则(按匹配优先级从上到下) | 优先级 | 分类 | 匹配关键词 | |--------|------|-----------| | 1 | 启动/运行异常 | 闪退、崩溃、crash、进不去、卡死、黑屏、重启、服务器宕机、服务器挂了 | | 2 | 版本/更新类 | 更新、升级、新版、版本、安装失败、OTA、热更新、无法更新、下载更新 | | 3 | Loading/加载类 | 加载、loading、loading卡死、转星星、加载慢、加载超时、loading超时、一直加载 | | 4 | 数据/进度类 | 数据丢失、数据异常、进度丢失、数据错误、分数异常、进度不对、记录丢失、数据对不上 | | 5 | 声音/音频类 | 无声、声音、音频、没声音、音效 | | 6 | 语音识别/判分类 | 判分、识别、语音识别、发音、评分、打分、AI判分 | | 7 | 关卡/内容类 | 关卡、题目、答案、内容 | | 8 | UI显示类 | 显示、界面、UI、HUD、动画、层级、图层、渲染 | | 9 | 用户反馈问题 | 建议、反馈、体验、优化 | | 10 | 其他问题 | (默认兜底) | ### 脚本修改 - `priority_classifier.py` 新增 `proble m_category(keyword)` 函数和 `CATEGORY_KEYWORDS` 规则 - `sync_feishu_feedback.py` 中 `summarize_cluster()` 新增 `category` 字段",
|
||||||
|
"recallCount": 1,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 1,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"70caeba05281"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"启动/运行异常",
|
||||||
|
"版本/更新类",
|
||||||
|
"loading/加载类",
|
||||||
|
"数据/进度类",
|
||||||
|
"声音/音频类",
|
||||||
|
"语音识别/判分类",
|
||||||
|
"关卡/内容类",
|
||||||
|
"priority-classifier.py"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-04-10.md:44:68": {
|
||||||
|
"key": "memory:memory/2026-04-10.md:44:68",
|
||||||
|
"path": "memory/2026-04-10.md",
|
||||||
|
"startLine": 44,
|
||||||
|
"endLine": 68,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "### 验证结果 - 全量同步成功:47条记录写入表格,5张图片+4个视频上传COS - crontab 每小时整点自动执行:`0 * * * *` - 群ID:oc_fabff7672e62a9ced7b326ee4a286c26 ## 封装两个通用Skill **来源:** [李若松] 要求将功能封装为可复用skill ### 1. tencent-cos-upload - 路径:`/root/.openclaw/skills/tencent-cos-upload/` - 功能:上传文件到腾讯COS并生成可访问URL - 提供命令行调用和Python模块两种方式 - 核心文件:`scripts/cos_upload.py`(CosUploader类) ### 2. feishu-group-msg-sync - 路径:`/root/.openclaw/skills/feishu-group-msg-sync/` - 功能:定期同步飞书群聊消息到电子表格,媒体上传COS - 依赖 tencent-cos-upload skill - 核心文件:`scripts/sync_group_to_sheet.py`(模板脚本,修改顶部配置即可复用) - 参考文件:`references/lark-cli-cheatsheet.md` ### 项目脚本也改为引用skill - `scripts/sync_feedback_group.py` 现在只做配置覆盖,逻辑全部引用自skill",
|
||||||
|
"recallCount": 1,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 1,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"5abc37103c15"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"tencent-cos-upload",
|
||||||
|
"scripts/cos-upload.py",
|
||||||
|
"feishu-group-msg-sync",
|
||||||
|
"scripts/sync-group-to-sheet.py",
|
||||||
|
"scripts/sync-feedback-group.py",
|
||||||
|
"验证",
|
||||||
|
"结果",
|
||||||
|
"全量"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-05-09.md:46:65": {
|
||||||
|
"key": "memory:memory/2026-05-09.md:46:65",
|
||||||
|
"path": "memory/2026-05-09.md",
|
||||||
|
"startLine": 46,
|
||||||
|
"endLine": 65,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "- 注意:xiaokui 无法删除 xiaoyan 创建的文档(跨应用权限隔离),需回退到 xiaoyan 凭证删除 **问题三:子文档排序不规则** - 根因:旧 `sort_tag = dt.timestamp()` 升序导致旧日期在前 - 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 不足:飞书 Wiki V2 API 创建节点时 `sort_tag` 参数可能被忽略(API 返回均为 null) - 兜底方案:按日期由近到远的顺序依次创建子文档,利用 `node_create_time` 自然排序 - 所有旧文档已删除并按正确顺序重建(5月8日→5月7日→4月28日),5月6日需手动创建 ### 飞书分发消息 `<at>` 标签修复 - 根因:`dispatch_summary_to_chat` 中两步打架——第一步 `re.sub` 注入 HTML `<at>` 文本,第二步 `content_parts` 用正确 `{\"tag\":\"at\"}` 格式插入 - 修复:删除 `re.sub` 注入原始 HTML 标签的代码,仅保留富文本 at tag ### 5月9日补跑问题 - 5月9日10:00定时任务因 `IndentationError` 失败(凌晨08:10自动备份 `c3c8dbb` 损坏了脚本) - 修复:从上游版本恢复被清空的步骤4-7逻辑 + 模块常量 - 手动补跑5月8日数据(8条反馈,1个P0)成功 ### 待验证 - [ ] 确认「用户反馈问题汇总」下子文档排",
|
||||||
|
"recallCount": 2,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 2,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T11:26:09.200Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T11:26:09.201Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"cd203fdc056f",
|
||||||
|
"dbd60c484e39"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"备份",
|
||||||
|
"sort-tag",
|
||||||
|
"dt.timestamp",
|
||||||
|
"node-create-time",
|
||||||
|
"dispatch-summary-to-chat",
|
||||||
|
"re.sub",
|
||||||
|
"content-parts",
|
||||||
|
"从上游版本恢复被清空的步骤4-7逻辑"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-05-09.md:1:19": {
|
||||||
|
"key": "memory:memory/2026-05-09.md:1:19",
|
||||||
|
"path": "memory/2026-05-09.md",
|
||||||
|
"startLine": 1,
|
||||||
|
"endLine": 19,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "## 2026-05-09 工作日志 ### 飞书反馈同步 - 三个文档问题修复 **问题一:图片无法点击查看** - 根因:Markdown表格中 `|` 被 `dialogue_info` 内部的 ` | ` 分隔符破坏 + `![图片]()` 格式在飞书导入时不可靠 - 修复:`info_parts` 分隔符从 `\" | \"` 改为 `<br>`;图片格式从 `` 改为 `📎 [图片](url)` 可点击链接 **问题二:子文档作者显示\"小研\"** - 根因:现有子文档由 xiaoyan bot (`ou_3e97d43a66639a457f0020a0d7f2bd74`) 创建,xiaokui 无法直接覆盖 - 修复:在 `update_summary_doc_as_children` 中添加 creator 校验,非 xiaokui 创建则先通过 xiaoyan 凭证删除再重建 - 关键常量:`XIAOKUI_BOT_OPEN_ID = \"ou_fdbf5fdafd91670db34b6ac887f30fb7\"` - 注意:xiaokui 无法删除 xiaoyan 创建的文档(跨应用权限隔离),需回退到 xiaoyan 凭证删除 **问题三:子文档排序不规则** - 根因:旧 `sort_tag = dt.timestamp()` 升序导致旧日期在前 - 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 不足:飞书 Wiki V2 API 创建节点时 `sort",
|
||||||
|
"recallCount": 1,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 1,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T11:26:09.200Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T11:26:09.200Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"cd203fdc056f"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"dialogue-info",
|
||||||
|
"info-parts",
|
||||||
|
"update-summary-doc-as-children",
|
||||||
|
"xiaokui-bot-open-id",
|
||||||
|
"sort-tag",
|
||||||
|
"dt.timestamp",
|
||||||
|
"工作",
|
||||||
|
"日志"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-05-09.md:78:95": {
|
||||||
|
"key": "memory:memory/2026-05-09.md:78:95",
|
||||||
|
"path": "memory/2026-05-09.md",
|
||||||
|
"startLine": 78,
|
||||||
|
"endLine": 95,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "- 修复:在 `update_summary_doc_as_children` 中添加 creator 校验,非 xiaokui 创建则先通过 xiaoyan 凭证删除再重建 - 关键常量:`XIAOKUI_BOT_OPEN_ID = \"ou_fdbf5fdafd91670db34b6ac887f30fb7\"` - 注意:xiaokui 无法删除 xiaoyan 创建的文档(跨应用权限隔离),需回退到 xiaoyan 凭证删除 **问题三:子文档排序不规则** - 根因:旧 `sort_tag = dt.timestamp()` 升序导致旧日期在前 - 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 不足:飞书 Wiki V2 API 创建节点时 `sort_tag` 参数可能被忽略(API 返回均为 null) - 兜底方案:按日期由近到远的顺序依次创建子文档,利用 `node_create_time` 自然排序 - 所有旧文档已删除并按正确顺序重建(5月8日→5月7日→4月28日),5月6日需手动创建 ### 飞书分发消息 `<at>` 标签修复 - 根因:`dispatch_summary_to_chat` 中两步打架——第一步 `re.sub` 注入 HTML `<at>` 文本,第二步 `content_parts` 用正确 `{\"tag\":\"at\"}` 格式插入 - 修复:删除 `re.sub` 注入原始 HTML 标签的代码,仅保留富文本 at tag ### 5月9日补跑问题",
|
||||||
|
"recallCount": 1,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 1,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T11:26:09.201Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T11:26:09.201Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"dbd60c484e39"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"update-summary-doc-as-children",
|
||||||
|
"xiaokui-bot-open-id",
|
||||||
|
"sort-tag",
|
||||||
|
"dt.timestamp",
|
||||||
|
"node-create-time",
|
||||||
|
"dispatch-summary-to-chat",
|
||||||
|
"re.sub",
|
||||||
|
"content-parts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"memory:memory/2026-05-06.md:20:47": {
|
||||||
|
"key": "memory:memory/2026-05-06.md:20:47",
|
||||||
|
"path": "memory/2026-05-06.md",
|
||||||
|
"startLine": 20,
|
||||||
|
"endLine": 47,
|
||||||
|
"source": "memory",
|
||||||
|
"snippet": "- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序 - 新增 `--skip-priority` CLI 参数 - 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑 #### 3. 更新 SKILL.md 文档 - 增加了步骤5的完整文档说明 - 增加了优先级判定规则速查表 #### 4. 真实数据验证结果 - 2026-04-28 数据:2个问题 - P0:iOS端Loading超时致数据丢失/无音频(15条) - P1:移动端NPC HUD偶尔变成一小条(8条) ### 待确认 - 步骤5的优先级标注目前仅在脚本输出中展示,尚未写入「用户反馈问题汇总」文档(RaL6whoYMijyYHkSlWrc7OLLnBy) - 刘新玉需确认是否需要增加文档自动写入能力 ## 刘新玉:PDF文件处理 - 刘新玉于11:02发送PDF(用户反馈问题优先级判断文档),8页A4 - 已用pdftotext解析并完整阅读 - 内容:P0-P3四级优先级定义、动态调整规则、三大判断问题 - 已基于此文档实现优先级判定规则 # 2026-05-06 工作日志 ## 刘新玉:飞书反馈优先级判定(步骤5)落地 ### 需求背景 - 刘新玉发来「用户反馈问题优先级判断文档」PDF,要求将优先级判定集成到飞书反馈同步流程的第5步",
|
||||||
|
"recallCount": 1,
|
||||||
|
"dailyCount": 0,
|
||||||
|
"groundedCount": 0,
|
||||||
|
"totalScore": 1,
|
||||||
|
"maxScore": 1,
|
||||||
|
"firstRecalledAt": "2026-05-11T11:26:09.201Z",
|
||||||
|
"lastRecalledAt": "2026-05-11T11:26:09.201Z",
|
||||||
|
"queryHashes": [
|
||||||
|
"dbd60c484e39"
|
||||||
|
],
|
||||||
|
"recallDays": [
|
||||||
|
"2026-05-11"
|
||||||
|
],
|
||||||
|
"conceptTags": [
|
||||||
|
"generate-summary",
|
||||||
|
"compute-final-priority",
|
||||||
|
"sort-by-priority",
|
||||||
|
"skip-priority",
|
||||||
|
"sort-threads",
|
||||||
|
"union-find",
|
||||||
|
"skill.md",
|
||||||
|
"ios端loading超时致数据丢失/无音频"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
10
memory/2026-05-11-1838.md
Normal file
10
memory/2026-05-11-1838.md
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Session: 2026-05-11 18:38:20 GMT+8
|
||||||
|
|
||||||
|
- **Session Key**: agent:xiaokui:feishu:direct:ou_088ee79216826be4a24af44f7268f880
|
||||||
|
- **Session ID**: 9f44defe-353f-4d66-a103-a1529633a9ce
|
||||||
|
- **Source**: feishu
|
||||||
|
|
||||||
|
## Conversation Summary
|
||||||
|
|
||||||
|
user: hi
|
||||||
|
assistant: 若松,我在。有什么需要处理的?
|
||||||
264
memory/2026-05-11-1925.md
Normal file
264
memory/2026-05-11-1925.md
Normal file
@ -0,0 +1,264 @@
|
|||||||
|
# Session: 2026-05-11 19:25:44 GMT+8
|
||||||
|
|
||||||
|
- **Session Key**: agent:xiaokui:feishu:direct:ou_9d4df593d0419d705274947c5cec5ada
|
||||||
|
- **Session ID**: e5e30039-7ae4-4c7d-acfc-1767daffec51
|
||||||
|
- **Source**: feishu
|
||||||
|
|
||||||
|
## Conversation Summary
|
||||||
|
|
||||||
|
assistant: 测试完整的 `--apply-ai` 端到端流程:
|
||||||
|
assistant: 全部完成。以下是交付摘要:
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## AI 归纳已完整接入飞书反馈同步流水线
|
||||||
|
|
||||||
|
### 改了什么
|
||||||
|
|
||||||
|
**`sync_feishu_feedback.py`** — 新增两种运行模式:
|
||||||
|
|
||||||
|
| 模式 | 命令 | 产出 |
|
||||||
|
|------|------|------|
|
||||||
|
| 占位符模式 | `--ai-placeholders` | 文档写入 `[待AI归纳:#N]` 占位符 + 保存上下文 JSON |
|
||||||
|
| 回写模式 | `--apply-ai <json>` | 替换占位符为 AI 描述 → 覆盖写入文档 |
|
||||||
|
|
||||||
|
### 端到端验证通过
|
||||||
|
|
||||||
|
今天 2026-05-11 的数据(1 个问题簇、9 条消息):
|
||||||
|
|
||||||
|
| 阶段 | 结果 |
|
||||||
|
|------|------|
|
||||||
|
| 脚本规则引擎 | `图片 (1/1) 我怎么记着已经出了...`(原文拼贴) |
|
||||||
|
| AI 归纳 → 回写后 | `在 132 版本中,L1 级别 U 单元名称显示为数字编号 — U 级别单元名称未命名(季度级别已有名称),新版本发布后用户将全部可见` |
|
||||||
|
|
||||||
|
文档已确认:无重复、无占位符残留、AI 描述正确写入。
|
||||||
|
|
||||||
|
### 伴随修复
|
||||||
|
|
||||||
|
- **markdown 转义 Bug**:`--markdown` 内联传递到 `subprocess` 会导致飞书 rich text 被错误转义,改为 `@file` 相对路径方式
|
||||||
|
- **清理 crontab**:移除两个浪费资源的每分钟任务
|
||||||
|
|
||||||
|
### 自动化链路
|
||||||
|
|
||||||
|
```
|
||||||
|
每日 10:00 crontab → --ai-placeholders(占位符文档)
|
||||||
|
↓
|
||||||
|
心跳检测到 cluster_context_YYYY-MM-DD.json
|
||||||
|
↓
|
||||||
|
AI 读取上下文 → 生成描述 JSON
|
||||||
|
↓
|
||||||
|
--apply-ai → 替换占位符 → 覆盖写入
|
||||||
|
```
|
||||||
|
user: 好的 小葵辛苦了 我还想知道目前“用户反馈问题汇总”下的子文档的排序规则
|
||||||
|
assistant: 子文档的排序由 `create_child_doc()` 中的 `sort_tag` 控制:
|
||||||
|
|
||||||
|
**当前规则:按日期倒序(最新日期排最前)**
|
||||||
|
|
||||||
|
```python
|
||||||
|
sort_tag = 9999999999 - int(dt.timestamp()) # 日期越新 → 时间戳越大 → sort_tag越小 → 排在前面
|
||||||
|
```
|
||||||
|
|
||||||
|
飞书 Wiki 按 `sort_tag` **升序**排列子节点,所以:
|
||||||
|
|
||||||
|
| 日期 | 时间戳 | sort_tag | 位置 |
|
||||||
|
|------|--------|----------|------|
|
||||||
|
| 2026-05-11 | 1778428800 | 8221571199 | 最小 → **最前** |
|
||||||
|
| 2026-05-10 | 1778342400 | 8221657599 | |
|
||||||
|
| ... | ... | ... | |
|
||||||
|
| 2026-01-01 | 1767196800 | 8232803199 | 最大 → **最后** |
|
||||||
|
|
||||||
|
**但有两点注意:**
|
||||||
|
|
||||||
|
1. **仅创建时生效** — 首次 `create_child_doc()` 时写入 `sort_tag`,后续 `overwrite` 更新内容不修改排序值。如果某文档是早期创建的(sort_tag 逻辑加入之前),它保留原来的默认值。
|
||||||
|
|
||||||
|
2. **排序不一致场景** — 如果某个旧文档需要调整排序位置,需要手动修改其 `sort_tag`(目前脚本没有做这个)。
|
||||||
|
user: 理解了,我看现在5月11日的文档排在最下面,按日期倒序逻辑,不是应该排在最上面吗
|
||||||
|
assistant: 所有文档的 `sort_tag` 都是空 — 飞书 API 创建节点时 `sort_tag` 没生效。让我确认并修复:
|
||||||
|
assistant: 根因找到了。
|
||||||
|
|
||||||
|
**飞书 Wiki 子文档排序由 `sort_tag` 控制,但 Bot 应用缺少 `wiki:node:update` 权限,API 不认 `sort_tag` 字段** — 创建时传了也被静默忽略,更新接口直接 404。
|
||||||
|
|
||||||
|
所以实际排序退化为按 **`node_create_time`(文档创建时间)升序排列**:
|
||||||
|
|
||||||
|
| # | 文档 | 创建时间 |
|
||||||
|
|---|------|----------|
|
||||||
|
| 1 | 2026-05-08 | 5月9日 11:17 |
|
||||||
|
| 2 | 2026-05-07 | 5月9日 11:17 |
|
||||||
|
| 3 | 2026-05-06 | 5月9日 11:17 |
|
||||||
|
| 4 | 2026-04-28 | 5月9日 14:05 |
|
||||||
|
| 5 | **2026-05-11** | **5月11日 18:18** ← 今天被删掉重建,创建时间最新,所以排最后 |
|
||||||
|
|
||||||
|
**要修复的话,需要在飞书开放平台给 Bot 应用(`cli_a9311791083adcc1`)添加 `wiki:node:update` 权限,然后我再通过 API 给所有文档设 `sort_tag`。**
|
||||||
|
|
||||||
|
这需要技术负责人(李若松或张昆鹏)去操作。要我通知他们吗?
|
||||||
|
user: 通知
|
||||||
|
assistant: 已通知李若松和张昆鹏,内容摘要:
|
||||||
|
|
||||||
|
- **问题**:Bot 应用缺少 `wiki:node:update` 权限,无法通过 API 设置 `sort_tag`,导致子文档按创建时间而非日期排序
|
||||||
|
- **需操作**:飞书开放平台 → 给小葵应用添加 `wiki:node:update` 权限
|
||||||
|
- **后续**:权限开通后告知我,我来批量修正所有子文档排序
|
||||||
|
user: 好的 那等权限开通后我告诉你 我现在想知道飞书问题反馈整个收集、归纳和分发流程;想要知道的原因是:我现在还要收集微信的问题反馈问题,想评估下要怎么复用
|
||||||
|
assistant: 整个流程我按三段拆开讲,每段标注了「哪些可以直接复用到微信」:
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 飞书问题反馈完整流程
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────── 采集层 ───────────────┐
|
||||||
|
│ 飞书群消息 → MySQL lark_group_message │ feishu-group-msg-sync 技能
|
||||||
|
└──────────────┬────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────── 处理层 ───────────────┐
|
||||||
|
│ 步骤1: 查询数据库(按日期范围) │
|
||||||
|
│ 步骤2: 写入飞书表格(按天分sheet) │
|
||||||
|
│ 步骤3: 对话链排序(Union-Find聚类) │
|
||||||
|
│ 步骤4: 问题归纳(规则/AI生成描述) │
|
||||||
|
│ 步骤5: 优先级排序(P0→P3) │
|
||||||
|
│ 步骤6: 问题分类(按类型归类) │
|
||||||
|
└──────────────┬────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────── 分发层 ───────────────┐
|
||||||
|
│ 步骤7: P0 @通知到群 │
|
||||||
|
│ 步骤8: AI 精炼描述(占位符替换) │
|
||||||
|
└──────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 各步骤详解 & 微信复用评估
|
||||||
|
|
||||||
|
### 采集层
|
||||||
|
|
||||||
|
| | 飞书 | 微信(已有) |
|
||||||
|
|---|---|---|
|
||||||
|
| 数据来源 | `lark_group_message` 表 | `wechat_group_message` 表,1089条 |
|
||||||
|
| 采集方式 | 定时拉群消息 | 实时每分钟同步 |
|
||||||
|
| 引用关系 | `quote_message_id` + 推断 | `refer_msg_svrid`(较少) |
|
||||||
|
|
||||||
|
微信已有数据在库,采集层 **现成可用**。
|
||||||
|
|
||||||
|
### 处理层(核心逻辑在 `sync_feishu_feedback.py`)
|
||||||
|
|
||||||
|
**步骤 1-2:查询 + 写表格** — 微信可复用,只改表名和字段映射。
|
||||||
|
|
||||||
|
**步骤 3:对话链排序** — 这是最重要的复用点,也是最难点:
|
||||||
|
|
||||||
|
飞书排序依赖三个推断策略(@提及、同发送者聚类、最近不同发送者),微信消息面临两个额外问题:
|
||||||
|
|
||||||
|
1. **转发消息多** — 微信群大量用「聊天记录」转发,消息内容嵌套在 `content` 里的 XML/文本中,非原生消息
|
||||||
|
2. **无显式引用** — 微信没有飞书那种消息级引用回复能力,只有少数消息有 `refer_msg_svrid`
|
||||||
|
|
||||||
|
→ 排序逻辑需要针对微信场景调优,但整体架构(Union-Find 聚类 + 递归展开)可直接复用。
|
||||||
|
|
||||||
|
**步骤 4-6:归纳 + 优先级 + 分类** — **全部可直接复用**,只改入参数据源。`generate_problem_description()` 的规则引擎、优先级判定、分类映射都不依赖飞书特有字段。
|
||||||
|
|
||||||
|
### 分发层
|
||||||
|
|
||||||
|
| 步骤 | 飞书 | 微信复用 |
|
||||||
|
|---|---|---|
|
||||||
|
| 步骤 7 (P0通知) | 飞书群 @通知 | 暂不适用(无微信群 Bot) |
|
||||||
|
| 步骤 8 (AI归纳) | AI 描述 → 知识库文档 | 同样走知识库 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 复用建议:两种路径
|
||||||
|
|
||||||
|
**路径 A:合流(推荐)**
|
||||||
|
|
||||||
|
微信和飞书数据在「步骤 1 查询」阶段合并,后续处理统一走一套逻辑。最终知识库文档里标注来源渠道。
|
||||||
|
|
||||||
|
```
|
||||||
|
lark_group_message ─┐
|
||||||
|
├→ 统一排序 → 归纳 → 分类 → 文档
|
||||||
|
wechat_group_message─┘
|
||||||
|
```
|
||||||
|
|
||||||
|
- 改动量小:`query_messages()` 加一个 UNION 或双表查
|
||||||
|
- 字段对齐:微信没有 `quote_message_id`,用 `refer_msg_svrid` 替代;没有 `sender_id`,用 `sender_username` 替代
|
||||||
|
|
||||||
|
**路径 B:分治**
|
||||||
|
|
||||||
|
两套独立流水线,各自产出文档,在最终展示层合并。
|
||||||
|
|
||||||
|
- 改动量大但隔离性好
|
||||||
|
- 可逐步接入,先跑微信、观察效果再合
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
要我按路径 A 先做一个方案详细设计吗?或者你倾向哪条路?
|
||||||
|
user: 我更倾向路径B,也就是方案B;原因是:初期我需要分别校验飞书和微信的信息收集、归纳是否准确;若采用方案B,我要怎么做?
|
||||||
|
assistant: 方案B的核心思路:**新建一套独立脚本,复用 80% 的逻辑,只改数据入口和输出目标**。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 要做的事(按顺序)
|
||||||
|
|
||||||
|
### 第一步:新建微信专属脚本
|
||||||
|
|
||||||
|
从飞书脚本复制一份,修改三个入口:
|
||||||
|
|
||||||
|
| 改什么 | 飞书 | 微信 |
|
||||||
|
|--------|------|------|
|
||||||
|
| 源表 | `lark_group_message` | `wechat_group_message` |
|
||||||
|
| 目标表格 | `AHtnsehwShUVyDtjasSciIvgn7b` | 新建一张微信专属表格 |
|
||||||
|
| 知识库输出 | 「用户反馈问题汇总」父节点 | 同一父节点下,标题改为 `微信-YYYY-MM-DD 问题反馈` |
|
||||||
|
|
||||||
|
### 第二步:字段映射
|
||||||
|
|
||||||
|
| 飞书字段 | 微信对应字段 | 说明 |
|
||||||
|
|----------|-------------|------|
|
||||||
|
| `message_id` | `svr_msg_id`(优先)或 `local_id` | 消息唯一标识 |
|
||||||
|
| `quote_message_id` | `refer_msg_svrid` | 引用关系 |
|
||||||
|
| `sender_name` | `sender_name` | 一致 |
|
||||||
|
| `msg_type` | `msg_type` | 一致 |
|
||||||
|
| `content` | `content` | 一致,但微信多了转发嵌套格式需处理 |
|
||||||
|
| `media_url` | `media_url` | 一致 |
|
||||||
|
| `msg_time` | `msg_time` | 一致 |
|
||||||
|
| `msg_timestamp` | `msg_timestamp` | 不一致(飞书是毫秒,微信是秒) |
|
||||||
|
|
||||||
|
### 第三步:适配微信特有的消息格式
|
||||||
|
|
||||||
|
微信有两个飞书没有的复杂情况,需要新增预处理逻辑:
|
||||||
|
|
||||||
|
1. **转发消息** — `content` 里是嵌套的 XML/聊天记录文本,需要先提取还原成可读对话
|
||||||
|
2. **引用链弱** — 微信没有飞书的消息级引用,`refer_msg_svrid` 覆盖率远低于飞书的 `quote_message_id`
|
||||||
|
|
||||||
|
### 第四步:crontab 定时任务
|
||||||
|
|
||||||
|
飞书保持现有调度,微信另开一条:
|
||||||
|
|
||||||
|
```
|
||||||
|
0 10 * * * sync_feishu_feedback_wrapper.sh → 飞书
|
||||||
|
5 10 * * * sync_wechat_feedback_wrapper.sh --ai-placeholders → 微信
|
||||||
|
```
|
||||||
|
|
||||||
|
### 第五步:校验调试
|
||||||
|
|
||||||
|
跑几天观察:
|
||||||
|
- 对话链排序是否准确(微信引用少,需要看聚类效果)
|
||||||
|
- 问题描述归纳是否合理
|
||||||
|
- 优先级分类是否正常
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 总结:需要新建/修改的文件
|
||||||
|
|
||||||
|
| 文件 | 动作 |
|
||||||
|
|------|------|
|
||||||
|
| `scripts/sync_wechat_feedback.py` | **新建**(从 feishu 版复制修改) |
|
||||||
|
| `scripts/sync_wechat_feedback_wrapper.sh` | **新建**(定时任务 wrapper) |
|
||||||
|
| 新飞书表格 | **新建**(微信专属数据 sheet) |
|
||||||
|
| `skills/feishu-feedback-sync/SKILL.md` | **修改**(增加微信流程说明) |
|
||||||
|
|
||||||
|
其他文件(`priority_classifier.py`、`HEARTBEAT.md` 等)不需要改。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
要我现在开始写 `sync_wechat_feedback.py` 吗?
|
||||||
|
user: 先这样吧 我再想想 若有需要告诉你 “所有文档的 sort_tag 都是空 — 飞书 API 创建节点时 sort_tag 没生效”这个问题你看看 现在是不是权限已经开通了
|
||||||
|
assistant: 结论:**权限尚未开通。**
|
||||||
|
|
||||||
|
测试过程:尝试创建一个带 `sort_tag` 的节点 → 创建成功但 `sort_tag` 被静默忽略(返回 `NOT_PRESENT`),PATCH 接口仍然 404。
|
||||||
|
|
||||||
|
这说明飞书开放平台侧还没有给小葵应用添加 `wiki:node:update` 权限,李若松/张昆鹏那边可能还没处理。要我再次提醒他们吗?
|
||||||
29
memory/2026-05-11-1929.md
Normal file
29
memory/2026-05-11-1929.md
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# Session: 2026-05-11 19:29:51 GMT+8
|
||||||
|
|
||||||
|
- **Session Key**: agent:xiaokui:feishu:direct:ou_9d4df593d0419d705274947c5cec5ada
|
||||||
|
- **Session ID**: b3a3fe48-81c8-405d-a393-dfa358eaeb4b
|
||||||
|
- **Source**: feishu
|
||||||
|
|
||||||
|
## Conversation Summary
|
||||||
|
|
||||||
|
user: “所有文档的 sort_tag 都是空 — 飞书 API 创建节点时 sort_tag 没生效”这个问题你看看 现在是不是权限已经开通了
|
||||||
|
assistant: 从记忆看,上次检查结论是**权限尚未开通**——创建节点时 `sort_tag` 被静默忽略,PATCH 接口 404。让我现在重新测试,确认当前状态。
|
||||||
|
assistant: 让我直接测试当前权限状态。先获取「用户反馈问题汇总」知识库的节点信息,然后尝试 PATCH 一个子文档的 sort_tag。
|
||||||
|
assistant: 让我先确认凭证目录,然后直接测试 sort_tag 是否现在生效。
|
||||||
|
assistant: 这是表格节点,让我找「用户反馈问题汇总」文档。
|
||||||
|
assistant: 找到「用户反馈问题汇总」,`has_child: true`。现在获取子节点列表,看 sort_tag 状态,并测试 PATCH 权限。
|
||||||
|
assistant: 所有子文档 `sort_tag` 仍然是 `null`(还有一个之前测试留下的 `TEST-192420`)。现在直接测试 PATCH 接口,看权限是否已开通:
|
||||||
|
assistant: PATCH 接口仍然是 404。让我看原始返回确认:
|
||||||
|
assistant: 创建成功但返回里没有 `sort_tag`。让我查一下新节点的 sort_tag 实际值:
|
||||||
|
assistant: 结论确认。清理测试节点:
|
||||||
|
assistant: **权限尚未开通。**
|
||||||
|
|
||||||
|
测试结果:
|
||||||
|
|
||||||
|
| 测试项 | 结果 |
|
||||||
|
|--------|------|
|
||||||
|
| PATCH `/nodes/{token}` 更新 `sort_tag` | **404** — `wiki:node:update` 权限仍未添加 |
|
||||||
|
| POST 创建节点时传 `sort_tag` | 创建成功,但 `sort_tag` 仍为 **null**(被静默忽略) |
|
||||||
|
| 所有子文档 sort_tag | 全部为 `null`,排序退化为按创建时间升序 |
|
||||||
|
|
||||||
|
需要在飞书开放平台给 Bot 应用(`cli_a9311791083adcc1`)添加 **`wiki:node:update`** 权限,这个只能由技术负责人李若松或张昆鹏操作。要我再次提醒他们吗?
|
||||||
67
memory/2026-05-11.md
Normal file
67
memory/2026-05-11.md
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# 2026-05-11 工作日志
|
||||||
|
|
||||||
|
## AI 问题归纳接入脚本流程 [刘新玉]
|
||||||
|
|
||||||
|
### 背景
|
||||||
|
脚本 `generate_problem_description()` 是纯规则关键词匹配引擎,当问题类型不在预定义关键词列表中时(如今天 L1 单元名称显示问题),会退化为原文拼贴兜底,描述质量差。
|
||||||
|
|
||||||
|
虽然 4 月 30 日已有架构决策「脚本输出元数据,AI 负责归纳描述」,但一直未落地到代码。
|
||||||
|
|
||||||
|
### 实现方案
|
||||||
|
|
||||||
|
**新增 `--ai-placeholders` 标志:**
|
||||||
|
- `generate_problem_description()` 返回 `[待AI归纳:#N]` 编号占位符
|
||||||
|
- `generate_summary()` 保存 `cluster_context_{date}.json` 到 `output/daily_feedback/`
|
||||||
|
- 占位符编号确保多问题簇时精确匹配
|
||||||
|
|
||||||
|
**新增 `--apply-ai <descriptions.json>` 模式:**
|
||||||
|
- 读取 AI 描述 JSON → 重新生成 markdown → 替换占位符 → 覆盖写入知识库文档
|
||||||
|
- AI 描述 JSON 格式:`{"date": "...", "descriptions": [{"index": 1, "description": "..."}]}`
|
||||||
|
|
||||||
|
**自动化链路:**
|
||||||
|
1. 定时任务 `--ai-placeholders` → 生成占位符文档 + 上下文 JSON
|
||||||
|
2. 心跳检测到上下文 JSON → AI 读取并生成描述 → `--apply-ai` 回写
|
||||||
|
|
||||||
|
### 修复的 Bug
|
||||||
|
- **markdown 内联传递转义问题**:`subprocess.run([..., "--markdown", content])` 内联传递会导致飞书 rich text 元素(如 `<lark-table>`)被错误转义。改用 `@file` 相对路径方式传递。
|
||||||
|
- **`update_summary_doc_as_children()`** 同样修复为 `@file` 方式
|
||||||
|
|
||||||
|
### 清理
|
||||||
|
- 移除 crontab 中两个无用的每分钟任务:`sync_feishu_feedback_summary.sh` 和 `sync_feishu_feedback_dispatch_p0.sh`
|
||||||
|
|
||||||
|
### 文件变更
|
||||||
|
- `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心修改
|
||||||
|
- `skills/feishu-feedback-sync/SKILL.md` — 新增步骤 8 文档
|
||||||
|
- `HEARTBEAT.md` — 新增 AI 归纳心跳任务
|
||||||
|
- `output/daily_feedback/ai_descriptions_2026-05-11.json` — 示例 AI 描述
|
||||||
|
|
||||||
|
## merge_forward(多条转发)消息处理 [李若松]
|
||||||
|
|
||||||
|
### 需求
|
||||||
|
飞书群消息同步中,`merge_forward`(多条转发)类型消息原本无法处理(content 显示"无法提取资源key"),需要完整记录转发内容。
|
||||||
|
|
||||||
|
### 实现方案(方案B+)
|
||||||
|
1. **新增字段 `forward_detail` (LONGTEXT)**:存储转发详情,媒体已替换为COS链接
|
||||||
|
2. **新增函数 `process_merge_forward_message()`**:解析 `<forwarded_messages>` XML → 下载子消息中的图片/视频/封面 → 上传COS → 生成含URL的完整内容
|
||||||
|
3. **`content` 字段**存储可读摘要:`[转发N条] 发送者: 首行文本 | ...`
|
||||||
|
|
||||||
|
### 媒体下载验证
|
||||||
|
- ✅ 转发消息中引用的图片 key(`img_v3_xxx`)可通过 merge_forward 的 message_id 正常下载
|
||||||
|
- ✅ 视频 key(`file_v3_xxx`)也可正常下载
|
||||||
|
- ✅ 视频封面图支持
|
||||||
|
|
||||||
|
### 产出
|
||||||
|
- DDL: `ALTER TABLE lark_group_message ADD COLUMN forward_detail LONGTEXT`
|
||||||
|
- 脚本改动: `skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py`
|
||||||
|
- 文档更新: `skills/feishu-group-msg-sync/SKILL.md`
|
||||||
|
- 历史数据修复: id=216 记录已重新处理,6个媒体全部上传COS
|
||||||
|
|
||||||
|
### forward_detail 格式
|
||||||
|
```
|
||||||
|
[2026-04-20 17:14] 黄敏:
|
||||||
|
[图片1](https://cos.xxx/img1.png)
|
||||||
|
L1 9-5,关卡图标显示异常...
|
||||||
|
[2026-04-23 19:08] 胡陈辰:
|
||||||
|
[视频1 时长77s](https://cos.xxx/video.mov)
|
||||||
|
[封面](https://cos.xxx/cover.png)
|
||||||
|
```
|
||||||
9
output/daily_feedback/ai_descriptions_2026-05-11.json
Normal file
9
output/daily_feedback/ai_descriptions_2026-05-11.json
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"date": "2026-05-11",
|
||||||
|
"descriptions": [
|
||||||
|
{
|
||||||
|
"index": 1,
|
||||||
|
"description": "在 132 版本中,L1 级别 U 单元名称显示为数字编号 — U 级别单元名称未命名(季度级别已有名称),新版本发布后用户将全部可见"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
85
output/daily_feedback/cluster_context_2026-05-11.json
Normal file
85
output/daily_feedback/cluster_context_2026-05-11.json
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
{
|
||||||
|
"date": "2026-05-11",
|
||||||
|
"total_clusters": 1,
|
||||||
|
"clusters": [
|
||||||
|
{
|
||||||
|
"index": 1,
|
||||||
|
"cluster_id": "om_x100b6f23a6f94ca8b39beec3f1daab1_img0",
|
||||||
|
"location": {
|
||||||
|
"端": "未知",
|
||||||
|
"环节": "未知",
|
||||||
|
"课程": "",
|
||||||
|
"角色/组件": ""
|
||||||
|
},
|
||||||
|
"priority": "P2",
|
||||||
|
"priority_detail": "",
|
||||||
|
"category": "其他问题",
|
||||||
|
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"sender": "毋益飞",
|
||||||
|
"content": "图片 (1/1)",
|
||||||
|
"msg_type": "post_image",
|
||||||
|
"media_url": "https://static.valavala.com/vala_llm/user_feedback/image/2026-05-11/beec3f1daab1_img0.png",
|
||||||
|
"time": "2026-05-11 15:00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "王胤鑫",
|
||||||
|
"content": "我怎么记着已经出了,之前好像是ok的",
|
||||||
|
"msg_type": "text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "梁晨",
|
||||||
|
"content": "对,之前名字给过",
|
||||||
|
"msg_type": "text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:01:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "梁晨",
|
||||||
|
"content": "S2是对的吧,得鑫哥再看看了",
|
||||||
|
"msg_type": "text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:02:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "毋益飞",
|
||||||
|
"content": "我132分支看U17开始全是数字了",
|
||||||
|
"msg_type": "post_text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:02:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "梁晨",
|
||||||
|
"content": "@毋益飞 U级别的确实还有未命名的;季度是有的",
|
||||||
|
"msg_type": "text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:03:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "毋益飞",
|
||||||
|
"content": "目前如果新版本发布的话用户是都能看到了,你们评估一下是不是都先添加上",
|
||||||
|
"msg_type": "text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:04:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "毋益飞",
|
||||||
|
"content": "图片 (1/1)",
|
||||||
|
"msg_type": "post_image",
|
||||||
|
"media_url": "https://static.valavala.com/vala_llm/user_feedback/image/2026-05-11/5afc3865073e_img0.png",
|
||||||
|
"time": "2026-05-11 15:02:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sender": "毋益飞",
|
||||||
|
"content": "132版本新作了切换,是不是需要把L1的名字都出了,现在全是数字@梁晨 @庞鸿潇",
|
||||||
|
"msg_type": "post_text",
|
||||||
|
"media_url": "",
|
||||||
|
"time": "2026-05-11 15:00:00"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
output/daily_feedback/飞书反馈_2026-05-11.xlsx
Normal file
BIN
output/daily_feedback/飞书反馈_2026-05-11.xlsx
Normal file
Binary file not shown.
@ -20,6 +20,7 @@ description: 同步飞书「内容测试问题反馈」群消息到知识库电
|
|||||||
| 步骤 5 | 优先级判定与排序 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | 按 P0>P1>P2>P3 排序的问题列表 |
|
| 步骤 5 | 优先级判定与排序 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | 按 P0>P1>P2>P3 排序的问题列表 |
|
||||||
| 步骤 6 | 问题分类 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | 按优先级+分类展示的问题列表 |
|
| 步骤 6 | 问题分类 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | 按优先级+分类展示的问题列表 |
|
||||||
| 步骤 7 | 问题分发 | 「小葵小葵」群 `oc_4171a2188f2554522a4309f2d7c27753` | P0问题@通知到群 |
|
| 步骤 7 | 问题分发 | 「小葵小葵」群 `oc_4171a2188f2554522a4309f2d7c27753` | P0问题@通知到群 |
|
||||||
|
| 步骤 8 | AI 问题归纳 | `RaL6whoYMijyYHkSlWrc7OLLnBy`(子文档) | AI 生成精炼问题描述,替换占位符 |
|
||||||
|
|
||||||
## 关键标识
|
## 关键标识
|
||||||
|
|
||||||
@ -425,11 +426,48 @@ python3 sync_feishu_feedback.py --days 3 --skip-dispatch
|
|||||||
|
|
||||||
建议每日执行一次,在飞书群消息同步完毕后(`feishu-group-msg-sync` 之后)运行。
|
建议每日执行一次,在飞书群消息同步完毕后(`feishu-group-msg-sync` 之后)运行。
|
||||||
|
|
||||||
### 创建定时任务(crontab)
|
### 两阶段执行(含 AI 归纳)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 每天 10:00 执行(确保消息已同步入库)
|
# 阶段一:每天 10:00 执行(生成占位符 + 保存上下文 JSON)
|
||||||
0 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh >> /var/log/xiaokui_feedback_sync.log 2>&1
|
0 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh --ai-placeholders >> /var/log/xiaokui_feedback_sync.log 2>&1
|
||||||
|
|
||||||
|
# 阶段二:每天 10:05 AI 归纳(读取上下文 JSON → AI 生成描述 → 回写文档)
|
||||||
|
# 由心跳任务或手动触发:python3 sync_feishu_feedback.py --apply-ai <descriptions.json>
|
||||||
|
5 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/ai_summarize_feedback.sh >> /var/log/xiaokui_ai_summarize.log 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
### 步骤 8:AI 问题归纳(新增)
|
||||||
|
|
||||||
|
脚本的规则匹配 `generate_problem_description()` 只能处理预定义的症状模式,无法理解对话语义。当问题类型不在关键词列表内时会退化为原文拼贴。
|
||||||
|
|
||||||
|
**AI 归纳流程:**
|
||||||
|
|
||||||
|
1. **脚本阶段**:`--ai-placeholders` 模式下,问题描述使用 `[待AI归纳:#N]` 占位符,同时保存 `cluster_context_{date}.json` 到 `output/daily_feedback/`
|
||||||
|
2. **AI 阶段**:AI 读取上下文 JSON,理解完整对话语义,生成精炼的问题描述,写入 `ai_descriptions_{date}.json`
|
||||||
|
3. **回写阶段**:`--apply-ai <descriptions.json>` 重新生成文档内容,替换占位符为 AI 描述,覆盖写入知识库
|
||||||
|
|
||||||
|
**上下文 JSON 结构:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"date": "2026-05-11",
|
||||||
|
"clusters": [{
|
||||||
|
"index": 1,
|
||||||
|
"location": {"端": "", "环节": "", "课程": "", "角色/组件": ""},
|
||||||
|
"priority": "P2",
|
||||||
|
"category": "其他问题",
|
||||||
|
"conclusion": "**当前问题排查结论:** ...",
|
||||||
|
"messages": [{"sender": "...", "content": "...", "time": "..."}]
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**AI 描述 JSON 格式:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"date": "2026-05-11",
|
||||||
|
"descriptions": [{"index": 1, "description": "AI 生成的问题描述"}]
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## 权限说明
|
## 权限说明
|
||||||
|
|||||||
Binary file not shown.
@ -464,11 +464,16 @@ def extract_location_elements(msgs):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def generate_problem_description(cluster_msgs, location, root_text):
|
def generate_problem_description(cluster_msgs, location, root_text, ai_placeholder=False, placeholder_idx=0):
|
||||||
"""
|
"""
|
||||||
基于簇中消息生成问题描述。
|
基于簇中消息生成问题描述。
|
||||||
格式:在{端}端{环节}({课程}),{具体表现}
|
格式:在{端}端{环节}({课程}),{具体表现}
|
||||||
|
|
||||||
|
当 ai_placeholder=True 时,返回 [待AI归纳:#N] 占位符,后续由 AI 根据上下文生成。
|
||||||
"""
|
"""
|
||||||
|
if ai_placeholder:
|
||||||
|
return f"[待AI归纳:#{placeholder_idx}]"
|
||||||
|
|
||||||
all_text = " ".join(str(m[3]) or "" for m in cluster_msgs)
|
all_text = " ".join(str(m[3]) or "" for m in cluster_msgs)
|
||||||
all_text = re.sub(r'\[Image:[^\]]+\]', '', all_text)
|
all_text = re.sub(r'\[Image:[^\]]+\]', '', all_text)
|
||||||
all_text = re.sub(r'https?://\S+', '', all_text)
|
all_text = re.sub(r'https?://\S+', '', all_text)
|
||||||
@ -660,22 +665,13 @@ def classify_problem(cluster_msgs) -> str:
|
|||||||
return "其他问题"
|
return "其他问题"
|
||||||
|
|
||||||
|
|
||||||
def summarize_cluster(cluster_msgs, idx, priority_info=None):
|
def summarize_cluster(cluster_msgs, idx, priority_info=None, ai_placeholder=False, placeholder_idx=0):
|
||||||
"""
|
"""
|
||||||
生成单个问题簇的结构化归纳。
|
生成单个问题簇的结构化归纳。
|
||||||
|
|
||||||
脚本负责:提取位置要素元数据 + 输出对话要点表格 + 提取结论
|
脚本负责:提取位置要素元数据 + 输出对话要点表格 + 提取结论
|
||||||
AI 负责:基于元数据和上下文,生成精炼的问题描述
|
当 ai_placeholder=True 时,问题描述使用 [待AI归纳:#N] 占位符,后续由 AI 填充。
|
||||||
priority_info 可选,包含步骤5的优先级判定结果
|
priority_info 可选,包含步骤5的优先级判定结果
|
||||||
|
|
||||||
输出格式:
|
|
||||||
### 问题 N 🔴 P0
|
|
||||||
> **[AI归纳: 问题描述]**
|
|
||||||
<details><summary>位置要素(脚本提取)</summary>{元数据}</details>
|
|
||||||
| 发言人 | 要点 |
|
|
||||||
|--------|------|
|
|
||||||
...
|
|
||||||
**当前问题排查结论:** ...
|
|
||||||
"""
|
"""
|
||||||
loc = extract_location_elements(cluster_msgs)
|
loc = extract_location_elements(cluster_msgs)
|
||||||
|
|
||||||
@ -710,7 +706,7 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None):
|
|||||||
lines = [f"### {priority_prefix}"]
|
lines = [f"### {priority_prefix}"]
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
description = generate_problem_description(cluster_msgs, loc, root_msg_text)
|
description = generate_problem_description(cluster_msgs, loc, root_msg_text, ai_placeholder=ai_placeholder, placeholder_idx=placeholder_idx)
|
||||||
lines.append(f"**{idx},问题描述:** {description}")
|
lines.append(f"**{idx},问题描述:** {description}")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
conclusion = extract_conclusion(cluster_msgs)
|
conclusion = extract_conclusion(cluster_msgs)
|
||||||
@ -805,14 +801,17 @@ def extract_conclusion(cluster_msgs):
|
|||||||
return "**当前问题排查结论:** 暂无结论排查中"
|
return "**当前问题排查结论:** 暂无结论排查中"
|
||||||
|
|
||||||
|
|
||||||
def generate_summary(clusters, cluster_order, skip_priority=False):
|
def generate_summary(clusters, cluster_order, skip_priority=False, ai_placeholders=False):
|
||||||
"""
|
"""
|
||||||
生成当日问题归纳 Markdown(步骤4+5)。
|
生成当日问题归纳 Markdown(步骤4+5)。
|
||||||
|
|
||||||
步骤4:问题归纳
|
步骤4:问题归纳
|
||||||
步骤5:优先级判定与排序(P0 > P1 > P2 > P3)
|
步骤5:优先级判定与排序(P0 > P1 > P2 > P3)
|
||||||
|
|
||||||
返回: (markdown_str, has_content)
|
当 ai_placeholders=True 时,问题描述使用 [待AI归纳] 占位符,
|
||||||
|
并将簇上下文保存到 JSON 文件供后续 AI 处理。
|
||||||
|
|
||||||
|
返回: (markdown_str, has_content, context_json_path_or_None)
|
||||||
"""
|
"""
|
||||||
lines = ["## 今日问题归纳\n"]
|
lines = ["## 今日问题归纳\n"]
|
||||||
|
|
||||||
@ -830,7 +829,7 @@ def generate_summary(clusters, cluster_order, skip_priority=False):
|
|||||||
})
|
})
|
||||||
|
|
||||||
if not valid_clusters:
|
if not valid_clusters:
|
||||||
return "\n".join(lines) + "\n*(无可归纳的问题簇)*\n", False
|
return "\n".join(lines) + "\n*(无可归纳的问题簇)*\n", False, None
|
||||||
|
|
||||||
# 步骤5:优先级判定
|
# 步骤5:优先级判定
|
||||||
if not skip_priority:
|
if not skip_priority:
|
||||||
@ -840,6 +839,10 @@ def generate_summary(clusters, cluster_order, skip_priority=False):
|
|||||||
# 按优先级排序:P0 > P1 > P2 > P3,同优先级按时间
|
# 按优先级排序:P0 > P1 > P2 > P3,同优先级按时间
|
||||||
valid_clusters = sort_by_priority(valid_clusters)
|
valid_clusters = sort_by_priority(valid_clusters)
|
||||||
|
|
||||||
|
# 分配问题索引(用于 AI 占位符编号)
|
||||||
|
for i, vc in enumerate(valid_clusters, 1):
|
||||||
|
vc["_idx"] = i
|
||||||
|
|
||||||
# 生成层汇总索引(按优先级 + 分类)
|
# 生成层汇总索引(按优先级 + 分类)
|
||||||
grouped = defaultdict(list)
|
grouped = defaultdict(list)
|
||||||
for vc in valid_clusters:
|
for vc in valid_clusters:
|
||||||
@ -871,7 +874,7 @@ def generate_summary(clusters, cluster_order, skip_priority=False):
|
|||||||
lines.append(f"{cat_idx}. **{cat_name}**")
|
lines.append(f"{cat_idx}. **{cat_name}**")
|
||||||
for vc in cat_items:
|
for vc in cat_items:
|
||||||
desc = generate_problem_description(vc["msgs"],
|
desc = generate_problem_description(vc["msgs"],
|
||||||
extract_location_elements(vc["msgs"]), "")
|
extract_location_elements(vc["msgs"]), "", ai_placeholder=ai_placeholders, placeholder_idx=vc["_idx"])
|
||||||
lines.append(f" - {desc}")
|
lines.append(f" - {desc}")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
@ -879,14 +882,123 @@ def generate_summary(clusters, cluster_order, skip_priority=False):
|
|||||||
lines.append("## 今日问题拆解\n")
|
lines.append("## 今日问题拆解\n")
|
||||||
|
|
||||||
idx = 0
|
idx = 0
|
||||||
|
context_data = [] # 收集簇上下文供 AI 处理
|
||||||
for vc in valid_clusters:
|
for vc in valid_clusters:
|
||||||
idx += 1
|
idx += 1
|
||||||
sorted_msgs = sort_cluster_msgs(vc["msgs"])
|
sorted_msgs = sort_cluster_msgs(vc["msgs"])
|
||||||
summary = summarize_cluster(sorted_msgs, idx, vc.get("priority_info"))
|
summary = summarize_cluster(sorted_msgs, idx, vc.get("priority_info"), ai_placeholder=ai_placeholders, placeholder_idx=vc["_idx"])
|
||||||
lines.append(summary)
|
lines.append(summary)
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
return "\n".join(lines), True
|
# 收集 AI 上下文数据
|
||||||
|
loc = extract_location_elements(vc["msgs"])
|
||||||
|
context_data.append({
|
||||||
|
"index": idx,
|
||||||
|
"cluster_id": vc["cluster_id"],
|
||||||
|
"location": loc,
|
||||||
|
"priority": vc.get("priority_info", {}).get("priority", "P2"),
|
||||||
|
"priority_detail": vc.get("priority_info", {}).get("detail", ""),
|
||||||
|
"category": vc.get("category", "其他问题"),
|
||||||
|
"conclusion": extract_conclusion(sorted_msgs),
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"sender": m[1],
|
||||||
|
"content": str(m[3]) if m[3] else "",
|
||||||
|
"msg_type": str(m[2]),
|
||||||
|
"media_url": str(m[4]) if m[4] else "",
|
||||||
|
"time": str(m[6]),
|
||||||
|
}
|
||||||
|
for m in sorted_msgs
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
|
# 保存 AI 上下文 JSON
|
||||||
|
context_path = None
|
||||||
|
if ai_placeholders and context_data:
|
||||||
|
import datetime as dt_module
|
||||||
|
today_str = dt_module.date.today().strftime("%Y-%m-%d")
|
||||||
|
context_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), "output", "daily_feedback")
|
||||||
|
os.makedirs(context_dir, exist_ok=True)
|
||||||
|
context_path = os.path.join(context_dir, f"cluster_context_{today_str}.json")
|
||||||
|
payload = {
|
||||||
|
"date": today_str,
|
||||||
|
"total_clusters": len(context_data),
|
||||||
|
"clusters": context_data,
|
||||||
|
}
|
||||||
|
with open(context_path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(payload, f, ensure_ascii=False, indent=2)
|
||||||
|
print(f" 📝 AI 上下文已保存: {context_path}")
|
||||||
|
|
||||||
|
return "\n".join(lines), True, context_path
|
||||||
|
|
||||||
|
|
||||||
|
# === AI 描述回写 ===
|
||||||
|
DEFAULT_AI_DESCRIPTIONS_FILE = None # 运行时设置
|
||||||
|
|
||||||
|
|
||||||
|
def load_ai_descriptions(descriptions_path):
|
||||||
|
"""从 JSON 文件加载 AI 生成的描述。
|
||||||
|
格式: {"date": "2026-05-11", "descriptions": [{"index": 1, "description": "..."}]}
|
||||||
|
"""
|
||||||
|
with open(descriptions_path, "r", encoding="utf-8") as f:
|
||||||
|
return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_ai_descriptions_to_doc(obj_token, descriptions_data):
|
||||||
|
"""
|
||||||
|
将 AI 生成的描述替换到知识库文档中。
|
||||||
|
对每个问题,用实际描述替换 [待AI归纳:#N] 占位符。
|
||||||
|
"""
|
||||||
|
descs = descriptions_data.get("descriptions", [])
|
||||||
|
if not descs:
|
||||||
|
print("⚠️ 无 AI 描述可应用")
|
||||||
|
return False
|
||||||
|
|
||||||
|
for item in descs:
|
||||||
|
idx = item["index"]
|
||||||
|
desc = item["description"]
|
||||||
|
placeholder = f"[待AI归纳:#{idx}]"
|
||||||
|
|
||||||
|
# 使用 replace_range 替换该编号的占位符("今日问题归纳"和"今日问题拆解"各一处)
|
||||||
|
# replace_range 每次只替换一处,所以需要多次调用
|
||||||
|
print(f" 🔄 替换 #{idx}: {placeholder} → {desc}")
|
||||||
|
# 先替换"今日问题归纳"中的占位符
|
||||||
|
cmd = [
|
||||||
|
CLI, "docs", "+update",
|
||||||
|
"--doc", obj_token,
|
||||||
|
"--mode", "replace_range",
|
||||||
|
"--selection-with-ellipsis", placeholder,
|
||||||
|
"--markdown", desc,
|
||||||
|
"--as", "bot",
|
||||||
|
]
|
||||||
|
env = os.environ.copy()
|
||||||
|
env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui"
|
||||||
|
try:
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env)
|
||||||
|
if result.returncode != 0:
|
||||||
|
print(f" ❌ 替换 #{idx}(归纳部分)失败: {result.stderr[:200]}")
|
||||||
|
return False
|
||||||
|
# 再替换"今日问题拆解"中的占位符
|
||||||
|
result2 = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env)
|
||||||
|
if result2.returncode != 0:
|
||||||
|
print(f" ❌ 替换 #{idx}(拆解部分)失败: {result2.stderr[:200]}")
|
||||||
|
return False
|
||||||
|
print(f" ✅ 替换 #{idx} 成功(归纳 + 拆解各一处)")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ❌ 替换 #{idx} 异常: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def get_today_doc_obj_token():
|
||||||
|
"""获取今天日期对应的子文档 obj_token。"""
|
||||||
|
today_str = datetime.now().strftime("%Y-%m-%d")
|
||||||
|
doc_title = f"{today_str} 问题反馈"
|
||||||
|
children = list_child_nodes()
|
||||||
|
if doc_title in children:
|
||||||
|
return children[doc_title]["obj_token"]
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# === 步骤5写入目标文档配置 ===
|
# === 步骤5写入目标文档配置 ===
|
||||||
@ -1055,12 +1167,17 @@ def update_summary_doc_as_children(day_summaries):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 写入内容(锁内已保证obj_token唯一,不会并发写入)
|
# 写入内容(锁内已保证obj_token唯一,不会并发写入)
|
||||||
|
# 通过临时文件传递 markdown,避免 shell 转义问题
|
||||||
|
tmp_md_path = "tmp/_summary_md.txt"
|
||||||
|
with open(tmp_md_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(content)
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
||||||
"--mode", "overwrite",
|
"--mode", "overwrite",
|
||||||
"--markdown", content],
|
"--markdown", f"@{tmp_md_path}"],
|
||||||
env=env, capture_output=True, text=True, timeout=15
|
env=env, capture_output=True, text=True, timeout=15
|
||||||
)
|
)
|
||||||
|
os.unlink(tmp_md_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
d = json.loads(result.stdout)
|
d = json.loads(result.stdout)
|
||||||
@ -1081,20 +1198,28 @@ def update_summary_doc_as_children(day_summaries):
|
|||||||
|
|
||||||
if dup_count > 1:
|
if dup_count > 1:
|
||||||
print(f" ⚠️ 检测到内容重复(共{dup_count}次),第{retry+1}次重试写入")
|
print(f" ⚠️ 检测到内容重复(共{dup_count}次),第{retry+1}次重试写入")
|
||||||
|
tmp_retry = "tmp/_summary_md_retry.txt"
|
||||||
|
with open(tmp_retry, "w", encoding="utf-8") as f:
|
||||||
|
f.write(content)
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
||||||
"--mode", "overwrite",
|
"--mode", "overwrite",
|
||||||
"--markdown", content],
|
"--markdown", f"@{tmp_retry}"],
|
||||||
env=env, capture_output=True, text=True, timeout=15
|
env=env, capture_output=True, text=True, timeout=15
|
||||||
)
|
)
|
||||||
|
os.unlink(tmp_retry)
|
||||||
elif len(saved_content.strip()) < 100:
|
elif len(saved_content.strip()) < 100:
|
||||||
print(f" ⚠️ 检测到文档为空/内容过短,第{retry+1}次重试写入")
|
print(f" ⚠️ 检测到文档为空/内容过短,第{retry+1}次重试写入")
|
||||||
|
tmp_retry = "tmp/_summary_md_retry.txt"
|
||||||
|
with open(tmp_retry, "w", encoding="utf-8") as f:
|
||||||
|
f.write(content)
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
||||||
"--mode", "overwrite",
|
"--mode", "overwrite",
|
||||||
"--markdown", content],
|
"--markdown", f"@{tmp_retry}"],
|
||||||
env=env, capture_output=True, text=True, timeout=15
|
env=env, capture_output=True, text=True, timeout=15
|
||||||
)
|
)
|
||||||
|
os.unlink(tmp_retry)
|
||||||
else:
|
else:
|
||||||
print(f" ✅ {title} 校验通过:长度{len(saved_content)},无内容重复")
|
print(f" ✅ {title} 校验通过:长度{len(saved_content)},无内容重复")
|
||||||
check_pass = True
|
check_pass = True
|
||||||
@ -1234,6 +1359,8 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False):
|
|||||||
print(f" ⚠️ 分发异常: {e}")
|
print(f" ⚠️ 分发异常: {e}")
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="飞书问题反馈同步(五步完整流程)")
|
parser = argparse.ArgumentParser(description="飞书问题反馈同步(五步完整流程)")
|
||||||
parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3)")
|
parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3)")
|
||||||
@ -1241,10 +1368,75 @@ def main():
|
|||||||
parser.add_argument("--dry-run", action="store_true", help="仅预览不写入")
|
parser.add_argument("--dry-run", action="store_true", help="仅预览不写入")
|
||||||
parser.add_argument("--skip-priority", action="store_true", help="跳过步骤5优先级判定")
|
parser.add_argument("--skip-priority", action="store_true", help="跳过步骤5优先级判定")
|
||||||
parser.add_argument("--skip-dispatch", action="store_true", help="跳过步骤7问题分发")
|
parser.add_argument("--skip-dispatch", action="store_true", help="跳过步骤7问题分发")
|
||||||
|
parser.add_argument("--ai-placeholders", action="store_true", help="问题描述使用 [待AI归纳:#N] 占位符,并保存簇上下文 JSON 供后续 AI 处理")
|
||||||
|
parser.add_argument("--apply-ai", type=str, default=None, metavar="DESCRIPTIONS_JSON", help="应用 AI 描述:读取指定 JSON 文件中的描述,替换知识库文档中 [待AI归纳:#N] 占位符")
|
||||||
parser.add_argument("--dispatch-mode", type=str, default="all", choices=["all", "p0"], help="分发模式:all=发全部, p0=仅发P0级问题(默认 all)")
|
parser.add_argument("--dispatch-mode", type=str, default="all", choices=["all", "p0"], help="分发模式:all=发全部, p0=仅发P0级问题(默认 all)")
|
||||||
parser.add_argument("--steps", type=str, default="1-7", help="执行步骤范围,如 '1-3'(仅写入表格)或 '4-7'(归纳+分类+分发)")
|
parser.add_argument("--steps", type=str, default="1-7", help="执行步骤范围,如 '1-3'(仅写入表格)或 '4-7'(归纳+分类+分发)")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# --apply-ai 模式:加载 AI 描述,替换占位符并覆盖写入文档
|
||||||
|
if args.apply_ai:
|
||||||
|
data = load_ai_descriptions(args.apply_ai)
|
||||||
|
target_date = data.get("date", "")
|
||||||
|
if not target_date:
|
||||||
|
print("❌ AI 描述文件缺少 date 字段")
|
||||||
|
sys.exit(1)
|
||||||
|
print(f"📋 加载 {len(data.get('descriptions', []))} 条 AI 描述,日期: {target_date}")
|
||||||
|
|
||||||
|
# 重新查询数据并生成带占位符的 markdown
|
||||||
|
from datetime import datetime as dt_module
|
||||||
|
td = dt_module.strptime(target_date, "%Y-%m-%d")
|
||||||
|
next_day = (td + timedelta(days=1)).strftime("%Y-%m-%d")
|
||||||
|
rows = query_messages(target_date, next_day)
|
||||||
|
day_msgs = [r for r in rows if r[6][:10] == target_date]
|
||||||
|
if not day_msgs:
|
||||||
|
print(f"❌ {target_date} 无数据")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
sorted_msgs, clusters, cluster_order = sort_threads(day_msgs)
|
||||||
|
summary_md, has_content, _ = generate_summary(clusters, cluster_order, ai_placeholders=True)
|
||||||
|
|
||||||
|
# 替换占位符
|
||||||
|
for item in data["descriptions"]:
|
||||||
|
idx = item["index"]
|
||||||
|
desc = item["description"]
|
||||||
|
placeholder = f"[待AI归纳:#{idx}]"
|
||||||
|
summary_md = summary_md.replace(placeholder, desc)
|
||||||
|
print(f" 🔄 替换 #{idx}: {placeholder} → {desc[:50]}...")
|
||||||
|
|
||||||
|
# 获取文档并覆盖写入(通过文件避免 shell 转义问题)
|
||||||
|
title = f"{target_date} 问题反馈"
|
||||||
|
nodes = list_child_nodes()
|
||||||
|
if title not in nodes:
|
||||||
|
print(f"❌ 未找到文档: {title}")
|
||||||
|
sys.exit(1)
|
||||||
|
obj_token = nodes[title]["obj_token"]
|
||||||
|
print(f"📄 目标文档: {obj_token}")
|
||||||
|
|
||||||
|
# 写入临时文件(lark-cli @file 要求相对路径)
|
||||||
|
tmp_md = "tmp/ai_summary_md.txt"
|
||||||
|
with open(tmp_md, "w", encoding="utf-8") as f:
|
||||||
|
f.write(summary_md)
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui"
|
||||||
|
result = subprocess.run(
|
||||||
|
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
|
||||||
|
"--mode", "overwrite", "--markdown", f"@{tmp_md}"],
|
||||||
|
env=env, capture_output=True, text=True, timeout=15)
|
||||||
|
os.unlink(tmp_md) # 清理临时文件
|
||||||
|
try:
|
||||||
|
resp = json.loads(result.stdout)
|
||||||
|
if resp.get("ok"):
|
||||||
|
print(f"🎉 AI 描述已应用到文档: {title}")
|
||||||
|
else:
|
||||||
|
print(f"❌ 写入失败: {result.stdout[:300]}")
|
||||||
|
sys.exit(1)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print(f"❌ 解析响应失败: {result.stdout[:300]}")
|
||||||
|
sys.exit(1)
|
||||||
|
return
|
||||||
|
|
||||||
if args.date:
|
if args.date:
|
||||||
start_date = datetime.strptime(args.date, "%Y-%m-%d").date()
|
start_date = datetime.strptime(args.date, "%Y-%m-%d").date()
|
||||||
end_date = start_date
|
end_date = start_date
|
||||||
@ -1305,7 +1497,7 @@ def main():
|
|||||||
if do_summary:
|
if do_summary:
|
||||||
# 步骤 4:问题归纳
|
# 步骤 4:问题归纳
|
||||||
# 步骤 5:优先级判定(默认启用,--skip-priority 可跳过)
|
# 步骤 5:优先级判定(默认启用,--skip-priority 可跳过)
|
||||||
summary, has_content = generate_summary(clusters, cluster_order, skip_priority=args.skip_priority)
|
summary, has_content, context_path = generate_summary(clusters, cluster_order, skip_priority=args.skip_priority, ai_placeholders=args.ai_placeholders)
|
||||||
if has_content:
|
if has_content:
|
||||||
priority_count = summary.count('优先级:') if not args.skip_priority else 0
|
priority_count = summary.count('优先级:') if not args.skip_priority else 0
|
||||||
print(f" 归纳完成:{summary.count('### 问题')} 个问题")
|
print(f" 归纳完成:{summary.count('### 问题')} 个问题")
|
||||||
|
|||||||
@ -40,8 +40,9 @@ description: 定期同步飞书群聊消息到MySQL数据库或飞书电子表
|
|||||||
| chat_name | varchar(255) | 群聊名称 |
|
| chat_name | varchar(255) | 群聊名称 |
|
||||||
| sender_id | varchar(128) | 发送者ID(open_id或user_id) |
|
| sender_id | varchar(128) | 发送者ID(open_id或user_id) |
|
||||||
| sender_name | varchar(255) | 发送者显示名称 |
|
| sender_name | varchar(255) | 发送者显示名称 |
|
||||||
| msg_type | varchar(32) | 消息类型(text/post/image/media/audio/file/sticker/system) |
|
| msg_type | varchar(32) | 消息类型(text/post/image/media/audio/file/sticker/system/merge_forward) |
|
||||||
| content | text | 消息文本内容或描述 |
|
| content | text | 消息文本内容或描述 |
|
||||||
|
| forward_detail | longtext | merge_forward转发详情(媒体已替换为COS链接) |
|
||||||
| media_url | varchar(1024) | 媒体文件COS URL |
|
| media_url | varchar(1024) | 媒体文件COS URL |
|
||||||
| message_id | varchar(128) | 飞书消息ID(om_xxx) |
|
| message_id | varchar(128) | 飞书消息ID(om_xxx) |
|
||||||
| parent_id | varchar(128) | 父消息ID(thread根消息) |
|
| parent_id | varchar(128) | 父消息ID(thread根消息) |
|
||||||
@ -186,6 +187,7 @@ LARKSUITE_CLI_CONFIG_DIR=<credentials> lark-cli sheets +info \
|
|||||||
| audio | 下载 → 上传COS → 记录URL | 如有引用则记录 |
|
| audio | 下载 → 上传COS → 记录URL | 如有引用则记录 |
|
||||||
| file | 下载 → 上传COS → 记录URL | 如有引用则记录 |
|
| file | 下载 → 上传COS → 记录URL | 如有引用则记录 |
|
||||||
| sticker | 记录为"表情包" | 如有引用则记录 |
|
| sticker | 记录为"表情包" | 如有引用则记录 |
|
||||||
|
| merge_forward | 解析子消息、下载媒体→COS、生成forward_detail | 如有引用则记录 |
|
||||||
| system | 跳过(入群退群等系统消息) | - |
|
| system | 跳过(入群退群等系统消息) | - |
|
||||||
| 已删除 | 跳过 | - |
|
| 已删除 | 跳过 | - |
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
120
skills/feishu-group-msg-sync/scripts/fix_merge_forward_216.py
Normal file
120
skills/feishu-group-msg-sync/scripts/fix_merge_forward_216.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""一次性修复 id=216 的 merge_forward 记录,重新解析媒体并生成 forward_detail"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import pymysql
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# 引入同步脚本的函数(复用 process_merge_forward_message)
|
||||||
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
|
from sync_group_to_mysql import (
|
||||||
|
process_merge_forward_message, CosUploader,
|
||||||
|
LARK_CLI_CONFIG, CHAT_ID, CHAT_NAME, WORK_DIR,
|
||||||
|
COS_BASE_PATH, TZ
|
||||||
|
)
|
||||||
|
|
||||||
|
# MySQL connection settings. Each value may be overridden through an
# environment variable of the same name; the literals are kept as fallbacks so
# the script behaves exactly as before when nothing is set.
# NOTE(review): the password below is committed in plain text. Rotate it and
# supply MYSQL_PASS from the environment / a secret store, then drop the
# fallback literal.
MYSQL_HOST = os.environ.get("MYSQL_HOST", "bj-cdb-8frbdwju.sql.tencentcdb.com")
MYSQL_PORT = int(os.environ.get("MYSQL_PORT", "25413"))
MYSQL_USER = os.environ.get("MYSQL_USER", "chatbot")
MYSQL_PASS = os.environ.get("MYSQL_PASS", "xhuBx7d@uT2gUVv")
MYSQL_DB = os.environ.get("MYSQL_DB", "vala_test")
# Table name is interpolated into SQL text, so it stays a fixed constant.
MYSQL_TABLE = "lark_group_message"

# message_id of the single merge_forward record this one-off script repairs.
TARGET_MSG_ID = "om_x100b50f7871e74a8c39cb1549d9052f"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_message_by_id(
    msg_id: str,
    start: str = '2026-05-07T15:00:00+08:00',
    end: str = '2026-05-07T15:30:00+08:00',
    timeout: float = 60,
) -> "dict | None":
    """Fetch a single chat message by its message_id via lark-cli.

    lark-cli is asked to list the messages of CHAT_ID inside the [start, end]
    window, and the entry whose 'message_id' equals msg_id is returned.

    Args:
        msg_id: The message_id to look for.
        start: Window start passed to ``--start``.
        end: Window end passed to ``--end``.
        timeout: Seconds to wait for lark-cli before giving up.

    Returns:
        The matching message dict, or None when lark-cli fails, times out,
        prints invalid JSON, or the window does not contain the message.
    """
    env = os.environ.copy()
    env['LARKSUITE_CLI_CONFIG_DIR'] = LARK_CLI_CONFIG
    # Only one page of 50 messages is requested, so the window must be narrow
    # enough to be sure it contains the target message.
    cmd = [
        'lark-cli', 'im', '+chat-messages-list',
        '--chat-id', CHAT_ID,
        '--start', start,
        '--end', end,
        '--sort', 'asc', '--page-size', '50',
        '--as', 'bot',
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, env=env, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        # Without a timeout a stuck CLI call would hang the script forever.
        log.error(f"lark-cli timed out after {timeout}s")
        return None

    if result.returncode != 0:
        log.error(f"lark-cli error: {result.stderr}")
        return None

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        log.error("lark-cli JSON parse error")
        return None

    # 'data' / 'messages' may be present but null; treat that as "no messages".
    messages = (data.get('data') or {}).get('messages') or []
    return next((m for m in messages if m.get('message_id') == msg_id), None)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Re-process the TARGET_MSG_ID merge_forward message and update its DB row.

    Steps: fetch the message, rebuild its record through
    process_merge_forward_message(), write 'content' and 'forward_detail' to
    the row with that message_id, then print the first 15 lines of the new
    forward_detail. Returns early (after logging) when the message cannot be
    fetched or processed; a database error is logged and rolled back.
    """
    os.makedirs(WORK_DIR, exist_ok=True)

    log.info(f"拉取消息: {TARGET_MSG_ID}")
    message = fetch_message_by_id(TARGET_MSG_ID)
    if not message:
        log.error("未找到目标消息")
        return

    sender_name = message.get('sender', {}).get('name')
    log.info(f"消息类型: {message.get('msg_type')}, 发送者: {sender_name}")

    uploader = CosUploader()
    today = datetime.now(TZ).strftime('%Y-%m-%d')

    log.info("开始处理 merge_forward...")
    record = process_merge_forward_message(message, uploader, today)
    if not record:
        log.error("处理失败")
        return

    detail_length = len(record.get('forward_detail', ''))
    log.info(f"处理完成, forward_detail 长度: {detail_length}")
    log.info(f"content: {record['content'][:200]}...")

    # Write the rebuilt fields back to the database.
    connection = pymysql.connect(
        host=MYSQL_HOST,
        port=MYSQL_PORT,
        user=MYSQL_USER,
        password=MYSQL_PASS,
        database=MYSQL_DB,
        charset='utf8mb4',
    )
    try:
        with connection.cursor() as cursor:
            statement = f"""
                UPDATE {MYSQL_TABLE}
                SET content = %s, forward_detail = %s
                WHERE message_id = %s
            """
            values = (record['content'], record['forward_detail'], TARGET_MSG_ID)
            cursor.execute(statement, values)
            connection.commit()
            log.info(f"数据库更新成功, affected rows: {cursor.rowcount}")
    except Exception as e:
        log.error(f"数据库更新失败: {e}")
        connection.rollback()
    finally:
        connection.close()

    # Show the first lines of the regenerated forward_detail.
    log.info("=== forward_detail preview ===")
    preview = record['forward_detail'].split('\n')[:15]
    print('\n'.join(preview))
    print("...")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
@ -214,6 +214,10 @@ def process_message(msg: dict, cos_uploader, date_str: str) -> dict:
|
|||||||
'msg_timestamp': msg_timestamp
|
'msg_timestamp': msg_timestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 多条转发消息
|
||||||
|
if msg_type == 'merge_forward':
|
||||||
|
return None # 由 process_merge_forward_message 单独处理
|
||||||
|
|
||||||
# 纯文本消息
|
# 纯文本消息
|
||||||
if msg_type == 'text':
|
if msg_type == 'text':
|
||||||
text = re.sub(r'<[^>]*>', '', content).strip()
|
text = re.sub(r'<[^>]*>', '', content).strip()
|
||||||
@ -398,6 +402,223 @@ def process_post_message(msg: dict, cos_uploader, date_str: str) -> list:
|
|||||||
return records
|
return records
|
||||||
|
|
||||||
|
|
||||||
|
def process_merge_forward_message(msg: dict, cos_uploader, date_str: str) -> "dict | None":
    """Convert a merge_forward (bundled forward) message into one record.

    The text between ``<forwarded_messages>`` and ``</forwarded_messages>`` in
    ``msg['content']`` is split into sub-messages at header lines of the form
    ``[<ISO timestamp with UTC offset>] <sender>:``. Inside every sub-message,
    ``[Image: img_...]`` references and ``<video .../>`` tags are downloaded
    with download_resource(), uploaded through ``cos_uploader.upload()`` and
    replaced by markdown-style links. Every image reference on a line is
    processed, not only the first one.

    Args:
        msg: Message dict. Keys read: message_id, sender (id / name),
            create_time, content, parent_id, root_id.
        cos_uploader: Object whose ``upload(local_path, cos_key, content_type)``
            returns the URL of the uploaded object.
        date_str: Date segment used inside the COS object keys.

    Returns:
        A record dict with msg_type 'merge_forward', whose 'content' is a
        one-line summary (first five sub-messages) and whose 'forward_detail'
        is the full text with media links; None when the forwarded block or
        its sub-message headers cannot be found. Sub-messages that are empty
        after processing are left out of both detail and summary.
    """
    msg_id = msg.get('message_id', '')
    sender_id = msg.get('sender', {}).get('id', '')
    sender_name = msg.get('sender', {}).get('name', '未知')
    create_time = msg.get('create_time', '')
    raw_content = msg.get('content', '')
    parent_id = msg.get('parent_id')
    root_id = msg.get('root_id')
    quote_message_id = extract_quote_message_id(msg)

    # Parse the creation time; on any failure fall back to one consistent
    # "now" so msg_time and msg_timestamp describe the same instant.
    try:
        dt = datetime.fromisoformat(create_time.replace('Z', '+00:00'))
        msg_time = dt.astimezone(TZ).strftime('%Y-%m-%d %H:%M:%S')
        msg_timestamp = int(dt.timestamp() * 1000)
    except Exception:
        now = datetime.now(TZ)
        msg_time = now.strftime('%Y-%m-%d %H:%M:%S')
        msg_timestamp = int(now.timestamp() * 1000)

    # Extract the body of <forwarded_messages>.
    fwd_match = re.search(
        r'<forwarded_messages>\n(.*)\n</forwarded_messages>', raw_content, re.DOTALL
    )
    if not fwd_match:
        log.warning(f"merge_forward 解析失败: {msg_id}")
        return None
    fwd_body = fwd_match.group(1)

    # Every sub-message starts with a "[timestamp] sender:" header line.
    sub_msg_pattern = re.compile(
        r'^\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2})\]\s+(.*?):\s*$',
        re.MULTILINE
    )
    headers = list(sub_msg_pattern.finditer(fwd_body))
    if not headers:
        log.warning(f"merge_forward 无子消息: {msg_id}")
        return None

    # A sub-message body runs from the end of its header to the next header
    # (or to the end of the forwarded text for the last one).
    sub_messages = []
    for i, header in enumerate(headers):
        body_end = headers[i + 1].start() if i + 1 < len(headers) else len(fwd_body)
        sub_messages.append({
            'timestamp': header.group(1),
            'sender': header.group(2),
            'raw': fwd_body[header.end():body_end],
        })

    # Media counters are shared by all sub-messages so numbering is global.
    counters = {'image': 0, 'video': 0}
    name_prefix = msg_id[-12:]

    image_pattern = re.compile(r'\[Image:\s*(img_[a-zA-Z0-9_-]+)\]')
    video_pattern = re.compile(
        r'<video\s+key="(file_[a-zA-Z0-9_-]+)"\s+name="([^"]*)"\s+duration="(\d+)s"(?:\s+cover_image_key="(img_[a-zA-Z0-9_-]+)")?\s*/>'
    )

    def upload_media(resource_key, resource_type, filename, folder, content_type, label):
        """Download one resource and upload it to COS; return its URL or None."""
        cos_key = f'{COS_BASE_PATH}/{folder}/{date_str}/{filename}'
        local_path = download_resource(msg_id, resource_key, resource_type, filename)
        if not local_path:
            return None
        url = None
        try:
            url = cos_uploader.upload(local_path, cos_key, content_type)
            log.info(f"转发{label}上传COS成功: {cos_key}")
        except Exception as e:
            # Best effort: a failed upload becomes a "download failed" marker
            # in the text instead of aborting the whole message.
            log.error(f"转发{label}上传COS失败: {cos_key} - {e}")
        finally:
            try:
                os.remove(local_path)
            except OSError as e:
                log.warning(f"临时文件删除失败: {local_path} - {e}")
        return url

    def replace_image(match):
        """Turn one [Image: img_xxx] reference into a numbered link."""
        counters['image'] += 1
        idx = counters['image']
        url = upload_media(
            match.group(1), 'image', f'{name_prefix}_fwd_img{idx}.png',
            'image', 'image/png', '图片'
        )
        return f'[图片{idx}]({url})' if url else f'[图片{idx}](下载失败)'

    def render_video(match):
        """Turn one <video .../> tag into a numbered link plus optional cover."""
        vid_key, vid_name, vid_duration, cover_key = match.groups()
        counters['video'] += 1
        idx = counters['video']
        vid_ext = os.path.splitext(vid_name)[1] or '.mp4'
        url = upload_media(
            vid_key, 'file', f'{name_prefix}_fwd_vid{idx}{vid_ext}',
            'video', 'video/mp4', '视频'
        )
        if url:
            text = f'[视频{idx} 时长{vid_duration}s]({url})'
        else:
            text = f'[视频{idx} 时长{vid_duration}s](下载失败)'
        # The cover image, when present, is uploaded as well.
        if cover_key:
            cover_url = upload_media(
                cover_key, 'image', f'{name_prefix}_fwd_vid{idx}_cover.png',
                'image', 'image/png', '视频封面'
            )
            if cover_url:
                text += f'\n [封面]({cover_url})'
        return text

    def process_sub_msg_block(raw_block: str) -> str:
        """Replace the media references of one sub-message with COS links."""
        processed_lines = []
        for line in raw_block.split('\n'):
            stripped = line.strip()

            # Image references: replace every occurrence on the line.
            with_images, image_count = image_pattern.subn(replace_image, stripped)
            if image_count:
                processed_lines.append(with_images)
                continue

            # Video tag: the whole line is replaced by the rendered link.
            vid_match = video_pattern.search(stripped)
            if vid_match:
                processed_lines.append(render_video(vid_match))
                continue

            # Plain text line; blank lines are dropped.
            if stripped:
                processed_lines.append(stripped)
        return '\n'.join(processed_lines)

    def fmt_ts(ts: str) -> str:
        """Format an ISO timestamp as 'YYYY-MM-DD HH:MM'; return it as-is if unparsable."""
        try:
            return datetime.fromisoformat(ts).strftime('%Y-%m-%d %H:%M')
        except (TypeError, ValueError):
            return ts

    # Build forward_detail and collect the per-sub-message summary pieces.
    detail_lines = []
    summary_parts = []
    for sm in sub_messages:
        processed = process_sub_msg_block(sm['raw'])
        if not processed.strip():
            continue
        detail_lines.append(f"[{fmt_ts(sm['timestamp'])}] {sm['sender']}:")
        detail_lines.extend(f" {line}" for line in processed.split('\n'))

        # Summary: first line of text once the media links are removed.
        text_only = re.sub(r'\[图片\d+\]\([^)]*\)', '', processed)
        text_only = re.sub(r'\[视频\d+[^]]*\]\([^)]*\)', '', text_only)
        text_only = re.sub(r'\[封面\]\([^)]*\)', '', text_only)
        first_text = text_only.strip().partition('\n')[0].strip()[:60]
        if first_text:
            summary_parts.append(f"{sm['sender']}: {first_text}")
        else:
            summary_parts.append(f"{sm['sender']}: [媒体]")

    forward_detail = '\n'.join(detail_lines)

    # 'content' holds a short readable summary of at most five sub-messages.
    count = len(sub_messages)
    summary = f"[转发{count}条] " + ' | '.join(summary_parts[:5])
    if len(summary_parts) > 5:
        summary += f" ...(共{count}条)"

    return {
        'chat_id': CHAT_ID,
        'chat_name': CHAT_NAME,
        'sender_id': sender_id,
        'sender_name': sender_name,
        'message_id': msg_id,
        'parent_id': parent_id,
        'root_id': root_id,
        'quote_message_id': quote_message_id,
        'msg_type': 'merge_forward',
        'content': summary,
        'forward_detail': forward_detail,
        'media_url': None,
        'msg_time': msg_time,
        'msg_timestamp': msg_timestamp,
    }
|
||||||
|
|
||||||
|
|
||||||
def insert_to_mysql(records: list):
|
def insert_to_mysql(records: list):
|
||||||
"""批量插入记录到MySQL"""
|
"""批量插入记录到MySQL"""
|
||||||
if not records:
|
if not records:
|
||||||
@ -417,8 +638,8 @@ def insert_to_mysql(records: list):
|
|||||||
sql = f"""
|
sql = f"""
|
||||||
INSERT INTO {MYSQL_TABLE}
|
INSERT INTO {MYSQL_TABLE}
|
||||||
(chat_id, chat_name, sender_id, sender_name, message_id, parent_id, root_id,
|
(chat_id, chat_name, sender_id, sender_name, message_id, parent_id, root_id,
|
||||||
quote_message_id, msg_type, content, media_url, msg_time, msg_timestamp)
|
quote_message_id, msg_type, content, forward_detail, media_url, msg_time, msg_timestamp)
|
||||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||||
ON DUPLICATE KEY UPDATE
|
ON DUPLICATE KEY UPDATE
|
||||||
sender_name = VALUES(sender_name),
|
sender_name = VALUES(sender_name),
|
||||||
parent_id = VALUES(parent_id),
|
parent_id = VALUES(parent_id),
|
||||||
@ -426,6 +647,7 @@ def insert_to_mysql(records: list):
|
|||||||
quote_message_id = VALUES(quote_message_id),
|
quote_message_id = VALUES(quote_message_id),
|
||||||
msg_type = VALUES(msg_type),
|
msg_type = VALUES(msg_type),
|
||||||
content = VALUES(content),
|
content = VALUES(content),
|
||||||
|
forward_detail = VALUES(forward_detail),
|
||||||
media_url = VALUES(media_url)
|
media_url = VALUES(media_url)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -441,6 +663,7 @@ def insert_to_mysql(records: list):
|
|||||||
record['quote_message_id'],
|
record['quote_message_id'],
|
||||||
record['msg_type'],
|
record['msg_type'],
|
||||||
record['content'],
|
record['content'],
|
||||||
|
record.get('forward_detail'),
|
||||||
record['media_url'],
|
record['media_url'],
|
||||||
record['msg_time'],
|
record['msg_time'],
|
||||||
record['msg_timestamp']
|
record['msg_timestamp']
|
||||||
@ -490,6 +713,11 @@ def main():
|
|||||||
# post 富文本消息:拆分为文本+图片多条记录
|
# post 富文本消息:拆分为文本+图片多条记录
|
||||||
post_records = process_post_message(msg, cos_uploader, date_str)
|
post_records = process_post_message(msg, cos_uploader, date_str)
|
||||||
records.extend(post_records)
|
records.extend(post_records)
|
||||||
|
elif msg_type == 'merge_forward':
|
||||||
|
# 多条转发:解析子消息、下载媒体→COS、生成forward_detail
|
||||||
|
fwd_record = process_merge_forward_message(msg, cos_uploader, date_str)
|
||||||
|
if fwd_record:
|
||||||
|
records.append(fwd_record)
|
||||||
else:
|
else:
|
||||||
record = process_message(msg, cos_uploader, date_str)
|
record = process_message(msg, cos_uploader, date_str)
|
||||||
if record:
|
if record:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user