From ec805b0731852caf39a60ca518116633008f8b89 Mon Sep 17 00:00:00 2001 From: --git_token Date: Fri, 1 May 2026 08:10:01 +0800 Subject: [PATCH] auto backup 2026-05-01 08:10:01 --- .vala_skill_hashes | 3 +- data/last_wechat_sync_id | 2 +- memory/.dreams/events.jsonl | 3 + memory/.dreams/short-term-recall.json | 39 ++ memory/2026-04-30-greeting-assist.md | 10 + ...26-04-30-request-timed-out-before-a-res.md | 9 + memory/2026-04-30-weekly-wechat-feedback.md | 32 + memory/2026-04-30.md | 226 +++++++ .../飞书反馈_2026-04-29.xlsx | Bin 0 -> 5814 bytes scripts/export_lark_feedback_to_wiki.py | 372 +++++++++++ scripts/run_export_lark_feedback.sh | 13 + scripts/sync_feishu_feedback_wrapper.sh | 21 + skills/feishu-feedback-sync/SKILL.md | 244 +++++++ .../sync_feishu_feedback.cpython-312.pyc | Bin 0 -> 27011 bytes .../scripts/sync_feishu_feedback.py | 599 ++++++++++++++++++ .../sync_group_to_mysql.cpython-312.pyc | Bin 0 -> 20674 bytes .../scripts/sync_group_to_mysql.py | 139 +++- 17 files changed, 1705 insertions(+), 7 deletions(-) create mode 100644 memory/.dreams/events.jsonl create mode 100644 memory/.dreams/short-term-recall.json create mode 100644 memory/2026-04-30-greeting-assist.md create mode 100644 memory/2026-04-30-request-timed-out-before-a-res.md create mode 100644 memory/2026-04-30-weekly-wechat-feedback.md create mode 100644 memory/2026-04-30.md create mode 100644 output/daily_feedback/飞书反馈_2026-04-29.xlsx create mode 100644 scripts/export_lark_feedback_to_wiki.py create mode 100755 scripts/run_export_lark_feedback.sh create mode 100755 scripts/sync_feishu_feedback_wrapper.sh create mode 100644 skills/feishu-feedback-sync/SKILL.md create mode 100644 skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc create mode 100755 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py create mode 100644 skills/feishu-group-msg-sync/scripts/__pycache__/sync_group_to_mysql.cpython-312.pyc diff --git a/.vala_skill_hashes b/.vala_skill_hashes index 04450cb..4cf72c7 100644 --- a/.vala_skill_hashes +++ b/.vala_skill_hashes @@ -10,7 +10,8 @@ pua f6a38fdd39c22c81370abd6b979b58c767e41738d43a26fbbc23d1e933cdc701 smart-auto-model-switch bfb3547dcd6029622c7062b49ae7922614a366b6dfe88c7d0fae9dcd85fc2eb3 vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf0bca9fd64b968ec tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06 -feishu-group-msg-sync c6e6e1007f417d76582bd7f7e10da063313f2ae93f9d7ee54f629a3642c66a74 user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81 user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13 user-feedback-processor 3bc199b29eaec4e20d7904ae13e006276532145d203df92718cbf3197825bc44 +feishu-group-msg-sync 085f95a5b89fec3b6a627da25d66ffeeb0be430098387739a64f7903f0ee88d4 +feishu-feedback-sync ad9934adbb72b3fb6503dc97379f0a5eb4af82e4d0bdacfbcafbd563645f68b1 diff --git a/data/last_wechat_sync_id b/data/last_wechat_sync_id index 90064a9..a0e999a 100644 --- a/data/last_wechat_sync_id +++ b/data/last_wechat_sync_id @@ -1 +1 @@ -854 \ No newline at end of file +860 \ No newline at end of file diff --git a/memory/.dreams/events.jsonl b/memory/.dreams/events.jsonl new file mode 100644 index 0000000..8a8a6df --- /dev/null +++ b/memory/.dreams/events.jsonl @@ -0,0 +1,3 @@ +{"type":"memory.recall.recorded","timestamp":"2026-04-30T03:47:21.989Z","query":"微信反馈群 数据库 表结构 MySQL","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-04-30T06:52:35.560Z","query":"用户反馈数据源 飞书群 微信群 数据库表","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-04-30T08:11:39.116Z","query":"飞书群反馈表格 token E8vFsCmPBhT4SCtNmnJchqeJnJe 内容测试问题反馈","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json new file mode 100644 index 0000000..598ab18 --- /dev/null +++ b/memory/.dreams/short-term-recall.json @@ -0,0 +1,39 @@ +{ + "version": 1, + "updatedAt": "2026-04-30T08:11:39.116Z", + "entries": { + "memory:memory/2026-04-18.md:1:5": { + "key": "memory:memory/2026-04-18.md:1:5", + "path": "memory/2026-04-18.md", + "startLine": 1, + "endLine": 5, + "source": "memory", + "snippet": "# 2026-04-18 工作日志 ## 术语共识 [李若松确认] 术语「飞书反馈消息数据库」默认指代用户反馈收集技能中的飞书内部测试反馈MySQL数据表 `vala_test.lark_group_message`,存储「内容测试问题反馈」群(oc_fabff7672e62a9ced7b326ee4a286c26)的同步消息数据。", + "recallCount": 3, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 3, + "maxScore": 1, + "firstRecalledAt": "2026-04-30T03:47:21.989Z", + "lastRecalledAt": "2026-04-30T08:11:39.116Z", + "queryHashes": [ + "353f9765c086", + "a6b740c99377", + "9625ed0029fd" + ], + "recallDays": [ + "2026-04-30" + ], + "conceptTags": [ + "vala-test.lark-group-message", + "工作", + "日志", + "术语", + "共识", + "若松", + "确认", + "反馈" + ] + } + } +} diff --git a/memory/2026-04-30-greeting-assist.md b/memory/2026-04-30-greeting-assist.md new file mode 100644 index 0000000..d9bb489 --- /dev/null +++ b/memory/2026-04-30-greeting-assist.md @@ -0,0 +1,10 @@ +# Session: 2026-04-30 09:56:16 GMT+8 + +- **Session Key**: agent:xiaokui:feishu:direct:ou_088ee79216826be4a24af44f7268f880 +- **Session ID**: a3cec499-614b-474a-8317-6b1413d17138 +- **Source**: feishu + +## Conversation Summary + +user: hi +assistant: 您好若松,请问有什么需要我协助处理的? diff --git a/memory/2026-04-30-request-timed-out-before-a-res.md b/memory/2026-04-30-request-timed-out-before-a-res.md new file mode 100644 index 0000000..0339ae5 --- /dev/null +++ b/memory/2026-04-30-request-timed-out-before-a-res.md @@ -0,0 +1,9 @@ +# Session: 2026-04-30 10:26:17 GMT+8 + +- **Session Key**: agent:xiaokui:feishu:direct:ou_088ee79216826be4a24af44f7268f880 +- **Session ID**: d5668b39-c920-4fc3-8184-62fad2edf228 +- **Source**: feishu + +## Conversation Summary + +assistant: 您好若松,请问今天有什么需要我协助处理的? diff --git a/memory/2026-04-30-weekly-wechat-feedback.md b/memory/2026-04-30-weekly-wechat-feedback.md new file mode 100644 index 0000000..374511e --- /dev/null +++ b/memory/2026-04-30-weekly-wechat-feedback.md @@ -0,0 +1,32 @@ +# Session: 2026-04-30 15:08:29 GMT+8 + +- **Session Key**: agent:xiaokui:feishu:direct:ou_088ee79216826be4a24af44f7268f880 +- **Session ID**: efb75fb6-49d4-4278-8d6c-2ef5e8fc00f6 +- **Source**: feishu + +## Conversation Summary + +assistant: 您好若松,请问今天有什么需要我协助处理的? +user: hi +assistant: 你好若松,请问有什么需要我帮忙处理的? +user: hi +assistant: 若松,有事直接说,我在。 +user: 本周记录了多少条微信群的反馈 有多少图片消息 都获取到图片链接了吗 +assistant: 李若松是技术负责人,有完整权限。我需要查询微信反馈群的数据。先查一下相关的技能和数据库配置。 +assistant: 数据源明确了:微信反馈存储在 `vala_test.wechat_group_message` 表。现在查询本周的数据。需要先读取 secrets 获取数据库密码。 +assistant: 图片消息是 0,这个有点奇怪。让我核对下 msg_type 的枚举含义,以及看看实际上有哪些消息类型。 +assistant: 原来 msg_type 是字符串类型,不是数字!所以之前用 `msg_type = 3` 没查到。改用字符串 `'image'`。 +assistant: 数据清晰了。 + +本周(4/23 - 4/30)"用户火线救火"微信群反馈数据: + +| 指标 | 数值 | +|------|------| +| **本周总消息数** | 348 条 | +| **本周图片消息数** | 68 条 | +| **图片已获链接** | 54 条 | +| **图片无链接** | 14 条(约 21%) | + +有 14 条图片消息没有获取到链接,主要集中在 4/23~4/28,发送者包括松、王虹茗、茉茉老师、嘿哈、糖果老师、丹咪尼🌹等用户。content 里保留了图片 hash(如 `9a1285b97cd634b2fd10fb6c3c98fb68`),但 `media_url` 为空。 + +这些图片可能是通过微信同步脚本进来的但当时图片下载或 COS 上传环节失败了。需要我排查一下这 14 条无链接图片的原因,或者尝试补拉吗? diff --git a/memory/2026-04-30.md b/memory/2026-04-30.md new file mode 100644 index 0000000..ecad4ea --- /dev/null +++ b/memory/2026-04-30.md @@ -0,0 +1,226 @@ +# 2026-04-30 工作日志 + +## 今日任务 + +### 微信问题反馈数据同步到飞书知识库 +[刘新玉] 要求将微信「用户火线救火」群近3天数据导出到飞书知识库文档"微信问题反馈"下方。 + +#### 执行过程 +1. **数据源**:MySQL `vala_test.wechat_group_message`,查询 2026-04-27 ~ 2026-04-30 数据 +2. **权限问题**: + - Bot 应用缺少 `wiki:node:create` 权限(code:10014),无法在知识库节点下直接创建文档 + - `feishu_bitable_app` 仅限 App Owner(李若松 ou_088ee79216826be4a24af44f7268f880)使用 + - 刘新玉 (ou_9d4df593d0419d705274947c5cec5ada) 无权使用 feishu 工具写入 +3. **解决方案**: + - 先用 `lark-cli sheets +create` 在知识空间中创建电子表格(绕过 wiki:node:create 限制,直接在空间创建) + - 成功在知识空间 `7612229802338045122` 下创建表格,自动挂载到父节点 `SB3dwaSshie7ifklKlLc2GswnqX`("微信问题反馈"文档下方) +4. **数据写入**:按天分 sheet(2026-04-27 / 2026-04-28 / 2026-04-30),字段与数据库一致: + - 消息ID(svr_msg_id) / 发送者(sender_name) / 消息类型(msg_type) / 内容(content) / 媒体URL(media_url) / 引用消息ID(refer_msg_svrid) / 消息时间(msg_time) / 消息时间戳(msg_timestamp) +5. **最终结果**:共 99 条数据(4/27: 9条, 4/28: 84条, 4/30: 6条) + +#### 关键标识 +- 电子表格 token:`RUXfsytPzhJO5kt2uwCcvdIgnLg` +- 在知识库位置:https://makee-interactive.feishu.cn/wiki/R4HRwNU42iwH1Hk3OMCcB6i7n1u 下方 +- 父节点 token:`SB3dwaSshie7ifklKlLc2GswnqX` +- 旧临时表格(已清理):`Bh1gsZj2ehf4brt9impcNnuqnBg` + +### SkillHub 同步 +- 心跳触发 SkillHub 自动同步检查:6 个 skill 均无变更,跳过推送 +- 执行于 14:59 + +## 经验沉淀 + +### 知识库中创建电子表格的正确方法 +- ❌ `lark-cli wiki node:create` → Bot 缺少 `wiki:node:create` 权限 +- ❌ 在云空间创建再移动 → Bot 缺少 drive 权限 +- ✅ `lark-cli sheets +create --space-id=$SPACE_ID --parent-node=$NODE_TOKEN` → 直接在知识空间创建,自动挂载到指定父节点 + +### lark-cli sheets +write 数据格式 +- 使用 `--data-b64` 传 base64 编码的 JSON 数据,避免 shell 转义问题 +- 格式:`[["cell1","cell2"],["cell3","cell4"]]`(二维数组) +- `--range='!A1:Z1'` 总是指定起始单元格即可 + +### 飞书问题反馈数据同步到知识库(追加) +[刘新玉] 要求同步飞书「内容测试问题反馈」群近3天数据到知识库。 + +#### 执行过程 +1. 原电子表格 `E8vFsCmPBhT4SCtNmnJchqeJnJe` 已不可用(API 返回 deleted/missing) +2. 在知识空间 `SB3dwaSshie7ifklKlLc2GswnqX` 下创建新电子表格「飞书问题反馈-近3天」 +3. 数据源:MySQL `vala_test.lark_group_message` (chatbot:xhuBx7d@uT2gUVv) +4. 按天分 sheet 写入: + - 2026-04-28: 23条(HUD显示bug、组件无音频、Loading慢/数据丢失) + - 2026-04-29: 2条(网络问题导致下载播放失败) + - 4/30 无数据 +5. 表格列:时间 | 反馈人 | 信息类型 | 信息内容(或地址) + +#### 关键标识 +- 电子表格 token:`AHtnsehwShUVyDtjasSciIvgn7b` +- 知识库节点 token:`TVivwmzqXiW3YakDUzucFMRenvf` +- 位置:知识库「飞书问题反馈」文档同级 + +### Bot 权限现状 +- 有:sheets:read/write, wiki API 读取(wiki:space:get, wiki:node:read) +- 无:wiki:node:create, drive 操作, bitable 操作 +- 结论:Bot 可以通过 sheets API 在知识空间直接创建电子表格,但无法创建其他类型文档 + + +### 飞书问题反馈表格字段统一与移动 +- 将表格移动到「飞书问题反馈」文档下方(wiki v2 move API) +- 将表格字段从「时间|反馈人|信息类型|信息内容」改为与数据库 `lark_group_message` 一致:消息ID/发送者/消息类型/内容/媒体URL/引用消息ID/消息时间/消息时间戳 +- 4/29 数据因字段变更需重新写入(仅2条) +- 表格最终位置:https://makee-interactive.feishu.cn/sheets/AHtnsehwShUVyDtjasSciIvgn7b(「飞书问题反馈」文档下方) + +### 飞书问题反馈按引用关系重新排序 +[刘新玉] 要求按问题完整解决过程排序——通过 `quote_message_id` 串联同一问题的讨论链。 + +#### 排序逻辑 +1. 从数据库读取全部消息及引用关系 +2. 构建引用图:每个消息的 `quote_message_id` 指向其父消息 +3. 聚合问题链(cluster):同一引用链的消息归为一组,连续排列 +4. 同 cluster 内按时间排序,子回复紧跟父消息 +5. Cluster 间按最早时间排序 +6. 无引用关系的独立消息按时间线补充 + +#### 处理过程 +- 写入前先通过 `lark-cli sheets +write` 清空 sheet(`--raw-data="[]"`) +- 4/28 23条 → 生成完整引用链排序,写入 +- 4/29 2条 → 无引用关系,直接写入 + +#### 4/28 问题链总结 +| 问题 | 涉及人 | 消息数 | +|------|--------|--------| +| NPC HUD显示bug(仅移动端) | 徐思清→王胤鑫 | 3 | +| 关卡出现规律 | 王胤鑫→庞鸿潇→梁晨 | 4 | +| Playtesting数据记录 | 孙时敏 | 1 | +| iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | +| 网络问题(4/29) | Ann | 2 | + +### 飞书群 4/25-4/27 数据查询结果 +- 查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 +- 结果:0 条,该群此时间段无消息记录 + +### 反馈同步 Skill 创建 [刘新玉] +将飞书问题反馈同步流程封装为 `feishu-feedback-sync` skill,并计划注册定时任务。 + +#### Skill 文件 +- `skills/feishu-feedback-sync/SKILL.md` — 完整技能文档 +- `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心同步脚本 +- `scripts/sync_feishu_feedback_wrapper.sh` — 定时任务包装脚本 + +#### Skill 功能 +1. 从 MySQL `lark_group_message` 查询近 N 天数据 +2. 写入知识库电子表格(按天分 sheet) +3. **反馈对话链排序**:按引用关系将同一问题讨论聚合呈现 + +#### 策略2:推断缺失引用关系 [刘新玉] +问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) + +**推断规则(按优先级)**: +1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 +2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 +3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) + +已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 + +#### 触发方式 +- 手动:「同步飞书反馈」「整理反馈对话链」 +- 定时:每天 10:00 crontab 自动执行 + +## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 + +### 步骤4 包含两部分 +1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} +2. **当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: + - "日志上传/排查/查" → "日志已上传,排查中" + - "确认/确实" → "已确认,待修复" + - "已修复/已解决" → "已修复" + - "不是 bug/设计如此" → "非问题,设计如此" + - 无明确结论 → "暂未排查到根因" + +### 归纳格式 +```markdown +### 问题 N +> **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** + +| 发言人 | 要点 | +|--------|------| +| 报告人 | 🚩 报告:... | +| ... | ... | +| 最终人 | ✅ 结论/待排查 | +``` + +### 维度提取规则 +| 维度 | 优先级/来源 | +|------|------------| +| 端 | iOS > iPad > pad端 > Android > 移动端 > PC(正则匹配,忽略大小写) | +| 环节 | 关卡内/知识巩固/单元挑战/听力挑战/阅读挑战/口语挑战/写作挑战/单元强化/瓦拉学院/报告(从消息文本匹配) | +| 课程 | 匹配数字编号(如 11-2、L1 3-2) | +| 角色/组件 | NPC/HUD/音频/组件/数据/Loading/加载/日志(从消息文本匹配) | +| 现象 | 从消息中提取要害描述,截断在 35 字符以内 | + +### 现象提取逻辑 +1. 优先从包含 "Bug的表现是这样的:"、"问题是"、"发现"、"出现" 等关键词的消息中截取描述句 +2. 提取的句子去除 URL、图片标记、疑问句 +3. 截断到 35 字符防止过长 + +### Bug 修复记录 +- **idx 变量覆盖 bug**:`summarize_cluster` 的 `idx` 参数被循环内 `idx = t.find(kw)` 覆盖,导致问题编号显示为字符串查找位置。修复:局部变量改名 `pos` +- **iOS 识别失败**:`\biOS\b` 不匹配 "iOS线上"(无词边界)。修复:模式改为 `iOS|ios` 配合 `re.IGNORECASE` +- **编号跳跃**:单消息簇被跳过导致编号不连续。修复:`generate_summary` 中 `idx` 只对有效簇递增 + +### 测试验证 +- 2026-04-28 数据:问题一 "暂未排查到根因",问题二 "日志已上传,排查中" — 均正确 +- dry-run 模式正常运行 + +### 已知限制 +- 「未知组件」在 NPC/HUD 边缘写法时可能漏匹配 +- iOS 的两个相关话题(组件无音频 / Loading 慢)因无引用关系而分成两个簇(需策略3语义聚类解决) +- 单消息簇被跳过(需至少 2 条消息才能形成问题) + +### Skill 文件最终状态 +- `skills/feishu-feedback-sync/SKILL.md`:已包含完整步骤1-4的文档 +- `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`:已集成 `summarize_cluster()`、`extract_location_elements()`、`generate_summary()` 函数 +- crontab 每日 10:00 执行(与步骤3一起) + +### 步骤4 架构调整:AI 归纳取代规则生成 [刘新玉] - 2026-04-30 19:07 + +#### 问题 +脚本规则匹配生成的问题描述质量差: +- 组件匹配失败(NPC/HUD → "未知组件") +- 现象摘取了完整原始消息(含 @、无关词) +- 端识别不稳定 + +#### 决策 +**脚本输出结构化元数据 + 对话表,AI 负责归纳描述。** +- 脚本 `summarize_cluster` 改为输出: + 1. 位置元数据(端/环节/课程/组件)— 由 `extract_location_elements` 提取 + 2. 发言人-要点表格(规则生成) + 3. 问题描述留 `[AI归纳]` 占位符 +- 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 + +#### AI 归纳的最终输出格式(固定模板) +```markdown +### 问题 N +> **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** + +| 发言人 | 要点 | +|--------|------| +| ... | ... | + +**当前问题排查结论:** ... +``` + +#### 结论提取规则增强 +- 解释性关键词:上云/预下载/加载/原因是/改为了/首次 → 标记为分析性发言 +- 分析性发言 + 日志上传 → 输出「疑似{原因},已上传日志,排查中」 +- 分析性发言 + 无日志 → 输出「{原因},待确认」 +- 无分析 + 无日志 → 改为「暂未排查到问题」(刘新玉确认,比「暂未排查到根因」更准确) + +#### 4/28 最终归纳结果(AI 生成) +1. **NPC HUD 显示**:在移动端关卡内(11-2),NPC 头上的 HUD 偶尔变成一小条 → 暂未排查到问题 +2. **iOS Loading 慢**:在 iOS 端关卡内(L1 3-2),Loading 耗时约 10 秒(正常 3 秒),导致组件数据丢失/无音频 → 疑似关卡内容上云加载导致,已上传日志,排查中 + +#### 结论提取的边界 +- 刘新玉指出:"暂未排查到问题" vs "暂未排查到根因" → 前者更准确(问题被描述了但可能没被排查) +- 结论应基于对话中实际出现的解释性判断,而非仅关键词匹配 +- 分析性发言需要语义理解("上云在加载"、"首次预下载"),规则匹配作为辅助 diff --git a/output/daily_feedback/飞书反馈_2026-04-29.xlsx b/output/daily_feedback/飞书反馈_2026-04-29.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2d0a1c4a296a9def40b3bff5e7fc6538598ab1ae GIT binary patch literal 5814 zcmZ`-1ymIM*IrU$>0VM&1Yu!88VNz^&ZPxm>6DO8T|kzQ6qFVa5Lvnxq+uxmX$dJ2 z5T)Z=U(fe{-=H9u_z4!NMtKr~M0{{R5fMGz6nR1WD!*y9@ZaIBTFah*4l5{!*Lnle!EzO9AP98F)@JTEHi&me(cnbaw~$ zo~yrqFJqp-b9V09bftZPRTqeG4~g6)$EoZ1*bF5WhyjE z2q-?dT=#x+LO|Kx2c4?m`$1BNyK~uCM6!y9wQP=whDmi83SN7rx(8ZlH>!*`EcZn4}q4^gkUckYUs zbgCA+nm)cgeRpqLU`FuY(aA7-#X*h_0KDP>0H`q0@pBgRcCd4?`*Rlh9iJUjsMm}* zb?|QGh@btT3vu_|uv#RQ7wpL_G9z+FlQoLMINsUcHZd}!?ZI90fmD*6B#p)_Xl@#?L?7U$l%sH3-fvr`L1;2ybh`x@gn1S=qc6yr(`XpwG-r^YUhU#@{8L zQ(Sg`#GQnVF{FwpduAi>NC9G9qw3m$mGZUlB~;Rw8DbL6TcZsk$m|XK^&tG_$GOUj zUujK!1(1O3Ajx5;ln_`MR`E_#7CQOdYh(3waq^Ug!mxWYKjHov2h=)-$5o2TQS!idfj`c zag{>6MukL}>K;LMbh@K96LtZiP-<@UF|zv|#S&FeT`NKFV*InCGPg__qg-Q^naE>u zWzg~~udiOC&Sy6Yl}8T^n&QJs^^oem8lMcAVJ9#Ba#FWU$ zjBU?Yt$Z^Q*0je`$@wC)97VyWAZx>L7vxz(8&>O45@}^h**)r~IfkS-H+d(TENj*K zZ79)acruZcBFRDw^Xzaef})KjT3j`AC(pwe_aeulQ>9}geBkj#eQ@+Ah%^6-qX!do zs;gw&#rHOaMQ-G0nh>@`8alo%jS-Q3%OXsKx8J}byB6R_B$nFUtX&cQ6zghqMhsPU z9NsPwA`pZw+->+NlRH)?#t~%SF&+zDPZPHpo0YlVvi#^1Y0Guw5HUot&QooxHl?+OTU_;L)u6j3%K%WyUOI`n*7psw z=-Hw9lU0sFuwI&CJxHMy$E7iQPEtH4J+XmkTo>Wid9wM@540@t8Mh%Q-NL`Px(L`UGl=r>{QfSf=Q$Bo@LRX}kcfg?6y3VK4WgyO#IKOi0nk@BD{gwYm3M_9WY>t;q__TDwtJ>d_5cpJBLn(dt_prll zi664 z^(#K_kb!tAPU~|WRLpP_N6jwl4>f^WxvkVX16;H3@LbJWOvoVVWRwr`GnQDYW4y8g z>QPGXu8b#HPJ|aF+-D^ha4f!3@hcVtcLc%Vp%i5{t&NphCtR;XjIl{uHrHptdP5sw(fE5d!*$8Xo|2AB!)>C}n~mpJLxh z#1py6f0A3fH#&(e^rx6dhT+Uly(jt0f2V(_Wbcd$##?A`uK9m@K}7SV9ORS z+Tdg*c^E!F7<1XWa?`xrX_jKJ7+N!y(0EzfBB;S9{=N%VA!cCe+^ow3x#AITReaK# zwfD5t_tR*dJqKaza+3@Hat2x8K=bHALXS(TB-Fy(+09$ma+4nt*n6a}UyMFoP|rHC z)|par?TJsTDb+C2rM=pSuP%YOd<9w7+9xw6rA|a2`)*f;5cEj;oVOgES3`}eHFHaU z8YEsMcK-_aO1v2Blb7@1G(YuZf#P1NmBKpQoiX=pt)8#Gj!q>;?( zM&@7`6~RsvQ&LdoPH?a6{GQ0-Te=Tg;?hI)yhAL#vTJ)H>lcQHDsEdc4nzw(sZIuE zCrJJ$7O-_j*|RUTX4NhB!Hrd@1LL&EBBJ#Ht@SzhHD9XA*&KylTl56_B~Yw5+bo8^ zQQ_(NG%GIK`~acgI!i8pchjnw#pXUjyQdvD|3!w^+|E4uDq-L!2k&ObEa_Kt#2_}g z6$1fP^K6qPz62-A#*<i#9s8VWz9<&9FXISDsc*)=@uz8}^z1-^sycn#0r%CpNuaej&87(yf^zVLVO zR&>fkSW(!RqYflDVW&e`YOb^CnF)85>B?~9)Iq%M74cY`)J0+7oKz7*epHhDFbB8X z;7!2o6qK~VTgrt(?Fj3KjlNkUnp_}vKFd~mF$E%ZQN>8=hjP!7$vZfU*t>yn0hpx5 zc8J2b)Q1h`lhel7*}T%GyT{va@6k1!*s(l?wVKMVaDizC)8HH|o;2|afM3ttfy&q_ zqN?oV6y-il56apWJ}-yPab;tD@m^S6Xw6NTbKzAs+fUbBs(r09af>-(;uptl^C~tJ zV-!M@cNk72!nhS0LrqZZIzm%evwlq9^L;KXPMpaFee-s17CKi-H0E16t{T{Xiph-h zmN?fCR!nB7&^mM2E{+rg_+C$t3jCyr;L)h~twN_PAD#yXpc^114^myMeW9!8mIM$$ z77kr+wOr%BW`nic^ZFK#1q{Cu9VKSH?)cDfDMI2E;Z?dY*EY94GpFnW<4NByxV073 zc`3v4z3w37T6JnWV`g2%4E`oS0D$CAb@KA|ceeBTJ=o40>9}Qq$uIeyVJ7*JDsa?8 zM%_Y{S^bir=`uomeugZD@x;s16EIfNaVc_U{2Ar9v%tlmb*YFCPQMbzd*He|zeMR= zmW@osUPru<(MNLc-1w|*jZi4fbbuJxjogJNHRBa$-pKlNo5&Fr*|zw+qP5azo7tAf zd%5!5s*SH8{SM9*v;3= z*rV0cy&*^=Q6a{9k;QD9C`X#u;;Q z)ZOp!mo#%V&TQ}r)5*sOh_0%9betB8nwHx}QP3DF<8n)&6Q-8}VV8_%0$Mk)Wvdh^M{Hwt9|uX3rdH(V0_tIg=6C1-_bB z(O{*%V|;J$%m$uNRcU`Xlr%rBGQPJ$H>|3omvuITbtL8a7}XGV#t79iuyVz-esOzjv3 zP+{0WjJaBR+Btg(3j982CpEZsf`M`&A4hV)H%4wggV^oU;+|j`=8;WP;=z@&+bU-Y zJrKkU{xZ^(b1X2q9r1$N*b@2+a$T6|r+Tc1wOk3X;Y5Xs34RvQm@&EgvyT+toAS%b z2{1gaYq6y*_9`%XW;T^wC^IXlCGQqmDXH$E0DGtgA>A7q3lTm#0E#NDsdBu8g8atU zE}tx^jOMZ%(`G95jps2JhA{UWBE~l>AF(3?pc^M14*End<{ojydDtK?MZ^2Zz6AsR zJ&=sdzFpG80ssUtj*|VM{O^Ip#?8&?H|gVb22?xcs4rfES*>(O!wWaN^JwqKzWiR? zr{T3Jjmpk4Ji~K^^VPXV3VbZT%8$B?N{+PZh%d|V`o1VKw)!b6%B@Sd2~7%ZMq&7RY~8D5G9~H@ z0~uuusnt@O@m+b0{vE(Rj~1P6wOtRw;0AfFSz>*lND45`b^v>*Icc946+!Pqw(~h< zI{#|Ip&CdY&`bgQ&{J-fR;6nut!dNS>@Q09JFqcekR zx4ZY|?p7}A$Ss%VT713U5xIaB(d_jglfLxut+zfev^1scTT^sPywLNRPJH8c-Oh|6 zSpOY=PL`~^a!lNhG5-McC;oqO?Vo?)ue_^Clp&1(1EVS_*8rm0C5m|J=Ei&i~o)*o{2gzWZdzILGYb$wHNz^Uwn-)}qcZ5L1gMZyJ!SHN11bnli; zqVb!@S{%ufZZ*nJLu_-tXXL$-zEt$~nTG|6UwjbXBLg>^Z)t#Ez6IhE@nG7J=@5-j~<sv z8t0E}0#$T#^|o{Mw$%4`xAQXpooMCB8k!hc3FDm|luLl$_)QU`6|uQH)*~%Y*=4f5 z>DVkpb4K+IB~6sZJHLfybWYm4g#?QC8`;06Q= %s AND msg_time < %s + ORDER BY msg_timestamp ASC + """ + cursor.execute(sql, ( + yesterday_start.strftime("%Y-%m-%d %H:%M:%S"), + yesterday_end.strftime("%Y-%m-%d %H:%M:%S") + )) + rows = cursor.fetchall() + logger.info(f"查询到 {len(rows)} 条记录") + return rows, yesterday_start + finally: + conn.close() + + +def generate_excel(rows, date_obj): + """生成 Excel 文件""" + os.makedirs(OUTPUT_DIR, exist_ok=True) + date_str = date_obj.strftime("%Y-%m-%d") + filename = f"飞书反馈_{date_str}.xlsx" + filepath = os.path.join(OUTPUT_DIR, filename) + + wb = openpyxl.Workbook() + ws = wb.active + ws.title = "飞书反馈数据" + + # 表头 + headers = [ + "ID", "群聊ID", "群聊名称", "发送者ID", "发送者姓名", + "消息ID", "父消息ID", "根消息ID", "引用消息ID", + "消息类型", "内容", "媒体URL", "消息时间", "时间戳", "入库时间" + ] + + # 表头样式 + header_font = Font(bold=True, color="FFFFFF", size=11) + header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") + header_align = Alignment(horizontal="center", vertical="center", wrap_text=True) + thin_border = Border( + left=Side(style="thin"), + right=Side(style="thin"), + top=Side(style="thin"), + bottom=Side(style="thin") + ) + + for col, header in enumerate(headers, 1): + cell = ws.cell(row=1, column=col, value=header) + cell.font = header_font + cell.fill = header_fill + cell.alignment = header_align + cell.border = thin_border + + # 数据行 + for row_idx, row_data in enumerate(rows, 2): + values = [ + row_data.get("id"), + row_data.get("chat_id"), + row_data.get("chat_name"), + row_data.get("sender_id"), + row_data.get("sender_name"), + row_data.get("message_id"), + row_data.get("parent_id"), + row_data.get("root_id"), + row_data.get("quote_message_id"), + row_data.get("msg_type"), + row_data.get("content"), + row_data.get("media_url"), + str(row_data.get("msg_time", "")) if row_data.get("msg_time") else "", + row_data.get("msg_timestamp"), + str(row_data.get("collected_at", "")) if row_data.get("collected_at") else "", + ] + for col, val in enumerate(values, 1): + cell = ws.cell(row=row_idx, column=col, value=val) + cell.border = thin_border + cell.alignment = Alignment(vertical="center", wrap_text=True) + + # 调整列宽 + col_widths = [6, 30, 18, 30, 12, 30, 30, 30, 30, 10, 50, 50, 20, 15, 20] + for i, width in enumerate(col_widths, 1): + ws.column_dimensions[openpyxl.utils.get_column_letter(i)].width = width + + # 冻结首行 + ws.freeze_panes = "A2" + + wb.save(filepath) + logger.info(f"Excel 已生成: {filepath}") + return filepath, filename + + +def get_bot_tenant_token(): + """获取 Bot 的 tenant_access_token""" + # 从 lark-cli 配置中读取 app_id 和 app_secret + config_path = os.path.join(LARK_CLI_CONFIG, "config.json") + if not os.path.exists(config_path): + # 尝试其他路径 + config_path = os.path.join(LARK_CLI_CONFIG, "credentials.json") + + # 通过 lark-cli 获取 token + result = subprocess.run( + ["lark-cli", "auth", "token", "--as", "bot"], + capture_output=True, text=True, + env={**os.environ, "LARKSUITE_CLI_CONFIG_DIR": LARK_CLI_CONFIG} + ) + if result.returncode == 0: + token = result.stdout.strip() + if token: + return token + + raise RuntimeError(f"无法获取 Bot tenant_access_token: {result.stderr}") + + +def upload_to_drive(filepath, filename): + """使用 Bot 身份上传文件到飞书云空间""" + logger.info(f"正在上传文件到飞书云空间...") + + # lark-cli 要求相对路径,需要 cd 到文件所在目录 + file_dir = os.path.dirname(os.path.abspath(filepath)) + file_basename = os.path.basename(filepath) + + result = subprocess.run( + [ + "lark-cli", "drive", "+upload", + "--file", file_basename, + "--name", filename, + "--as", "bot" + ], + capture_output=True, text=True, + cwd=file_dir, + env={**os.environ, "LARKSUITE_CLI_CONFIG_DIR": LARK_CLI_CONFIG} + ) + + if result.returncode != 0: + raise RuntimeError(f"文件上传失败: {result.stdout}\n{result.stderr}") + + # 解析返回的 file_token + output = result.stdout.strip() + logger.info(f"上传响应: {output}") + + try: + data = json.loads(output) + file_token = data.get("data", {}).get("file_token") or data.get("file_token") + if file_token: + logger.info(f"文件上传成功, file_token: {file_token}") + return file_token + except json.JSONDecodeError: + pass + + # 尝试从文本中提取 file_token + if "file_token" in output: + import re + match = re.search(r'"file_token"\s*:\s*"([^"]+)"', output) + if match: + return match.group(1) + + raise RuntimeError(f"无法从上传响应中解析 file_token: {output}") + + +def move_file_to_wiki(file_token, filename): + """将云空间文件移入知识库指定节点下""" + logger.info(f"正在将文件移入知识库...") + + import time + + # 使用 move_docs_to_wiki API + data = { + "parent_wiki_token": WIKI_PARENT_NODE, + "obj_type": "file", + "obj_token": file_token + } + + result = subprocess.run( + [ + "lark-cli", "api", "POST", + f"/open-apis/wiki/v2/spaces/{WIKI_SPACE_ID}/nodes/move_docs_to_wiki", + "--data", json.dumps(data), + "--as", "bot" + ], + capture_output=True, text=True, + env={**os.environ, "LARKSUITE_CLI_CONFIG_DIR": LARK_CLI_CONFIG} + ) + + output = result.stdout.strip() + logger.info(f"move_docs_to_wiki 响应: {output}") + + try: + resp = json.loads(output) + if resp.get("code") == 0: + task_id = resp.get("data", {}).get("task_id", "") + logger.info(f"移动任务已提交, task_id: {task_id}") + + # 等待异步任务完成 + time.sleep(3) + + # 查询任务结果 + check_result = subprocess.run( + [ + "lark-cli", "api", "GET", + f"/open-apis/wiki/v2/tasks/{task_id}", + "--params", '{"task_type":"move"}', + "--as", "bot" + ], + capture_output=True, text=True, + env={**os.environ, "LARKSUITE_CLI_CONFIG_DIR": LARK_CLI_CONFIG} + ) + + check_output = check_result.stdout.strip() + check_resp = json.loads(check_output) + + if check_resp.get("code") == 0: + move_results = check_resp.get("data", {}).get("task", {}).get("move_result", []) + if move_results and move_results[0].get("status") == 0: + node_token = move_results[0].get("node", {}).get("node_token", "") + wiki_url = f"https://makee-interactive.feishu.cn/wiki/{node_token}" + logger.info(f"文件已成功移入知识库: {wiki_url}") + return wiki_url + else: + raise RuntimeError(f"移动任务失败: {move_results}") + else: + raise RuntimeError(f"查询移动任务失败: {check_output}") + else: + raise RuntimeError(f"move_docs_to_wiki 失败: code={resp.get('code')}, msg={resp.get('msg')}") + except json.JSONDecodeError: + raise RuntimeError(f"move_docs_to_wiki 响应解析失败: {output}") + + +def send_notification(success, message): + """给李若松发送通知""" + notify_cmd = [ + "/root/.nvm/versions/node/v24.14.0/bin/openclaw", + "message", "send", + "--channel", "feishu", + "--target", "user:ou_088ee79216826be4a24af44f7268f880", + "--message", message + ] + subprocess.run(notify_cmd, capture_output=True, text=True) + + +def main(): + """主流程""" + logger.info("=== 飞书反馈数据每日导出开始 ===") + + # 加载环境变量 + load_secrets() + + try: + # 1. 查询前一天数据 + rows, date_obj = query_yesterday_data() + + # 2. 判断是否有有效数据 + if not rows: + logger.info("前一天无新增数据,跳过导出") + return + + # 3. 生成 Excel + filepath, filename = generate_excel(rows, date_obj) + + # 4. 上传到飞书云空间 + file_token = upload_to_drive(filepath, filename) + + # 5. 将文件移入知识库 + wiki_url = move_file_to_wiki(file_token, filename) + + # 6. 发送成功通知 + date_str = date_obj.strftime("%Y-%m-%d") + send_notification(True, + f"📊 飞书反馈日报导出成功\n" + f"日期:{date_str}\n" + f"记录数:{len(rows)} 条\n" + f"文件:{wiki_url}" + ) + logger.info("=== 导出完成 ===") + + except Exception as e: + logger.error(f"导出失败: {e}", exc_info=True) + send_notification(False, f"❌ 飞书反馈日报导出失败\n错误:{str(e)[:200]}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/run_export_lark_feedback.sh b/scripts/run_export_lark_feedback.sh new file mode 100755 index 0000000..7dd2bed --- /dev/null +++ b/scripts/run_export_lark_feedback.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# 飞书反馈数据每日导出到知识库 +# 定时任务:每天 09:00 执行 +# 从 MySQL lark_group_message 表导出前一天新增数据为 Excel,上传到飞书知识库 + +set -e + +# 加载环境变量 +source /root/.openclaw/workspace-xiaokui/secrets.env + +# 执行 Python 脚本 +cd /root/.openclaw/workspace-xiaokui +python3 scripts/export_lark_feedback_to_wiki.py diff --git a/scripts/sync_feishu_feedback_wrapper.sh b/scripts/sync_feishu_feedback_wrapper.sh new file mode 100755 index 0000000..058e9f1 --- /dev/null +++ b/scripts/sync_feishu_feedback_wrapper.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# 任务名称:飞书问题反馈同步与对话链排序 +# 执行时间:每天 10:00 +# 归属 Agent:小葵 xiaokui + +set -e + +export PATH=/root/.nvm/versions/node/v24.14.0/bin:$PATH +LOG_FILE="/var/log/xiaokui_feedback_sync.log" + +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE" +} + +log "=== 飞书问题反馈同步开始 ===" + +cd /root/.openclaw/workspace-xiaokui +python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --days 3 >> "$LOG_FILE" 2>&1 + +log "=== 同步完成 ===" +exit 0 diff --git a/skills/feishu-feedback-sync/SKILL.md b/skills/feishu-feedback-sync/SKILL.md new file mode 100644 index 0000000..2183d03 --- /dev/null +++ b/skills/feishu-feedback-sync/SKILL.md @@ -0,0 +1,244 @@ +--- +name: feishu-feedback-sync +description: 同步飞书「内容测试问题反馈」群消息到知识库电子表格并执行对话链排序。用于:(1) 刘新玉说"同步飞书反馈"、"更新飞书问题反馈表格"、"整理反馈对话链"时触发;(2) 定时任务每日自动同步。数据源为 MySQL vala_test.lark_group_message,目标为知识库飞书问题反馈-近3天表格。 +--- + +# 飞书问题反馈同步与对话链排序 + +## 概述 + +从 MySQL `vala_test.lark_group_message` 读取飞书群消息,同步到知识库电子表格,并按引用关系执行「反馈对话链排序」。 + +### 两阶段输出(双文档分工) + +| 阶段 | 目标文档 | 用途 | +|------|---------|------| +| 步骤 1-3 | 飞书问题反馈-近3天 电子表格 (`AHtnsehwShUVyDtjasSciIvgn7b`) | 原始数据初步整理,验证信息源准确性 | +| 步骤 4 | 用户反馈问题汇总 文档 (`RaL6whoYMijyYHkSlWrc7OLLnBy`) | 问题归纳输出,供二次处理 | + +## 关键标识 + +| 项目 | 值 | +|------|-----| +| 数据库 | MySQL `bj-cdb-8frbdwju.sql.tencentcdb.com:25413`,user `chatbot`,密码 `xhuBx7d@uT2gUVv`,database `vala_test` | +| 源表 | `lark_group_message` | +| 目标表格 token | `AHtnsehwShUVyDtjasSciIvgn7b`(步骤1-3:原始数据整理) | +| 目标归纳文档 token | `RaL6whoYMijyYHkSlWrc7OLLnBy`(步骤4:问题归纳输出) | +| 目标知识库节点 | `TVivwmzqXiW3YakDUzucFMRenvf` | +| 知识空间 space_id | `7612229802338045122` | +| 群 chat_id | `oc_fabff7672e62a9ced7b326ee4a286c26` | +| Bot App ID | `cli_a9311791083adcc1` | +| Bot App Secret | `ThhLL1I0F2AIKnSbpozKnhm40LTPfyap` | + +## 表格字段(与数据库一致) + +| 列 | 数据库字段 | +|----|-----------| +| 消息ID | `message_id` | +| 发送者 | `sender_name` | +| 消息类型 | `msg_type` | +| 内容 | `content` | +| 媒体URL | `media_url` | +| 引用消息ID | `quote_message_id` | +| 消息时间 | `msg_time` | +| 消息时间戳 | `msg_timestamp` | + +## 执行流程 + +### 步骤 1:查询数据库 + +```sql +SELECT message_id, sender_name, msg_type, content, media_url, quote_message_id, + DATE_FORMAT(msg_time, '%Y-%m-%d %H:%i:%s') as msg_time, msg_timestamp +FROM lark_group_message +WHERE msg_time >= '{date} 00:00:00' AND msg_time < '{next_date} 00:00:00' +ORDER BY msg_time ASC +``` + +默认同步最近 3 天的数据。如果表格已有当天 sheet,则追加新数据(去重写入,按 message_id 去重);如无则创建新 sheet。 + +### 步骤 2:写入飞书表格 + +使用 `lark-cli sheets +write`(Bot 身份)批量写入: +- 按天分 sheet(sheet 名为日期,如 `2026-04-28`) +- 表头固定为 8 列 +- 每 sheet 最多一次写入(或分批,不超过 500 行/批) + +**凭证环境:** +```bash +export PATH=/root/.nvm/versions/node/v24.14.0/bin:$PATH +export LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui +export HOME=/root +``` + +**写入命令格式:** +```bash +lark-cli sheets +write \ + --spreadsheet-token AHtnsehwShUVyDtjasSciIvgn7b \ + --range '!A1:H' \ + --values '' \ + --as bot +``` + +### 步骤 3:反馈对话链排序 + +对每个 sheet 中的数据执行引用链排序(详见下文排序逻辑),使每个问题的完整讨论过程在表格中连续呈现。 + +## 反馈对话链排序逻辑 + +### 两阶段排序 + +#### 阶段 A:推断缺失引用(策略2) + +由于飞书群消息同步时仅采集了 `quote_message_id`(显式引用),大量通过飞书「直接回复」功能产生的消息没有引用关系。策略2通过启发式规则补全: + +1. **@提及匹配**(最高优先级):消息中 `@某人` → 关联到被@者最近一条消息 +2. **同发送者聚类**:同一发送者在 2 分钟窗口内连续发多条消息 → 认为是对同一目标的回复 +3. **最近不同发送者**(fallback):关联到最近一条不同发送者的消息(需在 30 分钟内,防止跨话题误关联) + +``` +示例: +12:18 徐思清: NPC HUD 报告 (无引用) +12:19 王胤鑫: o3 分支? (无引用 → 推断关联12:18,因为12:18是最近不同发送者) +12:19 王胤鑫: 有固定关卡吗 (无引用 → 推断关联12:18,因为同发送者2分钟窗口内) +``` + +#### 阶段 B:引用链排序 + +1. **Union-Find 聚类**:将有引用关系的消息(含推断引用)合并为同一「问题簇」 +2. **簇内递归展开**:从根消息开始,子回复紧跟父消息(子节点按时间排序) +3. **簇间排序**:按每个簇最早消息的时间排序 + +### 排序效果对比 + +排序前(时间平铺): +``` +12:18 徐思清: NPC HUD 报告 +12:19 王胤鑫: o3 分支? +12:19 王胤鑫: 有固定关卡出现吗 +12:20 庞鸿潇: @王胤鑫 11-2 (引用"有固定关卡") +12:22 梁晨: @王胤鑫 没啥规律 (引用"有固定关卡") +12:28 王胤鑫: 是否只在移动端 (引用NPC HUD) +``` + +排序后(问题链聚合): +``` +12:18 徐思清: NPC HUD 报告 + 12:19 王胤鑫: o3 分支? (推断引用12:18) + 12:19 王胤鑫: 有固定关卡吗 (推断引用12:18) + 12:20 庞鸿潇: 11-2 (引用"有固定关卡") + 12:22 梁晨: 没啥规律 (引用"有固定关卡") + 12:28 王胤鑫: 是否只在移动端 (引用12:18) +12:26 孙时敏: Playtesting有数据 (推断引用12:22) +``` + +> 缩进行为对该根消息的直接或间接回复,形成完整的「问题 → 追问 → 诊断 → 结论」链路。 + +### 步骤 4:问题归纳 + +对每个问题簇生成结构化归纳,分两个部分: +1. **问题描述**:按固定格式描述问题 +2. **当前问题排查结论**:从对话最后几条消息中提取排查状态 + +#### 4.1 问题描述格式 + +``` +> **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** +``` + +#### 位置要素提取维度 + +| 维度 | 可能的值 | 来源 | +|------|---------|------| +| 端 | 移动端、iOS、iPad、pad端、Android | 消息内容关键词 | +| 环节 | 关卡内、关卡外、知识巩固、巩固题、单元挑战、挑战、听力挑战、阅读挑战、口语挑战、写作挑战、单元强化、瓦拉学院、报告 | 消息内容关键词 | +| 课程 | 数字如 11-2、L1 3-2 | 消息内容中的数字/字母编号 | +| 角色/组件 | NPC、HUD、音频、组件等 | 消息内容关键词 | +| 现象 | 一句话概括发生了什么 | 综合理解 | + +#### 4.2 归纳输出格式 + +每个问题簇输出一个结构化块(结论在表格上方,表格放原始对话): + +```markdown +### 问题 N + +**问题描述:** {AI 归纳后的精炼描述} + +**当前问题排查结论:** {结论} + +| 发言人 | 对话信息 | +|--------|---------| +| 报告人 | 🚩 报告:原始对话内容 | +| ... | 原始对话内容(含媒体 URL) | +| 最终人 | ✅ 原始对话内容 | +``` +``` + +#### 4.3 排查结论提取规则 + +脚本从**全部消息**(而非仅最后 1-2 条)中提取结论。优先匹配以下模式: + +| 优先级 | 匹配模式 | 结论 | +|--------|---------|------| +| 1 | 已修复/已解决/修好了 | 已修复 | +| 2 | 确认是bug/确实是问题 | 已确认,待修复 | +| 3 | 不是bug/设计如此/非问题 | 非问题,设计如此 | +| 4 | 有解释性分析 + 日志已上传 + 排查中 | 疑似{原因},已上传日志,排查中 | +| 5 | 日志已上传 + 排查中 | 已上传日志,排查中 | +| 6 | 暂未/没复现/未复现 | 暂未排查到问题 | +| 7 | 其他(无明确结论) | 暂未排查到问题 | + +**解释性分析** 的来源:消息中匹配 "因为/原因是/应该是/改为了/导致/预下载/上云/首次/正常情况" 等关键词,提取对应发言人的判断句作为疑似原因。 + +> 注意:脚本的规则匹配是辅助手段。运行时的 AI 可以根据完整对话上下文,修正或补全结论。 + +#### 4.4 归纳示例 + +```markdown +### 问题一 +> **在移动端关卡内(11-2 等),NPC 头上的 HUD 偶尔变成一小条** + +| 发言人 | 要点 | +|--------|------| +| 徐思清 | 🚩 报告:最近经常出现,无明显规律 | +| 王胤鑫 | 追问:o3 分支?只在手机包出现,unity 里正常? | +| 庞鸿潇 | 确认:11-2 出现了 | +| 梁晨 | 补充:只在 APP 里发现过 | +| 徐思清 | ✅ 确认:确实只在移动端出现 | + +**当前问题排查结论:** 暂未排查到问题 +``` + +```markdown +### 问题二 +> **在 iOS 端关卡内(L1 3-2),组件数据丢失,无音频,Loading 耗时约 10 秒** + +| 发言人 | 要点 | +|--------|------| +| 胡陈辰 | 🚩 报告:iOS 线上 L1 3-2 组件无音频 | +| 安君仪 | 询问:哪个组件? | +| 胡陈辰 | 复现:Loading 约 10 秒,组件数据丢失,杀 APP 重进恢复正常 | +| 毋益飞 | 解释:Loading 慢因内容上云加载;要求上传日志 | +| 胡陈辰 | ✅ 日志已上传,待明天排查 | + +**当前问题排查结论:** 日志已上传,排查中 +``` + +## 定时任务 + +建议每日执行一次,在飞书群消息同步完毕后(`feishu-group-msg-sync` 之后)运行。 + +### 创建定时任务(crontab) + +```bash +# 每天 10:00 执行(确保消息已同步入库) +0 10 * * * /bin/bash /root/.openclaw/workspace-xiaokui/scripts/sync_feishu_feedback_wrapper.sh >> /var/log/xiaokui_feedback_sync.log 2>&1 +``` + +## 权限说明 + +- Bot 有 `sheets:read/write` 和 wiki API 读取权限 +- Bot 缺少 `wiki:node:create`、`drive`、`bitable` 权限 +- 所有操作均使用 Bot 身份 + `lark-cli` 命令 +- 创建新 sheet 使用 `lark-cli api POST sheets_batch_update` diff --git a/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc b/skills/feishu-feedback-sync/scripts/__pycache__/sync_feishu_feedback.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed761a05552989c8d8099a6ea665d33bfc70d5f8 GIT binary patch literal 27011 zcmcJ&3w%^nx+l7Ky;ILr<(&|y00AnH!~iNpUV#LOLU?O;gpQT$0uu6oT~$EhPIsvw zRR~fMltNGnMJ-yC*lo8F@o`Sioa4-yGgG0W6xGZfyAPGe+~1v!XwNx%XMXqo*V?tK zDi9y%-kFVAwf5R;ee3Vli~EQvW?lCp)rRuHs*4sgG^%{mx(Z+ zGb1eEEC>s^EQCc|Ho_@f4#HwC7vWSc58*T}AEA{iKv=>RBAm_@A)LWYK{%5uMp){Y z#ZBEkhhcb)jB%8{mFPOlbj!MBwrTE}EW>EWUVG)%2XBm>dS~p_K>YZzu{XSN|Iy*M zUmrdA((u_!+W3)kqfdYIm#Z(``tUjHy6(*z*IGv}e;7ahqF8R^;|s&T_YOaK`7c+G zisj`J?g6ABo+ zw>~^H>^l)Z)<1mW*sYJw-WoU-f97=j(CgYUGH?wxc}&6CU5=QBgs#>OM=XPcoTJ@k zx5;8U&auPpZg+95Ev}f7bL@3HdZstGmA7!s<%@Rk&D{PrcZG9rdxgu<+2ZJQA-$rd ztE089g<)dqmR)vNbC;{fbYPcz<$?RT749wbcW!-TUo2yvz1`mAayVVFqM9|XPN!qn z{>{6#KGI$5YO^~xx3t#p+u3=4b5BJj-__-+tmxd=QMu2-J6pRtot2$koTGBz{Dl=& z3oGuaY;NtWY`62f%Ujx8Z5p=*y%Hy{zSfAI4vK)~zC+ zpE(qN=L0e4ub;p5(TS~_*1C=4!_^na1k!Z?P^rShr+zOHq92~A8G_WpK^?8C9C z2+;e~qd-)Szpn{c>`v@VPR=RL$~QAMWkXN%PwHcJrQ69@ws*DI+bf;ubNYMoHT8ie ziLZ$E%t+MHwFlGGZr@+o!efd%U9I+ZXXSxbd)IDvYmbs{IxZk9^SmQ zeoI}`sa(J>Mti-qSD#Ix9)K{E5B!d#MC<-t|mw4K0X_jO~;=z2Z4wAGSd~ESs%^J z`%10QtHv3HQpH>FWy0{26+wspaYX3wGwn<-BQR|w^^V6yCW}5UvO2^Vfgr{x3MNq( zSr7E#WS8V`CGuiI6%9OD1u?bTwPR67^Fo^{rr6bmb*|Xc#Upe(9ehl)$L@4uh&a0K z?9FzkgKiey>2TT9G4-DAj&5u!q;++6IwXR-)wE{)<}Ffu!}?9h_}0yJo091@ zn>WWaV*c8dHkL0&bDThmjFrzKEi;qBQY;BmfjS8)xZ{z&qr3t$CwG(H4-+*1 z>hRIS@poPtKK1(W*>j^upN^k-Z{($ecN`A(QgYHH5VLi2-P*cUTdW-pr_;XE(bUS# zvpOA}oP%%bw0Ah>Sv#CNn_S&{5W~0wS-Ozq;9BiXZoYk&RSZ!rn5anrlB^Kv;-;1?CUqx)@`z`{CVng z&E{3~Yfpyto_nh4e>NpuY%x`fn|F5cF}34>qs2|mpB)Ytj*Is8n6jn4%jvKwV_Gzq z$5N%o6UI`EDag9zXJM*h3chQ<(@9nf9VKyj8*xPO-BM?qd<8x)z@M`bfrq)Jn-bAY z_0&cU=Hrj~9(#7XXLVGeJ>2kQ!#JbL){N^JW4?E7AS0}u9y}0i>(29@Qp#G zTsm%K^mJQ9bW;F9p*j4}lMfwS8`YUS58sV<$cm$TVH}Z3_X6WeVxYDp-a+QXlfppa zk>N_>Ax^<5cd7-3Q{kWblIE?%j_GAxI;nJ95<^KT3?r~@8L3hD?vxIQrcCpsL~@ zorwF6i4+s|(Xh`meAZ`m?s7O>F%|uB_9*AJcJAoe}t-{7z9` zZgT1XO zq5!ckZ5n!tV0F2h_wZdU*sw7<$gP+hr(DdudBdigu*MZCU?tT|-RK=IrW> zDcifyC^Xl_0k%!S--m|8RJ_9p>WHr&h0oJ3|{H)Eh3OM%oFMPkanW6XY3aG;+_YEzo|Lnf-=AVT%Kac87K}BfcfaM#d z+^F)b{zk*-GmlsMs{K1ob@X*abThFSD;M;yz9b&vI3-!vsWTQBU-Pn@%fR~7a=M)wf#vl0XSifcu6%h~ zyCzxVI3Y46=&9di%M^m5jU0}5Je*0&!DUjGBU?~#X1X^lG|Z%-jD~U=W|43+<01qK zeK?dk!@Mk$AU(y}a@>T5iF?n)KfE&hV!yZ(tY9QYKRFpc^NRS6r5e-lgy43%oH31^ z<2KU`qs5}#^OVi|P@ zS{!>^piOXKkY0-0P0`%Nb@R0(n~ZO}6Z7lEs>O}QZ$+*K{5k(Eg490Kndr{x2{!i6 z`!w&f((sB6;i`?{nVZ5hHivV!M08s{wP~Bt;eRBmD<7&@9xh)IUa=*du{D}u_Ad3j8Yu4V>W`%PrLVSn|XD^6o7lsuJ z#Umy`^q^A=Ll!#SuB(wc>2{U5lKFSC0!xtjaYc)KCqd`aM0AccMT?B4fS~WiR(Mk9 zN|IoLLRz?;;9L4+N3$rhS0Gk`n7mifEAK%wdX=2KSC!h}oWhkPxdf#ILr}d%+KZ=_ z*?twJ)Phn_8TgAKtl#Whs~_yj6w!y;xc#YfRfDkXStM>L`C|v zKIBSJQon+}EkE@p#iSPZ8U#aIk(8SH<+7z5JbUyN7a{8D!{rESv@3UVs6fZ^LJ-a` zCufmmASqEQm~M08uvY9Bk8L`WIJooNtH&XAXoxXmpP>*%5vy%%Oz-G)l5^SaY-w#BBL@tB4}u=m-2Hs3%fVNX7j=0# zZ|~gc=#iDw^vJ5$^k~YOUFWliV?AptZR2qp4Iv&fnas;d>3!EyQ~z8SyA3< zGl>VgY7Gu>@I!K^i;pR36)}0cqm$wweglE`gBXb4h%7PX9=^5HWi#?iNC7e=A_`*p z7~2AL+}_?qsLz;IM5E5u9!E@P*X@PGuL|{ym9OG!bd)<`E=tKMGt<;2JOL)v#Wya z+0_Gf4M_S>V-0GxGyQ z!N>Zm2TPZRGnaXlH**V5we__HbNaUqzU-Ty$z`-_)CiO#hs~n((X#hL$!&4Y`3ufwJJrV0UnLzoq}d{>9<^rBOp3s@V~8 z0E-&#MU%|g!P<~(VAD4lN{iO3`bN)yK!3>hkbmE)-oD<5zSPS`O_|5tKDU3%u_tco z&Ef31p@&0D2V?{HeX{g}rJrSgHY5B%L%4c<*s@_rzwt&^{&+EZG(Hu?(zmx~GgH>E z-(m=;uHx^%Nm#uJ#JLtQ)$whjljiOl6pXUw&G><|Dnk&k>A@;-q-CqW<{ z=B6&|x^Bv#Zc1QNxMX2OcW+p6uXr4g%_Tj_Mibif6eJu@pmoq9$vl%ao^aObjdv5& zGJM<*x?L20-5PisM7wnbk%oESkNc1Hs8-C`wt21%;z1b?!Nrc&P7eD49U}4y!rP|z zDibt0Z*OsdBZIsN@;lyz3Vww@CqZ)_=1)2c&O>j;O`U-r&8@+_h|U&P*zWFNlBJr8 zxjDt`WWewwJBMy0;jc}vDSDMi zQ4;bE=UC#DqHKLC4R^pe-=pk&OUeKq=_1%RO`m$ty#4*p9_};7QEKBQ`cGA(x z18f7z8OR(YMoYE({zU7gbaA|N;&>%T&6oy#^0}y+bZjK(|0~CaV0ZeEvEenzkq}#M z6x%FhU@fX3j?#&1QZV+K1d~(dXOFgmbtmH1J03wHm>>vCta}#g{^&0iVDo{6wC*A- ze=RvqhMx-!zxej>Z_X38`%Y1lm2l)#%8iI9^o}3;?Z|IG{3p*-v@{Jy-#i%ip2pEX zeAIXAY9M~dJ96bs9R1Lt(C<|h))f?K9El(OVC>K_q$jb5b;YeK&kUdQ-@0;z=t~a$ zcJ#@UpeaUQeQO28@}e^1)_`aD;)B8sXG(0lXgqpmNJn3 z-T^f^6GW+V)KkI_2WvJoZCSIaZu6S; zYinte%DxAzDrd{2v{g(A{?@_8*jC;yLT%-ECyA*yQDp?${AycgMgsJN7koxpqPPt`a{wi9~?xkMu_*y7K7|Hg&W*okVoQ zJ20R`!R6$Sq0BS*a~iR`Jj{4LV=#H@ZedW@3z-&T5Q*v4nF%)}c->8_J(%_?!>~auT z1;t+D4PExloWej=VDIUBgVTe}ksRA?Ms6tdv(YRR?7Kfu)mI%XxRzBGEwzP8&OaL3 zc)ro!a3i~5T#amyXHK0S))d|}=lb^|s~aR+NpNGxe17Yhj?kuv`JUU1KBLfE6E&f6 zzWx5aU`ro~m}W%t3(q!&6rqh5)&1;6ZKQa?VE%&Zc`FC=Rz~t_KjjDW>ir5d;q3AV zs?IA4)b#BS?0vo`T3*#(a`DmrjTajS^UKhk>9d1-LsjS9q2~VSk&=7S_<~u1n&`9{ z!J5@j%JDj}B~nw=ry;f5Tde))&nBO2gz7ju)VTAmefiz1n0q z0?onc5p(HnMwyZO|Gn`;Mf0v@&5M?l2JPqbFeZimhi+u%jOze&Jd?3lubEecXWWZ$ zpwO%Oqs|=Fn~$&ctqsfw>*ND9@cGDQAX@&pjTSs#}r_;~}) zH(B~ADCGn9Zc;#)3z4r}%ufZp5Xw;@Fr|wG8AQ8UH@CQxfEIbRN#DA8&&i{YX+F=`|eJpto{KJWVM+ zOQn-aNd?IrTve=sdKS zN>4~>N-=_#BEm@;_vKS+&MadDk|sf?-y{l+Wr*~|h4~Sml&D`CsUtb5T6{S&#+NI> zl5!A=s4XeekRX9#O``{BElIk;mou4=Q<@`h4tlFOsJWCOwX#?1EkK$UBTxWZTPJAI z?|;Ck>3unZp43mI6<9yU7TGQ$QNnufH3~+t&(F(_W?=Q0kdnxgTw}>Rf-$kiGE=#d zk|>gkjvDBtL`*R4V=>m5f>D%2^_o%6+?Eu!rbdC&9c0jxHbqQU zaS#6#5=81NBSBKOa*{&JL&>7=GM4OgbUF^~;g|JHgZ9?dhn%zXV2f*06Rj1Te((mjD8Y#?9YW2F?@X?!Keij>xyj3x53M+*eVL@u(4zRP0l z0sbgTILUxm#g4`lM5fVGIC&q*w=q(Jh!N4Zs4?5W(bpQNI?@@T8~P66T7m*K4$S2Cs?G2gSD5O`&py&92XAQejwA;)QVpGV{c`6&jNNs*Fl(6{8- zPtR|h;B+9We9E>2wv&L7Uz{3A+0^&sfOJH{_`A^-UM0wUDnU+$u^VSI;xaMbO=MfG zBUpnlDaN@yUgc&o>g7+a;g6JnsAqR9uB ze*AAJ<0O(=jP4C{j^BPF@AS;T-ZQhkE4{L)&fvEkSrj$p9)H63L|}fv334^K^UdAo zclS^4Z;8yRzGkY97Ekl5{QUFUFQv$M2BXiun?y?vI1%)b^_3#UIQC$2_1sO$P(j~a zOO@`;Spb#XnUGl#N{*w8Hn9(NY5S0Q% zz8k*NNY(UA{o!WNIte<%yj@y=iUxl6Y0C%OhbkV9%zfCy(9yq$8h1OoTL>nT9v-?EZ}LguVORr;$$emn zfvVG1NMg-s$pV3N4_s`ZBv5mDW@3W)H;_h;2b-MtAPycJ0KjvrL|q0fD4r(Ca-0t_ zWd}Lzs1}1iPf|7X@Bp_ALmncTh#4m~gyMUG5b%f?f&UH4_00S!y^xbOKZWdAU{Q0H zR~6M6{f5@a=6KW4Pa`LoGf=?<|2To(gdnD9huKL?fz#T>6GZ|~ggSIO`AhUW_MkJSAcGD~8C33a zNMLM*qBKS$NsvT(zyRf0Ys;<#ofA_-ZPnCa-vgOSLcRpdfyHn@#QgXo(h@I|dJGAS zB2qyyb%J6dK)PJS#z8M-z^UP+ONf7mH1#qmtt7cfGaXn@F!$fQArU){O z%sC-N#8mECb3<*qsWJFXqTVdp zT-VGP)Ijp{xn>sjlX5mn6zt!oc8cHayDlj>sj}WMyJdjFJw$+&x6q{ z>utuOBr>zSX+ia1-W;z!T5S8mSQcPO|DJV)jb&arg6Q;$FET3w8~tV?S_9dY7tXBo zDiNULD3Q6wzqg<5FZqpPfE_6LK=J9SE2iho;mkE8&zh*la9r!t_WjbU9n#FYWwJc$ z_9~*;1-BVR#w>5`jhq?KP@2y_7~0%l8p&Biq;Ru{6b{5+-@<_FTGotccK)d~eQN?6 zA*|bSvLTvXbgH4RAz1Z!cIj6T#-YoA1c{sls797Coye^+$+8G6yk?pPE9O(iK4W0- z=b0s6!L(%hcnLZ^K8Mj~`PT$G!n(O(#oTXiua_}7^S+H4E55z0WlWXd{?WMP+uJI} zSoQ7gM_9%*_uE)zB}p*Xvrb~X^S))xeTu&^8P+_Y_*=7~o>Ba5MP|KH^#@gEy-x9O zEA{o6s=s3m^*O4)%Qn{+s{U@40^tN)Mi2)%tiSeSgAn`a$&O-jL=w@HbihhSaxV-J z68{uuAxMIoi1JmIsN`^jRwbAgsNPayuE{JRq2^#yhAeHQMRz>Xu?ykAJcyWNoN5Vj zB-AQ+BgeB-$*H9;Xom*vPzkF4&F#pT&<<){nP><7^27b+v_g_JhpCL3pDw5;vt;IU z(s=)nst$eElh&Z`2J~G6A`2=@^uOkR+dO1YaY>bs(Uuf-r6w+A76jImh(I!UGiRJ^ zbqPs94vdiG=!c2)Ly&<6&vf5|DExSmq5z^rp%$hh2M_hNsLj6h(Xo+_-Wh%4$&vSh z-?7#SYn+gZlyqN#NRo61B|AaEoFL+*u}&zCCFVLGM7@kK*B9mdet@A0l?D)P+sR81 z!QEv2JV`lD$akV0Z*sIdh-Sl?Mz2XccLh28rvi>h(dXYE+ITigL^fv%UYZtcJ~M4F z|Nd{khOC)+YRXiWHo&I%E7`mwZ(aT?*)}83T^>F16pfBv{&*zx>|e>mjK2B&sP8wG zBOe~T_2KVrYMZQQ%ADm(N*=S9_tca(Znw3r-#lk|>J^9mdKfhH6i>{w!Ol&{rD#Qp zP39&v=C~)h8lU5x1}H|lwXJ!>D&&U4*66{$m~6u;{y!p> zc7FKmui?;T(<3La5CiR8{MdQOj07<@cKB!_hUFuSesV@k7NC+7W3=vMDVr?){lbg< z4ZsBGnY0mWVL1#f5?VrRh!GRwM}ngteI$&2bZIzn4*&5{n*Xna{7?9qy?-%&<_aGZ z(@96;GzI=_{2x=9-FGTM%t6IzYXBdq zsa!=FYD&d_?}fxO`HF0KrS*!r0p9(q+std)wgu@^o`y^t?bF7hF{aAN92 z0jUcg^^JUZmLCOFH`(_8aGxo05PJS<5h)&hcx9o~(>mO`bzo~9j&E~q8 zlKOzelrT?f-9z~Vn>MB(GBi~$#S{)V)5s?`=e7zAh)~Mw+>`3RFFyw$ZfsFUwUL|G~en~dEGw4ZIvNc z<>blHWSbm4eF;%1UKgzIM$*WfOlX}lTR27%kgescer5jz~YFhj`A7! zes0<6WucNt(Y)(L^9PIOM~W6EO59dus2rps%O2sT(9B`a#JKYw_8Jb!-(Ck_5T|THN`ch~1th-w}NmyP=3SPk(lP)St zNho_EZ#)Re2PcD61q9)C$klq4(1oQL=nJqFCIp#ukV?OV90LJYn5O4cN#8id?pjPa zd(;ML3)}->E38P8cWMGHaU5$vC?(ujk|`RbPXhVN9LHzCn2#2;|kK=jVax*#DMggQrgwj z=wdzoH=uPeGQF8Zj+cN(bhSTWeJ0k544OkFS4isTjVS_NN|Rbm?B#pm7vGyLSoqm# zYuX~^;EKqahIZ3}k)2Y=aK$~OcNQUgvXvtw%VCbP_?*deaZ`mX%&UTu= zC%08<3oIZ@Ce>9Y?aq61(4SnvEEuROj2ba$V|h6@y*IZrA8$sQo+V_a^)6JJGG+sn!lnU-)47kqr2%a%CZ9UXSQds#X^1%0h}wSw3s=xUc# zO8K5h_Mvx4r357bRe&zJZ#3kAX`ofGio=75*e;O>~(sRSl{09^f zSd8!(e-v=P%iQ)@eFuDps~`Ktw(Z;JZrk3Y-{o@caW1c}tR&{va*=qG!*7xQzfb_G z8+L-FwYH_*Veh1bT2HAs`tV91}v!AOqtQFoSS;%@zuW7;jCF9 zS*RwWukf;oB>(c@$`BjTmqDlpJN*5={m*uXHTjfA5KRXu&$|!68pb$;l-J^~lNjLv zC~;3|Q8%F?(w3o{u(UtC1}R}be(uDx*7Q||bw6EraOhR3mrL|!*7LF9PT z5=4e~>EqwAkGqNFm{u|N#*4&g^vLV+z9ZlWaXSQ-(-_ghxThePxManBee09+xGlsg z^km<;X0d_=k-2F^S-u;=n1TcwNU#n;j~wfi-Z+8UpR+!O{S793`<(As8@Htd6$1D;}4Kq!$ zDj;v*2gp~dARsDj{sRQIJpK zVZLj(@I=$XlkM>dhy@Sxjg`^O46co6t30c2E0pjn$z*c!JnNzcA|F;-yzC8q?nx(P z?i)`pJkcG|mw4HjPJc4XzwtzFpyWhhbV_l63pfK0dMRs>fiiF>H=6x9zQ${Y8G%Lc zL)V-98sEBW`qIG9Jathx(d7HKUo*@K?83LqEdTn!%$e6R?+IzZ-A|X3 zvrn!P)fOk~L-V~EPz&Vdd$l)orkf^q z!Jr2A#Y38DL`R|b>HU0QW*|4<3@!?mg|>&*56lneEc;xue7uSb$oN9yA5J*itr1;O zSW$Gh)dLYIQARF?@UZS*E7d=uOOd^k$?M7WHa?wnVjj_Xbuc4T!gLbmltc$Wl=~pX zA;_Ytcgns&2SCgK=mS8zVvn;?x5@vx`tUK%TL^l_LP{%4r3=12h3`aa1xaz>x-r}ac? zIb48$Ds0og8g>R4J(kOq>f+gM!uqjXlC?rgmLTU#1&vb%eMAXd!m`OqoQ$R5ayYeQ z6vE|_To|!D2qv^#K4q&5UZ$oiy!HPNl$W99plvKnYc*VZ(CR5^DaABpYFf&)?rAow zdmEaiB}ZPmiX?t0H1>4jZmXnz1lN`&3FwLJ4!xD2+6vL2Riv*$qmXOV2w48#p@O?8 z?EhuD6#gFop8pRd2qL&!gHn1oO_QQ@lh%m;NKSG>XNszmyk()9v+_Vqz<#>+OvX#5 z;HERC!MtVP^8ZLG`i=yTBAC#vq>a+!_%hK@ENu%AV(gAbfDu{R7We_>r8Ie)8U7@I zz%EnK+||`i>3;sdB8C4yNwSRp3O}L`GLg!r+iHO}qSrduJG*;+aA=a*!enTs0pP#D z&D4~2a^ZP#fBI1TncpFgXViNMAYMYMAl`RJK;U*J>aj%iZB)*Hf>@KRHE2_eXAB4bnzj4Wd4p?4E~S z4hNo#U$}^*bFT~H)({XyOVDm1{_!D_KJvDYW}r7m#ozzUtxw*8E0}=T=$mjv`>j9{ zUjK-W!u!98UpP48e+KVv8{q#K_nHLfUWnWy=ihq@8+opO^xgCBe=R(*c%m@Ddc!Y01>GW4h5=}TFkilp;p!-^ zoMSWwP1J>BC?Fu482d@{B*GO34{*fp<8~FwwGpij|9_K!>Y9mkAcZf$m*KV+7c~o` zd>}ssyTGRBDU&x9@tEG;X>ad#wmOMwCT2{&QMOIw^sKx1o?!$)KQN8S%sPI+cL2D! zf67qavWRJ!XU$EM`S?EHzHqKBG(U78Vp`-`^NoU4*04~`WOFjl+9((?Tr&VhOs(9= zQuze2s_`laQC~%@bDr%!@~gl~l8^?<6)`QmZmJ$MRYy!q@GYSY+7?`x)CS$mFFdC? ztqB(QD!R7yuV>o^ zvu&X*q1FAif!a?V`rx4t*M64u+4S&|b>xnMvh|;5Z-9*tppENDo4?6sfO+Q=sNv$J z0|)+tlJJP+QqKRh} z!6Ee;H0Vp+N|6--OPzd(aE!7d{~i&-h(P}We|E44#OJJ@vIw}KkC0bj5wK9iCuKas z$*I*kdlcSXoI0xo3bljh)(p8k+AKMWbOYElV(QtJV+ zp4#1lmQoriA!GjN1dyP(aYb3Hbsa9>WLB$KG>rnI77~!^ z-um#!9q_fH9gb5WdBq(~Z1zcN=Gi^|L*7Uut!pRZ-IvvSABg z<%lasvic0TYD8!Bx}U8Mut%1`T{f!GdH4D>UinSSj9^`8LBvuC6Tk!m{OFLTSc-4@ zT$4X;0w^eWCu4k3d-5?LMpxS`&|&Es%t=ML?4{(4(hEZ;5#(}mSCXP5*Ye?Zq67_y zwKS>dms67)hAwF(29oB>NVYH;k=U>wyVNh458jBWJC1N)KxPtDGQ#PI7Uoh?mq^}v z<*p=yi2IG?U<{3d458`ev{j!Do71I10PIQq?~+VbuOj7p2WB+M+$9?YU8J=CaNAS@ zH(;-mEl(d8kjyZ16$R6SKI)m23=WAvZ_~DuD_Q+KQ!8soxZXjD2i{$(hQj zPwRb_1Rr-KF=^@9EQYdJ} z(MFjf#HkLQ#9g)|T&?I0A+-pwpxeY4q+!{kX;1PJIggqf*zM2sW_-9CoT7j)vv>NTXba2g#~t6;z= zl%$Vh=bW@$(`l|5=~`J=rPK={d0*KC$TJDd)Ww1DZmCL^NY33hV6$G_6VjWRR$FP> zEGKL=tXeFWn1D(yS7VCEO5!I};K+Iyv(sb6kRYQ#C)1h}c$E0gk|?a5Rj zO~>oHUxFwY1Y@!d7NRl7jWVgVn(PHV-_!T(+{tp?+1I20HB~=1FV(7}{~e;NWXrg6 zL4RjS_`Fp#xbxA>@4HhTdN|yYE?!|x9NXwsirr3;C$*v=E)K!f^Wwc9lU>?QZrmnU z1IN^f%hqvYw4;4bPX_K}6t8k8SK)TyJ~bEbc2JW^-0BYYcl1s8z$Wf#Cpp#(thOnThVeV02E;|`#u|PnF01KuZ6L2uEyA&>314X%OqapDCT?0o zqLU|U2A2ePVWaS5()jsgrXDA$eu_ZjH-Xk#&`HHJ$$o~@ow-I*01QI*7 z3G0G7SBbPWWyM#M?7t_$RucR@2?$*bdb-Qq>Ed@I!InjDq#1kuO6K(-8kH z+I2GB$dFVN55w@HJ5nDB`7y@Ko{{vi|~H{UpmF>VkT8gi~1W)2!=hN~VPGOVR{XN#d=ym#Z*EMilg8$X$qW{%Y+)1OizzHmDtQ^wZgG^{tSZ}?M z4UF-r>hJY`qyK|(sC0d#WPRAWA*|gvq}cSOT6b7@N}z=n3~3hLESMHly{SL1f6W*v zsDuq^Mh)w$_pb71<6c959j=$j3A9|85#(O81&cy;Vaxm>{eoytL122I%KtOCRvBto zKYKGXC!i3`{{yRoGlKRId!7qE5?XM{*Ob`7sT(K;iyZrOa6<^6o z$#EsA55kiS(=F3mR;_k#Y38EWQB%!5j@{44|vr#bQTJ0R&f8Dz2|%T zw?$^J49{BiX?57rFr;5kzhOKVk|xeR+CU0V4eSc(B9?`;a8zG#c5|>OQd~Z$FZZq^ z*%~I)X<7Waz8XCVXV3mzZySFAWAV*0MyUKtZffGXTAG^RZwc2+S(j71k#s59i1!hsL&bRH%%Mn$VxyR@rK`Oi{|g8l>`tC= znmjoncyciFRuYtufSgbK3=*s$0g@@= %s AND msg_time < %s + ORDER BY msg_time ASC + """, (f"{start_date} 00:00:00", f"{end_date} 00:00:00")) + rows = cursor.fetchall() + conn.close() + return rows + + +def get_existing_sheets(): + """获取当前表格的所有 sheet""" + env = get_env() + result = subprocess.run( + [CLI, "sheets", "+info", "--spreadsheet-token", SPREADSHEET_TOKEN, "--as", "bot"], + env=env, capture_output=True, text=True, timeout=15 + ) + data = json.loads(result.stdout) + return {s["title"]: s["sheet_id"] for s in data["data"]["sheets"]["sheets"]} + + +def create_sheet(title): + """创建新 sheet 并返回 sheet_id""" + env = get_env() + body = json.dumps({"requests": [{"addSheet": {"properties": {"title": title}}}]}) + result = subprocess.run( + [CLI, "api", "POST", + f"/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/sheets_batch_update", + "--data", body, "--as", "bot"], + env=env, capture_output=True, text=True, timeout=15 + ) + rj = json.loads(result.stdout) + if rj.get("code") == 0: + return rj["data"]["replies"][0]["addSheet"]["properties"]["sheetId"] + raise Exception(f"Failed to create sheet: {rj}") + + +def write_sheet(sheet_id, rows): + """写入数据到指定 sheet(覆盖模式)""" + env = get_env() + all_rows = [HEADER] + [[str(c) if c is not None else "" for c in r] for r in rows] + + # 分批写入(最多 200 行/批,包含表头) + batch_size = 200 + for batch_start in range(0, len(all_rows), batch_size): + batch = all_rows[batch_start:batch_start + batch_size] + values_json = json.dumps(batch, ensure_ascii=False) + start_row = batch_start + 1 + end_row = start_row + len(batch) - 1 + result = subprocess.run( + [CLI, "sheets", "+write", "--spreadsheet-token", SPREADSHEET_TOKEN, + "--range", f"{sheet_id}!A{start_row}:H{end_row}", + "--values", values_json, "--as", "bot"], + env=env, capture_output=True, text=True, timeout=30 + ) + d = json.loads(result.stdout) + if not d.get("ok"): + print(f" Write error at batch {batch_start}: {result.stdout[:200]}") + return False + return True + + +# ============================================================ +# 策略2:推断缺失的引用关系 +# ============================================================ + +def parse_time(ts): + return datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') + + +def extract_mentions(content): + """提取消息中的 @用户名""" + if not content: + return [] + return re.findall(r'@(\S+)', content) + + +def infer_missing_references(rows): + """ + 推断没有 quote_message_id 的消息的引用关系。 + + 规则(按优先级): + 1. @提及匹配:消息中 @了某人,关联到被@者最近一条消息 + 2. 同发送者聚类:同一发送者在 2 分钟窗口内连续发多条消息, + 认为这些消息是对同一目标(其前一条不同发送者消息)的回复 + 3. 最近不同发送者:关联到最近一条不同发送者的消息(需在 30 分钟内) + + 返回: [(child_message_id, inferred_parent_message_id), ...] + """ + added = [] + + for i, r in enumerate(rows): + mid = r[0] + if r[5]: # 已有 quote_message_id,跳过 + continue + + content = r[3] or '' + sender = r[1] + ts = parse_time(r[6]) + + # 策略 1: @提及匹配(最高优先级,最可靠) + mentioned = extract_mentions(content) + mention_parent = None + if mentioned: + for j in range(i - 1, -1, -1): + if rows[j][1] in mentioned: + mention_parent = rows[j][0] + break + + # 策略 2: 同发送者聚类 + same_sender_parent = None + for j in range(i - 1, max(0, i - 5), -1): # 往前看 5 条 + if rows[j][1] == sender: + diff = (ts - parse_time(rows[j][6])).total_seconds() / 60 + if diff <= TIME_WINDOW_MIN: + if rows[j][5]: + same_sender_parent = rows[j][5] + else: + # 同发送者无引用,关联到它的前一条不同发送者 + for k in range(j - 1, -1, -1): + if rows[k][1] != sender: + same_sender_parent = rows[k][0] + break + break + + # 策略 3: 最近不同发送者(有 gap 限制防止跨话题) + prev_other = None + for j in range(i - 1, -1, -1): + if rows[j][1] != sender: + diff = (ts - parse_time(rows[j][6])).total_seconds() / 60 + if diff <= GAP_THRESHOLD_MIN: + prev_other = rows[j][0] + break + + # 优先级:@提及 > 同发送者聚类 > 最近不同发送者 + parent = mention_parent or same_sender_parent or prev_other + if parent: + added.append((mid, parent)) + + return added + + +# ============================================================ +# 反馈对话链排序 +# ============================================================ + +def sort_threads(rows): + """ + 反馈对话链排序: + 1. 先通过策略2推断缺失的引用关系 + 2. 通过引用链(quote_message_id)将消息聚合为问题簇 + 3. 每个簇内递归展开:子回复紧跟父消息 + 4. 簇间按最早消息时间排序 + + 返回: (ordered_rows, clusters_dict, cluster_order) + """ + if not rows: + return rows, {}, [] + + # 推断缺失引用 + inferred = infer_missing_references(rows) + if inferred: + print(f" 🔗 推断 {len(inferred)} 条缺失引用关系") + + # 注入推断的引用 + enriched_rows = [list(r) for r in rows] + row_map = {r[0]: r for r in enriched_rows} + for child_mid, parent_mid in inferred: + idx = next(i for i, r in enumerate(enriched_rows) if r[0] == child_mid) + enriched_rows[idx][5] = parent_mid + + msg_map = {r[0]: r for r in enriched_rows} + msg_idx = {r[0]: i for i, r in enumerate(enriched_rows)} + + # Union-Find 聚类:有引用关系的消息归为同一簇 + parent = {r[0]: r[0] for r in enriched_rows} + def find(x): + while parent[x] != x: + parent[x] = parent[parent[x]] + x = parent[x] + return x + def union(a, b): + pa, pb = find(a), find(b) + if pa != pb: + if msg_idx.get(pa, 999) < msg_idx.get(pb, 999): + parent[pb] = pa + else: + parent[pa] = pb + + for r in enriched_rows: + if r[5] and r[5] in parent: + union(r[0], r[5]) + + clusters = defaultdict(list) + for r in enriched_rows: + clusters[find(r[0])].append(r) + + # 簇间按最早时间排序 + cluster_order = sorted(clusters.keys(), + key=lambda cid: min(m[6] for m in clusters[cid])) + + # 簇内递归展开:子回复紧跟父消息(子节点按时间排序) + def recursive_sort(mid, ids, mdict): + result = [] + if mid not in mdict: + return result + result.append(mdict[mid]) + children = sorted( + [m for m in mdict.values() if m[5] == mid], + key=lambda x: x[6] + ) + for child in children: + result += recursive_sort(child[0], ids, mdict) + return result + + ordered = [] + for root_id in cluster_order: + cluster_msgs = clusters[root_id] + ids = {m[0] for m in cluster_msgs} + mdict = {m[0]: m for m in cluster_msgs} + + # 找根:引用的消息不在本簇内,或无引用 + roots = [m for m in cluster_msgs if not m[5] or m[5] not in ids] + roots.sort(key=lambda x: x[6]) + + seen = set() + for root in roots: + for m in recursive_sort(root[0], ids, mdict): + if m[0] not in seen: + seen.add(m[0]) + ordered.append(m) + # 补上遗漏的 + for m in cluster_msgs: + if m[0] not in seen: + ordered.append(m) + + return ordered, clusters, cluster_order + + +# ============================================================ +# 步骤 4:问题归纳 +# ============================================================ + +def extract_location_elements(msgs): + """从问题簇的所有消息中提取位置要素""" + full_text = " ".join(str(m[3]) or "" for m in msgs) + + result = {"端": "", "环节": "", "课程": "", "角色/组件": ""} + + # 端:按优先级匹配,移动端相关优先 + device_patterns = [ + (r"(? **[AI归纳: 问题描述]** +
位置要素(脚本提取){元数据}
+ | 发言人 | 要点 | + |--------|------| + ... + **当前问题排查结论:** ... + """ + loc = extract_location_elements(cluster_msgs) + + def clean_text(t): + if not t: + return "" + t = re.sub(r'\[Image:[^\]]+\]', '', t) + t = re.sub(r'https?://\S+', '', t) + return t.strip() + + # 收集关键信息供 AI 归纳 + root_msg_text = "" + for m in cluster_msgs: + t = clean_text(str(m[3])) + if t and len(t) > 5: + root_msg_text = t[:100] + break + + # 构建位置要素摘要 + device = loc["端"] or "未知" + scene = loc["环节"] or "未知" + course = loc["课程"] or "未知" + component = loc["角色/组件"] or "未知" + + lines = [f"### 问题 {idx}"] + lines.append("") + lines.append(f"> **[AI归纳: 问题描述]**") + lines.append("") + conclusion = extract_conclusion(cluster_msgs) + lines.append(conclusion) + lines.append("") + lines.append(f"📋 脚本提取的位置要素:端=`{device}` | 环节=`{scene}` | 课程=`{course}` | 组件=`{component}`") + lines.append(f"📋 首条报告内容:{root_msg_text[:120]}") + lines.append("") + lines.append("| 发言人 | 对话信息 |") + lines.append("|--------|---------|") + + first_speaker = cluster_msgs[0][1] + last_speaker = cluster_msgs[-1][1] + seen_speakers = set() + + for i, m in enumerate(cluster_msgs): + name = m[1] + text = clean_text(str(m[3])) or "" + media_url = str(m[4]) if m[4] else "" + + # 构建对话信息:优先显示文本,有媒体则附加链接 + info_parts = [] + if text: + if len(text) > 80: + text = text[:77] + "..." + info_parts.append(text) + if media_url: + info_parts.append(f"📎 [文件]({media_url})") + if not info_parts: + info_parts.append("[图片]") + dialogue_info = " | ".join(info_parts) + + role_tag = "" + if name == first_speaker and name not in seen_speakers: + role_tag = "🚩 报告:" + elif name == last_speaker and i == len(cluster_msgs) - 1: + role_tag = "✅ " + + seen_speakers.add(name) + lines.append(f"| {name} | {role_tag}{dialogue_info} |") + + return "\n".join(lines) + + +def extract_conclusion(cluster_msgs): + """ + 从问题簇全部消息中提取排查结论。 + + 返回: str 结论文本 + """ + all_text = " ".join(str(m[3]) or "" for m in cluster_msgs) + all_text = re.sub(r'\[Image:[^\]]+\]', '', all_text) + + def match_any(pattern): + return bool(re.search(pattern, all_text, re.IGNORECASE)) + + # 收集解释性/分析性发言 + analysis_pat = r'因为|原因是|应该是|可能是|改为了|导致|造成|引起|预下载|上云|缓存|首次|正常.*情况|一般|通常|默认|预期' + analysis_lines = [] + for m in cluster_msgs: + name = m[1] + text = str(m[3]) or "" + text = re.sub(r'\[Image:[^\]]+\]', '', text).strip() + if re.search(analysis_pat, text, re.IGNORECASE): + for sent in re.split(r'[。;\n]', text): + if re.search(analysis_pat, sent, re.IGNORECASE) and len(sent) > 5: + analysis_lines.append(f"{name}判断:{sent.strip()[:80]}") + + # 检查是否已上传日志 + has_log = bool(re.search(r'日志.*上传|上传.*日志', all_text, re.IGNORECASE)) + has_pending = bool(re.search(r'明天.*查|排查|查一下|待查|等.*查', all_text, re.IGNORECASE)) + + if match_any(r'已修复|已解决|修好了|已处理好'): + return "**当前问题排查结论:** 已修复" + if match_any(r'确认.*是.*bug|确实是.*问题|确实有.*问题'): + return "**当前问题排查结论:** 已确认,待修复" + # 分析性发言优先于"非问题"判断 + if analysis_lines and (has_log or has_pending): + # 提取第一条分析作为疑似原因 + first_reason = analysis_lines[0].split(':', 1)[-1][:50] + return f"**当前问题排查结论:** 疑似{first_reason},已上传日志,排查中" + if analysis_lines: + first_reason = analysis_lines[0].split(':', 1)[-1][:50] + return f"**当前问题排查结论:** 疑似{first_reason}" + if match_any(r'不是.*bug|就是这样的|设计如此|非问题|不是问题'): + return "**当前问题排查结论:** 非问题,设计如此" + if match_any(r'正常.*现象'): + return "**当前问题排查结论:** 非问题,正常现象" + if match_any(r'暂未|还没.*找到|查不到|没复现|未复现'): + return "**当前问题排查结论:** 暂未排查到问题" + # 默认 + return "**当前问题排查结论:** 暂未排查到问题" + + +def generate_summary(clusters, cluster_order): + """ + 生成当日问题归纳 Markdown。 + + 返回: str 完整的 Markdown 归纳文本 + """ + lines = ["## 今日问题归纳\n"] + + idx = 0 + for cid in cluster_order: + cmsgs = clusters[cid] + if len(cmsgs) < 2: + continue # 跳过单条无法形成完整问题的 + idx += 1 + summary = summarize_cluster(cmsgs, idx) + lines.append(summary) + lines.append("") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="飞书问题反馈同步") + parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3)") + parser.add_argument("--dry-run", action="store_true", help="仅预览不写入") + args = parser.parse_args() + + end_date = date.today() + start_date = end_date - timedelta(days=args.days - 1) + + print(f"📊 查询 {start_date} ~ {end_date} 数据...") + # end_date 是当天不包含,所以用 end_date + 1 天作为上界 + next_day = (end_date + timedelta(days=1)).strftime("%Y-%m-%d") + rows = query_messages(start_date.strftime("%Y-%m-%d"), next_day) + + # 按天分组 + groups = defaultdict(list) + for r in rows: + day = r[6][:10] + groups[day].append(r) + + if not groups: + print("✅ 无新数据,无需同步") + return + + print(f"📋 数据分布:{dict((d, len(ms)) for d, ms in groups.items())}") + + if args.dry_run: + print("\n🔍 Dry run,预览前 5 条:") + for day, msgs in sorted(groups.items()): + print(f"\n [{day}] ({len(msgs)} 条)") + for m in msgs[:3]: + print(f" {m[6]} | {m[1]} | {str(m[3])[:50]}") + return + + existing = get_existing_sheets() + print(f"📑 现有 sheet:{list(existing.keys())}") + + total_written = 0 + for day in sorted(groups.keys()): + msgs = groups[day] + + # 获取或创建 sheet + if day in existing: + sheet_id = existing[day] + print(f"\n🔄 [{day}] 已有 sheet {sheet_id},覆盖写入") + else: + print(f"\n➕ [{day}] 创建 sheet") + sheet_id = create_sheet(day) + print(f" 新建 sheet_id = {sheet_id}") + + # 对话链排序(含推断引用) + sorted_msgs, clusters, cluster_order = sort_threads(msgs) + print(f" 排序完成:{len(sorted_msgs)} 条") + + # 步骤 4:问题归纳 + summary = generate_summary(clusters, cluster_order) + if summary: + print(f" 归纳完成:{summary.count('### 问题')} 个问题") + print(f" (归纳内容见下方)") + print(summary) + + # 写入 + success = write_sheet(sheet_id, sorted_msgs) + if success: + total_written += len(sorted_msgs) + print(f" ✅ 写入 {len(sorted_msgs)} 条") + else: + print(f" ❌ 写入失败") + + print(f"\n🎉 同步完成,总计写入 {total_written} 条") + + +if __name__ == "__main__": + main() diff --git a/skills/feishu-group-msg-sync/scripts/__pycache__/sync_group_to_mysql.cpython-312.pyc b/skills/feishu-group-msg-sync/scripts/__pycache__/sync_group_to_mysql.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f66c2d6525300244a3bd18b03c59eea0acc4df22 GIT binary patch literal 20674 zcmeHvYjj)1)#y2Tzu)rvA-`hFvFyYNb`lc6aT4M@oKgp93_(?NWJiu|xkqxGxDuae zdB`PTxdbdq0MSA*O#t<6T4-tMy`>MR9GTDJ)Ze;veX*0=weG5E($(IuzSX<;IXXv* zamYs>|L!E7nKQFz&vVc0y=RYpqt&ViaBnFtc~`d+#DC%k@7Nspxb!6vK}-@9(M?b! zCF&=;Nj4O9i`Y=yEoMVWw}cI)-BLD`b<5aL-Ythv+^-l=bSnmw-O2$~w~9nul797o zrdtDXX}6Y=b?Yd3x1LgT8z^PBky3S=D0R1)(sWxWZMT)ub>~s~?tIG7T_7SnO88Sz z#xthVB-da*>@I{HY2SLj8`%(2Te4q#j51S}GuG2$?wcgo(i`~PZ0IQ!YAExF`fkeR z@Ugx&K9&tB(nGEY3!}Za6gt{&3X$W|Mk0%x%d1c{8kEtBCt-ip8ADuq;<2M!`9$OfH0t#^@AnW4O&n!Oi z(Bi2x3*Y+cxi=m=H~p1!ug3w$g$I6e;gwh0_jLixg;x>_Z#=&6;FAjvyl^${KfWGW zJo)P4iTf5_fBgK(sfDMWS$O`Dg$KTS{<}Z^f;wDU%B}8*uwZD7rz-9t!kouzCcr>Z^$#~?sttexoN0lFyM9d`Tz`i~E~ooo{;SfCa2M||`V%1e({9_n+rsY8wp zJ@g@Jq;I&<|Aqd>08|VWLVBayH{jjwCJ09EKI{q{@&!iq#|{r~J=RLyGQ7XJ_x3xE zGRmW_ewQ=g@dp@Fzl%QN?4^CfL(Tz@-|y=6jOMY~{rv+?!+sAfH1nbY#^5EAkskI9 z)Be%IyK3rOem6GT?muX&smm0%`>z;l4mfHC95s}!X6L4wy_;&fU^b-vo}-?AM%Lq_ z2V4P0j+4_f5E!*JHa5ZkOb;AxpWiV&)bDdqO@24+9SZniqSWm^|Lts|hlUo2-6ItA zkaM_CVaw$3cj6H^T=d>Lmk8A(iC! zh*3xii34grr%%JjvSCn6!M~s=NE{>mGLi`B_`EZc)0uXJBtc1^fltqdl=Mg|&}=`^tV=ri2QlkdGgv~?KK6;a_ zbF^4U!rE*@F52(mb1%LPh3p1KPDu~j(@VkG@uAWW5i_|OpT za|2B>VtROxQQWx?;GngPiuOSJ2Hif&!^r#r3fjqt`+dEPlx+)}1~Ep`=Z9LPNK1Z3 zkB!>q!9v@y#day%8%E-yd;N4FG!MvXfMR5{$3NU3@Z;jNp}TT$`R;~0k$=eL_Bb+o zTa*8Yw;#4TZr^gSyNP4K-|N7QjVI$Kc9-LJnt;zaa2)q7=s!F5PMA2l398%(fBs*B z7becBG~@MS_2W%rO(|7zxP5-r4dM2rqH;!3l~!9L4W}$I>Zzg`RTX4u^T+QQyC>G3 zkW8heDkB7opA zG$mv|)P7q#O-=igYub}d+mfd3vx*(>112fJUGXF7{=A@UYb){d)(!2=;=qNqUDGid80ro{eHJ*r$%sJ(EAf1gW9S%0t zOO47M4t7>Xr49$o$*2szeKgP(9CM>;hhqp1M~B}#>S07Tthpj_I9x!QaKHjpAE8%4 zrHnj-n~@I>9vSqF434S?eKw$|A>W|iGmP5@*RTwS_58OleD8%#Hbw%6p^H)QEd$y_ zT43buJKOd3+Hm z5Y8|8qB5nvAt|}x610tAWUzHOy%eM9 z@%DS1M?A1czz{kE$A^HE!?Z!yfX9v9w^!Bmn_upRjuQDu8cTVI61j1jYRN4@rX(dOw_K0%wrLL8*9ZquZ}jTc%anvIG&7 z3Z<*BQyNEPnah@@)JL}AK^ByD5yv%l?eIDP1!nrf8*gx80Jl8PO}`9B=Y{Xzzxbo* zA74MgovO&%pGsW#;j0Tjxu0GS+{RX7~nNlOE}L(nKA(q{DVEMQODPP>972)YtekX>OUL&&;I`h4C&`esa! zd5!@&!|4j7f;4KE(d`Hg9VO`P=Yon_s>nG}C`)5s+&(klCJU8;UxyUgBkBU`g1s9~pQA9o*Qr7FscVqu)F5YjVF#WSxqa64 zA@9G;K^>WXhQvYfigc*pAt)CAaS8CT0{;#?YcueypFCE}dt$xrSV2v0(E&K;C`^O8+%#V%(=G;)<57wtaG zNLhA`u7k$WcJx?MQ*J-z_6!BQzCk#NIx%@4cuW4r7Ja}Q^1GzrhdRAUEZ!k?NCzQ#erbK?qurVTizhP}uI;oye$J!p##pOxl#M%=< z@X|_S(po#CtQE?vNz|qco1o0*V%{?Xmilr6ztfX>Lqv8kZD=ChxD8<%FRScs?v*$(!t$=t!2WPxPiN zH$}F;-?ToueRB82?l}2aXM9J}y!xzW$8-aD!2CghD}EHfepX+*)lB@{yuMv0eoHAr zUrU026~*E<#8T|3t0|T|4zeR9X4n1x>#RsPl5T)K$CGfRQq?F#Bkmk zc}&@m(okA{Os;M_v|BB-8@N|&_i+R40)ng!yMPq>qz!?rc_Lq+RpT(8dfZ`Xs{?h# z!pqU~PrU*ftp*#YzRvyl2a97bEqwd&h3F#gNhR5w6m<;x7a z3dc4utFTi!y@Q}4@&e4g2>0*cWw(g5el?QuooAIL^Va;y0}}_5+ z#td?9kB}&>S8oyXaFEwd)XteIXH1pxwJ+ZA+znI3i9@NXjVaTn>9sSaEs^#m8DYqa z`o9{A^(0r_lGNXlRNRtQH}jhaskEj|manugi6M5yk4(g-mo*_Ru4PBy9>{3DyP4>_)&712BIi66%VW1O*W* zI07SZO-ON8G{d8oUJu9ZN^O(y#D~f*x+14-lB}R8w9N>_yb=~+8v;_5+2zB3gYF53 zrf^(xy5lBJdCVQC9otmM;jx~erOEKK@z)n#AKwH*+ED*-XTWD$czJU1mG6Tl8Y1V< zeDmD&cYyaj|J3&uAAMml_UeVNpXTrbXKc&xy&$Wgb{cEo*yn{upIvzI;kM6oEM|~{ zE(lmOfXiTKVGsGJOb?(ph@KC;<@8#> zAL?u!yqF0+qyqr`RV)z#FH9^M2!nZiWNailn$lN>cfK#zpWGEYcE&twaisP65k1h1 z(K0EYkjK<1Lv^_0eYqj6FGwm1AVZ@cZyjrmw#1rK>ay^*_vPv|9NUTsMGP2(3OMZ5 zhLhcK>GOeEYeU*xFu8GJW9;sfxjw0>|3F}sQPJ##KDq^hfQI4u!;;h$;3OxnMF^l{ z;TI(=@0E!qhXht!w2~Ygprct4=6X1!luT%c^cpziIrYe{0i(!)kzWHwnFFJ^1`O(! zGV(@9c?}pruM`3vV8vc*PiKfcqz)?m(x9BuWw-`?&IUmUP#WMMP_kj&N)mcdwvvoa z87L#)S^<|fs9lNE$;tpcWZw#sG$q`QhI9de>i~?nqc3y(%>rc}g|_opL;3)+hOEo# z&x4YnG$;CWvOfjV|B0~94V+mX19Xb75#be8cRB5)}P(iSOUK>CLgmr!Od@LLG zp~Sz;1q=F6-dg6UGJbaWkSgQ2+@LvB7|ef)2o`dDZqSUn%TUqaJ<#fMw$)v*HrMh9 z`*S z1W+NvI#`v~U|z83G_oS#LZ!h{xPd}l4eR=D8sze1Vk9W?{268X#Z%up|LhBl8Y}{k z4Z+*;9MMP)mI$7JhuXn%TZ|md2vGAa^B?yIJOiB!*?mQ}dd-?mYu3=nv0S_WJMBwa z?rI+am2SAvaIylPlB*w6)tM-!b1h_9tCR_aLLXVHDV*g4|537tdv|H{mQ$79-JxuRIG}egsm7aVRK3F|tT0ONRu`^}b65h>cI;Ko1Qv-bK zO`NO*nDwi|JJUKNYBZwWxGANp4&R#nwx)Eo@SUld6P<+Il+G5u^$&7gv@Uk|tZ5b8 zYhRWT1#1%GRQ~#;#IhulN!!SIt#wXYHKVPH@1Cn}ovCh}7EgaJRlOsny)`0Ao2`>u zCbm3pN|~$Y%#IneBiY!NGH;D2<_n5rTc6$Y}=F0n;qk>fLP@1eIwv>*>=%HTac`?2w?yd+)X$c2KA)~rixn7%Vr zy?sVf5#ALQNAI35v`^I~ZcP<#o{{8*cSa8XL2XW#ZOknP2tjq@j#7~z3_Gup{+N#G z=8luildTi2bLOfUb5(rTT=n{y>h+1+Qq`>~^M+~TS@Vr=`y+~UURks|uK2ossvQ(m zN%PtXo4T;Z9 zA4?f`!X=d2inp}tF?BMpeyRn=r!w)m#I9*q(sIkJqAgvr=IY_mNR4S-(VVVgMpqH9 zpXyBnQ@U;3O|N21ku+Dw?~HdOjqFWtSp}5+;*6p=UDh!5xv5>rlGY5eC6!odoY&~a zJH|SqeY2Vh)Uunxd;fabDXesyuRR11Z2IhF<^bh|MKIe_m;J-W{-h*yQ)pT_eSD3#D-2S@tcj-ePZIb zhPLXx8^rIllIY*4?o^52Rg&mymHP5=valst7(PasK!+h0BtOG-pv?en;Gz4N3qfO6Y12f7B- zI8+BMS4*GQ9({mj$LVQtkel}!&?A_s+EKuaqZAPI8re~Y^tz{|!9rnml&n1G!IEst zPrN5#XK|TK`p?oN2rD9#L%si0O~Nu-XSD?XmzSlN^=efpcb?WQn(fF1$l^fB~~qlZh5 zlcDxu482ZX+4ZVB7W5s;L z!uJ}5We_`g!TVbIyr@l2AQDtAPdkE8bldXwnI&_nr~eiFUg!DHk--a*RM-%me-2M4;7<&dhFxd7C+Dp0nwy$IA zFnTMA)WULDCQZ}dz!F(;TE&hB4v?SG;9NR05;W+QSV7y*sM%!h3$zc^F-D#lQ~E_j z#){A{VGJc_R#`%S3uAbKGEhM;SjC~t?k|OePZFdvR;U(Ul4d6?a}i0u4uy+harvh~ zaq=$}r~fhWnYCCwm*4a+l$$kZA6PU|lq|XB&DxZ4S9ljlYG7y7GSQMO+B|&-63x59 zd*(qLhoo42{LoZG$_(P?mKpPwl=b;;f<1fQOq#zYCJ^t- zVu83g8p_32pt?-xl!(%hAJIV%3M=FFsRGBJh&pN2m?k2Q+?h5P#;Rh~(ajm*7?8Bb zZd=XT-2B| zs@3Vjig;Dr^;}Ine~LU)HPv^zDQVv{U6IV+j9~m|=cyv9>}Zd^r>I=Ijf8<&+D{T1 zbGY-bmqY(l*>{_Wu#|!%s_g(PH#2KeSJFByPaJykyc&KY)|HSGKdq*Q?U z=(36^r~r6l;ro^XNHe17yKK1rI-hU53go~10ifca-`rNStA%*mw zrLB5TnfSM*B>EM~y$0!T>x=iQrSC{(dn-lnsHKqdjzNrZvvO~_^qn#i{YtV^DSfBf zx_6W8osDdKld@B$d{-=j^mnBs#${rN(?5iYudA(;3(h$+EcqQ(tGNJL2jC}^_|*d6K9Vl*NE zrhO+c>=H1w!*(xoaM32AWGEd$D9?n7OsLF+s!XWPgqlpK&4jv4sLzCkOlZu6CJcK7 z!#I*Su?yH!vd_A#G4RRv2d=z?i>^K_zRXeP>@%{UaLo<9wmdBu3&S-KK5-U|6hW{d z`>K{9g3oN7$H^*UEzIT!7#AKONceZ6h`YD}mQxTM(Ofqq&1v5XA|QxW(q+O2*;nyN zbpta>hNSmOPjqo%E-QMycJpkhJ#QFA;l;7n;BD&S_*dbIHVhnE zZ7Jh#YeHJ?z~Nyetchma3K?z#W}k+dsSC z*3r3tpUw3G(89+c>e%}tF9nu~{KZj&O}ID`qIhxCkYnD|fUns&3#JBJ=8~#`GpD+$ z4a@JA07u47T`+}2v}olAO8HDK+=OIf>_v$EW9`19ZO`r7yKHqe{s!Bp`};U9c$>HF z_Rs9;Xm8uU-L`xC0o(1L*#^;%)rmdJxk$5Zv2m@a%Sp61tXNDKt!yD7=^8~cqna&{ zNw^01@{!J#Tb^?b1pLHg3-T$~D3D<$v+$XOYk&*0oGliBs9xHjEm;m;CzW#oE70yWA{XRdwW5M$dABzqS((hnY3@<0? z=dl>>lplM+q6f=958ROPk1Xl;0BQo+Z3?_F@dvdDUa6c@o5pV*yBS_iX8s$C=Z%GF zL*ZquTq6r_TQb#2t!b@s{I0RP9zM84R4TWVv7RLnnO{0zS{K{9L}==@OJc3r0A{ac zL~-Sx2)))Em8A`Nle!6A%%8Rt#Wv1ZYB0QU#!{ObZke&z@q6=(r7kx;1D`B*%vc&R zW7CXf6^6?Z@##0uSXM6;>uOAYCUi#gk{Fr*S4+@pVC+upod^5Dont$b`SoYkO^ux1 zJkA#V{8_7@=n`7T&yK_6AQ{9jgyEsVS5Q;)ly?_D+obJR@BLsC%j!y`}N zvJU{4Ef323R%hY(SnjhBxaLu03{T4g4jux`8^IV;P;D#0w6uxO%Z5QEUoUW`8?y$0 z%Sx+)DnEIm18}U%mgQqX;YldqxC{7j8Qqvwi3erb8m_Yf^f@yil!6&B4Z1fpNBD}QDEaNd|nTO z6+ZVXHk)jt5}V^D+i)4=E`H~^#ZzO8Pb7Fn?gaDp}&L4 z$U)fE4B5kI*&ncBf64)WyFd-h4ks7@%d-VRv${-VTR8nDs?iw@s{+iZ4_WrbuZHv- z`GGUOkqo%tP479#`ZdtB|8L-hiFt|aq22fGKBXHZN(DYzdPc$rAtbm-SOm(r*=Nu`D7;;3Pkq)1uo`MW;-abEbwFQ$xzs7~VZEkw3Ka zzMYYKV*@eg)V4%%LYvgyG%L9oP#Vf&152z)}e7 zub8SxYqaCHj@=sFJ*%-{yDB~ab}WN?n$nlg>FZ|nbyGzt{rWIkhUkr83Z&2f6QPvW!UHFB9-64cHpOXp3jz8zwK;n9 zth)4EF=#RqcfYwIRlFy%C#^A$caC+&HovE-J7=wqcTF`+D^k|B$PPwtO&juAF7D`i zhE+>u!1`eUq0rAO@?-K@g)P&)=uOX8#e2We5HFh=O&T}7r?~MP-*2=6ftIUuCkA8g zl(G_fE-j6_(hB9cd`uoy&nilw=h9L({V#WtFgYLM)Lr>Y0XH63{<4YBzT&Tj#qcv- zc}JP}7v-jo)#6`?O&zV`UzJHa>g2y#E$vt@|5dAW7a?a9PIzbL9dtVF>lwMzNx?5W zIXT)AgGD9xQq2a)7&-i0N-r3`v5J2Be?k&#!$(Un#*&X5Ux)X557EeYW%d3vZXh&+ zUL|^OqxUQHevRI5(EBZV@1n=v2p~x^YLMN%?snf`kGGeRbad|6$0*^++a72J%K1*# zh7%fFgT;{Y85zgM(;fIBhi!qi@&risO`nr9nBxrcXqJlce4=UeQ2)XH-~)Rb!AJuD z=HaXby_n@k4|ze3#F6M(5@)IIC4zlo&JyDl#pVGYHQevHiM|X#c*gqY!3QNFNq#7i zlafmYf>iyUfdA^>6UN^Yy5AE@@c$RlJVP}92T`!3BGhZb^0a1ESP2W&ydjJaeoU=l zU0U53RzO_W99E^ZCiL>qD+5oR7gnTom0>mH8|`5&Xs2bmF>lN@tFxt~6=4x@+oUBd zpCimOggI9GY{Qcc$);V&{k`uIhtsnBaPOQfe@2!cQ>0`SVKHDKB@Zd?Q#_=oLYczS%Y>G+EZOvAOU%7QKrq$L z28nH#@N21BM;b5(f~hVxm<9m&dV3ukB)jgogr7^5dNMEO$Fg|{2tnd@M3J{tZ6w#l zAPIsgCA)p6T)JEACrNXR zS^|w3X~vdd_$IU!!=-+43u%q*zeGUrCz@PKmM$G835g~$dRAQczC`iBjwLZ6DPm{; F{{heoX@LL$ literal 0 HcmV?d00001 diff --git a/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py b/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py index 5580197..f89cf7d 100755 --- a/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py +++ b/skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py @@ -214,12 +214,16 @@ def process_message(msg: dict, cos_uploader, date_str: str) -> dict: 'msg_timestamp': msg_timestamp } - # 文本消息 - if msg_type in ('text', 'post'): + # 纯文本消息 + if msg_type == 'text': text = re.sub(r'<[^>]*>', '', content).strip() record['content'] = text return record + # 富文本消息(post):可能同时包含文本和图片,拆分为多条记录 + if msg_type == 'post': + return None # 由 process_post_message 单独处理,返回多条记录 + # 表情包 if msg_type == 'sticker': record['content'] = '(表情包)' @@ -276,6 +280,124 @@ def process_message(msg: dict, cos_uploader, date_str: str) -> dict: return record +def process_post_message(msg: dict, cos_uploader, date_str: str) -> list: + """处理 post 类型消息(富文本),拆分文本和图片为多条记录""" + msg_id = msg.get('message_id', '') + sender_id = msg.get('sender', {}).get('id', '') + sender_name = msg.get('sender', {}).get('name', '未知') + create_time = msg.get('create_time', '') + content = msg.get('content', '') + deleted = msg.get('deleted', False) + parent_id = msg.get('parent_id') + root_id = msg.get('root_id') + + if deleted: + return [] + + quote_message_id = extract_quote_message_id(msg) + + # 解析时间戳 + try: + dt = datetime.fromisoformat(create_time.replace('Z', '+00:00')) + msg_time = dt.astimezone(TZ).strftime('%Y-%m-%d %H:%M:%S') + msg_timestamp = int(dt.timestamp() * 1000) + except Exception: + msg_time = datetime.now(TZ).strftime('%Y-%m-%d %H:%M:%S') + msg_timestamp = int(datetime.now(TZ).timestamp() * 1000) + + # 提取图片 keys + image_keys = re.findall(r'\[Image:\s*(img_[a-zA-Z0-9_-]+)\]', content) + + # 提取纯文本(去掉图片标记和HTML标签) + text_content = re.sub(r'\[Image:\s*img_[a-zA-Z0-9_-]+\]', '', content) + text_content = re.sub(r'<[^>]*>', '', text_content).strip() + + records = [] + + # 如果有文本内容,生成文本记录 + if text_content: + text_record = { + 'chat_id': CHAT_ID, + 'chat_name': CHAT_NAME, + 'sender_id': sender_id, + 'sender_name': sender_name, + 'message_id': msg_id, + 'parent_id': parent_id, + 'root_id': root_id, + 'quote_message_id': quote_message_id, + 'msg_type': 'post_text', + 'content': text_content, + 'media_url': None, + 'msg_time': msg_time, + 'msg_timestamp': msg_timestamp + } + records.append(text_record) + + # 如果没有图片,且文本也为空(极端情况),仍保留一条原始记录 + if not image_keys and not text_content: + records.append({ + 'chat_id': CHAT_ID, + 'chat_name': CHAT_NAME, + 'sender_id': sender_id, + 'sender_name': sender_name, + 'message_id': msg_id, + 'parent_id': parent_id, + 'root_id': root_id, + 'quote_message_id': quote_message_id, + 'msg_type': 'post', + 'content': content[:500], + 'media_url': None, + 'msg_time': msg_time, + 'msg_timestamp': msg_timestamp + }) + return records + + # 处理每张图片 + for idx, img_key in enumerate(image_keys): + short_id = msg_id[-12:] + filename = f'{short_id}_img{idx}.png' + cos_key = f'{COS_BASE_PATH}/image/{date_str}/{filename}' + + # 下载图片 + local_path = download_resource(msg_id, img_key, 'image', filename) + media_url = None + + if local_path: + try: + url = cos_uploader.upload(local_path, cos_key, 'image/png') + log.info(f"post图片上传COS成功: {cos_key}") + media_url = url + except Exception as e: + log.error(f"post图片上传COS失败: {cos_key} - {e}") + finally: + try: + os.remove(local_path) + except Exception: + pass + else: + log.warning(f"post图片下载失败: {msg_id} img_key={img_key}") + + # 图片记录使用特殊 message_id 避免唯一键冲突(原 msg_id + _imgN 后缀) + img_record = { + 'chat_id': CHAT_ID, + 'chat_name': CHAT_NAME, + 'sender_id': sender_id, + 'sender_name': sender_name, + 'message_id': f"{msg_id}_img{idx}", + 'parent_id': parent_id, + 'root_id': root_id, + 'quote_message_id': quote_message_id, + 'msg_type': 'post_image', + 'content': f'图片 ({idx+1}/{len(image_keys)})', + 'media_url': media_url, + 'msg_time': msg_time, + 'msg_timestamp': msg_timestamp + } + records.append(img_record) + + return records + + def insert_to_mysql(records: list): """批量插入记录到MySQL""" if not records: @@ -302,6 +424,7 @@ def insert_to_mysql(records: list): parent_id = VALUES(parent_id), root_id = VALUES(root_id), quote_message_id = VALUES(quote_message_id), + msg_type = VALUES(msg_type), content = VALUES(content), media_url = VALUES(media_url) """ @@ -362,9 +485,15 @@ def main(): # 处理消息 records = [] for msg in messages: - record = process_message(msg, cos_uploader, date_str) - if record: - records.append(record) + msg_type = msg.get('msg_type', '') + if msg_type == 'post': + # post 富文本消息:拆分为文本+图片多条记录 + post_records = process_post_message(msg, cos_uploader, date_str) + records.extend(post_records) + else: + record = process_message(msg, cos_uploader, date_str) + if record: + records.append(record) # 写入数据库 if records: