auto backup 2026-05-09 08:10:02

This commit is contained in:
--git_token 2026-05-09 08:10:02 +08:00
parent 807f733a1c
commit c3c8dbbab2
15 changed files with 533 additions and 188 deletions

View File

@ -14,4 +14,4 @@ user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
feishu-group-msg-sync 085f95a5b89fec3b6a627da25d66ffeeb0be430098387739a64f7903f0ee88d4
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
feishu-feedback-sync b046dd9f252ba6672acfac66aa732442cded924f0cd3c70e46cba5c6e300af6d
feishu-feedback-sync 878cad61ebd9d480eac6d1146a556031f02580c75841d4173f58afb076af7c34

View File

@ -6,3 +6,4 @@
{"type":"memory.recall.recorded","timestamp":"2026-05-07T02:22:52.564Z","query":"用户反馈问题汇总 写入知识库 append overwrite 文档模式 feishu-feedback-sync","resultCount":6,"results":[{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":73,"endLine":102,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-07T02:55:43.649Z","query":"问题分类 分类规则 版本更新 声音音频 UI显示 启动运行 语音识别 关卡内容 priority_classifier","resultCount":6,"results":[{"path":"memory/2026-05-06.md","startLine":61,"endLine":86,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":73,"endLine":102,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-07T03:19:39.321Z","query":"小葵小葵群 chat_id 发消息 bot","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-08T10:25:44.365Z","query":"MySQL database connection credentials lark_group_message","resultCount":4,"results":[{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-23.md","startLine":28,"endLine":55,"score":1},{"path":"memory/2026-04-30.md","startLine":95,"endLine":120,"score":1},{"path":"memory/2026-04-23.md","startLine":47,"endLine":72,"score":1}]}

View File

@ -1,6 +1,6 @@
{
"version": 1,
"updatedAt": "2026-05-07T03:19:39.321Z",
"updatedAt": "2026-05-08T10:25:44.365Z",
"entries": {
"memory:memory/2026-04-18.md:1:5": {
"key": "memory:memory/2026-04-18.md:1:5",
@ -244,6 +244,130 @@
"loading慢/数据丢失",
"安君仪/毋益飞/王胤鑫"
]
},
"memory:memory/2026-04-17.md:1:23": {
"key": "memory:memory/2026-04-17.md:1:23",
"path": "memory/2026-04-17.md",
"startLine": 1,
"endLine": 23,
"source": "memory",
"snippet": "# 2026-04-17 工作日志 ## 飞书群消息同步改造(李若松要求) ### 变更内容 - **存储从飞书表格改为 MySQL**:新建 `vala_test.lark_group_message` 表,结构参考 `wechat_group_message` - **同步频率**从每6小时改为每4小时 - **数据范围**2026.4.1 起的「内容测试问题反馈」群消息 - **数据库账户**chatbottest环境仅对 lark_group_message 有写入权限) ### 完成事项 1. ✅ 创建 `lark_group_message` 表(唯一键 message_id 防重复) 2. ✅ 编写新同步脚本 `scripts/sync_lark_group_to_mysql.py`(基于原有 sync_group_to_sheet.py 改造) 3. ✅ 首次全量同步完成172 条记录2026-04-01 ~ 2026-04-17含文本134条、图片17条、视频10条、富文本9条、表情2条 4. ✅ crontab 定时任务已替换旧的每6小时飞书表格同步 → 新的每4小时MySQL同步 5. ✅ 更新 secrets.md 记录 chatbot 账户 6. ✅ 更新 user-feedback-collector SKILL.md 反馈数据源信息 ### 文件变更 - 新增:`scripts/sync_lark_group_to_mysql.py`(核心同步脚本) - 新增:`scripts/run_lark_group_sync.s",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
"queryHashes": [
"cc0dd7ef50d7"
],
"recallDays": [
"2026-05-08"
],
"conceptTags": [
"vala-test.lark-group-message",
"wechat-group-message",
"2026.4.1",
"lark-group-message",
"message-id",
"sync-group-to-sheet.py",
"secrets.md",
"user-feedback-collector"
]
},
"memory:memory/2026-04-23.md:28:55": {
"key": "memory:memory/2026-04-23.md:28:55",
"path": "memory/2026-04-23.md",
"startLine": 28,
"endLine": 55,
"source": "memory",
"snippet": "- 创建脚本:`scripts/sync_feishu_full_history.py` - 同步时间范围2026-04-10 至今 - 成功处理 57 条消息(包含文本、图片、视频等多种类型) - 所有媒体文件已上传到 COS - 数据已全部写入数据库 4. **定时任务配置** - 每 4 小时自动同步「内容测试问题反馈」群消息 - crontab: `0 */4 * * * cd /root/.openclaw/workspace-xiaokui/skills/feishu-group-msg-sync/scripts && python3 sync_group_to_mysql.py` 5. **文档更新** - `feishu-group-msg-sync/SKILL.md`: 说明数据库存储方案和引用回复支持 - `user-feedback-collector/SKILL.md`: 更新飞书反馈数据源为 MySQL - `feishu-group-msg-sync/references/query_examples.md`: 提供引用回复查询示例 ### 技术要点 - **引用回复识别**: 从消息 `body.quote` 字段提取被引用消息的 `message_id` - **去重机制**: 使用 `UNIQUE KEY (chat_id, message_id)` 避免重复记录 - **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
"queryHashes": [
"cc0dd7ef50d7"
],
"recallDays": [
"2026-05-08"
],
"conceptTags": [
"sync-group-to-mysql.py",
"feishu-group-msg-sync/skill.md",
"user-feedback-collector/skill.md",
"body.quote",
"message-id",
"chat-id",
"创建",
"脚本"
]
},
"memory:memory/2026-04-30.md:95:120": {
"key": "memory:memory/2026-04-30.md:95:120",
"path": "memory/2026-04-30.md",
"startLine": 95,
"endLine": 120,
"source": "memory",
"snippet": "| iOS组件无音频+Loading慢/数据丢失 | 胡陈辰→安君仪/毋益飞/王胤鑫 | 11 | | 网络问题4/29 | Ann | 2 | ### 飞书群 4/25-4/27 数据查询结果 - 查询 MySQL `lark_group_message` 2026-04-25 ~ 2026-04-27 数据 - 结果0 条,该群此时间段无消息记录 ### 反馈同步 Skill 创建 [刘新玉] 将飞书问题反馈同步流程封装为 `feishu-feedback-sync` skill并计划注册定时任务。 #### Skill 文件 - `skills/feishu-feedback-sync/SKILL.md` — 完整技能文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` — 核心同步脚本 - `scripts/sync_feishu_feedback_wrapper.sh` — 定时任务包装脚本 #### Skill 功能 1. 从 MySQL `lark_group_message` 查询近 N 天数据 2. 写入知识库电子表格(按天分 sheet 3. **反馈对话链排序**:按引用关系将同一问题讨论聚合呈现 #### 策略2推断缺失引用关系 [刘新玉] 问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)** 1. **@提及匹配**:消息中 @了某人",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
"queryHashes": [
"cc0dd7ef50d7"
],
"recallDays": [
"2026-05-08"
],
"conceptTags": [
"网络",
"loading慢/数据丢失",
"安君仪/毋益飞/王胤鑫",
"4/29",
"4/25-4/27",
"lark-group-message",
"feishu-feedback-sync",
"quote-message-id"
]
},
"memory:memory/2026-04-23.md:47:72": {
"key": "memory:memory/2026-04-23.md:47:72",
"path": "memory/2026-04-23.md",
"startLine": 47,
"endLine": 72,
"source": "memory",
"snippet": "- **数据一致性**: 使用 `ON DUPLICATE KEY UPDATE` 处理重复消息 - **媒体文件处理**: 下载飞书媒体文件并上传到腾讯 COS记录可访问 URL ### 验证结果 - ✅ 数据库表结构已更新3 个新字段) - ✅ 历史数据已同步57 条消息) - ✅ 定时任务已配置 - ✅ 文档已更新 - 当前数据中无引用回复消息quote_message_id 都是 NULL说明这段时间内群消息都是直接发送的 ### 查询能力 支持以下查询场景: - 查询某条消息的所有回复 - 查询完整对话链(原消息 + 所有回复) - 统计热门话题(被回复最多的消息) - 按时间、发送人、关键词筛选 ### 文件清单 - `scripts/alter_lark_group_message.sql` - 数据库表结构升级脚本 - `scripts/sync_feishu_full_history.py` - 完整历史数据同步脚本 - `skills/feishu-group-msg-sync/scripts/sync_group_to_mysql.py` - 增量同步脚本 - `skills/feishu-group-msg-sync/references/query_examples.md` - 查询示例文档",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
"lastRecalledAt": "2026-05-08T10:25:44.365Z",
"queryHashes": [
"cc0dd7ef50d7"
],
"recallDays": [
"2026-05-08"
],
"conceptTags": [
"quote-message-id",
"数据",
"一致性",
"使用",
"duplicate",
"key",
"update",
"处理"
]
}
}
}

40
memory/2026-05-08-1754.md Normal file
View File

@ -0,0 +1,40 @@
# Session: 2026-05-08 17:54:20 GMT+8
- **Session Key**: agent:xiaokui:feishu:direct:ou_088ee79216826be4a24af44f7268f880
- **Session ID**: 6215a8b7-9dcf-436a-afbb-a0f56b70d246
- **Source**: feishu
## Conversation Summary
user: https://makee-interactive.feishu.cn/wiki/Ih0VwOhd4io22Pkm3fGcTHb6nRg?fromScene=spaceOverview&from=navigation 在这节点下 新建一个 "世界" 文档
assistant: 首先获取目标节点信息确认知识空间ID和节点类型。
assistant: 节点是目录,确认可用。现在在其下创建"世界"文档。
assistant: 已创建完成。新文档"世界"位于"小葵"节点下:
**链接:** https://makee-interactive.feishu.cn/wiki/TktpwWS9FilRJskvdwBcjnebnog
user: 好的 现在在文档中写入一句 "人生几回伤往事"
assistant: 已写入。"世界"文档中现在有了「人生几回伤往事」。
user: 好的 现在删除这句话
assistant: 已删除。"世界"文档中那句话已经没了。
user: 好 现在删除这个文档
assistant: 已删除。"世界"文档已从"小葵"节点下移除。
user: 给我讲个鬼故事
assistant: 老李是个夜班保安,工作是巡视这座刚建好的写字楼。
这天半夜,他按惯例巡到顶层 14 楼。这是个还没装修好的大开间,手电筒的光柱扫过去,只有几根光秃秃的水泥柱子。他转了一圈,正准备顺着安全通道下楼,腰间的对讲机突然响了。
是监控室值班的小王,声音听起来有点抖:“李哥,你……你身后那个人是谁啊?大半夜的,怎么还穿件红衣服?”
老李心里“咯噔”一下,猛地转过身。身后空空荡荡,连只老鼠都没有。
他舒了口气,按下对讲机骂道:“小兔崽子,大半夜别开这种玩笑,我这儿哪有人!”
对讲机那头沉默了几秒钟,随后传来一阵刺耳的静电“嘶嘶”声。紧接着,一个陌生的、沙哑的女人声音从对讲机里传了出来,还带着诡异的笑意:
“他没开玩笑呀,我确实不在你身后。”
老李浑身的汗毛全竖了起来。他突然感觉到脖子后面一阵阴冷的风,余光里,一双惨白的手正慢慢从他自己的肩膀上伸出来,指甲上涂着刺眼的红油漆。
那女声贴着他的耳朵,轻声说道:
“我在你背上呢。”

43
memory/2026-05-08-1805.md Normal file
View File

@ -0,0 +1,43 @@
# Session: 2026-05-08 18:05:25 GMT+8
- **Session Key**: agent:xiaokui:feishu:direct:ou_9d4df593d0419d705274947c5cec5ada
- **Session ID**: 72de7c24-c2da-49fd-b783-bb82f556f676
- **Source**: feishu
## Conversation Summary
user: lark_group_message 数据中message_id为om_x100b50ed0ab29080b3efb880aa8d17d有媒体文件但https://makee-interactive.feishu.cn/wiki/TVivwmzqXiW3YakDUzucFMRenvf 这个5月8日的文档中没有
user: 小葵 你在跟我聊天吗
user: 小葵 你在跟我聊天吗
assistant: 新玉姐,我在的!刚才的消息可能有延迟。让我来查一下你说的这条数据。
assistant: 找到这条消息了。让我看看它所属的对话链,以及知识库文档里的内容。
assistant: 现在让我查看知识库5月8日的文档内容和同步脚本的状态。
assistant: `TVivwmzqXiW3YakDUzucFMRenvf` 是「内容测试问题反馈」电子表格。让我检查5月8日的数据在里面有没有、以及汇总文档的情况。
assistant: 有5月8日的sheet让我读取它的内容看看这条消息是否在里面。
assistant: 消息**确实**在5月8日电子表格中第9行让我再看看汇总文档里有没有对应的归纳。
assistant: 找到根因了!同步脚本在**早上10:00**运行,但这条消息是**下午15:44**才发的——时序倒置。让我确认汇总文档的状态。
assistant: 现在我完全理
user: 小葵 你在吗
assistant: 新玉姐,我在!刚才在处理你前面说的问题。已经查清楚了,给你总结一下:
---
### 关于 `om_x100b50ed0ab29080b3efb880aa8d17d`(李应瑛的图片消息)
**✅ 数据库里有** — 李应瑛 5月8日 15:44 发的图片内容132hotfix测试包卡住的截图
**✅ 电子表格(内容测试问题反馈)里也有** — `TVivwmzqXiW3YakDUzucFMRenvf``2026-05-08` sheet 第9行有媒体URL
**⚠️ 但为什么你可能没看到?** 有两个原因:
1. **对话链排序后位置变了** — 排序算法把这条图片消息排到了对话链末尾第9行而不是按时间顺序排在第2行。图片被放在了整段对话的最后。
2. **汇总文档刚才生成**`2026-05-08 问题反馈` 子文档在 18:03 才跑完数据也是下午才发的早上10点定时任务跑时还没这些数据。汇总里只归纳了文字"卡在如上界面",没有单独提图片。
---
要我帮你**重新跑一次同步**,让排序和归纳更完整吗?或者直接看原始对话链也可以。
user: [assistant turn failed before producing content] 这句话什么意思 翻译成中文
assistant: 新玉姐,不好意思——你上一条消息后面我的回复生成失败了(显示 `assistant turn failed`),所以我没看到你说的是哪句话。
能再发一下那段文字吗?我帮你翻译成中文。

37
memory/2026-05-08.md Normal file
View File

@ -0,0 +1,37 @@
# 2026-05-08 工作日志
## 凭证审计与修复
- 全面审计 xiaokui 工作区所有脚本的飞书凭证引用,修复 10+ 处 `xiaoyan` → `xiaokui` 错误凭证
- 修复 crontab 分组错误:4 个 xiaokui 任务被误放在 xiaoyan 注释块下
- 核心文件:`skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`、`scripts/sync_wechat_feedback_minutely.sh/py`、`scripts/sync_feedback_minutely.sh`、`scripts/sync_group_to_sheet.py`、`scripts/daily_feedback_report.sh`
## 对话链排序 bug 修复
- **根因**:`try_merge_orphan_clusters()` 中的 `big_cluster_ids.remove(best_cid)` 一行会在大簇吸收一个孤儿后把该簇从候选列表移除,导致后续孤儿无法再合并
- **影响**:孤立消息(同发送者连续发言、无引用关系)被排到对话链末尾,对话链不完整
- **修复**:删除该行,大簇可吸收多个孤儿
- **验证**:5月8日李应瑛 "132hotfix的测试包打不开" → 图片 → "卡在如上界面" 三条消息现在正确按时间排列
## 问题描述症状关键词扩展
扩展了 `generate_problem_description()` 的症状检测模式:
| 新增模式 | 关键词 | 描述生成 |
|---------|--------|---------|
| 打不开/无法进入 | 打不开、无法进入、进不去、启动失败 | 提取目标(测试包/APP)+ 版本号 |
| 卡在界面 | 卡在、卡住、停在.*界面、无响应 | 提取卡住的具体界面名 |
| 版本号异常 | 版本.*不对、还是.*版本、显示.*版本 | 提取新旧版本号对比 |
| 测试包/热更 | 测试包、hotfix、热更.*失败、打包.*不了 | 测试包/热更包无法获取 |
同时修复:
- location_str 构建时过滤 "未知" 占位符
- 兜底文本清洗:去除 @提及、URL、emoji([流泪]等)、在句中标点处智能截断
## 对话信息输出逻辑说明(给刘新玉)
汇总文档中"对话信息"列:
- 消息顺序 = sort_threads() 排序结果(根消息按时间 + 递归展开子回复)
- 🚩 报告 = cluster_msgs[0] 的发送者首次出现
- ✅ = cluster_msgs[-1] 的发送者 + 最后一条
- 文本截断 80 字符、媒体追加 📎 链接、纯图显示 [图片]

Binary file not shown.

View File

@ -37,8 +37,8 @@ else
fi
# 4. 发送飞书消息Bot身份
APP_ID=$(jq -r '.apps[0].appId' /root/.openclaw/credentials/xiaoyan/config.json)
APP_SECRET=$(jq -r '.apps[0].appSecret' /root/.openclaw/credentials/xiaoyan/config.json)
APP_ID=$(jq -r '.apps[0].appId' /root/.openclaw/credentials/xiaokui/config.json)
APP_SECRET=$(jq -r '.apps[0].appSecret' /root/.openclaw/credentials/xiaokui/config.json)
TOKEN=$(curl -s -X POST "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal" \
-H "Content-Type: application/json" \
-d "{\"app_id\":\"$APP_ID\",\"app_secret\":\"$APP_SECRET\"}" \

View File

@ -27,7 +27,7 @@ log "=== 同步任务开始 ==="
# 1. 读取源表格所有数据
log "读取源表格数据..."
SOURCE_RAW=$(LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaoyan lark-cli sheets +read --spreadsheet-token "$SOURCE_SPREADSHEET_TOKEN" --range "$SOURCE_SHEET_ID!A:D" --as bot)
SOURCE_RAW=$(LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui lark-cli sheets +read --spreadsheet-token "$SOURCE_SPREADSHEET_TOKEN" --range "$SOURCE_SHEET_ID!A:D" --as bot)
SOURCE_DATA=$(echo "$SOURCE_RAW" | jq -r '.data.valueRange.values | .[1:] | map(select(.[0] != null and .[0] != ""))')
if [ -z "$SOURCE_DATA" ] || [ "$SOURCE_DATA" == "null" ]; then
@ -72,7 +72,7 @@ log "共获取到 $ROW_COUNT 条反馈记录,按时间倒序排列完成"
# 3. 清空目标表格原有数据(保留表头)
log "清空目标表格原有数据..."
EMPTY_DATA=$(printf '[]%.0s' {1..200} | jq -s '.')
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaoyan lark-cli sheets +write \
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui lark-cli sheets +write \
--spreadsheet-token "$TARGET_SPREADSHEET_TOKEN" \
--range "$TARGET_SHEET_ID!A2:O201" \
--values "$EMPTY_DATA" \
@ -83,7 +83,7 @@ echo "$SORTED_DATA" > "$TMP_DATA"
END_ROW=$((ROW_COUNT + 1))
log "写入数据到目标表格..."
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaoyan lark-cli sheets +write \
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui lark-cli sheets +write \
--spreadsheet-token "$TARGET_SPREADSHEET_TOKEN" \
--range "$TARGET_SHEET_ID!A2:O$END_ROW" \
--values "$(cat $TMP_DATA)" \

View File

@ -14,8 +14,11 @@ log() {
log "=== 每日全量分发开始 ==="
# 计算昨天的日期,分发昨天的全量问题
YESTERDAY=$(date -d "yesterday" +%Y-%m-%d)
cd /root/.openclaw/workspace-xiaokui
python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --days 1 --steps 7 --dispatch-mode all >> "$LOG_FILE" 2>&1
python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --date "$YESTERDAY" --steps 7 --dispatch-mode all >> "$LOG_FILE" 2>&1
log "=== 每日全量分发结束 ==="
exit 0

View File

@ -9,7 +9,7 @@ from datetime import datetime, timezone, timedelta
CHAT_ID = "oc_fabff7672e62a9ced7b326ee4a286c26" # 内容测试问题反馈群ID
SPREADSHEET_TOKEN = "E8vFsCmPBhT4SCtNmnJchqeJnJe" # 目标电子表格token
SHEET_ID = "7bce8f" # sheet页ID
LARK_CLI_CONFIG = "/root/.openclaw/credentials/xiaoyan" # Bot凭证目录
LARK_CLI_CONFIG = "/root/.openclaw/credentials/xiaokui" # Bot凭证目录
LAST_SYNC_FILE = "/tmp/last_feedback_sync_time"
COS_BASE_PATH = "feedback" # COS存储路径前缀

View File

@ -13,7 +13,7 @@ LOG_FILE = "/var/log/sync_wechat_feedback.log"
LAST_SYNC_ID_FILE = "/root/.openclaw/workspace-xiaokui/data/last_wechat_sync_id"
TARGET_SPREADSHEET_TOKEN = "AOxbsifk3hybRZteGowcMxNnnqc"
TARGET_SHEET_ID = "f17380"
LARK_CLI_CONFIG = "/root/.openclaw/credentials/xiaoyan"
LARK_CLI_CONFIG = "/root/.openclaw/credentials/xiaokui"
# 数据库配置
DB_CONFIG = {

View File

@ -96,7 +96,7 @@ log "发现 $NEW_COUNT 条新增微信反馈数据最大ID$NEW_MAX_ID"
# 3. 读取目标表格现有数据(飞书+微信)
log "读取目标表格现有数据..."
TARGET_RAW=$(LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaoyan lark-cli sheets +read --spreadsheet-token "$TARGET_SPREADSHEET_TOKEN" --range "$TARGET_SHEET_ID!A2:O" --as bot)
TARGET_RAW=$(LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui lark-cli sheets +read --spreadsheet-token "$TARGET_SPREADSHEET_TOKEN" --range "$TARGET_SHEET_ID!A2:O" --as bot)
TARGET_DATA=$(echo "$TARGET_RAW" | jq '.data.valueRange.values | map(select(.[0] != null and .[0] != ""))')
if [ "$TARGET_DATA" == "null" ] || [ -z "$TARGET_DATA" ]; then
@ -120,14 +120,14 @@ END_ROW=$((ROW_COUNT + 1))
log "写入合并后的数据,共 $ROW_COUNT 条..."
# 先清空原有数据
EMPTY_DATA=$(printf '[]%.0s' {1..200} | jq -s '.')
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaoyan lark-cli sheets +write \
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui lark-cli sheets +write \
--spreadsheet-token "$TARGET_SPREADSHEET_TOKEN" \
--range "$TARGET_SHEET_ID!A2:O201" \
--values "$EMPTY_DATA" \
--as bot >/dev/null 2>&1
# 写入新数据
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaoyan lark-cli sheets +write \
LARKSUITE_CLI_CONFIG_DIR=/root/.openclaw/credentials/xiaokui lark-cli sheets +write \
--spreadsheet-token "$TARGET_SPREADSHEET_TOKEN" \
--range "$TARGET_SHEET_ID!A2:O$END_ROW" \
--values "$(cat $TMP_DATA)" \

View File

@ -16,7 +16,7 @@
--skip-priority 跳过步骤5优先级判定
"""
import sys, os, json, subprocess, argparse, re
import sys, os, json, subprocess, argparse, re, fcntl
from datetime import date, datetime, timedelta
from collections import defaultdict, deque
@ -297,8 +297,6 @@ def sort_threads(rows):
if best_cid:
clusters_dict[best_cid].extend(orphan_msgs)
del clusters_dict[orphan_id]
if best_cid in big_cluster_ids:
big_cluster_ids.remove(best_cid)
return clusters_dict
@ -347,6 +345,41 @@ def sort_threads(rows):
return ordered, clusters, cluster_order
def sort_cluster_msgs(cluster_msgs):
    """
    Order the messages of one cluster as a threaded conversation.

    Root messages (field 5 is empty, or names a message that is not part of
    this cluster) are emitted in ascending order of field 6. Each root is
    immediately followed by its replies, depth first, with sibling replies
    also in ascending order of field 6.

    Args:
        cluster_msgs: list of message rows. Only three positions are read:
            row[0] (message id), row[5] (id of the quoted/parent message,
            may be empty) and row[6] (sort key; presumably the message
            timestamp string - verify against the query that builds rows).

    Returns:
        A new list holding the rows in threaded order. Rows that cannot be
        reached from any root (for example the members of a reply cycle)
        are appended at the end in their original input order.
    """
    ids = {m[0] for m in cluster_msgs}
    mdict = {m[0]: m for m in cluster_msgs}

    roots = [m for m in cluster_msgs if not m[5] or m[5] not in ids]
    roots.sort(key=lambda x: x[6])

    # Index replies by parent id once, instead of rescanning every message
    # for every visited node (which made the walk quadratic).
    children = {}
    for m in mdict.values():
        if m[5] and m[5] in ids:
            children.setdefault(m[5], []).append(m)
    for replies in children.values():
        replies.sort(key=lambda x: x[6])  # stable: ties keep input order

    seen = set()
    ordered = []
    for r in roots:
        # Explicit stack instead of recursion: a long reply chain must not
        # hit the interpreter recursion limit, and the `seen` check on pop
        # guarantees termination even if the parent links are inconsistent.
        stack = [r[0]]
        while stack:
            mid = stack.pop()
            if mid in seen:
                continue
            seen.add(mid)
            ordered.append(mdict[mid])
            # Push replies in reverse so the earliest one is popped first.
            stack.extend(c[0] for c in reversed(children.get(mid, ())))

    # Anything not reachable from a root keeps its original relative order.
    for m in cluster_msgs:
        if m[0] not in seen:
            ordered.append(m)
    return ordered
# ============================================================
# 步骤 4问题归纳
# ============================================================
@ -445,29 +478,28 @@ def generate_problem_description(cluster_msgs, location, root_text):
if not platform:
platform = device if device and device != "未知" else ""
# 场景和关卡
# 场景和关卡(过滤占位值"未知"
loc_parts = []
if platform:
loc_parts.append(platform)
if scene and course:
if scene and course and scene != "未知":
loc_parts.append(f"{scene}{course}")
elif scene:
elif scene and scene != "未知":
loc_parts.append(scene)
elif course:
loc_parts.append(f"关卡{course}")
location_str = "".join(loc_parts) if loc_parts else ""
# 具体表现
# 具体表现(按优先级从高到低排列,先命中的权重更大)
symptoms = []
# Loading超时 + 数据丢失(关联)
# --- 加载/数据/音频 ---
has_loading = bool(re.search(r'Loading.*长|加载.*慢|转星星.*10|加载.*10.*秒|loading.*timeout|加载超时', all_text, re.IGNORECASE))
has_data_loss = bool(re.search(r'数据.*丢|数据.*没|组件.*数据.*没|数据.*不见|数据.*丢失', all_text))
has_no_audio = bool(re.search(r'无音频|没声音|没有声音|组件无音频', all_text))
if has_loading and has_data_loss:
# 提取正常加载时间对比
normal_match = re.search(r'(正常|一般|通常).{0,3}(\d+)\s*秒', all_text)
normal_time = f"(正常{normal_match.group(2)}秒)" if normal_match else ""
loading_desc = f"Loading 耗时约 10 秒{normal_time}"
@ -484,21 +516,65 @@ def generate_problem_description(cluster_msgs, location, root_text):
elif has_data_loss:
symptoms.append("组件数据丢失")
# HUD/UI
# --- 崩溃/闪退/卡死 ---
if re.search(r'闪退|崩溃|crash|闪.*退', all_text, re.IGNORECASE):
symptoms.append("闪退/崩溃")
# --- 无法启动/打不开/卡在界面 ---
has_cannot_open = bool(re.search(r'(打不开|无法打开|无法进入|进不去|无法启动|启动.*失败|打开.*失败)', all_text))
has_stuck = bool(re.search(r'(卡在|卡住|卡死|停在.*界面|没反应|无响应)', all_text))
# 版本号/包异常
version_issue_pat = r'版本.*(不对|错误|没变|没升|还是.*版本|显示.*版本|未更新|版本号|没.*升级)'
has_version_issue = bool(re.search(version_issue_pat, all_text))
build_issue_pat = r'(测试包|hotfix|热更.*(失败|问题|包)|包.*打不|打不.*包|打.*132.*包|包.*没法|没法.*打|打包.*(没有|不了|选项))'
has_build_issue = bool(re.search(build_issue_pat, all_text, re.IGNORECASE))
if has_cannot_open or has_stuck:
# 提取具体场景:是打不开还是卡住了
detail = ""
if has_cannot_open:
# 尝试提取具体描述
m = re.search(r'(测试包|安装包|APP|应用|程序|客户端)', all_text)
target = m.group(1) if m else "应用"
# 检查是否有版本号异常
ver_m = re.search(r'(还是|显示|右下角).{0,5}(\d+\.\d+\.\d+)', all_text)
if ver_m:
detail = f"{target}无法打开(仍显示旧版本 {ver_m.group(2)}"
else:
detail = f"{target}无法打开/进入"
elif has_stuck:
m = re.search(r'卡在(?:了)?(.{2,15}?)(?:界面|页面|屏幕||。|$)', all_text)
if m:
detail = f"卡在{m.group(1)}界面"
else:
detail = "卡在界面上无响应"
symptoms.append(detail)
if has_version_issue and not has_cannot_open:
# 版本号相关,但还没被上面的"打不开+旧版本"覆盖
m = re.search(r'(还是|显示|版本).{0,5}(\d+\.\d+\.\d+)', all_text)
if m:
symptoms.append(f"版本号显示异常(仍为{m.group(2)},未更新到新版本)")
else:
symptoms.append("版本号显示异常,未更新到预期版本")
if has_build_issue and not has_cannot_open:
# 测试包/热更相关问题
symptoms.append("测试包/热更包无法获取或打包")
# --- HUD/UI ---
if re.search(r'HUD.*变成.*条|hud.*窄|hud.*变', all_text, re.IGNORECASE):
symptoms.append("NPC 头上的 HUD 偶尔变成一小条")
if re.search(r'UI.*显示|界面.*显示|显示.*异常|花屏|白屏|黑屏|闪烁', all_text):
symptoms.append("UI显示异常")
# 崩溃/闪退
if re.search(r'闪退|崩溃|crash|卡死|卡住|无响应|闪.*退', all_text, re.IGNORECASE):
symptoms.append("闪退/崩溃")
# 版本/更新
# --- 版本更新相关 ---
if re.search(r'版本.*更新|更新.*版本|更新.*后|升级.*后|新版.*上线', all_text):
symptoms.append("版本更新后出现")
if not has_version_issue and not has_build_issue:
symptoms.append("版本更新后出现")
# 频率标签(放在描述前面)
# --- 频率标签(放在描述前面) ---
freq_tag = ""
if re.search(r'偶尔|有时|有时候|随机|没什么规律|不.*规律', all_text):
freq_tag = "【偶现】"
@ -507,14 +583,25 @@ def generate_problem_description(cluster_msgs, location, root_text):
elif re.search(r'高概率|很大概率|极大概率', all_text, re.IGNORECASE):
freq_tag = "【高概率】"
# 频率后缀(不要了,改用前缀标签)
freq = ""
# 如果没有提取到症状,用首条报告摘要
# --- 兜底:没有匹配到任何症状,从首条消息提取摘要 ---
if not symptoms:
summary = root_text[:80] if root_text else (all_text[:80].strip() or "未知问题")
if len(summary) > 50:
summary = summary[:47] + "..."
# 清洗文本:去除 @ 提及、URL、媒体标记、emoji、多余空白
summary = root_text if root_text else all_text
summary = re.sub(r'@\S+', '', summary) # 去 @ 提及
summary = re.sub(r'https?://\S+', '', summary) # 去 URL
summary = re.sub(r'\[Image:[^\]]+\]', '', summary) # 去图片标记
summary = re.sub(r'\[流泪\]|\[笑哭\]|\[捂脸\]|\[旺柴\]|\[加油\]', '', summary) # 去常见 emoji
summary = re.sub(r'\s+', ' ', summary).strip() # 合并空白
if not summary:
summary = "未知问题"
elif len(summary) > 60:
# 在合适位置截断:优先在标点处断开
truncated = summary[:60]
break_point = max(truncated.rfind(''), truncated.rfind(''), truncated.rfind(''), truncated.rfind(''))
if break_point > 20:
summary = truncated[:break_point] + ""
else:
summary = truncated[:57] + "..."
symptoms.append(summary)
symptom_str = "".join(symptoms)
@ -585,8 +672,10 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None):
def clean_text(t):
if not t:
return ""
t = t.replace('\n', ' ').replace('\r', ' ') # 去换行,防止破坏表格
t = re.sub(r'\[Image:[^\]]+\]', '', t)
t = re.sub(r'https?://\S+', '', t)
t = re.sub(r'\s+', ' ', t) # 合并多余空白
return t.strip()
# 收集关键信息供 AI 归纳
@ -636,7 +725,11 @@ def summarize_cluster(cluster_msgs, idx, priority_info=None):
text = text[:77] + "..."
info_parts.append(text)
if media_url:
info_parts.append(f"📎 [文件]({media_url})")
# 图片格式直接用图片标签,飞书可点击预览;其他文件保留链接格式
if media_url.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
info_parts.append(f"![图片]({media_url})")
else:
info_parts.append(f"📎 [文件]({media_url})")
if not info_parts:
info_parts.append("[图片]")
dialogue_info = " | ".join(info_parts)
@ -780,7 +873,8 @@ def generate_summary(clusters, cluster_order, skip_priority=False):
idx = 0
for vc in valid_clusters:
idx += 1
summary = summarize_cluster(vc["msgs"], idx, vc.get("priority_info"))
sorted_msgs = sort_cluster_msgs(vc["msgs"])
summary = summarize_cluster(sorted_msgs, idx, vc.get("priority_info"))
lines.append(summary)
lines.append("")
@ -796,7 +890,7 @@ def get_tenant_token(cred_dir=None):
"""获取 Bot 的 tenant_access_token。cred_dir 可选指定凭证目录。"""
import urllib.request
if cred_dir is None:
cred_dir = os.environ.get("LARKSUITE_CLI_CONFIG_DIR", "/root/.openclaw/credentials/xiaoyan")
cred_dir = os.environ.get("LARKSUITE_CLI_CONFIG_DIR", "/root/.openclaw/credentials/xiaokui")
config = json.load(open(os.path.join(cred_dir, "config.json")))
app_id = config["apps"][0]["appId"]
app_secret = config["apps"][0]["appSecret"]
@ -836,14 +930,26 @@ def list_child_nodes():
def create_child_doc(title):
"""在「用户反馈问题汇总」下创建子文档,返回 obj_token"""
import urllib.request
import urllib.request, time
token = get_tenant_token()
url = f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{SUMMARY_SPACE_ID}/nodes"
# 提取标题中的日期生成sort_tag时间戳越大排序越靠前
sort_tag = int(time.time()) # 默认当前时间
try:
# 标题格式如2026-05-08 问题反馈
date_str = title.split(" ")[0]
dt = datetime.strptime(date_str, "%Y-%m-%d")
sort_tag = int(dt.timestamp())
except:
pass
body = json.dumps({
"obj_type": "docx",
"node_type": "origin",
"parent_node_token": SUMMARY_PARENT_NODE,
"title": title
"title": title,
"sort_tag": sort_tag
}).encode()
req = urllib.request.Request(url, data=body, headers={
"Authorization": f"Bearer {token}",
@ -857,10 +963,10 @@ def create_child_doc(title):
return None
def update_summary_doc_as_children(day_sections):
def update_summary_doc_as_children(day_summaries):
"""
将各日期的归纳结果写入用户反馈问题汇总的子文档中
day_sections: dict, key=日期字符串('2026-05-06'), value=该日的归纳 markdown
day_summaries: dict, key=日期字符串('2026-05-06'), value={'summary': str, 'has_p0': bool}
逻辑
1. 列出已有子文档
@ -870,29 +976,34 @@ def update_summary_doc_as_children(day_sections):
env = get_env()
existing_nodes = list_child_nodes()
for day in sorted(day_sections.keys(), reverse=True):
# 文件锁:防止多进程并发创建同一天的子文档
lock_path = "/tmp/xiaokui_summary_create.lock"
for day in sorted(day_summaries.keys(), reverse=True):
title = f"{day} 问题反馈"
content = day_sections[day]
content = day_summaries[day]
# 双重检查:重新列一次子节点,避免并发创建重复
existing_nodes = list_child_nodes()
if title in existing_nodes:
obj_token = existing_nodes[title]["obj_token"]
print(f" 📝 更新子文档: {title}")
else:
obj_token = create_child_doc(title)
if not obj_token:
# 创建失败可能是并发冲突,再查一次
existing_nodes = list_child_nodes()
if title in existing_nodes:
obj_token = existing_nodes[title]["obj_token"]
print(f" 📝 已存在,使用已有子文档: {title}")
else:
continue
# 加锁:整个检查+创建+写入流程串行化,彻底避免并发冲突
with open(lock_path, 'w') as lock_f:
fcntl.flock(lock_f, fcntl.LOCK_EX)
existing_nodes = list_child_nodes()
if title in existing_nodes:
obj_token = existing_nodes[title]["obj_token"]
print(f" 📝 更新子文档: {title}")
else:
print(f" 创建子文档: {title}")
obj_token = create_child_doc(title)
if obj_token:
print(f" 创建子文档: {title}")
else:
# 创建失败可能是并发冲突,再查一次
existing_nodes = list_child_nodes()
if title in existing_nodes:
obj_token = existing_nodes[title]["obj_token"]
print(f" 📝 已存在,使用已有子文档: {title}")
else:
continue
# 写入内容
# 写入内容锁内已保证obj_token唯一不会并发写入
result = subprocess.run(
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
"--mode", "overwrite",
@ -904,139 +1015,52 @@ def update_summary_doc_as_children(day_sections):
d = json.loads(result.stdout)
if d.get("ok"):
print(f"{title} 写入成功")
# 重点校验:防止内容重复/空文档
check_pass = False
for retry in range(2): # 最多重试2次
check_result = subprocess.run(
[CLI, "docs", "+fetch", "--doc", obj_token, "--as", "bot"],
env=env, capture_output=True, text=True, timeout=10
)
try:
check_data = json.loads(check_result.stdout)
saved_content = check_data['data']['markdown']
dup_count = saved_content.count('## 今日问题归纳')
if dup_count > 1:
print(f" ⚠️ 检测到内容重复(共{dup_count}次),第{retry+1}次重试写入")
subprocess.run(
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
"--mode", "overwrite",
"--markdown", content],
env=env, capture_output=True, text=True, timeout=15
)
elif len(saved_content.strip()) < 100:
print(f" ⚠️ 检测到文档为空/内容过短,第{retry+1}次重试写入")
subprocess.run(
[CLI, "docs", "+update", "--doc", obj_token, "--as", "bot",
"--mode", "overwrite",
"--markdown", content],
env=env, capture_output=True, text=True, timeout=15
)
else:
print(f"{title} 校验通过:长度{len(saved_content)},无内容重复")
check_pass = True
break
except Exception as e:
print(f" ⚠️ 校验出错,重试: {str(e)[:80]}")
if not check_pass:
print(f"{title} 多次重试后仍校验失败")
else:
print(f" ⚠️ {title} 写入失败: {result.stdout[:100]}")
except Exception:
print(f" ⚠️ {title} 写入异常: {result.stderr[:100]}")
print(f" 📂 共处理 {len(day_sections)} 个日期的子文档")
print(f" 📂 共处理 {len(day_summaries)} 个日期的子文档")
return True
def main():
parser = argparse.ArgumentParser(description="飞书问题反馈同步(五步完整流程)")
parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3")
parser.add_argument("--dry-run", action="store_true", help="仅预览不写入")
parser.add_argument("--skip-priority", action="store_true", help="跳过步骤5优先级判定")
parser.add_argument("--skip-dispatch", action="store_true", help="跳过步骤7问题分发")
parser.add_argument("--dispatch-mode", type=str, default="all", choices=["all", "p0"], help="分发模式all=发全部, p0=仅发P0级问题默认 all")
parser.add_argument("--steps", type=str, default="1-7", help="执行步骤范围,如 '1-3'(仅写入表格)或 '4-7'(归纳+分类+分发)")
args = parser.parse_args()
end_date = date.today()
start_date = end_date - timedelta(days=args.days - 1)
print(f"📊 查询 {start_date} ~ {end_date} 数据...")
# end_date 是当天不包含,所以用 end_date + 1 天作为上界
next_day = (end_date + timedelta(days=1)).strftime("%Y-%m-%d")
rows = query_messages(start_date.strftime("%Y-%m-%d"), next_day)
# 按天分组
groups = defaultdict(list)
for r in rows:
day = r[6][:10]
groups[day].append(r)
if not groups:
print("✅ 无新数据,无需同步")
return
print(f"📋 数据分布:{dict((d, len(ms)) for d, ms in groups.items())}")
if args.dry_run:
print("\n🔍 Dry run预览前 5 条:")
for day, msgs in sorted(groups.items()):
print(f"\n [{day}] ({len(msgs)} 条)")
for m in msgs[:3]:
print(f" {m[6]} | {m[1]} | {str(m[3])[:50]}")
return
existing = get_existing_sheets()
print(f"📑 现有 sheet{list(existing.keys())}")
total_written = 0
day_summaries = {} # 收集所有天的归纳结果
for day in sorted(groups.keys()):
msgs = groups[day]
# 获取或创建 sheet
if day in existing:
sheet_id = existing[day]
print(f"\n🔄 [{day}] 已有 sheet {sheet_id},覆盖写入")
else:
print(f"\n [{day}] 创建 sheet")
sheet_id = create_sheet(day)
print(f" 新建 sheet_id = {sheet_id}")
# 对话链排序(含推断引用)
sorted_msgs, clusters, cluster_order = sort_threads(msgs)
print(f" 排序完成:{len(sorted_msgs)}")
# 根据 --steps 参数决定是否执行步骤4-6
run_steps = args.steps
do_summary = "4" in run_steps or "5" in run_steps or "6" in run_steps or "7" in run_steps
if do_summary:
# 步骤 4问题归纳
# 步骤 5优先级判定默认启用--skip-priority 可跳过)
summary, has_content = generate_summary(clusters, cluster_order, skip_priority=args.skip_priority)
if has_content:
priority_count = summary.count('优先级:') if not args.skip_priority else 0
print(f" 归纳完成:{summary.count('### 问题')} 个问题")
if not args.skip_priority:
print(f" 步骤5 优先级判定完成:{priority_count} 个问题已评定优先级并排序")
print(f" (归纳内容见下方)")
print(summary)
day_summaries[day] = summary
else:
print(" ⚠️ 无有效问题簇可归纳")
# 写入表格步骤2
success = write_sheet(sheet_id, sorted_msgs)
if success:
total_written += len(sorted_msgs)
print(f" ✅ 写入 {len(sorted_msgs)}")
else:
print(f" ❌ 写入失败")
# 统一写入「用户反馈问题汇总」文档(增量更新,保留已有日期内容)— 步骤4-6
if day_summaries and do_summary:
update_summary_doc_as_children(day_summaries)
# 步骤7问题分发发送到群聊 + @相关人)
if do_summary and not args.skip_dispatch:
dispatch_mode = args.dispatch_mode
print(f"\n📨 步骤7问题分发模式: {dispatch_mode}...")
for day, info in day_summaries.items():
# 检查当天归纳中是否有 P0 问题
has_p0 = "⚠️ P0级" in info["summary"]
if dispatch_mode == "p0" and not has_p0:
print(f" [{day}] 无P0问题跳过分发")
continue
dispatch_summary_to_chat(day, info["summary"], p0_only=(dispatch_mode == "p0"))
print(f"\n🎉 同步完成,总计写入 {total_written}")
if __name__ == "__main__":
main()
# ============================================================
# 步骤 7问题分发发送到群聊 + @相关人)
# ============================================================
# 分发目标群配置
DISPATCH_CHAT_ID = "oc_4171a2188f2554522a4309f2d7c27753" # 「小葵小葵」问题反馈群
# P0问题默认@的人user_id
P0_NOTIFY_USERS = [
"eggbg21g", # 毋益飞
]
# Bot身份: 使用 xiaokui小葵的凭证发消息
DISPATCH_CRED_DIR = "/root/.openclaw/credentials/xiaokui"
def dispatch_summary_to_chat(day_label, summary_text, p0_only=False):
"""
@ -1170,3 +1194,76 @@ def dispatch_summary_to_chat(day_label, summary_text, p0_only=False):
print(f" ⚠️ 分发异常: {e}")
return False
def main():
parser = argparse.ArgumentParser(description="飞书问题反馈同步(五步完整流程)")
parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3")
parser.add_argument("--date", type=str, default=None, help="只处理指定日期(格式 YYYY-MM-DD覆盖 --days 参数")
parser.add_argument("--dry-run", action="store_true", help="仅预览不写入")
parser.add_argument("--skip-priority", action="store_true", help="跳过步骤5优先级判定")
parser.add_argument("--skip-dispatch", action="store_true", help="跳过步骤7问题分发")
parser.add_argument("--dispatch-mode", type=str, default="all", choices=["all", "p0"], help="分发模式all=发全部, p0=仅发P0级问题默认 all")
parser.add_argument("--steps", type=str, default="1-7", help="执行步骤范围,如 '1-3'(仅写入表格)或 '4-7'(归纳+分类+分发)")
args = parser.parse_args()
if args.date:
start_date = datetime.strptime(args.date, "%Y-%m-%d").date()
end_date = start_date
else:
end_date = date.today()
start_date = end_date - timedelta(days=args.days - 1)
print(f"📊 查询 {start_date} ~ {end_date} 数据...")
# end_date 是当天不包含,所以用 end_date + 1 天作为上界
next_day = (end_date + timedelta(days=1)).strftime("%Y-%m-%d")
rows = query_messages(start_date.strftime("%Y-%m-%d"), next_day)
# 按天分组
groups = defaultdict(list)
for r in rows:
day = r[6][:10]
groups[day].append(r)
if not groups:
print("✅ 无新数据,无需同步")
return
print(f"📋 数据分布:{dict((d, len(ms)) for d, ms in groups.items())}")
if args.dry_run:
print("\n🔍 Dry run预览前 5 条:")
for day, msgs in sorted(groups.items()):
print(f"\n [{day}] ({len(msgs)} 条)")
for m in msgs[:3]:
print(f" {m[6]} | {m[1]} | {str(m[3])[:50]}")
return
existing = get_existing_sheets()
print(f"📑 现有 sheet{list(existing.keys())}")
total_written = 0
day_summaries = {} # 收集所有天的归纳结果
for day in sorted(groups.keys()):
msgs = groups[day]
# 获取或创建 sheet
if day in existing:
sheet_id = existing[day]
print(f"\n🔄 [{day}] 已有 sheet {sheet_id},覆盖写入")
else:
print(f"\n [{day}] 创建 sheet")
sheet_id = create_sheet(day)
print(f" 新建 sheet_id = {sheet_id}")
# 对话链排序(含推断引用)
sorted_msgs, clusters, cluster_order = sort_threads(msgs)
print(f" 排序完成:{len(sorted_msgs)}")
# 根据 --steps 参数决定是否执行步骤4-6
run_steps = args.steps
do_summary = "4" in run_steps or "5" in run_steps or "6" in run_steps or "7" in run_steps
if do_summary:
# 步骤 4问题归纳
if __name__ == "__main__":
main()