auto backup 2026-05-22 08:10:01
This commit is contained in:
parent
6a43e3940c
commit
7d7f2e13c9
@ -13,5 +13,5 @@ tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3
|
||||
user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81
|
||||
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
|
||||
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
|
||||
feishu-feedback-sync e51dd6d0c4f26898ebcc069ab19272d17e86c01a767c36c1d65ab576da38ba11
|
||||
feishu-group-msg-sync 1b581de76d419e6a33db0836125efc16ef2c972013fcae6f08c03aa7e2276445
|
||||
feishu-feedback-sync 1ec556db6c8523c36efacde6fb92659a5274f65b8104f83e38eacbdc419377ba
|
||||
|
||||
@ -2,10 +2,3 @@
|
||||
|
||||
# 自动同步Skill到公司SkillHub
|
||||
- 查看是否有需要推送的skill: 执行脚本 /root/.openclaw/workspace-xiaokui/scripts/sync_skill_to_skillhub.sh,如果推送成功,给李若松发送通知消息,无需推送则静默
|
||||
|
||||
# AI 问题归纳(每日)
|
||||
- 检查 output/daily_feedback/ 下是否有当天日期的 cluster_context_YYYY-MM-DD.json 文件
|
||||
- 如果有,读取上下文 JSON,为每个问题簇生成精炼的问题描述
|
||||
- 将 AI 描述写入 output/daily_feedback/ai_descriptions_YYYY-MM-DD.json
|
||||
- 执行 python3 skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py --apply-ai output/daily_feedback/ai_descriptions_YYYY-MM-DD.json
|
||||
- 完成后删除 cluster_context 文件(避免重复处理)
|
||||
|
||||
@ -13,3 +13,4 @@
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-11T11:26:09.201Z","query":"知识库 sort_tag 空 文档排序","resultCount":3,"results":[{"path":"memory/2026-05-09.md","startLine":46,"endLine":65,"score":1},{"path":"memory/2026-05-09.md","startLine":78,"endLine":95,"score":1},{"path":"memory/2026-05-06.md","startLine":20,"endLine":47,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-12T06:08:28.032Z","query":"优先级规则 priority rules","resultCount":2,"results":[{"path":"memory/2026-05-07.md","startLine":23,"endLine":50,"score":1},{"path":"memory/2026-05-07.md","startLine":1,"endLine":28,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-14T13:09:32.054Z","query":"微信用户反馈 数据库 表结构 wechat_group_message","resultCount":1,"results":[{"path":"memory/2026-05-07.md","startLine":86,"endLine":116,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-21T10:38:19.153Z","query":"crontab 注释 每分钟 归纳分类 P0分发 步骤","resultCount":4,"results":[{"path":"memory/2026-05-09.md","startLine":17,"endLine":37,"score":1},{"path":"memory/2026-04-17.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-04-10.md","startLine":44,"endLine":68,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1}]}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updatedAt": "2026-05-14T13:09:32.054Z",
|
||||
"updatedAt": "2026-05-21T10:38:19.153Z",
|
||||
"entries": {
|
||||
"memory:memory/2026-04-18.md:1:5": {
|
||||
"key": "memory:memory/2026-04-18.md:1:5",
|
||||
@ -81,24 +81,26 @@
|
||||
"endLine": 142,
|
||||
"source": "memory",
|
||||
"snippet": "问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) 已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 #### 触发方式 - 手动:「同步飞书反馈」「整理反馈对话链」 - 定时:每天 10:00 crontab 自动执行 ## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 ### 步骤4 包含两部分 1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} 2. **当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: - \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N",
|
||||
"recallCount": 5,
|
||||
"recallCount": 6,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 5,
|
||||
"totalScore": 6,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"lastRecalledAt": "2026-05-11T09:48:27.002Z",
|
||||
"lastRecalledAt": "2026-05-21T10:38:19.153Z",
|
||||
"queryHashes": [
|
||||
"f865295b9ac7",
|
||||
"cd9c89262c30",
|
||||
"ac7fd0b52a4e",
|
||||
"49c0959dc960",
|
||||
"70caeba05281"
|
||||
"70caeba05281",
|
||||
"2f315a9f8529"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-06",
|
||||
"2026-05-07",
|
||||
"2026-05-11"
|
||||
"2026-05-11",
|
||||
"2026-05-21"
|
||||
],
|
||||
"conceptTags": [
|
||||
"quote-message-id",
|
||||
@ -258,20 +260,22 @@
|
||||
"endLine": 23,
|
||||
"source": "memory",
|
||||
"snippet": "# 2026-04-17 工作日志 ## 飞书群消息同步改造(李若松要求) ### 变更内容 - **存储从飞书表格改为 MySQL**:新建 `vala_test.lark_group_message` 表,结构参考 `wechat_group_message` - **同步频率**:从每6小时改为每4小时 - **数据范围**:2026.4.1 起的「内容测试问题反馈」群消息 - **数据库账户**:chatbot(test环境,仅对 lark_group_message 有写入权限) ### 完成事项 1. ✅ 创建 `lark_group_message` 表(唯一键 message_id 防重复) 2. ✅ 编写新同步脚本 `scripts/sync_lark_group_to_mysql.py`(基于原有 sync_group_to_sheet.py 改造) 3. ✅ 首次全量同步完成:172 条记录(2026-04-01 ~ 2026-04-17),含文本134条、图片17条、视频10条、富文本9条、表情2条 4. ✅ crontab 定时任务已替换:旧的每6小时飞书表格同步 → 新的每4小时MySQL同步 5. ✅ 更新 secrets.md 记录 chatbot 账户 6. ✅ 更新 user-feedback-collector SKILL.md 反馈数据源信息 ### 文件变更 - 新增:`scripts/sync_lark_group_to_mysql.py`(核心同步脚本) - 新增:`scripts/run_lark_group_sync.s",
|
||||
"recallCount": 2,
|
||||
"recallCount": 3,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 2,
|
||||
"totalScore": 3,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-08T10:25:44.365Z",
|
||||
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||
"lastRecalledAt": "2026-05-21T10:38:19.153Z",
|
||||
"queryHashes": [
|
||||
"cc0dd7ef50d7",
|
||||
"5abc37103c15"
|
||||
"5abc37103c15",
|
||||
"2f315a9f8529"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-08",
|
||||
"2026-05-11"
|
||||
"2026-05-11",
|
||||
"2026-05-21"
|
||||
],
|
||||
"conceptTags": [
|
||||
"vala-test.lark-group-message",
|
||||
@ -454,18 +458,20 @@
|
||||
"endLine": 68,
|
||||
"source": "memory",
|
||||
"snippet": "### 验证结果 - 全量同步成功:47条记录写入表格,5张图片+4个视频上传COS - crontab 每小时整点自动执行:`0 * * * *` - 群ID:oc_fabff7672e62a9ced7b326ee4a286c26 ## 封装两个通用Skill **来源:** [李若松] 要求将功能封装为可复用skill ### 1. tencent-cos-upload - 路径:`/root/.openclaw/skills/tencent-cos-upload/` - 功能:上传文件到腾讯COS并生成可访问URL - 提供命令行调用和Python模块两种方式 - 核心文件:`scripts/cos_upload.py`(CosUploader类) ### 2. feishu-group-msg-sync - 路径:`/root/.openclaw/skills/feishu-group-msg-sync/` - 功能:定期同步飞书群聊消息到电子表格,媒体上传COS - 依赖 tencent-cos-upload skill - 核心文件:`scripts/sync_group_to_sheet.py`(模板脚本,修改顶部配置即可复用) - 参考文件:`references/lark-cli-cheatsheet.md` ### 项目脚本也改为引用skill - `scripts/sync_feedback_group.py` 现在只做配置覆盖,逻辑全部引用自skill",
|
||||
"recallCount": 1,
|
||||
"recallCount": 2,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"totalScore": 2,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||
"lastRecalledAt": "2026-05-11T10:43:36.686Z",
|
||||
"lastRecalledAt": "2026-05-21T10:38:19.153Z",
|
||||
"queryHashes": [
|
||||
"5abc37103c15"
|
||||
"5abc37103c15",
|
||||
"2f315a9f8529"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-11"
|
||||
"2026-05-11",
|
||||
"2026-05-21"
|
||||
],
|
||||
"conceptTags": [
|
||||
"tencent-cos-upload",
|
||||
@ -664,6 +670,37 @@
|
||||
"飞书问题反馈-近3天",
|
||||
"chat-id"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-09.md:17:37": {
|
||||
"key": "memory:memory/2026-05-09.md:17:37",
|
||||
"path": "memory/2026-05-09.md",
|
||||
"startLine": 17,
|
||||
"endLine": 37,
|
||||
"source": "memory",
|
||||
"snippet": "- 修复:改为 `sort_tag = 9999999999 - int(dt.timestamp())` 实现日期降序 - ⚠️ 不足:飞书 Wiki V2 API 创建节点时 `sort_tag` 参数可能被忽略(API 返回均为 null) - 兜底方案:按日期由近到远的顺序依次创建子文档,利用 `node_create_time` 自然排序 - 所有旧文档已删除并按正确顺序重建(5月8日→5月7日→4月28日),5月6日需手动创建 ### 飞书分发消息 `<at>` 标签修复 - 根因:`dispatch_summary_to_chat` 中两步打架——第一步 `re.sub` 注入 HTML `<at>` 文本,第二步 `content_parts` 用正确 `{\"tag\":\"at\"}` 格式插入 - 修复:删除 `re.sub` 注入原始 HTML 标签的代码,仅保留富文本 at tag ### 废弃定时任务的 crontab 清理 - 已删除 xiaokui crontab 中 `*/5 * * * *` 的「飞书问题反馈同步每分钟」任务(含 wrapper 脚本调用) - 该任务每分钟执行一次打开 MySQL 连接/查询/返回存在潜在连接泄漏风险 ### 5月9日补跑问题 - 5月9日10:00定时任务因 `IndentationError` 失败(凌晨08:10自动备份 `c3c8dbb` 损坏了脚本) - 修复:从上游版本恢复被清空的步骤4-7逻辑 + 模块常量 - 手动补跑5月8日数据(8条反馈,1个P0)成功 ## 2026-05-09 工作日志",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-21T10:38:19.153Z",
|
||||
"lastRecalledAt": "2026-05-21T10:38:19.153Z",
|
||||
"queryHashes": [
|
||||
"2f315a9f8529"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-21"
|
||||
],
|
||||
"conceptTags": [
|
||||
"备份",
|
||||
"sort-tag",
|
||||
"dt.timestamp",
|
||||
"node-create-time",
|
||||
"dispatch-summary-to-chat",
|
||||
"re.sub",
|
||||
"content-parts",
|
||||
"连接/查询/返回存在潜在连接泄漏风险"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
124
memory/2026-05-21.md
Normal file
124
memory/2026-05-21.md
Normal file
@ -0,0 +1,124 @@
|
||||
# 2026-05-21 工作日志
|
||||
|
||||
## P0 实时检测与推送 [刘新玉需求]
|
||||
|
||||
### 背景
|
||||
- 原有两个每分钟 crontab 任务已被注释(5/11),因为每次全量重跑浪费资源且 P0 会重复推送
|
||||
- 刘新玉要求做真正的增量 P0 实时分发
|
||||
|
||||
### 完成事项
|
||||
1. ✅ 新建 `scripts/detect_p0_realtime.py`(~200行):
|
||||
- 复用 `sync_feishu_feedback.py` 的聚类逻辑 + `priority_classifier.py` 的优先级判定
|
||||
- 查询最近 2 小时消息 → 聚类 → 判定 P0 → 去重(簇签名 MD5)→ 推送
|
||||
- 去重状态存在 `tmp/p0_dispatched_state.json`,24 小时自动过期
|
||||
- 每天 10:00-10:01 自动清空去重,配合全量分发
|
||||
2. ✅ crontab 每分钟执行:`* * * * * python3 detect_p0_realtime.py`
|
||||
3. ✅ 步骤 4-6 不做每分钟恢复(增量改造成本高,日汇总实时性需求弱)
|
||||
|
||||
## AI 归纳流程修复 [刘新玉需求]
|
||||
|
||||
### 背景
|
||||
- 5/15 问题描述出现严重退化:问题一变成"可以在这里Po问题"(元指令文本),问题二变成"图片 (1/1)"
|
||||
- 根因:(1) 关键词模式只能覆盖约 8 类症状,不匹配时退化到首条消息原文 (2) AI 归纳靠心跳触发,5/15 当天漏执行
|
||||
|
||||
### 完成事项
|
||||
1. ✅ 新建 `scripts/ai_summarize_feedback.py`:
|
||||
- 读取 `cluster_context_{date}.json`,调用 DeepSeek API(deepseek-v4-pro)生成精炼问题描述
|
||||
- 保存 `ai_descriptions_{date}.json` → 调用 `--apply-ai` 回写到知识库
|
||||
- 回写后自动清理 context 文件
|
||||
2. ✅ crontab 每天 10:05 执行(在 10:00 全量同步之后)
|
||||
3. ✅ HEARTBEAT.md 移除 AI 归纳任务(已由 crontab 接管)
|
||||
4. ✅ 扩展 `generate_problem_description()` 关键词覆盖:
|
||||
- 新增:语音识别/判分、内容/命名缺失、后台弹窗残留、网络/VPN、热更/打包/测试包
|
||||
- 新增:后台加载场景识别
|
||||
- 5/15 验证:问题一从"可以在这里Po问题"→"语音识别不准确(对话表达,只识别成错误内容)";问题二从"图片 (1/1)"→"后台加载提示/弹窗未自动消失,持续显示 需确认是否因 VPN/网络代理导致"
|
||||
5. ✅ 未启用步骤 4-6 每分钟执行(理由同刘新玉确认)
|
||||
|
||||
### 调度总览(更新后)
|
||||
```
|
||||
每 5 分钟 → 群消息同步入 MySQL
|
||||
每分钟 → P0 实时检测 + 增量推送
|
||||
每分钟 → 反馈数据实时导出到表格
|
||||
每天 10:00 → 全量七步处理 + 全量分发
|
||||
每天 10:05 → AI 归纳(DeepSeek 生成描述 → 回写文档)
|
||||
```
|
||||
# 2026-05-21 工作日志
|
||||
|
||||
## P0 实时检测与推送 [刘新玉需求]
|
||||
|
||||
### 背景
|
||||
- 原有两个每分钟 crontab 任务已被注释(5/11),因为每次全量重跑浪费资源且 P0 会重复推送
|
||||
- 刘新玉要求做真正的增量 P0 实时分发
|
||||
|
||||
### 完成事项
|
||||
1. ✅ 新建 `scripts/detect_p0_realtime.py`(~200行):
|
||||
- 复用 `sync_feishu_feedback.py` 的聚类逻辑 + `priority_classifier.py` 的优先级判定
|
||||
- 查询最近 2 小时消息 → 聚类 → 判定 P0 → 去重(簇签名 MD5)→ 推送
|
||||
- 去重状态存在 `tmp/p0_dispatched_state.json`,24 小时自动过期
|
||||
- 每天 10:00-10:01 自动清空去重,配合全量分发
|
||||
2. ✅ crontab 每分钟执行:`* * * * * python3 detect_p0_realtime.py`
|
||||
3. ✅ 步骤 4-6 不做每分钟恢复(增量改造成本高,日汇总实时性需求弱)
|
||||
|
||||
## AI 归纳流程修复 [刘新玉需求]
|
||||
|
||||
### 背景
|
||||
- 5/15 问题描述出现严重退化:问题一变成"可以在这里Po问题"(元指令文本),问题二变成"图片 (1/1)"
|
||||
- 根因:(1) 关键词模式只能覆盖约 8 类症状,不匹配时退化到首条消息原文 (2) AI 归纳靠心跳触发,5/15 当天漏执行
|
||||
|
||||
### 完成事项
|
||||
1. ✅ 新建 `scripts/ai_summarize_feedback.py`:
|
||||
- 读取 `cluster_context_{date}.json`,调用 DeepSeek API(deepseek-v4-pro)生成精炼问题描述
|
||||
- 保存 `ai_descriptions_{date}.json` → 调用 `--apply-ai` 回写到知识库
|
||||
- 回写后自动清理 context 文件
|
||||
2. ✅ crontab 每天 10:05 执行(在 10:00 全量同步之后)
|
||||
3. ✅ HEARTBEAT.md 移除 AI 归纳任务(已由 crontab 接管)
|
||||
4. ✅ 扩展 `generate_problem_description()` 关键词覆盖:
|
||||
- 新增:语音识别/判分、内容/命名缺失、后台弹窗残留、网络/VPN、热更/打包/测试包
|
||||
- 新增:后台加载场景识别
|
||||
- 5/15 验证:问题一从"可以在这里Po问题"→"语音识别不准确(对话表达,只识别成错误内容)";问题二从"图片 (1/1)"→"后台加载提示/弹窗未自动消失,持续显示 需确认是否因 VPN/网络代理导致"
|
||||
5. ✅ 未启用步骤 4-6 每分钟执行(理由同刘新玉确认)
|
||||
|
||||
### 调度总览(更新后)
|
||||
```
|
||||
每 5 分钟 → 群消息同步入 MySQL
|
||||
每分钟 → P0 实时检测 + 增量推送
|
||||
每分钟 → 反馈数据实时导出到表格
|
||||
每天 10:00 → 全量七步处理 + 全量分发
|
||||
每天 10:05 → AI 归纳(DeepSeek 生成描述 → 回写文档)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 历史数据回写(2026-05-11 ~ 2026-05-19)[刘新玉需求]
|
||||
|
||||
### 需求
|
||||
- 刘新玉要求回写 5/11~5/19 共 9 天的"问题描述"为 AI 归纳版本,验证效果
|
||||
|
||||
### 工具
|
||||
- 新建 `scripts/backfill_ai_descriptions.py`:循环处理多天,读MySQL→聚类→DeepSeek生成描述→覆盖写入知识库子文档
|
||||
- 包含 AI 失败回退机制:空描述/无意义输出自动回退到关键词 `generate_problem_description`
|
||||
|
||||
### 结果
|
||||
| 日期 | 问题数 | AI 归纳质量 |
|
||||
|------|--------|------------|
|
||||
| 5/11 | 2个(P1+P2) | ✅ Spine动画屏幕适配、U级别显示数字 |
|
||||
| 5/12 | 3个(P0+P2) | ✅ Hotfix卡加载、关卡场景物丢失 |
|
||||
| 5/13 | 2个(P2) | ✅ 单元名显示问号占位 |
|
||||
| 5/14 | 1个(P2) | ✅ AI正确识别为无实质问题 |
|
||||
| 5/15 | 2个(P2) | ✅ 语音识别"a bee"→"a b"、后台加载提示不消失 |
|
||||
| 5/16-17 | 无消息 | 周末 |
|
||||
| 5/18 | 1个(P0) | ✅ 热更后无法进入测试包 |
|
||||
| 5/19 | 2个(P2) | ✅ 图片多选数量不匹配、配置URL报错 |
|
||||
|
||||
### 关键发现
|
||||
- **5/15 对比**:关键词版吐出"可以在这里Po问题",AI版准确归纳为「"a bee"被语音识别成"a b"」和「后台加载提示持续显示无法消失」
|
||||
- **5/14**:AI正确识别当天无实质问题(闲聊/确认类消息),未强行生成描述
|
||||
- **AI 偶尔失败需 fallback**:5/12有一个簇 AI 返回空,成功回退到关键词生成「【频繁】移动端关卡4-28内应用无法打开,疑似网络/VPN导致」
|
||||
|
||||
### 文件清单(本次新建/修改)
|
||||
```
|
||||
scripts/ai_summarize_feedback.py # 日常 AI 归纳脚本(crontab 用)
|
||||
scripts/backfill_ai_descriptions.py # 批量回写脚本
|
||||
skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py # 关键词覆盖扩展
|
||||
HEARTBEAT.md # 移除 AI 归纳心跳任务
|
||||
```
|
||||
@ -1,9 +0,0 @@
|
||||
{
|
||||
"date": "2026-05-11",
|
||||
"descriptions": [
|
||||
{
|
||||
"index": 1,
|
||||
"description": "在 132 版本中,L1 级别 U 单元名称显示为数字编号 — U 级别单元名称未命名(季度级别已有名称),新版本发布后用户将全部可见"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
output/daily_feedback/飞书反馈_2026-05-21.xlsx
Normal file
BIN
output/daily_feedback/飞书反馈_2026-05-21.xlsx
Normal file
Binary file not shown.
BIN
scripts/__pycache__/ai_summarize_feedback.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/ai_summarize_feedback.cpython-312.pyc
Normal file
Binary file not shown.
224
scripts/ai_summarize_feedback.py
Normal file
224
scripts/ai_summarize_feedback.py
Normal file
@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AI 问题归纳脚本
|
||||
读取 cluster_context_{date}.json,调用 LLM 为每个问题簇生成精炼的问题描述,
|
||||
输出 ai_descriptions_{date}.json,然后回写到飞书知识库文档。
|
||||
|
||||
用法:
|
||||
python3 ai_summarize_feedback.py [--date YYYY-MM-DD] [--dry-run]
|
||||
|
||||
crontab:
|
||||
5 10 * * * python3 .../ai_summarize_feedback.py >> /var/log/xiaokui_ai_summarize.log 2>&1
|
||||
"""
|
||||
|
||||
import sys, os, json, argparse, urllib.request
|
||||
from datetime import datetime, date, timedelta
|
||||
|
||||
# === Configuration ===
# SECURITY: an API key literal is committed to version control here. Supply the
# key through the DEEPSEEK_API_KEY environment variable instead; the literal is
# kept only as a backward-compatible fallback so existing cron entries keep
# working. TODO: rotate the committed key and delete the fallback.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY") or "sk-7cf94305fb12473b956fd2ed2a6db05b"
DEEPSEEK_BASE_URL = os.environ.get("DEEPSEEK_BASE_URL") or "https://api.deepseek.com/v1"
DEEPSEEK_MODEL = os.environ.get("DEEPSEEK_MODEL") or "deepseek-v4-pro"

# Directory holding this script; the paths below are resolved relative to it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CONTEXT_DIR = os.path.join(_SCRIPT_DIR, "..", "output", "daily_feedback")
SKILL_SCRIPT_DIR = os.path.join(_SCRIPT_DIR, "..", "skills", "feishu-feedback-sync", "scripts")
|
||||
|
||||
SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。你的任务是:
|
||||
阅读一段来自测试群的多人对话(可能包含多个发言人、多轮讨论),
|
||||
从中提炼出他们正在讨论的「具体问题是什么」,用一句中文描述清楚。
|
||||
|
||||
要求:
|
||||
1. 只描述问题本身,不要评价或建议
|
||||
2. 包含关键要素:在哪个端、哪个环节、什么表现
|
||||
3. 如果对话中有多种说法,优先采用最后确认的描述
|
||||
4. 输出仅一句中文,不要加任何前缀、编号、引号或换行
|
||||
5. 如果对话全是无实质内容的闲聊(如"好的""收到"),输出"无明确问题"
|
||||
|
||||
输出格式(严格):直接输出问题描述,无任何额外文字。"""
|
||||
|
||||
|
||||
def load_context(date_str):
    """Load the cluster-context JSON for one date.

    Args:
        date_str: Date string used to build the file name
            ``cluster_context_{date_str}.json`` under ``CONTEXT_DIR``.

    Returns:
        The parsed JSON document, or ``None`` (after printing a warning) when
        the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
    # EAFP: open directly rather than exists()+open(), so a file removed between
    # the check and the open cannot surface as an unexpected traceback.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f" ⚠️ 无上下文文件: {path}")
        return None
|
||||
|
||||
|
||||
def build_user_prompt(cluster):
    """Build the LLM user prompt for a single problem cluster.

    The prompt starts with the cluster's priority, category and current
    conclusion, followed by one ``[time] sender: content`` line per message.

    Args:
        cluster: Mapping with optional keys ``priority``, ``category``,
            ``conclusion`` and ``messages`` (a list of mappings with optional
            ``sender``, ``content`` and ``time`` keys).

    Returns:
        The prompt as a single newline-joined string.
    """
    lines = [
        f"优先级: {cluster.get('priority', '?')}",
        f"分类: {cluster.get('category', '?')}",
        f"当前排查结论: {cluster.get('conclusion', '无')}",
        "",
        "--- 对话记录 ---",
    ]

    for msg in cluster.get("messages") or []:
        # A JSON null content must not crash the run; treat it as empty.
        content = msg.get("content") or ""
        if not isinstance(content, str):
            content = str(content)

        # Messages without text (pure media, stickers, blanks) carry nothing the
        # model can use. One emptiness check covers every message type.
        if not content.strip():
            continue

        # Truncate over-long messages to keep the prompt bounded.
        if len(content) > 200:
            content = content[:197] + "..."

        sender = msg.get("sender", "?")
        sent_at = msg.get("time", "")
        lines.append(f"[{sent_at}] {sender}: {content}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def call_deepseek(system_prompt, user_prompt, max_retries=2):
    """Call the DeepSeek chat-completions endpoint and return the reply text.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        max_retries: Number of additional attempts after the first failure.

    Returns:
        The first choice's message content with surrounding whitespace and
        quote characters removed.

    Raises:
        Exception: Whatever the final attempt raised (network error, HTTP
            error, malformed response) once all retries are exhausted.
    """
    import time  # local import: this module does not import time at top level

    body = json.dumps({
        "model": DEEPSEEK_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 256,
    }).encode()

    # Quote characters models commonly wrap answers in: ASCII quotes plus the
    # CJK curly quotes, written as escapes so they survive re-encoding.
    quote_chars = "\"'\u201c\u201d\u2018\u2019 \n"

    for attempt in range(max_retries + 1):
        try:
            req = urllib.request.Request(
                f"{DEEPSEEK_BASE_URL}/chat/completions",
                data=body,
                headers={
                    "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
                    "Content-Type": "application/json",
                },
                method="POST",
            )
            # Context manager closes the HTTP response even if parsing fails.
            with urllib.request.urlopen(req, timeout=60) as resp:
                data = json.loads(resp.read())
            content = data["choices"][0]["message"]["content"].strip()
            return content.strip(quote_chars)
        except Exception as e:
            # Retry boundary: any failure is retried; the last one propagates.
            if attempt >= max_retries:
                raise
            print(f" ⚠️ API 调用重试 {attempt + 1}: {e}")
            time.sleep(2)
|
||||
|
||||
|
||||
def generate_descriptions(context_data, dry_run=False):
    """Generate one AI description per problem cluster.

    Args:
        context_data: Parsed cluster-context document; its ``clusters`` list
            is iterated.
        dry_run: When true, no API call is made and a ``[DRY-RUN]`` placeholder
            description is produced for every cluster.

    Returns:
        ``None`` when there are no clusters; otherwise a list of
        ``{"index": ..., "description": ...}`` dicts. Clusters whose API call
        fails or returns empty text are omitted, so an error placeholder is
        never written back as if it were a real description.
    """
    clusters = context_data.get("clusters", [])
    if not clusters:
        print(" ⚠️ 无问题簇数据")
        return None

    descriptions = []
    for cluster in clusters:
        idx = cluster.get("index", 0)
        print(f" 🤖 处理簇 #{idx}...")

        user_prompt = build_user_prompt(cluster)

        if dry_run:
            print(f" [DRY-RUN] Prompt 长度: {len(user_prompt)} chars")
            # Preview the first 200 characters of the prompt.
            print(f" [DRY-RUN] 对话预览: {user_prompt[:200]}...")
            description = f"[DRY-RUN] 问题{idx}"
        else:
            try:
                description = call_deepseek(SYSTEM_PROMPT, user_prompt)
            except Exception as e:
                # Skip the cluster so no failure text reaches the document.
                print(f" ❌ 簇 #{idx} API 调用失败: {e}")
                continue
            if not description:
                print(f" ⚠️ 簇 #{idx} AI 返回空描述,已跳过")
                continue

        print(f" 📝 描述: {description}")
        descriptions.append({"index": idx, "description": description})

    return descriptions
|
||||
|
||||
|
||||
def apply_descriptions(date_str, descriptions):
    """Save the descriptions to JSON and apply them via ``--apply-ai``.

    Writes ``ai_descriptions_{date_str}.json`` under ``CONTEXT_DIR``, then runs
    ``sync_feishu_feedback.py --apply-ai <file>`` as a subprocess. On success
    the matching ``cluster_context_{date_str}.json`` is deleted so the same
    context is not processed again.

    Args:
        date_str: Date string used in the file names.
        descriptions: List of ``{"index", "description"}`` dicts to persist.

    Returns:
        ``True`` when the subprocess exited with status 0 and printed a success
        marker; ``False`` otherwise, including when it timed out.
    """
    import subprocess

    # Persist the descriptions first; the sync script reads them from disk.
    desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json")
    payload = {"date": date_str, "descriptions": descriptions}
    with open(desc_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f" 💾 描述已保存: {desc_path}")

    sync_script = os.path.join(SKILL_SCRIPT_DIR, "sync_feishu_feedback.py")
    env = os.environ.copy()
    env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui"
    env["HOME"] = "/root"
    env["PATH"] = "/root/.nvm/versions/node/v24.14.0/bin:" + env.get("PATH", "")

    try:
        result = subprocess.run(
            ["python3", sync_script, "--apply-ai", desc_path],
            capture_output=True, text=True, timeout=60, env=env,
        )
    except subprocess.TimeoutExpired:
        print(" ❌ 回写失败: 子进程执行超时")
        return False

    # Require a zero exit status as well as a success marker: a "✅" printed
    # before a later crash must not count as success.
    marker_seen = "AI 描述已应用" in result.stdout or "✅" in result.stdout
    if result.returncode == 0 and marker_seen:
        print(f" ✅ AI 描述已回写到知识库文档")
        # Remove the context file so it is not processed a second time.
        context_path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
        if os.path.exists(context_path):
            os.remove(context_path)
            print(f" 🗑️ 已清理上下文文件: {context_path}")
        return True

    print(f" ❌ 回写失败: {result.stdout[:300]}")
    if result.stderr:
        print(f" stderr: {result.stderr[:300]}")
    return False
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: summarise one day's clusters and write back.

    Options:
        --date YYYY-MM-DD  Date to process; defaults to yesterday.
        --dry-run          Build prompts and save placeholder descriptions
                           without calling the API or updating the document.

    Exits with status 1 when the write-back step reports failure, so the cron
    wrapper can detect it; argparse exits with status 2 on a malformed date.
    """
    parser = argparse.ArgumentParser(description="AI 问题归纳")
    parser.add_argument("--date", help="日期 YYYY-MM-DD,默认昨天")
    parser.add_argument("--dry-run", action="store_true", help="仅预览不实际调用 API")
    args = parser.parse_args()

    if args.date:
        # Validate and normalise: the value is interpolated into file names, so
        # reject anything that is not a real date.
        try:
            date_str = datetime.strptime(args.date, "%Y-%m-%d").strftime("%Y-%m-%d")
        except ValueError:
            parser.error(f"--date 必须是 YYYY-MM-DD 格式: {args.date!r}")
    else:
        # Default to yesterday: the job runs at 10:05 on the previous day's data
        # produced by the 10:00 run.
        date_str = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")

    print(f"📋 AI 问题归纳 - {date_str}")
    os.makedirs(CONTEXT_DIR, exist_ok=True)

    context = load_context(date_str)
    if not context:
        print(" ℹ️ 无待处理数据,退出")
        return

    descriptions = generate_descriptions(context, dry_run=args.dry_run)
    if not descriptions:
        return

    if args.dry_run:
        desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json")
        payload = {"date": date_str, "descriptions": descriptions}
        with open(desc_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
        print(f"[DRY-RUN] 描述已保存到 {desc_path},未回写文档")
        return

    if not apply_descriptions(date_str, descriptions):
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
449
scripts/backfill_ai_descriptions.py
Normal file
449
scripts/backfill_ai_descriptions.py
Normal file
@ -0,0 +1,449 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
批量回写 2026-05-11 ~ 2026-05-19 的问题描述(AI 归纳版)
|
||||
|
||||
对每个日期:
|
||||
1. 从 MySQL 读取消息 → 聚类 → 生成问题簇
|
||||
2. 调用 DeepSeek API 为每个簇生成精炼问题描述
|
||||
3. 用 AI 描述重新生成完整归纳内容(替代脚本默认的 generate_problem_description)
|
||||
4. 覆盖写入飞书知识库对应日期的子文档
|
||||
"""
|
||||
|
||||
import sys, os, json, urllib.request, subprocess, time, re
|
||||
from datetime import datetime, date, timedelta
|
||||
from collections import defaultdict
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skills", "feishu-feedback-sync", "scripts"))
|
||||
|
||||
from sync_feishu_feedback import (
|
||||
get_db_connection, sort_threads, sort_cluster_msgs,
|
||||
extract_location_elements, extract_conclusion, classify_problem,
|
||||
get_tenant_token, list_child_nodes, create_child_doc,
|
||||
SUMMARY_PARENT_NODE, SUMMARY_SPACE_ID, DISPATCH_CRED_DIR, XIAOKUI_BOT_OPEN_ID,
|
||||
CLI, get_env,
|
||||
)
|
||||
from priority_classifier import compute_final_priority, sort_by_priority
|
||||
|
||||
# === Configuration ===
# SECURITY: an API key literal is committed to version control here. Supply the
# key through the DEEPSEEK_API_KEY environment variable instead; the literal is
# kept only as a backward-compatible fallback. TODO: rotate the committed key
# and delete the fallback.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY") or "sk-7cf94305fb12473b956fd2ed2a6db05b"
DEEPSEEK_BASE_URL = os.environ.get("DEEPSEEK_BASE_URL") or "https://api.deepseek.com/v1"
DEEPSEEK_MODEL = os.environ.get("DEEPSEEK_MODEL") or "deepseek-v4-pro"

# Date range covered by this backfill, as YYYY-MM-DD strings.
START_DATE = "2026-05-11"
END_DATE = "2026-05-19"
|
||||
|
||||
SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。阅读来自测试群的多人对话,用一句中文描述他们讨论的具体问题。
|
||||
|
||||
要求:
|
||||
1. 只描述问题本身,不评价、不建议
|
||||
2. 包含关键要素:在哪个端/环节、什么表现
|
||||
3. 有频率信息(偶现/频繁/必现)要体现
|
||||
4. 仅输出一句中文,不加任何前缀、编号、引号或换行
|
||||
5. 如果对话全是无实质内容的闲聊,输出"无明确问题"
|
||||
6. 如果是打包/热更类问题,说清楚是哪个版本/分支的包
|
||||
7. 如果是语音识别问题,说清楚识别什么内容、识别成了什么"""
|
||||
|
||||
|
||||
def query_date_messages(date_str):
    """Read all ``lark_group_message`` rows for one calendar day.

    Args:
        date_str: Day to read, as ``YYYY-MM-DD``.

    Returns:
        A list of tuples ``(message_id, sender_name, msg_type, content,
        media_url, quote_message_id, msg_time, msg_timestamp)`` ordered by
        ``msg_time`` ascending. ``msg_time`` is formatted as
        ``YYYY-MM-DD HH:MM:SS`` when the driver returns an object with a
        ``strftime`` method.

    Raises:
        ValueError: If ``date_str`` is not in ``YYYY-MM-DD`` form.
    """
    # Parse before opening the connection so a bad date cannot leak one.
    day_start = datetime.strptime(date_str, "%Y-%m-%d")
    next_date = (day_start + timedelta(days=1)).strftime("%Y-%m-%d")

    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            """SELECT message_id, sender_name, msg_type, content, media_url, quote_message_id,
                      msg_time, msg_timestamp
               FROM lark_group_message
               WHERE msg_time >= %s AND msg_time < %s
               ORDER BY msg_time ASC""",
            (f"{date_str} 00:00:00", f"{next_date} 00:00:00"),
        )
        rows = cursor.fetchall()
    finally:
        # Close the connection even if the query raises.
        conn.close()

    # Render the time column as a string.
    formatted_rows = []
    for row in rows:
        fields = list(row)
        if fields[6] and hasattr(fields[6], "strftime"):
            fields[6] = fields[6].strftime("%Y-%m-%d %H:%M:%S")
        formatted_rows.append(tuple(fields))
    return formatted_rows
|
||||
|
||||
|
||||
def build_ai_prompt(cluster):
    """Build the LLM user prompt for a single problem cluster.

    Args:
        cluster: Mapping with optional keys ``priority``, ``category``,
            ``conclusion`` and ``messages`` (a list of mappings with optional
            ``sender``, ``content`` and ``time`` keys).

    Returns:
        A newline-joined string: three header lines, a separator, then one
        ``[time] sender: content`` line per message that has text.
    """
    lines = [
        f"优先级: {cluster.get('priority', '?')}",
        f"分类: {cluster.get('category', '?')}",
        f"排查结论: {cluster.get('conclusion', '无')}",
        "--- 对话 ---",
    ]

    for msg in cluster.get("messages") or []:
        # Tolerate a None content instead of raising on .strip().
        content = str(msg.get("content") or "").strip()
        # Messages without text (pure media or blanks) add nothing to the
        # prompt. One emptiness check covers every message type.
        if not content:
            continue
        if len(content) > 200:
            content = content[:197] + "..."

        sender = msg.get("sender", "?")
        sent_at = msg.get("time", "")
        lines.append(f"[{sent_at}] {sender}: {content}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def call_deepseek(user_prompt, max_retries=2):
    """Call the DeepSeek chat-completions endpoint with ``SYSTEM_PROMPT``.

    Args:
        user_prompt: Content of the ``user`` message.
        max_retries: Number of additional attempts after the first failure.

    Returns:
        The first choice's message content with surrounding whitespace and
        quote characters removed.

    Raises:
        Exception: Whatever the final attempt raised once retries run out.
    """
    body = json.dumps(
        {
            "model": DEEPSEEK_MODEL,
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.3,
            "max_tokens": 256,
        }
    ).encode()

    # ASCII quotes plus CJK curly quotes, written as escapes so they survive
    # re-encoding.
    quote_chars = "\"'\u201c\u201d\u2018\u2019 \n"

    for attempt in range(max_retries + 1):
        try:
            req = urllib.request.Request(
                f"{DEEPSEEK_BASE_URL}/chat/completions",
                data=body,
                headers={"Authorization": f"Bearer {DEEPSEEK_API_KEY}", "Content-Type": "application/json"},
                method="POST",
            )
            # Context manager closes the HTTP response even if parsing fails.
            with urllib.request.urlopen(req, timeout=60) as resp:
                data = json.loads(resp.read())
            content = data["choices"][0]["message"]["content"].strip()
            return content.strip(quote_chars)
        except Exception as e:
            # Retry boundary: any failure is retried; the last one propagates.
            if attempt >= max_retries:
                raise
            print(f" ⚠️ 重试 {attempt + 1}: {e}")
            time.sleep(3)
|
||||
|
||||
|
||||
# Outputs that are clearly not a problem description. Compiled once at import
# time. Character classes list both ASCII and full-width punctuation because
# model output in Chinese normally uses the full-width forms.
_GARBAGE_DESCRIPTION_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'^(没有回答|没有回复|不知道|不确定|无法判断|不太清楚)[。!!]?$',
        r'^(没有回答。|没有回复。){3,}',  # the same refusal repeated
        r'^[。!!,,\s]+$',  # punctuation only
        r'^[??]+$',  # question marks only
    )
)


def is_valid_description(desc):
    """Return whether an AI-generated description is usable.

    A description is rejected when it is empty or whitespace, matches one of
    the known garbage patterns, or contains fewer than three CJK characters.

    Args:
        desc: The description text, or ``None``.

    Returns:
        ``True`` if the description passes all checks, else ``False``.
    """
    if not desc or not desc.strip():
        return False

    stripped = desc.strip()
    if any(pattern.search(stripped) for pattern in _GARBAGE_DESCRIPTION_PATTERNS):
        return False

    # Too short: fewer than three Chinese characters.
    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', desc))
    return chinese_chars >= 3
|
||||
|
||||
|
||||
def generate_ai_description(cluster_data, cluster_msgs):
    """Describe one cluster with the LLM, falling back to keyword rules.

    Args:
        cluster_data: Cluster mapping passed to ``build_ai_prompt``.
        cluster_msgs: The cluster's raw message rows, passed to the keyword
            fallback (``extract_location_elements`` and
            ``generate_problem_description``).

    Returns:
        The AI description when the call succeeds and the text passes
        ``is_valid_description``; otherwise the keyword-generated description.
    """
    prompt = build_ai_prompt(cluster_data)

    # Only the API call sits inside the try. A failure in the fallback itself
    # must surface instead of triggering a second fallback run.
    try:
        desc = call_deepseek(prompt)
    except Exception as e:
        print(f" ❌ AI 调用失败: {e},使用关键词 fallback")
    else:
        if is_valid_description(desc):
            return desc
        print(f" ⚠️ AI 输出无效,使用关键词 fallback")

    # Keyword fallback, shared by the "call failed" and "invalid output" paths.
    from sync_feishu_feedback import generate_problem_description

    loc = extract_location_elements(cluster_msgs)
    return generate_problem_description(cluster_msgs, loc, "")
|
||||
|
||||
|
||||
def build_summary_markdown(valid_clusters):
|
||||
"""
|
||||
用 AI 描述重新生成归纳内容(替代 generate_summary)
|
||||
valid_clusters: list of {cluster_id, msgs, earliest_time, priority_info, ai_description, category}
|
||||
"""
|
||||
lines = ["## 今日问题归纳\n"]
|
||||
|
||||
# 按优先级+分类分组
|
||||
grouped = defaultdict(list)
|
||||
for vc in valid_clusters:
|
||||
p = vc.get("priority_info", {}).get("priority", "P2")
|
||||
grouped[p].append(vc)
|
||||
|
||||
priority_headers = {
|
||||
"P0": "⚠️ P0级核心问题(需优先处理)",
|
||||
"P1": "⚡ P1级重要问题",
|
||||
"P2": "📌 P2级一般问题",
|
||||
"P3": "📝 P3级低优先级",
|
||||
}
|
||||
|
||||
for p_level in ["P0", "P1", "P2", "P3"]:
|
||||
items = grouped.get(p_level, [])
|
||||
if not items:
|
||||
continue
|
||||
lines.append(f"**{priority_headers[p_level]}**")
|
||||
|
||||
by_category = defaultdict(list)
|
||||
for vc in items:
|
||||
by_category[vc["category"]].append(vc)
|
||||
|
||||
cat_idx = 0
|
||||
for cat_name, cat_items in by_category.items():
|
||||
cat_idx += 1
|
||||
lines.append(f"{cat_idx}. **{cat_name}**")
|
||||
for vc in cat_items:
|
||||
desc = vc.get("ai_description", "") or vc.get("fallback_description", "未知问题")
|
||||
lines.append(f" - {desc}")
|
||||
lines.append("")
|
||||
|
||||
# 问题拆解
|
||||
lines.append("## 今日问题拆解\n")
|
||||
|
||||
idx = 0
|
||||
for vc in valid_clusters:
|
||||
idx += 1
|
||||
pi = vc.get("priority_info", {})
|
||||
priority_label = pi.get("priority", "P2")
|
||||
emoji = pi.get("emoji", "📌")
|
||||
desc = vc.get("ai_description", "") or "未知问题"
|
||||
|
||||
sorted_msgs = sort_cluster_msgs(vc["msgs"])
|
||||
|
||||
lines.append(f"### {emoji} {priority_label}")
|
||||
lines.append("")
|
||||
lines.append(f"**{idx},问题描述:** {desc}")
|
||||
lines.append("")
|
||||
conclusion = extract_conclusion(sorted_msgs)
|
||||
lines.append(conclusion)
|
||||
lines.append("")
|
||||
lines.append("| 发言人 | 对话信息 |")
|
||||
lines.append("|--------|---------|")
|
||||
|
||||
first_speaker = sorted_msgs[0][1]
|
||||
last_speaker = sorted_msgs[-1][1]
|
||||
seen_speakers = set()
|
||||
|
||||
for i, m in enumerate(sorted_msgs):
|
||||
name = m[1]
|
||||
text = str(m[3]).replace("\n", " ").replace("\r", " ").strip() if m[3] else ""
|
||||
text = re.sub(r"\[Image:[^\]]+\]", "", text)
|
||||
text = re.sub(r"https?://\S+", "", text)
|
||||
text = re.sub(r"\s+", " ", text)
|
||||
media_url = str(m[4]) if m[4] else ""
|
||||
|
||||
info_parts = []
|
||||
if text:
|
||||
if len(text) > 80:
|
||||
text = text[:77] + "..."
|
||||
info_parts.append(text)
|
||||
if media_url:
|
||||
label = "图片" if media_url.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".webp")) else "文件"
|
||||
info_parts.append(f"📎 [{label}]({media_url})")
|
||||
if not info_parts:
|
||||
info_parts.append("[图片]")
|
||||
dialogue_info = "<br>".join(info_parts) if len(info_parts) > 1 else info_parts[0]
|
||||
|
||||
role_tag = ""
|
||||
if name == first_speaker and name not in seen_speakers:
|
||||
role_tag = "🚩 报告:"
|
||||
elif name == last_speaker and i == len(sorted_msgs) - 1:
|
||||
role_tag = "✅ "
|
||||
|
||||
seen_speakers.add(name)
|
||||
lines.append(f"| {name} | {role_tag}{dialogue_info} |")
|
||||
|
||||
lines.append("")
|
||||
lines.append("---")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def write_to_kb(date_str, markdown_content):
    """Overwrite the knowledge-base child document for one date.

    Looks up (or creates) the child document titled ``"{date_str} 问题反馈"``,
    writes the markdown to a temporary file under ``tmp/`` and runs the CLI
    ``docs +update`` command in overwrite mode.

    Args:
        date_str: Date string used in the document title and temp file name.
        markdown_content: Markdown text to write.

    Returns:
        ``True`` when the CLI reports ``ok``; ``False`` when the document cannot
        be found or created, the CLI times out, or its output cannot be parsed
        or reports an error.
    """
    title = f"{date_str} 问题反馈"
    children = list_child_nodes()

    if title in children:
        obj_token = children[title]["obj_token"]
        print(f" 📝 更新: {title}")
    else:
        obj_token = create_child_doc(title)
        if not obj_token:
            # Creation returned nothing; re-list in case the node exists anyway.
            children = list_child_nodes()
            if title in children:
                obj_token = children[title]["obj_token"]
            else:
                print(f" ❌ 无法创建/找到子文档: {title}")
                return False
        print(f" ➕ 新建: {title}")

    # lark-cli --markdown @file requires a relative path.
    os.makedirs("tmp", exist_ok=True)
    tmp_md = f"tmp/_xiaokui_backfill_{date_str}.txt"

    try:
        with open(tmp_md, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        result = subprocess.run(
            [CLI, "docs", "+update", "--doc", obj_token, "--as", "bot", "--mode", "overwrite", "--markdown", f"@{tmp_md}"],
            env=get_env(),
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        print(" ❌ 写入失败: lark-cli 执行超时")
        return False
    finally:
        # Remove the temp file even when the CLI call raises.
        if os.path.exists(tmp_md):
            os.unlink(tmp_md)

    # The CLI's JSON may be on stdout or stderr. Try each stream alone before
    # the concatenation, which is unparseable when both streams have content.
    output = (result.stdout + result.stderr).strip()
    response = None
    for candidate in (result.stdout.strip(), result.stderr.strip(), output):
        if not candidate:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            response = parsed
            break

    if response is None:
        print(f" ❌ 响应解析失败: {output[:200]}")
        return False

    if response.get("ok"):
        print(f" ✅ 写入成功")
        return True

    error = response.get("error")
    message = error.get("message", output) if isinstance(error, dict) else output
    print(f" ❌ 写入失败: {str(message)[:200]}")
    return False
|
||||
|
||||
|
||||
def process_date(date_str, dry_run=False):
    """Summarise one day's messages and write the result to the knowledge base.

    Steps: load the day's messages, cluster them, keep clusters with at least
    two messages, order them by priority, attach an AI description to each,
    render the markdown summary and (unless ``dry_run``) write it out.

    Args:
        date_str: The date to process, passed through to the query and the
            knowledge-base writer.
        dry_run: When True, skip the AI call and the knowledge-base write and
            print a 500-character preview instead.

    Returns:
        The rendered markdown, or None when the day has no messages or no
        cluster with at least two messages.
    """
    print(f"\n{'=' * 60}")
    print(f"📅 {date_str}")
    print(f"{'=' * 60}")

    # 1. Load messages
    rows = query_date_messages(date_str)
    if not rows:
        print(" ⚠️ 无消息")
        return None
    print(f" 📊 {len(rows)} 条消息")

    # 2. Cluster
    sorted_msgs, clusters, cluster_order = sort_threads(rows)
    print(f" 🔗 {len(clusters)} 个簇")

    # 3. Collect valid clusters (>= 2 messages)
    valid_clusters = []
    for cid in cluster_order:
        cmsgs = clusters[cid]
        if len(cmsgs) < 2:
            continue

        pi = compute_final_priority(cmsgs)
        cat = classify_problem(cmsgs)

        # Per-cluster summary record (also used for the AI prompt).
        valid_clusters.append({
            "cluster_id": cid,
            "msgs": cmsgs,
            "earliest_time": min(m[6] for m in cmsgs),
            "priority_info": pi,
            "category": cat,
            "conclusion": extract_conclusion(sort_cluster_msgs(cmsgs)),
            "messages": [
                {
                    "sender": m[1],
                    "content": str(m[3]) if m[3] else "",
                    "msg_type": str(m[2]),
                    "media_url": str(m[4]) if m[4] else "",
                    "time": str(m[6]),
                }
                for m in cmsgs
            ],
        })

    if not valid_clusters:
        print(" ⚠️ 无有效问题簇(需≥2条消息)")
        return None

    # Order by priority
    valid_clusters = sort_by_priority(valid_clusters)

    # 4. AI descriptions
    total = len(valid_clusters)
    # enumerate gives the true position; list.index() would rescan the list
    # on every iteration and report the first *equal* dict, not this one.
    for idx, vc in enumerate(valid_clusters, start=1):
        print(f" 🤖 簇 #{idx}/{total}...")
        if dry_run:
            vc["ai_description"] = "[DRY-RUN] 待AI归纳"
            print(f" [DRY-RUN]")
            continue

        desc = generate_ai_description(vc, vc["msgs"])
        if desc and desc != "(待归纳)":
            vc["ai_description"] = desc
            print(f" ✅ {desc[:80]}...")
        else:
            print(f" ⚠️ AI 失败,使用空描述")
            vc["ai_description"] = "(待归纳)"
        # Pause between AI calls as rate-limit protection.
        time.sleep(0.5)

    # 5. Render the full markdown
    markdown = build_summary_markdown(valid_clusters)

    if dry_run:
        print(f"\n --- 预览前 500 字符 ---")
        print(markdown[:500])
        return markdown

    # 6. Write to the knowledge base
    # NOTE(review): the boolean result of write_to_kb is ignored, so a failed
    # write still returns the markdown to the caller — confirm this is intended.
    write_to_kb(date_str, markdown)
    return markdown
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the batch backfill.

    Processes either the single ``--date`` given on the command line or every
    day from ``START_DATE`` to ``END_DATE`` inclusive, then prints a per-date
    status summary. ``--dry-run`` is forwarded to ``process_date`` and also
    disables the pause between dates.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--dry-run", action="store_true")
    cli.add_argument("--date", help="仅处理指定日期")
    opts = cli.parse_args()

    if not opts.date:
        first_day = datetime.strptime(START_DATE, "%Y-%m-%d")
        last_day = datetime.strptime(END_DATE, "%Y-%m-%d")
        day_count = (last_day - first_day).days + 1
        dates = [
            (first_day + timedelta(days=offset)).strftime("%Y-%m-%d")
            for offset in range(day_count)
        ]
    else:
        dates = [opts.date]

    mode_tag = "[DRY-RUN]" if opts.dry_run else ""
    print(f"🚀 批量 AI 归纳回写 {mode_tag}")
    print(f" 日期范围: {dates[0]} ~ {dates[-1]} ({len(dates)} 天)")

    results = {}
    for day in dates:
        try:
            outcome = process_date(day, dry_run=opts.dry_run)
        except Exception as exc:
            # One failing date must not stop the rest of the batch.
            print(f" ❌ 异常: {exc}")
            results[day] = f"❌ {exc}"
        else:
            results[day] = "✅" if outcome else "⏭️"

        if not opts.dry_run:
            # Pause between dates to stay under the API rate limit.
            time.sleep(1)

    print(f"\n{'=' * 60}")
    print("📊 汇总:")
    for day, status in results.items():
        print(f" {day}: {status}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
273
scripts/detect_p0_realtime.py
Normal file
273
scripts/detect_p0_realtime.py
Normal file
@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
P0 问题实时检测与分发
|
||||
|
||||
功能:
|
||||
1. 从 MySQL 读取最近一段时间的飞书群消息
|
||||
2. 复用 sync_feishu_feedback.py 的聚类 + 优先级判定逻辑
|
||||
3. 过滤已推送过的 P0 簇(去重)
|
||||
4. 仅推送新增 P0 到「小葵小葵」群
|
||||
|
||||
设计:
|
||||
- 每分钟由 crontab 调用一次
|
||||
- 查询最近 2 小时的消息,确保聚类质量
|
||||
- 用「簇签名」(sorted message_ids)做去重
|
||||
- 每天 10:00 清空去重状态(与全量分发错开)
|
||||
|
||||
用法:
|
||||
python3 detect_p0_realtime.py [--dry-run] [--lookback-minutes 120]
|
||||
"""
|
||||
|
||||
import sys, os, json, urllib.request, argparse, hashlib
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
# 将 sync_feishu_feedback.py 所在目录加入 sys.path,以便 import
|
||||
SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skills", "feishu-feedback-sync", "scripts")
|
||||
sys.path.insert(0, SKILL_DIR)
|
||||
|
||||
from sync_feishu_feedback import (
|
||||
get_db_connection, query_messages, sort_threads, get_tenant_token,
|
||||
DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS,
|
||||
MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB,
|
||||
)
|
||||
from priority_classifier import compute_final_priority
|
||||
|
||||
# === 配置 ===
|
||||
STATE_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "tmp", "p0_dispatched_state.json")
|
||||
LOOKBACK_MINUTES = 120 # 默认回顾 2 小时的消息
|
||||
CLUSTER_MIN_SIZE = 2 # 至少 2 条消息才算有效簇
|
||||
|
||||
|
||||
def load_dispatched_state():
    """Load the dedupe state of already-dispatched P0 clusters.

    Reads ``STATE_FILE`` and drops every entry whose timestamp is not newer
    than 24 hours ago. Timestamps are compared as ISO-8601 strings.

    Returns:
        A dict mapping cluster signature -> dispatch time (ISO string).
        An empty dict is returned when the file is missing, is not valid
        JSON, or does not contain a JSON object.
    """
    try:
        with open(STATE_FILE, "r", encoding="utf-8") as f:
            state = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        state = {}

    # A state file holding valid JSON of the wrong shape (list, string, ...)
    # is treated like a missing file rather than crashing on ``.items()``.
    if not isinstance(state, dict):
        state = {}

    # Drop records older than 24 hours; non-string values cannot be compared
    # against the cutoff and are discarded as well.
    cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
    return {
        sig: ts
        for sig, ts in state.items()
        if isinstance(ts, str) and ts > cutoff
    }
|
||||
|
||||
|
||||
def save_dispatched_state(state):
    """Persist the dispatched-cluster state to ``STATE_FILE``.

    The JSON is written to a sibling ``.tmp`` file first and then moved over
    the real file, so a reader never sees a half-written state file.

    Args:
        state: Dict mapping cluster signature -> dispatch time (ISO string).
    """
    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
    tmp = STATE_FILE + ".tmp"
    # ensure_ascii=False may emit non-ASCII text, so fix the encoding
    # explicitly instead of relying on the locale default.
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(state, f, ensure_ascii=False, indent=2)
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename fails on Windows when the target already exists.
    os.replace(tmp, STATE_FILE)
|
||||
|
||||
|
||||
def cluster_signature(cluster_msgs):
    """Return a signature identifying a cluster by its member message ids.

    The first field of every message is converted to a string, the strings
    are sorted and comma-joined, and the MD5 hex digest of that text is
    returned. The result does not depend on message order, but adding or
    removing any message yields a different signature.
    """
    id_list = [str(msg[0]) for msg in cluster_msgs]
    id_list.sort()
    digest = hashlib.md5(",".join(id_list).encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def is_probably_p0(cluster_msgs):
    """Decide whether a cluster is a P0-level problem.

    Clusters with fewer than ``CLUSTER_MIN_SIZE`` messages are rejected
    without being classified.

    Returns:
        A ``(is_p0, priority_info)`` tuple. ``priority_info`` is the dict
        returned by ``compute_final_priority``, or None for clusters that are
        too small.
    """
    if len(cluster_msgs) >= CLUSTER_MIN_SIZE:
        priority_info = compute_final_priority(cluster_msgs)
        return priority_info["priority"] == "P0", priority_info
    return False, None
|
||||
|
||||
|
||||
def generate_p0_alert_text(cluster_msgs, priority_info):
    """Build the short alert text for a P0 cluster (no document link).

    Args:
        cluster_msgs: Non-empty sequence of message rows. Index 1 is read as
            the sender, index 3 as the content and index 6 as the time; the
            first row is treated as the report.
        priority_info: Dict whose optional ``reasoning`` and ``deadline``
            entries are shown in the alert (defaults: ``"P0"`` and
            ``"2小时内"``).

    Returns:
        The alert as a newline-joined string.
    """
    root_sender = cluster_msgs[0][1]
    root_time = cluster_msgs[0][6]

    # Use the first message with substantive content (more than 3 characters
    # after stripping) as the summary, truncated to 100 characters.
    root_text = ""
    for m in cluster_msgs:
        t = str(m[3]).strip() if m[3] else ""
        if len(t) > 3:
            root_text = t[:100]
            break

    # All speakers, de-duplicated while keeping first-appearance order.
    senders = list(dict.fromkeys(m[1] for m in cluster_msgs))
    # Show at most five names; mark the list as truncated beyond that.
    more_marker = "等" if len(senders) > 5 else ""

    lines = [
        "🚨 P0 实时告警",
        "",
        f"**报告人:** {root_sender}",
        f"**时间:** {root_time}",
        f"**涉及人员:** {'、'.join(senders[:5])}" + more_marker,
        f"**消息数:** {len(cluster_msgs)} 条",
        "",
        f"**摘要:** {root_text}",
        "",
        f"**判定依据:** {priority_info.get('reasoning', 'P0')}",
        f"**修复时限:** {priority_info.get('deadline', '2小时内')}",
    ]

    return "\n".join(lines)
|
||||
|
||||
|
||||
def dispatch_p0_alert(alert_text):
    """Send a P0 alert to the dispatch chat as a Feishu ``post`` message.

    Every non-blank line of ``alert_text`` becomes one paragraph of the post.
    When ``P0_NOTIFY_USERS`` is non-empty, a final paragraph @-mentions each
    of those user ids.

    Args:
        alert_text: Newline-separated alert text.

    Returns:
        True when the API answers with ``code == 0``; False when it reports
        an error or when the request fails (network/HTTP error, timeout, or
        a response body that is not valid JSON).
    """
    import urllib.error
    import urllib.request

    token = get_tenant_token(cred_dir=DISPATCH_CRED_DIR)

    # Build the rich-text paragraphs, one per non-blank line.
    content_parts = [
        [{"tag": "text", "text": line + "\n"}]
        for line in alert_text.split("\n")
        if line.strip()
    ]

    # Append the @-mention paragraph.
    if P0_NOTIFY_USERS:
        at_line = [{"tag": "text", "text": "\n⚠️ 请关注: "}]
        for uid in P0_NOTIFY_USERS:
            at_line.append({"tag": "at", "user_id": uid})
            at_line.append({"tag": "text", "text": " "})
        content_parts.append(at_line)

    # The API expects ``content`` to be a JSON *string*, hence the nested dumps.
    post_content = json.dumps({
        "zh_cn": {
            "title": "🚨 P0 问题实时告警",
            "content": content_parts
        }
    }, ensure_ascii=False)

    body = json.dumps({
        "receive_id": DISPATCH_CHAT_ID,
        "msg_type": "post",
        "content": post_content
    }, ensure_ascii=False).encode()

    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/im/v1/messages?receive_id_type=chat_id",
        data=body,
        headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        },
        method="POST"
    )

    try:
        # The context manager closes the HTTP response on every path.
        with urllib.request.urlopen(req, timeout=10) as resp:
            d = json.loads(resp.read())
    except (OSError, ValueError) as exc:
        # URLError/HTTPError and socket timeouts are OSError subclasses; a
        # non-JSON body raises ValueError. Report failure instead of raising
        # so one bad request does not abort the caller's loop.
        print(f" ⚠️ 实时分发失败: {str(exc)[:100]}")
        return False

    if d.get("code") == 0:
        return True

    print(f" ⚠️ 实时分发失败: {d.get('msg', '')[:100]}")
    return False
|
||||
|
||||
|
||||
def should_clear_state():
    """Return True during the daily reset window (local time 10:00 or 10:01)."""
    current = datetime.now()
    if current.hour != 10:
        return False
    return current.minute <= 1
|
||||
|
||||
|
||||
def main():
    """Scan recent group messages and push newly detected P0 clusters.

    Flow:
      1. During the daily reset window, clear the dedupe state and exit.
      2. Query messages from the last ``--lookback-minutes`` minutes.
      3. Cluster them and classify each cluster.
      4. For every P0 cluster whose signature is not in the dedupe state,
         build the alert and dispatch it (or only print it with
         ``--dry-run``).

    The dedupe state is saved right after each successful dispatch, so a
    later failure in the same run cannot cause an already-sent alert to be
    sent again. A dry run never modifies the dedupe state; otherwise it
    would suppress the real alert on the next run.
    """
    parser = argparse.ArgumentParser(description="P0 问题实时检测与分发")
    parser.add_argument("--dry-run", action="store_true", help="仅打印不推送")
    parser.add_argument("--lookback-minutes", type=int, default=LOOKBACK_MINUTES,
                        help=f"查询最近 N 分钟的消息(默认 {LOOKBACK_MINUTES})")
    args = parser.parse_args()

    # Reset window: clear the dedupe state and leave this time slot to the
    # full dispatch flow.
    if should_clear_state():
        print("[P0-detect] 10:00 清空去重状态,配合全量分发")
        save_dispatched_state({})
        print("[P0-detect] 全量分发时段,跳过实时检测")
        return

    print(f"[P0-detect] 扫描最近 {args.lookback_minutes} 分钟消息...")
    # Take a single timestamp so both window bounds derive from the same instant.
    now = datetime.now()
    lookback_start = (now - timedelta(minutes=args.lookback_minutes)).strftime("%Y-%m-%d %H:%M:%S")
    now_str = now.strftime("%Y-%m-%d %H:%M:%S")

    # Query with direct SQL for an arbitrary time window instead of going
    # through query_messages.
    import pymysql
    conn = pymysql.connect(
        host=MYSQL_HOST, port=MYSQL_PORT,
        user=MYSQL_USER, password=MYSQL_PASS,
        database=MYSQL_DB, charset="utf8mb4"
    )
    try:
        cursor = conn.cursor()
        # '%%' is a literal '%' because the driver formats the query with
        # the parameter tuple.
        cursor.execute("""
            SELECT message_id, sender_name, msg_type, content, media_url, quote_message_id,
                   DATE_FORMAT(msg_time, '%%Y-%%m-%%d %%H:%%i:%%s') as msg_time, msg_timestamp
            FROM lark_group_message
            WHERE msg_time >= %s AND msg_time <= %s
            ORDER BY msg_time ASC
        """, (lookback_start, now_str))
        rows = cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    print(f"[P0-detect] 查询到 {len(rows)} 条消息")

    if len(rows) < 2:
        print("[P0-detect] 消息不足,退出")
        return

    # Cluster
    sorted_msgs, clusters, cluster_order = sort_threads(rows)
    print(f"[P0-detect] 聚类完成:{len(clusters)} 个簇")

    # Load the dedupe state
    state = load_dispatched_state()
    print(f"[P0-detect] 已记录 {len(state)} 个已推送簇签名")

    # Walk the clusters and pick P0 ones that were not dispatched yet.
    new_p0_count = 0
    for cid in cluster_order:
        cmsgs = clusters[cid]
        is_p0, info = is_probably_p0(cmsgs)
        if not is_p0:
            continue

        sig = cluster_signature(cmsgs)
        if sig in state:
            print(f"[P0-detect] 已推送过,跳过: sig={sig[:8]}...")
            continue

        print(f"[P0-detect] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")

        alert = generate_p0_alert_text(cmsgs, info)
        if args.dry_run:
            # Print only: recording the signature here would make the next
            # real run skip this cluster without ever alerting on it.
            print(f"[DRY-RUN] 将发送:\n{alert}")
            new_p0_count += 1
            continue

        if dispatch_p0_alert(alert):
            print("[P0-detect] ✅ P0 已实时推送")
            state[sig] = datetime.now().isoformat()
            # Persist immediately so an exception later in this loop cannot
            # lose the record and trigger a duplicate alert next run.
            save_dispatched_state(state)
            new_p0_count += 1
        else:
            print("[P0-detect] ❌ 推送失败")

    if new_p0_count > 0:
        print(f"[P0-detect] 共推送 {new_p0_count} 个新 P0")

    print("[P0-detect] 完成")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Binary file not shown.
@ -422,6 +422,7 @@ def extract_location_elements(msgs):
|
||||
(r"关卡内|关卡里|关卡中|进关卡|在关卡", "关卡内"),
|
||||
(r"关卡外", "关卡外"),
|
||||
(r"\bLoading\b|loading|加载|转星星", "关卡内"),
|
||||
(r"后台.*加载|后台.*提示|后台.*转圈|加载.*提示.*一直|提示.*一直.*在", "关卡内"),
|
||||
(r"知识巩固", "知识巩固"),
|
||||
(r"巩固题", "巩固题"),
|
||||
(r"单元挑战", "单元挑战"),
|
||||
@ -588,7 +589,51 @@ def generate_problem_description(cluster_msgs, location, root_text, ai_placehold
|
||||
if re.search(r'版本.*更新|更新.*版本|更新.*后|升级.*后|新版.*上线', all_text):
|
||||
if not has_version_issue and not has_build_issue:
|
||||
symptoms.append("版本更新后出现")
|
||||
|
||||
|
||||
# --- 语音识别/判分类 ---
|
||||
has_speech_recog = bool(re.search(r'(识别|跟读|判分|打分|评测).*(不准|不对|错误|异常|识别率|识别成|只能.*识别)', all_text))
|
||||
has_hotword = bool(re.search(r'热词|大模型.*纠|识别.*干扰|极短句.*识别', all_text))
|
||||
if has_speech_recog:
|
||||
# 尝试提取具体表现
|
||||
recog_detail = ""
|
||||
m = re.search(r'(要求.*表达|对话表达|需要.*读|应该.*读).{0,30}([""].+?[""]|\S+)', all_text)
|
||||
if m:
|
||||
recog_detail = f"语音识别不准确({m.group(1)},只识别成错误内容)"
|
||||
elif has_hotword:
|
||||
recog_detail = "语音识别率低,已尝试大模型热词纠正"
|
||||
else:
|
||||
recog_detail = "语音识别/判分不准确"
|
||||
symptoms.append(recog_detail)
|
||||
|
||||
# --- 内容/命名缺失 ---
|
||||
if re.search(r'(未命名|全是数字|显示.*数字|Label.*缺失|名称.*丢失|名称.*没有)', all_text):
|
||||
m = re.search(r'([A-Z]+\d+|L\d+).*(未命名|全是数字|显示.*数字)', all_text)
|
||||
if m:
|
||||
symptoms.append(f"{m.group(1)} 级别单元名称未命名,显示为数字编号")
|
||||
else:
|
||||
symptoms.append("内容/单元名称显示异常(未命名或显示为数字)")
|
||||
|
||||
# --- 后台/弹窗/提示残留 ---
|
||||
has_overlay_stuck = bool(re.search(r'(提示|弹窗|toast|popup|加载中).*(一直|不消失|不.*关|残留|卡)', all_text, re.IGNORECASE))
|
||||
has_bg_stuck = bool(re.search(r'(后台|背景).*(加载|提示|转圈).*(一直|不停|没.*停)', all_text))
|
||||
if has_overlay_stuck or has_bg_stuck:
|
||||
symptoms.append("后台加载提示/弹窗未自动消失,持续显示")
|
||||
|
||||
# --- 网络/VPN ---
|
||||
if re.search(r'(VPN|网络.*问题|网络.*异常|断网|连不上|代理).*(导致|造成|影响|是不是)', all_text):
|
||||
symptoms.append("疑似网络/VPN导致的功能异常")
|
||||
elif re.search(r'挂.*VPN|开了.*VPN|VPN.*关', all_text, re.IGNORECASE):
|
||||
symptoms.append("需确认是否因 VPN/网络代理导致")
|
||||
|
||||
# --- 热更/测试包/打包(增强覆盖) ---
|
||||
has_package_issue = bool(re.search(r'(打包|测试包|hotfix|热更|构建|build).*(没有|不了|失败|选项|没了|异常)', all_text, re.IGNORECASE))
|
||||
has_no_test_build = bool(re.search(r'(没有.*测试包|没有.*包|找不到.*包|打不.*包)', all_text))
|
||||
if (has_package_issue or has_no_test_build) and not has_cannot_open and not has_build_issue:
|
||||
if has_no_test_build:
|
||||
symptoms.append("无法获取测试包/安装包")
|
||||
else:
|
||||
symptoms.append("测试包打包/热更新异常")
|
||||
|
||||
# --- 频率标签(放在描述前面) ---
|
||||
freq_tag = ""
|
||||
if re.search(r'偶尔|有时|有时候|随机|没什么规律|不.*规律', all_text):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user