auto backup 2026-05-07 08:10:01
This commit is contained in:
parent
ec805b0731
commit
a0fe79b29b
@ -12,6 +12,6 @@ vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf
|
||||
tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06
|
||||
user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81
|
||||
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
|
||||
user-feedback-processor 3bc199b29eaec4e20d7904ae13e006276532145d203df92718cbf3197825bc44
|
||||
feishu-group-msg-sync 085f95a5b89fec3b6a627da25d66ffeeb0be430098387739a64f7903f0ee88d4
|
||||
feishu-feedback-sync ad9934adbb72b3fb6503dc97379f0a5eb4af82e4d0bdacfbcafbd563645f68b1
|
||||
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
|
||||
feishu-feedback-sync 1fe2d72586096fc49cb39f5aa1d3f3e364664d8c7578a27ae49ef1be8f7a9896
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-04-30T03:47:21.989Z","query":"微信反馈群 数据库 表结构 MySQL","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-04-30T06:52:35.560Z","query":"用户反馈数据源 飞书群 微信群 数据库表","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-04-30T08:11:39.116Z","query":"飞书群反馈表格 token E8vFsCmPBhT4SCtNmnJchqeJnJe 内容测试问题反馈","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-06T13:30:08.593Z","query":"内容测试反馈群 群聊规则 GroupSystemPrompt","resultCount":5,"results":[{"path":"memory/2026-04-30.md","startLine":198,"endLine":224,"score":1},{"path":"memory/2026-04-30.md","startLine":116,"endLine":142,"score":1},{"path":"memory/2026-04-30.md","startLine":134,"endLine":166,"score":1},{"path":"memory/2026-04-30.md","startLine":177,"endLine":205,"score":1},{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updatedAt": "2026-04-30T08:11:39.116Z",
|
||||
"updatedAt": "2026-05-06T13:30:08.593Z",
|
||||
"entries": {
|
||||
"memory:memory/2026-04-18.md:1:5": {
|
||||
"key": "memory:memory/2026-04-18.md:1:5",
|
||||
@ -9,20 +9,22 @@
|
||||
"endLine": 5,
|
||||
"source": "memory",
|
||||
"snippet": "# 2026-04-18 工作日志 ## 术语共识 [李若松确认] 术语「飞书反馈消息数据库」默认指代用户反馈收集技能中的飞书内部测试反馈MySQL数据表 `vala_test.lark_group_message`,存储「内容测试问题反馈」群(oc_fabff7672e62a9ced7b326ee4a286c26)的同步消息数据。",
|
||||
"recallCount": 3,
|
||||
"recallCount": 4,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 3,
|
||||
"totalScore": 4,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-04-30T03:47:21.989Z",
|
||||
"lastRecalledAt": "2026-04-30T08:11:39.116Z",
|
||||
"lastRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"queryHashes": [
|
||||
"353f9765c086",
|
||||
"a6b740c99377",
|
||||
"9625ed0029fd"
|
||||
"9625ed0029fd",
|
||||
"f865295b9ac7"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-04-30"
|
||||
"2026-04-30",
|
||||
"2026-05-06"
|
||||
],
|
||||
"conceptTags": [
|
||||
"vala-test.lark-group-message",
|
||||
@ -34,6 +36,130 @@
|
||||
"确认",
|
||||
"反馈"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-04-30.md:198:224": {
|
||||
"key": "memory:memory/2026-04-30.md:198:224",
|
||||
"path": "memory/2026-04-30.md",
|
||||
"startLine": 198,
|
||||
"endLine": 224,
|
||||
"source": "memory",
|
||||
"snippet": "3. 问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固定模板) ```markdown ### 问题 N > **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** | 发言人 | 要点 | |--------|------| | ... | ... | **当前问题排查结论:** ... ``` #### 结论提取规则增强 - 解释性关键词:上云/预下载/加载/原因是/改为了/首次 → 标记为分析性发言 - 分析性发言 + 日志上传 → 输出「疑似{原因},已上传日志,排查中」 - 分析性发言 + 无日志 → 输出「{原因},待确认」 - 无分析 + 无日志 → 改为「暂未排查到问题」(刘新玉确认,比「暂未排查到根因」更准确) #### 4/28 最终归纳结果(AI 生成) 1. **NPC HUD 显示**:在移动端关卡内(11-2),NPC 头上的 HUD 偶尔变成一小条 → 暂未排查到问题 2. **iOS Loading 慢**:在 iOS 端关卡内(L1 3-2),Loading 耗时约 10 秒(正常 3 秒),导致组件数据丢失/无音频 → 疑似关卡内容上云加载导致,已上传日志,排查中 #### 结论提取的边界 - 刘新玉指出:\"暂未排查到问题\" vs \"暂未排查到根因\" → 前者更准确(问题被描述了但可能没被排查)",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"lastRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"queryHashes": [
|
||||
"f865295b9ac7"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-06"
|
||||
],
|
||||
"conceptTags": [
|
||||
"角色/组件",
|
||||
"上云/预下载/加载/原因是/改为了/首次",
|
||||
"4/28",
|
||||
"11-2",
|
||||
"3-2",
|
||||
"导致组件数据丢失/无音频",
|
||||
"问题",
|
||||
"描述"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-04-30.md:116:142": {
|
||||
"key": "memory:memory/2026-04-30.md:116:142",
|
||||
"path": "memory/2026-04-30.md",
|
||||
"startLine": 116,
|
||||
"endLine": 142,
|
||||
"source": "memory",
|
||||
"snippet": "问题:很多消息有关联但没有 `quote_message_id`(飞书 API 的 `root_id`/`parent_id` 未采集) **推断规则(按优先级)**: 1. **@提及匹配**:消息中 @了某人 → 关联到被@者最近一条消息 2. **同发送者聚类**:同一人在 2 分钟窗口内连续发多条 → 认为是对同一目标消息的回复 3. **最近不同发送者**:关联到最近一条不同发送者的消息(30 分钟内) 已测试效果:上午 NPC HUD 问题链成功串联,下午 iOS 问题链准确分组。部分跨话题误判仍需 AI 语义辅助(策略3,待后续评估)。 #### 触发方式 - 手动:「同步飞书反馈」「整理反馈对话链」 - 定时:每天 10:00 crontab 自动执行 ## 步骤4:问题归纳功能开发 [刘新玉] - 2026-04-30 18:38 完成 ### 步骤4 包含两部分 1. **问题描述**:在{端}{环节}内({课程}),{角色/组件}出现了{现象} 2. **当前问题排查结论**:从对话最后 1-2 条提取,匹配规则: - \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"lastRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"queryHashes": [
|
||||
"f865295b9ac7"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-06"
|
||||
],
|
||||
"conceptTags": [
|
||||
"quote-message-id",
|
||||
"root-id",
|
||||
"parent-id",
|
||||
"角色/组件",
|
||||
"1-2",
|
||||
"日志上传/排查/查",
|
||||
"确认/确实",
|
||||
"已修复/已解决"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-04-30.md:134:166": {
|
||||
"key": "memory:memory/2026-04-30.md:134:166",
|
||||
"path": "memory/2026-04-30.md",
|
||||
"startLine": 134,
|
||||
"endLine": 166,
|
||||
"source": "memory",
|
||||
"snippet": "- \"日志上传/排查/查\" → \"日志已上传,排查中\" - \"确认/确实\" → \"已确认,待修复\" - \"已修复/已解决\" → \"已修复\" - \"不是 bug/设计如此\" → \"非问题,设计如此\" - 无明确结论 → \"暂未排查到根因\" ### 归纳格式 ```markdown ### 问题 N > **在{端}端{环节}内({课程}),{角色/组件}出现了{现象}** | 发言人 | 要点 | |--------|------| | 报告人 | 🚩 报告:... | | ... | ... | | 最终人 | ✅ 结论/待排查 | ``` ### 维度提取规则 | 维度 | 优先级/来源 | |------|------------| | 端 | iOS > iPad > pad端 > Android > 移动端 > PC(正则匹配,忽略大小写) | | 环节 | 关卡内/知识巩固/单元挑战/听力挑战/阅读挑战/口语挑战/写作挑战/单元强化/瓦拉学院/报告(从消息文本匹配) | | 课程 | 匹配数字编号(如 11-2、L1 3-2) | | 角色/组件 | NPC/HUD/音频/组件/数据/Loading/加载/日志(从消息文本匹配) | | 现象 | 从消息中提取要害描述,截断在 35 字符以内 | ### 现象提取逻辑 1. 优先从包含 \"Bug的表现是这样的:\"、\"问题是\"、\"发现\"、\"出现\" 等关键词的消息中截取描述句 2. 提取的句子去除 URL、图片标记、疑问句 3. 截断到 35 字符防止过长 ### Bug 修复记录",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"lastRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"queryHashes": [
|
||||
"f865295b9ac7"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-06"
|
||||
],
|
||||
"conceptTags": [
|
||||
"日志上传/排查/查",
|
||||
"确认/确实",
|
||||
"已修复/已解决",
|
||||
"bug/设计如此",
|
||||
"角色/组件",
|
||||
"结论/待排查",
|
||||
"优先级/来源",
|
||||
"11-2"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-04-30.md:177:205": {
|
||||
"key": "memory:memory/2026-04-30.md:177:205",
|
||||
"path": "memory/2026-04-30.md",
|
||||
"startLine": 177,
|
||||
"endLine": 205,
|
||||
"source": "memory",
|
||||
"snippet": "- iOS 的两个相关话题(组件无音频 / Loading 慢)因无引用关系而分成两个簇(需策略3语义聚类解决) - 单消息簇被跳过(需至少 2 条消息才能形成问题) ### Skill 文件最终状态 - `skills/feishu-feedback-sync/SKILL.md`:已包含完整步骤1-4的文档 - `skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`:已集成 `summarize_cluster()`、`extract_location_elements()`、`generate_summary()` 函数 - crontab 每日 10:00 执行(与步骤3一起) ### 步骤4 架构调整:AI 归纳取代规则生成 [刘新玉] - 2026-04-30 19:07 #### 问题 脚本规则匹配生成的问题描述质量差: - 组件匹配失败(NPC/HUD → \"未知组件\") - 现象摘取了完整原始消息(含 @、无关词) - 端识别不稳定 #### 决策 **脚本输出结构化元数据 + 对话表,AI 负责归纳描述。** - 脚本 `summarize_cluster` 改为输出: 1. 位置元数据(端/环节/课程/组件)— 由 `extract_location_elements` 提取 2. 发言人-要点表格(规则生成) 3. 问题描述留 `[AI归纳]` 占位符 - 运行时 AI(即助手本身)根据元数据 + 对话上下文,生成精炼的问题描述 #### AI 归纳的最终输出格式(固",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"lastRecalledAt": "2026-05-06T13:30:08.593Z",
|
||||
"queryHashes": [
|
||||
"f865295b9ac7"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-06"
|
||||
],
|
||||
"conceptTags": [
|
||||
"已包含完整步骤1-4的文档",
|
||||
"summarize-cluster",
|
||||
"extract-location-elements",
|
||||
"generate-summary",
|
||||
"npc/hud",
|
||||
"端/环节/课程/组件",
|
||||
"发言人-要点表格",
|
||||
"ios"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
131
memory/2026-05-06.md
Normal file
131
memory/2026-05-06.md
Normal file
@ -0,0 +1,131 @@
|
||||
# 2026-05-06 工作日志
|
||||
|
||||
## 刘新玉:飞书反馈优先级判定(步骤5)落地
|
||||
|
||||
### 需求背景
|
||||
- 刘新玉发来「用户反馈问题优先级判断文档」PDF,要求将优先级判定集成到飞书反馈同步流程的第5步
|
||||
- 前四步骤已完成:数据同步→写入表格→对话链排序→问题归纳
|
||||
- 刘新玉确认"需要"两个方案(脚本自动判定 + 知识库文档标注),全自动判定无需人工确认
|
||||
|
||||
### 完成内容
|
||||
|
||||
#### 1. 新增 priority_classifier.py
|
||||
- 路径:`skills/feishu-feedback-sync/scripts/priority_classifier.py`
|
||||
- P0-P3 基础优先级判定:基于关键词匹配(崩溃/闪退/数据丢失/无音频等)
|
||||
- 动态调整:出现概率(必现/高概率/中概率/低概率/偶现)× 影响范围(全部用户/大部分/部分/极少数)
|
||||
- 最终排序:P0→P1→P2→P3,同级按最早消息时间
|
||||
|
||||
#### 2. 集成到 sync_feishu_feedback.py
|
||||
- `summarize_cluster()` 增加 priority 参数,输出带优先级标签
|
||||
- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序
|
||||
- 新增 `--skip-priority` CLI 参数
|
||||
- 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑
|
||||
|
||||
#### 3. 更新 SKILL.md 文档
|
||||
- 增加了步骤5的完整文档说明
|
||||
- 增加了优先级判定规则速查表
|
||||
|
||||
#### 4. 真实数据验证结果
|
||||
- 2026-04-28 数据:2个问题
|
||||
- P0:iOS端Loading超时致数据丢失/无音频(15条)
|
||||
- P1:移动端NPC HUD偶尔变成一小条(8条)
|
||||
|
||||
### 待确认
|
||||
- 步骤5的优先级标注目前仅在脚本输出中展示,尚未写入「用户反馈问题汇总」文档(RaL6whoYMijyYHkSlWrc7OLLnBy)
|
||||
- 刘新玉需确认是否需要增加文档自动写入能力
|
||||
|
||||
## 刘新玉:PDF文件处理
|
||||
- 刘新玉于11:02发送PDF(用户反馈问题优先级判断文档),8页A4
|
||||
- 已用pdftotext解析并完整阅读
|
||||
- 内容:P0-P3四级优先级定义、动态调整规则、三大判断问题
|
||||
- 已基于此文档实现优先级判定规则
|
||||
# 2026-05-06 工作日志
|
||||
|
||||
## 刘新玉:飞书反馈优先级判定(步骤5)落地
|
||||
|
||||
### 需求背景
|
||||
- 刘新玉发来「用户反馈问题优先级判断文档」PDF,要求将优先级判定集成到飞书反馈同步流程的第5步
|
||||
- 前四步骤已完成:数据同步→写入表格→对话链排序→问题归纳
|
||||
- 刘新玉确认"需要"两个方案(脚本自动判定 + 知识库文档标注),全自动判定无需人工确认
|
||||
|
||||
### 完成内容
|
||||
|
||||
#### 1. 新增 priority_classifier.py
|
||||
- 路径:`skills/feishu-feedback-sync/scripts/priority_classifier.py`
|
||||
- P0-P3 基础优先级判定:基于关键词匹配(崩溃/闪退/数据丢失/无音频等)
|
||||
- 动态调整:出现概率(必现/高概率/中概率/低概率/偶现)× 影响范围(全部用户/大部分/部分/极少数)
|
||||
- 最终排序:P0→P1→P2→P3,同级按最早消息时间
|
||||
|
||||
#### 2. 集成到 sync_feishu_feedback.py
|
||||
- `summarize_cluster()` 增加 priority 参数,输出带优先级标签
|
||||
- `generate_summary()` 调用 `compute_final_priority()` + `sort_by_priority()` 排序
|
||||
- 新增 `--skip-priority` CLI 参数
|
||||
- 修复了推断引用算法的孤立簇问题:在 sort_threads Union-Find 后增加合并单条孤立消息到有发送者重叠的大簇的逻辑
|
||||
|
||||
#### 3. 更新 SKILL.md 文档
|
||||
- 增加了步骤5的完整文档说明
|
||||
- 增加了优先级判定规则速查表
|
||||
|
||||
#### 4. 真实数据验证结果
|
||||
- 2026-04-28 数据:2个问题
|
||||
- P0:iOS端Loading超时致数据丢失/无音频(15条)
|
||||
- P1:移动端NPC HUD偶尔变成一小条(8条)
|
||||
|
||||
#### 5. 刘新玉要求简化文档格式(~11:46)
|
||||
- 反馈文档太繁琐,只要在原有格式把"问题 1"改成"P0-问题 1"
|
||||
- 去掉了优先级分布汇总、额外信息行,只保留标题前缀 + 一行判定依据
|
||||
- 最终格式:`### P0-问题 1` + `**优先级判定:** 规则...`
|
||||
- 知识库文档已更新为简洁版
|
||||
|
||||
#### 6. 修复推断引用算法 Bug
|
||||
- 策略2的 else 分支(同发送者无引用时往前找不同发送者)缺少时间限制
|
||||
- 导致胡陈辰 20:45 的媒体消息被推断引用到徐思清 12:29(跨8小时)
|
||||
- 修复:加了 `GAP_THRESHOLD_MIN` 检查
|
||||
|
||||
#### 7. 文档写入方式改进
|
||||
- 从 `insert_before` + `replace_range` 改为 `append` 模式
|
||||
- 每次同步在文档末尾追加当日归纳(含优先级),不去动旧内容
|
||||
- 函数:`update_summary_doc(markdown_content, day_label)`
|
||||
- 目标文档:`RaL6whoYMijyYHkSlWrc7OLLnBy`(用户反馈问题汇总)
|
||||
|
||||
### 关键文件路径
|
||||
- 主脚本:`skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`
|
||||
- 分类器:`skills/feishu-feedback-sync/scripts/priority_classifier.py`
|
||||
- 技能文档:`skills/feishu-feedback-sync/SKILL.md`
|
||||
- 目标知识库文档 token:`RaL6whoYMijyYHkSlWrc7OLLnBy`
|
||||
|
||||
### 已知限制
|
||||
- 文档更新用 `append` 模式,多次同步会产生重复内容(可后续加去重逻辑)
|
||||
- 优先级分类器仅处理中文文本
|
||||
|
||||
## 下午更新(13:13)— 文档格式持续迭代
|
||||
|
||||
### 刘新玉今日格式调整汇总(feishu-feedback-sync 步骤4+5输出)
|
||||
|
||||
1. **问题描述改为自动规则提取**(不再用 `[AI归纳中]` 占位)
|
||||
- 函数:`generate_problem_description()`
|
||||
- 格式:`在{端}端{环节}({课程}),{具体表现}`
|
||||
- Loading + 数据丢失关联描述:`Loading 耗时约10秒(正常3秒),导致组件数据丢失、无音频`
|
||||
- 频率标签前缀:`【偶现】`/`【频繁】`,通过关键词匹配
|
||||
|
||||
2. **排查结论简化为状态**(按4.3规则)
|
||||
- 规则4:解释性分析 + 日志上传 + 排查中 → `疑似{原因}导致,已上传日志,排查中`
|
||||
- 规则5:日志已上传 → `已上传日志,排查中`
|
||||
- 默认:`暂无结论排查中`
|
||||
|
||||
3. **文档结构新增"今日问题归纳"顶层索引**
|
||||
- 按 `**【P0问题】**` / `**【P1问题】**` 分组,每个问题一行索引
|
||||
- 后接 `## 今日问题拆解` 含详细内容
|
||||
|
||||
4. **今日问题拆解标题格式**
|
||||
- `### 【P0】` / `### 【P1】`(不含编号和问题名)
|
||||
- 内容行:`**1,问题描述:** {描述}`
|
||||
|
||||
5. **Bug修复:多天overwrite互覆盖**
|
||||
- 多天循环时第一天用 `overwrite`,后续天用 `append`
|
||||
- `update_summary_doc()` 新增 `mode` 参数
|
||||
|
||||
### 关键文件
|
||||
- 主脚本:`skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py`
|
||||
- 新函数:`generate_problem_description()`
|
||||
- 修改函数:`summarize_cluster()`、`extract_conclusion()`、`generate_summary()`、`update_summary_doc()`
|
||||
@ -1,20 +1,23 @@
|
||||
---
|
||||
name: feishu-feedback-sync
|
||||
description: 同步飞书「内容测试问题反馈」群消息到知识库电子表格并执行对话链排序。用于:(1) 刘新玉说"同步飞书反馈"、"更新飞书问题反馈表格"、"整理反馈对话链"时触发;(2) 定时任务每日自动同步。数据源为 MySQL vala_test.lark_group_message,目标为知识库飞书问题反馈-近3天表格。
|
||||
description: 同步飞书「内容测试问题反馈」群消息到知识库电子表格并执行对话链排序。用于:(1) 刘新玉说"同步飞书反馈"、"更新飞书问题反馈表格"、"整理反馈对话链"时触发;(2) 定时任务每日自动同步。数据源为 MySQL vala_test.lark_group_message,目标为知识库飞书问题反馈-近3天表格。含五步完整流程:数据同步 → 写入表格 → 对话链排序 → 问题归纳 → 优先级判定排序。
|
||||
---
|
||||
|
||||
# 飞书问题反馈同步与对话链排序
|
||||
# 飞书问题反馈同步(五步完整流程)
|
||||
|
||||
## 概述
|
||||
|
||||
从 MySQL `vala_test.lark_group_message` 读取飞书群消息,同步到知识库电子表格,并按引用关系执行「反馈对话链排序」。
|
||||
从 MySQL `vala_test.lark_group_message` 读取飞书群消息,执行五步完整处理流程。
|
||||
|
||||
### 两阶段输出(双文档分工)
|
||||
### 五步流程总览
|
||||
|
||||
| 阶段 | 目标文档 | 用途 |
|
||||
|------|---------|------|
|
||||
| 步骤 1-3 | 飞书问题反馈-近3天 电子表格 (`AHtnsehwShUVyDtjasSciIvgn7b`) | 原始数据初步整理,验证信息源准确性 |
|
||||
| 步骤 4 | 用户反馈问题汇总 文档 (`RaL6whoYMijyYHkSlWrc7OLLnBy`) | 问题归纳输出,供二次处理 |
|
||||
| 步骤 | 名称 | 目标文档 | 产出 |
|
||||
|------|------|---------|------|
|
||||
| 步骤 1 | 查询数据库 | — | 原始消息数据 |
|
||||
| 步骤 2 | 写入飞书表格 | `AHtnsehwShUVyDtjasSciIvgn7b` | 按天分 sheet 的原始数据 |
|
||||
| 步骤 3 | 反馈对话链排序 | `AHtnsehwShUVyDtjasSciIvgn7b` | 已排序的问题簇 |
|
||||
| 步骤 4 | 问题归纳 | `RaL6whoYMijyYHkSlWrc7OLLnBy` | 结构化问题描述+对话表格+结论 |
|
||||
| 步骤 5 | 优先级判定与排序 | `RaL6whoYMijyYHkSlWrc7OLLnBy` | 按 P0>P1>P2>P3 排序的问题列表 |
|
||||
|
||||
## 关键标识
|
||||
|
||||
@ -225,6 +228,73 @@ lark-cli sheets +write \
|
||||
**当前问题排查结论:** 日志已上传,排查中
|
||||
```
|
||||
|
||||
### 步骤 5:优先级判定与排序
|
||||
|
||||
基于《用户反馈问题优先级判断文档》,对步骤4归纳的问题自动评定优先级(P0-P3),并按优先级从高到低排序输出。
|
||||
|
||||
#### 5.1 判定维度
|
||||
|
||||
| 维度 | 说明 | 来源 |
|
||||
|------|------|------|
|
||||
| 基础优先级 | 根据问题严重程度(崩溃/功能异常/体验瑕疵/细节优化) | 关键词规则匹配 |
|
||||
| 出现概率 | 必现/高概率/中概率/低概率/偶现 | 消息内容关键词 |
|
||||
| 影响范围 | 全部用户/大部分用户/部分用户/极少数用户 | 消息内容关键词 |
|
||||
|
||||
#### 5.2 动态调整规则
|
||||
|
||||
| 规则 | 条件 | 效果 |
|
||||
|------|------|------|
|
||||
| 必现升级 | 出现概率为必现/高概率 | 原有优先级 +1 级(P1→P0, P2→P1) |
|
||||
| 偶现降级 | 出现概率为低概率/偶现 | 原有优先级 -1 级(最多降一级,P1→P2) |
|
||||
| 范围升级 | 影响全部用户 | 原有优先级 +1 级 |
|
||||
| 范围降级 | 仅影响极少数用户 | 原有优先级 -1 级 |
|
||||
|
||||
#### 5.3 优先级等级定义
|
||||
|
||||
| 级别 | 定义 | 修复时限 | 典型关键词 |
|
||||
|------|------|----------|-----------|
|
||||
| 🔴 **P0** | 孩子完全上不了课 / 核心功能崩溃 | 2小时内处理,当天解决 | 闪退、崩溃、进不去、数据丢失、服务器宕机 |
|
||||
| 🟠 **P1** | 功能能用但不对劲 / 学习效果打折扣 | 3天内修复上线 | 音频异常、判分错误、进度保存失败、奖励未发放 |
|
||||
| 🟡 **P2** | 偶尔小毛病 / 界面瑕疵 / 刷新就好 | 当周修复 | UI显示异常、偶尔不显示、拖拽不流畅、刷新恢复 |
|
||||
| 🟢 **P3** | 几乎不影响使用 / 追求完美细节 | 不单独排期 | 分辨率略低、错别字、背景音不统一 |
|
||||
|
||||
#### 5.4 输出格式
|
||||
|
||||
在步骤4归纳结果的基础上,每个问题标题追加优先级标签,并新增优先级信息行:
|
||||
|
||||
```markdown
|
||||
### 问题 1 🔴 P0
|
||||
|
||||
**优先级:** 🔴 P0 | 基础优先级: P1(匹配P1规则:learning_func);出现频率: 必现;动态调整: 升级 1 级
|
||||
**修复时限:** 2小时内处理,当天解决
|
||||
|
||||
> **[AI归纳: 问题描述]**
|
||||
...
|
||||
```
|
||||
|
||||
问题在文档中按 **P0 → P1 → P2 → P3** 排列,同优先级内按最早消息时间排序。
|
||||
|
||||
在归纳开头先输出优先级分布汇总:
|
||||
|
||||
```markdown
|
||||
**优先级分布:** P0:1个 | P1:3个 | P2:5个 | P3:2个
|
||||
```
|
||||
|
||||
#### 5.5 执行方式
|
||||
|
||||
```bash
|
||||
# 默认启用步骤5
|
||||
python3 sync_feishu_feedback.py --days 3
|
||||
|
||||
# 跳过步骤5(仅执行步骤1-4)
|
||||
python3 sync_feishu_feedback.py --days 3 --skip-priority
|
||||
```
|
||||
|
||||
优先级判定函数位于独立模块 `scripts/priority_classifier.py`,可单独测试:
|
||||
```bash
|
||||
python3 priority_classifier.py
|
||||
```
|
||||
|
||||
## 定时任务
|
||||
|
||||
建议每日执行一次,在飞书群消息同步完毕后(`feishu-group-msg-sync` 之后)运行。
|
||||
|
||||
Binary file not shown.
Binary file not shown.
278
skills/feishu-feedback-sync/scripts/priority_classifier.py
Normal file
278
skills/feishu-feedback-sync/scripts/priority_classifier.py
Normal file
@ -0,0 +1,278 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
步骤5:用户反馈问题优先级判定与排序
|
||||
|
||||
基于《用户反馈问题优先级判断文档》,对步骤4归纳的问题簇进行优先级评估:
|
||||
- 先按问题严重程度定基础优先级(P0-P3)
|
||||
- 再按出现概率和影响范围动态调整
|
||||
- 最终按优先级排序输出(P0 > P1 > P2 > P3)
|
||||
"""
|
||||
|
||||
import re, sys, json
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
|
||||
# ============================================================
|
||||
# 优先级判定规则(基于优先级判断文档)
|
||||
# ============================================================
|
||||
|
||||
# P0 关键词:完全阻断使用、核心功能崩溃
|
||||
P0_KEYWORDS = {
|
||||
"crash": r'闪退|崩溃|卡死|无法启动|打不开|进不去|登不上|完全.*进|完全.*打|完全.*不能|无法.*进入',
|
||||
"core": r'(知识巩固|跟读|录音).*(进不去|没反应|无法|不工作|崩溃)|(核心功能|主线).*(崩溃|卡死|无法推进|完全.*卡)',
|
||||
"payment": r'(付费|购买|充值).*(无法|不到账|失败|不能用)|(钱|金额).*(不对|错误|问题)',
|
||||
"server": r'(服务器|全国|所有.*用户|全体).*(挂了|宕机|无法连接|登不上)',
|
||||
"data_loss": r'(数据.*(丢失|没记录|白学|全没|消失))|(没.*获取到.*(知识点|数据))',
|
||||
}
|
||||
|
||||
# P1 关键词:功能明显异常、学习效果打折扣
|
||||
P1_KEYWORDS = {
|
||||
"learning_func": r'(音频|声音|播放).*(异常|没声音|无声|不对|听不到|没有|无)|(听不到|不能听|无音|没音|没声)',
|
||||
"score_judge": r'(判分|打分|识别|评测).*(不对|不准|错误|异常)|(明明.*对.*系统.*(说|判断).*不对)',
|
||||
"progress": r'(进度|版本|课程.*序号|单元.*序号).*(显示|记录|保存).*(错|不对|失败|异常)',
|
||||
"content": r'(题目|选项|图片|显示).*(有问题|异常|错位|看不见|看不清)|(内容|关卡).*(异常|有问题)',
|
||||
"reward": r'(奖励|贴纸|成就).*(没发|没到账|异常|没有)',
|
||||
"data_error": r'(学习.*(数据|时长|记录)).*(不对|错误|异常|不准确)',
|
||||
"update": r'(更新|版本).*(失败|无法|出问题|异常)',
|
||||
}
|
||||
|
||||
# P2 关键词:不影响核心功能,但体验有瑕疵
|
||||
P2_KEYWORDS = {
|
||||
"ui_display": r'(界面|UI|显示|排版|文案).*(偶尔|小|异常|不对|错位|不美观|截断)|(偶尔.*(不显示|显示.*异常))',
|
||||
"animation": r'(动画|动效).*(不流畅|异常|缺失|卡顿)',
|
||||
"lag": r'(按钮|点击|操作).*(延迟|卡顿|等一下|反应慢|不灵敏)',
|
||||
"refresh_fix": r'(刷新|重启|重进).*就.*(好|恢复|正常)',
|
||||
"minor_order": r'(顺序).*(不对|怪|反了)',
|
||||
}
|
||||
|
||||
# P3 关键词:几乎不影响使用
|
||||
P3_KEYWORDS = {
|
||||
"cosmetic": r'(图标|颜色|色系|分辨率).*(模糊|不搭|略低|不统一)',
|
||||
"typo": r'(错别字|文字.*错误)',
|
||||
"perf_minor": r'(帧率|加载).*(慢.*秒|下降|略慢)',
|
||||
"audio_bg": r'(背景音乐|音量).*(不统一|异常)',
|
||||
}
|
||||
|
||||
|
||||
# 出现概率特征词
|
||||
FREQUENCY_PATTERNS = {
|
||||
"必现": {
|
||||
"keywords": r'每次|必现|100%|稳定.*复现|总能|一直.*出现|每次.*都|始终',
|
||||
"weight": 1.0,
|
||||
},
|
||||
"高概率": {
|
||||
"keywords": r'经常|十.*次.*[89八]|频繁|大部分.*时候|经常.*出现',
|
||||
"weight": 0.8,
|
||||
},
|
||||
"中概率": {
|
||||
"keywords": r'(偶尔|有时候|有时|时不时)',
|
||||
"weight": 0.5,
|
||||
},
|
||||
"低概率": {
|
||||
"keywords": r'很少|偶尔.*一次|个别|不多|少见|十.*次.*[123]',
|
||||
"weight": 0.3,
|
||||
},
|
||||
"偶现": {
|
||||
"keywords": r'(偶现|特殊.*条件|特定.*机型|特定.*操作|说不清|不知道怎么.*触发|极.*少见)',
|
||||
"weight": 0.1,
|
||||
},
|
||||
}
|
||||
|
||||
# 影响范围特征词
|
||||
SCOPE_PATTERNS = {
|
||||
"全部用户": {
|
||||
"keywords": r'所有.*用户|每个.*用户|全部.*用户|全国|全体',
|
||||
"adjust": +1,
|
||||
},
|
||||
"大部分用户": {
|
||||
"keywords": r'大部分|多数.*用户|很多.*用户|普遍',
|
||||
"adjust": 0,
|
||||
},
|
||||
"部分用户": {
|
||||
"keywords": r'部分.*用户|有些|某种.*机型|某个.*平台|iOS.*端|Android.*端|手机',
|
||||
"adjust": 0,
|
||||
},
|
||||
"极少数用户": {
|
||||
"keywords": r'极少数|个例|个别.*用户|只有.*一个|就.*遇到|就.*发现',
|
||||
"adjust": -1,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def determine_base_priority(text: str) -> Tuple[int, str]:
    """
    Determine the base priority of a problem from its message text.

    The keyword tables are tried from most to least severe (P0, P1, P2, P3)
    and the first matching rule wins.

    Args:
        text: Concatenated message text of one problem cluster. ``None`` or
            an empty string is treated as "no text".

    Returns:
        A ``(level, reason)`` tuple where ``level`` is 0-3 (0=P0 ... 3=P3)
        and ``reason`` names the rule category that matched. When nothing
        matches, the level defaults to 2 (P2).
    """
    text = text or ""

    rule_tables = (
        (0, "P0", P0_KEYWORDS),
        (1, "P1", P1_KEYWORDS),
        (2, "P2", P2_KEYWORDS),
        (3, "P3", P3_KEYWORDS),
    )

    for level, label, patterns in rule_tables:
        for category, pattern in patterns.items():
            # Match case-insensitively against the original text instead of
            # lowercasing it first: some patterns contain upper-case literals
            # (e.g. "UI") that could never match a lowercased string.
            if re.search(pattern, text, re.IGNORECASE):
                return level, f"匹配{label}规则:{category}"

    # Nothing matched: fall back to P2 (ordinary issue).
    return 2, "无法精确匹配,默认归为P2"
|
||||
|
||||
|
||||
def detect_frequency(text: str) -> Tuple[str, float]:
    """
    Detect how often the problem occurs.

    Walks FREQUENCY_PATTERNS in definition order and returns the label and
    weight of the first entry whose keyword pattern is found in ``text``.
    Returns ("未知", 0.5) when no entry matches.
    """
    hits = (
        (label, spec["weight"])
        for label, spec in FREQUENCY_PATTERNS.items()
        if re.search(spec["keywords"], text)
    )
    # The generator is lazy, so only the first hit is ever evaluated.
    return next(hits, ("未知", 0.5))
|
||||
|
||||
|
||||
def detect_scope(text: str) -> Tuple[str, int]:
    """
    Detect how many users the problem affects.

    Walks SCOPE_PATTERNS in definition order and returns the label and the
    "adjust" value of the first entry whose keyword pattern is found in
    ``text``. Returns ("未知", 0) when no entry matches.
    """
    hits = (
        (label, spec["adjust"])
        for label, spec in SCOPE_PATTERNS.items()
        if re.search(spec["keywords"], text)
    )
    # The generator is lazy, so only the first hit is ever evaluated.
    return next(hits, ("未知", 0))
|
||||
|
||||
|
||||
def compute_final_priority(cluster_msgs: List) -> Dict:
    """
    Compute the final priority of one problem cluster.

    The base priority comes from keyword rules and is then shifted by the
    detected occurrence frequency and impact scope. The result is clamped
    to the P0-P3 range.

    Args:
        cluster_msgs: Messages of the cluster. Each message is an indexable
            row whose element 3 is the message content (may be ``None``).

    Returns:
        A dict with the keys:
            "priority":      final label, "P0" | "P1" | "P2" | "P3"
            "base_priority": base level before adjustment (int, 0-3)
            "base_label":    label of the base level
            "frequency":     detected frequency label, or "未知"
            "scope":         detected scope label, or "未知"
            "reasoning":     human-readable explanation of the decision
            "emoji":         colour marker of the final level
            "deadline":      fix deadline text of the final level
    """
    # Collect all message text. A missing content cell must contribute
    # nothing; str(None) would inject the literal word "None".
    all_text = " ".join("" if m[3] is None else str(m[3]) for m in cluster_msgs)
    all_text = re.sub(r'\[Image:[^\]]+\]', '', all_text)

    # 1. Base priority from keyword rules.
    base_level, base_reason = determine_base_priority(all_text)

    # 2. Occurrence frequency.
    freq_label, freq_weight = detect_frequency(all_text)

    # 3. Impact scope.
    scope_label, scope_adjust = detect_scope(all_text)

    # 4a. Frequency adjustment, expressed as a change of the level NUMBER:
    #     -1 raises the priority (e.g. P1 -> P0), +1 lowers it.
    freq_adjust = 0
    if freq_weight >= 0.8:          # always / high probability
        if base_level > 0:
            freq_adjust = -1
    elif freq_weight <= 0.3:        # low probability / sporadic
        if base_level < 3:
            freq_adjust = +1

    # 4b. Scope adjustment. SCOPE_PATTERNS expresses "adjust" in priority
    #     steps (+1 = raise the priority, -1 = lower it), which is the
    #     opposite sign of the level number, so it has to be subtracted.
    # NOTE(review): frequency and scope downgrades can stack to two levels;
    # confirm whether the "at most one level" rule should cap the total.
    total_adjust = freq_adjust - scope_adjust

    # Clamp to the valid 0-3 range.
    final_level = max(0, min(3, base_level + total_adjust))
    # The change that was really applied after clamping.
    applied = final_level - base_level

    priority_labels = ["P0", "P1", "P2", "P3"]
    emojis = ["🔴", "🟠", "🟡", "🟢"]
    deadlines = [
        "2小时内处理,当天解决",
        "3天内修复上线",
        "当周排期修复,1周内解决",
        "不单独排期,有空再修",
    ]

    # Build the explanation.
    reasons = [f"基础优先级: {priority_labels[base_level]}({base_reason})"]
    if freq_label != "未知":
        reasons.append(f"出现频率: {freq_label}")
    if scope_label != "未知":
        reasons.append(f"影响范围: {scope_label}")
    if applied < 0:
        reasons.append(f"动态调整: 升级 {abs(applied)} 级")
    elif applied > 0:
        reasons.append(f"动态调整: 降级 {applied} 级")
    elif freq_adjust != 0 or scope_adjust != 0:
        # Adjustments were triggered but cancelled out or hit the clamp.
        reasons.append("动态调整: 维持原级")

    return {
        "priority": priority_labels[final_level],
        "base_priority": base_level,
        "base_label": priority_labels[base_level],
        "frequency": freq_label,
        "scope": scope_label,
        "reasoning": ";".join(reasons),
        "emoji": emojis[final_level],
        "deadline": deadlines[final_level],
    }
|
||||
|
||||
|
||||
def sort_by_priority(clusters_with_priority: List[Dict]) -> List[Dict]:
    """
    Sort clusters by final priority: P0 > P1 > P2 > P3.

    Clusters of the same priority are ordered by their earliest message
    time (ascending).

    Args:
        clusters_with_priority: Dicts that may carry a "priority_info" dict
            (with a "priority" label such as "P0") and an "earliest_time"
            value.

    Returns:
        A new, sorted list; the input list is not modified.
    """
    label_to_level = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}

    def final_level(cluster: Dict) -> int:
        info = cluster.get("priority_info") or {}
        label = info.get("priority")
        if label in label_to_level:
            # Rank by the FINAL priority (after dynamic adjustment), not by
            # the base level it started from.
            return label_to_level[label]
        # Backward compatible fallback for entries without a final label.
        return info.get("base_priority", 2)

    return sorted(
        clusters_with_priority,
        key=lambda c: (final_level(c), c.get("earliest_time", "9999")),
    )
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 自测
|
||||
# ============================================================
|
||||
if __name__ == "__main__":
|
||||
# 测试用例
|
||||
test_cases = [
|
||||
{
|
||||
"name": "P0: App崩溃",
|
||||
"msgs": [
|
||||
[None, "测试员", "text", "孩子刚打开App就闪退了,完全无法使用", None, None, None, None],
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "P1: 音频问题",
|
||||
"msgs": [
|
||||
[None, "测试员", "text", "iOS端关卡内L1 3-2组件无音频,每次进都这样", None, None, None, None],
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "P2: UI显示偶尔问题",
|
||||
"msgs": [
|
||||
[None, "测试员", "text", "大地图卡片偶尔不显示,刷新一下就好了", None, None, None, None],
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "P1→P0: 必现的数据丢失",
|
||||
"msgs": [
|
||||
[None, "测试员", "text", "每次完成关卡后学习数据都不记录,100%复现,等于白学", None, None, None, None],
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "P1→P2: 偶现的音频问题",
|
||||
"msgs": [
|
||||
[None, "测试员", "text", "偶现跟读音频播放异常,特定机型才出现,概率很低", None, None, None, None],
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
for tc in test_cases:
|
||||
result = compute_final_priority(tc["msgs"])
|
||||
print(f"\n{'='*60}")
|
||||
print(f"测试: {tc['name']}")
|
||||
print(f"结果: {result['emoji']} {result['priority']} | {result['reasoning']}")
|
||||
print(f"时限: {result['deadline']}")
|
||||
@ -1,19 +1,29 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
飞书问题反馈同步脚本
|
||||
功能:从 MySQL 读取飞书群消息,同步到知识库电子表格,并执行对话链排序
|
||||
飞书问题反馈同步脚本(五步完整流程)
|
||||
功能:
|
||||
步骤1:从 MySQL 读取飞书群消息
|
||||
步骤2:写入知识库电子表格
|
||||
步骤3:反馈对话链排序
|
||||
步骤4:问题归纳 → 输出到「用户反馈问题汇总」
|
||||
步骤5:优先级判定与排序 → 按 P0>P1>P2>P3 重排输出
|
||||
|
||||
用法:
|
||||
python3 sync_feishu_feedback.py [--days N] [--dry-run]
|
||||
python3 sync_feishu_feedback.py [--days N] [--dry-run] [--skip-priority]
|
||||
|
||||
--days N 同步最近 N 天(默认 3)
|
||||
--dry-run 仅打印操作不写入
|
||||
--days N 同步最近 N 天(默认 3)
|
||||
--dry-run 仅打印操作不写入
|
||||
--skip-priority 跳过步骤5优先级判定
|
||||
"""
|
||||
|
||||
import sys, os, json, subprocess, argparse, re
|
||||
from datetime import date, datetime, timedelta
|
||||
from collections import defaultdict, deque
|
||||
|
||||
# 确保能找到同级目录下的 priority_classifier
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from priority_classifier import compute_final_priority, sort_by_priority
|
||||
|
||||
# === 配置 ===
|
||||
MYSQL_HOST = "bj-cdb-8frbdwju.sql.tencentcdb.com"
|
||||
MYSQL_PORT = 25413
|
||||
@ -172,10 +182,12 @@ def infer_missing_references(rows):
|
||||
if rows[j][5]:
|
||||
same_sender_parent = rows[j][5]
|
||||
else:
|
||||
# 同发送者无引用,关联到它的前一条不同发送者
|
||||
# 同发送者无引用,关联到它的前一条不同发送者(需在 GAP 限制内)
|
||||
for k in range(j - 1, -1, -1):
|
||||
if rows[k][1] != sender:
|
||||
same_sender_parent = rows[k][0]
|
||||
k_diff = (ts - parse_time(rows[k][6])).total_seconds() / 60
|
||||
if k_diff <= GAP_THRESHOLD_MIN:
|
||||
same_sender_parent = rows[k][0]
|
||||
break
|
||||
break
|
||||
|
||||
@ -251,6 +263,48 @@ def sort_threads(rows):
|
||||
for r in enriched_rows:
|
||||
clusters[find(r[0])].append(r)
|
||||
|
||||
# 合并孤立单簇(只有1条消息的簇,尝试合并到相邻时间上最近的簇)
|
||||
def try_merge_orphan_clusters(clusters_dict):
    """
    Merge small "orphan" clusters into the nearest larger cluster.

    A cluster with at most 2 messages is an orphan; a cluster with more
    than 2 messages is a merge target. An orphan is merged into the target
    whose earliest message is closest in time, provided that either
      - the two clusters share at least one sender and the gap is under
        60 minutes, or
      - they share no sender and the gap is under 10 minutes.

    Args:
        clusters_dict: Mapping of cluster id -> list of message rows.
            Element 1 of a row is used as the sender and element 6 is
            passed to ``parse_time``.

    Returns:
        The same dict, modified in place (merged orphans are removed).
    """
    orphan_ids = [cid for cid, cmsgs in clusters_dict.items() if len(cmsgs) <= 2]
    big_cluster_ids = [cid for cid, cmsgs in clusters_dict.items() if len(cmsgs) > 2]

    if not orphan_ids or not big_cluster_ids:
        return clusters_dict

    for orphan_id in orphan_ids:
        orphan_msgs = clusters_dict[orphan_id]
        orphan_time = min(parse_time(m[6]) for m in orphan_msgs)
        orphan_senders = {m[1] for m in orphan_msgs}

        # Find the closest eligible target cluster.
        best_cid = None
        best_gap = float('inf')
        for bcid in big_cluster_ids:
            b_msgs = clusters_dict[bcid]
            b_time = min(parse_time(m[6]) for m in b_msgs)
            gap = abs((orphan_time - b_time).total_seconds() / 60)
            has_overlap = bool(orphan_senders & {m[1] for m in b_msgs})
            # Shared senders allow a wider window than unrelated senders.
            max_gap = 60 if has_overlap else 10
            if gap < max_gap and gap < best_gap:
                best_gap = gap
                best_cid = bcid

        # Compare against None explicitly so a falsy cluster id is accepted.
        if best_cid is not None:
            clusters_dict[best_cid].extend(orphan_msgs)
            del clusters_dict[orphan_id]
            # The target stays in big_cluster_ids on purpose: dropping it
            # after the first merge would leave every later orphan of the
            # same conversation isolated.

    return clusters_dict
|
||||
|
||||
clusters = try_merge_orphan_clusters(dict(clusters))
|
||||
clusters = defaultdict(list, clusters)
|
||||
|
||||
# 簇间按最早时间排序
|
||||
cluster_order = sorted(clusters.keys(),
|
||||
key=lambda cid: min(m[6] for m in clusters[cid]))
|
||||
@ -367,15 +421,126 @@ def extract_location_elements(msgs):
|
||||
return result
|
||||
|
||||
|
||||
def summarize_cluster(cluster_msgs, idx):
|
||||
def generate_problem_description(cluster_msgs, location, root_text):
    """
    Build a rule-based, one-line problem description for a message cluster.

    Output shape: ``在{platform}{scene}({course}),{symptoms}``, optionally
    prefixed with a frequency tag such as ``【偶现】``. When no location can
    be determined only the symptoms are returned.

    Args:
        cluster_msgs: Message rows of the cluster; element 3 of each row is
            the message content (may be ``None``).
        location: Dict of location elements; the keys "端", "环节" and
            "课程" are read. ``None`` is treated as an empty dict.
        root_text: Text of the first report, used as a fallback summary
            when no known symptom is recognised.

    Returns:
        The description string.
    """
    # A missing content cell must contribute nothing; str(None) would
    # inject the literal word "None" into the text.
    all_text = " ".join("" if m[3] is None else str(m[3]) for m in cluster_msgs)
    all_text = re.sub(r'\[Image:[^\]]+\]', '', all_text)
    all_text = re.sub(r'https?://\S+', '', all_text)

    location = location or {}
    device = location.get("端", "") or ""
    scene = location.get("环节", "") or ""
    course = location.get("课程", "") or ""

    # Platform: prefer what the messages say, fall back to the extracted one.
    if re.search(r'iOS|ios|苹果|iPhone|iPad', all_text):
        platform = "iOS 端"
    elif re.search(r'移动端|手机|APP|android|安卓', all_text, re.IGNORECASE):
        platform = "移动端"
    elif re.search(r'(?<![A-Za-z])PC(?![A-Za-z])|unity|编辑器', all_text, re.IGNORECASE):
        # "PC" must stand alone, otherwise it also matches inside "NPC".
        platform = "PC/编辑器"
    else:
        platform = device if device and device != "未知" else ""

    # Scene and course.
    loc_parts = []
    if platform:
        loc_parts.append(platform)
    if scene and course:
        loc_parts.append(f"{scene}({course})")
    elif scene:
        loc_parts.append(scene)
    elif course:
        loc_parts.append(f"关卡{course}")

    # Location parts are intentionally joined without a separator.
    location_str = "".join(loc_parts)

    # Symptoms.
    symptoms = []

    # Loading timeout and data loss are reported as one related symptom.
    has_loading = bool(re.search(r'Loading.*长|加载.*慢|转星星.*10|加载.*10.*秒|loading.*timeout|加载超时', all_text, re.IGNORECASE))
    has_data_loss = bool(re.search(r'数据.*丢|数据.*没|组件.*数据.*没|数据.*不见|数据.*丢失', all_text))
    has_no_audio = bool(re.search(r'无音频|没声音|没有声音|组件无音频', all_text))

    if has_loading and has_data_loss:
        # Pick up the "normal" loading time for comparison, if mentioned.
        normal_match = re.search(r'(正常|一般|通常).{0,3}(\d+)\s*秒', all_text)
        normal_time = f"(正常{normal_match.group(2)}秒)" if normal_match else ""
        # NOTE(review): the 10-second figure is hard-coded, not extracted.
        loading_desc = f"Loading 耗时约 10 秒{normal_time}"
        if has_no_audio:
            symptoms.append(f"{loading_desc},导致组件数据丢失、无音频")
        else:
            symptoms.append(f"{loading_desc},导致组件数据丢失")
    elif has_loading:
        symptoms.append("Loading 时间特别长(约 10 秒)")
        if has_no_audio:
            symptoms.append("组件无音频")
    elif has_no_audio:
        symptoms.append("组件无音频")
    elif has_data_loss:
        symptoms.append("组件数据丢失")

    # HUD / UI.
    if re.search(r'HUD.*变成.*条|hud.*窄|hud.*变', all_text, re.IGNORECASE):
        symptoms.append("NPC 头上的 HUD 偶尔变成一小条")
    if re.search(r'UI.*显示|界面.*显示|显示.*异常|花屏|白屏|黑屏|闪烁', all_text):
        symptoms.append("UI显示异常")

    # Crash / freeze.
    if re.search(r'闪退|崩溃|crash|卡死|卡住|无响应|闪.*退', all_text, re.IGNORECASE):
        symptoms.append("闪退/崩溃")

    # Version / update.
    if re.search(r'版本.*更新|更新.*版本|更新.*后|升级.*后|新版.*上线', all_text):
        symptoms.append("版本更新后出现")

    # Frequency tag, placed in front of the description.
    freq_tag = ""
    if re.search(r'偶尔|有时|有时候|随机|没什么规律|不.*规律', all_text):
        freq_tag = "【偶现】"
    elif re.search(r'频繁|每次都|经常|必现|总是|一直', all_text):
        freq_tag = "【频繁】"
    elif re.search(r'高概率|很大概率|极大概率', all_text, re.IGNORECASE):
        freq_tag = "【高概率】"

    # No known symptom: fall back to a shortened first-report summary.
    if not symptoms:
        summary = root_text[:80] if root_text else (all_text[:80].strip() or "未知问题")
        if len(summary) > 50:
            summary = summary[:47] + "..."
        symptoms.append(summary)

    # Separate independent symptoms so they do not run into each other.
    symptom_str = ",".join(symptoms)

    if location_str:
        result = f"在{location_str},{symptom_str}"
    else:
        result = symptom_str

    # Prepend the frequency tag unless the text already starts with a tag.
    if freq_tag and not result.startswith("【"):
        result = freq_tag + result

    return result
|
||||
|
||||
|
||||
def summarize_cluster(cluster_msgs, idx, priority_info=None):
|
||||
"""
|
||||
生成单个问题簇的结构化归纳。
|
||||
|
||||
脚本负责:提取位置要素元数据 + 输出对话要点表格 + 提取结论
|
||||
AI 负责:基于元数据和上下文,生成精炼的问题描述
|
||||
priority_info 可选,包含步骤5的优先级判定结果
|
||||
|
||||
输出格式:
|
||||
### 问题 N
|
||||
### 问题 N 🔴 P0
|
||||
> **[AI归纳: 问题描述]**
|
||||
<details><summary>位置要素(脚本提取)</summary>{元数据}</details>
|
||||
| 发言人 | 要点 |
|
||||
@ -406,16 +571,20 @@ def summarize_cluster(cluster_msgs, idx):
|
||||
course = loc["课程"] or "未知"
|
||||
component = loc["角色/组件"] or "未知"
|
||||
|
||||
lines = [f"### 问题 {idx}"]
|
||||
# 优先级标签:简洁格式,只改标题
|
||||
priority_prefix = ""
|
||||
if priority_info:
|
||||
priority_prefix = f"【{priority_info['priority']}】"
|
||||
|
||||
lines = [f"### {priority_prefix}"]
|
||||
lines.append("")
|
||||
lines.append(f"> **[AI归纳: 问题描述]**")
|
||||
|
||||
description = generate_problem_description(cluster_msgs, loc, root_msg_text)
|
||||
lines.append(f"**{idx},问题描述:** {description}")
|
||||
lines.append("")
|
||||
conclusion = extract_conclusion(cluster_msgs)
|
||||
lines.append(conclusion)
|
||||
lines.append("")
|
||||
lines.append(f"📋 脚本提取的位置要素:端=`{device}` | 环节=`{scene}` | 课程=`{course}` | 组件=`{component}`")
|
||||
lines.append(f"📋 首条报告内容:{root_msg_text[:120]}")
|
||||
lines.append("")
|
||||
lines.append("| 发言人 | 对话信息 |")
|
||||
lines.append("|--------|---------|")
|
||||
|
||||
@ -478,55 +647,131 @@ def extract_conclusion(cluster_msgs):
|
||||
|
||||
# 检查是否已上传日志
|
||||
has_log = bool(re.search(r'日志.*上传|上传.*日志', all_text, re.IGNORECASE))
|
||||
has_pending = bool(re.search(r'明天.*查|排查|查一下|待查|等.*查', all_text, re.IGNORECASE))
|
||||
has_pending = bool(re.search(r'明天.*查|排查|查一下|待查|等.*查|排查中', all_text, re.IGNORECASE))
|
||||
|
||||
if match_any(r'已修复|已解决|修好了|已处理好'):
|
||||
return "**当前问题排查结论:** 已修复"
|
||||
if match_any(r'确认.*是.*bug|确实是.*问题|确实有.*问题'):
|
||||
return "**当前问题排查结论:** 已确认,待修复"
|
||||
# 分析性发言优先于"非问题"判断
|
||||
if analysis_lines and (has_log or has_pending):
|
||||
# 提取第一条分析作为疑似原因
|
||||
first_reason = analysis_lines[0].split(':', 1)[-1][:50]
|
||||
return f"**当前问题排查结论:** 疑似{first_reason},已上传日志,排查中"
|
||||
if analysis_lines:
|
||||
first_reason = analysis_lines[0].split(':', 1)[-1][:50]
|
||||
return f"**当前问题排查结论:** 疑似{first_reason}"
|
||||
# 规则4: 有解释性分析 + 日志已上传 + 排查中 → 疑似{原因},已上传日志,排查中
|
||||
if analysis_lines and has_log and has_pending:
|
||||
first_reason = analysis_lines[0].split(':', 1)[-1][:40]
|
||||
# 精简原因描述:去掉冗余前缀,只保留核心原因
|
||||
first_reason = re.sub(r'^Loading.*是因为|^是因为|^由于', '', first_reason).strip()
|
||||
return f"**当前问题排查结论:** 疑似{first_reason}导致,已上传日志,排查中"
|
||||
# 规则5: 日志已上传 + 排查中
|
||||
if has_log and has_pending:
|
||||
return "**当前问题排查结论:** 已上传日志,排查中"
|
||||
if match_any(r'不是.*bug|就是这样的|设计如此|非问题|不是问题'):
|
||||
return "**当前问题排查结论:** 非问题,设计如此"
|
||||
if match_any(r'正常.*现象'):
|
||||
return "**当前问题排查结论:** 非问题,正常现象"
|
||||
if match_any(r'暂未|还没.*找到|查不到|没复现|未复现'):
|
||||
return "**当前问题排查结论:** 暂未排查到问题"
|
||||
# 默认
|
||||
return "**当前问题排查结论:** 暂未排查到问题"
|
||||
# 规则7: 默认
|
||||
return "**当前问题排查结论:** 暂无结论排查中"
|
||||
|
||||
|
||||
def generate_summary(clusters, cluster_order, skip_priority=False):
    """
    Build the daily problem-summary Markdown (steps 4 and 5).

    Step 4 summarises every problem cluster; step 5 assigns a priority to
    each cluster and orders them (P0 > P1 > P2 > P3) unless ``skip_priority``
    is set.

    Args:
        clusters: Mapping of cluster id -> list of message records. Element 6
            of each record is compared with ``min`` to find the earliest one
            (presumably a timestamp -- confirm against the caller).
        cluster_order: Iterable of cluster ids giving the processing order.
        skip_priority: When true, no priority is computed and every cluster
            is listed under "P2" in the index section.

    Returns:
        ``(markdown_str, has_content)``. ``has_content`` is False when no
        cluster contains at least two messages.
    """
    lines = ["## 今日问题归纳\n"]

    # Only clusters with at least two messages are summarised.
    valid_clusters = []
    for cid in cluster_order:
        cmsgs = clusters[cid]
        if len(cmsgs) < 2:
            continue
        valid_clusters.append({
            "cluster_id": cid,
            "msgs": cmsgs,
            "earliest_time": min(m[6] for m in cmsgs),
        })

    if not valid_clusters:
        return "\n".join(lines) + "\n*(无可归纳的问题簇)*\n", False

    # Step 5: assign a priority to each cluster, then order by priority.
    # NOTE(review): nesting was reconstructed from an unindented diff; the
    # sort is kept inside this branch because it relies on "priority_info",
    # which only exists when priorities were computed -- confirm.
    if not skip_priority:
        for vc in valid_clusters:
            vc["priority_info"] = compute_final_priority(vc["msgs"])
        valid_clusters = sort_by_priority(valid_clusters)

    # Index section: one numbered list of descriptions per priority level.
    grouped = defaultdict(list)
    for vc in valid_clusters:
        if skip_priority:
            level = "P2"
        else:
            level = vc.get("priority_info", {}).get("priority", "P2")
        grouped[level].append(vc)

    for p_level in ["P0", "P1", "P2", "P3"]:
        items = grouped.get(p_level, [])
        if not items:
            continue
        lines.append(f"**【{p_level}问题】**")
        for item_idx, vc in enumerate(items, start=1):
            desc = generate_problem_description(
                vc["msgs"], extract_location_elements(vc["msgs"]), "")
            lines.append(f"{item_idx},{desc}")
        # NOTE(review): blank separator assumed to follow each priority
        # group rather than each item -- confirm against the real file.
        lines.append("")

    # Breakdown section: the full per-cluster summary, numbered in the
    # (possibly priority-sorted) order established above.
    lines.append("## 今日问题拆解\n")
    for idx, vc in enumerate(valid_clusters, start=1):
        lines.append(summarize_cluster(vc["msgs"], idx, vc.get("priority_info")))
        lines.append("")

    return "\n".join(lines), True
|
||||
|
||||
|
||||
# === Step 5: target document that receives the written summary ===
SUMMARY_DOC_TOKEN = "J8bKd4dbYoofZixxVsCc06bhnFc"  # obj_token of 用户反馈问题汇总.docx


def update_summary_doc(markdown_content, day_label, mode="overwrite"):
    """
    Write one day's summary into the feedback-summary document via the CLI.

    Args:
        markdown_content: Markdown body of the day's summary.
        day_label: Label placed in the heading that is prepended to the body.
        mode: Passed to the CLI ``--mode`` option. ``'overwrite'`` (default)
            is used for the first day, ``'append'`` for later days.

    Returns:
        True when the CLI reports ``ok`` in its JSON output, False on any
        failure (CLI could not be run, timed out, or produced output that is
        not a JSON object with a truthy ``ok``).
    """
    env = get_env()

    full_md = f"## {day_label} 飞书问题反馈归纳\n\n" + markdown_content

    command = [
        CLI, "docs", "+update", "--doc", SUMMARY_DOC_TOKEN, "--as", "bot",
        "--mode", mode,
        "--markdown", full_md,
    ]
    try:
        result = subprocess.run(
            command, env=env, capture_output=True, text=True, timeout=15
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        # A hung or missing CLI must not abort the surrounding sync run;
        # report it the same way as any other write failure.
        print(f" ⚠️ 文档写入失败: {exc}")
        return False

    try:
        d = json.loads(result.stdout)
    except ValueError:
        # Empty or non-JSON stdout (e.g. the CLI printed its error to stderr).
        d = None

    if isinstance(d, dict) and d.get("ok"):
        print(f" ✅ 归纳结果已{'覆盖写入' if mode == 'overwrite' else '追加'}到「用户反馈问题汇总」文档")
        return True

    # Fall back to stderr when stdout carries no diagnostic text.
    detail = (result.stdout or result.stderr or "")[:200]
    print(f" ⚠️ 文档写入失败: {detail}")
    return False
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="飞书问题反馈同步")
|
||||
parser = argparse.ArgumentParser(description="飞书问题反馈同步(五步完整流程)")
|
||||
parser.add_argument("--days", type=int, default=3, help="同步最近 N 天(默认 3)")
|
||||
parser.add_argument("--dry-run", action="store_true", help="仅预览不写入")
|
||||
parser.add_argument("--skip-priority", action="store_true", help="跳过步骤5优先级判定")
|
||||
args = parser.parse_args()
|
||||
|
||||
end_date = date.today()
|
||||
@ -561,6 +806,7 @@ def main():
|
||||
print(f"📑 现有 sheet:{list(existing.keys())}")
|
||||
|
||||
total_written = 0
|
||||
first_day_written = False
|
||||
for day in sorted(groups.keys()):
|
||||
msgs = groups[day]
|
||||
|
||||
@ -578,11 +824,23 @@ def main():
|
||||
print(f" 排序完成:{len(sorted_msgs)} 条")
|
||||
|
||||
# 步骤 4:问题归纳
|
||||
summary = generate_summary(clusters, cluster_order)
|
||||
if summary:
|
||||
# 步骤 5:优先级判定(默认启用,--skip-priority 可跳过)
|
||||
summary, has_content = generate_summary(clusters, cluster_order, skip_priority=args.skip_priority)
|
||||
if has_content:
|
||||
priority_count = summary.count('优先级:') if not args.skip_priority else 0
|
||||
print(f" 归纳完成:{summary.count('### 问题')} 个问题")
|
||||
if not args.skip_priority:
|
||||
print(f" 步骤5 优先级判定完成:{priority_count} 个问题已评定优先级并排序")
|
||||
print(f" (归纳内容见下方)")
|
||||
print(summary)
|
||||
|
||||
# 写入「用户反馈问题汇总」文档
|
||||
# 第一个有内容的天用 overwrite,后续用 append
|
||||
doc_mode = "overwrite" if not first_day_written else "append"
|
||||
update_summary_doc(summary, day, mode=doc_mode)
|
||||
first_day_written = True
|
||||
else:
|
||||
print(" ⚠️ 无有效问题簇可归纳")
|
||||
|
||||
# 写入
|
||||
success = write_sheet(sheet_id, sorted_msgs)
|
||||
|
||||
@ -46,12 +46,23 @@ description: |
|
||||
|
||||
### 2. 优先级评估
|
||||
|
||||
| 优先级 | 定义 | 对应问题分类 |
|
||||
|--------|------|-------------|
|
||||
| P0 | 阻断使用 / 大面积影响 / 严重数据问题 | 启动/运行异常问题 |
|
||||
| P1 | 核心流程问题,影响较大 | 版本/更新问题、声音/音频问题、语音识别/判分问题、关卡/内容问题 |
|
||||
| P2 | 一般问题或体验问题 | UI显示问题、用户反馈问题、带媒体的问题 |
|
||||
| P3 | 建议/优化项,低影响 | 其他问题 |
|
||||
> **详细判定规则:** 参照《用户反馈问题优先级判断文档》(刘新玉提供,`priority_classifier.py` 已实现)
|
||||
|
||||
**判定三维度:**
|
||||
1. **基础优先级** — 按问题严重度(崩溃/功能异常/体验瑕疵/细节优化)
|
||||
2. **出现概率** — 必现升级,偶现降级
|
||||
3. **影响范围** — 全部用户升级,极少数用户降级
|
||||
|
||||
| 优先级 | 定义 | 修复时限 | 典型场景 |
|
||||
|--------|------|----------|---------|
|
||||
| 🔴 P0 | 孩子上不了课 / 核心功能崩溃 | 2小时处理,当天解决 | App闪退、进不去、数据丢失、服务器宕机 |
|
||||
| 🟠 P1 | 功能不对劲 / 学习效果打折扣 | 3天内修复 | 音频异常、判分错误、进度保存失败、奖励未发放 |
|
||||
| 🟡 P2 | 偶尔小毛病 / 界面瑕疵 | 当周修复 | UI显示异常、偶尔不显示、刷新恢复 |
|
||||
| 🟢 P3 | 几乎不影响使用 / 细节优化 | 不单独排期 | 分辨率略低、错别字、音量不统一 |
|
||||
|
||||
**动态调整:** 必现问题 +1级,偶现问题 -1级(最多降一级);影响全部用户 +1级,影响极少数用户 -1级。
|
||||
|
||||
**实现:** 优先级由 `feishu-feedback-sync` 技能步骤5自动判定,参见 `scripts/priority_classifier.py`。
|
||||
|
||||
### 3. 汇总到标准表格
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user