auto backup 2026-05-28 08:10:01
This commit is contained in:
parent
179c6d95e4
commit
bcb25d62e4
@ -9,9 +9,9 @@ lark_wiki_operate_as_bot f84c308bcb69280520dadf9458177d9c4af192d60cf409528bd65e1
|
||||
pua f6a38fdd39c22c81370abd6b979b58c767e41738d43a26fbbc23d1e933cdc701
|
||||
smart-auto-model-switch bfb3547dcd6029622c7062b49ae7922614a366b6dfe88c7d0fae9dcd85fc2eb3
|
||||
vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf0bca9fd64b968ec
|
||||
tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06
|
||||
user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81
|
||||
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
|
||||
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
|
||||
feishu-group-msg-sync 1b581de76d419e6a33db0836125efc16ef2c972013fcae6f08c03aa7e2276445
|
||||
feishu-feedback-sync 9c9ca1f0c42a289e037289cd394299b7debf7e240d3b30429899da42b601d953
|
||||
feishu-group-msg-sync 953534cc3d1cf4489060afe1cf10cad75faedd3f2d699ffa7478d178d528dc51
|
||||
tencent-cos-upload 2505dbc8c7acdaf95f2228598ae12010e09599a25df4319587c4a3109d828053
|
||||
|
||||
10
MEMORY.md
10
MEMORY.md
@ -49,6 +49,16 @@
|
||||
|
||||
**不要在 MEMORY.md 中维护静态分类映射表,所有规则变更直接修改 priority_classifier.py。**
|
||||
|
||||
### Python 脚本修改后需清理 __pycache__(2026-05-27)
|
||||
- 修改 Python 脚本(尤其是新增/删除 import)后,旧 `.pyc` 缓存可能导致 `NameError`(模块名未定义)
|
||||
- 症状:源码中有 `import subprocess`,运行时却报 `NameError: name 'subprocess' is not defined`
|
||||
- 修复:`find <workdir> -name "__pycache__" -type d | xargs rm -rf && find <workdir> -name "*.pyc" -delete`
|
||||
### P0 实时检测去重:内容语义指纹替代消息ID精确匹配(2026-05-27)
|
||||
- 原方案用 `sorted(message_ids)` MD5 做去重,但同一话题每次扫描聚类结果不同,签名失效导致重复推送
|
||||
- 修复:增加内容语义去重层 — 拼接簇内前5条消息内容 + 发送人集合 + 小时窗口,用 Jaccard 相似度比较
|
||||
- 阈值:同小时 + 发送人交集 + 相似度 > 0.20;跨小时 + 发送人 ≥2 重叠 + 相似度 > 0.35
|
||||
- 影响文件:`detect_p0_wechat.py`、`detect_p0_realtime.py`
|
||||
|
||||
## 经验教训
|
||||
|
||||
### 微信反馈全链路(2026-05-22 刘新玉确认)
|
||||
|
||||
@ -1 +1 @@
|
||||
1674
|
||||
1772
|
||||
|
||||
@ -21,3 +21,5 @@
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-25T07:11:25.527Z","query":"微信反馈 全链路 cron 采集 整理 归纳 分发","resultCount":3,"results":[{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-26T03:40:13.634Z","query":"微信飞书问题汇总 小葵小葵群 发送汇总","resultCount":3,"results":[{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-26T03:40:24.708Z","query":"小葵小葵群 chat_id 分发消息群","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-27T02:15:55.251Z","query":"小葵小葵群 chat_id 反馈同步","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-27T02:50:23.510Z","query":"pycache python 缓存 导入错误","resultCount":3,"results":[{"path":"memory/2026-05-27.md","startLine":22,"endLine":32,"score":1},{"path":"memory/2026-05-27.md","startLine":1,"endLine":27,"score":1},{"path":"memory/2026-04-10.md","startLine":20,"endLine":52,"score":1}]}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updatedAt": "2026-05-26T03:40:24.708Z",
|
||||
"updatedAt": "2026-05-27T02:50:23.510Z",
|
||||
"entries": {
|
||||
"memory:memory/2026-04-18.md:1:5": {
|
||||
"key": "memory:memory/2026-04-18.md:1:5",
|
||||
@ -9,13 +9,13 @@
|
||||
"endLine": 5,
|
||||
"source": "memory",
|
||||
"snippet": "# 2026-04-18 工作日志 ## 术语共识 [李若松确认] 术语「飞书反馈消息数据库」默认指代用户反馈收集技能中的飞书内部测试反馈MySQL数据表 `vala_test.lark_group_message`,存储「内容测试问题反馈」群(oc_fabff7672e62a9ced7b326ee4a286c26)的同步消息数据。",
|
||||
"recallCount": 7,
|
||||
"recallCount": 8,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 7,
|
||||
"totalScore": 8,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-04-30T03:47:21.989Z",
|
||||
"lastRecalledAt": "2026-05-26T03:40:24.708Z",
|
||||
"lastRecalledAt": "2026-05-27T02:15:55.251Z",
|
||||
"queryHashes": [
|
||||
"353f9765c086",
|
||||
"a6b740c99377",
|
||||
@ -23,14 +23,16 @@
|
||||
"f865295b9ac7",
|
||||
"42fe8210f22c",
|
||||
"81f7a2647922",
|
||||
"261597c52d5b"
|
||||
"261597c52d5b",
|
||||
"3fe44d618bf6"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-04-30",
|
||||
"2026-05-06",
|
||||
"2026-05-07",
|
||||
"2026-05-25",
|
||||
"2026-05-26"
|
||||
"2026-05-26",
|
||||
"2026-05-27"
|
||||
],
|
||||
"conceptTags": [
|
||||
"vala-test.lark-group-message",
|
||||
@ -980,6 +982,99 @@
|
||||
"姓氏",
|
||||
"中文"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-27.md:22:32": {
|
||||
"key": "memory:memory/2026-05-27.md:22:32",
|
||||
"path": "memory/2026-05-27.md",
|
||||
"startLine": 22,
|
||||
"endLine": 32,
|
||||
"source": "memory",
|
||||
"snippet": "| 微信 | `--skip-dispatch` → 不分发 | `--apply-ai` → 替换占位符+分发 | 微信依赖 AI 归纳成功才能分发,飞书双重分发(占位符+AI)。 ### 5月26日反馈概况 - 飞书:17条消息,3个问题簇(2个有效:录音识别率低、飞船音乐不保存) - 微信:27条消息,14个问题簇(4个有效:飞船音乐、录音识别、音频无法播放、AI回复无关语句) ### 后续注意 - 删除/修改 `ai_summarize_feedback.py` 后需清理 `__pycache__`,否则缓存版本可能落后于源码",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-27T02:50:23.510Z",
|
||||
"lastRecalledAt": "2026-05-27T02:50:23.510Z",
|
||||
"queryHashes": [
|
||||
"b15d92b2dda8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-27"
|
||||
],
|
||||
"conceptTags": [
|
||||
"skip-dispatch",
|
||||
"apply-ai",
|
||||
"删除/修改",
|
||||
"ai-summarize-feedback.py",
|
||||
"skip",
|
||||
"dispatch",
|
||||
"不分",
|
||||
"apply"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-27.md:1:27": {
|
||||
"key": "memory:memory/2026-05-27.md:1:27",
|
||||
"path": "memory/2026-05-27.md",
|
||||
"startLine": 1,
|
||||
"endLine": 27,
|
||||
"source": "memory",
|
||||
"snippet": "# 2026-05-27 工作日志 ## 用户反馈同步故障排查与修复 [刘新玉反馈] ### 问题 5月26日飞书/微信用户反馈定时任务出现回写失败。 ### 根因 1. **Python 缓存过期** — `ai_summarize_feedback.py` 中 `subprocess` 模块导入失败(`NameError: name 'subprocess' is not defined`),实际源码有 `import subprocess`,但 `__pycache__` 中的旧 `.pyc` 未包含此导入 2. **sync_wechat_feedback.py** — 同样因缓存问题导致 `with open(tmp_md)` 写入失败 ### 修复 - 清理了工作区内所有 `__pycache__` 和 `.pyc` 文件 - 重新执行飞书 AI 归纳回写:`sync_feishu_feedback.py --date 2026-05-26 --apply-ai` - 重新执行微信 AI 归纳回写:`sync_wechat_feedback.py --date 2026-05-26 --apply-ai` - 两个渠道均成功回写并分发到「小葵小葵」群 ### 分发架构确认 | 渠道 | 10:00/10:02 Wrapper | 10:05/10:07 AI 归纳 | |------|---------------------|---------------------| | 飞书 | `--dispatch-mode all` → 分发占位符 | `--",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-27T02:50:23.510Z",
|
||||
"lastRecalledAt": "2026-05-27T02:50:23.510Z",
|
||||
"queryHashes": [
|
||||
"b15d92b2dda8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-27"
|
||||
],
|
||||
"conceptTags": [
|
||||
"5月26日飞书/微信用户反馈定时任务出现回写失败",
|
||||
"ai-summarize-feedback.py",
|
||||
"sync-wechat-feedback.py",
|
||||
"tmp-md",
|
||||
"sync-feishu-feedback.py",
|
||||
"apply-ai",
|
||||
"00/10",
|
||||
"05/10"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-04-10.md:20:52": {
|
||||
"key": "memory:memory/2026-04-10.md:20:52",
|
||||
"path": "memory/2026-04-10.md",
|
||||
"startLine": 20,
|
||||
"endLine": 52,
|
||||
"source": "memory",
|
||||
"snippet": "- 图片先下载到工作目录(相对路径),再用 `docs +media-insert` 插入文档 - 去掉 `set -e`,改为手动错误处理避免单条消息失败导致整个脚本退出 - 时间用 ISO 8601 格式存储和传递 ### 验证结果(第一版 → Wiki文档) - 全量同步成功:49 条消息 + 5 张图片写入 Wiki 文档 - Wiki文档:DfUqddItXoDsnNxPypncbinknxh ## 迭代:改为电子表格 + 腾讯COS **来源:** [李若松] 要求改用表格存储,媒体文件上传COS ### 方案 - 脚本改为 Python:`scripts/sync_feedback_group.py` - 记录写入飞书电子表格:`E8vFsCmPBhT4SCtNmnJchqeJnJe`,sheet_id `7bce8f` - 列:时间 | 反馈人 | 信息类型 | 信息内容(或地址) - 非文本消息(图片/视频/音频/文件)下载后上传到腾讯COS - COS桶:`static-1317843270`,区域:`ap-beijing` - COS路径结构:`vala_llm/user_feedback/{type}/{date}/{filename}` - type: image / video / audio / file - date: YYYY-MM-DD - 访问域名:`https://static.valavala.com/vala_llm/user_feedback/...` - COS凭证已存入 `secrets.md` ### 验证结",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-27T02:50:23.510Z",
|
||||
"lastRecalledAt": "2026-05-27T02:50:23.510Z",
|
||||
"queryHashes": [
|
||||
"b15d92b2dda8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-27"
|
||||
],
|
||||
"conceptTags": [
|
||||
"media-insert",
|
||||
"scripts/sync-feedback-group.py",
|
||||
"sheet-id",
|
||||
"图片/视频/音频/文件",
|
||||
"static-1317843270",
|
||||
"ap-beijing",
|
||||
"vala-llm/user-feedback",
|
||||
"yyyy-mm-dd"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
50
memory/2026-05-27.md
Normal file
50
memory/2026-05-27.md
Normal file
@ -0,0 +1,50 @@
|
||||
# 2026-05-27 工作日志
|
||||
|
||||
## 用户反馈同步故障排查与修复 [刘新玉反馈]
|
||||
|
||||
### 问题
|
||||
5月26日飞书/微信用户反馈定时任务出现回写失败。
|
||||
|
||||
### 根因
|
||||
1. **Python 缓存过期** — `ai_summarize_feedback.py` 中 `subprocess` 模块导入失败(`NameError: name 'subprocess' is not defined`),实际源码有 `import subprocess`,但 `__pycache__` 中的旧 `.pyc` 未包含此导入
|
||||
2. **sync_wechat_feedback.py** — 同样因缓存问题导致 `with open(tmp_md)` 写入失败
|
||||
|
||||
### 修复
|
||||
- 清理了工作区内所有 `__pycache__` 和 `.pyc` 文件
|
||||
- 重新执行飞书 AI 归纳回写:`sync_feishu_feedback.py --date 2026-05-26 --apply-ai`
|
||||
- 重新执行微信 AI 归纳回写:`sync_wechat_feedback.py --date 2026-05-26 --apply-ai`
|
||||
- 两个渠道均成功回写并分发到「小葵小葵」群
|
||||
|
||||
### 分发架构确认
|
||||
| 渠道 | 10:00/10:02 Wrapper | 10:05/10:07 AI 归纳 |
|
||||
|------|---------------------|---------------------|
|
||||
| 飞书 | `--dispatch-mode all` → 分发占位符 | `--apply-ai` → 替换占位符+重新分发 |
|
||||
| 微信 | `--skip-dispatch` → 不分发 | `--apply-ai` → 替换占位符+分发 |
|
||||
|
||||
微信依赖 AI 归纳成功才能分发,飞书双重分发(占位符+AI)。
|
||||
|
||||
### 5月26日反馈概况
|
||||
- 飞书:17条消息,3个问题簇(2个有效:录音识别率低、飞船音乐不保存)
|
||||
- 微信:27条消息,14个问题簇(4个有效:飞船音乐、录音识别、音频无法播放、AI回复无关语句)
|
||||
|
||||
### 后续注意
|
||||
- 删除/修改 `ai_summarize_feedback.py` 后需清理 `__pycache__`,否则缓存版本可能落后于源码
|
||||
|
||||
## P0 实时检测去重修复 [刘新玉反馈]
|
||||
|
||||
### 问题
|
||||
微信 `detect_p0_wechat.py` 每分钟扫描最近120分钟消息,同一个问题因讨论线程持续生长,`sort_threads` 聚类每次产生不同消息集合,导致:
|
||||
- 不同次的聚类有不同的 `cluster_signature`(基于 `sorted(message_ids)` MD5)
|
||||
- 去重完全失效,同一问题被重复推送(今天2个真实问题各推了3次 = 6次)
|
||||
|
||||
### 修复
|
||||
在 `detect_p0_wechat.py` 和 `detect_p0_realtime.py` 中增加**内容语义去重**:
|
||||
1. 新增 `cluster_content_fingerprint()`:拼接簇内前5条有意义消息作为内容指纹 + 发送人集合 + 小时粒度时间窗口
|
||||
2. 新增 `is_duplicate_p0()`:基于内容相似度(Jaccard)+ 发送人重叠 + 时间窗口三层判断
|
||||
- 同小时 + 发送人交集 + 内容相似度 > 0.20 → 重复
|
||||
- 发送人高度重叠(≥2) + 内容相似度 > 0.35 → 跨小时重复
|
||||
3. 状态文件改为 `{"time": ..., "fp": {...}}` 格式存储指纹信息
|
||||
4. 飞书 P0 检测器同步修复
|
||||
|
||||
### 测试验证
|
||||
360分钟窗口测试:同一话题在不同扫描窗口下签名不同(c69d... vs 70a4...),但内容指纹正确识别为重复(相似度 0.462,is_duplicate=True)
|
||||
17
output/daily_feedback/ai_descriptions_feishu_2026-05-26.json
Normal file
17
output/daily_feedback/ai_descriptions_feishu_2026-05-26.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"date": "2026-05-26",
|
||||
"descriptions": [
|
||||
{
|
||||
"index": 1,
|
||||
"description": "在应用录音环节,背景音乐音量过大导致用户人声微弱,造成语音识别率低,且麦克风界面动画卡顿。"
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"description": "在飞船系统中,用户更换音乐后重新进入时音乐自动恢复为默认,无法保存切换设置。"
|
||||
},
|
||||
{
|
||||
"index": 3,
|
||||
"description": "无明确问题"
|
||||
}
|
||||
]
|
||||
}
|
||||
61
output/daily_feedback/ai_descriptions_wechat_2026-05-26.json
Normal file
61
output/daily_feedback/ai_descriptions_wechat_2026-05-26.json
Normal file
@ -0,0 +1,61 @@
|
||||
{
|
||||
"date": "2026-05-26",
|
||||
"descriptions": [
|
||||
{
|
||||
"index": 1,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"description": "飞船音乐切换后,重新进出时音乐会恢复为默认歌曲,无法保存切换设置。"
|
||||
},
|
||||
{
|
||||
"index": 3,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 4,
|
||||
"description": "在移动端瓦拉英语应用中,录音识别功能存在识别率低且录音时麦克风动画卡顿的问题。"
|
||||
},
|
||||
{
|
||||
"index": 5,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 6,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 7,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 8,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 9,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 10,
|
||||
"description": "英语音频无法播放声音"
|
||||
},
|
||||
{
|
||||
"index": 11,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 12,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 13,
|
||||
"description": "在强化练习环节中,AI语音回复出现了与当前练习内容无关的语句。"
|
||||
},
|
||||
{
|
||||
"index": 14,
|
||||
"description": "无明确问题"
|
||||
}
|
||||
]
|
||||
}
|
||||
166
output/daily_feedback/cluster_context_2026-05-26.json
Normal file
166
output/daily_feedback/cluster_context_2026-05-26.json
Normal file
@ -0,0 +1,166 @@
|
||||
{
|
||||
"date": "2026-05-26",
|
||||
"total_clusters": 3,
|
||||
"clusters": [
|
||||
{
|
||||
"index": 1,
|
||||
"cluster_id": "4702315644940596587",
|
||||
"location": {
|
||||
"端": "未知",
|
||||
"环节": "未知",
|
||||
"课程": "",
|
||||
"角色/组件": ""
|
||||
},
|
||||
"priority": "P2",
|
||||
"priority_detail": "",
|
||||
"category": "其他问题",
|
||||
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||
"messages": [
|
||||
{
|
||||
"sender": "瓦拉英语-萌萌老师(早10晚7)",
|
||||
"content": "麻烦帮看下吧,我的飞船-音乐切换了歌曲,再重新进出的话音乐又变成了默认的,切换歌曲不能保存吗?",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 09:28:15"
|
||||
},
|
||||
{
|
||||
"sender": "嘿哈",
|
||||
"content": "这个目前设计就是如此,不会保存\n ↳ 回复 瓦拉英语-萌萌老师(早10晚7): 麻烦帮看下吧,我的飞船-音乐切换了歌曲,再重新进出的话音乐又变成了默认的,切换歌曲不能保存吗?",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 10:58:11"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"cluster_id": "6332894499636566314",
|
||||
"location": {
|
||||
"端": "移动端",
|
||||
"环节": "未知",
|
||||
"课程": "",
|
||||
"角色/组件": "音频"
|
||||
},
|
||||
"priority": "P2",
|
||||
"priority_detail": "",
|
||||
"category": "声音/音频类",
|
||||
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||
"messages": [
|
||||
{
|
||||
"sender": "胡陈辰🦉",
|
||||
"content": "@许悦 我们可以加个需求 ",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 12:57:57"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-Tom老师",
|
||||
"content": "老师,这种录音识别率比较低,是正常的么?辛苦帮忙看看",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 13:07:45"
|
||||
},
|
||||
{
|
||||
"sender": "嘿哈",
|
||||
"content": "@瓦拉英语-Tom老师 这个可以咨询一下用户,是有连接音响什么的不?\n ↳ 回复 瓦拉英语-Tom老师: 老师,这种录音识别率比较低,是正常的么?辛苦帮忙看看",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 14:42:09"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-Tom老师",
|
||||
"content": "@八哥-16619720408好的\n ↳ 回复 嘿哈: @瓦拉英语-Tom老师 这个可以咨询一下用户,是有连接音响什么的不?",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 14:44:20"
|
||||
},
|
||||
{
|
||||
"sender": "嘿哈",
|
||||
"content": "@瓦拉英语-Tom老师 这个可能需要分两步走,我获取了一下用户的音频信息确实用户的声音很小,先让用户尝试通过设置降低一点音乐音量,然后我们这边也和产品老师说一下再优化一下,录音的时候没有其他的声音https://static.valavala.com/vala_user_audio/c6125977134c_134242247464915130.wav\nhttps://static.valavala.com/vala_user_audio/71bec4fc33a2_134242247584791300.wav\nhttps://static.valavala.com/vala_user_audio/b4c7d8eff63c_134242247700853530.wav",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 14:50:07"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-Tom老师",
|
||||
"content": "[聊天记录] 雷鸣和瓦拉英语-Tom老师\n雷鸣: [视频]\n雷鸣: [视频]\n雷鸣: [图片]",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 13:07:44"
|
||||
},
|
||||
{
|
||||
"sender": "嘿哈",
|
||||
"content": "发一下用户手机号吧",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 13:08:10"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-Tom老师",
|
||||
"content": "手机号:13617153553",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 13:08:53"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-Tom老师",
|
||||
"content": "[视频] 17秒 size:2553865",
|
||||
"msg_type": "video",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 13:47:26"
|
||||
},
|
||||
{
|
||||
"sender": "Ariel",
|
||||
"content": "@kevin 而且它这个麦克风的动画看起来超级卡,一秒动一帧的感觉\n ↳ 回复 瓦拉英语-Tom老师: 25984984606212559@openim:\n<?xml version=\"1.0\"?>\n<msg>\n\t<videomsg aeskey=\"0cfb6db1e5d84683aab0ed170dbf6567\" cdnvideourl=\"\" cdnthumbaeskey=\"\" cdnthumburl=\"\" lengt...",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 14:26:20"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-Tom老师",
|
||||
"content": "老师,这种是什么原因呢?\n手机号:18165276842",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 13:47:26"
|
||||
},
|
||||
{
|
||||
"sender": "kevin",
|
||||
"content": "可以让用户上传一下日志\n ↳ 回复 瓦拉英语-Tom老师: 老师,这种是什么原因呢?\n手机号:18165276842",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 14:07:41"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"index": 3,
|
||||
"cluster_id": "6521596520536988022",
|
||||
"location": {
|
||||
"端": "未知",
|
||||
"环节": "未知",
|
||||
"课程": "",
|
||||
"角色/组件": ""
|
||||
},
|
||||
"priority": "P2",
|
||||
"priority_detail": "",
|
||||
"category": "其他问题",
|
||||
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||
"messages": [
|
||||
{
|
||||
"sender": "嘿哈",
|
||||
"content": "让用户上传一下日志吧",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 21:09:17"
|
||||
},
|
||||
{
|
||||
"sender": "王欢",
|
||||
"content": "日志已传@八哥-16619720408\n ↳ 回复 八哥-16619720408: 让用户上传一下日志吧",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-05-26 21:11:43"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Binary file not shown.
BIN
scripts/__pycache__/detect_p0_realtime.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/detect_p0_realtime.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -27,7 +27,7 @@ SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skil
|
||||
sys.path.insert(0, SKILL_DIR)
|
||||
|
||||
from sync_feishu_feedback import (
|
||||
get_db_connection, query_messages, sort_threads, get_tenant_token,
|
||||
get_db_connection, query_messages, sort_threads, get_tenant_token, content_similarity,
|
||||
DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS,
|
||||
MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB,
|
||||
)
|
||||
@ -40,17 +40,19 @@ CLUSTER_MIN_SIZE = 2 # 至少 2 条消息才算有效簇
|
||||
|
||||
|
||||
def load_dispatched_state():
    """Load the record of already dispatched P0 clusters, dropping stale entries.

    Reads the JSON document at ``STATE_FILE``. A missing or unparsable file is
    treated as an empty state. Two value formats are accepted per entry:

    * legacy format: the value is the dispatch time as an ISO-8601 string;
    * current format: the value is a dict whose ``"time"`` key holds that
      string (written together with a ``"fp"`` fingerprint by ``main``).

    Entries whose timestamp is not newer than 24 hours ago are discarded.
    Kept values are returned unchanged, so legacy and current entries may
    coexist in the result.

    Returns:
        dict: cluster signature -> original stored value (str or dict).
    """
    try:
        with open(STATE_FILE, "r") as f:
            state = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        state = {}

    if not isinstance(state, dict):
        # Valid JSON whose root is not an object (e.g. a list) cannot hold
        # signature -> value pairs; treat it like a corrupted file.
        return {}

    # ISO-8601 strings produced by datetime.isoformat() sort chronologically,
    # so a plain string comparison is enough to drop records older than 24h.
    cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
    cleaned = {}
    for k, v in state.items():
        if isinstance(v, str):
            ts = v
        elif isinstance(v, dict):
            ts = v.get("time", "")
        else:
            # Unknown value shape: skip it instead of crashing the detector.
            continue
        if isinstance(ts, str) and ts > cutoff:
            cleaned[k] = v
    return cleaned
|
||||
|
||||
|
||||
def save_dispatched_state(state):
|
||||
@ -72,6 +74,47 @@ def cluster_signature(cluster_msgs):
|
||||
return hashlib.md5(joined.encode()).hexdigest()
|
||||
|
||||
|
||||
def cluster_content_fingerprint(cluster_msgs):
    """Build a content-based fingerprint for a cluster of messages.

    The fingerprint is used for de-duplication across scans and does not
    depend on the exact set of message IDs in the cluster.

    Args:
        cluster_msgs: sequence of indexable message rows. This function reads
            index 1 (used as the sender), index 3 (used as the content) and
            index 6 (used as the message time).

    Returns:
        dict with keys:
            ``"content"``   - up to five contents joined by ``" | "``; only
                              contents longer than 8 characters are used and
                              each is truncated to 300 characters;
            ``"senders"``   - sorted list of distinct truthy senders;
            ``"hour"``      - first 13 characters of the first truthy time
                              value, or ``"unknown"`` when there is none;
            ``"msg_count"`` - number of rows in the cluster.
    """
    all_contents = []
    for m in cluster_msgs:
        c = str(m[3]).strip() if m[3] else ""
        # Very short contents are skipped so they do not dilute the
        # aggregated text.
        if c and len(c) > 8:
            all_contents.append(c[:300])
    # Only the first five usable contents are aggregated.
    aggregated = " | ".join(all_contents[:5])

    # Sorted so the stored list is deterministic.
    senders = sorted({m[1] for m in cluster_msgs if m[1]})

    times = [m[6] for m in cluster_msgs if m[6]]
    # str() keeps string timestamps untouched and also accepts datetime
    # objects, which are not sliceable; the first 13 characters of
    # "YYYY-MM-DD HH:MM:SS" are the date plus the hour.
    hour = str(times[0])[:13] if times else "unknown"

    return {
        "content": aggregated,
        "senders": senders,
        "hour": hour,
        "msg_count": len(cluster_msgs),
    }
|
||||
|
||||
|
||||
def is_duplicate_p0(new_fp, dispatched_entries):
    """Decide whether a new P0 cluster repeats one that was already dispatched.

    Args:
        new_fp: fingerprint dict with ``"content"``, ``"senders"`` and
            ``"hour"`` keys, as produced by ``cluster_content_fingerprint``.
        dispatched_entries: mapping of cluster signature to stored state.
            Current entries look like ``{"time": str, "fp": dict}``. Legacy
            entries are bare timestamp strings; they carry no fingerprint
            and are ignored here.

    Returns:
        True when some stored fingerprint satisfies either rule:
          * same hour, at least one shared sender, similarity > 0.20;
          * at least two shared senders, similarity > 0.35 (any hour).
        False otherwise.
    """
    new_senders = set(new_fp["senders"])
    for entry in dispatched_entries.values():
        # Legacy entries are plain strings and have no .get(); without this
        # guard the first old-format record raises AttributeError.
        if not isinstance(entry, dict):
            continue
        old_fp = entry.get("fp")
        if not old_fp:
            continue

        same_hour = new_fp["hour"] == old_fp["hour"]
        sender_overlap = len(new_senders & set(old_fp["senders"]))

        # Pick the loosest threshold that applies. With two or more shared
        # senders in the same hour both rules apply and 0.20 subsumes 0.35.
        if same_hour and sender_overlap >= 1:
            threshold = 0.20
        elif sender_overlap >= 2:
            threshold = 0.35
        else:
            continue

        # content_similarity is imported from sync_feishu_feedback
        # (presumably a Jaccard similarity - verify against that module).
        if content_similarity(new_fp["content"], old_fp["content"]) > threshold:
            return True
    return False
|
||||
|
||||
|
||||
def is_probably_p0(cluster_msgs):
|
||||
"""
|
||||
快速判断一个簇是否是 P0 级别问题。
|
||||
@ -229,7 +272,7 @@ def main():
|
||||
|
||||
# 加载已推送状态
|
||||
state = load_dispatched_state()
|
||||
print(f"[P0-detect] 已记录 {len(state)} 个已推送簇签名")
|
||||
print(f"[P0-detect] 已记录 {len(state)} 个已推送簇")
|
||||
|
||||
# 遍历簇,找出 P0 且未推送的
|
||||
new_p0_count = 0
|
||||
@ -241,7 +284,13 @@ def main():
|
||||
|
||||
sig = cluster_signature(cmsgs)
|
||||
if sig in state:
|
||||
print(f"[P0-detect] 已推送过,跳过: sig={sig[:8]}...")
|
||||
print(f"[P0-detect] 已推送过(精确匹配),跳过: sig={sig[:8]}...")
|
||||
continue
|
||||
|
||||
# 内容语义去重
|
||||
fp = cluster_content_fingerprint(cmsgs)
|
||||
if is_duplicate_p0(fp, state):
|
||||
print(f"[P0-detect] 已推送过(内容匹配),跳过: senders={fp['senders'][:2]}... hour={fp['hour']}")
|
||||
continue
|
||||
|
||||
print(f"[P0-detect] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")
|
||||
@ -249,14 +298,14 @@ def main():
|
||||
if args.dry_run:
|
||||
alert = generate_p0_alert_text(cmsgs, info)
|
||||
print(f"[DRY-RUN] 将发送:\n{alert}")
|
||||
state[sig] = datetime.now().isoformat()
|
||||
state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
|
||||
new_p0_count += 1
|
||||
else:
|
||||
alert = generate_p0_alert_text(cmsgs, info)
|
||||
success = dispatch_p0_alert(alert)
|
||||
if success:
|
||||
print(f"[P0-detect] ✅ P0 已实时推送")
|
||||
state[sig] = datetime.now().isoformat()
|
||||
state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
|
||||
new_p0_count += 1
|
||||
else:
|
||||
print(f"[P0-detect] ❌ 推送失败")
|
||||
|
||||
@ -25,7 +25,7 @@ SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skil
|
||||
sys.path.insert(0, SKILL_DIR)
|
||||
|
||||
from sync_feishu_feedback import (
|
||||
sort_threads, get_tenant_token,
|
||||
sort_threads, get_tenant_token, content_similarity,
|
||||
DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS,
|
||||
MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB,
|
||||
)
|
||||
@ -44,7 +44,13 @@ def load_dispatched_state():
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
state = {}
|
||||
cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
|
||||
return {k: v for k, v in state.items() if v > cutoff}
|
||||
# 兼容新旧格式:新格式 value 是 {"time": ..., "fp": ...},旧格式是纯时间字符串
|
||||
cleaned = {}
|
||||
for k, v in state.items():
|
||||
ts = v if isinstance(v, str) else v.get("time", "")
|
||||
if ts > cutoff:
|
||||
cleaned[k] = v
|
||||
return cleaned
|
||||
|
||||
|
||||
def save_dispatched_state(state):
|
||||
@ -60,6 +66,53 @@ def cluster_signature(cluster_msgs):
|
||||
return hashlib.md5(",".join(ids).encode()).hexdigest()
|
||||
|
||||
|
||||
def cluster_content_fingerprint(cluster_msgs):
    """Build a content-based fingerprint for a cluster of messages.

    The fingerprint is used for de-duplication across scans and does not
    depend on the exact set of message IDs in the cluster.

    Args:
        cluster_msgs: sequence of indexable message rows. This function reads
            index 1 (used as the sender), index 3 (used as the content) and
            index 6 (used as the message time).

    Returns:
        dict with keys:
            ``"content"``   - up to five contents joined by ``" | "``; only
                              contents longer than 8 characters are used and
                              each is truncated to 300 characters;
            ``"senders"``   - sorted list of distinct truthy senders;
            ``"hour"``      - first 13 characters of the first truthy time
                              value, or ``"unknown"`` when there is none;
            ``"msg_count"`` - number of rows in the cluster.
    """
    # Collect the meaningful message contents; short ones (intended to cover
    # pure image / file / emoji messages) are skipped.
    all_contents = []
    for m in cluster_msgs:
        c = str(m[3]).strip() if m[3] else ""
        if c and len(c) > 8:
            all_contents.append(c[:300])
    # Aggregate only the first five so the core problem description stays
    # stable.
    aggregated = " | ".join(all_contents[:5])

    # Distinct senders, sorted for a deterministic result.
    senders = sorted({m[1] for m in cluster_msgs if m[1]})

    # Hour-granularity time window. str() keeps string timestamps untouched
    # and also accepts datetime objects, which are not sliceable.
    times = [m[6] for m in cluster_msgs if m[6]]
    hour = str(times[0])[:13] if times else "unknown"

    return {
        "content": aggregated,
        "senders": senders,
        "hour": hour,
        "msg_count": len(cluster_msgs),
    }
|
||||
|
||||
|
||||
def is_duplicate_p0(new_fp, dispatched_entries):
    """Decide whether a new P0 cluster repeats one that was already dispatched.

    Args:
        new_fp: fingerprint dict with ``"content"``, ``"senders"`` and
            ``"hour"`` keys, as produced by ``cluster_content_fingerprint``.
        dispatched_entries: mapping of cluster signature to stored state.
            Current entries look like ``{"time": str, "fp": dict}``. Legacy
            entries are bare timestamp strings; they carry no fingerprint
            and are ignored here.

    Returns:
        True when some stored fingerprint satisfies either rule:
          * same hour, at least one shared sender, similarity > 0.20;
          * at least two shared senders, similarity > 0.35 (any hour).
        False otherwise.
    """
    new_senders = set(new_fp["senders"])
    for entry in dispatched_entries.values():
        # Legacy entries are plain strings and have no .get(); without this
        # guard the first old-format record raises AttributeError.
        if not isinstance(entry, dict):
            continue
        old_fp = entry.get("fp")
        if not old_fp:
            continue

        same_hour = new_fp["hour"] == old_fp["hour"]
        sender_overlap = len(new_senders & set(old_fp["senders"]))

        # Rule 1: same hour + shared sender -> loose 0.20 threshold.
        # Rule 2: two or more shared senders -> 0.35, the cross-hour case.
        # When both apply, 0.20 subsumes 0.35.
        if same_hour and sender_overlap >= 1:
            threshold = 0.20
        elif sender_overlap >= 2:
            threshold = 0.35
        else:
            continue

        # content_similarity is imported from sync_feishu_feedback
        # (presumably a Jaccard similarity - verify against that module).
        if content_similarity(new_fp["content"], old_fp["content"]) > threshold:
            return True
    return False
|
||||
|
||||
|
||||
def is_probably_p0(cluster_msgs):
|
||||
if len(cluster_msgs) < CLUSTER_MIN_SIZE:
|
||||
return False, None
|
||||
@ -200,7 +253,7 @@ def main():
|
||||
print(f"[P0-wechat] 聚类完成:{len(clusters)} 个簇")
|
||||
|
||||
state = load_dispatched_state()
|
||||
print(f"[P0-wechat] 已记录 {len(state)} 个已推送簇签名")
|
||||
print(f"[P0-wechat] 已记录 {len(state)} 个已推送簇")
|
||||
|
||||
new_p0_count = 0
|
||||
for cid in cluster_order:
|
||||
@ -211,7 +264,13 @@ def main():
|
||||
|
||||
sig = cluster_signature(cmsgs)
|
||||
if sig in state:
|
||||
print(f"[P0-wechat] 已推送过,跳过: sig={sig[:8]}...")
|
||||
print(f"[P0-wechat] 已推送过(精确匹配),跳过: sig={sig[:8]}...")
|
||||
continue
|
||||
|
||||
# 内容语义去重
|
||||
fp = cluster_content_fingerprint(cmsgs)
|
||||
if is_duplicate_p0(fp, state):
|
||||
print(f"[P0-wechat] 已推送过(内容匹配),跳过: senders={fp['senders'][:2]}... hour={fp['hour']}")
|
||||
continue
|
||||
|
||||
print(f"[P0-wechat] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")
|
||||
@ -219,13 +278,13 @@ def main():
|
||||
if args.dry_run:
|
||||
alert = generate_p0_alert_text(cmsgs, info)
|
||||
print(f"[DRY-RUN] 将发送:\n{alert}")
|
||||
state[sig] = datetime.now().isoformat()
|
||||
state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
|
||||
new_p0_count += 1
|
||||
else:
|
||||
alert = generate_p0_alert_text(cmsgs, info)
|
||||
if dispatch_p0_alert(alert):
|
||||
print(f"[P0-wechat] ✅ P0 已实时推送")
|
||||
state[sig] = datetime.now().isoformat()
|
||||
state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
|
||||
new_p0_count += 1
|
||||
else:
|
||||
print(f"[P0-wechat] ❌ 推送失败")
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user