auto backup 2026-05-28 08:10:01

This commit is contained in:
--git_token 2026-05-28 08:10:01 +08:00
parent 179c6d95e4
commit bcb25d62e4
19 changed files with 534 additions and 25 deletions

View File

@ -9,9 +9,9 @@ lark_wiki_operate_as_bot f84c308bcb69280520dadf9458177d9c4af192d60cf409528bd65e1
pua f6a38fdd39c22c81370abd6b979b58c767e41738d43a26fbbc23d1e933cdc701 pua f6a38fdd39c22c81370abd6b979b58c767e41738d43a26fbbc23d1e933cdc701
smart-auto-model-switch bfb3547dcd6029622c7062b49ae7922614a366b6dfe88c7d0fae9dcd85fc2eb3 smart-auto-model-switch bfb3547dcd6029622c7062b49ae7922614a366b6dfe88c7d0fae9dcd85fc2eb3
vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf0bca9fd64b968ec vala_git_workspace_backup.vala 4cf352bec88fe84af065ba1ffcbb06647b77df0e01860faaf0bca9fd64b968ec
tencent-cos-upload 172517ed41d06c48425cd961ec5972a48495cfd62ec588bc1c2912ddf31b3a06
user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81 user-feedback-collector c0320451bf7ea0ce3d8ceaa603ae0a7b55c373c048363a5142258a4c23f45e81
user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13 user-feedback-data-source a95eb9142f3019fd193c46f89147dc7e0bf01dfe250202565a86f8bc52f37b13
user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8 user-feedback-processor 61783a8e9f03a973c187b359a87749ad1993dc71f8364b0a853d8b3ff64c75e8
feishu-group-msg-sync 1b581de76d419e6a33db0836125efc16ef2c972013fcae6f08c03aa7e2276445
feishu-feedback-sync 9c9ca1f0c42a289e037289cd394299b7debf7e240d3b30429899da42b601d953 feishu-feedback-sync 9c9ca1f0c42a289e037289cd394299b7debf7e240d3b30429899da42b601d953
feishu-group-msg-sync 953534cc3d1cf4489060afe1cf10cad75faedd3f2d699ffa7478d178d528dc51
tencent-cos-upload 2505dbc8c7acdaf95f2228598ae12010e09599a25df4319587c4a3109d828053

View File

@ -49,6 +49,16 @@
**不要在 MEMORY.md 中维护静态分类映射表,所有规则变更直接修改 priority_classifier.py。** **不要在 MEMORY.md 中维护静态分类映射表,所有规则变更直接修改 priority_classifier.py。**
### Python 脚本修改后需清理 __pycache__(2026-05-27)
- 修改 Python 脚本(尤其是新增/删除 import)后,`.pyc` 缓存可能导致 `NameError`(模块名未定义)
- 症状:源码中有 `import subprocess`,运行时却报 `NameError: name 'subprocess' is not defined`
- 修复:`find <workdir> -name "__pycache__" -type d | xargs rm -rf && find <workdir> -name "*.pyc" -delete`
### P0 实时检测去重:内容语义指纹替代消息ID精确匹配(2026-05-27)
- 原方案用 `sorted(message_ids)` MD5 做去重,但同一话题每次扫描聚类结果不同,签名失效导致重复推送
- 修复:增加内容语义去重层 — 拼接簇内前5条消息内容 + 发送人集合 + 小时窗口,用 Jaccard 相似度比较
- 阈值:同小时 + 发送人交集 + 相似度 > 0.20;跨小时 + 发送人 ≥2 重叠 + 相似度 > 0.35
- 影响文件:`detect_p0_wechat.py`、`detect_p0_realtime.py`
## 经验教训 ## 经验教训
### 微信反馈全链路2026-05-22 刘新玉确认) ### 微信反馈全链路2026-05-22 刘新玉确认)

View File

@ -1 +1 @@
1674 1772

View File

@ -21,3 +21,5 @@
{"type":"memory.recall.recorded","timestamp":"2026-05-25T07:11:25.527Z","query":"微信反馈 全链路 cron 采集 整理 归纳 分发","resultCount":3,"results":[{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-25T07:11:25.527Z","query":"微信反馈 全链路 cron 采集 整理 归纳 分发","resultCount":3,"results":[{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-26T03:40:13.634Z","query":"微信飞书问题汇总 小葵小葵群 发送汇总","resultCount":3,"results":[{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-26T03:40:13.634Z","query":"微信飞书问题汇总 小葵小葵群 发送汇总","resultCount":3,"results":[{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-26T03:40:24.708Z","query":"小葵小葵群 chat_id 分发消息群","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-26T03:40:24.708Z","query":"小葵小葵群 chat_id 分发消息群","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-27T02:15:55.251Z","query":"小葵小葵群 chat_id 反馈同步","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-27T02:50:23.510Z","query":"pycache python 缓存 导入错误","resultCount":3,"results":[{"path":"memory/2026-05-27.md","startLine":22,"endLine":32,"score":1},{"path":"memory/2026-05-27.md","startLine":1,"endLine":27,"score":1},{"path":"memory/2026-04-10.md","startLine":20,"endLine":52,"score":1}]}

View File

@ -1,6 +1,6 @@
{ {
"version": 1, "version": 1,
"updatedAt": "2026-05-26T03:40:24.708Z", "updatedAt": "2026-05-27T02:50:23.510Z",
"entries": { "entries": {
"memory:memory/2026-04-18.md:1:5": { "memory:memory/2026-04-18.md:1:5": {
"key": "memory:memory/2026-04-18.md:1:5", "key": "memory:memory/2026-04-18.md:1:5",
@ -9,13 +9,13 @@
"endLine": 5, "endLine": 5,
"source": "memory", "source": "memory",
"snippet": "# 2026-04-18 工作日志 ## 术语共识 [李若松确认] 术语「飞书反馈消息数据库」默认指代用户反馈收集技能中的飞书内部测试反馈MySQL数据表 `vala_test.lark_group_message`存储「内容测试问题反馈」群oc_fabff7672e62a9ced7b326ee4a286c26的同步消息数据。", "snippet": "# 2026-04-18 工作日志 ## 术语共识 [李若松确认] 术语「飞书反馈消息数据库」默认指代用户反馈收集技能中的飞书内部测试反馈MySQL数据表 `vala_test.lark_group_message`存储「内容测试问题反馈」群oc_fabff7672e62a9ced7b326ee4a286c26的同步消息数据。",
"recallCount": 7, "recallCount": 8,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 7, "totalScore": 8,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-04-30T03:47:21.989Z", "firstRecalledAt": "2026-04-30T03:47:21.989Z",
"lastRecalledAt": "2026-05-26T03:40:24.708Z", "lastRecalledAt": "2026-05-27T02:15:55.251Z",
"queryHashes": [ "queryHashes": [
"353f9765c086", "353f9765c086",
"a6b740c99377", "a6b740c99377",
@ -23,14 +23,16 @@
"f865295b9ac7", "f865295b9ac7",
"42fe8210f22c", "42fe8210f22c",
"81f7a2647922", "81f7a2647922",
"261597c52d5b" "261597c52d5b",
"3fe44d618bf6"
], ],
"recallDays": [ "recallDays": [
"2026-04-30", "2026-04-30",
"2026-05-06", "2026-05-06",
"2026-05-07", "2026-05-07",
"2026-05-25", "2026-05-25",
"2026-05-26" "2026-05-26",
"2026-05-27"
], ],
"conceptTags": [ "conceptTags": [
"vala-test.lark-group-message", "vala-test.lark-group-message",
@ -980,6 +982,99 @@
"姓氏", "姓氏",
"中文" "中文"
] ]
},
"memory:memory/2026-05-27.md:22:32": {
"key": "memory:memory/2026-05-27.md:22:32",
"path": "memory/2026-05-27.md",
"startLine": 22,
"endLine": 32,
"source": "memory",
"snippet": "| 微信 | `--skip-dispatch` → 不分发 | `--apply-ai` → 替换占位符+分发 | 微信依赖 AI 归纳成功才能分发,飞书双重分发(占位符+AI。 ### 5月26日反馈概况 - 飞书17条消息3个问题簇2个有效录音识别率低、飞船音乐不保存 - 微信27条消息14个问题簇4个有效飞船音乐、录音识别、音频无法播放、AI回复无关语句 ### 后续注意 - 删除/修改 `ai_summarize_feedback.py` 后需清理 `__pycache__`,否则缓存版本可能落后于源码",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-27T02:50:23.510Z",
"lastRecalledAt": "2026-05-27T02:50:23.510Z",
"queryHashes": [
"b15d92b2dda8"
],
"recallDays": [
"2026-05-27"
],
"conceptTags": [
"skip-dispatch",
"apply-ai",
"删除/修改",
"ai-summarize-feedback.py",
"skip",
"dispatch",
"不分",
"apply"
]
},
"memory:memory/2026-05-27.md:1:27": {
"key": "memory:memory/2026-05-27.md:1:27",
"path": "memory/2026-05-27.md",
"startLine": 1,
"endLine": 27,
"source": "memory",
"snippet": "# 2026-05-27 工作日志 ## 用户反馈同步故障排查与修复 [刘新玉反馈] ### 问题 5月26日飞书/微信用户反馈定时任务出现回写失败。 ### 根因 1. **Python 缓存过期** — `ai_summarize_feedback.py` 中 `subprocess` 模块导入失败(`NameError: name 'subprocess' is not defined`),实际源码有 `import subprocess`,但 `__pycache__` 中的旧 `.pyc` 未包含此导入 2. **sync_wechat_feedback.py** — 同样因缓存问题导致 `with open(tmp_md)` 写入失败 ### 修复 - 清理了工作区内所有 `__pycache__` 和 `.pyc` 文件 - 重新执行飞书 AI 归纳回写:`sync_feishu_feedback.py --date 2026-05-26 --apply-ai` - 重新执行微信 AI 归纳回写:`sync_wechat_feedback.py --date 2026-05-26 --apply-ai` - 两个渠道均成功回写并分发到「小葵小葵」群 ### 分发架构确认 | 渠道 | 10:00/10:02 Wrapper | 10:05/10:07 AI 归纳 | |------|---------------------|---------------------| | 飞书 | `--dispatch-mode all` → 分发占位符 | `--",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-27T02:50:23.510Z",
"lastRecalledAt": "2026-05-27T02:50:23.510Z",
"queryHashes": [
"b15d92b2dda8"
],
"recallDays": [
"2026-05-27"
],
"conceptTags": [
"5月26日飞书/微信用户反馈定时任务出现回写失败",
"ai-summarize-feedback.py",
"sync-wechat-feedback.py",
"tmp-md",
"sync-feishu-feedback.py",
"apply-ai",
"00/10",
"05/10"
]
},
"memory:memory/2026-04-10.md:20:52": {
"key": "memory:memory/2026-04-10.md:20:52",
"path": "memory/2026-04-10.md",
"startLine": 20,
"endLine": 52,
"source": "memory",
"snippet": "- 图片先下载到工作目录(相对路径),再用 `docs +media-insert` 插入文档 - 去掉 `set -e`,改为手动错误处理避免单条消息失败导致整个脚本退出 - 时间用 ISO 8601 格式存储和传递 ### 验证结果(第一版 → Wiki文档 - 全量同步成功49 条消息 + 5 张图片写入 Wiki 文档 - Wiki文档DfUqddItXoDsnNxPypncbinknxh ## 迭代:改为电子表格 + 腾讯COS **来源:** [李若松] 要求改用表格存储媒体文件上传COS ### 方案 - 脚本改为 Python`scripts/sync_feedback_group.py` - 记录写入飞书电子表格:`E8vFsCmPBhT4SCtNmnJchqeJnJe`sheet_id `7bce8f` - 列:时间 | 反馈人 | 信息类型 | 信息内容(或地址) - 非文本消息(图片/视频/音频/文件下载后上传到腾讯COS - COS桶`static-1317843270`,区域:`ap-beijing` - COS路径结构`vala_llm/user_feedback/{type}/{date}/{filename}` - type: image / video / audio / file - date: YYYY-MM-DD - 访问域名:`https://static.valavala.com/vala_llm/user_feedback/...` - COS凭证已存入 `secrets.md` ### 验证结",
"recallCount": 1,
"dailyCount": 0,
"groundedCount": 0,
"totalScore": 1,
"maxScore": 1,
"firstRecalledAt": "2026-05-27T02:50:23.510Z",
"lastRecalledAt": "2026-05-27T02:50:23.510Z",
"queryHashes": [
"b15d92b2dda8"
],
"recallDays": [
"2026-05-27"
],
"conceptTags": [
"media-insert",
"scripts/sync-feedback-group.py",
"sheet-id",
"图片/视频/音频/文件",
"static-1317843270",
"ap-beijing",
"vala-llm/user-feedback",
"yyyy-mm-dd"
]
} }
} }
} }

50
memory/2026-05-27.md Normal file
View File

@ -0,0 +1,50 @@
# 2026-05-27 工作日志
## 用户反馈同步故障排查与修复 [刘新玉反馈]
### 问题
5月26日飞书/微信用户反馈定时任务出现回写失败。
### 根因
1. **Python 缓存过期** — `ai_summarize_feedback.py` 中 `subprocess` 模块导入失败(`NameError: name 'subprocess' is not defined`),实际源码有 `import subprocess`,但 `__pycache__` 中的旧 `.pyc` 未包含此导入
2. **sync_wechat_feedback.py** — 同样因缓存问题导致 `with open(tmp_md)` 写入失败
### 修复
- 清理了工作区内所有 `__pycache__` 和 `.pyc` 文件
- 重新执行飞书 AI 归纳回写:`sync_feishu_feedback.py --date 2026-05-26 --apply-ai`
- 重新执行微信 AI 归纳回写:`sync_wechat_feedback.py --date 2026-05-26 --apply-ai`
- 两个渠道均成功回写并分发到「小葵小葵」群
### 分发架构确认
| 渠道 | 10:00/10:02 Wrapper | 10:05/10:07 AI 归纳 |
|------|---------------------|---------------------|
| 飞书 | `--dispatch-mode all` → 分发占位符 | `--apply-ai` → 替换占位符+重新分发 |
| 微信 | `--skip-dispatch` → 不分发 | `--apply-ai` → 替换占位符+分发 |
微信依赖 AI 归纳成功才能分发,飞书双重分发(占位符+AI
### 5月26日反馈概况
- 飞书:17条消息,3个问题簇,2个有效(录音识别率低、飞船音乐不保存)
- 微信:27条消息,14个问题簇,4个有效(飞船音乐、录音识别、音频无法播放、AI回复无关语句)
### 后续注意
- 删除/修改 `ai_summarize_feedback.py` 后需清理 `__pycache__`,否则缓存版本可能落后于源码
## P0 实时检测去重修复 [刘新玉反馈]
### 问题
微信 `detect_p0_wechat.py` 每分钟扫描最近120分钟消息同一个问题因讨论线程持续生长`sort_threads` 聚类每次产生不同消息集合,导致:
- 不同次的聚类有不同的 `cluster_signature`(基于 `sorted(message_ids)` MD5
- 去重完全失效同一问题被重复推送今天2个真实问题各推了3次 = 6次
### 修复
在 `detect_p0_wechat.py` 和 `detect_p0_realtime.py` 中增加**内容语义去重**:
1. 新增 `cluster_content_fingerprint()`拼接簇内前5条有意义消息作为内容指纹 + 发送人集合 + 小时粒度时间窗口
2. 新增 `is_duplicate_p0()`基于内容相似度Jaccard+ 发送人重叠 + 时间窗口三层判断
- 同小时 + 发送人交集 + 内容相似度 > 0.20 → 重复
- 发送人高度重叠(≥2) + 内容相似度 > 0.35 → 跨小时重复
3. 状态文件改为 `{"time": ..., "fp": {...}}` 格式存储指纹信息
4. 飞书 P0 检测器同步修复
### 测试验证
360分钟窗口测试:同一话题在不同扫描窗口下签名不同(c69d... vs 70a4...),但内容指纹正确识别为重复(相似度 0.462,is_duplicate=True)。

View File

@ -0,0 +1,17 @@
{
"date": "2026-05-26",
"descriptions": [
{
"index": 1,
"description": "在应用录音环节,背景音乐音量过大导致用户人声微弱,造成语音识别率低,且麦克风界面动画卡顿。"
},
{
"index": 2,
"description": "在飞船系统中,用户更换音乐后重新进入时音乐自动恢复为默认,无法保存切换设置。"
},
{
"index": 3,
"description": "无明确问题"
}
]
}

View File

@ -0,0 +1,61 @@
{
"date": "2026-05-26",
"descriptions": [
{
"index": 1,
"description": "无明确问题"
},
{
"index": 2,
"description": "飞船音乐切换后,重新进出时音乐会恢复为默认歌曲,无法保存切换设置。"
},
{
"index": 3,
"description": "无明确问题"
},
{
"index": 4,
"description": "在移动端瓦拉英语应用中,录音识别功能存在识别率低且录音时麦克风动画卡顿的问题。"
},
{
"index": 5,
"description": "无明确问题"
},
{
"index": 6,
"description": "无明确问题"
},
{
"index": 7,
"description": "无明确问题"
},
{
"index": 8,
"description": "无明确问题"
},
{
"index": 9,
"description": "无明确问题"
},
{
"index": 10,
"description": "英相关人员频无法播放声音"
},
{
"index": 11,
"description": "无明确问题"
},
{
"index": 12,
"description": "无明确问题"
},
{
"index": 13,
"description": "在强化练习环节中AI语音回复出现了与当前练习内容无关的语句。"
},
{
"index": 14,
"description": "无明确问题"
}
]
}

View File

@ -0,0 +1,166 @@
{
"date": "2026-05-26",
"total_clusters": 3,
"clusters": [
{
"index": 1,
"cluster_id": "4702315644940596587",
"location": {
"端": "未知",
"环节": "未知",
"课程": "",
"角色/组件": ""
},
"priority": "P2",
"priority_detail": "",
"category": "其他问题",
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
"messages": [
{
"sender": "瓦拉英语-萌萌老师早10晚7",
"content": "麻烦帮看下吧,我的飞船-音乐切换了歌曲,再重新进出的话音乐又变成了默认的,切换歌曲不能保存吗?",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 09:28:15"
},
{
"sender": "嘿哈",
"content": "这个目前设计就是如此,不会保存\n ↳ 回复 瓦拉英语-萌萌老师早10晚7: 麻烦帮看下吧,我的飞船-音乐切换了歌曲,再重新进出的话音乐又变成了默认的,切换歌曲不能保存吗?",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 10:58:11"
}
]
},
{
"index": 2,
"cluster_id": "6332894499636566314",
"location": {
"端": "移动端",
"环节": "未知",
"课程": "",
"角色/组件": "音频"
},
"priority": "P2",
"priority_detail": "",
"category": "声音/音频类",
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
"messages": [
{
"sender": "胡陈辰🦉",
"content": "@许悦 我们可以加个需求 ",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 12:57:57"
},
{
"sender": "瓦拉英语-Tom老师",
"content": "老师,这种录音识别率比较低,是正常的么?辛苦帮忙看看",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 13:07:45"
},
{
"sender": "嘿哈",
"content": "@瓦拉英语-Tom老师这个可以咨询一下用户是有连接音响什么的不\n ↳ 回复 瓦拉英语-Tom老师: 老师,这种录音识别率比较低,是正常的么?辛苦帮忙看看",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 14:42:09"
},
{
"sender": "瓦拉英语-Tom老师",
"content": "@八哥-16619720408好的\n ↳ 回复 嘿哈: @瓦拉英语-Tom老师这个可以咨询一下用户是有连接音响什么的不",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 14:44:20"
},
{
"sender": "嘿哈",
"content": "@瓦拉英语-Tom老师这个可能需要分两步走我获取了一下用户的音频信息确实用户的声音很小先让用户尝试通过设置降低一点音乐音量然后我们这边也和产品老师说一下再优化一下录音的时候没有其他的声音https://static.valavala.com/vala_user_audio/c6125977134c_134242247464915130.wav\nhttps://static.valavala.com/vala_user_audio/71bec4fc33a2_134242247584791300.wav\nhttps://static.valavala.com/vala_user_audio/b4c7d8eff63c_134242247700853530.wav",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 14:50:07"
},
{
"sender": "瓦拉英语-Tom老师",
"content": "[聊天记录] 雷鸣和瓦拉英语-Tom老师\n雷鸣: [视频]\n雷鸣: [视频]\n雷鸣: [图片]",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 13:07:44"
},
{
"sender": "嘿哈",
"content": "发一下用户手机号吧",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 13:08:10"
},
{
"sender": "瓦拉英语-Tom老师",
"content": "手机号13617153553",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 13:08:53"
},
{
"sender": "瓦拉英语-Tom老师",
"content": "[视频] 17秒 size:2553865",
"msg_type": "video",
"media_url": "",
"time": "2026-05-26 13:47:26"
},
{
"sender": "Ariel",
"content": "@kevin而且它这个麦克风的动画看起来超级卡一秒动一帧的感觉\n ↳ 回复 瓦拉英语-Tom老师: 25984984606212559@openim:\n<?xml version=\"1.0\"?>\n<msg>\n\t<videomsg aeskey=\"0cfb6db1e5d84683aab0ed170dbf6567\" cdnvideourl=\"\" cdnthumbaeskey=\"\" cdnthumburl=\"\" lengt...",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 14:26:20"
},
{
"sender": "瓦拉英语-Tom老师",
"content": "老师,这种是什么原因呢?\n手机号18165276842",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 13:47:26"
},
{
"sender": "kevin",
"content": "可以让用户上传一下日志\n ↳ 回复 瓦拉英语-Tom老师: 老师,这种是什么原因呢?\n手机号18165276842",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 14:07:41"
}
]
},
{
"index": 3,
"cluster_id": "6521596520536988022",
"location": {
"端": "未知",
"环节": "未知",
"课程": "",
"角色/组件": ""
},
"priority": "P2",
"priority_detail": "",
"category": "其他问题",
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
"messages": [
{
"sender": "嘿哈",
"content": "让用户上传一下日志吧",
"msg_type": "text",
"media_url": "",
"time": "2026-05-26 21:09:17"
},
{
"sender": "王欢",
"content": "日志已传@八哥-16619720408\n ↳ 回复 八哥-16619720408: 让用户上传一下日志吧",
"msg_type": "link",
"media_url": "",
"time": "2026-05-26 21:11:43"
}
]
}
]
}

Binary file not shown.

View File

@ -27,7 +27,7 @@ SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skil
sys.path.insert(0, SKILL_DIR) sys.path.insert(0, SKILL_DIR)
from sync_feishu_feedback import ( from sync_feishu_feedback import (
get_db_connection, query_messages, sort_threads, get_tenant_token, get_db_connection, query_messages, sort_threads, get_tenant_token, content_similarity,
DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS, DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS,
MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB, MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB,
) )
@ -40,17 +40,19 @@ CLUSTER_MIN_SIZE = 2 # 至少 2 条消息才算有效簇
def load_dispatched_state(): def load_dispatched_state():
"""加载已推送的 P0 簇签名状态。返回 {cluster_signature: dispatch_time}""" """加载已推送的 P0 簇状态。兼容新旧格式。"""
try: try:
with open(STATE_FILE, "r") as f: with open(STATE_FILE, "r") as f:
state = json.load(f) state = json.load(f)
except (FileNotFoundError, json.JSONDecodeError): except (FileNotFoundError, json.JSONDecodeError):
state = {} state = {}
# 清理超过 24 小时的记录
cutoff = (datetime.now() - timedelta(hours=24)).isoformat() cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
state = {k: v for k, v in state.items() if v > cutoff} cleaned = {}
return state for k, v in state.items():
ts = v if isinstance(v, str) else v.get("time", "")
if ts > cutoff:
cleaned[k] = v
return cleaned
def save_dispatched_state(state): def save_dispatched_state(state):
@ -72,6 +74,47 @@ def cluster_signature(cluster_msgs):
return hashlib.md5(joined.encode()).hexdigest() return hashlib.md5(joined.encode()).hexdigest()
def cluster_content_fingerprint(cluster_msgs):
    """Build a content-based fingerprint for a cluster of messages.

    The fingerprint is meant for de-duplication across scans and does not
    depend on the set of message IDs in the cluster.

    Each message is read positionally: index 1 is used as the sender,
    index 3 as the text content and index 6 as the timestamp.

    Returns a dict with:
        content:   up to five meaningful texts joined with " | "
        senders:   sorted list of distinct non-empty senders
        hour:      first 13 characters of the first non-empty timestamp,
                   or "unknown" when the cluster has none
        msg_count: number of messages in the cluster
    """
    snippets = []
    for msg in cluster_msgs:
        raw = msg[3]
        text = str(raw).strip() if raw else ""
        # Texts of 8 characters or fewer are skipped as not meaningful.
        if len(text) > 8:
            snippets.append(text[:300])

    unique_senders = {msg[1] for msg in cluster_msgs if msg[1]}
    timestamps = [msg[6] for msg in cluster_msgs if msg[6]]

    if timestamps:
        hour_bucket = timestamps[0][:13]
    else:
        hour_bucket = "unknown"

    return {
        "content": " | ".join(snippets[:5]),
        "senders": sorted(unique_senders),
        "hour": hour_bucket,
        "msg_count": len(cluster_msgs),
    }
def is_duplicate_p0(new_fp, dispatched_entries):
    """Decide whether a new P0 cluster repeats one that was already dispatched.

    Args:
        new_fp: fingerprint dict with "content", "senders" and "hour" keys.
        dispatched_entries: mapping of signature -> state entry. New-format
            entries look like {"time": str, "fp": dict}; legacy entries are
            bare timestamp strings and carry no fingerprint.

    Returns:
        True when some stored fingerprint matches under either rule:
          * same hour, at least one shared sender, similarity > 0.20
          * at least two shared senders, similarity > 0.35 (cross-hour)
        False otherwise.
    """
    new_senders = set(new_fp["senders"])
    for entry in dispatched_entries.values():
        # Legacy entries are plain strings; calling .get() on them would
        # raise AttributeError, so they are skipped instead.
        if not isinstance(entry, dict):
            continue
        old_fp = entry.get("fp")
        if not old_fp:
            continue

        sender_overlap = len(new_senders & set(old_fp.get("senders", [])))
        if new_fp["hour"] == old_fp.get("hour"):
            required_overlap, threshold = 1, 0.20
        else:
            required_overlap, threshold = 2, 0.35
        if sender_overlap < required_overlap:
            continue

        # The same-hour threshold (0.20) is the looser of the two, so one
        # similarity computation covers both rules.
        if content_similarity(new_fp["content"], old_fp.get("content", "")) > threshold:
            return True
    return False
def is_probably_p0(cluster_msgs): def is_probably_p0(cluster_msgs):
""" """
快速判断一个簇是否是 P0 级别问题 快速判断一个簇是否是 P0 级别问题
@ -229,7 +272,7 @@ def main():
# 加载已推送状态 # 加载已推送状态
state = load_dispatched_state() state = load_dispatched_state()
print(f"[P0-detect] 已记录 {len(state)} 个已推送簇签名") print(f"[P0-detect] 已记录 {len(state)} 个已推送簇")
# 遍历簇,找出 P0 且未推送的 # 遍历簇,找出 P0 且未推送的
new_p0_count = 0 new_p0_count = 0
@ -241,7 +284,13 @@ def main():
sig = cluster_signature(cmsgs) sig = cluster_signature(cmsgs)
if sig in state: if sig in state:
print(f"[P0-detect] 已推送过,跳过: sig={sig[:8]}...") print(f"[P0-detect] 已推送过(精确匹配),跳过: sig={sig[:8]}...")
continue
# 内容语义去重
fp = cluster_content_fingerprint(cmsgs)
if is_duplicate_p0(fp, state):
print(f"[P0-detect] 已推送过(内容匹配),跳过: senders={fp['senders'][:2]}... hour={fp['hour']}")
continue continue
print(f"[P0-detect] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息") print(f"[P0-detect] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")
@ -249,14 +298,14 @@ def main():
if args.dry_run: if args.dry_run:
alert = generate_p0_alert_text(cmsgs, info) alert = generate_p0_alert_text(cmsgs, info)
print(f"[DRY-RUN] 将发送:\n{alert}") print(f"[DRY-RUN] 将发送:\n{alert}")
state[sig] = datetime.now().isoformat() state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
new_p0_count += 1 new_p0_count += 1
else: else:
alert = generate_p0_alert_text(cmsgs, info) alert = generate_p0_alert_text(cmsgs, info)
success = dispatch_p0_alert(alert) success = dispatch_p0_alert(alert)
if success: if success:
print(f"[P0-detect] ✅ P0 已实时推送") print(f"[P0-detect] ✅ P0 已实时推送")
state[sig] = datetime.now().isoformat() state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
new_p0_count += 1 new_p0_count += 1
else: else:
print(f"[P0-detect] ❌ 推送失败") print(f"[P0-detect] ❌ 推送失败")

View File

@ -25,7 +25,7 @@ SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skil
sys.path.insert(0, SKILL_DIR) sys.path.insert(0, SKILL_DIR)
from sync_feishu_feedback import ( from sync_feishu_feedback import (
sort_threads, get_tenant_token, sort_threads, get_tenant_token, content_similarity,
DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS, DISPATCH_CHAT_ID, DISPATCH_CRED_DIR, P0_NOTIFY_USERS,
MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB, MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS, MYSQL_DB,
) )
@ -44,7 +44,13 @@ def load_dispatched_state():
except (FileNotFoundError, json.JSONDecodeError): except (FileNotFoundError, json.JSONDecodeError):
state = {} state = {}
cutoff = (datetime.now() - timedelta(hours=24)).isoformat() cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
return {k: v for k, v in state.items() if v > cutoff} # 兼容新旧格式:新格式 value 是 {"time": ..., "fp": ...},旧格式是纯时间字符串
cleaned = {}
for k, v in state.items():
ts = v if isinstance(v, str) else v.get("time", "")
if ts > cutoff:
cleaned[k] = v
return cleaned
def save_dispatched_state(state): def save_dispatched_state(state):
@ -60,6 +66,53 @@ def cluster_signature(cluster_msgs):
return hashlib.md5(",".join(ids).encode()).hexdigest() return hashlib.md5(",".join(ids).encode()).hexdigest()
def cluster_content_fingerprint(cluster_msgs):
    """Produce a content-based cluster fingerprint for cross-scan de-duplication.

    The result does not depend on the set of message IDs. Messages are read
    positionally: index 1 is used as the sender, index 3 as the text content
    and index 6 as the timestamp.
    """
    # Collect the meaningful message texts; empty values and texts of
    # 8 characters or fewer are dropped, and each text is capped at 300 chars.
    stripped = (str(m[3]).strip() if m[3] else "" for m in cluster_msgs)
    meaningful = [text[:300] for text in stripped if len(text) > 8]

    # Only the first five texts are aggregated so the core problem
    # description stays stable.
    joined = " | ".join(meaningful[:5])

    # Distinct senders, sorted for a consistent ordering.
    sender_list = sorted({m[1] for m in cluster_msgs if m[1]})

    # Hour-granularity window taken from the first non-empty timestamp.
    stamps = [m[6] for m in cluster_msgs if m[6]]
    window = stamps[0][:13] if stamps else "unknown"

    fingerprint = {}
    fingerprint["content"] = joined
    fingerprint["senders"] = sender_list
    fingerprint["hour"] = window
    fingerprint["msg_count"] = len(cluster_msgs)
    return fingerprint
def is_duplicate_p0(new_fp, dispatched_entries):
    """Decide whether a new P0 cluster repeats one that was already dispatched.

    Args:
        new_fp: fingerprint dict with "content", "senders" and "hour" keys.
        dispatched_entries: mapping of signature -> state entry. New-format
            entries look like {"time": str, "fp": dict}; legacy entries are
            bare timestamp strings and carry no fingerprint.

    Returns:
        True when a stored fingerprint matches the new one, False otherwise.
    """
    new_senders = set(new_fp["senders"])
    for entry in dispatched_entries.values():
        # Legacy entries are plain strings; calling .get() on them would
        # raise AttributeError, so they are skipped instead.
        if not isinstance(entry, dict):
            continue
        old_fp = entry.get("fp")
        if not old_fp:
            continue

        sender_overlap = len(new_senders & set(old_fp.get("senders", [])))
        same_hour = new_fp["hour"] == old_fp.get("hour")

        if same_hour:
            # Rule 1: same hour + at least one shared sender + similarity
            # > 0.20 (the aggregated content is stable, so a loose
            # threshold is enough to tell clusters apart).
            required_overlap, threshold = 1, 0.20
        else:
            # Rule 2: heavily overlapping senders + similarity > 0.35
            # (cross-hour case).
            required_overlap, threshold = 2, 0.35
        if sender_overlap < required_overlap:
            continue

        # Rule 1's threshold is the looser one, so a single similarity
        # computation covers the case where both rules apply.
        if content_similarity(new_fp["content"], old_fp.get("content", "")) > threshold:
            return True
    return False
def is_probably_p0(cluster_msgs): def is_probably_p0(cluster_msgs):
if len(cluster_msgs) < CLUSTER_MIN_SIZE: if len(cluster_msgs) < CLUSTER_MIN_SIZE:
return False, None return False, None
@ -200,7 +253,7 @@ def main():
print(f"[P0-wechat] 聚类完成:{len(clusters)} 个簇") print(f"[P0-wechat] 聚类完成:{len(clusters)} 个簇")
state = load_dispatched_state() state = load_dispatched_state()
print(f"[P0-wechat] 已记录 {len(state)} 个已推送簇签名") print(f"[P0-wechat] 已记录 {len(state)} 个已推送簇")
new_p0_count = 0 new_p0_count = 0
for cid in cluster_order: for cid in cluster_order:
@ -211,7 +264,13 @@ def main():
sig = cluster_signature(cmsgs) sig = cluster_signature(cmsgs)
if sig in state: if sig in state:
print(f"[P0-wechat] 已推送过,跳过: sig={sig[:8]}...") print(f"[P0-wechat] 已推送过(精确匹配),跳过: sig={sig[:8]}...")
continue
# 内容语义去重
fp = cluster_content_fingerprint(cmsgs)
if is_duplicate_p0(fp, state):
print(f"[P0-wechat] 已推送过(内容匹配),跳过: senders={fp['senders'][:2]}... hour={fp['hour']}")
continue continue
print(f"[P0-wechat] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息") print(f"[P0-wechat] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")
@ -219,13 +278,13 @@ def main():
if args.dry_run: if args.dry_run:
alert = generate_p0_alert_text(cmsgs, info) alert = generate_p0_alert_text(cmsgs, info)
print(f"[DRY-RUN] 将发送:\n{alert}") print(f"[DRY-RUN] 将发送:\n{alert}")
state[sig] = datetime.now().isoformat() state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
new_p0_count += 1 new_p0_count += 1
else: else:
alert = generate_p0_alert_text(cmsgs, info) alert = generate_p0_alert_text(cmsgs, info)
if dispatch_p0_alert(alert): if dispatch_p0_alert(alert):
print(f"[P0-wechat] ✅ P0 已实时推送") print(f"[P0-wechat] ✅ P0 已实时推送")
state[sig] = datetime.now().isoformat() state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
new_p0_count += 1 new_p0_count += 1
else: else:
print(f"[P0-wechat] ❌ 推送失败") print(f"[P0-wechat] ❌ 推送失败")