diff --git a/memory/.dreams/events.jsonl b/memory/.dreams/events.jsonl index f53cbe6..537b19c 100644 --- a/memory/.dreams/events.jsonl +++ b/memory/.dreams/events.jsonl @@ -34,3 +34,4 @@ {"type":"memory.recall.recorded","timestamp":"2026-06-02T06:49:17.685Z","query":"小编 群聊 身份 open_id","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-06-03T02:09:30.724Z","query":"小葵小葵群 chat_id 用户反馈发送","resultCount":1,"results":[{"path":"memory/2026-04-18.md","startLine":1,"endLine":5,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-06-05T02:12:53.027Z","query":"今天用户反馈发送 飞书微信 小葵小葵群","resultCount":6,"results":[{"path":"memory/2026-05-07.md","startLine":142,"endLine":175,"score":1},{"path":"memory/2026-05-07.md","startLine":86,"endLine":116,"score":1},{"path":"memory/2026-05-07.md","startLine":73,"endLine":91,"score":1},{"path":"memory/2026-05-07.md","startLine":46,"endLine":78,"score":1},{"path":"memory/2026-05-07.md","startLine":23,"endLine":50,"score":1},{"path":"memory/2026-05-07.md","startLine":1,"endLine":28,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-06-17T10:48:17.775Z","query":"微信 P0 实时告警 detect_p0_wechat crash","resultCount":5,"results":[{"path":"memory/2026-05-26.md","startLine":1,"endLine":24,"score":1},{"path":"memory/2026-05-25.md","startLine":21,"endLine":42,"score":1},{"path":"memory/2026-05-25.md","startLine":38,"endLine":44,"score":1},{"path":"memory/2026-05-27.md","startLine":22,"endLine":43,"score":1},{"path":"memory/2026-05-22.md","startLine":129,"endLine":158,"score":1}]} diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index 95d6eea..f172cc0 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": "2026-06-05T02:12:53.027Z", + "updatedAt": "2026-06-17T10:48:17.775Z", "entries": { "memory:memory/2026-04-18.md:1:5": { "key": "memory:memory/2026-04-18.md:1:5", @@ -912,13 +912,13 @@ "endLine": 42, "source": "memory", "snippet": "- `update_summary_doc_as_children()` 默认 title_prefix 从 `\"\"` 改为 `\"飞书-\"` - `create_child_doc()` 日期提取兼容 `飞书-` / `微信-` 前缀 - `dispatch_summary_to_chat()` 消息标题和链接同步更新 - `get_today_doc_obj_token()`、`--apply-ai` 路径、步骤7分发 的标题格式全部更新 ### 补跑历史数据 - 飞书 5/22(3条)、5/23(1条)AI归纳回写成功 + 自动分发到群聊 - 飞书 5/24:当天无飞书群新数据,跳过 - 微信 5/22(23条)、5/23(13条)、5/24(29条)AI归纳回写成功 + 自动分发 ### 知识库文档重命名 - 13个飞书历史文档(5/6-5/23)从 `{date} 问题反馈` 重命名为 `飞书-{date} 用户反馈问题归纳` ## 新增逻辑:飞书/微信文档标题渠道区分 - [刘新玉需求] 飞书文档标题加 `飞书-` 前缀以便与微信区分 - 飞书格式:`飞书-{date} 用户反馈问题归纳` - 微信格式:保持 `微信-{date} 问题反馈`(由 `sync_wechat_feedback.py` 传入 `title_prefix=\"微信-\"`) - `update_summary_doc_as_children()` 默认 `title_prefix=\"飞书-\"`,微信调用时显式覆盖为 `\"微信-\"` - `create_child_doc()` 日期提取已兼", - "recallCount": 7, + "recallCount": 8, "dailyCount": 0, "groundedCount": 0, - "totalScore": 7, + "totalScore": 8, "maxScore": 1, "firstRecalledAt": "2026-05-25T07:11:25.527Z", - "lastRecalledAt": "2026-06-02T06:49:10.085Z", + "lastRecalledAt": "2026-06-17T10:48:17.775Z", "queryHashes": [ "1e9d4a024a31", "442660038a9b", @@ -926,13 +926,15 @@ "bc1ed8fedd00", "927d0719ac4d", "35e4329ea912", - "52ecc24c3e06" + "52ecc24c3e06", + "7acf4444a34f" ], "recallDays": [ "2026-05-25", "2026-05-26", "2026-06-01", - "2026-06-02" + "2026-06-02", + "2026-06-17" ], "conceptTags": [ "update-summary-doc-as-children", @@ -952,13 +954,13 @@ "endLine": 44, "source": "memory", "snippet": "- `update_summary_doc_as_children()` 默认 `title_prefix=\"飞书-\"`,微信调用时显式覆盖为 `\"微信-\"` - `create_child_doc()` 日期提取已兼容两种前缀(strip `飞书-` 或 `微信-` 再解析) ## 注意事项 - 今天(5/25)的反馈数据将在明天 10:00 正常走全流程 - `ai_summarize_feedback.py` 中 `apply_descriptions` 修复后需注意:微信渠道需要 `--date` 参数,飞书渠道 `--date` 可选", - "recallCount": 7, + "recallCount": 8, "dailyCount": 0, "groundedCount": 0, - "totalScore": 7, + "totalScore": 8, "maxScore": 1, "firstRecalledAt": "2026-05-25T07:11:25.527Z", - "lastRecalledAt": "2026-06-02T06:49:10.085Z", + "lastRecalledAt": "2026-06-17T10:48:17.775Z", "queryHashes": [ "1e9d4a024a31", "442660038a9b", @@ -966,13 +968,15 @@ "bc1ed8fedd00", "927d0719ac4d", "35e4329ea912", - "52ecc24c3e06" + "52ecc24c3e06", + "7acf4444a34f" ], "recallDays": [ "2026-05-25", "2026-05-26", "2026-06-01", - "2026-06-02" + "2026-06-02", + "2026-06-17" ], "conceptTags": [ "update-summary-doc-as-children", @@ -992,13 +996,13 @@ "endLine": 158, "source": "memory", "snippet": "- 步骤2:姓氏 + 1个中文字符模式匹配,排除内容词白名单(文件/资源/游戏/动画/设计等50+词) - 替换为\"相关人员\" ### 修复效果(簇 #7) | 修复前 | 修复后 | |--------|--------| | \"角色江涛的spine动画中,眼睛设计应为睁开但实际显示为闭眼。\" | \"Spine动画中角色眼睛呈现闭眼状态,与设计不符。\" | ## 刘新玉 - 反馈流程完整概览(5/22 12:00) ### 全链路三层架构 ``` 采集层 → 每5分钟(飞书群同步) / 每分钟(微信群导出+P0检测) 汇总层 → 每天 10:00(飞书) 10:02(微信) 聚类+归纳+写入知识库 AI层 → 每天 10:05(飞书) 10:07(微信) DeepSeek生成描述+回写+分发群聊 ``` ### 时间线 | 时间 | 飞书 | 微信 | |------|------|------| | 10:00 | sync_feishu_feedback (占位符,不分发) | — | | 10:02 | — | sync_wechat_feedback (占位符,不分发) | | 10:05 | ai_summarize_feedback (回写+分发) | — | | 10:07 | — | ai_summarize_feedback --channel wechat (回写+分发) | ## 刘新玉 - 微信反馈同步系统搭建(5/22 下午) ### 背景 刘新玉要求微信用户反馈流程与飞书一致(收集→整理→归纳→分发),之前微信只有 M", - "recallCount": 7, + "recallCount": 8, "dailyCount": 0, "groundedCount": 0, - "totalScore": 7, + "totalScore": 8, "maxScore": 1, "firstRecalledAt": "2026-05-25T07:11:25.527Z", - "lastRecalledAt": "2026-06-02T06:49:10.085Z", + "lastRecalledAt": "2026-06-17T10:48:17.775Z", "queryHashes": [ "1e9d4a024a31", "442660038a9b", @@ -1006,13 +1010,15 @@ "bc1ed8fedd00", "927d0719ac4d", "35e4329ea912", - "52ecc24c3e06" + "52ecc24c3e06", + "7acf4444a34f" ], "recallDays": [ "2026-05-25", "2026-05-26", "2026-06-01", - "2026-06-02" + "2026-06-02", + "2026-06-17" ], "conceptTags": [ "文件/资源/游戏/动画/设计等50", @@ -1125,23 +1131,25 @@ "endLine": 24, "source": "memory", "snippet": "# 2026-05-26 工作日志 ## 刘新玉 - 微信/飞书问题汇总分发排查与修复 ### 问题 刘新玉询问是否向\"小葵小葵\"群发送了昨天(5/25)的问题汇总。 ### 排查结果 - **飞书 5/25**:「内容测试问题反馈」群昨天无新消息,飞书 crontab 10:00 输出 `✅ 无新数据,无需同步`,无需分发 - **微信 5/25**:数据库有 17 条消息,但大部分是单条消息,仅 1 个有效簇(≥2条),AI 归纳为\"无明确问题\",属于无效反馈 ### 修复的 Bug(均在本次会话中修复) **Bug 1:`sync_wechat_feedback.py` 文档标题不匹配** - 文件:`scripts/sync_wechat_feedback.py` - 问题:`--apply-ai` 路径搜索子文档时标题为 `微信-{date} 问题反馈`,但实际创建的是 `微信-{date} 用户反馈问题归纳`,导致创建后找不到文档 - 修复:两处标题改为 `微信-{date} 用户反馈问题归纳`(apply-ai 路径 + 步骤7分发路径) **Bug 2:`dispatch_summary_to_chat` 标题前缀** - 文件:`skills/feishu-feedback-sync/scripts/sync_feishu_feedback.py` - 问题:title 硬编码 `飞书-{day_label}`,微信调用时 day_label 已含 `微信-` 前缀,导致出现 `飞书-微信-{date}` 双重前缀 - 修复:检测 day_labe", - "recallCount": 5, + "recallCount": 6, "dailyCount": 0, "groundedCount": 0, - "totalScore": 5, + "totalScore": 6, "maxScore": 1, "firstRecalledAt": "2026-06-01T02:51:24.899Z", - "lastRecalledAt": "2026-06-02T06:49:10.085Z", + "lastRecalledAt": "2026-06-17T10:48:17.775Z", "queryHashes": [ "b36cfa790039", "bc1ed8fedd00", "927d0719ac4d", "35e4329ea912", - "52ecc24c3e06" + "52ecc24c3e06", + "7acf4444a34f" ], "recallDays": [ "2026-06-01", - "2026-06-02" + "2026-06-02", + "2026-06-17" ], "conceptTags": [ "微信/飞书问题汇总分发排查与修复", @@ -1161,19 +1169,21 @@ "endLine": 43, "source": "memory", "snippet": "| 微信 | `--skip-dispatch` → 不分发 | `--apply-ai` → 替换占位符+分发 | 微信依赖 AI 归纳成功才能分发,飞书双重分发(占位符+AI)。 ### 5月26日反馈概况 - 飞书:17条消息,3个问题簇(2个有效:录音识别率低、飞船音乐不保存) - 微信:27条消息,14个问题簇(4个有效:飞船音乐、录音识别、音频无法播放、AI回复无关语句) ### 后续注意 - 删除/修改 `ai_summarize_feedback.py` 后需清理 `__pycache__`,否则缓存版本可能落后于源码 ## P0 实时检测去重修复 [刘新玉反馈] ### 问题 微信 `detect_p0_wechat.py` 每分钟扫描最近120分钟消息,同一个问题因讨论线程持续生长,`sort_threads` 聚类每次产生不同消息集合,导致: - 不同次的聚类有不同的 `cluster_signature`(基于 `sorted(message_ids)` MD5) - 去重完全失效,同一问题被重复推送(今天2个真实问题各推了3次 = 6次) ### 修复 在 `detect_p0_wechat.py` 和 `detect_p0_realtime.py` 中增加**内容语义去重**: 1. 新增 `cluster_content_fingerprint()`:拼接簇内前5条有意义消息作为内容指纹 + 发送人集合 + 小时粒度时间窗口 2. 新增 `is_duplicate_p0()`:基于内容相似度(Jaccard)+ 发送人重叠 + 时间窗口", - "recallCount": 2, + "recallCount": 3, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 3, "maxScore": 1, "firstRecalledAt": "2026-06-01T02:51:24.899Z", - "lastRecalledAt": "2026-06-01T04:05:41.997Z", + "lastRecalledAt": "2026-06-17T10:48:17.775Z", "queryHashes": [ "b36cfa790039", - "35e4329ea912" + "35e4329ea912", + "7acf4444a34f" ], "recallDays": [ - "2026-06-01" + "2026-06-01", + "2026-06-17" ], "conceptTags": [ "skip-dispatch", diff --git a/output/daily_feedback/ai_descriptions_feishu_2026-06-16.json b/output/daily_feedback/ai_descriptions_feishu_2026-06-16.json new file mode 100644 index 0000000..42cff4e --- /dev/null +++ b/output/daily_feedback/ai_descriptions_feishu_2026-06-16.json @@ -0,0 +1,33 @@ +{ + "date": "2026-06-16", + "descriptions": [ + { + "index": 1, + "description": "荣耀平板设备上无法安装软件,具体设备型号和故障表现待相关人员进行确认。" + }, + { + "index": 2, + "description": "荣耀平板在应用宝中无法搜索到瓦拉英语应用。" + }, + { + "index": 3, + "description": "用户在应用宝中搜索瓦拉英语APP时搜索不到。" + }, + { + "index": 4, + "description": "荣耀平板在应用市场无法搜索到瓦拉英语和应用宝,且直接安装APK文件也未能成功。" + }, + { + "index": 5, + "description": "无明确问题" + }, + { + "index": 6, + "description": "小相关人员习机端下载安装包时提示“正在进行内部优化,暂停下载功能”,导致用户无法安装应用。" + }, + { + "index": 7, + "description": "荣耀平板和小相关人员习机端无法安装应用" + } + ] +} \ No newline at end of file diff --git a/output/daily_feedback/ai_descriptions_wechat_2026-06-16.json b/output/daily_feedback/ai_descriptions_wechat_2026-06-16.json new file mode 100644 index 0000000..fb5c957 --- /dev/null +++ b/output/daily_feedback/ai_descriptions_wechat_2026-06-16.json @@ -0,0 +1,81 @@ +{ + "date": "2026-06-16", + "descriptions": [ + { + "index": 1, + "description": "无明确问题" + }, + { + "index": 2, + "description": "用户在删除原有课相关人员重新创建后,已完成的三节课学习进度丢失,上线后课相关人员示从头开始。" + }, + { + "index": 3, + "description": "无明确问题" + }, + { + "index": 4, + "description": "后台在用户删除课相关人员未同步更新角色相关数据,导致显示信息不正确。" + }, + { + "index": 5, + "description": "荣耀平板无法安装某软件,具体原因待确认。" + }, + { + "index": 6, + "description": "荣耀平板通过应用宝搜索不到瓦拉英语" + }, + { + "index": 7, + "description": "家长在应用宝中搜索不到瓦拉英语APP" + }, + { + "index": 8, + "description": "荣耀平板在应用市场搜索不到应用宝和瓦拉英语,且提供APK安装包后仍无法安装。" + }, + { + "index": 9, + "description": "在步步相关人员习机上搜索和下载APP时,客户反映无法搜到。" + }, + { + "index": 10, + "description": "无明确问题" + }, + { + "index": 11, + "description": "无明确问题" + }, + { + "index": 12, + "description": "无明确问题" + }, + { + "index": 13, + "description": "无明确问题" + }, + { + "index": 14, + "description": "无明确问题" + }, + { + "index": 15, + "description": "无明确问题" + }, + { + "index": 16, + "description": "部分用户反馈安装包下载时提示“正在进行内部优化,暂停下载功能”,导致无法下载。" + }, + { + "index": 17, + "description": "荣耀平板设备上无法安装应用的问题" + }, + { + "index": 18, + "description": "在平板投屏至电视时,口语快答环节无法收音,其他环节正常。" + }, + { + "index": 19, + "description": "希沃V1投屏到电视时,核心互动对话环节无法收音,但跟读环节可正相关人员音。" + } + ] +} \ No newline at end of file diff --git a/output/daily_feedback/cluster_context_2026-06-16.json b/output/daily_feedback/cluster_context_2026-06-16.json new file mode 100644 index 0000000..9404f0b --- /dev/null +++ b/output/daily_feedback/cluster_context_2026-06-16.json @@ -0,0 +1,342 @@ +{ + "date": "2026-06-16", + "total_clusters": 7, + "clusters": [ + { + "index": 1, + "_idx": 1, + "cluster_id": "7722577324374442850", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-花花班班(早10晚7-周末休息)", + "content": "辛苦看下这个学员下载问题\n ↳ 回复 花花老师: 18006470355老师学员用的荣耀平板无法安装软件,辛苦看下", + "msg_type": "link", + "media_url": "", + "time": "2026-06-16 15:49:23" + }, + { + "sender": "嘿哈", + "content": "@其实我是喷🔥龙 龙哥,辛苦看一下\n ↳ 回复 瓦拉英语-花花班主任(早10晚7): 辛苦看下这个学员下载问题5700辛苦看下这个学员下载问题5700群聊的聊天记录城美夏天: 6953566608637237031 这个买家使用的荣耀平板 搜不到瓦拉英语和应用宝 是什么原因呢\n城美夏天: [图片]\n城美夏天: 这是买家的版本配置\n丹咪尼🌹:稳稳的幸福和瓦拉英语-花花班主任(早10晚7)19https://support.weixin.qq.com/cgi-bin/mmsupport-bin/readtemplate?t=page/favorite_record__...", + "msg_type": "link", + "media_url": "", + "time": "2026-06-16 20:46:22" + }, + { + "sender": "瓦拉英语-花花班班(早10晚7-周末休息)", + "content": "[聊天记录] 稳稳的幸福和瓦拉英语-花花班主任(早10晚7)\n稳稳的幸福: 这个安装包说正在进行内部优化,暂停下载功能\n稳稳的幸福: 是啊,我这也纳闷,寻求你们的帮助啊\n稳稳的幸福: 小袁学习机你们上架了吗\n瓦拉英语-花花班主任(早10晚7): \"这个安装包说正在进行内部优化,暂停下载功能\"\n------\n可以截图看下吗\n瓦拉英语-花花班主任(早10晚7): 学习机的话,也是可以点击我刚才发的安装包安装\n稳稳的幸福: 学习机我试试\n稳稳的幸福: 两个都不行", + "msg_type": "link", + "media_url": "", + "time": "2026-06-16 21:09:37" + } + ] + }, + { + "index": 7, + "_idx": 7, + "cluster_id": "3785226689441846404", + "location": { + "端": "未知", + "环节": "未知", + "课程": "", + "角色/组件": "" + }, + "priority": "P2", + "priority_detail": "", + "category": "其他问题", + "conclusion": "**当前问题排查结论:** 暂无结论排查中", + "messages": [ + { + "sender": "瓦拉英语-花花班班(早10晚7-周末休息)", + "content": "另外家长问小袁学习机上架了吗?还有这个设备", + "msg_type": "text", + "media_url": "", + "time": "2026-06-16 20:51:10" + }, + { + "sender": "其实我是喷🔥龙", + "content": "小袁没有的\n ↳ 回复 瓦拉英语-花花班主任(早10晚7): 另外家长问小袁学习机上架了吗?还有这个设备", + "msg_type": "link", + "media_url": "", + "time": "2026-06-16 21:24:12" + }, + { + "sender": "瓦拉英语-花花班班(早10晚7-周末休息)", + "content": "@其实我是喷🔥龙那现在荣耀平板这个怎么可以解决一下呢老师\n ↳ 回复 其实我是喷🔥龙: 小袁没有的", + "msg_type": "link", + "media_url": "", + "time": "2026-06-16 22:09:19" + }, + { + "sender": "瓦拉英语-花花班班(早10晚7-周末休息)", + "content": "学习机和平板都安装不上", + "msg_type": "text", + "media_url": "", + "time": "2026-06-16 21:10:03" + } + ] + } + ] +} \ No newline at end of file diff --git a/output/daily_feedback/飞书反馈_2026-06-17.xlsx b/output/daily_feedback/飞书反馈_2026-06-17.xlsx new file mode 100644 index 0000000..61bd5d0 Binary files /dev/null and b/output/daily_feedback/飞书反馈_2026-06-17.xlsx differ diff --git a/scripts/detect_p0_realtime.py b/scripts/detect_p0_realtime.py index 58eea6d..2043059 100644 --- a/scripts/detect_p0_realtime.py +++ b/scripts/detect_p0_realtime.py @@ -18,7 +18,7 @@ P0 问题实时检测与分发 python3 detect_p0_realtime.py [--dry-run] [--lookback-minutes 120] """ -import sys, os, json, urllib.request, argparse, hashlib +import sys, os, re, json, urllib.request, argparse, hashlib from datetime import datetime, timedelta from pathlib import Path @@ -126,39 +126,60 @@ def is_probably_p0(cluster_msgs): return info["priority"] == "P0", info +def _clean_summary(text): + """清洗摘要文本,去掉用户ID、话术后缀等冗余信息。""" + # 去掉手机号/用户ID(11位数字,可能紧邻中文) + text = re.sub(r'(?:^|(?<=[^\d]))1[3-9]\d{9}(?=[^\d]|$)', '', text) + # 去掉话术后缀 + text = re.sub(r'[,,]?\s*(老师|辛苦|麻烦|帮忙)\s*(看下|看一下|看看|看)[。!!]*$', '', text) + text = re.sub(r'[,,]?\s*@\S+\s*', '', text) + # 清理多余空格和标点 + text = re.sub(r'\s+', ' ', text).strip() + text = re.sub(r'^[,,\s]+|[,,\s]+$', '', text) + return text + + +def _pick_best_summary(cluster_msgs): + """从簇中选出最能代表 P0 问题的摘要消息。 + 优先选择匹配 P0 关键词的消息,其次选第一条有意义的文本。""" + from priority_classifier import P0_KEYWORDS + + # 收集所有 P0 关键词正则 + p0_patterns = [] + for cat_pats in P0_KEYWORDS.values(): + p0_patterns.append(cat_pats) + combined_p0 = re.compile('|'.join(p0_patterns), re.IGNORECASE) + + best = None + for m in cluster_msgs: + t = str(m[3]).strip() if m[3] else "" + if not t or len(t) <= 3: + continue + if best is None: + best = t # 兜底:第一条有意义的文本 + if combined_p0.search(t): + # 命中 P0 关键词,优先使用 + return _clean_summary(t)[:150] + + return _clean_summary(best or "")[:150] + + def generate_p0_alert_text(cluster_msgs, priority_info): """ 生成 P0 问题的简短告警文本(精简版,不含完整文档链接)。 """ - # 收集关键信息 root_sender = cluster_msgs[0][1] root_time = cluster_msgs[0][6] - latest_time = cluster_msgs[-1][6] - - # 提取第一条有实质内容的消息作为摘要 - root_text = "" - for m in cluster_msgs: - t = str(m[3]) if m[3] else "" - t = t.strip() - if t and len(t) > 3: - root_text = t[:100] - break - - # 收集所有发言人 - senders = list(dict.fromkeys(m[1] for m in cluster_msgs)) # 去重保序 + root_text = _pick_best_summary(cluster_msgs) lines = [ f"🚨 P0 实时告警", f"", - f"**报告人:** {root_sender}", - f"**时间:** {root_time}", - f"**涉及人员:** {'、'.join(senders[:5])}" + ("等" if len(senders) > 5 else ""), - f"**消息数:** {len(cluster_msgs)} 条", - f"", - f"**摘要:** {root_text}", - f"", - f"**判定依据:** {priority_info.get('reasoning', 'P0')}", - f"**修复时限:** {priority_info.get('deadline', '2小时内')}", + f"问题描述: {root_text}", + f"报告人: {root_sender}", + f"报告时间: {root_time}", + f"判定依据: {priority_info.get('reasoning', 'P0')}", + f"修复时限: {priority_info.get('deadline', '2小时内')}", ] return "\n".join(lines) diff --git a/scripts/detect_p0_wechat.py b/scripts/detect_p0_wechat.py index 46c2499..d124e86 100755 --- a/scripts/detect_p0_wechat.py +++ b/scripts/detect_p0_wechat.py @@ -18,7 +18,7 @@ python3 detect_p0_wechat.py [--dry-run] [--lookback-minutes 120] """ -import sys, os, json, hashlib, argparse, pymysql +import sys, os, re, json, hashlib, argparse, pymysql from datetime import datetime, timedelta SKILL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "skills", "feishu-feedback-sync", "scripts") @@ -120,30 +120,57 @@ def is_probably_p0(cluster_msgs): return info["priority"] == "P0", info +def _clean_summary(text): + """清洗摘要文本,去掉用户ID、话术后缀等冗余信息。""" + # 去掉手机号/用户ID(11位数字,可能紧邻中文) + text = re.sub(r'(?:^|(?<=[^\d]))1[3-9]\d{9}(?=[^\d]|$)', '', text) + # 去掉话术后缀 + text = re.sub(r'[,,]?\s*(老师|辛苦|麻烦|帮忙)\s*(看下|看一下|看看|看)[。!!]*$', '', text) + text = re.sub(r'[,,]?\s*@\S+\s*', '', text) + # 清理多余空格和标点 + text = re.sub(r'\s+', ' ', text).strip() + text = re.sub(r'^[,,\s]+|[,,\s]+$', '', text) + return text + + +def _pick_best_summary(cluster_msgs): + """从簇中选出最能代表 P0 问题的摘要消息。 + 优先选择匹配 P0 关键词的消息,其次选第一条有意义的文本。""" + from priority_classifier import P0_KEYWORDS + + # 收集所有 P0 关键词正则 + p0_patterns = [] + for cat_pats in P0_KEYWORDS.values(): + p0_patterns.append(cat_pats) + combined_p0 = re.compile('|'.join(p0_patterns), re.IGNORECASE) + + best = None + for m in cluster_msgs: + t = str(m[3]).strip() if m[3] else "" + if not t or len(t) <= 3: + continue + if best is None: + best = t # 兜底:第一条有意义的文本 + if combined_p0.search(t): + # 命中 P0 关键词,优先使用 + return _clean_summary(t)[:150] + + return _clean_summary(best or "")[:150] + + def generate_p0_alert_text(cluster_msgs, priority_info): root_sender = cluster_msgs[0][1] root_time = cluster_msgs[0][6] - root_text = "" - for m in cluster_msgs: - t = str(m[3]) if m[3] else "" - t = t.strip() - if t and len(t) > 3: - root_text = t[:100] - break - senders = list(dict.fromkeys(m[1] for m in cluster_msgs)) + root_text = _pick_best_summary(cluster_msgs) return "\n".join([ f"🚨 微信 P0 实时告警", f"", - f"**报告人:** {root_sender}", - f"**时间:** {root_time}", - f"**涉及人员:** {'、'.join(senders[:5])}" + ("等" if len(senders) > 5 else ""), - f"**消息数:** {len(cluster_msgs)} 条", - f"", - f"**摘要:** {root_text}", - f"", - f"**判定依据:** {priority_info.get('reasoning', 'P0')}", - f"**修复时限:** {priority_info.get('deadline', '2小时内')}", + f"问题描述: {root_text}", + f"报告人: {root_sender}", + f"报告时间: {root_time}", + f"判定依据: {priority_info.get('reasoning', 'P0')}", + f"修复时限: {priority_info.get('deadline', '2小时内')}", ])