auto backup 2026-06-25 08:10:01
This commit is contained in:
parent
52d0ec42a5
commit
6cccac21c5
@ -59,7 +59,7 @@
|
||||
- 阈值:同小时 + 发送人交集 + 相似度 > 0.20;跨小时 + 发送人 ≥2 重叠 + 相似度 > 0.35
|
||||
- 影响文件:`detect_p0_wechat.py`、`detect_p0_realtime.py`
|
||||
|
||||
### P0 告警问题描述清洗规则(2026-06-22 刘新玉确认)
|
||||
### P0 告警问题描述清洗规则(2026-06-24 更新)
|
||||
- P0 告警的"问题描述"字段必须经过 `_clean_summary` 清洗,不能直接贴原始消息
|
||||
- 清洗规则(`detect_p0_wechat.py` / `detect_p0_realtime.py` 的 `_clean_summary` 函数):
|
||||
1. 去掉 `[聊天记录]` 转发标记
|
||||
@ -69,6 +69,8 @@
|
||||
5. 去掉 `[视频]`/`[图片]`/`[语音]` 等媒体标记
|
||||
6. 疑问句→陈述句改写:去掉"这个反馈可以跟用户确认下..."等讨论话术前缀 + 疑问结构 + 句末疑问词;碎片化症状词补全(如 `闪退的` → `用户反馈闪退,需确认操作场景`)
|
||||
- `_pick_best_summary` 优先选非转发/非内部讨论的消息,跳过含 `[聊天记录]`、`↳ 回复`、`<msg>` 标记的消息
|
||||
- **内部技术讨论过滤(2026-06-24 新增)**:`_is_internal_discussion()` 检测技术术语(hotfix/分支/打包/构建/部署/发版等)+ 讨论话术(不是/我觉得/应该是/你们用/改回来等),匹配后跳过该消息。全部消息均为内部讨论时,`_extract_problem_from_discussion()` 尝试提取用户问题关键词,无法提取则返回通用提示"内部技术讨论中提及问题,需人工确认具体用户反馈内容"
|
||||
- **内部讨论 P0 不推送(2026-06-24 刘新玉确认)**:簇内全部消息均为内部技术讨论时,直接跳过不推送到「小葵小葵」群,仅记录去重状态避免重复检测
|
||||
|
||||
## 经验教训
|
||||
|
||||
|
||||
17
output/daily_feedback/ai_descriptions_feishu_2026-06-23.json
Normal file
17
output/daily_feedback/ai_descriptions_feishu_2026-06-23.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"date": "2026-06-23",
|
||||
"descriptions": [
|
||||
{
|
||||
"index": 1,
|
||||
"description": "iPad端用户在完成第四单元(L1)口语单元挑战后无法查看正确答案且解析内容与实际题目不符,应用同时频繁闪退。"
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"description": "在H5页面的L2级别U10单元巩固题目中,用户提交正确答案却被系统判定为错误。"
|
||||
},
|
||||
{
|
||||
"index": 3,
|
||||
"description": "无明确问题"
|
||||
}
|
||||
]
|
||||
}
|
||||
53
output/daily_feedback/ai_descriptions_wechat_2026-06-23.json
Normal file
53
output/daily_feedback/ai_descriptions_wechat_2026-06-23.json
Normal file
@ -0,0 +1,53 @@
|
||||
{
|
||||
"date": "2026-06-23",
|
||||
"descriptions": [
|
||||
{
|
||||
"index": 1,
|
||||
"description": "用户完成单元挑战"
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"description": "瓦拉英语L2级别U10单元巩固题目在单元挑战H5页面中,正确答案被判定为错误。"
|
||||
},
|
||||
{
|
||||
"index": 3,
|
||||
"description": "在苹果iPad端的瓦拉app中,语音识别功能出现卡顿和无法识别的问题。"
|
||||
},
|
||||
{
|
||||
"index": 4,
|
||||
"description": "用户反馈语音识别时转圈等待时间长且无法成功识别"
|
||||
},
|
||||
{
|
||||
"index": 5,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 6,
|
||||
"description": "用户在启动游戏时一直处于加载转圈状态,无法进入游戏,切换网络和卸载重装均未解决。"
|
||||
},
|
||||
{
|
||||
"index": 7,
|
||||
"description": "部分用户从昨晚开始无法进入课堂进行学习"
|
||||
},
|
||||
{
|
||||
"index": 8,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 9,
|
||||
"description": "用户无法进入S2的14-4关卡"
|
||||
},
|
||||
{
|
||||
"index": 10,
|
||||
"description": "存在资源依赖问题,需引导用户更新S1资源"
|
||||
},
|
||||
{
|
||||
"index": 11,
|
||||
"description": "无明确问题"
|
||||
},
|
||||
{
|
||||
"index": 12,
|
||||
"description": "无明确问题"
|
||||
}
|
||||
]
|
||||
}
|
||||
211
output/daily_feedback/cluster_context_2026-06-23.json
Normal file
211
output/daily_feedback/cluster_context_2026-06-23.json
Normal file
@ -0,0 +1,211 @@
|
||||
{
|
||||
"date": "2026-06-23",
|
||||
"total_clusters": 3,
|
||||
"clusters": [
|
||||
{
|
||||
"index": 1,
|
||||
"_idx": 1,
|
||||
"cluster_id": "1135459811533962105",
|
||||
"location": {
|
||||
"端": "移动端",
|
||||
"环节": "单元挑战",
|
||||
"课程": "",
|
||||
"角色/组件": "网络"
|
||||
},
|
||||
"priority": "P2",
|
||||
"priority_detail": "",
|
||||
"category": "启动/运行异常",
|
||||
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||
"messages": [
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "[聊天记录] 子曦和瓦拉英语-花花班主任(早10晚7)\n子曦: [视频]\n子曦: 花花,做的单元挑战的题,做完之后有错误,怎么看不到答案呢,点击题目解析出来的和今天做的题不一样呢\n子曦: \"咱们用的是什么设备呢?您现在重新安装一下试试,或者是更新一下软件试试\"\n------\n还有关于闪退的问题,刚才孩子在学习的时候又闪退了,我今天刚刚把它卸载又重新安装过了,为什么还是不行呢?\n子曦: [图片]\n子曦: [图片]\n子曦: 我来上班了,手边没有设备,这是我昨天给您录的视频里截出来的两张图片,第一张是刚做的单元挑战的题目,第二张是我点开题目解析后出来的画面,这两道题根本对不上呀\n子曦: \"[视频]\"\n------\n您可以看一下这个视频\n子曦: 主要是有的题目孩子不会,问我呢我也不会,不知道咋办了[捂脸]",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:26:29"
|
||||
},
|
||||
{
|
||||
"sender": "跃馬點煋",
|
||||
"content": "我跟踪一下这个单元挑战的问题。稍后回复。\n ↳ 回复 瓦拉英语-花花班主任(早10晚7): <msg><appmsg appid=\"\" sdkver=\"0\"><title>子曦和瓦拉英语-花花班主任(早10晚7)</title><des></des><action></action><type>19</type><showtype>0</showtype><soundtype>0</soundtype><me...",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:54:59"
|
||||
},
|
||||
{
|
||||
"sender": "花生",
|
||||
"content": "麻烦跟用户确认下是哪个单元的哪个模块的单元挑战呢",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:29:02"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "@花生第四单元单元挑战,口语部分\n ↳ 回复 花生: 麻烦跟用户确认下是哪个单元的哪个模块的单元挑战呢",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:36:50"
|
||||
},
|
||||
{
|
||||
"sender": "花生",
|
||||
"content": "L1还是L2\n ↳ 回复 瓦拉英语-花花班主任(早10晚7): <?xml version=\"1.0\"?>\n<msg>\n\t<appmsg>\n\t\t<title><![CDATA[@花生第四单元单元挑战,口语部分]]></title>\n\t\t<des />\n\t\t<type>57</type>\n\t\t<appattach>\n\t\t\t<cdnthumbaeskey />\n\t\t\t<aeskey><...",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:39:36"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "@花生一级别\n ↳ 回复 花生: L1还是L2",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 11:26:54"
|
||||
},
|
||||
{
|
||||
"sender": "跃馬點煋",
|
||||
"content": "还有用户手机号",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:29:10"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "18603892527用户做完单元挑战题目和解析对应不上,另外说经常闪退",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:29:34"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "[聊天记录] DX17和瓦拉英语-花花班主任(早10晚7)\nDX17: [视频]\nDX17: [视频]\nDX17: \"[视频]\"\n------\n这个怎么都打不开,我还卸载了重新下载的,也还是不行\nDX17: 也不是网络问题,其他的都可以打开,就他",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:34:05"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "15281078450苹果平板,明天说打不开课程,辛苦看下",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:34:29"
|
||||
},
|
||||
{
|
||||
"sender": "花生",
|
||||
"content": "让用户下一下这个s1的资源\n ↳ 回复 花生: <?xml version=\"1.0\"?>\n<msg>\n\t<img aeskey=\"06a5945f90c903f1197b6fb9bde32cab\" encryver=\"1\" cdnthumbaeskey=\"06a5945f90c903f1197b6fb9bde32cab\" cdnthumburl=\"305f0201...",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:35:08"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"_idx": 2,
|
||||
"cluster_id": "2657060464003498323",
|
||||
"location": {
|
||||
"端": "未知",
|
||||
"环节": "巩固题",
|
||||
"课程": "06-22",
|
||||
"角色/组件": ""
|
||||
},
|
||||
"priority": "P2",
|
||||
"priority_detail": "",
|
||||
"category": "关卡/内容类",
|
||||
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||
"messages": [
|
||||
{
|
||||
"sender": "跃馬點煋",
|
||||
"content": "找到问题了,应该是单元挑战h5页面展示的问题,我去找相关研发老师,稍后回复。",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 10:59:27"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-萌萌老师(早10晚7)",
|
||||
"content": "位置是U10的单元巩固题目,我明明做的是正确答案,他判断出来是错误的。",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 11:06:18"
|
||||
},
|
||||
{
|
||||
"sender": "花生",
|
||||
"content": "具体哪个level的哪个课程,可以让用户看下他瓦拉学院里巩固的学习记录,把他的做题答案截图发一下\n ↳ 回复 瓦拉英语-萌萌班主任(早10晚7): 位置是U10的单元巩固题目,我明明做的是正确答案,他判断出来是错误的。",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 11:38:28"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-萌萌老师(早10晚7)",
|
||||
"content": "@花生 L2 的\n ↳ 回复 花生: 具体哪个level的哪个课程,可以让用户看下他瓦拉学院里巩固的学习记录,把他的做题答案截图发一下",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 12:44:08"
|
||||
},
|
||||
{
|
||||
"sender": "花生",
|
||||
"content": "需要具体到哪个lesson,以及有学习记录的话就要下截图\n ↳ 回复 瓦拉英语-萌萌班主任(早10晚7): <msg><appmsg appid=\"\" sdkver=\"0\"><title>@花生 L2 的</title><des></des><action></action><type>57</type><showtype>0</showtype><soundtype>0</soundtype><mediatagname><...",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 19:20:14"
|
||||
},
|
||||
{
|
||||
"sender": "一_FVision",
|
||||
"content": "让用户重新进一下看看",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 11:06:28"
|
||||
},
|
||||
{
|
||||
"sender": "跃馬點煋",
|
||||
"content": "这样操作一下\n ↳ 回复 瓦拉场务-糖果果: 让用户重新进一下看看",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 12:02:15"
|
||||
},
|
||||
{
|
||||
"sender": "跃馬點煋",
|
||||
"content": "目前,我从服务器日志来看,题目解析服务的请求最晚是:06-22 17:07:57\n也就是说用户从6月22日下午5点08分之后没有请求过题目解析",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 11:10:06"
|
||||
},
|
||||
{
|
||||
"sender": "瓦拉英语-花花班班(早10晚7-周末休息)",
|
||||
"content": "@wayne老师,那现在需要家长操作什么吗?可以解决一下\n ↳ 回复 跃馬點煋: 目前,我从服务器日志来看,题目解析服务的请求最晚是:06-22 17:07:57\n也就是说用户从6月22日下午5点08分之后没有请求过题目解析",
|
||||
"msg_type": "link",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 11:59:42"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"index": 3,
|
||||
"_idx": 3,
|
||||
"cluster_id": "2399062222045209808",
|
||||
"location": {
|
||||
"端": "未知",
|
||||
"环节": "未知",
|
||||
"课程": "",
|
||||
"角色/组件": ""
|
||||
},
|
||||
"priority": "P2",
|
||||
"priority_detail": "",
|
||||
"category": "其他问题",
|
||||
"conclusion": "**当前问题排查结论:** 暂无结论排查中",
|
||||
"messages": [
|
||||
{
|
||||
"sender": "瓦拉英语-益达老师",
|
||||
"content": "[表情]",
|
||||
"msg_type": "sticker",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 20:04:09"
|
||||
},
|
||||
{
|
||||
"sender": "kevin",
|
||||
"content": "@瓦拉英语-益达老师 一样的资源依赖问题,引导用户更新一下S1 的资源就可以了",
|
||||
"msg_type": "text",
|
||||
"media_url": "",
|
||||
"time": "2026-06-23 20:07:02"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
output/daily_feedback/飞书反馈_2026-06-24.xlsx
Normal file
BIN
output/daily_feedback/飞书反馈_2026-06-24.xlsx
Normal file
Binary file not shown.
BIN
scripts/__pycache__/detect_p0_realtime.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/detect_p0_realtime.cpython-312.pyc
Normal file
Binary file not shown.
BIN
scripts/__pycache__/detect_p0_wechat.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/detect_p0_wechat.cpython-312.pyc
Normal file
Binary file not shown.
@ -126,6 +126,79 @@ def is_probably_p0(cluster_msgs):
|
||||
return info["priority"] == "P0", info
|
||||
|
||||
|
||||
# Signature phrases of internal team discussion (as opposed to user feedback).
# Each entry is a regex fragment; the fragments are OR-ed together below and
# matched case-insensitively.
_INTERNAL_DISCUSSION_PATTERNS = [
    # Engineering / release vocabulary
    r'hotfix',
    r'\w+_hf\b',
    r'分支',
    r'打包',
    r'构建',
    r'部署',
    r'预发布',
    r'测试环境',
    r'灰度',
    r'发版',
    r'上线',
    r'回滚',
    r'版本号',
    r'master\b',
    r'develop\b',
    r'release\b',
    # Discussion phrasing (negation / rhetorical question / suggestion)
    r'^(?:不是|不对|不,)',
    r'我觉得',
    r'应该是',
    r'你们(?:用|试|改)',
    r'想不改除非',
    r'需要再改回来',
    r'是一样的',
    r'除非你们',
    r'这周要更新',
    r'更新之前',
    r'改回来',
    # Descriptions of technical operations
    r'hotfix打的包',
    r'分支的内容',
    r'测试后面的关卡',
]

# Single alternation over every fragment above.
_INTERNAL_DISCUSSION_RE = re.compile(
    '|'.join(_INTERNAL_DISCUSSION_PATTERNS),
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _is_internal_discussion(text):
    """Return True when ``text`` looks like internal team discussion.

    Two checks are applied in order:

    1. Any hit of ``_INTERNAL_DISCUSSION_RE`` classifies the text as internal.
    2. For text longer than 80 characters, technical terms and discussion
       phrases are counted; two of either kind (or three combined) also
       classify the text as internal.

    Falsy text and text shorter than 5 characters is never classified as
    internal.
    """
    if not text or len(text) < 5:
        return False

    # A single signature phrase is enough.
    if _INTERNAL_DISCUSSION_RE.search(text) is not None:
        return True

    # The density heuristic below only applies to long messages.
    if len(text) <= 80:
        return False

    tech_hits = re.findall(
        r'(?:hotfix|分支|打包|构建|部署|测试|版本|上线|发版|回滚|灰度|预发布)',
        text,
        re.IGNORECASE,
    )
    talk_hits = re.findall(
        r'(?:不是|不对|我觉得|应该是|你们|我们|改回来|除非|一样的)',
        text,
    )
    n_tech = len(tech_hits)
    n_talk = len(talk_hits)
    return n_tech >= 2 or n_talk >= 2 or (n_tech + n_talk) >= 3
|
||||
|
||||
|
||||
def _extract_problem_from_discussion(text, cluster_msgs=None):
    """Derive a problem description from an internal technical discussion.

    Resolution order:

    1. If ``cluster_msgs`` is given, return the cleaned text of the first
       message that is not classified as internal discussion.
    2. Otherwise search ``text`` for a "user ... reports ... symptom" phrase,
       then for a bare symptom keyword, and wrap the hit in a fixed template.
    3. Otherwise summarise which topics the discussion touches.

    Args:
        text: Discussion text to mine. ``None`` is treated as empty.
        cluster_msgs: Optional sequence of message records; index 3 of each
            record is read as the message text.

    Returns:
        A description string.
    """
    # Prefer a message from the cluster that is not internal discussion.
    if cluster_msgs:
        for msg in cluster_msgs:
            body = str(msg[3]).strip() if msg[3] else ""
            if body and len(body) > 3 and not _is_internal_discussion(body):
                return _clean_summary(body)

    text = text or ""

    # (pattern, minimum match length).
    # The phrase pattern keeps its original "> 5 characters" guard. The bare
    # keyword pattern needs no guard: its alternatives are at most 4
    # characters long, so sharing the "> 5" guard made it unreachable.
    problem_indicators = [
        (r'(?:用户|玩家|有人).{0,10}(?:反馈|说|遇到|出现|发现).{0,30}(?:闪退|崩溃|卡死|卡顿|白屏|黑屏|无法|不能|打不开|进不去|报错|异常)', 6),
        (r'(?:闪退|崩溃|卡死|卡顿|白屏|黑屏|无法登录|进不去|打不开|报错|异常|bug|BUG)', 1),
    ]
    for pattern, min_len in problem_indicators:
        hit = re.search(pattern, text, re.IGNORECASE)
        if hit and len(hit.group(0)) >= min_len:
            return f"用户反馈{hit.group(0)},需确认影响范围"

    # No concrete problem found: summarise the discussion topics instead.
    topic_map = [
        (r'关卡|解锁|单元|U\d+', '关卡/解锁'),
        (r'打包|构建|hotfix|分支|发版|上线|更新', '打包/发版'),
        (r'测试|灰度|预发布|交叉测', '测试/灰度'),
        (r'配置|后台|服务端|接口', '配置/后台'),
        (r'闪退|崩溃|卡死|卡顿|白屏|黑屏', '崩溃/异常'),
        (r'加载|转圈|进不去|打不开|无法', '加载/访问'),
    ]
    topics = [
        label
        for pattern, label in topic_map
        if re.search(pattern, text, re.IGNORECASE)
    ]
    # At most three topic labels are reported.
    topic_str = '/'.join(topics[:3]) if topics else '相关'
    return f"团队内部讨论{topic_str}问题,需人工确认具体用户反馈内容"
|
||||
|
||||
|
||||
def _clean_summary(text):
|
||||
"""清洗摘要文本,提取核心问题描述(处理转发消息、内部讨论等噪音)。"""
|
||||
# 去掉 [聊天记录] 等转发标记
|
||||
@ -188,7 +261,8 @@ def _clean_summary(text):
|
||||
|
||||
def _pick_best_summary(cluster_msgs):
|
||||
"""从簇中选出最能代表 P0 问题的摘要消息。
|
||||
优先选择匹配 P0 关键词且非转发/非内部讨论的消息。"""
|
||||
优先选择匹配 P0 关键词且非转发/非内部讨论的消息。
|
||||
内部技术讨论消息会被跳过,优先使用用户原始反馈。"""
|
||||
from priority_classifier import P0_KEYWORDS
|
||||
|
||||
# 收集所有 P0 关键词正则
|
||||
@ -197,27 +271,32 @@ def _pick_best_summary(cluster_msgs):
|
||||
p0_patterns.append(cat_pats)
|
||||
combined_p0 = re.compile('|'.join(p0_patterns), re.IGNORECASE)
|
||||
|
||||
# 判断是否为转发消息/内部讨论(包含 [聊天记录]、↳ 回复 等标记)
|
||||
def _is_forward_or_discussion(t):
|
||||
# 判断是否为转发消息(包含 [聊天记录]、↳ 回复、XML 标记)
|
||||
def _is_forward(t):
|
||||
return bool(re.search(r'^\[聊天记录\]|↳\s*回复|<msg>|<appmsg', t))
|
||||
|
||||
best = None
|
||||
best_noise = None # 兜底:第一条非空消息(即使有噪音)
|
||||
all_internal = True # 是否所有消息都是内部讨论
|
||||
for m in cluster_msgs:
|
||||
t = str(m[3]).strip() if m[3] else ""
|
||||
if not t or len(t) <= 3:
|
||||
continue
|
||||
if best_noise is None:
|
||||
best_noise = t
|
||||
# 跳过转发消息和内部讨论,优先找用户原始反馈
|
||||
if _is_forward_or_discussion(t):
|
||||
# 跳过转发消息
|
||||
if _is_forward(t):
|
||||
continue
|
||||
# 跳过内部技术讨论
|
||||
if _is_internal_discussion(t):
|
||||
continue
|
||||
all_internal = False
|
||||
if best is None:
|
||||
best = t
|
||||
if combined_p0.search(t):
|
||||
return _clean_summary(t)[:150]
|
||||
|
||||
# 如果没有非转发的消息,从有噪音的消息中找 P0 关键词
|
||||
# 如果没有非转发/非内部讨论的消息,从有噪音的消息中找 P0 关键词
|
||||
if best is None:
|
||||
for m in cluster_msgs:
|
||||
t = str(m[3]).strip() if m[3] else ""
|
||||
@ -227,6 +306,12 @@ def _pick_best_summary(cluster_msgs):
|
||||
return _clean_summary(t)[:150]
|
||||
best = best_noise
|
||||
|
||||
# 如果所有消息都是内部讨论,尝试从中提取问题描述
|
||||
if all_internal and best:
|
||||
extracted = _extract_problem_from_discussion(best, cluster_msgs)
|
||||
if extracted:
|
||||
return extracted[:150]
|
||||
|
||||
return _clean_summary(best or "")[:150]
|
||||
|
||||
|
||||
@ -382,6 +467,18 @@ def main():
|
||||
|
||||
print(f"[P0-detect] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")
|
||||
|
||||
# 如果簇中所有消息都是内部技术讨论,跳过不推送
|
||||
all_internal = True
|
||||
for m in cmsgs:
|
||||
t = str(m[3]).strip() if m[3] else ""
|
||||
if t and len(t) > 3 and not _is_internal_discussion(t):
|
||||
all_internal = False
|
||||
break
|
||||
if all_internal:
|
||||
print(f"[P0-detect] ⏭️ 全部为内部技术讨论,跳过推送")
|
||||
state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
|
||||
continue
|
||||
|
||||
if args.dry_run:
|
||||
alert = generate_p0_alert_text(cmsgs, info)
|
||||
print(f"[DRY-RUN] 将发送:\n{alert}")
|
||||
|
||||
@ -120,6 +120,79 @@ def is_probably_p0(cluster_msgs):
|
||||
return info["priority"] == "P0", info
|
||||
|
||||
|
||||
# Signature phrases of internal team discussion (as opposed to user feedback).
# Each entry is a regex fragment; the fragments are OR-ed together below and
# matched case-insensitively.
_INTERNAL_DISCUSSION_PATTERNS = [
    # Engineering / release vocabulary
    r'hotfix',
    r'\w+_hf\b',
    r'分支',
    r'打包',
    r'构建',
    r'部署',
    r'预发布',
    r'测试环境',
    r'灰度',
    r'发版',
    r'上线',
    r'回滚',
    r'版本号',
    r'master\b',
    r'develop\b',
    r'release\b',
    # Discussion phrasing (negation / rhetorical question / suggestion)
    r'^(?:不是|不对|不,)',
    r'我觉得',
    r'应该是',
    r'你们(?:用|试|改)',
    r'想不改除非',
    r'需要再改回来',
    r'是一样的',
    r'除非你们',
    r'这周要更新',
    r'更新之前',
    r'改回来',
    # Descriptions of technical operations
    r'hotfix打的包',
    r'分支的内容',
    r'测试后面的关卡',
]

# Single alternation over every fragment above.
_INTERNAL_DISCUSSION_RE = re.compile(
    '|'.join(_INTERNAL_DISCUSSION_PATTERNS),
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _is_internal_discussion(text):
    """Return True when ``text`` looks like internal team discussion.

    Two checks are applied in order:

    1. Any hit of ``_INTERNAL_DISCUSSION_RE`` classifies the text as internal.
    2. For text longer than 80 characters, technical terms and discussion
       phrases are counted; two of either kind (or three combined) also
       classify the text as internal.

    Falsy text and text shorter than 5 characters is never classified as
    internal.
    """
    if not text or len(text) < 5:
        return False

    # A single signature phrase is enough.
    if _INTERNAL_DISCUSSION_RE.search(text) is not None:
        return True

    # The density heuristic below only applies to long messages.
    if len(text) <= 80:
        return False

    tech_hits = re.findall(
        r'(?:hotfix|分支|打包|构建|部署|测试|版本|上线|发版|回滚|灰度|预发布)',
        text,
        re.IGNORECASE,
    )
    talk_hits = re.findall(
        r'(?:不是|不对|我觉得|应该是|你们|我们|改回来|除非|一样的)',
        text,
    )
    n_tech = len(tech_hits)
    n_talk = len(talk_hits)
    return n_tech >= 2 or n_talk >= 2 or (n_tech + n_talk) >= 3
|
||||
|
||||
|
||||
def _extract_problem_from_discussion(text, cluster_msgs=None):
    """Derive a problem description from an internal technical discussion.

    Resolution order:

    1. If ``cluster_msgs`` is given, return the cleaned text of the first
       message that is not classified as internal discussion.
    2. Otherwise search ``text`` for a "user ... reports ... symptom" phrase,
       then for a bare symptom keyword, and wrap the hit in a fixed template.
    3. Otherwise summarise which topics the discussion touches.

    Args:
        text: Discussion text to mine. ``None`` is treated as empty.
        cluster_msgs: Optional sequence of message records; index 3 of each
            record is read as the message text.

    Returns:
        A description string.
    """
    # Prefer a message from the cluster that is not internal discussion.
    if cluster_msgs:
        for msg in cluster_msgs:
            body = str(msg[3]).strip() if msg[3] else ""
            if body and len(body) > 3 and not _is_internal_discussion(body):
                return _clean_summary(body)

    text = text or ""

    # (pattern, minimum match length).
    # The phrase pattern keeps its original "> 5 characters" guard. The bare
    # keyword pattern needs no guard: its alternatives are at most 4
    # characters long, so sharing the "> 5" guard made it unreachable.
    problem_indicators = [
        (r'(?:用户|玩家|有人).{0,10}(?:反馈|说|遇到|出现|发现).{0,30}(?:闪退|崩溃|卡死|卡顿|白屏|黑屏|无法|不能|打不开|进不去|报错|异常)', 6),
        (r'(?:闪退|崩溃|卡死|卡顿|白屏|黑屏|无法登录|进不去|打不开|报错|异常|bug|BUG)', 1),
    ]
    for pattern, min_len in problem_indicators:
        hit = re.search(pattern, text, re.IGNORECASE)
        if hit and len(hit.group(0)) >= min_len:
            return f"用户反馈{hit.group(0)},需确认影响范围"

    # No concrete problem found: summarise the discussion topics instead.
    topic_map = [
        (r'关卡|解锁|单元|U\d+', '关卡/解锁'),
        (r'打包|构建|hotfix|分支|发版|上线|更新', '打包/发版'),
        (r'测试|灰度|预发布|交叉测', '测试/灰度'),
        (r'配置|后台|服务端|接口', '配置/后台'),
        (r'闪退|崩溃|卡死|卡顿|白屏|黑屏', '崩溃/异常'),
        (r'加载|转圈|进不去|打不开|无法', '加载/访问'),
    ]
    topics = [
        label
        for pattern, label in topic_map
        if re.search(pattern, text, re.IGNORECASE)
    ]
    # At most three topic labels are reported.
    topic_str = '/'.join(topics[:3]) if topics else '相关'
    return f"团队内部讨论{topic_str}问题,需人工确认具体用户反馈内容"
|
||||
|
||||
|
||||
def _clean_summary(text):
|
||||
"""清洗摘要文本,提取核心问题描述(处理转发消息、内部讨论等噪音)。"""
|
||||
# 去掉 [聊天记录] 等转发标记
|
||||
@ -182,7 +255,8 @@ def _clean_summary(text):
|
||||
|
||||
def _pick_best_summary(cluster_msgs):
|
||||
"""从簇中选出最能代表 P0 问题的摘要消息。
|
||||
优先选择匹配 P0 关键词且非转发/非内部讨论的消息。"""
|
||||
优先选择匹配 P0 关键词且非转发/非内部讨论的消息。
|
||||
内部技术讨论消息会被跳过,优先使用用户原始反馈。"""
|
||||
from priority_classifier import P0_KEYWORDS
|
||||
|
||||
# 收集所有 P0 关键词正则
|
||||
@ -191,27 +265,32 @@ def _pick_best_summary(cluster_msgs):
|
||||
p0_patterns.append(cat_pats)
|
||||
combined_p0 = re.compile('|'.join(p0_patterns), re.IGNORECASE)
|
||||
|
||||
# 判断是否为转发消息/内部讨论(包含 [聊天记录]、↳ 回复 等标记)
|
||||
def _is_forward_or_discussion(t):
|
||||
# 判断是否为转发消息(包含 [聊天记录]、↳ 回复、XML 标记)
|
||||
def _is_forward(t):
|
||||
return bool(re.search(r'^\[聊天记录\]|↳\s*回复|<msg>|<appmsg', t))
|
||||
|
||||
best = None
|
||||
best_noise = None # 兜底:第一条非空消息(即使有噪音)
|
||||
all_internal = True # 是否所有消息都是内部讨论
|
||||
for m in cluster_msgs:
|
||||
t = str(m[3]).strip() if m[3] else ""
|
||||
if not t or len(t) <= 3:
|
||||
continue
|
||||
if best_noise is None:
|
||||
best_noise = t
|
||||
# 跳过转发消息和内部讨论,优先找用户原始反馈
|
||||
if _is_forward_or_discussion(t):
|
||||
# 跳过转发消息
|
||||
if _is_forward(t):
|
||||
continue
|
||||
# 跳过内部技术讨论
|
||||
if _is_internal_discussion(t):
|
||||
continue
|
||||
all_internal = False
|
||||
if best is None:
|
||||
best = t
|
||||
if combined_p0.search(t):
|
||||
return _clean_summary(t)[:150]
|
||||
|
||||
# 如果没有非转发的消息,从有噪音的消息中找 P0 关键词
|
||||
# 如果没有非转发/非内部讨论的消息,从有噪音的消息中找 P0 关键词
|
||||
if best is None:
|
||||
for m in cluster_msgs:
|
||||
t = str(m[3]).strip() if m[3] else ""
|
||||
@ -221,6 +300,12 @@ def _pick_best_summary(cluster_msgs):
|
||||
return _clean_summary(t)[:150]
|
||||
best = best_noise
|
||||
|
||||
# 如果所有消息都是内部讨论,尝试从中提取问题描述
|
||||
if all_internal and best:
|
||||
extracted = _extract_problem_from_discussion(best, cluster_msgs)
|
||||
if extracted:
|
||||
return extracted[:150]
|
||||
|
||||
return _clean_summary(best or "")[:150]
|
||||
|
||||
|
||||
@ -368,6 +453,18 @@ def main():
|
||||
|
||||
print(f"[P0-wechat] 🚨 发现新 P0! sig={sig[:8]}... {len(cmsgs)}条消息")
|
||||
|
||||
# 如果簇中所有消息都是内部技术讨论,跳过不推送
|
||||
all_internal = True
|
||||
for m in cmsgs:
|
||||
t = str(m[3]).strip() if m[3] else ""
|
||||
if t and len(t) > 3 and not _is_internal_discussion(t):
|
||||
all_internal = False
|
||||
break
|
||||
if all_internal:
|
||||
print(f"[P0-wechat] ⏭️ 全部为内部技术讨论,跳过推送")
|
||||
state[sig] = {"time": datetime.now().isoformat(), "fp": fp}
|
||||
continue
|
||||
|
||||
if args.dry_run:
|
||||
alert = generate_p0_alert_text(cmsgs, info)
|
||||
print(f"[DRY-RUN] 将发送:\n{alert}")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user