#!/usr/bin/env python3
"""AI feedback summarization script.

Reads cluster_context_{date}.json, calls an LLM to generate a concise problem
description for every issue cluster, writes ai_descriptions_{date}.json, and
then applies the descriptions to the Feishu knowledge-base document.

Usage:
    python3 ai_summarize_feedback.py [--date YYYY-MM-DD] [--dry-run]

crontab:
    5 10 * * * python3 .../ai_summarize_feedback.py >> /var/log/xiaokui_ai_summarize.log 2>&1
"""

import argparse
import json
import os
import subprocess
import sys
import time
import urllib.request
from datetime import datetime, date, timedelta

# === Configuration ===
# SECURITY: a live API key is committed in this file. The environment variable
# takes precedence so the literal can be removed once deployments export
# DEEPSEEK_API_KEY; the committed key should be rotated.
DEEPSEEK_API_KEY = (
    os.environ.get("DEEPSEEK_API_KEY") or "sk-7cf94305fb12473b956fd2ed2a6db05b"
)
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
DEEPSEEK_MODEL = "deepseek-v4-pro"

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CONTEXT_DIR = os.path.join(_SCRIPT_DIR, "..", "output", "daily_feedback")
SKILL_SCRIPT_DIR = os.path.join(
    _SCRIPT_DIR, "..", "skills", "feishu-feedback-sync", "scripts"
)

# Messages longer than this are truncated (with a "..." suffix) in the prompt.
MAX_MESSAGE_CHARS = 200
# Pause between API retry attempts.
RETRY_DELAY_SECONDS = 2
# Characters trimmed from both ends of the model's answer: ASCII quotes,
# curly double/single quotes, spaces and newlines.
_QUOTE_CHARS = "\"'\u201c\u201d\u2018\u2019 \n"

SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。你的任务是:
阅读一段来自测试群的多人对话(可能包含多个发言人、多轮讨论),
从中提炼出他们正在讨论的「具体问题是什么」,用一句中文描述清楚。

要求:
1. 只描述问题本身,不要评价或建议
2. 包含关键要素:在哪个端、哪个环节、什么表现
3. 如果对话中有多种说法,优先采用最后确认的描述
4. 输出仅一句中文,不要加任何前缀、编号、引号或换行
5. 如果对话全是无实质内容的闲聊(如"好的""收到"),输出"无明确问题"

输出格式(严格):直接输出问题描述,无任何额外文字。"""


def load_context(date_str):
    """Load the cluster_context JSON for the given date.

    Args:
        date_str: Date in YYYY-MM-DD form, used to build the file name.

    Returns:
        The parsed JSON document, or None when the file is missing or is not
        valid JSON (a warning is printed in both cases).
    """
    path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f" ⚠️ 无上下文文件: {path}")
    except json.JSONDecodeError as exc:
        # A truncated or corrupt file should not crash the cron job.
        print(f" ⚠️ 上下文文件解析失败: {path}: {exc}")
    return None


def build_user_prompt(cluster):
    """Build the LLM user prompt for a single issue cluster.

    The prompt starts with the cluster's priority, category and current
    conclusion, followed by one "[time] sender: content" line per message.
    Messages without text are skipped and long messages are truncated to
    MAX_MESSAGE_CHARS characters.

    Args:
        cluster: Dict with optional keys "priority", "category",
            "conclusion" and "messages" (a list of message dicts).

    Returns:
        The prompt as a single newline-joined string.
    """
    lines = [
        f"优先级: {cluster.get('priority', '?')}",
        f"分类: {cluster.get('category', '?')}",
        f"当前排查结论: {cluster.get('conclusion', '无')}",
        "",
        "--- 对话记录 ---",
    ]
    for msg in cluster.get("messages", []):
        raw = msg.get("content", "")
        # Tolerate null / non-string content instead of crashing on .strip().
        if raw is None:
            content = ""
        elif isinstance(raw, str):
            content = raw
        else:
            content = str(raw)
        # Covers pure media messages (image/file/sticker...) as well: they
        # carry no usable text, so the message type need not be inspected.
        if not content.strip():
            continue
        if len(content) > MAX_MESSAGE_CHARS:
            content = content[: MAX_MESSAGE_CHARS - 3] + "..."
        sender = msg.get("sender", "?")
        timestamp = msg.get("time", "")
        lines.append(f"[{timestamp}] {sender}: {content}")
    return "\n".join(lines)


def call_deepseek(system_prompt, user_prompt, max_retries=2):
    """Call the DeepSeek chat-completions API and return the cleaned answer.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        max_retries: Number of additional attempts after the first failure.
            Values below zero are treated as zero.

    Returns:
        The model's answer with surrounding whitespace and quotes removed.

    Raises:
        Exception: Whatever the final attempt raised (network error, HTTP
            error, malformed response).
    """
    body = json.dumps({
        "model": DEEPSEEK_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 256,
    }).encode()

    # Always make at least one attempt, even for a negative max_retries.
    attempts = max(1, max_retries + 1)
    for attempt in range(attempts):
        req = urllib.request.Request(
            f"{DEEPSEEK_BASE_URL}/chat/completions",
            data=body,
            headers={
                "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
                "Content-Type": "application/json",
            },
            method="POST",
        )
        try:
            # The context manager closes the HTTP response on every path.
            with urllib.request.urlopen(req, timeout=60) as resp:
                data = json.loads(resp.read())
            content = data["choices"][0]["message"]["content"].strip()
            # Strip the quotes models commonly wrap their answer in.
            return content.strip(_QUOTE_CHARS)
        except Exception as e:
            # Retry boundary: any failure is retried, the last one re-raised.
            if attempt >= attempts - 1:
                raise
            print(f" ⚠️ API 调用重试 {attempt + 1}: {e}")
            time.sleep(RETRY_DELAY_SECONDS)


def generate_descriptions(context_data, dry_run=False):
    """Generate an AI description for every cluster in the context data.

    Args:
        context_data: Parsed cluster_context document; its "clusters" list
            is processed in order.
        dry_run: When true, no API call is made and a placeholder
            description is produced for each cluster.

    Returns:
        A list of {"index": ..., "description": ...} dicts, or None when the
        context contains no clusters.
    """
    clusters = context_data.get("clusters", [])
    if not clusters:
        print(" ⚠️ 无问题簇数据")
        return None

    descriptions = []
    for cluster in clusters:
        idx = cluster.get("index", 0)
        print(f" 🤖 处理簇 #{idx}...")
        user_prompt = build_user_prompt(cluster)

        if dry_run:
            print(f" [DRY-RUN] Prompt 长度: {len(user_prompt)} chars")
            # Preview the first 200 characters of the prompt.
            print(f" [DRY-RUN] 对话预览: {user_prompt[:200]}...")
            description = f"[DRY-RUN] 问题{idx}"
        else:
            try:
                description = call_deepseek(SYSTEM_PROMPT, user_prompt)
            except Exception as e:
                # Best effort: one failed cluster must not abort the batch.
                # NOTE(review): this placeholder is later written back to
                # the document like a real description — confirm intended.
                print(f" ❌ 簇 #{idx} API 调用失败: {e}")
                description = f"[API调用失败: {str(e)[:50]}]"

        print(f" 📝 描述: {description}")
        descriptions.append({"index": idx, "description": description})
    return descriptions


def _save_descriptions(date_str, descriptions):
    """Write ai_descriptions_{date}.json into CONTEXT_DIR and return its path."""
    desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json")
    payload = {"date": date_str, "descriptions": descriptions}
    with open(desc_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    return desc_path


def apply_descriptions(date_str, descriptions):
    """Save the descriptions and apply them via sync_feishu_feedback.py.

    Runs "sync_feishu_feedback.py --apply-ai <json>" in a subprocess. On
    success the cluster_context file for the date is deleted so it is not
    processed again.

    Args:
        date_str: Date in YYYY-MM-DD form.
        descriptions: List produced by generate_descriptions().

    Returns:
        True when the sync script exited with status 0 and reported success
        on stdout, False otherwise (including timeout or launch failure).
    """
    # NOTE(review): nothing in this function imports from this directory;
    # the path insert looks vestigial — confirm before removing.
    sys.path.insert(0, SKILL_SCRIPT_DIR)

    desc_path = _save_descriptions(date_str, descriptions)
    print(f" 💾 描述已保存: {desc_path}")

    sync_script = os.path.join(SKILL_SCRIPT_DIR, "sync_feishu_feedback.py")
    env = os.environ.copy()
    env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui"
    env["HOME"] = "/root"
    env["PATH"] = "/root/.nvm/versions/node/v24.14.0/bin:" + env.get("PATH", "")

    try:
        result = subprocess.run(
            ["python3", sync_script, "--apply-ai", desc_path],
            capture_output=True,
            text=True,
            timeout=60,
            env=env,
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        # Timeout or failure to launch: report it instead of a traceback.
        print(f" ❌ 回写失败: {exc}")
        return False

    reported_ok = "AI 描述已应用" in result.stdout or "✅" in result.stdout
    # Require a clean exit too: a success marker printed before a later
    # crash must not cause the context file to be deleted.
    if result.returncode == 0 and reported_ok:
        print(f" ✅ AI 描述已回写到知识库文档")
        # Remove the context file so it is not processed again.
        context_path = os.path.join(
            CONTEXT_DIR, f"cluster_context_{date_str}.json"
        )
        try:
            os.remove(context_path)
        except FileNotFoundError:
            pass
        else:
            print(f" 🗑️ 已清理上下文文件: {context_path}")
        return True

    print(f" ❌ 回写失败: {result.stdout[:300]}")
    if result.stderr:
        print(f" stderr: {result.stderr[:300]}")
    return False


def _iso_date(value):
    """argparse type: accept only dates that parse as YYYY-MM-DD."""
    try:
        datetime.strptime(value, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}, expected YYYY-MM-DD"
        ) from None
    return value


def main():
    """Command-line entry point: summarize one day's feedback clusters."""
    parser = argparse.ArgumentParser(description="AI 问题归纳")
    # The date ends up in file names, so it is validated before use.
    parser.add_argument("--date", type=_iso_date, help="日期 YYYY-MM-DD,默认昨天")
    parser.add_argument(
        "--dry-run", action="store_true", help="仅预览不实际调用 API"
    )
    args = parser.parse_args()

    if args.date:
        date_str = args.date
    else:
        # Default to yesterday: the job runs daily at 10:05 and handles the
        # previous day's data generated at 10:00.
        date_str = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")

    print(f"📋 AI 问题归纳 - {date_str}")
    os.makedirs(CONTEXT_DIR, exist_ok=True)

    context = load_context(date_str)
    if not context:
        print(" ℹ️ 无待处理数据,退出")
        return

    descriptions = generate_descriptions(context, dry_run=args.dry_run)
    if not descriptions:
        return

    if args.dry_run:
        desc_path = _save_descriptions(date_str, descriptions)
        print(f"[DRY-RUN] 描述已保存到 {desc_path},未回写文档")
        return

    apply_descriptions(date_str, descriptions)


if __name__ == "__main__":
    main()