#!/usr/bin/env python3
"""
AI issue summarization script.

Reads cluster_context_{date}.json, calls an LLM to generate a concise problem
description for each issue cluster, writes ai_descriptions_{date}.json, and
then writes the descriptions back to the Feishu knowledge-base document.

Usage:
    python3 ai_summarize_feedback.py [--date YYYY-MM-DD] [--dry-run]

crontab:
    5 10 * * * python3 .../ai_summarize_feedback.py >> /var/log/xiaokui_ai_summarize.log 2>&1
"""

import argparse
import json
import os
import subprocess
import sys
import time
import urllib.request
from datetime import datetime, date, timedelta

# === Configuration ===
# SECURITY: an API key is committed in this file. Supply it through the
# DEEPSEEK_API_KEY environment variable instead; the literal below is kept only
# as a fallback so existing deployments keep working. Rotate the key and drop
# the fallback once the environment variable is configured.
DEEPSEEK_API_KEY = (
    os.environ.get("DEEPSEEK_API_KEY")
    or "sk-7cf94305fb12473b956fd2ed2a6db05b"
)
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
DEEPSEEK_MODEL = "deepseek-v4-pro"

# Both directories are resolved relative to the location of this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CONTEXT_DIR = os.path.join(_SCRIPT_DIR, "..", "output", "daily_feedback")
SKILL_SCRIPT_DIR = os.path.join(
    _SCRIPT_DIR, "..", "skills", "feishu-feedback-sync", "scripts"
)

# System prompt sent verbatim to the model (kept in Chinese on purpose): it asks
# for a single Chinese sentence describing the issue discussed in the chat.
SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。你的任务是:
阅读一段来自测试群的多人对话(可能包含多个发言人、多轮讨论),
从中提炼出他们正在讨论的「具体问题是什么」,用一句中文描述清楚。

要求:
1. 只描述问题本身,不要评价或建议
2. 包含关键要素:在哪个端、哪个环节、什么表现
3. 如果对话中有多种说法,优先采用最后确认的描述
4. 输出仅一句中文,不要加任何前缀、编号、引号或换行
5. 如果对话全是无实质内容的闲聊(如"好的""收到"),输出"无明确问题"

输出格式(严格):直接输出问题描述,无任何额外文字。"""


def load_context(date_str):
    """Load the cluster-context JSON for one date.

    Args:
        date_str: Date used in the file name
            ``cluster_context_{date_str}.json`` under ``CONTEXT_DIR``.

    Returns:
        The parsed JSON content, or ``None`` (after printing a warning) when
        the file does not exist.
    """
    context_file = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
    if os.path.exists(context_file):
        with open(context_file, "r", encoding="utf-8") as fh:
            return json.load(fh)

    print(f" ⚠️ 无上下文文件: {context_file}")
    return None


def build_user_prompt(cluster):
    """Build the LLM user prompt for a single issue cluster.

    The prompt starts with the cluster's priority, category and current
    conclusion, followed by one ``[time] sender: content`` line per message
    that carries text.

    Args:
        cluster: Mapping describing one cluster. Reads the optional keys
            ``priority``, ``category``, ``conclusion`` and ``messages``; each
            message is a mapping with optional ``sender``, ``content`` and
            ``time`` keys.

    Returns:
        The prompt as a single newline-joined string.
    """
    max_content_len = 200

    lines = [
        f"优先级: {cluster.get('priority', '?')}",
        f"分类: {cluster.get('category', '?')}",
        f"当前排查结论: {cluster.get('conclusion', '无')}",
        "",
        "--- 对话记录 ---",
    ]

    # ``messages`` may be present but null in the JSON; treat that as empty.
    for msg in cluster.get("messages") or []:
        content = msg.get("content")
        # Null or non-string content must not crash the ``strip`` below.
        if content is None:
            content = ""
        elif not isinstance(content, str):
            content = str(content)

        # A message without usable text (pure image, sticker, file, ...) gives
        # the model nothing to summarize, whatever its msg_type is.
        if not content.strip():
            continue

        # Cap very long messages so one paste cannot dominate the prompt.
        if len(content) > max_content_len:
            content = content[: max_content_len - 3] + "..."

        sender = msg.get("sender", "?")
        timestamp = msg.get("time", "")
        lines.append(f"[{timestamp}] {sender}: {content}")

    return "\n".join(lines)


def call_deepseek(system_prompt, user_prompt, max_retries=2):
    """Request an issue description from the DeepSeek chat-completions API.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_prompt: Text sent as the ``user`` message.
        max_retries: Number of extra attempts after a failed one; negative
            values are treated as 0 so at least one attempt is always made.

    Returns:
        The model's reply with surrounding whitespace and quote characters
        removed.

    Raises:
        Exception: Whatever the final attempt raised (network failure, HTTP
            error, unexpected response shape) once all attempts are used up.
    """
    payload = json.dumps({
        "model": DEEPSEEK_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 256,
    }).encode()

    # The request is identical for every attempt, so build it once.
    request = urllib.request.Request(
        f"{DEEPSEEK_BASE_URL}/chat/completions",
        data=payload,
        headers={
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    # Characters the model sometimes wraps its answer in despite the prompt:
    # ASCII quotes plus typographic double/single quotes, spaces and newlines.
    # Escapes are used so the set survives re-encoding of this file.
    wrapper_chars = "\"'\u201c\u201d\u2018\u2019 \n"

    attempts = max(max_retries, 0) + 1
    for attempt in range(attempts):
        try:
            # The context manager closes the HTTP response on every path.
            with urllib.request.urlopen(request, timeout=60) as resp:
                data = json.loads(resp.read())
            content = data["choices"][0]["message"]["content"].strip()
            return content.strip(wrapper_chars)
        except Exception as e:
            # Best-effort retry on any failure; the last one propagates.
            if attempt >= attempts - 1:
                raise
            print(f" ⚠️ API 调用重试 {attempt + 1}: {e}")
            time.sleep(2)


def generate_descriptions(context_data, dry_run=False):
    """Produce an AI description for every cluster in the context data.

    Args:
        context_data: Mapping whose ``clusters`` entry is a list of cluster
            mappings; each cluster's ``index`` (default 0) is echoed back.
        dry_run: When true, the API is not called; the prompt length and a
            200-character preview are printed and a placeholder description
            is used instead.

    Returns:
        A list of ``{"index": ..., "description": ...}`` dicts in cluster
        order, or ``None`` when there are no clusters. A failed API call does
        not abort the run: its error text becomes that cluster's description.
    """
    clusters = context_data.get("clusters", [])
    if not clusters:
        print(" ⚠️ 无问题簇数据")
        return None

    results = []
    for item in clusters:
        cluster_no = item.get("index", 0)
        print(f" 🤖 处理簇 #{cluster_no}...")

        prompt = build_user_prompt(item)

        if not dry_run:
            try:
                text = call_deepseek(SYSTEM_PROMPT, prompt)
            except Exception as err:
                print(f" ❌ 簇 #{cluster_no} API 调用失败: {err}")
                text = f"[API调用失败: {str(err)[:50]}]"
        else:
            print(f" [DRY-RUN] Prompt 长度: {len(prompt)} chars")
            # Show only the first 200 characters of the prompt.
            print(f" [DRY-RUN] 对话预览: {prompt[:200]}...")
            text = f"[DRY-RUN] 问题{cluster_no}"

        print(f" 📝 描述: {text}")
        results.append({"index": cluster_no, "description": text})

    return results


def apply_descriptions(date_str, descriptions):
    """Save the descriptions to disk and write them back to the Feishu document.

    Writes ``ai_descriptions_{date_str}.json`` under ``CONTEXT_DIR``, then runs
    ``sync_feishu_feedback.py --apply-ai <that file>`` as a subprocess. When the
    subprocess output reports success, the matching ``cluster_context`` file is
    deleted so the same day is not picked up again.

    Args:
        date_str: Date string used in the file names.
        descriptions: List of ``{"index": ..., "description": ...}`` dicts.

    Returns:
        ``True`` when the write-back was reported as successful, ``False``
        otherwise, including when the sync script could not be started or
        timed out.
    """
    # NOTE(review): looks unused in this file, since the sync script is run as
    # a subprocess rather than imported; kept to preserve existing behavior.
    sys.path.insert(0, SKILL_SCRIPT_DIR)

    # Persist the descriptions first; the sync script reads them from this file.
    desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json")
    payload = {"date": date_str, "descriptions": descriptions}
    with open(desc_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f" 💾 描述已保存: {desc_path}")

    # Run the sync script with --apply-ai in an environment that points at the
    # Lark CLI credentials and the Node installation.
    sync_script = os.path.join(SKILL_SCRIPT_DIR, "sync_feishu_feedback.py")
    env = os.environ.copy()
    env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui"
    env["HOME"] = "/root"
    env["PATH"] = "/root/.nvm/versions/node/v24.14.0/bin:" + env.get("PATH", "")

    try:
        result = subprocess.run(
            ["python3", sync_script, "--apply-ai", desc_path],
            capture_output=True, text=True, timeout=60, env=env,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        # A hung or unstartable sync script is a failed write-back, not a
        # crash: keep the bool contract so the caller can react.
        print(f" ❌ 回写失败: {e}")
        return False

    # NOTE(review): success is judged from stdout markers only; the exit code
    # is not checked. Confirm against the sync script's contract.
    if "AI 描述已应用" not in result.stdout and "✅" not in result.stdout:
        print(f" ❌ 回写失败: {result.stdout[:300]}")
        if result.stderr:
            print(f" stderr: {result.stderr[:300]}")
        return False

    print(" ✅ AI 描述已回写到知识库文档")
    # Remove the context file after a successful write-back so the heartbeat
    # does not process the same day again.
    context_path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
    try:
        os.remove(context_path)
    except FileNotFoundError:
        pass
    else:
        print(f" 🗑️ 已清理上下文文件: {context_path}")
    return True


def main():
    """Command-line entry point.

    Parses ``--date`` / ``--dry-run``, loads that day's cluster context,
    generates the descriptions, and either only saves them (dry run) or writes
    them back through ``apply_descriptions``.

    Exits with status 1 when the write-back fails, and through
    ``parser.error`` (status 2) when ``--date`` is not a valid ``YYYY-MM-DD``
    date. Returns normally in every other case.
    """
    parser = argparse.ArgumentParser(description="AI 问题归纳")
    parser.add_argument("--date", help="日期 YYYY-MM-DD,默认昨天")
    parser.add_argument("--dry-run", action="store_true", help="仅预览不实际调用 API")
    args = parser.parse_args()

    if args.date:
        # The value ends up inside file names, so accept only real calendar
        # dates and normalize them to zero-padded YYYY-MM-DD.
        try:
            date_str = datetime.strptime(args.date, "%Y-%m-%d").date().isoformat()
        except ValueError:
            parser.error(f"--date 格式无效,应为 YYYY-MM-DD: {args.date}")
    else:
        # Default to yesterday: the job runs daily at 10:05 and handles the
        # previous day's data generated at 10:00.
        date_str = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")

    print(f"📋 AI 问题归纳 - {date_str}")
    os.makedirs(CONTEXT_DIR, exist_ok=True)

    context = load_context(date_str)
    if not context:
        print(" ℹ️ 无待处理数据,退出")
        return

    descriptions = generate_descriptions(context, dry_run=args.dry_run)
    if not descriptions:
        return

    if args.dry_run:
        # Dry run: save the placeholder descriptions but leave the document alone.
        desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json")
        payload = {"date": date_str, "descriptions": descriptions}
        with open(desc_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
        print(f"[DRY-RUN] 描述已保存到 {desc_path},未回写文档")
        return

    # Surface a failed write-back through the exit status so schedulers and
    # wrappers can detect it instead of always seeing success.
    if not apply_descriptions(date_str, descriptions):
        sys.exit(1)


# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()