ai_member_xiaokui/scripts/ai_summarize_feedback.py
2026-05-22 08:10:01 +08:00

225 lines
8.2 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
AI 问题归纳脚本
读取 cluster_context_{date}.json,调用 LLM 为每个问题簇生成精炼的问题描述,
输出 ai_descriptions_{date}.json,然后回写到飞书知识库文档。
用法:
python3 ai_summarize_feedback.py [--date YYYY-MM-DD] [--dry-run]
crontab:
5 10 * * * python3 .../ai_summarize_feedback.py >> /var/log/xiaokui_ai_summarize.log 2>&1
"""
import sys, os, json, argparse, urllib.request
from datetime import datetime, date, timedelta
# === Configuration ===
# Directory containing this script; both data directories below are resolved relative to it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# NOTE(review): an API key is committed in source control. The DEEPSEEK_API_KEY
# environment variable now takes precedence; the literal remains only as a fallback
# so existing cron invocations keep working. Rotate the key and delete the fallback
# once the environment variable is provisioned.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY") or "sk-7cf94305fb12473b956fd2ed2a6db05b"
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
DEEPSEEK_MODEL = "deepseek-v4-pro"

# Where cluster_context_{date}.json is read from and ai_descriptions_{date}.json is written to.
CONTEXT_DIR = os.path.join(_SCRIPT_DIR, "..", "output", "daily_feedback")
# Directory holding sync_feishu_feedback.py, which performs the document write-back.
SKILL_SCRIPT_DIR = os.path.join(_SCRIPT_DIR, "..", "skills", "feishu-feedback-sync", "scripts")

# System prompt sent with every request: asks the model for exactly one Chinese
# sentence describing the issue, or "无明确问题" when the chat has no real content.
SYSTEM_PROMPT = """你是一个游戏产品的问题归纳助手。你的任务是:
阅读一段来自测试群的多人对话(可能包含多个发言人、多轮讨论),
从中提炼出他们正在讨论的「具体问题是什么」,用一句中文描述清楚。
要求:
1. 只描述问题本身,不要评价或建议
2. 包含关键要素:在哪个端、哪个环节、什么表现
3. 如果对话中有多种说法,优先采用最后确认的描述
4. 输出仅一句中文,不要加任何前缀、编号、引号或换行
5. 如果对话全是无实质内容的闲聊(如"好的""收到"),输出"无明确问题"
输出格式(严格):直接输出问题描述,无任何额外文字。"""
def load_context(date_str):
    """Load the cluster-context JSON for one day.

    Args:
        date_str: Date string used to build the file name
            ``cluster_context_{date_str}.json`` under ``CONTEXT_DIR``.

    Returns:
        The parsed JSON content, or ``None`` (after printing a warning) when
        the file does not exist.
    """
    context_file = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
    if os.path.exists(context_file):
        with open(context_file, "r", encoding="utf-8") as handle:
            return json.load(handle)

    # Nothing was produced for this date.
    print(f" ⚠️ 无上下文文件: {context_file}")
    return None
def build_user_prompt(cluster):
    """Build the user-side LLM prompt for a single issue cluster.

    Args:
        cluster: Mapping describing one cluster. The optional keys
            ``priority``, ``category``, ``conclusion`` and ``messages`` are
            read; each message is a mapping with optional ``sender``,
            ``content`` and ``time`` keys.

    Returns:
        A newline-joined string: three header lines, a blank line, the
        transcript marker, then one ``[time] sender: content`` line for every
        message that carries non-blank text.
    """
    lines = [
        f"优先级: {cluster.get('priority', '?')}",
        f"分类: {cluster.get('category', '?')}",
        f"当前排查结论: {cluster.get('conclusion', '')}",
        "",
        "--- 对话记录 ---",
    ]

    # ``or []`` also covers an explicit null ``messages`` value in the JSON.
    for msg in cluster.get("messages") or []:
        # An explicit null ``content`` would otherwise crash on ``.strip()``.
        content = msg.get("content") or ""

        # Messages without text (including pure image/file/sticker messages)
        # contribute nothing to the prompt.
        if not content.strip():
            continue

        # Cap each message at 200 characters, ellipsis included.
        if len(content) > 200:
            content = content[:197] + "..."

        sender = msg.get("sender", "?")
        sent_at = msg.get("time", "")
        lines.append(f"[{sent_at}] {sender}: {content}")

    return "\n".join(lines)
def call_deepseek(system_prompt, user_prompt, max_retries=2):
    """Call the DeepSeek chat-completions endpoint and return the cleaned reply.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_prompt: Text sent as the ``user`` message.
        max_retries: Number of additional attempts after the first failure.

    Returns:
        The content of the first choice, with surrounding whitespace and
        quote characters stripped.

    Raises:
        Exception: Whatever the last attempt raised (network error, HTTP
            error, malformed response) once all retries are exhausted.
    """
    import time

    # Characters trimmed from both ends of the reply: ASCII and curly quotes,
    # spaces and newlines.
    # NOTE(review): the original strip set listed the ASCII quotes twice, which
    # looks like curly quotes lost to character normalisation; they are spelled
    # out as escapes here so they cannot be lost again.
    quote_chars = "\"'\u201c\u201d\u2018\u2019 \n"

    body = json.dumps({
        "model": DEEPSEEK_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 256,
    }).encode()

    for attempt in range(max_retries + 1):
        try:
            req = urllib.request.Request(
                f"{DEEPSEEK_BASE_URL}/chat/completions",
                data=body,
                headers={
                    "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
                    "Content-Type": "application/json",
                },
                method="POST",
            )
            # The context manager closes the HTTP response even if parsing fails.
            with urllib.request.urlopen(req, timeout=60) as resp:
                data = json.loads(resp.read())
            content = data["choices"][0]["message"]["content"].strip()
            return content.strip(quote_chars)
        except Exception as e:
            # Out of retries: surface the last error to the caller.
            if attempt >= max_retries:
                raise
            print(f" ⚠️ API 调用重试 {attempt + 1}: {e}")
            time.sleep(2)
def generate_descriptions(context_data, dry_run=False):
    """Produce one AI description per issue cluster.

    Args:
        context_data: Parsed context mapping; its ``clusters`` list is read.
        dry_run: When true, no API call is made; a prompt preview is printed
            and a placeholder description is recorded instead.

    Returns:
        A list of ``{"index": ..., "description": ...}`` dicts, or ``None``
        when there are no clusters.
    """
    cluster_list = context_data.get("clusters", [])
    if not cluster_list:
        print(" ⚠️ 无问题簇数据")
        return None

    results = []
    for item in cluster_list:
        cluster_no = item.get("index", 0)
        print(f" 🤖 处理簇 #{cluster_no}...")
        prompt = build_user_prompt(item)

        if not dry_run:
            try:
                text = call_deepseek(SYSTEM_PROMPT, prompt)
            except Exception as exc:
                # A failed call is recorded as a placeholder so the remaining
                # clusters are still processed.
                print(f" ❌ 簇 #{cluster_no} API 调用失败: {exc}")
                text = f"[API调用失败: {str(exc)[:50]}]"
        else:
            print(f" [DRY-RUN] Prompt 长度: {len(prompt)} chars")
            # Preview the first 200 characters of the prompt.
            print(f" [DRY-RUN] 对话预览: {prompt[:200]}...")
            text = f"[DRY-RUN] 问题{cluster_no}"

        print(f" 📝 描述: {text}")
        results.append({"index": cluster_no, "description": text})

    return results
def apply_descriptions(date_str, descriptions):
    """Save the descriptions to disk and write them back via the sync script.

    Writes ``ai_descriptions_{date_str}.json`` under ``CONTEXT_DIR``, then runs
    ``sync_feishu_feedback.py --apply-ai <that file>`` as a subprocess. On
    success the day's ``cluster_context_{date_str}.json`` is removed.

    Args:
        date_str: Date string used in both file names.
        descriptions: List of description dicts to persist under the
            ``descriptions`` key.

    Returns:
        ``True`` when the sync script exited with status 0 and printed its
        success marker, ``False`` otherwise.

    Raises:
        subprocess.TimeoutExpired: If the sync script runs longer than 60 seconds.
    """
    import subprocess

    # Kept from the original; nothing in this function imports from that directory.
    sys.path.insert(0, SKILL_SCRIPT_DIR)

    # Persist the descriptions first so the sync script can read them.
    desc_path = os.path.join(CONTEXT_DIR, f"ai_descriptions_{date_str}.json")
    payload = {"date": date_str, "descriptions": descriptions}
    with open(desc_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f" 💾 描述已保存: {desc_path}")

    # Run the sync script with --apply-ai in an environment pointing at the
    # CLI credentials and the Node toolchain.
    sync_script = os.path.join(SKILL_SCRIPT_DIR, "sync_feishu_feedback.py")
    env = os.environ.copy()
    env["LARKSUITE_CLI_CONFIG_DIR"] = "/root/.openclaw/credentials/xiaokui"
    env["HOME"] = "/root"
    env["PATH"] = "/root/.nvm/versions/node/v24.14.0/bin:" + env.get("PATH", "")
    result = subprocess.run(
        ["python3", sync_script, "--apply-ai", desc_path],
        capture_output=True, text=True, timeout=60, env=env
    )

    # The original condition also accepted `"" in result.stdout`, which is
    # always true, so failures were reported as success and the context file
    # was deleted anyway. Require a clean exit plus the success marker; a false
    # negative only leaves the context file in place for a later run.
    # NOTE(review): if the sync script prints a second success marker, add it here.
    succeeded = result.returncode == 0 and "AI 描述已应用" in result.stdout

    if not succeeded:
        print(f" ❌ 回写失败: {result.stdout[:300]}")
        if result.stderr:
            print(f" stderr: {result.stderr[:300]}")
        return False

    print(f" ✅ AI 描述已回写到知识库文档")
    # Remove the context file after a successful write-back so it is not
    # processed again.
    context_path = os.path.join(CONTEXT_DIR, f"cluster_context_{date_str}.json")
    if os.path.exists(context_path):
        os.remove(context_path)
        print(f" 🗑️ 已清理上下文文件: {context_path}")
    return True
def main():
    """Command-line entry point.

    Parses ``--date`` and ``--dry-run``, loads that day's cluster context,
    generates the descriptions, and then either saves them only (dry run) or
    hands them to ``apply_descriptions`` for write-back.
    """
    cli = argparse.ArgumentParser(description="AI 问题归纳")
    cli.add_argument("--date", help="日期 YYYY-MM-DD默认昨天")
    cli.add_argument("--dry-run", action="store_true", help="仅预览不实际调用 API")
    opts = cli.parse_args()

    # Without --date, process yesterday's data (the job runs daily at 10:05
    # on data generated at 10:00 for the previous day).
    target_day = opts.date if opts.date else (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    print(f"📋 AI 问题归纳 - {target_day}")

    os.makedirs(CONTEXT_DIR, exist_ok=True)

    loaded = load_context(target_day)
    if not loaded:
        print(" 无待处理数据,退出")
        return

    generated = generate_descriptions(loaded, dry_run=opts.dry_run)
    if not generated:
        return

    if not opts.dry_run:
        apply_descriptions(target_day, generated)
        return

    # Dry run: save the descriptions locally but skip the document write-back.
    out_file = os.path.join(CONTEXT_DIR, f"ai_descriptions_{target_day}.json")
    with open(out_file, "w", encoding="utf-8") as sink:
        json.dump({"date": target_day, "descriptions": generated}, sink, ensure_ascii=False, indent=2)
    print(f"[DRY-RUN] 描述已保存到 {out_file},未回写文档")


if __name__ == "__main__":
    main()