#!/usr/bin/env python3
"""Distribution of the last completed lesson among churned users, as line charts.

For every course level in ``level_ranges`` the script:

1. reads the play records with ``play_status = 1`` from all sharded
   ``bi_user_chapter_play_record_<n>`` tables, restricted to the level's
   chapter-id range;
2. keeps, per account, the most recent such record and treats the account as
   churned when that record is more than 30 days old;
3. counts churned accounts per (course_unit, course_lesson) and draws a line
   chart saved as ``churn_lesson_<level>.png`` under ``OUTPUT_DIR``;
4. prints the ten lessons with the highest churn count.

The database password is read from the ``PG_ONLINE_PASSWORD`` environment
variable.
"""
import os

import psycopg2
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
PG_PORT = 28591
PG_USER = "ai_member"
PG_DB = "vala_bi"
PG_PASSWORD = os.environ.get("PG_ONLINE_PASSWORD", "")

# Directory that receives the generated PNG files.
OUTPUT_DIR = '/root/.openclaw/workspace/output'

# Course level -> inclusive (min, max) chapter-id range used to filter records.
level_ranges = {"L1": (333, 581), "L2": (55, 331)}
# Suffixes of the sharded bi_user_chapter_play_record_<n> tables.
shards = list(range(8))

# Prefer fonts that can render the Chinese labels; DejaVu Sans is the fallback.
plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei', 'SimHei', 'Noto Sans CJK SC', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False


def _build_churn_sql(shard_ids):
    """Return the churn-count query covering every shard in *shard_ids*.

    The query expects the named parameters ``ch_min`` and ``ch_max`` (inclusive
    chapter-id bounds) and yields ``(course_unit, course_lesson, cnt)`` rows.
    Table names cannot be bound as parameters, so the shard suffix is
    interpolated after being forced through ``int``.
    """
    per_shard = [
        f"""
        SELECT c.account_id, r.chapter_id, r.created_at
        FROM bi_user_chapter_play_record_{int(shard)} r
        JOIN bi_vala_app_character c ON r.user_id = c.id
        WHERE r.play_status = 1
          AND r.chapter_id BETWEEN %(ch_min)s AND %(ch_max)s
        """
        for shard in shard_ids
    ]
    all_records = " UNION ALL ".join(per_shard)
    return f"""
        WITH all_records AS ({all_records}),
        user_last AS (
            SELECT DISTINCT ON (account_id) account_id, chapter_id, created_at as last_study
            FROM all_records
            ORDER BY account_id, created_at DESC
        ),
        churned AS (
            SELECT account_id, chapter_id
            FROM user_last
            WHERE last_study < NOW() - INTERVAL '30 days'
        )
        SELECT l.course_unit, l.course_lesson, COUNT(*) as cnt
        FROM churned c
        JOIN bi_level_unit_lesson l ON c.chapter_id = l.id
        GROUP BY l.course_unit, l.course_lesson
        ORDER BY l.course_unit, l.course_lesson
    """


def _unit_ticks(all_lessons):
    """Return ``(positions, names)`` marking the first lesson of each unit.

    *all_lessons* is the ordered list of ``(unit, lesson)`` rows; a tick is
    emitted whenever the unit differs from the previous row's unit.
    """
    positions = []
    names = []
    prev_unit = None
    for idx, (unit, _lesson) in enumerate(all_lessons):
        if unit != prev_unit:
            positions.append(idx)
            names.append(unit)
            prev_unit = unit
    return positions, names


def _plot_level(level, all_lessons, labels, values):
    """Draw the churn line chart for *level*, save it and return the file path.

    *labels* are the ``<unit>_<lesson>`` keys in display order and *values* the
    matching churn counts. The figure is always closed, even if drawing or
    saving fails.
    """
    x_ticks, x_tick_labels = _unit_ticks(all_lessons)

    fig, ax = plt.subplots(figsize=(20, 6))
    try:
        ax.plot(range(len(labels)), values,
                color='#E74C3C' if level == 'L1' else '#3498DB',
                linewidth=1.2, marker='.', markersize=2)

        # Index of the first label that is not in unit U00; everything before
        # it is the leading U00 run that gets highlighted.
        u00_end = next(
            (idx for idx, label in enumerate(labels) if not label.startswith('U00_')),
            len(labels),
        )
        has_u00 = u00_end > 0
        if has_u00:
            ax.axvspan(-0.5, u00_end - 0.5, alpha=0.08, color='orange')

        ax.set_xticks(x_ticks)
        ax.set_xticklabels(x_tick_labels, fontsize=8, rotation=45)
        ax.set_ylabel('流失人数', fontsize=11)
        ax.set_title(f'{level} 流失用户最后一次完课 Lesson 分布', fontsize=14, fontweight='bold')
        ax.set_xlim(-0.5, len(labels) - 0.5)
        ax.grid(axis='y', alpha=0.3)

        # Label the U00 band only when it exists; the band spans
        # [-0.5, u00_end - 0.5], so its horizontal centre is (u00_end - 1) / 2.
        if has_u00:
            ax.annotate('U00 (体验单元)',
                        xy=((u00_end - 1) / 2, max(values[:u00_end])),
                        fontsize=9, color='orange', ha='center', va='bottom',
                        bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

        fig.tight_layout()
        # savefig does not create missing directories.
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        out_path = os.path.join(OUTPUT_DIR, f'churn_lesson_{level}.png')
        fig.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)
    return out_path


def _report_level(cur, level, ch_min, ch_max):
    """Query, plot and print the churn distribution for one course level.

    *cur* is an open database cursor; *ch_min*/*ch_max* are the inclusive
    chapter-id bounds used to filter the play records for *level*.
    """
    cur.execute(_build_churn_sql(shards), {"ch_min": ch_min, "ch_max": ch_max})
    data_map = {f"{unit}_{lesson}": cnt for unit, lesson, cnt in cur.fetchall()}

    # Every unit/lesson of the level, so lessons without churn still appear
    # on the x-axis with a count of 0.
    cur.execute(
        "SELECT course_unit, course_lesson FROM bi_level_unit_lesson "
        "WHERE course_level = %s ORDER BY course_unit, course_lesson",
        (level,),
    )
    all_lessons = cur.fetchall()

    # Ordered labels such as U00_L01, U00_L02, ...
    labels = [f"{unit}_{lesson}" for unit, lesson in all_lessons]
    values = [data_map.get(label, 0) for label in labels]

    out_path = _plot_level(level, all_lessons, labels, values)
    print(f"Saved: {out_path}")

    # Ten lessons with the highest churn count.
    ranked = sorted(data_map.items(), key=lambda item: item[1], reverse=True)
    print(f"\n{level} Top 10 流失 Lesson:")
    for key, count in ranked[:10]:
        print(f" {key}: {count}人")


conn = psycopg2.connect(host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB)
try:
    # The cursor context manager closes the cursor on exit; the connection is
    # closed in ``finally`` so neither leaks when a query or plot fails.
    with conn.cursor() as cur:
        for level, (ch_min, ch_max) in level_ranges.items():
            _report_level(cur, level, ch_min, ch_max)
finally:
    conn.close()

print("\nDone.")