# ai_member_xiaoxi/scripts/churn_lesson_split.py

#!/usr/bin/env python3
"""流失用户最后完课 Lesson 分布 — 拆成 U00 和 U01+ 各一张"""
import os, psycopg2
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

# Connection settings for the PostgreSQL database queried by this script.
# The password comes from the PG_ONLINE_PASSWORD environment variable and
# falls back to an empty string when the variable is not set.
PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
PG_PORT = 28591
PG_USER = "ai_member"
PG_DB = "vala_bi"
PG_PASSWORD = os.getenv("PG_ONLINE_PASSWORD", "")

# One connection and one cursor are opened here and shared by every query
# issued further down in the script.
conn = psycopg2.connect(
    host=PG_HOST,
    port=PG_PORT,
    user=PG_USER,
    password=PG_PASSWORD,
    dbname=PG_DB,
)
cur = conn.cursor()

# Per course level: the (min, max) chapter_id bounds used in the SQL
# ``BETWEEN`` filter (inclusive on both ends).
level_ranges = dict(L1=(333, 581), L2=(55, 331))
# Numeric suffixes of the bi_user_chapter_play_record_<n> tables to union.
shards = [*range(8)]

# Font fallback chain so the Chinese title/axis labels render, and keep the
# ASCII minus sign so negative tick labels do not depend on the CJK font.
plt.rcParams.update({
    'font.sans-serif': ['WenQuanYi Micro Hei', 'SimHei', 'Noto Sans CJK SC', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

def _fetch_churn_counts(cursor, ch_min, ch_max):
    """Count churned accounts per (course_unit, course_lesson).

    For every account, the most recent play record with ``play_status = 1``
    and a ``chapter_id`` inside ``[ch_min, ch_max]`` is taken across all
    shard tables. An account counts as churned when that latest record is
    older than 30 days. The chapter of that record is mapped to its
    unit/lesson through ``bi_level_unit_lesson``.

    Args:
        cursor: An open DB-API cursor (psycopg2) used to run the query.
        ch_min: Lower chapter_id bound (inclusive).
        ch_max: Upper chapter_id bound (inclusive).

    Returns:
        dict mapping ``(course_unit, course_lesson)`` to the number of
        churned accounts whose last record falls on that lesson.
    """
    # Table names cannot be bound as parameters, so the shard suffix is
    # interpolated (it comes from the module-level integer list ``shards``).
    # The chapter bounds are passed as named bound parameters instead.
    shard_selects = [
        f"""
            SELECT c.account_id, r.chapter_id, r.created_at
            FROM bi_user_chapter_play_record_{shard} r
            JOIN bi_vala_app_character c ON r.user_id = c.id
            WHERE r.play_status = 1
              AND r.chapter_id BETWEEN %(ch_min)s AND %(ch_max)s
        """
        for shard in shards
    ]
    union_sql = " UNION ALL ".join(shard_selects)

    sql = f"""
        WITH all_records AS ({union_sql}),
        user_last AS (
            SELECT DISTINCT ON (account_id) account_id, chapter_id, created_at as last_study
            FROM all_records
            ORDER BY account_id, created_at DESC
        ),
        churned AS (
            SELECT account_id, chapter_id FROM user_last
            WHERE last_study < NOW() - INTERVAL '30 days'
        )
        SELECT l.course_unit, l.course_lesson, COUNT(*) as cnt
        FROM churned c
        JOIN bi_level_unit_lesson l ON c.chapter_id = l.id
        GROUP BY l.course_unit, l.course_lesson
        ORDER BY l.course_unit, l.course_lesson
    """
    cursor.execute(sql, {"ch_min": ch_min, "ch_max": ch_max})
    return {(unit, lesson): cnt for unit, lesson, cnt in cursor.fetchall()}


def _unit_ticks(lessons):
    """Return x positions and labels marking the first lesson of each unit.

    Args:
        lessons: Ordered sequence of ``(unit, lesson)`` pairs.

    Returns:
        ``(positions, unit_names)`` - two parallel lists; a tick is emitted
        at every index where the unit differs from the previous entry.
    """
    positions, unit_names = [], []
    previous_unit = None
    for idx, (unit, _lesson) in enumerate(lessons):
        if unit != previous_unit:
            positions.append(idx)
            unit_names.append(unit)
            previous_unit = unit
    return positions, unit_names


def _plot_group(level, suffix, lessons, counts):
    """Render one line chart of churn counts and save it as a PNG.

    Args:
        level: Course level name; used in the title, file name and to pick
            the line colour.
        suffix: Group name ("U00" or "U01+"); used in the title, file name
            and to pick the figure width.
        lessons: Non-empty ordered sequence of ``(unit, lesson)`` pairs that
            forms the x axis.
        counts: Mapping ``(unit, lesson) -> count``; missing lessons plot as 0.

    Returns:
        The path of the written PNG file.
    """
    values = [counts.get((unit, lesson), 0) for unit, lesson in lessons]
    tick_positions, tick_names = _unit_ticks(lessons)
    xs = range(len(lessons))

    out_path = f'/root/.openclaw/workspace/output/churn_lesson_{level}_{suffix}.png'
    # Make sure the target directory exists so savefig does not fail on a
    # fresh workspace.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    fig, ax = plt.subplots(figsize=(14 if suffix == "U00" else 18, 5))
    try:
        color = '#E74C3C' if level == 'L1' else '#3498DB'
        ax.plot(xs, values, color=color, linewidth=1.5, marker='o', markersize=3)

        # Label only non-zero points to keep the chart readable.
        for xi, vi in zip(xs, values):
            if vi > 0:
                ax.annotate(str(vi), (xi, vi), textcoords="offset points", xytext=(0, 5),
                            fontsize=6, ha='center', color='#555')

        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_names, fontsize=9, rotation=0)
        ax.set_ylabel('流失人数', fontsize=11)
        ax.set_title(f'{level} 流失用户最后完课 Lesson 分布 — {suffix}', fontsize=14, fontweight='bold')
        ax.set_xlim(-0.5, len(lessons) - 0.5)
        ax.grid(axis='y', alpha=0.3)
        ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))

        fig.tight_layout()
        fig.savefig(out_path, dpi=150)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return out_path


# Main flow: for each course level, fetch churn counts, then draw one chart
# for unit U00 and one for all remaining units. The cursor and connection
# are closed in ``finally`` so they are released even when a query or a
# plot raises.
try:
    for level, (ch_min, ch_max) in level_ranges.items():
        counts = _fetch_churn_counts(cur, ch_min, ch_max)

        # Full ordered lesson list of the level, so lessons without any
        # churned account still appear on the x axis with a value of 0.
        cur.execute(
            "SELECT course_unit, course_lesson FROM bi_level_unit_lesson "
            "WHERE course_level = %s ORDER BY course_unit, course_lesson",
            (level,),
        )
        all_lessons = cur.fetchall()

        # Split into U00 and U01+
        groups = [
            ("U00", [(unit, lesson) for unit, lesson in all_lessons if unit == 'U00']),
            ("U01+", [(unit, lesson) for unit, lesson in all_lessons if unit != 'U00']),
        ]
        for suffix, lessons in groups:
            if not lessons:
                # Nothing to draw; an empty group would only produce a blank
                # chart with degenerate x-axis limits.
                print(f"Skipped: {level} {suffix} (no lessons)")
                continue
            out_path = _plot_group(level, suffix, lessons, counts)
            print(f"Saved: {out_path}")
finally:
    cur.close()
    conn.close()
print("Done.")