# ai_member_xiaoxi/scripts/churn_rate_l1_l2.py

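#!/usr/bin/env python3
"""Compute the churn rate of L1/L2 users.

Metric definition (confirmed by Li Chenglong (李承龙)):
- Denominator: users who have ever had a learning record (play_status=1),
  de-duplicated by account_id.
- Numerator: users in the denominator whose most recent learning record is
  more than 30 days old.
- L1 and L2 are computed separately.
"""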
import os
import psycopg2

# PostgreSQL connection settings used when opening the database connection.
# The password is not stored in the file: it is read from the
# PG_ONLINE_PASSWORD environment variable and falls back to an empty string
# when that variable is unset.
PG_DB = "vala_bi"
PG_USER = "ai_member"
PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
PG_PORT = 28591
PG_PASSWORD = os.getenv("PG_ONLINE_PASSWORD", "")

# Inclusive chapter_id range (min, max) that defines each level.
# L1 chapters: 333-581, L2 chapters: 55-331
level_ranges = {
    "L1": (333, 581),
    "L2": (55, 331),
}

# Numeric suffixes of the sharded bi_user_chapter_play_record_<n> tables.
shards = list(range(8))


def build_churn_sql(shard_ids) -> str:
    """Build the churn-rate query spanning the given play-record shards.

    For every shard, the query finds each account's latest ``created_at``
    among records with ``play_status = 1`` whose ``chapter_id`` lies in the
    requested range (the record's ``user_id`` is mapped to ``account_id``
    through ``bi_vala_app_character``). The per-shard results are combined
    with UNION ALL and reduced to one latest timestamp per account, then
    aggregated into a single row: total users, users whose latest record is
    older than 30 days, and the churn percentage rounded to one decimal.

    The chapter bounds are left as the psycopg2 named placeholders
    ``%(ch_min)s`` / ``%(ch_max)s`` so the same SQL text can be executed for
    every level with different bound values.

    Args:
        shard_ids: Iterable of shard suffixes; each is coerced with ``int()``
            before being spliced into the table name (table names cannot be
            bound as query parameters).

    Returns:
        The SQL text, expecting ``ch_min`` and ``ch_max`` parameters.

    Raises:
        ValueError: If ``shard_ids`` is empty or contains a non-integer value.
    """
    per_shard = [
        f"""
            SELECT c.account_id, MAX(r.created_at) as last_study
            FROM bi_user_chapter_play_record_{int(shard_id)} r
            JOIN bi_vala_app_character c ON r.user_id = c.id
            WHERE r.play_status = 1
              AND r.chapter_id BETWEEN %(ch_min)s AND %(ch_max)s
            GROUP BY c.account_id
        """
        for shard_id in shard_ids
    ]
    if not per_shard:
        # An empty CTE body would be a SQL syntax error; fail early instead.
        raise ValueError("at least one shard is required")

    union_sql = " UNION ALL ".join(per_shard)

    # An account may appear in several shards, so take the max across shards.
    # NULLIF turns an empty denominator into NULL instead of raising a
    # division-by-zero error when no user matches the level.
    return f"""
        WITH all_records AS (
            {union_sql}
        ),
        user_last_study AS (
            SELECT account_id, MAX(last_study) as last_study
            FROM all_records
            GROUP BY account_id
        )
        SELECT
            COUNT(*) as total_users,
            COUNT(*) FILTER (WHERE last_study < NOW() - INTERVAL '30 days') as churned_users,
            ROUND(
                100.0 * COUNT(*) FILTER (WHERE last_study < NOW() - INTERVAL '30 days')
                    / NULLIF(COUNT(*), 0),
                1
            ) as churn_rate_pct
        FROM user_last_study
    """


def format_churn_line(level, total, churned, rate) -> str:
    """Format one level's result as the line printed by the script.

    Args:
        level: Level label, e.g. ``"L1"``.
        total: Number of users in the denominator.
        churned: Number of churned users.
        rate: Churn percentage, or ``None`` when the denominator is empty.

    Returns:
        The report line; the rate is shown as ``N/A`` when it is ``None``.
    """
    rate_text = "N/A" if rate is None else f"{rate}%"
    return f"{level}: 总用户={total}, 流失用户={churned}, 流失率={rate_text}"


def main() -> None:
    """Connect to the database and print the churn rate of every level."""
    # The SQL text is identical for all levels; only the bound values differ.
    sql = build_churn_sql(shards)

    conn = psycopg2.connect(
        host=PG_HOST, port=PG_PORT, user=PG_USER,
        password=PG_PASSWORD, dbname=PG_DB,
    )
    try:
        # The cursor context manager closes the cursor; the connection is
        # closed explicitly in ``finally`` so it is released even on errors.
        with conn.cursor() as cur:
            for level, (ch_min, ch_max) in level_ranges.items():
                cur.execute(sql, {"ch_min": ch_min, "ch_max": ch_max})
                total, churned, rate = cur.fetchone()
                print(format_churn_line(level, total, churned, rate))
    finally:
        conn.close()


if __name__ == "__main__":
    main()