#!/usr/bin/env python3 """计算 L1/L2 用户流失率 口径(李承龙确认): - 分母:历史上有过学习行为(play_status=1)的用户(按 account_id 去重) - 分子:分母中,最后一次学习距今超过 30 天的用户 - 区分 L1 和 L2 分别计算 """ import os import psycopg2 PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" PG_PORT = 28591 PG_USER = "ai_member" PG_DB = "vala_bi" PG_PASSWORD = os.environ.get("PG_ONLINE_PASSWORD", "") conn = psycopg2.connect( host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB ) cur = conn.cursor() # L1 chapters: 333-581, L2 chapters: 55-331 level_ranges = { "L1": (333, 581), "L2": (55, 331), } shards = list(range(8)) for level, (ch_min, ch_max) in level_ranges.items(): # Build UNION ALL across all 8 shards union_parts = [] for s in shards: union_parts.append(f""" SELECT c.account_id, MAX(r.created_at) as last_study FROM bi_user_chapter_play_record_{s} r JOIN bi_vala_app_character c ON r.user_id = c.id WHERE r.play_status = 1 AND r.chapter_id BETWEEN {ch_min} AND {ch_max} GROUP BY c.account_id """) union_sql = " UNION ALL ".join(union_parts) # Aggregate across shards: take max last_study per account sql = f""" WITH all_records AS ( {union_sql} ), user_last_study AS ( SELECT account_id, MAX(last_study) as last_study FROM all_records GROUP BY account_id ) SELECT COUNT(*) as total_users, COUNT(*) FILTER (WHERE last_study < NOW() - INTERVAL '30 days') as churned_users, ROUND( 100.0 * COUNT(*) FILTER (WHERE last_study < NOW() - INTERVAL '30 days') / COUNT(*), 1 ) as churn_rate_pct FROM user_last_study """ cur.execute(sql) row = cur.fetchone() total, churned, rate = row print(f"{level}: 总用户={total}, 流失用户={churned}, 流失率={rate}%") cur.close() conn.close()