#!/usr/bin/env python3 """流失用户最后一次完课的课程分布(按 Unit 汇总) 口径:对 L1/L2 流失用户(最后完课距今>30天),统计最后一次完课所在 Unit """ import os import psycopg2 PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" PG_PORT = 28591 PG_USER = "ai_member" PG_DB = "vala_bi" PG_PASSWORD = os.environ.get("PG_ONLINE_PASSWORD", "") conn = psycopg2.connect( host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB ) cur = conn.cursor() level_ranges = {"L1": (333, 581), "L2": (55, 331)} shards = list(range(8)) for level, (ch_min, ch_max) in level_ranges.items(): union_parts = [] for s in shards: union_parts.append(f""" SELECT c.account_id, r.chapter_id, r.created_at FROM bi_user_chapter_play_record_{s} r JOIN bi_vala_app_character c ON r.user_id = c.id WHERE r.play_status = 1 AND r.chapter_id BETWEEN {ch_min} AND {ch_max} """) union_sql = " UNION ALL ".join(union_parts) # Get last study per user, and the chapter_id of that last study sql = f""" WITH all_records AS ( {union_sql} ), user_last AS ( SELECT DISTINCT ON (account_id) account_id, chapter_id, created_at as last_study FROM all_records ORDER BY account_id, created_at DESC ), churned AS ( SELECT account_id, chapter_id FROM user_last WHERE last_study < NOW() - INTERVAL '30 days' ) SELECT l.course_unit, COUNT(*) as cnt, ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(), 1) as pct FROM churned c JOIN bi_level_unit_lesson l ON c.chapter_id = l.id GROUP BY l.course_unit ORDER BY l.course_unit """ cur.execute(sql) rows = cur.fetchall() print(f"\n=== {level} 流失用户最后完课 Unit 分布 ===") total = 0 for unit, cnt, pct in rows: print(f" {unit}: {cnt}人 ({pct}%)") total += cnt print(f" 合计: {total}人") cur.close() conn.close()