ai_member_xiaoxi/scripts/churn_course_dist.py
2026-06-10 08:00:01 +08:00

73 lines
2.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""流失用户最后一次完课的课程分布(按 Unit 汇总)
口径:对 L1/L2 流失用户(最后完课距今>30天统计最后一次完课所在 Unit
"""
import os
import psycopg2
PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
PG_PORT = 28591
PG_USER = "ai_member"
PG_DB = "vala_bi"
PG_PASSWORD = os.environ.get("PG_ONLINE_PASSWORD", "")
conn = psycopg2.connect(
host=PG_HOST, port=PG_PORT, user=PG_USER,
password=PG_PASSWORD, dbname=PG_DB
)
cur = conn.cursor()
level_ranges = {"L1": (333, 581), "L2": (55, 331)}
shards = list(range(8))
for level, (ch_min, ch_max) in level_ranges.items():
union_parts = []
for s in shards:
union_parts.append(f"""
SELECT c.account_id, r.chapter_id, r.created_at
FROM bi_user_chapter_play_record_{s} r
JOIN bi_vala_app_character c ON r.user_id = c.id
WHERE r.play_status = 1
AND r.chapter_id BETWEEN {ch_min} AND {ch_max}
""")
union_sql = " UNION ALL ".join(union_parts)
# Get last study per user, and the chapter_id of that last study
sql = f"""
WITH all_records AS (
{union_sql}
),
user_last AS (
SELECT DISTINCT ON (account_id)
account_id, chapter_id, created_at as last_study
FROM all_records
ORDER BY account_id, created_at DESC
),
churned AS (
SELECT account_id, chapter_id
FROM user_last
WHERE last_study < NOW() - INTERVAL '30 days'
)
SELECT
l.course_unit,
COUNT(*) as cnt,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(), 1) as pct
FROM churned c
JOIN bi_level_unit_lesson l ON c.chapter_id = l.id
GROUP BY l.course_unit
ORDER BY l.course_unit
"""
cur.execute(sql)
rows = cur.fetchall()
print(f"\n=== {level} 流失用户最后完课 Unit 分布 ===")
total = 0
for unit, cnt, pct in rows:
print(f" {unit}: {cnt}人 ({pct}%)")
total += cnt
print(f" 合计: {total}")
cur.close()
conn.close()