ai_member_xiaoxi/scripts/churn_rate_l1_l2.py
2026-06-10 08:00:01 +08:00

73 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""计算 L1/L2 用户流失率
口径(李承龙确认):
- 分母历史上有过学习行为play_status=1的用户按 account_id 去重)
- 分子:分母中,最后一次学习距今超过 30 天的用户
- 区分 L1 和 L2 分别计算
"""
import os
import psycopg2
PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
PG_PORT = 28591
PG_USER = "ai_member"
PG_DB = "vala_bi"
PG_PASSWORD = os.environ.get("PG_ONLINE_PASSWORD", "")
conn = psycopg2.connect(
host=PG_HOST, port=PG_PORT, user=PG_USER,
password=PG_PASSWORD, dbname=PG_DB
)
cur = conn.cursor()
# L1 chapters: 333-581, L2 chapters: 55-331
level_ranges = {
"L1": (333, 581),
"L2": (55, 331),
}
shards = list(range(8))
for level, (ch_min, ch_max) in level_ranges.items():
# Build UNION ALL across all 8 shards
union_parts = []
for s in shards:
union_parts.append(f"""
SELECT c.account_id, MAX(r.created_at) as last_study
FROM bi_user_chapter_play_record_{s} r
JOIN bi_vala_app_character c ON r.user_id = c.id
WHERE r.play_status = 1
AND r.chapter_id BETWEEN {ch_min} AND {ch_max}
GROUP BY c.account_id
""")
union_sql = " UNION ALL ".join(union_parts)
# Aggregate across shards: take max last_study per account
sql = f"""
WITH all_records AS (
{union_sql}
),
user_last_study AS (
SELECT account_id, MAX(last_study) as last_study
FROM all_records
GROUP BY account_id
)
SELECT
COUNT(*) as total_users,
COUNT(*) FILTER (WHERE last_study < NOW() - INTERVAL '30 days') as churned_users,
ROUND(
100.0 * COUNT(*) FILTER (WHERE last_study < NOW() - INTERVAL '30 days') / COUNT(*),
1
) as churn_rate_pct
FROM user_last_study
"""
cur.execute(sql)
row = cur.fetchone()
total, churned, rate = row
print(f"{level}: 总用户={total}, 流失用户={churned}, 流失率={rate}%")
cur.close()
conn.close()