ai_member_xiaoxi/scripts/popup_analysis.py
2026-05-29 08:00:01 +08:00

450 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""弹窗策略数据分析 V2仅端内购课用户 + 注册→购课天数分布"""
import os
import psycopg2
import psycopg2.extras
def get_conn():
return psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591,
user="ai_member",
password=os.environ["PG_ONLINE_PASSWORD"],
dbname="vala_bi"
)
def run_sql(sql):
conn = get_conn()
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
cur.execute(sql)
rows = cur.fetchall()
cur.close()
conn.close()
return rows
# ── 端内过滤条件 ──
ENDPOINT_INNER = "o.key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')"
GOODS_FILTER = "o.goods_id IN (57, 60, 63, 31, 32, 33, 54, 61)"
ORDER_STATUS_FILTER = "o.order_status IN (3, 4)"
# ======================================================================
print("=" * 70)
print("分析零:注册→购课天数分布(端内付费用户)")
print("=" * 70)
sql0 = f"""
SELECT
o.pay_success_date::date - a.created_at::date AS days_to_purchase,
COUNT(DISTINCT o.account_id) AS user_count
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND o.order_status IN (3, 4)
AND o.goods_id IN (57, 60, 63, 31, 32, 33, 54, 61)
AND o.key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
GROUP BY days_to_purchase
ORDER BY days_to_purchase;
"""
rows0 = run_sql(sql0)
total_users0 = sum(r['user_count'] for r in rows0)
cum = 0
print(f"\n{'天数':>6} {'人数':>8} {'占比':>8} {'累计占比':>10}")
print("-" * 38)
for r in rows0:
cum += r['user_count']
pct = 100.0 * r['user_count'] / total_users0
cum_pct = 100.0 * cum / total_users0
print(f"{r['days_to_purchase']:>6} {r['user_count']:>8} {pct:>7.1f}% {cum_pct:>9.1f}%")
print(f"\n端内付费用户总数: {total_users0}")
# ── 分段统计 ──
sql0b = f"""
SELECT
CASE
WHEN days_to_purchase = 0 THEN '当天购买'
WHEN days_to_purchase BETWEEN 1 AND 3 THEN '1-3天'
WHEN days_to_purchase BETWEEN 4 AND 7 THEN '4-7天'
WHEN days_to_purchase BETWEEN 8 AND 14 THEN '8-14天'
WHEN days_to_purchase BETWEEN 15 AND 30 THEN '15-30天'
ELSE '30天以上'
END AS day_bucket,
COUNT(DISTINCT account_id) AS user_count,
ROUND(100.0 * COUNT(DISTINCT account_id) / SUM(COUNT(DISTINCT account_id)) OVER(), 1) AS pct
FROM (
SELECT
o.pay_success_date::date - a.created_at::date AS days_to_purchase,
o.account_id
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND o.order_status IN (3, 4)
AND o.goods_id IN (57, 60, 63, 31, 32, 33, 54, 61)
AND o.key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
) gap
GROUP BY day_bucket
ORDER BY MIN(gap.days_to_purchase);
"""
rows0b = run_sql(sql0b)
print(f"\n{'时间段':<12} {'人数':>8} {'占比':>8}")
print("-" * 32)
cum2 = 0
for r in rows0b:
cum2 += r['user_count']
print(f"{r['day_bucket']:<12} {r['user_count']:>8} {r['pct']:>7}%")
# ======================================================================
print("\n" + "=" * 70)
print("分析一:购买节点分布(仅端内付费用户)")
print("=" * 70)
sql1 = f"""
WITH u00_lessons AS (
SELECT id AS chapter_id, course_level,
CASE course_lesson
WHEN 'L01' THEN 1 WHEN 'L02' THEN 2 WHEN 'L03' THEN 3
WHEN 'L04' THEN 4 WHEN 'L05' THEN 5
END AS lesson_order
FROM bi_level_unit_lesson WHERE course_unit = 'U00'
),
paid_orders AS (
SELECT DISTINCT o.account_id, o.pay_success_date::date AS pay_date, o.goods_id
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND {ORDER_STATUS_FILTER}
AND {GOODS_FILTER}
AND {ENDPOINT_INNER}
),
paid_characters AS (
SELECT c.id AS character_id, po.account_id, po.pay_date, po.goods_id
FROM paid_orders po
JOIN bi_vala_app_character c ON po.account_id = c.account_id
),
u00_done AS (
SELECT pc.account_id, pc.pay_date, pc.goods_id, l.lesson_order, l.course_level
FROM paid_characters pc
JOIN (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
) p ON pc.character_id = p.user_id
JOIN u00_lessons l ON p.chapter_id = l.chapter_id
WHERE p.created_at::date <= pc.pay_date
GROUP BY pc.account_id, pc.pay_date, pc.goods_id, l.lesson_order, l.course_level
),
best_progress AS (
SELECT DISTINCT ON (account_id, pay_date, goods_id)
account_id, pay_date, goods_id, lesson_order
FROM u00_done
ORDER BY account_id, pay_date, goods_id, lesson_order DESC
)
SELECT
CASE
WHEN lesson_order IS NULL THEN '学习前购买'
WHEN lesson_order = 1 THEN 'L01之后'
WHEN lesson_order = 2 THEN 'L02之后'
WHEN lesson_order = 3 THEN 'L03之后'
WHEN lesson_order = 4 THEN 'L04之后'
WHEN lesson_order = 5 THEN 'L05之后'
END AS purchase_node,
CASE
WHEN goods_id IN (57, 60, 63) THEN 'L1'
WHEN goods_id IN (31, 32, 33, 54) THEN 'L2'
WHEN goods_id = 61 THEN 'L1+L2'
END AS product_level,
COUNT(*) AS order_count,
COUNT(DISTINCT account_id) AS user_count
FROM best_progress
GROUP BY purchase_node, product_level
UNION ALL
SELECT
'学习前购买' AS purchase_node,
CASE
WHEN po.goods_id IN (57, 60, 63) THEN 'L1'
WHEN po.goods_id IN (31, 32, 33, 54) THEN 'L2'
WHEN po.goods_id = 61 THEN 'L1+L2'
END AS product_level,
COUNT(*) AS order_count,
COUNT(DISTINCT po.account_id) AS user_count
FROM (
SELECT DISTINCT o.account_id, o.pay_success_date::date AS pay_date, o.goods_id
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND {ORDER_STATUS_FILTER}
AND {GOODS_FILTER}
AND {ENDPOINT_INNER}
) po
LEFT JOIN best_progress bp ON po.account_id = bp.account_id AND po.pay_date = bp.pay_date AND po.goods_id = bp.goods_id
WHERE bp.account_id IS NULL
GROUP BY CASE
WHEN po.goods_id IN (57, 60, 63) THEN 'L1'
WHEN po.goods_id IN (31, 32, 33, 54) THEN 'L2'
WHEN po.goods_id = 61 THEN 'L1+L2'
END
ORDER BY product_level, purchase_node;
"""
rows1 = run_sql(sql1)
print(f"\n{'购买节点':<14} {'产品':<8} {'订单数':>8} {'用户数':>8}")
print("-" * 44)
for r in rows1:
print(f"{r['purchase_node']:<14} {r['product_level']:<8} {r['order_count']:>8} {r['user_count']:>8}")
# ── 汇总 ──
sql1b = f"""
WITH u00_lessons AS (
SELECT id AS chapter_id,
CASE course_lesson
WHEN 'L01' THEN 1 WHEN 'L02' THEN 2 WHEN 'L03' THEN 3
WHEN 'L04' THEN 4 WHEN 'L05' THEN 5 END AS lesson_order
FROM bi_level_unit_lesson WHERE course_unit = 'U00'
),
paid_orders AS (
SELECT DISTINCT o.account_id, o.pay_success_date::date AS pay_date
FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL AND {ORDER_STATUS_FILTER} AND {GOODS_FILTER} AND {ENDPOINT_INNER}
),
paid_characters AS (
SELECT c.id AS character_id, po.account_id, po.pay_date
FROM paid_orders po JOIN bi_vala_app_character c ON po.account_id = c.account_id
),
u00_done AS (
SELECT pc.account_id, pc.pay_date, l.lesson_order
FROM paid_characters pc
JOIN (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
) p ON pc.character_id = p.user_id
JOIN u00_lessons l ON p.chapter_id = l.chapter_id
WHERE p.created_at::date <= pc.pay_date
GROUP BY pc.account_id, pc.pay_date, l.lesson_order
),
best_progress AS (
SELECT DISTINCT ON (account_id, pay_date) account_id, pay_date, lesson_order
FROM u00_done ORDER BY account_id, pay_date, lesson_order DESC
)
SELECT
CASE
WHEN lesson_order IS NULL THEN '学习前购买'
WHEN lesson_order = 1 THEN 'L01之后'
WHEN lesson_order = 2 THEN 'L02之后'
WHEN lesson_order = 3 THEN 'L03之后'
WHEN lesson_order = 4 THEN 'L04之后'
WHEN lesson_order = 5 THEN 'L05之后'
END AS purchase_node,
COUNT(*) AS order_count,
COUNT(DISTINCT account_id) AS user_count
FROM best_progress
GROUP BY purchase_node
UNION ALL
SELECT '学习前购买', COUNT(*), COUNT(DISTINCT po.account_id)
FROM (
SELECT DISTINCT o.account_id, o.pay_success_date::date AS pay_date
FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL AND {ORDER_STATUS_FILTER} AND {GOODS_FILTER} AND {ENDPOINT_INNER}
) po
LEFT JOIN best_progress bp ON po.account_id = bp.account_id AND po.pay_date = bp.pay_date
WHERE bp.account_id IS NULL
ORDER BY purchase_node;
"""
rows1b = run_sql(sql1b)
print(f"\n{'购买节点':<14} {'订单数':>8} {'用户数':>8} {'占比':>8}")
print("-" * 44)
total_orders = sum(r['order_count'] for r in rows1b)
for r in rows1b:
pct = 100.0 * r['user_count'] / sum(x['user_count'] for x in rows1b)
print(f"{r['purchase_node']:<14} {r['order_count']:>8} {r['user_count']:>8} {pct:>7.1f}%")
print(f"\n端内付费总订单: {total_orders}")
# ======================================================================
print("\n" + "=" * 70)
print("分析二:峰值后用户 —— 学完U00但未购端内购买为判断标准")
print("=" * 70)
sql2 = f"""
WITH u00_lessons AS (
SELECT id AS chapter_id,
CASE course_lesson
WHEN 'L01' THEN 1 WHEN 'L02' THEN 2 WHEN 'L03' THEN 3
WHEN 'L04' THEN 4 WHEN 'L05' THEN 5 END AS lesson_order
FROM bi_level_unit_lesson WHERE course_unit = 'U00'
),
user_u00_progress AS (
SELECT DISTINCT a.id AS account_id, l.lesson_order
FROM bi_vala_app_account a
JOIN bi_vala_app_character c ON a.id = c.account_id
JOIN (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
) p ON c.id = p.user_id
JOIN u00_lessons l ON p.chapter_id = l.chapter_id
WHERE a.status = 1 AND a.deleted_at IS NULL
),
user_max_progress AS (
SELECT account_id, MAX(lesson_order) AS max_lesson
FROM user_u00_progress GROUP BY account_id
),
u00_completers AS (
SELECT account_id FROM user_max_progress WHERE max_lesson = 5
),
-- 端内付费用户
paid_inner AS (
SELECT DISTINCT o.account_id
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND o.order_status IN (3, 4)
AND o.goods_id IN (57, 60, 63, 31, 32, 33, 54, 61)
AND o.key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
)
SELECT
'学完U00全部5课' AS user_group,
COUNT(*) AS total_users,
COUNT(*) FILTER (WHERE pu.account_id IS NOT NULL) AS paid_users,
COUNT(*) FILTER (WHERE pu.account_id IS NULL) AS unpaid_users,
ROUND(100.0 * COUNT(*) FILTER (WHERE pu.account_id IS NOT NULL) / COUNT(*), 1) AS paid_rate
FROM u00_completers uc
LEFT JOIN paid_inner pu ON uc.account_id = pu.account_id;
"""
rows2 = run_sql(sql2)
for r in rows2:
print(f"\n总用户数: {r['total_users']}, 已端内付费: {r['paid_users']}, 未端内付费: {r['unpaid_users']}, 端内付费率: {r['paid_rate']}%")
# ======================================================================
print("\n" + "=" * 70)
print("分析三:各断点用户规模 + 端内付费率")
print("=" * 70)
sql3 = f"""
WITH u00_lessons AS (
SELECT id AS chapter_id, course_level,
CASE course_lesson
WHEN 'L01' THEN 1 WHEN 'L02' THEN 2 WHEN 'L03' THEN 3
WHEN 'L04' THEN 4 WHEN 'L05' THEN 5 END AS lesson_order
FROM bi_level_unit_lesson WHERE course_unit = 'U00'
),
user_u00_progress AS (
SELECT a.id AS account_id, l.course_level, l.lesson_order
FROM bi_vala_app_account a
JOIN bi_vala_app_character c ON a.id = c.account_id
JOIN (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
) p ON c.id = p.user_id
JOIN u00_lessons l ON p.chapter_id = l.chapter_id
WHERE a.status = 1 AND a.deleted_at IS NULL
GROUP BY a.id, l.course_level, l.lesson_order
),
user_max_per_level AS (
SELECT account_id, course_level, MAX(lesson_order) AS max_lesson
FROM user_u00_progress GROUP BY account_id, course_level
),
paid_inner AS (
SELECT DISTINCT o.account_id
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND o.order_status IN (3, 4)
AND o.goods_id IN (57, 60, 63, 31, 32, 33, 54, 61)
AND o.key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
)
SELECT
course_level, max_lesson,
COUNT(*) AS user_count,
COUNT(*) FILTER (WHERE pu.account_id IS NOT NULL) AS paid_count,
ROUND(100.0 * COUNT(*) FILTER (WHERE pu.account_id IS NOT NULL) / COUNT(*), 1) AS paid_rate
FROM user_max_per_level um
LEFT JOIN paid_inner pu ON um.account_id = pu.account_id
GROUP BY course_level, max_lesson
ORDER BY course_level, max_lesson;
"""
rows3 = run_sql(sql3)
print(f"\n{'Level':<6} {'最大完成':<10} {'用户数':>8} {'端内付费':>10} {'端内付费率':>10}")
print("-" * 48)
for r in rows3:
pct = 100.0 * r['paid_count'] / r['user_count']
print(f"{r['course_level']:<6} L{r['max_lesson']:02d} {r['user_count']:>8} {r['paid_count']:>10} {pct:>9.1f}%")
# ======================================================================
print("\n" + "=" * 70)
print("分析四到达各lesson的端内付费转化率")
print("=" * 70)
sql4 = f"""
WITH u00_lessons AS (
SELECT id AS chapter_id, course_level,
CASE course_lesson
WHEN 'L01' THEN 1 WHEN 'L02' THEN 2 WHEN 'L03' THEN 3
WHEN 'L04' THEN 4 WHEN 'L05' THEN 5 END AS lesson_order
FROM bi_level_unit_lesson WHERE course_unit = 'U00'
),
user_u00_progress AS (
SELECT a.id AS account_id, l.course_level, l.lesson_order
FROM bi_vala_app_account a
JOIN bi_vala_app_character c ON a.id = c.account_id
JOIN (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
) p ON c.id = p.user_id
JOIN u00_lessons l ON p.chapter_id = l.chapter_id
WHERE a.status = 1 AND a.deleted_at IS NULL
GROUP BY a.id, l.course_level, l.lesson_order
),
paid_inner AS (
SELECT DISTINCT o.account_id
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date IS NOT NULL
AND o.order_status IN (3, 4)
AND o.goods_id IN (57, 60, 63, 31, 32, 33, 54, 61)
AND o.key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
)
SELECT
up.course_level,
up.lesson_order AS reached_lesson,
COUNT(DISTINCT up.account_id) AS reached_users,
COUNT(DISTINCT pu.account_id) AS paid_users,
ROUND(100.0 * COUNT(DISTINCT pu.account_id) / COUNT(DISTINCT up.account_id), 1) AS conversion_rate
FROM user_u00_progress up
LEFT JOIN paid_inner pu ON up.account_id = pu.account_id
GROUP BY up.course_level, up.lesson_order
ORDER BY up.course_level, up.lesson_order;
"""
rows4 = run_sql(sql4)
print(f"\n{'Level':<6} {'到达':<10} {'到达人数':>8} {'端内付费':>10} {'端内转化率':>10}")
print("-" * 48)
for r in rows4:
print(f"{r['course_level']:<6} L{r['reached_lesson']:02d} {r['reached_users']:>8} {r['paid_users']:>10} {r['conversion_rate']:>9}%")
print("\n✅ 全部分析完成")