ai_member_xiaoxi/scripts/dual_activation_progress.py
2026-05-28 08:00:01 +08:00

315 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""查询激活L2课程的用户中有多少也激活了L1以及他们的L1/L2学习进度对比"""
import psycopg2
import statistics
import sys
conn = psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591,
user="ai_member",
password="LdfjdjL83h3h3^$&**YGG*",
dbname="vala_bi"
)
cur = conn.cursor()
# ========================================
# Part 1: 激活L2的用户中有多少也激活了L1
# ========================================
print("=" * 60)
print("Part 1: L2用户中同时激活L1的比例")
print("=" * 60)
sql1 = """
WITH orders AS (
SELECT o.out_trade_no, o.account_id, o.pay_success_date
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE o.pay_success_date >= '2026-03-01'
AND o.order_status != 4
AND o.pay_success_date IS NOT NULL
AND a.status = 1
),
l2_tickets AS (
SELECT DISTINCT o.account_id
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1
AND t.deleted_at IS NULL
AND t.season_package_level = 'A2'
),
l1_tickets AS (
SELECT DISTINCT o.account_id
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1
AND t.deleted_at IS NULL
AND t.season_package_level = 'A1'
),
all_l1_anytime AS (
-- L1激活不限时间
SELECT DISTINCT account_id
FROM l2_tickets l2
WHERE EXISTS (
SELECT 1 FROM bi_vala_seasonal_ticket t
JOIN bi_vala_order o ON t.out_trade_no = o.out_trade_no
WHERE t.status = 1 AND t.deleted_at IS NULL
AND t.season_package_level = 'A1'
AND o.account_id = l2.account_id
)
)
SELECT
(SELECT COUNT(*) FROM l2_tickets) AS total_l2_accounts,
(SELECT COUNT(*) FROM all_l1_anytime) AS l1_l2_both,
(SELECT COUNT(*) FROM l1_tickets WHERE account_id IN (SELECT account_id FROM l2_tickets)) AS l1_l2_both_same_period;
"""
cur.execute(sql1)
r1 = cur.fetchone()
total_l2 = r1[0]
both_any = r1[1]
both_period = r1[2]
print(f"激活L2的账户数2026-03后订单: {total_l2}")
print(f"其中也激活了L1的账户数不限时间: {both_any} ({both_any/total_l2*100:.1f}%)")
print(f"其中也通过2026-03后订单激活了L1: {both_period} ({both_period/total_l2*100:.1f}%)")
# ========================================
# Part 2: 双激活用户的学习进度对比
# ========================================
print(f"\n{'='*60}")
print("Part 2: 双激活用户 L1 vs L2 学习进度对比下单30天内")
print("=" * 60)
sql2 = """
WITH orders AS (
SELECT o.id, o.out_trade_no, o.account_id, o.pay_success_date
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE o.pay_success_date >= '2026-03-01'
AND o.order_status != 4
AND o.pay_success_date IS NOT NULL
AND a.status = 1
),
l2_accounts AS (
SELECT DISTINCT o.account_id
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2'
),
dual_tickets AS (
-- 只取同时激活了L1和L2的账户的tickets
SELECT o.out_trade_no, o.account_id, o.pay_success_date,
t.character_id, t.season_package_level
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1
AND t.deleted_at IS NULL
AND t.season_package_level IN ('A1', 'A2')
AND o.account_id IN (
SELECT account_id FROM l2_accounts
INTERSECT
SELECT DISTINCT o2.account_id FROM orders o2
JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no
WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1'
)
),
all_chapter_records AS (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
),
level_records AS (
SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level
FROM all_chapter_records pr
JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id
WHERE l.course_level IN ('L1', 'L2')
),
ticket_progress AS (
SELECT
t.out_trade_no,
t.account_id,
t.character_id,
t.season_package_level,
COUNT(DISTINCT lr.chapter_id) AS completed_lessons
FROM dual_tickets t
LEFT JOIN level_records lr
ON t.character_id = lr.user_id
AND lr.course_level = CASE
WHEN t.season_package_level = 'A1' THEN 'L1'
WHEN t.season_package_level = 'A2' THEN 'L2'
END
AND lr.created_at >= t.pay_success_date
AND lr.created_at <= t.pay_success_date + INTERVAL '30 days'
GROUP BY t.out_trade_no, t.account_id, t.character_id, t.season_package_level
)
SELECT
CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level,
completed_lessons,
COUNT(*) AS ticket_count
FROM ticket_progress
GROUP BY season_package_level, completed_lessons
ORDER BY season_package_level, completed_lessons;
"""
cur.execute(sql2)
rows = cur.fetchall()
l1_rows = [(r[1], r[2]) for r in rows if r[0] == 'L1']
l2_rows = [(r[1], r[2]) for r in rows if r[0] == 'L2']
for label, data in [('L1', l1_rows), ('L2', l2_rows)]:
total = sum(r[1] for r in data)
print(f"\n--- {label}双激活用户仅2026-03后订单ticket---")
print(f"总 ticket 数: {total}")
# 分桶展示
buckets = [(0,0), (1,5), (6,10), (11,15), (16,20), (21,25), (26,30), (31,999)]
print(f"{'区间':>12} | {'ticket数':>10} | {'占比':>8}")
print("-" * 35)
for lo, hi in buckets:
if lo == hi == 0:
cnt = sum(r[1] for r in data if r[0] == 0)
label_b = "0节"
elif hi == 999:
cnt = sum(r[1] for r in data if r[0] >= lo)
label_b = f"{lo}+"
else:
cnt = sum(r[1] for r in data if lo <= r[0] <= hi)
label_b = f"{lo}-{hi}"
pct = cnt / total * 100 if total > 0 else 0
print(f"{label_b:>12} | {cnt:>10} | {pct:>7.1f}%")
# 统计
values = []
for v, c in data:
values.extend([v] * c)
if values:
avg = sum(values) / len(values)
med = statistics.median(values)
zero_pct = sum(1 for v in values if v == 0) / len(values) * 100
p25 = sorted(values)[len(values)//4]
p75 = sorted(values)[len(values)*3//4]
print(f"\n平均={avg:.1f} | 中位数={med:.0f} | P25={p25} | P75={p75} | 0节占比={zero_pct:.1f}%")
# ========================================
# Part 3: 同一用户 L1 vs L2 进度对比
# ========================================
print(f"\n{'='*60}")
print("Part 3: 同一用户 L1 vs L2 进度逐用户对比")
print("=" * 60)
sql3 = """
WITH orders AS (
SELECT o.id, o.out_trade_no, o.account_id, o.pay_success_date
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE o.pay_success_date >= '2026-03-01'
AND o.order_status != 4
AND o.pay_success_date IS NOT NULL
AND a.status = 1
),
l2_accounts AS (
SELECT DISTINCT o.account_id
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2'
),
dual_accounts AS (
SELECT account_id FROM l2_accounts
INTERSECT
SELECT DISTINCT o2.account_id FROM orders o2
JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no
WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1'
),
dual_tickets AS (
SELECT o.out_trade_no, o.account_id, o.pay_success_date,
t.character_id, t.season_package_level
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1 AND t.deleted_at IS NULL
AND t.season_package_level IN ('A1', 'A2')
AND o.account_id IN (SELECT account_id FROM dual_accounts)
),
all_chapter_records AS (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
),
level_records AS (
SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level
FROM all_chapter_records pr
JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id
WHERE l.course_level IN ('L1', 'L2')
),
ticket_progress AS (
SELECT
t.out_trade_no,
t.account_id,
t.character_id,
t.season_package_level,
COUNT(DISTINCT lr.chapter_id) AS completed_lessons
FROM dual_tickets t
LEFT JOIN level_records lr
ON t.character_id = lr.user_id
AND lr.course_level = CASE
WHEN t.season_package_level = 'A1' THEN 'L1'
WHEN t.season_package_level = 'A2' THEN 'L2'
END
AND lr.created_at >= t.pay_success_date
AND lr.created_at <= t.pay_success_date + INTERVAL '30 days'
GROUP BY t.out_trade_no, t.account_id, t.character_id, t.season_package_level
),
account_level_progress AS (
-- 每个用户每个level取所有ticket中完课数的最大值取最好的那个ticket
SELECT
account_id,
CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level,
MAX(completed_lessons) AS max_completed
FROM ticket_progress
GROUP BY account_id, season_package_level
)
SELECT
COALESCE(l1.max_completed, 0) AS l1_lessons,
COALESCE(l2.max_completed, 0) AS l2_lessons,
COUNT(*) AS account_count
FROM (SELECT DISTINCT account_id FROM dual_accounts) da
LEFT JOIN account_level_progress l1 ON da.account_id = l1.account_id AND l1.course_level = 'L1'
LEFT JOIN account_level_progress l2 ON da.account_id = l2.account_id AND l2.course_level = 'L2'
GROUP BY COALESCE(l1.max_completed, 0), COALESCE(l2.max_completed, 0)
ORDER BY l1_lessons, l2_lessons;
"""
cur.execute(sql3)
rows3 = cur.fetchall()
print(f"\nL1进度 vs L2进度每个用户取该level最佳ticket的完课数")
print(f"{'L1节数':>8} | {'L2节数':>8} | {'用户数':>8}")
print("-" * 30)
for r in rows3:
print(f"{r[0]:>8} | {r[1]:>8} | {r[2]:>8}")
# Summary: L1 > L2, L1 < L2, L1 = L2
l1_gt = sum(r[2] for r in rows3 if r[0] > r[1])
l2_gt = sum(r[2] for r in rows3 if r[0] < r[1])
equal = sum(r[2] for r in rows3 if r[0] == r[1])
total_dual = sum(r[2] for r in rows3)
print(f"\n总结(用户级):")
print(f" L1进度 > L2进度: {l1_gt} 人 ({l1_gt/total_dual*100:.1f}%)")
print(f" L1进度 < L2进度: {l2_gt} 人 ({l2_gt/total_dual*100:.1f}%)")
print(f" L1进度 = L2进度: {equal} 人 ({equal/total_dual*100:.1f}%)")
# Both zero
both_zero = sum(r[2] for r in rows3 if r[0] == 0 and r[1] == 0)
print(f" 两者均为0节: {both_zero} 人 ({both_zero/total_dual*100:.1f}%)")
cur.close()
conn.close()
print("\n查询完成。")