#!/usr/bin/env python3 """查询激活L2课程的用户中,有多少也激活了L1,以及他们的L1/L2学习进度对比""" import psycopg2 import statistics import sys conn = psycopg2.connect( host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591, user="ai_member", password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi" ) cur = conn.cursor() # ======================================== # Part 1: 激活L2的用户中有多少也激活了L1 # ======================================== print("=" * 60) print("Part 1: L2用户中同时激活L1的比例") print("=" * 60) sql1 = """ WITH orders AS ( SELECT o.out_trade_no, o.account_id, o.pay_success_date FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id WHERE o.pay_success_date >= '2026-03-01' AND o.order_status != 4 AND o.pay_success_date IS NOT NULL AND a.status = 1 ), l2_tickets AS ( SELECT DISTINCT o.account_id FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2' ), l1_tickets AS ( SELECT DISTINCT o.account_id FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A1' ), all_l1_anytime AS ( -- L1激活(不限时间) SELECT DISTINCT account_id FROM l2_tickets l2 WHERE EXISTS ( SELECT 1 FROM bi_vala_seasonal_ticket t JOIN bi_vala_order o ON t.out_trade_no = o.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A1' AND o.account_id = l2.account_id ) ) SELECT (SELECT COUNT(*) FROM l2_tickets) AS total_l2_accounts, (SELECT COUNT(*) FROM all_l1_anytime) AS l1_l2_both, (SELECT COUNT(*) FROM l1_tickets WHERE account_id IN (SELECT account_id FROM l2_tickets)) AS l1_l2_both_same_period; """ cur.execute(sql1) r1 = cur.fetchone() total_l2 = r1[0] both_any = r1[1] both_period = r1[2] print(f"激活L2的账户数(2026-03后订单): {total_l2}") print(f"其中也激活了L1的账户数(不限时间): {both_any} ({both_any/total_l2*100:.1f}%)") print(f"其中也通过2026-03后订单激活了L1: {both_period} ({both_period/total_l2*100:.1f}%)") # ======================================== # Part 2: 双激活用户的学习进度对比 # ======================================== print(f"\n{'='*60}") print("Part 2: 双激活用户 L1 vs L2 学习进度对比(下单30天内)") print("=" * 60) sql2 = """ WITH orders AS ( SELECT o.id, o.out_trade_no, o.account_id, o.pay_success_date FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id WHERE o.pay_success_date >= '2026-03-01' AND o.order_status != 4 AND o.pay_success_date IS NOT NULL AND a.status = 1 ), l2_accounts AS ( SELECT DISTINCT o.account_id FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2' ), dual_tickets AS ( -- 只取同时激活了L1和L2的账户的tickets SELECT o.out_trade_no, o.account_id, o.pay_success_date, t.character_id, t.season_package_level FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level IN ('A1', 'A2') AND o.account_id IN ( SELECT account_id FROM l2_accounts INTERSECT SELECT DISTINCT o2.account_id FROM orders o2 JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1' ) ), all_chapter_records AS ( SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1 ), level_records AS ( SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level FROM all_chapter_records pr JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id WHERE l.course_level IN ('L1', 'L2') ), ticket_progress AS ( SELECT t.out_trade_no, t.account_id, t.character_id, t.season_package_level, COUNT(DISTINCT lr.chapter_id) AS completed_lessons FROM dual_tickets t LEFT JOIN level_records lr ON t.character_id = lr.user_id AND lr.course_level = CASE WHEN t.season_package_level = 'A1' THEN 'L1' WHEN t.season_package_level = 'A2' THEN 'L2' END AND lr.created_at >= t.pay_success_date AND lr.created_at <= t.pay_success_date + INTERVAL '30 days' GROUP BY t.out_trade_no, t.account_id, t.character_id, t.season_package_level ) SELECT CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level, completed_lessons, COUNT(*) AS ticket_count FROM ticket_progress GROUP BY season_package_level, completed_lessons ORDER BY season_package_level, completed_lessons; """ cur.execute(sql2) rows = cur.fetchall() l1_rows = [(r[1], r[2]) for r in rows if r[0] == 'L1'] l2_rows = [(r[1], r[2]) for r in rows if r[0] == 'L2'] for label, data in [('L1', l1_rows), ('L2', l2_rows)]: total = sum(r[1] for r in data) print(f"\n--- {label}(双激活用户,仅2026-03后订单ticket)---") print(f"总 ticket 数: {total}") # 分桶展示 buckets = [(0,0), (1,5), (6,10), (11,15), (16,20), (21,25), (26,30), (31,999)] print(f"{'区间':>12} | {'ticket数':>10} | {'占比':>8}") print("-" * 35) for lo, hi in buckets: if lo == hi == 0: cnt = sum(r[1] for r in data if r[0] == 0) label_b = "0节" elif hi == 999: cnt = sum(r[1] for r in data if r[0] >= lo) label_b = f"{lo}+" else: cnt = sum(r[1] for r in data if lo <= r[0] <= hi) label_b = f"{lo}-{hi}" pct = cnt / total * 100 if total > 0 else 0 print(f"{label_b:>12} | {cnt:>10} | {pct:>7.1f}%") # 统计 values = [] for v, c in data: values.extend([v] * c) if values: avg = sum(values) / len(values) med = statistics.median(values) zero_pct = sum(1 for v in values if v == 0) / len(values) * 100 p25 = sorted(values)[len(values)//4] p75 = sorted(values)[len(values)*3//4] print(f"\n平均={avg:.1f} | 中位数={med:.0f} | P25={p25} | P75={p75} | 0节占比={zero_pct:.1f}%") # ======================================== # Part 3: 同一用户 L1 vs L2 进度对比 # ======================================== print(f"\n{'='*60}") print("Part 3: 同一用户 L1 vs L2 进度逐用户对比") print("=" * 60) sql3 = """ WITH orders AS ( SELECT o.id, o.out_trade_no, o.account_id, o.pay_success_date FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id WHERE o.pay_success_date >= '2026-03-01' AND o.order_status != 4 AND o.pay_success_date IS NOT NULL AND a.status = 1 ), l2_accounts AS ( SELECT DISTINCT o.account_id FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2' ), dual_accounts AS ( SELECT account_id FROM l2_accounts INTERSECT SELECT DISTINCT o2.account_id FROM orders o2 JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1' ), dual_tickets AS ( SELECT o.out_trade_no, o.account_id, o.pay_success_date, t.character_id, t.season_package_level FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level IN ('A1', 'A2') AND o.account_id IN (SELECT account_id FROM dual_accounts) ), all_chapter_records AS ( SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1 ), level_records AS ( SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level FROM all_chapter_records pr JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id WHERE l.course_level IN ('L1', 'L2') ), ticket_progress AS ( SELECT t.out_trade_no, t.account_id, t.character_id, t.season_package_level, COUNT(DISTINCT lr.chapter_id) AS completed_lessons FROM dual_tickets t LEFT JOIN level_records lr ON t.character_id = lr.user_id AND lr.course_level = CASE WHEN t.season_package_level = 'A1' THEN 'L1' WHEN t.season_package_level = 'A2' THEN 'L2' END AND lr.created_at >= t.pay_success_date AND lr.created_at <= t.pay_success_date + INTERVAL '30 days' GROUP BY t.out_trade_no, t.account_id, t.character_id, t.season_package_level ), account_level_progress AS ( -- 每个用户每个level取所有ticket中完课数的最大值(取最好的那个ticket) SELECT account_id, CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level, MAX(completed_lessons) AS max_completed FROM ticket_progress GROUP BY account_id, season_package_level ) SELECT COALESCE(l1.max_completed, 0) AS l1_lessons, COALESCE(l2.max_completed, 0) AS l2_lessons, COUNT(*) AS account_count FROM (SELECT DISTINCT account_id FROM dual_accounts) da LEFT JOIN account_level_progress l1 ON da.account_id = l1.account_id AND l1.course_level = 'L1' LEFT JOIN account_level_progress l2 ON da.account_id = l2.account_id AND l2.course_level = 'L2' GROUP BY COALESCE(l1.max_completed, 0), COALESCE(l2.max_completed, 0) ORDER BY l1_lessons, l2_lessons; """ cur.execute(sql3) rows3 = cur.fetchall() print(f"\nL1进度 vs L2进度(每个用户取该level最佳ticket的完课数)") print(f"{'L1节数':>8} | {'L2节数':>8} | {'用户数':>8}") print("-" * 30) for r in rows3: print(f"{r[0]:>8} | {r[1]:>8} | {r[2]:>8}") # Summary: L1 > L2, L1 < L2, L1 = L2 l1_gt = sum(r[2] for r in rows3 if r[0] > r[1]) l2_gt = sum(r[2] for r in rows3 if r[0] < r[1]) equal = sum(r[2] for r in rows3 if r[0] == r[1]) total_dual = sum(r[2] for r in rows3) print(f"\n总结(用户级):") print(f" L1进度 > L2进度: {l1_gt} 人 ({l1_gt/total_dual*100:.1f}%)") print(f" L1进度 < L2进度: {l2_gt} 人 ({l2_gt/total_dual*100:.1f}%)") print(f" L1进度 = L2进度: {equal} 人 ({equal/total_dual*100:.1f}%)") # Both zero both_zero = sum(r[2] for r in rows3 if r[0] == 0 and r[1] == 0) print(f" 两者均为0节: {both_zero} 人 ({both_zero/total_dual*100:.1f}%)") cur.close() conn.close() print("\n查询完成。")