#!/usr/bin/env python3 """双激活用户 L1 vs L2 学习进度交叉分布 —— 按L1区间看L2分布""" import psycopg2 conn = psycopg2.connect( host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591, user="ai_member", password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi" ) cur = conn.cursor() sql = """ WITH orders AS ( SELECT o.out_trade_no, o.account_id, o.pay_success_date FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id WHERE o.pay_success_date >= '2026-03-01' AND o.order_status != 4 AND o.pay_success_date IS NOT NULL AND a.status = 1 ), l2_accounts AS ( SELECT DISTINCT o.account_id FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2' ), dual_accounts AS ( SELECT account_id FROM l2_accounts INTERSECT SELECT DISTINCT o2.account_id FROM orders o2 JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1' ), dual_tickets AS ( SELECT o.out_trade_no, o.account_id, o.pay_success_date, t.character_id, t.season_package_level FROM orders o JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level IN ('A1', 'A2') AND o.account_id IN (SELECT account_id FROM dual_accounts) ), all_chapter_records AS ( SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1 UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1 ), level_records AS ( SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level FROM all_chapter_records pr JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id WHERE l.course_level IN ('L1', 'L2') ), ticket_progress AS ( SELECT t.account_id, t.character_id, t.season_package_level, COUNT(DISTINCT lr.chapter_id) AS completed_lessons FROM dual_tickets t LEFT JOIN level_records lr ON t.character_id = lr.user_id AND lr.course_level = CASE WHEN t.season_package_level = 'A1' THEN 'L1' WHEN t.season_package_level = 'A2' THEN 'L2' END AND lr.created_at >= t.pay_success_date AND lr.created_at <= t.pay_success_date + INTERVAL '30 days' GROUP BY t.account_id, t.character_id, t.season_package_level ), account_level_progress AS ( SELECT account_id, CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level, MAX(completed_lessons) AS max_completed FROM ticket_progress GROUP BY account_id, season_package_level ) SELECT COALESCE(l1.max_completed, 0) AS l1_lessons, COALESCE(l2.max_completed, 0) AS l2_lessons, COUNT(*) AS account_count FROM (SELECT DISTINCT account_id FROM dual_accounts) da LEFT JOIN account_level_progress l1 ON da.account_id = l1.account_id AND l1.course_level = 'L1' LEFT JOIN account_level_progress l2 ON da.account_id = l2.account_id AND l2.course_level = 'L2' GROUP BY COALESCE(l1.max_completed, 0), COALESCE(l2.max_completed, 0) ORDER BY l1_lessons, l2_lessons; """ print("正在查询...") cur.execute(sql) rows = cur.fetchall() # 定义 L1 分桶 def bucket_l1(v): if v == 0: return (0, "0节") elif v <= 5: return (1, "1-5节") elif v <= 10: return (2, "6-10节") elif v <= 15: return (3, "11-15节") elif v <= 20: return (4, "16-20节") elif v <= 25: return (5, "21-25节") elif v <= 30: return (6, "26-30节") else: return (7, "31+节") # 定义 L2 分桶(更细一些因为L2集中在低段) def bucket_l2(v): if v == 0: return (0, "0节") elif v <= 3: return (1, "1-3节") elif v <= 5: return (2, "4-5节") elif v <= 10: return (3, "6-10节") elif v <= 15: return (4, "11-15节") elif v <= 20: return (5, "16-20节") else: return (6, "21+节") # 构建交叉表 l1_buckets = [(0,"0节"),(1,"1-5节"),(2,"6-10节"),(3,"11-15节"),(4,"16-20节"),(5,"21-25节"),(6,"26-30节"),(7,"31+节")] l2_buckets = [(0,"0节"),(1,"1-3节"),(2,"4-5节"),(3,"6-10节"),(4,"11-15节"),(5,"16-20节"),(6,"21+节")] # 聚合到桶 from collections import defaultdict cross = defaultdict(lambda: defaultdict(int)) l1_row_total = defaultdict(int) l2_col_total = defaultdict(int) for l1_val, l2_val, cnt in rows: l1b = bucket_l1(l1_val)[0] l2b = bucket_l2(l2_val)[0] cross[l1b][l2b] += cnt l1_row_total[l1b] += cnt l2_col_total[l2b] += cnt total = sum(l1_row_total.values()) # 打印 print(f"\n{'='*80}") print(f"双激活用户 L1 学习进度 × L2 学习进度 交叉分布({total} 人)") print(f"{'='*80}\n") # 表头 header = f"{'L1 ↓ / L2 →':>14}" for _, l2l in l2_buckets: header += f" | {l2l:>8}" header += f" | {'L1合计':>8}" print(header) print("-" * len(header)) for l1k, l1l in l1_buckets: line = f"{l1l:>14}" row_total = l1_row_total[l1k] for l2k, _ in l2_buckets: val = cross[l1k][l2k] if row_total > 0: pct = val / row_total * 100 line += f" | {val:>4}({pct:>4.0f}%)" else: line += f" | {val:>4}( 0%)" line += f" | {row_total:>8}" print(line) # 列合计行 print("-" * len(header)) line = f"{'L2合计':>14}" for l2k, _ in l2_buckets: line += f" | {l2_col_total[l2k]:>8}" line += f" | {total:>8}" print(line) # 对每个 L1 区间补充:L2 的均值和中位数 print(f"\n{'='*80}") print("各 L1 区间下的 L2 学习进度摘要") print(f"{'='*80}") print(f"{'L1区间':>12} | {'L2均值':>8} | {'L2中位':>8} | {'L2=0节%':>8} | {'L2≤3节%':>9}") print("-" * 55) import statistics for l1k, l1l in l1_buckets: vals = [] for l1_val, l2_val, cnt in rows: if bucket_l1(l1_val)[0] == l1k: vals.extend([l2_val] * cnt) if vals: avg = sum(vals) / len(vals) med = statistics.median(vals) zero = sum(1 for v in vals if v == 0) / len(vals) * 100 le3 = sum(1 for v in vals if v <= 3) / len(vals) * 100 print(f"{l1l:>12} | {avg:>7.1f} | {med:>7.0f} | {zero:>7.1f}% | {le3:>8.1f}%") cur.close() conn.close() print("\n查询完成。")