#!/usr/bin/env python3
"""Cross-distribution of L1 vs. L2 learning progress for dual-activated users.

Shows the L2 progress distribution within each L1 progress range.
"""

import os

import psycopg2

# Database connection.
#
# The password used to be hard-coded here in plaintext. It is now read from
# the VALA_BI_DB_PASSWORD environment variable so that it never lives in
# source control. NOTE(review): the previously committed password should be
# treated as leaked and rotated.
#
# The non-secret parameters keep their previous values as defaults and can be
# overridden through the environment as well.
_db_password = os.environ.get("VALA_BI_DB_PASSWORD")
if not _db_password:
    raise SystemExit(
        "VALA_BI_DB_PASSWORD is not set; export the database password "
        "before running this script."
    )

conn = psycopg2.connect(
    host=os.environ.get(
        "VALA_BI_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"
    ),
    port=int(os.environ.get("VALA_BI_DB_PORT", "28591")),
    user=os.environ.get("VALA_BI_DB_USER", "ai_member"),
    password=_db_password,
    dbname=os.environ.get("VALA_BI_DB_NAME", "vala_bi"),
)

# Single cursor shared by the rest of the script.
cur = conn.cursor()

# Aggregate query. Each result row is (l1_lessons, l2_lessons, account_count).
#
# CTE overview:
#   orders                 - orders paid on/after 2026-03-01 with
#                            order_status != 4 whose account has status = 1.
#   l2_accounts            - accounts owning a live (status = 1, not deleted)
#                            seasonal ticket of package level 'A2'.
#   dual_accounts          - l2_accounts intersected with the accounts owning
#                            a live 'A1' ticket.
#   dual_tickets           - the live A1/A2 tickets of those accounts,
#                            including the ticket's character_id.
#   all_chapter_records    - rows with play_status = 1 from the eight
#                            bi_user_chapter_play_record_N shard tables.
#   level_records          - those records joined to bi_level_unit_lesson,
#                            restricted to course levels 'L1' and 'L2'.
#   ticket_progress        - per (account, character, package level): number
#                            of distinct chapters of the matching course level
#                            (A1 -> L1, A2 -> L2) recorded within 30 days
#                            after pay_success_date.
#   account_level_progress - per account and course level, the maximum of
#                            those counts.
# The final SELECT counts accounts per (L1 lessons, L2 lessons) pair, using 0
# where an account has no progress row for a level.
sql = """
WITH orders AS (
    SELECT o.out_trade_no, o.account_id, o.pay_success_date
    FROM bi_vala_order o
    JOIN bi_vala_app_account a ON o.account_id = a.id
    WHERE o.pay_success_date >= '2026-03-01'
      AND o.order_status != 4
      AND o.pay_success_date IS NOT NULL
      AND a.status = 1
),
l2_accounts AS (
    SELECT DISTINCT o.account_id
    FROM orders o
    JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
    WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2'
),
dual_accounts AS (
    SELECT account_id FROM l2_accounts
    INTERSECT
    SELECT DISTINCT o2.account_id FROM orders o2
    JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no
    WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1'
),
dual_tickets AS (
    SELECT o.out_trade_no, o.account_id, o.pay_success_date,
           t.character_id, t.season_package_level
    FROM orders o
    JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
    WHERE t.status = 1 AND t.deleted_at IS NULL
      AND t.season_package_level IN ('A1', 'A2')
      AND o.account_id IN (SELECT account_id FROM dual_accounts)
),
all_chapter_records AS (
    SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
    UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
),
level_records AS (
    SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level
    FROM all_chapter_records pr
    JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id
    WHERE l.course_level IN ('L1', 'L2')
),
ticket_progress AS (
    SELECT
        t.account_id,
        t.character_id,
        t.season_package_level,
        COUNT(DISTINCT lr.chapter_id) AS completed_lessons
    FROM dual_tickets t
    LEFT JOIN level_records lr
        ON t.character_id = lr.user_id
        AND lr.course_level = CASE
            WHEN t.season_package_level = 'A1' THEN 'L1'
            WHEN t.season_package_level = 'A2' THEN 'L2'
        END
        AND lr.created_at >= t.pay_success_date
        AND lr.created_at <= t.pay_success_date + INTERVAL '30 days'
    GROUP BY t.account_id, t.character_id, t.season_package_level
),
account_level_progress AS (
    SELECT
        account_id,
        CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level,
        MAX(completed_lessons) AS max_completed
    FROM ticket_progress
    GROUP BY account_id, season_package_level
)
SELECT
    COALESCE(l1.max_completed, 0) AS l1_lessons,
    COALESCE(l2.max_completed, 0) AS l2_lessons,
    COUNT(*) AS account_count
FROM (SELECT DISTINCT account_id FROM dual_accounts) da
LEFT JOIN account_level_progress l1 ON da.account_id = l1.account_id AND l1.course_level = 'L1'
LEFT JOIN account_level_progress l2 ON da.account_id = l2.account_id AND l2.course_level = 'L2'
GROUP BY COALESCE(l1.max_completed, 0), COALESCE(l2.max_completed, 0)
ORDER BY l1_lessons, l2_lessons;
"""

# Run the aggregate query and load the whole result set into memory.
# Each row is (l1_lessons, l2_lessons, account_count).
print("正在查询...")
try:
    cur.execute(sql)
    rows = cur.fetchall()
except psycopg2.Error:
    # Do not leave the cursor/connection open when the query fails; the
    # original error is re-raised unchanged.
    cur.close()
    conn.close()
    raise

# L1 bucket definition.
def bucket_l1(v):
    """Map an L1 completed-lesson count to its ``(bucket_index, label)`` pair.

    Args:
        v: Number of completed L1 lessons.

    Returns:
        A tuple ``(index, label)``. Index 0 means exactly zero lessons,
        indexes 1-6 are the ranges ending at 5, 10, 15, 20, 25 and 30
        (inclusive), and index 7 covers everything above 30.
    """
    if v == 0:
        return (0, "0节")

    # (inclusive upper bound, bucket index, label), in ascending order.
    ranges = (
        (5, 1, "1-5节"),
        (10, 2, "6-10节"),
        (15, 3, "11-15节"),
        (20, 4, "16-20节"),
        (25, 5, "21-25节"),
        (30, 6, "26-30节"),
    )
    for upper, index, label in ranges:
        if v <= upper:
            return (index, label)
    return (7, "31+节")


# L2 bucket definition (finer-grained, because L2 counts concentrate in the
# low range).
def bucket_l2(v):
    """Map an L2 completed-lesson count to its ``(bucket_index, label)`` pair.

    Args:
        v: Number of completed L2 lessons.

    Returns:
        A tuple ``(index, label)``. Index 0 means exactly zero lessons,
        indexes 1-5 are the ranges ending at 3, 5, 10, 15 and 20
        (inclusive), and index 6 covers everything above 20.
    """
    if v == 0:
        return (0, "0节")

    # (inclusive upper bound, bucket index, label), in ascending order.
    ranges = (
        (3, 1, "1-3节"),
        (5, 2, "4-5节"),
        (10, 3, "6-10节"),
        (15, 4, "11-15节"),
        (20, 5, "16-20节"),
    )
    for upper, index, label in ranges:
        if v <= upper:
            return (index, label)
    return (6, "21+节")


# Build the cross table.
# Ordered (bucket_index, label) pairs for the two axes; they mirror the
# values returned by bucket_l1() / bucket_l2().
l1_buckets = [
    (0, "0节"),
    (1, "1-5节"),
    (2, "6-10节"),
    (3, "11-15节"),
    (4, "16-20节"),
    (5, "21-25节"),
    (6, "26-30节"),
    (7, "31+节"),
]
l2_buckets = [
    (0, "0节"),
    (1, "1-3节"),
    (2, "4-5节"),
    (3, "6-10节"),
    (4, "11-15节"),
    (5, "16-20节"),
    (6, "21+节"),
]

# Aggregate the per-(l1, l2) account counts into buckets.
from collections import defaultdict

# cross[l1_bucket][l2_bucket] -> number of accounts in that cell.
cross = defaultdict(lambda: defaultdict(int))
l1_row_total = defaultdict(int)  # accounts per L1 bucket (row totals)
l2_col_total = defaultdict(int)  # accounts per L2 bucket (column totals)

for l1_val, l2_val, cnt in rows:
    l1b = bucket_l1(l1_val)[0]
    l2b = bucket_l2(l2_val)[0]
    cross[l1b][l2b] += cnt
    l1_row_total[l1b] += cnt
    l2_col_total[l2b] += cnt

# Grand total of accounts across all buckets.
total = sum(l1_row_total.values())

# Print the cross table: one row per L1 bucket, one column per L2 bucket.
print(f"\n{'='*80}")
print(f"双激活用户 L1 学习进度 × L2 学习进度 交叉分布({total} 人)")
print(f"{'='*80}\n")

# Every data cell is rendered as "count(pct%)": 4 + 1 + 4 + 2 = 11 characters.
# Header and total cells use the same width so that the columns line up
# (by character count; CJK labels may still render wider in a terminal).
cell_width = 11

# Header row.
header = " | ".join(
    [f"{'L1 ↓ / L2 →':>14}"]
    + [f"{l2l:>{cell_width}}" for _, l2l in l2_buckets]
    + [f"{'L1合计':>{cell_width}}"]
)
print(header)
print("-" * len(header))

# Data rows: each cell shows the count and its share of the L1 row total.
for l1k, l1l in l1_buckets:
    row_total = l1_row_total[l1k]
    cells = []
    for l2k, _ in l2_buckets:
        val = cross[l1k][l2k]
        # An empty row has no meaningful share; show 0% instead of dividing
        # by zero.
        pct = val / row_total * 100 if row_total > 0 else 0.0
        cells.append(f"{val:>4}({pct:>4.0f}%)")
    print(" | ".join([f"{l1l:>14}", *cells, f"{row_total:>{cell_width}}"]))

# Column-total row.
print("-" * len(header))
print(
    " | ".join(
        [f"{'L2合计':>14}"]
        + [f"{l2_col_total[l2k]:>{cell_width}}" for l2k, _ in l2_buckets]
        + [f"{total:>{cell_width}}"]
    )
)

# For every L1 bucket, print a summary of the L2 lesson counts: mean, median,
# share of accounts with zero L2 lessons and share with at most three.
import statistics

print(f"\n{'='*80}")
print("各 L1 区间下的 L2 学习进度摘要")
print(f"{'='*80}")
print(f"{'L1区间':>12} | {'L2均值':>8} | {'L2中位':>8} | {'L2=0节%':>8} | {'L2≤3节%':>9}")
print("-" * 55)

# Group the L2 lesson counts by L1 bucket in a single pass over the rows.
# Each count is repeated once per account so that the statistics below are
# weighted by the number of accounts.
l2_by_l1_bucket = {}
for l1_val, l2_val, cnt in rows:
    l2_by_l1_bucket.setdefault(bucket_l1(l1_val)[0], []).extend([l2_val] * cnt)

for l1k, l1l in l1_buckets:
    vals = l2_by_l1_bucket.get(l1k)
    if not vals:
        # L1 buckets without any account are omitted from the summary.
        continue
    n = len(vals)
    avg = sum(vals) / n
    med = statistics.median(vals)
    zero = sum(1 for v in vals if v == 0) / n * 100
    le3 = sum(1 for v in vals if v <= 3) / n * 100
    print(f"{l1l:>12} | {avg:>7.1f} | {med:>7.0f} | {zero:>7.1f}% | {le3:>8.1f}%")

# Release the cursor and the connection once all output has been printed.
cur.close()
conn.close()
print("\n查询完成。")