ai_member_xiaoxi/scripts/dual_progress_cross.py
2026-05-28 08:00:01 +08:00

200 lines
6.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""双激活用户 L1 vs L2 学习进度交叉分布 —— 按L1区间看L2分布"""
import psycopg2
conn = psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591,
user="ai_member",
password="LdfjdjL83h3h3^$&**YGG*",
dbname="vala_bi"
)
cur = conn.cursor()
sql = """
WITH orders AS (
SELECT o.out_trade_no, o.account_id, o.pay_success_date
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE o.pay_success_date >= '2026-03-01'
AND o.order_status != 4
AND o.pay_success_date IS NOT NULL
AND a.status = 1
),
l2_accounts AS (
SELECT DISTINCT o.account_id
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1 AND t.deleted_at IS NULL AND t.season_package_level = 'A2'
),
dual_accounts AS (
SELECT account_id FROM l2_accounts
INTERSECT
SELECT DISTINCT o2.account_id FROM orders o2
JOIN bi_vala_seasonal_ticket t2 ON o2.out_trade_no = t2.out_trade_no
WHERE t2.status = 1 AND t2.deleted_at IS NULL AND t2.season_package_level = 'A1'
),
dual_tickets AS (
SELECT o.out_trade_no, o.account_id, o.pay_success_date,
t.character_id, t.season_package_level
FROM orders o
JOIN bi_vala_seasonal_ticket t ON o.out_trade_no = t.out_trade_no
WHERE t.status = 1 AND t.deleted_at IS NULL
AND t.season_package_level IN ('A1', 'A2')
AND o.account_id IN (SELECT account_id FROM dual_accounts)
),
all_chapter_records AS (
SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_0 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_1 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_2 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_3 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_4 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_5 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_6 WHERE play_status = 1
UNION ALL SELECT user_id, chapter_id, created_at FROM bi_user_chapter_play_record_7 WHERE play_status = 1
),
level_records AS (
SELECT pr.user_id, pr.chapter_id, pr.created_at, l.course_level
FROM all_chapter_records pr
JOIN bi_level_unit_lesson l ON pr.chapter_id = l.id
WHERE l.course_level IN ('L1', 'L2')
),
ticket_progress AS (
SELECT
t.account_id,
t.character_id,
t.season_package_level,
COUNT(DISTINCT lr.chapter_id) AS completed_lessons
FROM dual_tickets t
LEFT JOIN level_records lr
ON t.character_id = lr.user_id
AND lr.course_level = CASE
WHEN t.season_package_level = 'A1' THEN 'L1'
WHEN t.season_package_level = 'A2' THEN 'L2'
END
AND lr.created_at >= t.pay_success_date
AND lr.created_at <= t.pay_success_date + INTERVAL '30 days'
GROUP BY t.account_id, t.character_id, t.season_package_level
),
account_level_progress AS (
SELECT
account_id,
CASE WHEN season_package_level = 'A1' THEN 'L1' ELSE 'L2' END AS course_level,
MAX(completed_lessons) AS max_completed
FROM ticket_progress
GROUP BY account_id, season_package_level
)
SELECT
COALESCE(l1.max_completed, 0) AS l1_lessons,
COALESCE(l2.max_completed, 0) AS l2_lessons,
COUNT(*) AS account_count
FROM (SELECT DISTINCT account_id FROM dual_accounts) da
LEFT JOIN account_level_progress l1 ON da.account_id = l1.account_id AND l1.course_level = 'L1'
LEFT JOIN account_level_progress l2 ON da.account_id = l2.account_id AND l2.course_level = 'L2'
GROUP BY COALESCE(l1.max_completed, 0), COALESCE(l2.max_completed, 0)
ORDER BY l1_lessons, l2_lessons;
"""
print("正在查询...")
cur.execute(sql)
rows = cur.fetchall()
# 定义 L1 分桶
def bucket_l1(v):
if v == 0: return (0, "0节")
elif v <= 5: return (1, "1-5节")
elif v <= 10: return (2, "6-10节")
elif v <= 15: return (3, "11-15节")
elif v <= 20: return (4, "16-20节")
elif v <= 25: return (5, "21-25节")
elif v <= 30: return (6, "26-30节")
else: return (7, "31+节")
# 定义 L2 分桶更细一些因为L2集中在低段
def bucket_l2(v):
if v == 0: return (0, "0节")
elif v <= 3: return (1, "1-3节")
elif v <= 5: return (2, "4-5节")
elif v <= 10: return (3, "6-10节")
elif v <= 15: return (4, "11-15节")
elif v <= 20: return (5, "16-20节")
else: return (6, "21+节")
# 构建交叉表
l1_buckets = [(0,"0节"),(1,"1-5节"),(2,"6-10节"),(3,"11-15节"),(4,"16-20节"),(5,"21-25节"),(6,"26-30节"),(7,"31+节")]
l2_buckets = [(0,"0节"),(1,"1-3节"),(2,"4-5节"),(3,"6-10节"),(4,"11-15节"),(5,"16-20节"),(6,"21+节")]
# 聚合到桶
from collections import defaultdict
cross = defaultdict(lambda: defaultdict(int))
l1_row_total = defaultdict(int)
l2_col_total = defaultdict(int)
for l1_val, l2_val, cnt in rows:
l1b = bucket_l1(l1_val)[0]
l2b = bucket_l2(l2_val)[0]
cross[l1b][l2b] += cnt
l1_row_total[l1b] += cnt
l2_col_total[l2b] += cnt
total = sum(l1_row_total.values())
# 打印
print(f"\n{'='*80}")
print(f"双激活用户 L1 学习进度 × L2 学习进度 交叉分布({total} 人)")
print(f"{'='*80}\n")
# 表头
header = f"{'L1 ↓ / L2 →':>14}"
for _, l2l in l2_buckets:
header += f" | {l2l:>8}"
header += f" | {'L1合计':>8}"
print(header)
print("-" * len(header))
for l1k, l1l in l1_buckets:
line = f"{l1l:>14}"
row_total = l1_row_total[l1k]
for l2k, _ in l2_buckets:
val = cross[l1k][l2k]
if row_total > 0:
pct = val / row_total * 100
line += f" | {val:>4}({pct:>4.0f}%)"
else:
line += f" | {val:>4}( 0%)"
line += f" | {row_total:>8}"
print(line)
# 列合计行
print("-" * len(header))
line = f"{'L2合计':>14}"
for l2k, _ in l2_buckets:
line += f" | {l2_col_total[l2k]:>8}"
line += f" | {total:>8}"
print(line)
# 对每个 L1 区间补充L2 的均值和中位数
print(f"\n{'='*80}")
print("各 L1 区间下的 L2 学习进度摘要")
print(f"{'='*80}")
print(f"{'L1区间':>12} | {'L2均值':>8} | {'L2中位':>8} | {'L2=0节%':>8} | {'L2≤3节%':>9}")
print("-" * 55)
import statistics
for l1k, l1l in l1_buckets:
vals = []
for l1_val, l2_val, cnt in rows:
if bucket_l1(l1_val)[0] == l1k:
vals.extend([l2_val] * cnt)
if vals:
avg = sum(vals) / len(vals)
med = statistics.median(vals)
zero = sum(1 for v in vals if v == 0) / len(vals) * 100
le3 = sum(1 for v in vals if v <= 3) / len(vals) * 100
print(f"{l1l:>12} | {avg:>7.1f} | {med:>7.0f} | {zero:>7.1f}% | {le3:>8.1f}%")
cur.close()
conn.close()
print("\n查询完成。")