ai_member_xiaoxi/scripts/course_consumption_by_level.py
2026-05-15 08:00:01 +08:00

193 lines
7.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
课消指标:按周统计 2025-09-01 ~ 2026-05-10按 L1/L2/L1+L2 拆分
"""
import psycopg2
from collections import defaultdict
from datetime import datetime, timedelta, date
conn = psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591, user="ai_member",
password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi"
)
cur = conn.cursor()
# ===== 时间参数 =====
overall_start = date(2025, 9, 1)
overall_end = date(2026, 5, 11)
# 生成周列表(周一~周日)
weeks = []
d = overall_start
while d < overall_end:
ws = d
days_to_sunday = 6 - d.weekday()
we = d + timedelta(days=days_to_sunday)
if we >= overall_end:
we = overall_end - timedelta(days=1)
weeks.append((ws, we))
d = we + timedelta(days=1)
print(f"统计区间: {overall_start} ~ {overall_end - timedelta(days=1)}, 共 {len(weeks)}")
# ===== Step 1: 用户 L1/L2 分类 + 付费状态 =====
print("\nStep 1: 分类付费用户...")
cur.execute("""
SELECT o.account_id, o.trade_no, o.order_status, o.pay_success_date,
CASE
WHEN o.goods_id IN (57, 60, 63) THEN 'L1'
WHEN o.goods_id = 61 THEN 'L1+L2'
WHEN o.goods_id IN (31, 32, 33, 54) THEN 'L2'
ELSE '其他'
END as level_type
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE a.status = 1 AND a.deleted_at IS NULL
AND o.pay_success_date IS NOT NULL
""")
orders = cur.fetchall()
print(f" 订单数: {len(orders)}")
cur.execute("SELECT trade_no FROM bi_refund_order WHERE status = 3")
refund_trades = set(r[0] for r in cur.fetchall())
# {account_id: {'levels': set, 'orders': [(pay_date, is_refunded, level), ...]}}
user_data = defaultdict(lambda: {'levels': set(), 'orders': []})
for aid, trade_no, order_status, pay_date, lt in orders:
is_refunded = (order_status == 4 and trade_no in refund_trades)
user_data[aid]['levels'].add(lt)
user_data[aid]['orders'].append((pay_date.date(), is_refunded, lt))
# 确定每位用户的 L1/L2 分类
def classify_user(levels):
has_l1 = 'L1' in levels
has_l2 = 'L2' in levels
has_l1l2 = 'L1+L2' in levels
if has_l1l2 or (has_l1 and has_l2):
return 'L1+L2'
elif has_l1:
return '仅L1'
elif has_l2:
return '仅L2'
return '其他'
for aid in user_data:
user_data[aid]['category'] = classify_user(user_data[aid]['levels'])
# 统计各类用户数
cats = defaultdict(int)
for aid, d in user_data.items():
cats[d['category']] += 1
print(f" 仅L1: {cats['仅L1']}, 仅L2: {cats['仅L2']}, L1+L2: {cats['L1+L2']}, 其他: {cats['其他']}")
# 判断某用户截至某日是否为付费用户
def is_paid_as_of(aid, as_of_date):
d = user_data[aid]
unpaid = sum(1 for pd, ref, lt in d['orders'] if pd <= as_of_date and not ref)
return unpaid > 0
# ===== Step 2: 课消记录 =====
print("\nStep 2: 查询课消...")
consumption_map = {} # (user_id, chapter_id) -> earliest date
for table_idx in range(8):
tbl = f"bi_user_chapter_play_record_{table_idx}"
cur.execute(f"""
SELECT user_id, chapter_id, updated_at
FROM {tbl}
WHERE play_status = 1
AND updated_at >= '2025-09-01'
AND updated_at < '2026-05-11'
""")
cnt = 0
for user_id, chapter_id, updated_at in cur.fetchall():
key = (user_id, chapter_id)
d = updated_at.date() if hasattr(updated_at, 'date') else datetime.strptime(str(updated_at)[:10], '%Y-%m-%d').date()
if key not in consumption_map or d < consumption_map[key]:
consumption_map[key] = d
cnt += 1
print(f" {tbl}: {cnt}")
print(f" 去重后: {len(consumption_map)}")
# ===== Step 3: character -> account =====
print("\nStep 3: 角色映射...")
all_uids = list(set(k[0] for k in consumption_map))
char2acct = {}
bs = 500
for i in range(0, len(all_uids), bs):
batch = all_uids[i:i+bs]
ph = ','.join(['%s'] * len(batch))
cur.execute(f"SELECT id, account_id FROM bi_vala_app_character WHERE id IN ({ph})", batch)
for cid, aid in cur.fetchall():
char2acct[cid] = aid
print(f" 映射: {len(char2acct)}")
# ===== Step 4: 按周 + 按分类汇总 =====
print("\nStep 4: 按周汇总...\n")
results = []
for ws, we in weeks:
# 截至 we 的付费用户(按分类)
paid_by_cat = defaultdict(set)
for aid in user_data:
if is_paid_as_of(aid, we):
cat = user_data[aid]['category']
paid_by_cat[cat].add(aid)
# 该周课消(付费用户)
cons_by_cat = defaultdict(int)
cons_users_by_cat = defaultdict(set)
for (uid, ch_id), cons_date in consumption_map.items():
if ws <= cons_date <= we:
aid = char2acct.get(uid)
if aid:
cat = user_data.get(aid, {}).get('category', '其他')
if aid in paid_by_cat.get(cat, set()):
cons_by_cat[cat] += 1
cons_users_by_cat[cat].add(aid)
week_label = f"{ws.strftime('%m/%d')}-{we.strftime('%m/%d')}"
row = {'week': week_label, 'ws': ws, 'we': we}
for cat in ['仅L1', '仅L2', 'L1+L2', '其他', '合计']:
if cat == '合计':
n_paid = sum(len(v) for v in paid_by_cat.values())
n_cons = sum(cons_by_cat.values())
n_cons_users = len(set.union(*cons_users_by_cat.values())) if cons_users_by_cat else 0
else:
n_paid = len(paid_by_cat.get(cat, set()))
n_cons = cons_by_cat.get(cat, 0)
n_cons_users = len(cons_users_by_cat.get(cat, set()))
avg_all = n_cons / n_paid if n_paid > 0 else 0
avg_cons = n_cons / n_cons_users if n_cons_users > 0 else 0
row[f'{cat}_paid'] = n_paid
row[f'{cat}_cons'] = n_cons
row[f'{cat}_users'] = n_cons_users
row[f'{cat}_avg_all'] = avg_all
row[f'{cat}_avg_cons'] = avg_cons
results.append(row)
print(f" {week_label} | 合计:付费{row['合计_paid']} 课消{row['合计_cons']} "
f"人均{row['合计_avg_all']:.2f} | "
f"L1:{row['仅L1_avg_all']:.2f} L2:{row['仅L2_avg_all']:.2f} L1+L2:{row['L1+L2_avg_all']:.2f}")
# ===== 输出完整表 =====
print("\n" + "="*120)
header = f"{'':<12} {'合计付费':>6} {'合计课消':>7} {'合计人均':>7} | {'L1付费':>6} {'L1课消':>6} {'L1人均':>6} {'L1有消人均':>7} | {'L2付费':>6} {'L2课消':>6} {'L2人均':>6} {'L2有消人均':>7} | {'L1L2付费':>7} {'L1L2课消':>7} {'L1L2人均':>7} {'L1L2有消人均':>8}"
print(header)
print("-"*120)
for r in results:
print(f"{r['week']:<12} {r['合计_paid']:>6} {r['合计_cons']:>7} {r['合计_avg_all']:>7.2f} | "
f"{r['仅L1_paid']:>6} {r['仅L1_cons']:>6} {r['仅L1_avg_all']:>6.2f} {r['仅L1_avg_cons']:>7.2f} | "
f"{r['仅L2_paid']:>6} {r['仅L2_cons']:>6} {r['仅L2_avg_all']:>6.2f} {r['仅L2_avg_cons']:>7.2f} | "
f"{r['L1+L2_paid']:>7} {r['L1+L2_cons']:>7} {r['L1+L2_avg_all']:>7.2f} {r['L1+L2_avg_cons']:>8.2f}")
cur.close()
conn.close()
print("\n完成!")