ai_member_xiaoxi/scripts/course_excel_v3.py
2026-05-15 08:00:01 +08:00

288 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import psycopg2
from collections import defaultdict
from datetime import datetime, timedelta, date
import openpyxl
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.chart import LineChart, BarChart, Reference
from openpyxl.utils import get_column_letter
conn = psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591, user="ai_member",
password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi"
)
cur = conn.cursor()
u0_chapters = {55, 56, 57, 58, 59, 343, 344, 345, 346, 348}
overall_start = date(2025, 9, 1)
overall_end = date(2026, 5, 11)
weeks = []
d = overall_start
while d < overall_end:
ws = d
we = d + timedelta(days=6 - d.weekday())
if we >= overall_end: we = overall_end - timedelta(days=1)
weeks.append((ws, we))
d = we + timedelta(days=1)
print("分类付费用户...")
cur.execute("""
SELECT o.account_id, o.trade_no, o.order_status, o.pay_success_date,
CASE WHEN o.goods_id IN (57, 60, 63) THEN 'L1'
WHEN o.goods_id = 61 THEN 'L1+L2'
WHEN o.goods_id IN (31, 32, 33, 54) THEN 'L2'
ELSE '其他' END as level_type
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE a.status = 1 AND a.deleted_at IS NULL AND o.pay_success_date IS NOT NULL
""")
orders = cur.fetchall()
cur.execute("SELECT trade_no FROM bi_refund_order WHERE status = 3")
refund_trades = set(r[0] for r in cur.fetchall())
user_levels = defaultdict(set)
user_orders = defaultdict(list)
for aid, trade_no, order_status, pay_date, lt in orders:
is_refunded = (order_status == 4 and trade_no in refund_trades)
user_levels[aid].add(lt)
user_orders[aid].append((pay_date.date(), is_refunded))
def is_paid(aid, as_of):
return sum(1 for pd, ref in user_orders[aid] if pd <= as_of and not ref) > 0
l1_pool = {aid for aid, lv in user_levels.items() if 'L1' in lv or 'L1+L2' in lv}
l2_pool = {aid for aid, lv in user_levels.items() if 'L2' in lv or 'L1+L2' in lv}
all_pool = l1_pool | l2_pool
print(f"L1池: {len(l1_pool)}, L2池: {len(l2_pool)}, 合计: {len(all_pool)}")
print("查询课消...")
cons_map = {}
for ti in range(8):
tbl = f"bi_user_chapter_play_record_{ti}"
cur.execute(f"""SELECT user_id, chapter_id, updated_at FROM {tbl}
WHERE play_status = 1 AND updated_at >= '2025-09-01' AND updated_at < '2026-05-11'""")
for uid, cid, ua in cur.fetchall():
if cid in u0_chapters: continue
key = (uid, cid)
d = ua.date() if hasattr(ua, 'date') else datetime.strptime(str(ua)[:10], '%Y-%m-%d').date()
if key not in cons_map or d < cons_map[key]:
cons_map[key] = d
print("角色映射...")
all_uids = list(set(k[0] for k in cons_map))
char2acct = {}
for i in range(0, len(all_uids), 500):
batch = all_uids[i:i+500]
ph = ','.join(['%s'] * len(batch))
cur.execute(f"SELECT id, account_id FROM bi_vala_app_character WHERE id IN ({ph})", batch)
for cid, aid in cur.fetchall(): char2acct[cid] = aid
print("按周汇总...")
results = []
for ws, we in weeks:
l1_paid = {aid for aid in l1_pool if is_paid(aid, we)}
l2_paid = {aid for aid in l2_pool if is_paid(aid, we)}
t_paid = {aid for aid in all_pool if is_paid(aid, we)}
l1_cons, l1_cons_users = 0, set()
l2_cons, l2_cons_users = 0, set()
t_cons, t_cu = 0, set()
for (uid, ch_id), cons_date in cons_map.items():
if ws <= cons_date <= we:
aid = char2acct.get(uid)
if not aid: continue
if aid in l1_paid:
l1_cons += 1
l1_cons_users.add(aid)
if aid in l2_paid:
l2_cons += 1
l2_cons_users.add(aid)
if aid in t_paid:
t_cons += 1
t_cu.add(aid)
results.append({
'ws': ws, 'we': we,
'L1_paid': len(l1_paid), 'L1_cons': l1_cons, 'L1_cons_users': len(l1_cons_users),
'L1_no_cons': len(l1_paid) - len(l1_cons_users),
'L1_avg_all': round(l1_cons / len(l1_paid), 2) if l1_paid else 0,
'L1_avg_cons': round(l1_cons / len(l1_cons_users), 2) if l1_cons_users else 0,
'L2_paid': len(l2_paid), 'L2_cons': l2_cons, 'L2_cons_users': len(l2_cons_users),
'L2_no_cons': len(l2_paid) - len(l2_cons_users),
'L2_avg_all': round(l2_cons / len(l2_paid), 2) if l2_paid else 0,
'L2_avg_cons': round(l2_cons / len(l2_cons_users), 2) if l2_cons_users else 0,
'total_paid': len(t_paid), 'total_cons': t_cons, 'total_cons_users': len(t_cu),
'total_no_cons': len(t_paid) - len(t_cu),
'total_avg_all': round(t_cons / len(t_paid), 2) if t_paid else 0,
'total_avg_cons': round(t_cons / len(t_cu), 2) if t_cu else 0,
})
cur.close()
conn.close()
print("\n生成 Excel...")
wb = openpyxl.Workbook()
wb.remove(wb.active)
hfont = Font(name='微软雅黑', bold=True, size=9, color='FFFFFF')
hfill = PatternFill(start_color='2F5496', end_color='2F5496', fill_type='solid')
dfont = Font(name='微软雅黑', size=9)
tfont = Font(name='微软雅黑', bold=True, size=14, color='2F5496')
sfont = Font(name='微软雅黑', bold=True, size=11, color='2F5496')
bd = Border(left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin'))
ctr = Alignment(horizontal='center', vertical='center')
def ac(ws, r, c, v, font=dfont, fill=None, align=ctr):
cl = ws.cell(row=r, column=c, value=v)
cl.font, cl.border, cl.alignment = font, bd, align
if fill: cl.fill = fill
return cl
def ah(ws, r, c, v):
cl = ws.cell(row=r, column=c, value=v)
cl.font, cl.fill, cl.border, cl.alignment = hfont, hfill, bd, ctr
return cl
# Sheet 1: 概览
ws1 = wb.create_sheet("概览")
ws1.merge_cells('A1:H1')
ac(ws1, 1, 1, "付费用户课消分析剔除U0序章", font=tfont, fill=None, align=Alignment(horizontal='left'))
notes = [
"口径L1付费用户 = 买过L1商品(含L1+L2)的付费用户 | L2付费用户 = 买过L2商品(含L1+L2)的付费用户",
"L1+L2用户同时出现在L1和L2两个视角中 | 合计为去重统计",
"课消用户首次完成某一课时剔除U0序章仅U1+",
"付费用户status=1 + 未删除 + 有未退款订单",
]
for i, n in enumerate(notes):
ws1.merge_cells(f'A{3+i}:H{3+i}')
ac(ws1, 3+i, 1, n, font=Font(name='微软雅黑', size=9, color='666666'), fill=None, align=Alignment(horizontal='left'))
row = 9
ws1.merge_cells(f'A{row}:H{row}')
ac(ws1, row, 1, "汇总(截至最后一周)", font=sfont, fill=None, align=Alignment(horizontal='left'))
row += 1
for j, h in enumerate(['分类', '付费用户', '有课消', '无课消', '无课消率', '人均课消', '有消人均'], 1):
ah(ws1, row, j, h)
row += 1
last = results[-1]
summary = [
('L1付费群', last['L1_paid'], last['L1_cons_users'], last['L1_no_cons'], last['L1_avg_all'], last['L1_avg_cons'], '#A8CFF1'),
('L2付费群', last['L2_paid'], last['L2_cons_users'], last['L2_no_cons'], last['L2_avg_all'], last['L2_avg_cons'], '#F4A9A0'),
('合计(去重)', last['total_paid'], last['total_cons_users'], last['total_no_cons'], last['total_avg_all'], last['total_avg_cons'], '#C8E6C9'),
]
for name, p, cu, nc, aa, ac_, clr in summary:
no_rate = f"{nc/p*100:.0f}%" if p else "0%"
fl = PatternFill(start_color='00'+clr[1:], end_color='00'+clr[1:], fill_type='solid')
for j, v in enumerate([name, p, cu, nc, no_rate, aa, ac_], 1):
f = Font(name='微软雅黑', bold=(j==1), size=10)
ac(ws1, row, j, v, font=f, fill=fl)
row += 1
# Sheet 2: 每周明细
ws2 = wb.create_sheet("每周明细")
headers = ['', '周一起', '周日']
for prefix in ['合计', 'L1付费群', 'L2付费群']:
for m in ['付费', '有消', '无消', '课消', '人均', '有消人均']:
headers.append(f'{prefix}{m}')
for j, h in enumerate(headers, 1):
ah(ws2, 1, j, h)
for ri, r in enumerate(results):
rw = ri + 2
ac(ws2, rw, 1, r['ws'].strftime('%m/%d'))
ac(ws2, rw, 2, r['ws'].strftime('%Y-%m-%d'))
ac(ws2, rw, 3, r['we'].strftime('%Y-%m-%d'))
col = 4
for prefix in ['total', 'L1', 'L2']:
for k in ['paid', 'cons_users', 'no_cons', 'cons', 'avg_all', 'avg_cons']:
ac(ws2, rw, col, r[f'{prefix}_{k}'])
col += 1
for ci in range(1, len(headers)+1):
ws2.column_dimensions[get_column_letter(ci)].width = 11 if ci <= 3 else 10
ws2.freeze_panes = 'D2'
# Sheet 3: L1图表
ws_l1 = wb.create_sheet("L1图表")
lh = ['', '付费用户', '有课消用户', '无课消用户', '课消总数', '人均课消', '有消人均']
first = next(i for i, r in enumerate(results) if r['L1_paid'] > 0)
l1d = results[first:]
for j, h in enumerate(lh, 1): ah(ws_l1, 1, j, h)
for ri, r in enumerate(l1d):
rw = ri + 2
ac(ws_l1, rw, 1, r['ws'].strftime('%m/%d'))
for j, k in enumerate(['L1_paid','L1_cons_users','L1_no_cons','L1_cons','L1_avg_all','L1_avg_cons'], 2):
ac(ws_l1, rw, j, r[k])
n = len(l1d)
cr = Reference(ws_l1, min_col=1, min_row=2, max_row=n+1)
ch1 = BarChart(); ch1.type = "col"; ch1.grouping = "stacked"
ch1.title = "L1付费用户周课消分布剔除U0序章"; ch1.style = 10; ch1.width = 24; ch1.height = 13
r1 = Reference(ws_l1, min_col=3, min_row=1, max_row=n+1); ch1.add_data(r1, titles_from_data=True)
r2 = Reference(ws_l1, min_col=4, min_row=1, max_row=n+1); ch1.add_data(r2, titles_from_data=True)
ch1.set_categories(cr)
ch1.series[0].graphicalProperties.solidFill = 'A8CFF1'
ch1.series[1].graphicalProperties.solidFill = 'D9D9D9'
ch1.y_axis.title = '用户数'; ch1.legend.position = 'b'
ws_l1.add_chart(ch1, "A9")
ch2 = LineChart(); ch2.title = "L1付费用户周人均课消趋势剔除U0序章"; ch2.style = 10; ch2.width = 24; ch2.height = 13
r3 = Reference(ws_l1, min_col=6, min_row=1, max_row=n+1); ch2.add_data(r3, titles_from_data=True)
r4 = Reference(ws_l1, min_col=7, min_row=1, max_row=n+1); ch2.add_data(r4, titles_from_data=True)
ch2.set_categories(cr)
ch2.series[0].graphicalProperties.line.solidFill = '999999'; ch2.series[0].graphicalProperties.line.width = 20000
ch2.series[1].graphicalProperties.line.solidFill = '4A90D9'; ch2.series[1].graphicalProperties.line.width = 28000
ch2.y_axis.scaling.min = 0; ch2.y_axis.title = '课消数(节/周)'; ch2.legend.position = 'b'
ws_l1.add_chart(ch2, "A27")
for ci in range(1, 8): ws_l1.column_dimensions[get_column_letter(ci)].width = 12
# Sheet 4: L2图表
ws_l2 = wb.create_sheet("L2图表")
first2 = next(i for i, r in enumerate(results) if r['L2_paid'] > 0)
l2d = results[first2:]
for j, h in enumerate(lh, 1): ah(ws_l2, 1, j, h)
for ri, r in enumerate(l2d):
rw = ri + 2
ac(ws_l2, rw, 1, r['ws'].strftime('%m/%d'))
for j, k in enumerate(['L2_paid','L2_cons_users','L2_no_cons','L2_cons','L2_avg_all','L2_avg_cons'], 2):
ac(ws_l2, rw, j, r[k])
n2 = len(l2d)
cr2 = Reference(ws_l2, min_col=1, min_row=2, max_row=n2+1)
ch3 = BarChart(); ch3.type = "col"; ch3.grouping = "stacked"
ch3.title = "L2付费用户周课消分布剔除U0序章"; ch3.style = 10; ch3.width = 24; ch3.height = 13
r5 = Reference(ws_l2, min_col=3, min_row=1, max_row=n2+1); ch3.add_data(r5, titles_from_data=True)
r6 = Reference(ws_l2, min_col=4, min_row=1, max_row=n2+1); ch3.add_data(r6, titles_from_data=True)
ch3.set_categories(cr2)
ch3.series[0].graphicalProperties.solidFill = 'F4A9A0'
ch3.series[1].graphicalProperties.solidFill = 'D9D9D9'
ch3.y_axis.title = '用户数'; ch3.legend.position = 'b'
ws_l2.add_chart(ch3, "A9")
ch4 = LineChart(); ch4.title = "L2付费用户周人均课消趋势剔除U0序章"; ch4.style = 10; ch4.width = 24; ch4.height = 13
r7 = Reference(ws_l2, min_col=6, min_row=1, max_row=n2+1); ch4.add_data(r7, titles_from_data=True)
r8 = Reference(ws_l2, min_col=7, min_row=1, max_row=n2+1); ch4.add_data(r8, titles_from_data=True)
ch4.set_categories(cr2)
ch4.series[0].graphicalProperties.line.solidFill = '999999'; ch4.series[0].graphicalProperties.line.width = 20000
ch4.series[1].graphicalProperties.line.solidFill = 'E85D47'; ch4.series[1].graphicalProperties.line.width = 28000
ch4.y_axis.scaling.min = 0; ch4.y_axis.title = '课消数(节/周)'; ch4.legend.position = 'b'
ws_l2.add_chart(ch4, "A27")
for ci in range(1, 8): ws_l2.column_dimensions[get_column_letter(ci)].width = 12
path = '/root/.openclaw/workspace/output/course_consumption_by_level_v3.xlsx'
wb.save(path)
print(f"\n{path}")
print(f"L1付费群: {last['L1_paid']}人 | L2付费群: {last['L2_paid']}人 | 合计(去重): {last['total_paid']}")
print(f"L1无消率: {last['L1_no_cons']/last['L1_paid']*100:.0f}% | L2无消率: {last['L2_no_cons']/last['L2_paid']*100:.0f}%")