#!/usr/bin/env python3 import psycopg2 from collections import defaultdict from datetime import datetime, timedelta, date import openpyxl from openpyxl.styles import Font, Alignment, PatternFill, Border, Side from openpyxl.chart import LineChart, BarChart, Reference from openpyxl.utils import get_column_letter conn = psycopg2.connect( host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591, user="ai_member", password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi" ) cur = conn.cursor() u0_chapters = {55, 56, 57, 58, 59, 343, 344, 345, 346, 348} overall_start = date(2025, 9, 1) overall_end = date(2026, 5, 11) weeks = [] d = overall_start while d < overall_end: ws = d we = d + timedelta(days=6 - d.weekday()) if we >= overall_end: we = overall_end - timedelta(days=1) weeks.append((ws, we)) d = we + timedelta(days=1) print("分类付费用户...") cur.execute(""" SELECT o.account_id, o.trade_no, o.order_status, o.pay_success_date, CASE WHEN o.goods_id IN (57, 60, 63) THEN 'L1' WHEN o.goods_id = 61 THEN 'L1+L2' WHEN o.goods_id IN (31, 32, 33, 54) THEN 'L2' ELSE '其他' END as level_type FROM bi_vala_order o INNER JOIN bi_vala_app_account a ON o.account_id = a.id WHERE a.status = 1 AND a.deleted_at IS NULL AND o.pay_success_date IS NOT NULL """) orders = cur.fetchall() cur.execute("SELECT trade_no FROM bi_refund_order WHERE status = 3") refund_trades = set(r[0] for r in cur.fetchall()) user_levels = defaultdict(set) user_orders = defaultdict(list) for aid, trade_no, order_status, pay_date, lt in orders: is_refunded = (order_status == 4 and trade_no in refund_trades) user_levels[aid].add(lt) user_orders[aid].append((pay_date.date(), is_refunded)) def is_paid(aid, as_of): return sum(1 for pd, ref in user_orders[aid] if pd <= as_of and not ref) > 0 l1_pool = {aid for aid, lv in user_levels.items() if 'L1' in lv or 'L1+L2' in lv} l2_pool = {aid for aid, lv in user_levels.items() if 'L2' in lv or 'L1+L2' in lv} all_pool = l1_pool | l2_pool print(f"L1池: {len(l1_pool)}, L2池: {len(l2_pool)}, 合计: {len(all_pool)}") print("查询课消...") cons_map = {} for ti in range(8): tbl = f"bi_user_chapter_play_record_{ti}" cur.execute(f"""SELECT user_id, chapter_id, updated_at FROM {tbl} WHERE play_status = 1 AND updated_at >= '2025-09-01' AND updated_at < '2026-05-11'""") for uid, cid, ua in cur.fetchall(): if cid in u0_chapters: continue key = (uid, cid) d = ua.date() if hasattr(ua, 'date') else datetime.strptime(str(ua)[:10], '%Y-%m-%d').date() if key not in cons_map or d < cons_map[key]: cons_map[key] = d print("角色映射...") all_uids = list(set(k[0] for k in cons_map)) char2acct = {} for i in range(0, len(all_uids), 500): batch = all_uids[i:i+500] ph = ','.join(['%s'] * len(batch)) cur.execute(f"SELECT id, account_id FROM bi_vala_app_character WHERE id IN ({ph})", batch) for cid, aid in cur.fetchall(): char2acct[cid] = aid print("按周汇总...") results = [] for ws, we in weeks: l1_paid = {aid for aid in l1_pool if is_paid(aid, we)} l2_paid = {aid for aid in l2_pool if is_paid(aid, we)} t_paid = {aid for aid in all_pool if is_paid(aid, we)} l1_cons, l1_cons_users = 0, set() l2_cons, l2_cons_users = 0, set() t_cons, t_cu = 0, set() for (uid, ch_id), cons_date in cons_map.items(): if ws <= cons_date <= we: aid = char2acct.get(uid) if not aid: continue if aid in l1_paid: l1_cons += 1 l1_cons_users.add(aid) if aid in l2_paid: l2_cons += 1 l2_cons_users.add(aid) if aid in t_paid: t_cons += 1 t_cu.add(aid) results.append({ 'ws': ws, 'we': we, 'L1_paid': len(l1_paid), 'L1_cons': l1_cons, 'L1_cons_users': len(l1_cons_users), 'L1_no_cons': len(l1_paid) - len(l1_cons_users), 'L1_avg_all': round(l1_cons / len(l1_paid), 2) if l1_paid else 0, 'L1_avg_cons': round(l1_cons / len(l1_cons_users), 2) if l1_cons_users else 0, 'L2_paid': len(l2_paid), 'L2_cons': l2_cons, 'L2_cons_users': len(l2_cons_users), 'L2_no_cons': len(l2_paid) - len(l2_cons_users), 'L2_avg_all': round(l2_cons / len(l2_paid), 2) if l2_paid else 0, 'L2_avg_cons': round(l2_cons / len(l2_cons_users), 2) if l2_cons_users else 0, 'total_paid': len(t_paid), 'total_cons': t_cons, 'total_cons_users': len(t_cu), 'total_no_cons': len(t_paid) - len(t_cu), 'total_avg_all': round(t_cons / len(t_paid), 2) if t_paid else 0, 'total_avg_cons': round(t_cons / len(t_cu), 2) if t_cu else 0, }) cur.close() conn.close() print("\n生成 Excel...") wb = openpyxl.Workbook() wb.remove(wb.active) hfont = Font(name='微软雅黑', bold=True, size=9, color='FFFFFF') hfill = PatternFill(start_color='2F5496', end_color='2F5496', fill_type='solid') dfont = Font(name='微软雅黑', size=9) tfont = Font(name='微软雅黑', bold=True, size=14, color='2F5496') sfont = Font(name='微软雅黑', bold=True, size=11, color='2F5496') bd = Border(left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin')) ctr = Alignment(horizontal='center', vertical='center') def ac(ws, r, c, v, font=dfont, fill=None, align=ctr): cl = ws.cell(row=r, column=c, value=v) cl.font, cl.border, cl.alignment = font, bd, align if fill: cl.fill = fill return cl def ah(ws, r, c, v): cl = ws.cell(row=r, column=c, value=v) cl.font, cl.fill, cl.border, cl.alignment = hfont, hfill, bd, ctr return cl # Sheet 1: 概览 ws1 = wb.create_sheet("概览") ws1.merge_cells('A1:H1') ac(ws1, 1, 1, "付费用户课消分析(剔除U0序章)", font=tfont, fill=None, align=Alignment(horizontal='left')) notes = [ "口径:L1付费用户 = 买过L1商品(含L1+L2)的付费用户 | L2付费用户 = 买过L2商品(含L1+L2)的付费用户", "L1+L2用户同时出现在L1和L2两个视角中 | 合计为去重统计", "课消:用户首次完成某一课时(剔除U0序章,仅U1+)", "付费用户:status=1 + 未删除 + 有未退款订单", ] for i, n in enumerate(notes): ws1.merge_cells(f'A{3+i}:H{3+i}') ac(ws1, 3+i, 1, n, font=Font(name='微软雅黑', size=9, color='666666'), fill=None, align=Alignment(horizontal='left')) row = 9 ws1.merge_cells(f'A{row}:H{row}') ac(ws1, row, 1, "汇总(截至最后一周)", font=sfont, fill=None, align=Alignment(horizontal='left')) row += 1 for j, h in enumerate(['分类', '付费用户', '有课消', '无课消', '无课消率', '人均课消', '有消人均'], 1): ah(ws1, row, j, h) row += 1 last = results[-1] summary = [ ('L1付费群', last['L1_paid'], last['L1_cons_users'], last['L1_no_cons'], last['L1_avg_all'], last['L1_avg_cons'], '#A8CFF1'), ('L2付费群', last['L2_paid'], last['L2_cons_users'], last['L2_no_cons'], last['L2_avg_all'], last['L2_avg_cons'], '#F4A9A0'), ('合计(去重)', last['total_paid'], last['total_cons_users'], last['total_no_cons'], last['total_avg_all'], last['total_avg_cons'], '#C8E6C9'), ] for name, p, cu, nc, aa, ac_, clr in summary: no_rate = f"{nc/p*100:.0f}%" if p else "0%" fl = PatternFill(start_color='00'+clr[1:], end_color='00'+clr[1:], fill_type='solid') for j, v in enumerate([name, p, cu, nc, no_rate, aa, ac_], 1): f = Font(name='微软雅黑', bold=(j==1), size=10) ac(ws1, row, j, v, font=f, fill=fl) row += 1 # Sheet 2: 每周明细 ws2 = wb.create_sheet("每周明细") headers = ['周', '周一起', '周日'] for prefix in ['合计', 'L1付费群', 'L2付费群']: for m in ['付费', '有消', '无消', '课消', '人均', '有消人均']: headers.append(f'{prefix}{m}') for j, h in enumerate(headers, 1): ah(ws2, 1, j, h) for ri, r in enumerate(results): rw = ri + 2 ac(ws2, rw, 1, r['ws'].strftime('%m/%d')) ac(ws2, rw, 2, r['ws'].strftime('%Y-%m-%d')) ac(ws2, rw, 3, r['we'].strftime('%Y-%m-%d')) col = 4 for prefix in ['total', 'L1', 'L2']: for k in ['paid', 'cons_users', 'no_cons', 'cons', 'avg_all', 'avg_cons']: ac(ws2, rw, col, r[f'{prefix}_{k}']) col += 1 for ci in range(1, len(headers)+1): ws2.column_dimensions[get_column_letter(ci)].width = 11 if ci <= 3 else 10 ws2.freeze_panes = 'D2' # Sheet 3: L1图表 ws_l1 = wb.create_sheet("L1图表") lh = ['周', '付费用户', '有课消用户', '无课消用户', '课消总数', '人均课消', '有消人均'] first = next(i for i, r in enumerate(results) if r['L1_paid'] > 0) l1d = results[first:] for j, h in enumerate(lh, 1): ah(ws_l1, 1, j, h) for ri, r in enumerate(l1d): rw = ri + 2 ac(ws_l1, rw, 1, r['ws'].strftime('%m/%d')) for j, k in enumerate(['L1_paid','L1_cons_users','L1_no_cons','L1_cons','L1_avg_all','L1_avg_cons'], 2): ac(ws_l1, rw, j, r[k]) n = len(l1d) cr = Reference(ws_l1, min_col=1, min_row=2, max_row=n+1) ch1 = BarChart(); ch1.type = "col"; ch1.grouping = "stacked" ch1.title = "L1付费用户周课消分布(剔除U0序章)"; ch1.style = 10; ch1.width = 24; ch1.height = 13 r1 = Reference(ws_l1, min_col=3, min_row=1, max_row=n+1); ch1.add_data(r1, titles_from_data=True) r2 = Reference(ws_l1, min_col=4, min_row=1, max_row=n+1); ch1.add_data(r2, titles_from_data=True) ch1.set_categories(cr) ch1.series[0].graphicalProperties.solidFill = 'A8CFF1' ch1.series[1].graphicalProperties.solidFill = 'D9D9D9' ch1.y_axis.title = '用户数'; ch1.legend.position = 'b' ws_l1.add_chart(ch1, "A9") ch2 = LineChart(); ch2.title = "L1付费用户周人均课消趋势(剔除U0序章)"; ch2.style = 10; ch2.width = 24; ch2.height = 13 r3 = Reference(ws_l1, min_col=6, min_row=1, max_row=n+1); ch2.add_data(r3, titles_from_data=True) r4 = Reference(ws_l1, min_col=7, min_row=1, max_row=n+1); ch2.add_data(r4, titles_from_data=True) ch2.set_categories(cr) ch2.series[0].graphicalProperties.line.solidFill = '999999'; ch2.series[0].graphicalProperties.line.width = 20000 ch2.series[1].graphicalProperties.line.solidFill = '4A90D9'; ch2.series[1].graphicalProperties.line.width = 28000 ch2.y_axis.scaling.min = 0; ch2.y_axis.title = '课消数(节/周)'; ch2.legend.position = 'b' ws_l1.add_chart(ch2, "A27") for ci in range(1, 8): ws_l1.column_dimensions[get_column_letter(ci)].width = 12 # Sheet 4: L2图表 ws_l2 = wb.create_sheet("L2图表") first2 = next(i for i, r in enumerate(results) if r['L2_paid'] > 0) l2d = results[first2:] for j, h in enumerate(lh, 1): ah(ws_l2, 1, j, h) for ri, r in enumerate(l2d): rw = ri + 2 ac(ws_l2, rw, 1, r['ws'].strftime('%m/%d')) for j, k in enumerate(['L2_paid','L2_cons_users','L2_no_cons','L2_cons','L2_avg_all','L2_avg_cons'], 2): ac(ws_l2, rw, j, r[k]) n2 = len(l2d) cr2 = Reference(ws_l2, min_col=1, min_row=2, max_row=n2+1) ch3 = BarChart(); ch3.type = "col"; ch3.grouping = "stacked" ch3.title = "L2付费用户周课消分布(剔除U0序章)"; ch3.style = 10; ch3.width = 24; ch3.height = 13 r5 = Reference(ws_l2, min_col=3, min_row=1, max_row=n2+1); ch3.add_data(r5, titles_from_data=True) r6 = Reference(ws_l2, min_col=4, min_row=1, max_row=n2+1); ch3.add_data(r6, titles_from_data=True) ch3.set_categories(cr2) ch3.series[0].graphicalProperties.solidFill = 'F4A9A0' ch3.series[1].graphicalProperties.solidFill = 'D9D9D9' ch3.y_axis.title = '用户数'; ch3.legend.position = 'b' ws_l2.add_chart(ch3, "A9") ch4 = LineChart(); ch4.title = "L2付费用户周人均课消趋势(剔除U0序章)"; ch4.style = 10; ch4.width = 24; ch4.height = 13 r7 = Reference(ws_l2, min_col=6, min_row=1, max_row=n2+1); ch4.add_data(r7, titles_from_data=True) r8 = Reference(ws_l2, min_col=7, min_row=1, max_row=n2+1); ch4.add_data(r8, titles_from_data=True) ch4.set_categories(cr2) ch4.series[0].graphicalProperties.line.solidFill = '999999'; ch4.series[0].graphicalProperties.line.width = 20000 ch4.series[1].graphicalProperties.line.solidFill = 'E85D47'; ch4.series[1].graphicalProperties.line.width = 28000 ch4.y_axis.scaling.min = 0; ch4.y_axis.title = '课消数(节/周)'; ch4.legend.position = 'b' ws_l2.add_chart(ch4, "A27") for ci in range(1, 8): ws_l2.column_dimensions[get_column_letter(ci)].width = 12 path = '/root/.openclaw/workspace/output/course_consumption_by_level_v3.xlsx' wb.save(path) print(f"\n✅ {path}") print(f"L1付费群: {last['L1_paid']}人 | L2付费群: {last['L2_paid']}人 | 合计(去重): {last['total_paid']}人") print(f"L1无消率: {last['L1_no_cons']/last['L1_paid']*100:.0f}% | L2无消率: {last['L2_no_cons']/last['L2_paid']*100:.0f}%")