#!/usr/bin/env python3
"""Course-consumption metrics v2: exclude the U0 prologue, charts split by level.

Pipeline:
  1. Load paid orders and classify each account as 仅L1 / 仅L2 / L1+L2 / 其他.
  2. Load completed chapter-play records inside the reporting window, drop the
     U0 prologue chapters and keep the earliest date per (user_id, chapter_id).
  3. Map character ids to account ids.
  4. Aggregate per calendar week and per category.
  5. Write an Excel workbook with an overview sheet, a weekly detail sheet and
     one chart sheet per level category.
"""
import os
from collections import defaultdict
from datetime import datetime, timedelta, date

import psycopg2
import openpyxl
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.chart import LineChart, BarChart, Reference
from openpyxl.chart.series import DataPoint
from openpyxl.chart.label import DataLabelList
from openpyxl.utils import get_column_letter

# ===== U0 chapter_ids to exclude =====
# NOTE(review): the overview sheet text below says "L1 U00: 343-348", but 347
# is not in this set -- confirm which of the two is correct.
u0_chapters = {55, 56, 57, 58, 59, 343, 344, 345, 346, 348}

# ===== Reporting window: [overall_start, overall_end) =====
overall_start = date(2025, 9, 1)
overall_end = date(2026, 5, 11)

ORDERS_SQL = """
    SELECT o.account_id, o.trade_no, o.order_status, o.pay_success_date,
           CASE
               WHEN o.goods_id IN (57, 60, 63) THEN 'L1'
               WHEN o.goods_id = 61 THEN 'L1+L2'
               WHEN o.goods_id IN (31, 32, 33, 54) THEN 'L2'
               ELSE '其他'
           END as level_type
    FROM bi_vala_order o
    INNER JOIN bi_vala_app_account a ON o.account_id = a.id
    WHERE a.status = 1
      AND a.deleted_at IS NULL
      AND o.pay_success_date IS NOT NULL
"""


def build_weeks(start, end):
    """Split the half-open range [start, end) into (first_day, last_day) spans.

    Every span ends on a Sunday, except the last one, which is clipped to the
    day before ``end``. The first span starts on ``start`` whatever its weekday.
    """
    spans = []
    day = start
    last_day = end - timedelta(days=1)
    while day < end:
        week_end = min(day + timedelta(days=6 - day.weekday()), last_day)
        spans.append((day, week_end))
        day = week_end + timedelta(days=1)
    return spans


def _to_date(value):
    """Coerce a datetime, a date, or a 'YYYY-MM-DD...' value to a ``date``."""
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    return datetime.strptime(str(value)[:10], '%Y-%m-%d').date()


def classify_user(levels):
    """Return the category label for a set of purchased level types.

    'L1+L2' wins when the bundle was bought or both single levels are present;
    otherwise '仅L1', '仅L2', or '其他' when neither level appears.
    """
    has_l1, has_l2 = 'L1' in levels, 'L2' in levels
    if 'L1+L2' in levels or (has_l1 and has_l2):
        return 'L1+L2'
    if has_l1:
        return '仅L1'
    if has_l2:
        return '仅L2'
    return '其他'


def is_paid_as_of(aid, as_of_date, users=None):
    """Return True if the account has a non-refunded order paid on/before the date.

    ``users`` defaults to the module-level ``user_data`` mapping.
    """
    users = user_data if users is None else users
    return any(
        paid_on <= as_of_date and not refunded
        for paid_on, refunded, _level in users[aid]['orders']
    )


def load_user_data(cur):
    """Query orders and refunds; return {account_id: {levels, orders, category}}.

    An order counts as refunded when its order_status is 4 and its trade_no
    appears in bi_refund_order with status 3. Refunded orders still contribute
    their level to the account's category; they are only ignored by
    ``is_paid_as_of``.
    """
    cur.execute(ORDERS_SQL)
    orders = cur.fetchall()
    cur.execute("SELECT trade_no FROM bi_refund_order WHERE status = 3")
    refund_trades = {r[0] for r in cur.fetchall()}

    users = defaultdict(lambda: {'levels': set(), 'orders': []})
    for aid, trade_no, order_status, pay_date, lt in orders:
        is_refunded = order_status == 4 and trade_no in refund_trades
        users[aid]['levels'].add(lt)
        users[aid]['orders'].append((_to_date(pay_date), is_refunded, lt))
    for info in users.values():
        info['category'] = classify_user(info['levels'])
    return users


def load_consumption(cur, start, end, excluded_chapters, n_tables=8):
    """Read the sharded play-record tables for the window [start, end).

    Returns ``(first_seen, skipped)``: ``first_seen`` maps
    (user_id, chapter_id) to the earliest record date found, and ``skipped``
    counts the rows dropped because their chapter is in ``excluded_chapters``.
    """
    first_seen = {}
    skipped = 0
    # Bounds are sent as ISO strings so the server sees the same untyped
    # literals the query used when the dates were written inline.
    bounds = (start.isoformat(), end.isoformat())
    for table_idx in range(n_tables):
        # The table name comes from a fixed integer range, not external input.
        tbl = f"bi_user_chapter_play_record_{table_idx}"
        cur.execute(
            f"""
            SELECT user_id, chapter_id, updated_at
            FROM {tbl}
            WHERE play_status = 1
              AND updated_at >= %s
              AND updated_at < %s
            """,
            bounds,
        )
        for user_id, chapter_id, updated_at in cur.fetchall():
            if chapter_id in excluded_chapters:
                skipped += 1
                continue
            key = (user_id, chapter_id)
            day = _to_date(updated_at)
            if key not in first_seen or day < first_seen[key]:
                first_seen[key] = day
    return first_seen, skipped


def load_char2acct(cur, char_ids, batch_size=500):
    """Return {character_id: account_id} for ``char_ids``, queried in batches."""
    mapping = {}
    for i in range(0, len(char_ids), batch_size):
        batch = char_ids[i:i + batch_size]
        ph = ','.join(['%s'] * len(batch))
        cur.execute(
            f"SELECT id, account_id FROM bi_vala_app_character WHERE id IN ({ph})",
            batch,
        )
        for cid, aid in cur.fetchall():
            mapping[cid] = aid
    return mapping


def summarize_week(week_start, week_end, users, consumption, char_to_account):
    """Build one result row for the week [week_start, week_end] (inclusive).

    For each category and for the '合计' total the row holds: paid accounts as
    of ``week_end``, consumption count, accounts with consumption, accounts
    without, and the two per-account averages (rounded to 2 decimals).
    Only consumption by accounts that are paid as of ``week_end`` is counted.
    """
    paid_by_cat = defaultdict(set)
    for aid, info in users.items():
        if is_paid_as_of(aid, week_end, users):
            paid_by_cat[info['category']].add(aid)

    cons_by_cat = defaultdict(int)
    cons_users_by_cat = defaultdict(set)
    for (uid, _chapter_id), cons_date in consumption.items():
        if not (week_start <= cons_date <= week_end):
            continue
        aid = char_to_account.get(uid)
        if not aid:
            continue
        cat = users.get(aid, {}).get('category', '其他')
        if aid in paid_by_cat.get(cat, set()):
            cons_by_cat[cat] += 1
            cons_users_by_cat[cat].add(aid)

    row = {'ws': week_start, 'we': week_end}
    for cat in ['仅L1', '仅L2', 'L1+L2', '其他', '合计']:
        if cat == '合计':
            n_paid = sum(len(v) for v in paid_by_cat.values())
            n_cons = sum(cons_by_cat.values())
            n_cons_users = len(set().union(*cons_users_by_cat.values()))
        else:
            n_paid = len(paid_by_cat.get(cat, set()))
            n_cons = cons_by_cat.get(cat, 0)
            n_cons_users = len(cons_users_by_cat.get(cat, set()))
        row[f'{cat}_paid'] = n_paid
        row[f'{cat}_cons'] = n_cons
        row[f'{cat}_cons_users'] = n_cons_users
        row[f'{cat}_no_cons'] = n_paid - n_cons_users
        row[f'{cat}_avg_all'] = round(n_cons / n_paid, 2) if n_paid > 0 else 0
        row[f'{cat}_avg_cons'] = round(n_cons / n_cons_users, 2) if n_cons_users > 0 else 0
    return row


def first_paid_week_index(rows, cat):
    """Index of the first row where ``cat`` has paid accounts (0 if none)."""
    return next((i for i, r in enumerate(rows) if r[f'{cat}_paid'] > 0), 0)


weeks = build_weeks(overall_start, overall_end)

# NOTE(review): a database password should not live in source control. It can
# be overridden via VALA_BI_DB_PASSWORD; the literal fallback is kept only so
# existing invocations keep working and should be rotated and removed.
conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591,
    user="ai_member",
    password=os.environ.get("VALA_BI_DB_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    dbname="vala_bi",
)
# try/finally so the connection is released even when a query fails.
try:
    with conn.cursor() as cur:
        print(f"剔除 U0 序章: {sorted(u0_chapters)}")

        # ===== Step 1: classify paying users =====
        print("\nStep 1: 分类付费用户...")
        user_data = load_user_data(cur)

        # ===== Step 2: consumption (U0 excluded) =====
        print("\nStep 2: 查询课消(剔除U0)...")
        consumption_map, u0_skipped = load_consumption(
            cur, overall_start, overall_end, u0_chapters
        )
        print(f" 剔除U0课消: {u0_skipped} 条, 去重后: {len(consumption_map)} 条")

        # ===== Step 3: character -> account mapping =====
        print("Step 3: 角色映射...")
        all_uids = list({k[0] for k in consumption_map})
        char2acct = load_char2acct(cur, all_uids)
        print(f" 映射: {len(char2acct)}")
finally:
    conn.close()

# ===== Step 4: weekly aggregation (no database access needed) =====
print("Step 4: 按周汇总...")
results = [
    summarize_week(ws, we, user_data, consumption_map, char2acct)
    for ws, we in weeks
]

# ===== Report the first week in which each category has paid users =====
for cat in ['仅L1', '仅L2', 'L1+L2']:
    first_idx = first_paid_week_index(results, cat)
    print(f"{cat} 数据起于第 {first_idx+1} 周 ({results[first_idx]['ws']})")

# ===== Build the Excel workbook =====
print("\n生成 Excel...")
wb = openpyxl.Workbook()
wb.remove(wb.active)

# Styles
header_font = Font(name='微软雅黑', bold=True, size=9, color='FFFFFF')
header_fill = PatternFill(start_color='2F5496', end_color='2F5496', fill_type='solid')
data_font = Font(name='微软雅黑', size=9)
title_font = Font(name='微软雅黑', bold=True, size=14, color='2F5496')
subtitle_font = Font(name='微软雅黑', bold=True, size=11, color='2F5496')
border = Border(left=Side(style='thin'), right=Side(style='thin'),
                top=Side(style='thin'), bottom=Side(style='thin'))
center = Alignment(horizontal='center', vertical='center')

l1_color = '4A90D9'
l2_color = 'E85D47'
l1l2_color = '7B9E4B'


def apply_cell(ws, row, col, value, font=data_font, fill=None, align=center, border_style=border):
    """Write ``value`` into a cell and style it; return the cell.

    Pass ``border_style=None`` for a cell without a border and ``fill=None``
    (the default) to leave the fill untouched.
    """
    c = ws.cell(row=row, column=col, value=value)
    c.font = font
    # Leave the default (empty) border in place instead of assigning None to
    # the style attribute.
    if border_style is not None:
        c.border = border_style
    c.alignment = align
    if fill:
        c.fill = fill
    return c


def apply_header(ws, row, col, value):
    """Write ``value`` into a cell using the table-header style; return the cell."""
    c = ws.cell(row=row, column=col, value=value)
    c.font, c.fill, c.border, c.alignment = header_font, header_fill, border, center
    return c


def add_level_chart_sheet(wb, rows, cat, sheet_name, label, line_color, bar_color):
    """Add a sheet with the weekly table and two charts for one category.

    Only rows from the first week with paid users in ``cat`` onward are used.
    Chart 1 is a stacked column chart (accounts with / without consumption);
    chart 2 is a line chart of the two per-account averages.
    """
    ws = wb.create_sheet(sheet_name)
    cat_results = rows[first_paid_week_index(rows, cat):]

    headers = ['周', '付费用户', '有课消用户', '无课消用户', '课消总数', '人均课消', '有消人均']
    for j, h in enumerate(headers, 1):
        apply_header(ws, 1, j, h)

    metrics = ['paid', 'cons_users', 'no_cons', 'cons', 'avg_all', 'avg_cons']
    for ri, r in enumerate(cat_results):
        row = ri + 2
        apply_cell(ws, row, 1, f"{r['ws'].strftime('%m/%d')}")
        for col, metric in enumerate(metrics, 2):
            apply_cell(ws, row, col, r[f'{cat}_{metric}'])

    n_rows = len(cat_results)
    cats_ref = Reference(ws, min_col=1, min_row=2, max_row=n_rows+1)

    def series_ref(col):
        # Includes the header row so titles_from_data can name the series.
        return Reference(ws, min_col=col, min_row=1, max_row=n_rows+1)

    # --- Chart 1: stacked columns (with / without consumption) ---
    chart1 = BarChart()
    chart1.type = "col"
    chart1.grouping = "stacked"
    # openpyxl requires overlap=100 for the columns to actually stack.
    chart1.overlap = 100
    chart1.title = f"{label} 付费用户课消分布(剔除U0序章)"
    chart1.style = 10
    chart1.width = 24
    chart1.height = 13
    chart1.add_data(series_ref(3), titles_from_data=True)
    chart1.add_data(series_ref(4), titles_from_data=True)
    # Set categories after all series exist so each one receives them.
    chart1.set_categories(cats_ref)
    chart1.series[0].graphicalProperties.solidFill = bar_color
    chart1.series[1].graphicalProperties.solidFill = 'D9D9D9'
    chart1.y_axis.title = '用户数'
    chart1.legend.position = 'b'
    ws.add_chart(chart1, "A9")

    # --- Chart 2: lines (average per paid account / per consuming account) ---
    chart2 = LineChart()
    chart2.title = f"{label} 周人均课消趋势(剔除U0序章)"
    chart2.style = 10
    chart2.width = 24
    chart2.height = 13
    chart2.y_axis.title = '课消数(节/周)'
    chart2.add_data(series_ref(6), titles_from_data=True)
    chart2.add_data(series_ref(7), titles_from_data=True)
    chart2.set_categories(cats_ref)
    chart2.series[0].graphicalProperties.line.solidFill = '999999'
    chart2.series[0].graphicalProperties.line.width = 20000
    chart2.series[0].graphicalProperties.line.dashStyle = 'dash'
    chart2.series[1].graphicalProperties.line.solidFill = line_color
    chart2.series[1].graphicalProperties.line.width = 28000
    chart2.y_axis.scaling.min = 0
    chart2.legend.position = 'b'
    ws.add_chart(chart2, "A27")

    # Column widths
    for ci in range(1, 8):
        ws.column_dimensions[get_column_letter(ci)].width = 12
    return ws


# ===== Sheet 1: overview =====
ws1 = wb.create_sheet("概览")
ws1.merge_cells('A1:H1')
apply_cell(ws1, 1, 1, "付费用户 L1/L2 课消分析(剔除U0序章)", font=title_font,
           border_style=None, align=Alignment(horizontal='left'))

notes = [
    "口径:剔除L1/L2的U0序章课时(L1 U00: 343-348, L2 U00: 55-59),仅统计U1及之后的课消",
    "课消:用户首次完成某一课时;付费用户:status=1 + 未删除 + 有订单 + 未全部退款",
]
for i, n in enumerate(notes):
    ws1.merge_cells(f'A{3+i}:H{3+i}')
    apply_cell(ws1, 3+i, 1, n, font=Font(name='微软雅黑', size=9, color='666666'),
               border_style=None, align=Alignment(horizontal='left'))

# ===== Sheet 2: weekly detail =====
ws2 = wb.create_sheet("每周明细")
headers_main = ['周', '周一起', '周日'] + [
    '合计付费', '合计有消', '合计无消', '合计课消', '合计人均', '合计有消人均',
    '仅L1付费', '仅L1有消', '仅L1无消', '仅L1课消', '仅L1人均', '仅L1有消人均',
    '仅L2付费', '仅L2有消', '仅L2无消', '仅L2课消', '仅L2人均', '仅L2有消人均',
    'L1+L2付费', 'L1+L2有消', 'L1+L2无消', 'L1+L2课消', 'L1+L2人均', 'L1+L2有消人均']
for j, h in enumerate(headers_main, 1):
    apply_header(ws2, 1, j, h)

for ri, r in enumerate(results):
    row = ri + 2
    wl = f"{r['ws'].strftime('%m/%d')}-{r['we'].strftime('%m/%d')}"
    apply_cell(ws2, row, 1, wl)
    apply_cell(ws2, row, 2, r['ws'].strftime('%Y-%m-%d'))
    apply_cell(ws2, row, 3, r['we'].strftime('%Y-%m-%d'))
    col = 4
    # Column order must match headers_main above.
    for prefix in ['合计', '仅L1', '仅L2', 'L1+L2']:
        for metric in ['paid', 'cons_users', 'no_cons', 'cons', 'avg_all', 'avg_cons']:
            apply_cell(ws2, row, col, r[f'{prefix}_{metric}'])
            col += 1

for ci in range(1, len(headers_main)+1):
    ws2.column_dimensions[get_column_letter(ci)].width = 11 if ci <= 3 else 10
ws2.freeze_panes = 'D2'

# ===== Sheets 3-5: one chart sheet per level category =====
# (category key, sheet name, chart label, line colour, stacked-bar colour)
chart_sheets = [
    ('仅L1', 'L1图表', 'L1', l1_color, '4A90D9'),
    ('仅L2', 'L2图表', 'L2', l2_color, 'E85D47'),
    ('L1+L2', 'L1+L2图表', 'L1+L2', l1l2_color, 'A8C88E'),
]
for cat, sname, label, color, light_color in chart_sheets:
    add_level_chart_sheet(wb, results, cat, sname, label, color, light_color)

# Save
path = '/root/.openclaw/workspace/output/course_consumption_by_level_v2.xlsx'
# Create the output directory first so save() does not fail on a fresh machine.
os.makedirs(os.path.dirname(path), exist_ok=True)
wb.save(path)
print(f"\n✅ Excel v2 已保存: {path}")

# Short summary of the final week
last = results[-1]
# Derived from the window so the label cannot drift from overall_end.
last_day = overall_end - timedelta(days=1)
summary_lines = ["", f"=== 剔除U0后最终数据(截至{last_day.month}/{last_day.day}) ==="]
for cat in ['仅L1', '仅L2', 'L1+L2', '合计']:
    summary_lines.append(
        f"{cat}: 付费{last[f'{cat}_paid']} 有消{last[f'{cat}_cons_users']} "
        f"无消{last[f'{cat}_no_cons']} 人均{last[f'{cat}_avg_all']} "
        f"有消人均{last[f'{cat}_avg_cons']}"
    )
summary_lines.append("")
print("\n".join(summary_lines))