ai_member_xiaoxi/scripts/generate_excel.py
2026-05-15 08:00:01 +08:00

386 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
生成课消指标 Excel按周 + 按 L1/L2 拆分
"""
import os
from collections import defaultdict
from datetime import datetime, timedelta, date

import openpyxl
import psycopg2
from openpyxl.chart import LineChart, Reference
from openpyxl.chart.label import DataLabelList
from openpyxl.chart.series import DataPoint
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
# Database connection. Every setting can be overridden through an environment
# variable; the literals are fallbacks that keep the script runnable exactly
# as before when no variable is set.
# NOTE(review): the fallback password is committed in plain text -- rotate it
# and drop the default once VALA_BI_PG_PASSWORD is provisioned.
conn = psycopg2.connect(
    host=os.environ.get("VALA_BI_PG_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    port=int(os.environ.get("VALA_BI_PG_PORT", "28591")),
    user=os.environ.get("VALA_BI_PG_USER", "ai_member"),
    password=os.environ.get("VALA_BI_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    dbname=os.environ.get("VALA_BI_PG_DBNAME", "vala_bi"),
)
cur = conn.cursor()
# ===== Time parameters =====
# The reporting window is [overall_start, overall_end): the end date itself
# is excluded.
overall_start = date(2025, 9, 1)
overall_end = date(2026, 5, 11)

# Cut the window into (week_start, week_end) pairs. Each week ends on the
# Sunday of its start date's calendar week; the final week is clipped to the
# last day inside the window.
weeks = []
last_day = overall_end - timedelta(days=1)
d = overall_start
while d < overall_end:
    ws = d
    # weekday() is 0 for Monday, so 6 - weekday() is the distance to Sunday.
    we = min(ws + timedelta(days=6 - ws.weekday()), last_day)
    weeks.append((ws, we))
    d = we + timedelta(days=1)
# ===== Step 1: classify paying users =====
print("Step 1: 分类付费用户...")
# One row per paid order of an active, non-deleted account; the goods_id is
# folded into a level label ('L1', 'L1+L2', 'L2' or '其他') by the query.
cur.execute("""
SELECT o.account_id, o.trade_no, o.order_status, o.pay_success_date,
CASE WHEN o.goods_id IN (57, 60, 63) THEN 'L1'
WHEN o.goods_id = 61 THEN 'L1+L2'
WHEN o.goods_id IN (31, 32, 33, 54) THEN 'L2'
ELSE '其他' END as level_type
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE a.status = 1 AND a.deleted_at IS NULL AND o.pay_success_date IS NOT NULL
""")
orders = cur.fetchall()
print(f" 订单数: {len(orders)}")

# Trade numbers that have a refund record with status = 3.
cur.execute("SELECT trade_no FROM bi_refund_order WHERE status = 3")
refund_trades = {refund_row[0] for refund_row in cur.fetchall()}

# account_id -> {'levels': set of level labels,
#                'orders': [(pay_date, is_refunded, level), ...]}
user_data = defaultdict(lambda: {'levels': set(), 'orders': []})
for aid, trade_no, order_status, pay_date, lt in orders:
    entry = user_data[aid]
    # An order counts as refunded only when both its own status is 4 and its
    # trade number appears among the refund records fetched above.
    is_refunded = order_status == 4 and trade_no in refund_trades
    entry['levels'].add(lt)
    entry['orders'].append((pay_date.date(), is_refunded, lt))
def classify_user(levels):
    """Map a collection of purchased level labels to a user category.

    Returns 'L1+L2' when the labels contain the bundle label 'L1+L2' or both
    'L1' and 'L2'; '仅L1' or '仅L2' when only that single level is present;
    '其他' in every remaining case.
    """
    owns_l1 = 'L1' in levels
    owns_l2 = 'L2' in levels
    if 'L1+L2' in levels or (owns_l1 and owns_l2):
        return 'L1+L2'
    if owns_l1:
        return '仅L1'
    if owns_l2:
        return '仅L2'
    return '其他'
# Store the derived category next to each account's levels and orders.
for aid, info in user_data.items():
    info['category'] = classify_user(info['levels'])
def is_paid_as_of(aid, as_of_date):
    """Return True if the account has at least one non-refunded order whose
    pay date is on or before as_of_date, otherwise False."""
    for paid_on, refunded, _level in user_data[aid]['orders']:
        if paid_on <= as_of_date and not refunded:
            return True
    return False
# ===== Step 2: course consumption =====
print("Step 2: 查询课消...")
# (user_id, chapter_id) -> earliest date, inside the reporting window, on
# which a record with play_status = 1 was updated.
consumption_map = {}
# Bounds come from the same constants that drive the week partition, so the
# query window cannot drift from it. They are bound as ISO strings, which
# yields the same literals the query previously had inline.
window_bounds = (overall_start.isoformat(), overall_end.isoformat())
for table_idx in range(8):  # tables bi_user_chapter_play_record_0 .. _7
    tbl = f"bi_user_chapter_play_record_{table_idx}"
    cur.execute(
        f"""
SELECT user_id, chapter_id, updated_at
FROM {tbl}
WHERE play_status = 1 AND updated_at >= %s AND updated_at < %s
""",
        window_bounds,
    )
    for user_id, chapter_id, updated_at in cur.fetchall():
        key = (user_id, chapter_id)
        # updated_at may come back as a datetime or as text; reduce either
        # to a plain date.
        if hasattr(updated_at, 'date'):
            done_on = updated_at.date()
        else:
            done_on = datetime.strptime(str(updated_at)[:10], '%Y-%m-%d').date()
        earliest = consumption_map.get(key)
        if earliest is None or done_on < earliest:
            consumption_map[key] = done_on
print(f" 去重后: {len(consumption_map)}")
# ===== Step 3: character -> account mapping =====
print("Step 3: 角色映射...")
# The user_id values of the play records are looked up as ids in
# bi_vala_app_character to obtain the owning account_id.
all_uids = list({key[0] for key in consumption_map})
char2acct = {}
bs = 500  # ids per IN (...) query
for i in range(0, len(all_uids), bs):
    batch = all_uids[i:i+bs]
    ph = ','.join('%s' for _ in batch)
    cur.execute(f"SELECT id, account_id FROM bi_vala_app_character WHERE id IN ({ph})", batch)
    # Each fetched row is an (id, account_id) pair.
    char2acct.update(cur.fetchall())
print(f" 映射: {len(char2acct)}")
# ===== Step 4: weekly aggregation =====
print("Step 4: 按周汇总...")
results = []
for ws, we in weeks:
    # Accounts that count as paying by the end of this week, per category.
    paid_by_cat = defaultdict(set)
    for aid, info in user_data.items():
        if is_paid_as_of(aid, we):
            paid_by_cat[info['category']].add(aid)

    # Consumption events dated inside this week, attributed to the paying
    # account that owns the character.
    cons_by_cat = defaultdict(int)
    cons_users_by_cat = defaultdict(set)
    for (uid, ch_id), cons_date in consumption_map.items():
        if not (ws <= cons_date <= we):
            continue
        aid = char2acct.get(uid)
        if not aid:
            # No mapped account for this character (or a falsy account id).
            continue
        cat = user_data.get(aid, {}).get('category', '其他')
        if aid not in paid_by_cat.get(cat, set()):
            continue
        cons_by_cat[cat] += 1
        cons_users_by_cat[cat].add(aid)

    row = {'ws': ws, 'we': we}
    for cat in ['仅L1', '仅L2', 'L1+L2', '其他', '合计']:
        if cat != '合计':
            n_paid = len(paid_by_cat.get(cat, set()))
            n_cons = cons_by_cat.get(cat, 0)
            n_cons_users = len(cons_users_by_cat.get(cat, set()))
        else:
            # '合计' aggregates over every category present, '其他' included.
            n_paid = sum(len(members) for members in paid_by_cat.values())
            n_cons = sum(cons_by_cat.values())
            n_cons_users = len(set().union(*cons_users_by_cat.values()))
        row[f'{cat}_paid'] = n_paid
        row[f'{cat}_cons'] = n_cons
        row[f'{cat}_cons_users'] = n_cons_users
        # Averages fall back to 0 when the denominator is empty.
        row[f'{cat}_avg_all'] = round(n_cons / n_paid, 2) if n_paid > 0 else 0
        row[f'{cat}_avg_cons'] = round(n_cons / n_cons_users, 2) if n_cons_users > 0 else 0
    results.append(row)

# All queries are done; release the database resources.
cur.close()
conn.close()
# ===== Build the Excel workbook =====
print("\n生成 Excel...")
wb = openpyxl.Workbook()

# Shared styles
# Fonts
header_font = Font(name='微软雅黑', bold=True, size=10, color='FFFFFF')
data_font = Font(name='微软雅黑', size=10)
title_font = Font(name='微软雅黑', bold=True, size=14, color='2F5496')
subtitle_font = Font(name='微软雅黑', bold=True, size=11, color='2F5496')

# Thin border on all four edges, and centered alignment
border = Border(**{edge: Side(style='thin') for edge in ('left', 'right', 'top', 'bottom')})
center = Alignment(horizontal='center', vertical='center')

# Solid fills: header bar, then one tint per category row and the total row
header_fill = PatternFill(start_color='2F5496', end_color='2F5496', fill_type='solid')
l1_fill, l2_fill, l1l2_fill, total_fill = (
    PatternFill(start_color=rgb, end_color=rgb, fill_type='solid')
    for rgb in ('DAEEF3', 'FDE9D9', 'E4DFEC', 'D9EAD3')
)
def apply_cell(ws, row, col, value, font=data_font, fill=None, border_style=border, align=center):
    """Write value into cell (row, col) of worksheet ws and style it.

    font, border_style and align are always applied; fill is applied only
    when a truthy fill is passed. Returns the cell.
    """
    cell = ws.cell(row=row, column=col, value=value)
    cell.font = font
    cell.border = border_style
    cell.alignment = align
    if fill:
        cell.fill = fill
    return cell
def apply_header(ws, row, col, value):
    """Write value into cell (row, col) of worksheet ws using the shared
    header font, header fill, border and centered alignment. Returns the cell."""
    cell = ws.cell(row=row, column=col, value=value)
    cell.font = header_font
    cell.fill = header_fill
    cell.border = border
    cell.alignment = center
    return cell
# ===== Sheet 1: overview =====
ws1 = wb.active
ws1.title = "概览"

plain_border = Border()                      # border-less free-text rows
left_align = Alignment(horizontal='left')
note_font = Font(name='微软雅黑', size=9)
bold_font = Font(name='微软雅黑', bold=True, size=10)

ws1.merge_cells('A1:G1')
apply_cell(ws1, 1, 1, "付费用户 L1/L2 课消分析", font=title_font, border_style=plain_border, align=left_align)
ws1.merge_cells('A2:G2')
# The displayed range is derived from the window constants; overall_end is
# exclusive, so the last covered day is one day earlier.
data_from = overall_start.isoformat()
data_to = (overall_end - timedelta(days=1)).isoformat()
# NOTE(review): the "更新日期" value is still a fixed literal -- confirm
# whether it should follow the run date instead.
apply_cell(ws1, 2, 1, f"数据区间: {data_from} ~ {data_to} | 更新日期: 2026-05-14", font=Font(name='微软雅黑', size=9, color='666666'), border_style=plain_border, align=left_align)

# Metric definitions shown to the reader (rows 4 onward)
notes = [
    "口径说明:",
    "• 课消用户首次完成某一课时play_status=1按(user_id,chapter_id)取最早updated_at",
    "• L1商品: goods_id IN (57,60,63) | L2商品: goods_id IN (31,32,33,54) | L1+L2商品: goods_id=61",
    "• 付费用户status=1 + deleted_at IS NULL + 有订单 + 未全部退款",
    "• 人均课消 = 周内课消次数 / 付费用户数",
    "• 有消用户人均 = 周内课消次数 / 至少完成1次课消的付费用户数",
]
for i, note in enumerate(notes):
    apply_cell(ws1, 4+i, 1, note, font=note_font, border_style=plain_border, align=left_align)

# Summary table: paying users per category as of the last week
row = 11
ws1.merge_cells(f'A{row}:K{row}')
apply_cell(ws1, row, 1, "付费用户分类(截至最后一周)", font=subtitle_font, border_style=plain_border, align=left_align)
row += 1
headers_summary = ['分类', '付费用户数', '占比']
for j, h in enumerate(headers_summary, 1):
    apply_header(ws1, row, j, h)
row += 1
last = results[-1]
cats_data = [('仅L1', last['仅L1_paid']), ('仅L2', last['仅L2_paid']), ('L1+L2', last['L1+L2_paid'])]
# The '其他' category is not part of this table, so total covers three rows.
total = sum(v for _, v in cats_data)
row_fills = {'仅L1': l1_fill, '仅L2': l2_fill}
for cat, v in cats_data:
    # Guard the share against an empty population instead of dividing by 0.
    share = f"{v/total*100:.1f}%" if total else "0.0%"
    apply_cell(ws1, row, 1, cat)
    apply_cell(ws1, row, 2, v)
    apply_cell(ws1, row, 3, share)
    fill = row_fills.get(cat, l1l2_fill)
    for c in range(1, 4):
        ws1.cell(row=row, column=c).fill = fill
    row += 1
apply_cell(ws1, row, 1, '合计', font=bold_font)
apply_cell(ws1, row, 2, total, font=bold_font)
apply_cell(ws1, row, 3, '100%', font=bold_font)
for c in range(1, 4):
    ws1.cell(row=row, column=c).fill = total_fill

# Recent trend digest: the last 8 weeks of both per-user averages
row += 2
ws1.merge_cells(f'A{row}:K{row}')
apply_cell(ws1, row, 1, "近期人均课消趋势", font=subtitle_font, border_style=plain_border, align=left_align)
row += 1
trend_headers = ['', '合计人均', '仅L1人均', '仅L2人均', 'L1+L2人均', '合计有消人均', '仅L1有消人均', '仅L2有消人均', 'L1+L2有消人均']
for j, h in enumerate(trend_headers, 1):
    apply_header(ws1, row, j, h)
row += 1
# Result keys in the same order as trend_headers[1:] (columns 2..9).
trend_keys = [
    '合计_avg_all', '仅L1_avg_all', '仅L2_avg_all', 'L1+L2_avg_all',
    '合计_avg_cons', '仅L1_avg_cons', '仅L2_avg_cons', 'L1+L2_avg_cons',
]
for r in results[-8:]:
    wl = f"{r['ws'].strftime('%m/%d')}-{r['we'].strftime('%m/%d')}"
    apply_cell(ws1, row, 1, wl, font=note_font)
    for j, key in enumerate(trend_keys, 2):
        apply_cell(ws1, row, j, r[key], font=note_font)
    row += 1

# Column widths for A..I
for col in range(1, 10):
    ws1.column_dimensions[get_column_letter(col)].width = 14
# ===== Sheet 2: weekly detail =====
ws2 = wb.create_sheet("每周明细")

# Two header rows: row 1 carries the metric group names, row 2 the category
# names inside each group. Data rows start at row 3.
row2 = 1
group_headers = [
    ('付费用户数', ['合计', '仅L1', '仅L2', 'L1+L2']),
    ('课消次数', ['合计', '仅L1', '仅L2', 'L1+L2']),
    ('有课消用户数', ['合计', '仅L1', '仅L2', 'L1+L2']),
    ('人均课消(全部付费用户)', ['合计', '仅L1', '仅L2', 'L1+L2']),
    ('人均课消(有课消用户)', ['合计', '仅L1', '仅L2', 'L1+L2']),
]
# Suffix of the result key for each metric group; a result row stores the
# value under f"{category}_{suffix}". Built once instead of once per cell.
group_key_suffix = {
    '付费用户数': 'paid',
    '课消次数': 'cons',
    '有课消用户数': 'cons_users',
    '人均课消(全部付费用户)': 'avg_all',
    '人均课消(有课消用户)': 'avg_cons',
}
apply_header(ws2, row2, 1, '')
apply_header(ws2, row2, 2, '周一起')
apply_header(ws2, row2, 3, '周日')
col = 4
spans = []
for grp_name, cols in group_headers:
    start_col = col
    col += len(cols)
    end_col = col - 1
    # The group title spans all of its category columns.
    if start_col < end_col:
        ws2.merge_cells(start_row=row2, start_column=start_col, end_row=row2, end_column=end_col)
    apply_header(ws2, row2, start_col, grp_name)
    spans.append((start_col, end_col, grp_name, cols))
    for ic, cname in enumerate(cols):
        apply_header(ws2, row2+1, start_col+ic, cname)
col_count = col - 1

# Data rows: one per week
row2 = 3
for r in results:
    wl = f"{r['ws'].strftime('%m/%d')}-{r['we'].strftime('%m/%d')}"
    apply_cell(ws2, row2, 1, wl)
    apply_cell(ws2, row2, 2, r['ws'].strftime('%Y-%m-%d'))
    apply_cell(ws2, row2, 3, r['we'].strftime('%Y-%m-%d'))
    col = 4
    for grp_name, cols in group_headers:
        suffix = group_key_suffix[grp_name]
        for cname in cols:
            apply_cell(ws2, row2, col, r[f"{cname}_{suffix}"])
            col += 1
    row2 += 1

# Column widths
ws2.column_dimensions['A'].width = 14
ws2.column_dimensions['B'].width = 12
ws2.column_dimensions['C'].width = 12
for ci in range(4, col_count + 1):
    ws2.column_dimensions[get_column_letter(ci)].width = 10

# Freeze the first 3 columns plus the two header rows. The anchor is the
# top-left cell of the scrolling area, i.e. the first data cell D3; 'D4'
# would also pin the first data row.
ws2.freeze_panes = 'D3'
# ===== Charts =====
chart_sheet = wb.create_sheet("图表")

# Data rows of the detail sheet; row 2 (data_row_start - 1) holds the
# category names that become the series titles.
data_row_start = 3
data_row_end = row2 - 1
# X-axis categories: the week labels in column A of the detail sheet
cats_ref = Reference(ws2, min_col=1, min_row=data_row_start, max_row=data_row_end)

# First column of every metric group on the detail sheet (groups start at
# column 4 and are laid out back to back).
grp_col_map = {}
col = 4
for grp_name, cols in group_headers:
    grp_col_map[grp_name] = col
    col += len(cols)

# One line colour per series, in column order: 合计, 仅L1, 仅L2, L1+L2
colors = ['333333', '4A90D9', 'E85D47', '7B9E4B']


def _add_category_lines(chart, first_col, explicit_solid=False):
    """Add one line series per category column, starting at first_col.

    Each series takes its title from the header cell above its data, gets
    its colour from `colors`, and the first series is drawn thicker than the
    others. With explicit_solid=True every series after the first also gets
    dashStyle 'solid' set explicitly. The week labels are attached as
    categories once all series exist.
    """
    for idx, color in enumerate(colors):
        ref = Reference(ws2, min_col=first_col+idx, min_row=data_row_start-1, max_row=data_row_end)
        chart.add_data(ref, titles_from_data=True)
        line = chart.series[idx].graphicalProperties.line
        line.solidFill = color
        line.width = 25000 if idx == 0 else 20000
        if explicit_solid and idx > 0:
            line.dashStyle = 'solid'
    # set_categories applies to every series already on the chart, so a
    # single call after the loop is enough.
    chart.set_categories(cats_ref)


# Chart 1: average consumption per paying user, by category
chart1 = LineChart()
chart1.title = "人均课消数(全部付费用户)"
chart1.style = 10
chart1.y_axis.title = "课消数(节/周)"
chart1.x_axis.title = None
chart1.width = 28
chart1.height = 14
chart1.y_axis.scaling.min = 0
start_avg = grp_col_map['人均课消(全部付费用户)']
_add_category_lines(chart1, start_avg, explicit_solid=True)
chart_sheet.add_chart(chart1, "A1")

# Chart 2: growth of the paying-user count
chart2 = LineChart()
chart2.title = "付费用户数增长趋势"
chart2.style = 10
chart2.y_axis.title = "用户数"
chart2.width = 28
chart2.height = 14
start_paid = grp_col_map['付费用户数']
_add_category_lines(chart2, start_paid)
# Chart 1 is 14 cm tall, roughly 27 rows at the default row height, so the
# second chart is anchored below it; "A18" made the two charts overlap.
chart_sheet.add_chart(chart2, "A30")
# ===== Save =====
path = '/root/.openclaw/workspace/output/course_consumption_by_level.xlsx'
# Create the output directory if needed so saving does not fail on a fresh
# workspace.
os.makedirs(os.path.dirname(path), exist_ok=True)
wb.save(path)
print(f"\n✅ Excel 已保存: {path}")
print(" Sheet 1: 概览（口径说明 + 近期趋势）")
# The week count is taken from the generated rows rather than hard-coded.
print(f" Sheet 2: 每周明细{len(results)}周完整数据")
print(" Sheet 3: 图表（人均课消趋势 + 付费用户增长）")