ai_member_xiaoxi/scripts/generate_charts_v3.py
2026-05-15 08:00:01 +08:00

219 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Charts v2: L1 paying users = L1-only + L1+L2 buyers; L2 paying users = L2-only + L1+L2 buyers.
"""
import psycopg2
from collections import defaultdict
from datetime import datetime, timedelta, date
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.font_manager as fm
import numpy as np
# Register the Noto Sans CJK font file with matplotlib and make it the default
# family so the Chinese chart labels render; the path is named once instead of
# being repeated in both calls.
_cjk_font_path = '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc'
fm.fontManager.addfont(_cjk_font_path)
_cjk_font_name = fm.FontProperties(fname=_cjk_font_path).get_name()
plt.rcParams['font.family'] = _cjk_font_name
# Draw negative numbers with an ASCII hyphen rather than the Unicode minus sign.
plt.rcParams['axes.unicode_minus'] = False
# Open the PostgreSQL connection and the single cursor used by every query in
# this script.
# NOTE(review): the host, user and password are hard-coded in source. Move them
# to environment variables or a secrets store, and rotate this password since
# it has been committed.
conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591, user="ai_member",
    password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi"
)
cur = conn.cursor()
# Chapter ids that are skipped when counting lesson completions (the chart
# titles describe these as the U0 prologue).
u0_chapters = {55, 56, 57, 58, 59, 343, 344, 345, 346, 348}

# Reporting window: overall_start is inclusive, overall_end is exclusive.
overall_start = date(2025, 9, 1)
overall_end = date(2026, 5, 11)

# Split the window into consecutive (week_start, week_end) pairs, both ends
# inclusive. Every week ends on a Sunday; the first week may be partial when
# the window does not start on a Monday, and the last week is clipped to the
# final day of the window.
weeks = []
last_day = overall_end - timedelta(days=1)
d = overall_start
while d < overall_end:
    ws = d
    # 6 - weekday() is the number of days left until Sunday (Monday == 0).
    we = min(d + timedelta(days=6 - d.weekday()), last_day)
    weeks.append((ws, we))
    d = we + timedelta(days=1)
print("分类付费用户...")

# Every order with a payment date that belongs to an active, non-deleted
# account, tagged with a level derived from goods_id.
_order_sql = """
SELECT o.account_id, o.trade_no, o.order_status, o.pay_success_date,
CASE WHEN o.goods_id IN (57, 60, 63) THEN 'L1'
WHEN o.goods_id = 61 THEN 'L1+L2'
WHEN o.goods_id IN (31, 32, 33, 54) THEN 'L2'
ELSE '其他' END as level_type
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id
WHERE a.status = 1 AND a.deleted_at IS NULL AND o.pay_success_date IS NOT NULL
"""
cur.execute(_order_sql)
orders = cur.fetchall()

# Trade numbers of refund records with status 3
# (presumably "refund completed" -- TODO confirm against the schema).
_refund_sql = "SELECT trade_no FROM bi_refund_order WHERE status = 3"
cur.execute(_refund_sql)
refund_trades = {row[0] for row in cur.fetchall()}
# Per-account indexes built from the order rows:
#   user_levels[account_id] -> set of level tags the account ever ordered
#                              (refunded orders included)
#   user_orders[account_id] -> list of (pay_date, is_refunded) tuples
user_levels = defaultdict(set)
user_orders = defaultdict(list)
for aid, trade_no, order_status, pay_date, lt in orders:
    # An order counts as refunded only when both signals agree: order status 4
    # and a matching row in the refund table.
    is_refunded = order_status == 4 and trade_no in refund_trades
    user_levels[aid].add(lt)
    # Normalise the payment timestamp to a plain date. The driver may hand back
    # a datetime, a date or a string, so accept all three (the same tolerance
    # the lesson-completion query applies to updated_at).
    if isinstance(pay_date, datetime):
        paid_on = pay_date.date()
    elif isinstance(pay_date, date):
        paid_on = pay_date
    else:
        paid_on = datetime.strptime(str(pay_date)[:10], '%Y-%m-%d').date()
    user_orders[aid].append((paid_on, is_refunded))
def is_paid(aid, as_of):
    """Return True if the account has a non-refunded order paid on or before *as_of*.

    Reads the module-level ``user_orders`` mapping, whose values are lists of
    ``(pay_date, is_refunded)`` tuples. An account with no entry is unpaid.

    NOTE(review): the check is level-agnostic -- any surviving order counts,
    whatever level it was for. Confirm this is intended for the per-level
    charts.
    """
    # .get() avoids inserting an empty list into the defaultdict on a miss.
    return any(
        paid_on <= as_of and not refunded
        for paid_on, refunded in user_orders.get(aid, ())
    )
# Grouping: the L1 group is L1-only plus L1+L2 buyers; the L2 group is L2-only
# plus L1+L2 buyers. An account can therefore belong to both groups.
# Membership is taken from user_levels alone; refunds are applied later, when
# the paid status is evaluated per week.
_l1_levels = {'L1', 'L1+L2'}
_l2_levels = {'L2', 'L1+L2'}
l1_group = {aid for aid, levels in user_levels.items() if levels & _l1_levels}  # everyone who ordered L1
l2_group = {aid for aid, levels in user_levels.items() if levels & _l2_levels}  # everyone who ordered L2
print(f"L1付费群: {len(l1_group)}人, L2付费群: {len(l2_group)}人, 重叠(L1+L2): {len(l1_group & l2_group)}")
print("查询课消...")

# cons_map[(user_id, chapter_id)] -> earliest date inside the reporting window
# on which that chapter was recorded with play_status = 1. Each pair is thus
# counted once, in the week of its first recorded completion.
cons_map = {}

# Take the window from the shared constants rather than repeating the dates.
# They are passed as ISO strings so the SQL the server sees is unchanged.
_window = (overall_start.isoformat(), overall_end.isoformat())

# The play records are spread over eight tables with suffixes _0 .. _7.
for ti in range(8):
    tbl = f"bi_user_chapter_play_record_{ti}"
    # The table name comes from range() above, never from external input.
    cur.execute(
        f"""SELECT user_id, chapter_id, updated_at FROM {tbl}
            WHERE play_status = 1 AND updated_at >= %s AND updated_at < %s""",
        _window,
    )
    for uid, cid, ua in cur.fetchall():
        if cid in u0_chapters:
            continue
        # updated_at may come back as a datetime or as something string-like.
        if hasattr(ua, 'date'):
            played_on = ua.date()
        else:
            played_on = datetime.strptime(str(ua)[:10], '%Y-%m-%d').date()
        key = (uid, cid)
        if key not in cons_map or played_on < cons_map[key]:
            cons_map[key] = played_on
print("角色映射...")

# The user_id values in the play records are looked up as ids of
# bi_vala_app_character; char2acct maps each of them to its owning account_id.
all_uids = list({uid for uid, _chapter_id in cons_map})
char2acct = {}

# Query in batches so the IN (...) list stays bounded.
_batch_size = 500
for i in range(0, len(all_uids), _batch_size):
    batch = all_uids[i:i + _batch_size]
    ph = ','.join(['%s'] * len(batch))
    cur.execute(f"SELECT id, account_id FROM bi_vala_app_character WHERE id IN ({ph})", batch)
    for char_id, aid in cur.fetchall():
        char2acct[char_id] = aid
print("按周汇总...")


def _week_stats(prefix, paid, cons, cons_users):
    """Build the per-level columns of one weekly result row.

    prefix      -- column prefix, 'L1' or 'L2'
    paid        -- set of accounts that count as paid at the end of the week
    cons        -- number of lesson completions by those accounts in the week
    cons_users  -- set of paid accounts with at least one completion
    """
    return {
        f'{prefix}_paid': len(paid),
        f'{prefix}_cons': cons,
        f'{prefix}_cons_users': len(cons_users),
        f'{prefix}_no_cons': len(paid) - len(cons_users),
        # Averages fall back to 0 when the denominator would be zero.
        f'{prefix}_avg_all': round(cons / len(paid), 2) if paid else 0,
        f'{prefix}_avg_cons': round(cons / len(cons_users), 2) if cons_users else 0,
    }


# Resolve every completion to its account once, instead of repeating the
# lookup for each week. Completions whose character has no account are dropped.
_account_completions = []
for (uid, _chapter_id), cons_date in cons_map.items():
    aid = char2acct.get(uid)
    if aid:
        _account_completions.append((aid, cons_date))

results = []
for ws, we in weeks:
    # Paying users as of the last day of the week.
    l1_paid = {aid for aid in l1_group if is_paid(aid, we)}
    l2_paid = {aid for aid in l2_group if is_paid(aid, we)}
    l1_cons, l1_cons_users = 0, set()
    l2_cons, l2_cons_users = 0, set()
    for aid, cons_date in _account_completions:
        if not ws <= cons_date <= we:
            continue
        # An account in both groups contributes to both levels.
        if aid in l1_paid:
            l1_cons += 1
            l1_cons_users.add(aid)
        if aid in l2_paid:
            l2_cons += 1
            l2_cons_users.add(aid)
    row = {'ws': ws, 'we': we}
    row.update(_week_stats('L1', l1_paid, l1_cons, l1_cons_users))
    row.update(_week_stats('L2', l2_paid, l2_cons, l2_cons_users))
    results.append(row)
# No further queries are issued below this point; release the cursor and the
# connection before the charts are rendered.
# NOTE(review): these calls are skipped if any earlier statement raises.
cur.close()
conn.close()
# ===== Chart generation =====
print("\n生成图表...")

# Directory the PNG files are written to.
out = '/root/.openclaw/workspace/output'

# One entry per level. The dict key becomes the output file-name prefix:
#   prefix -- column prefix used in the weekly result rows
#   color  -- main series colour
#   light  -- lighter shade used for the stacked bars
#   label  -- level name shown in the chart titles
configs = {
    'L1_all': {
        'prefix': 'L1',
        'color': '#4A90D9',
        'light': '#A8CFF1',
        'label': 'L1',
    },
    'L2_all': {
        'prefix': 'L2',
        'color': '#E85D47',
        'light': '#F4A9A0',
        'label': 'L2',
    },
}
# Render two PNG files per configured level:
#   <key>_users_stack.png -- weekly stacked bars of paying users with and
#                            without lesson completions
#   <key>_avg_trend.png   -- weekly average completions per paying user and per
#                            active user
charts_written = 0
for key, cfg in configs.items():
    pfx = cfg['prefix']
    color = cfg['color']
    light = cfg['light']
    label = cfg['label']

    # Start at the first week in which this level has any paying user. If no
    # such week exists, skip the level instead of letting next() raise
    # StopIteration.
    first = next((i for i, r in enumerate(results) if r[f'{pfx}_paid'] > 0), None)
    if first is None:
        print(f'{key}: 无付费用户数据, 跳过')
        continue
    data = results[first:]

    # Line-chart x positions sit three days after each week start.
    xs = [r['ws'] + timedelta(days=3) for r in data]
    dates = [r['ws'] for r in data]
    paid = [r[f'{pfx}_paid'] for r in data]
    cons_users = [r[f'{pfx}_cons_users'] for r in data]
    no_cons = [r[f'{pfx}_no_cons'] for r in data]
    avg_all = [r[f'{pfx}_avg_all'] for r in data]
    avg_cons = [r[f'{pfx}_avg_cons'] for r in data]

    # Chart 1: stacked bars (active users at the bottom, inactive on top).
    fig, ax = plt.subplots(figsize=(18, 8))
    x_idx = np.arange(len(xs))
    bar_w = 0.65
    ax.bar(x_idx, cons_users, bar_w, color=light, label='有课消用户', zorder=3)
    ax.bar(x_idx, no_cons, bar_w, bottom=cons_users, color='#D0D0D0', label='无课消用户', zorder=3)
    # Label and tick roughly every tenth of the weeks to keep the axis readable.
    step = max(1, len(data) // 10)
    for i in range(0, len(data), step):
        ax.annotate(str(paid[i]), (i, paid[i]), textcoords='offset points', xytext=(0, 5),
                    fontsize=7.5, ha='center', color='#333333', fontweight='bold')
    ax.set_xticks(x_idx[::step])
    ax.set_xticklabels([dates[i].strftime('%m/%d') for i in range(0, len(data), step)],
                       fontsize=8.5, rotation=45)
    ax.set_ylabel('用户数', fontsize=13)
    ax.set_title(f'{label}付费用户周课消分布剔除U0序章', fontsize=16, fontweight='bold')
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', alpha=0.3, zorder=0)
    ax.set_xlim(-0.5, len(x_idx) - 0.5)
    # Corner summary for the most recent week.
    no_rate = no_cons[-1] / paid[-1] * 100 if paid[-1] else 0
    ax.text(0.97, 0.95, f'付费{paid[-1]}人 | 无课消率{no_rate:.0f}%', transform=ax.transAxes,
            fontsize=11, ha='right', va='top', color='#666666', fontstyle='italic')
    plt.tight_layout()
    plt.savefig(f'{out}/{key}_users_stack.png', dpi=150, bbox_inches='tight', facecolor='white')
    plt.close(fig)
    charts_written += 1
    print(f'{key}_users_stack.png')

    # Chart 2: average completions per week, over all paying users and over
    # only those with at least one completion.
    fig, ax = plt.subplots(figsize=(18, 8))
    ax.plot(xs, avg_all, 'o-', color='#999999', linewidth=2.2, markersize=5,
            label='人均课消(全部付费用户)', markerfacecolor='white')
    ax.plot(xs, avg_cons, 's-', color=color, linewidth=2.8, markersize=5,
            label='人均课消(有课消用户)', markerfacecolor='white')
    ax.fill_between(xs, avg_all, avg_cons, alpha=0.08, color=color)
    for i in range(0, len(data), max(1, len(data) // 8)):
        ax.annotate(f'{avg_all[i]:.1f}', (xs[i], avg_all[i]), textcoords='offset points',
                    xytext=(0, -15), fontsize=7.5, color='#999999', ha='center')
        ax.annotate(f'{avg_cons[i]:.1f}', (xs[i], avg_cons[i]), textcoords='offset points',
                    xytext=(0, 7), fontsize=7.5, color=color, ha='center', fontweight='bold')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, fontsize=9)
    ax.set_ylabel('课消数(节/周)', fontsize=13)
    ax.set_title(f'{label}付费用户周人均课消趋势剔除U0序章', fontsize=16, fontweight='bold')
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(True, alpha=0.3)
    # Small margin around the reporting window, derived from the shared
    # constants (two days before the start, one day after the exclusive end).
    ax.set_xlim(overall_start - timedelta(days=2), overall_end + timedelta(days=1))
    plt.tight_layout()
    plt.savefig(f'{out}/{key}_avg_trend.png', dpi=150, bbox_inches='tight', facecolor='white')
    plt.close(fig)
    charts_written += 1
    print(f'{key}_avg_trend.png')

# Report the number of files actually written (4 when both levels have data).
print(f'\n✅ {charts_written}张图表已生成')