ai_member_xiaoxi/scripts/daren_dim_charts.py
2026-05-27 08:00:01 +08:00

631 lines
28 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""达播业务三维度深度分析图表:达人 × 月度 × 平台"""
import openpyxl, glob, os, numpy as np
from datetime import datetime, timedelta
from collections import defaultdict
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
# Global matplotlib text settings, applied in one call.
plt.rcParams.update({
    # Sans-serif font preference list; the first installed entry is used.
    # The leading entries are CJK fonts (the chart labels are Chinese).
    'font.sans-serif': ['WenQuanYi Micro Hei', 'Noto Sans CJK SC', 'SimHei', 'DejaVu Sans'],
    # Render minus signs with the ASCII hyphen rather than U+2212.
    'axes.unicode_minus': False,
})
# === Colors ===
# Named base colours (hex RGB), shared by all three figures.
C_BLUE = '#2563EB'
C_GOLD = '#F59E0B'
C_RED = '#EF4444'
C_GREEN = '#10B981'
C_PURPLE = '#8B5CF6'
C_TEAL = '#14B8A6'
C_PINK = '#EC4899'
C_ORANGE = '#F97316'
C_DARK = '#1F2937'
C_GRAY = '#6B7280'
C_LIGHT = '#F3F4F6'

# Ten-colour cycle for series, indexed by position.
PALETTE = [
    C_BLUE, C_GOLD, C_RED, C_PURPLE, C_GREEN,
    C_PINK, C_TEAL, C_ORANGE, '#6366F1', '#84CC16',
]
# Lighter ten-colour companion list.
PALETTE_SOFT = [
    '#BFDBFE', '#FDE68A', '#FECACA', '#C4B5FD', '#A7F3D0',
    '#FBCFE8', '#99F6E4', '#FDBA74', '#C7D2FE', '#BEF264',
]
# === Data loading ===
# The source workbook is located by a fragment of its file name.  Matches are
# sorted so the choice is deterministic when more than one file matches, and a
# missing file is reported explicitly instead of surfacing as an IndexError.
files = sorted(glob.glob('/root/.openclaw/media/inbound/*3403f15a*'))
if not files:
    raise FileNotFoundError(
        "No workbook matching '/root/.openclaw/media/inbound/*3403f15a*' was found"
    )
wb = openpyxl.load_workbook(files[0])
# All rows are read from the worksheet named 'Sheet1'.
ws = wb['Sheet1']
def excel_date_to_str(val):
    """Convert a worksheet cell value to a date string.

    Args:
        val: The raw cell value. ``None``, ``datetime``, ``str`` and numeric
            values are handled explicitly.

    Returns:
        ``None`` for ``None``; ``'YYYY-MM-DD'`` for ``datetime`` values and for
        numbers (interpreted as a day count from 1899-12-30); the unchanged
        input for strings; ``str(val)`` for anything else, including numbers
        that cannot be converted to a date.
    """
    if val is None:
        return None
    if isinstance(val, datetime):
        return val.strftime('%Y-%m-%d')
    if isinstance(val, str):
        return val
    if isinstance(val, (int, float)):
        try:
            # The fractional (time-of-day) part is discarded by int().
            day = datetime(1899, 12, 30) + timedelta(days=int(val))
            return day.strftime('%Y-%m-%d')
        except (ValueError, OverflowError):
            # NaN/inf cannot be turned into an int, and very large or very
            # negative serials fall outside the datetime range.
            return str(val)
    return str(val)
# Flatten the worksheet (header row skipped) into a list of record dicts.
# Column positions read: 0 name, 1 date, 2 platform, 3 orders, 4 GMV,
# 5 refund orders, 6 refund amount, 10 GSV.
data = []
for cells in ws.iter_rows(min_row=2, max_row=ws.max_row, values_only=True):
    name = str(cells[0]).strip() if cells[0] else ''
    # Skip subtotal / grand-total rows and rows without an order count.
    is_summary_row = any(marker in name for marker in ('合计', '总计'))
    if is_summary_row or cells[3] is None:
        continue
    record = {
        'name': name,
        'date': excel_date_to_str(cells[1]),
        'platform': str(cells[2]).strip() if cells[2] else '',
        'orders': cells[3],
        'gmv': cells[4] or 0,
        'refund_orders': cells[5] or 0,
        'refund_amount': cells[6] or 0,
        'gsv': cells[10] or 0,
    }
    data.append(record)
# Name normalisation: raw account name -> canonical influencer name.
# Names that are not listed are kept unchanged.
name_map = {
    '晚柠也是个妈妈了': '晚柠',
    '晚柠': '晚柠',
    '念妈讲学习规划': '念妈',
    '念妈': '念妈',
    '学霸三人行': '学霸三人行',
    '学霸老王': '学霸老王',
    '开心妈妈学习宝藏': '开心妈妈',
    '开心爸育儿': '开心爸',
    '小花生网': '小花生网',
    '小花生': '小花生网',
    '盈姐': '盈姐',
    '百克力': '百克力',
    '亮爸': '亮爸',
    '万物分销': '万物分销',
    '万物内购': '万物分销',
    '万物团购': '万物分销',
    '小小鹰萱妈': '小小鹰萱妈',
    '老狼聊育儿': '老狼聊育儿',
    '海淀妈妈优选': '海淀妈妈优选',
    '神奇瓜妈聊成长': '神奇瓜妈',
    '宣儿妈妈': '宣儿妈妈',
    '宣儿麻麻': '宣儿妈妈',
    '四个娃的组合生活': '四个娃的组合生活',
    '肆个葫芦娃的妈': '四个娃的组合生活',
    '小暖阿姨慢一点': '小暖阿姨',
}
# Attach the canonical name to every record.
for r in data:
    raw_name = r['name']
    r['norm_name'] = name_map.get(raw_name, raw_name)
# Reporting window in chronological order: 2025-09 .. 2025-12, then
# 2026-01 .. 2026-05.  MONTH_LABELS holds the matching axis labels.
MONTH_ORDER = (
    [f'2025-{month:02d}' for month in range(9, 13)]
    + [f'2026-{month:02d}' for month in range(1, 6)]
)
MONTH_LABELS = [f'{int(key[5:])}月' for key in MONTH_ORDER]
def parse_month(date_str):
    """Return the month key for a date string.

    The key is the first seven characters of ``date_str`` ('YYYY-MM' for
    ISO-style dates).  Keys 2026-09 through 2026-12 are rewritten to the same
    month in 2025.  NOTE(review): presumably those rows carry a mistyped
    year -- confirm against the source workbook.

    Returns ``None`` when ``date_str`` is empty or ``None``.
    """
    if not date_str:
        return None
    month_key = date_str[:7]
    if month_key in ('2026-09', '2026-10', '2026-11', '2026-12'):
        return '2025' + month_key[4:]
    return month_key
# Platform merge
def merge_platform(p):
    """Collapse a raw platform label into one of the merged platform names.

    Rules are tried in order and the first one whose keywords all occur in
    ``p`` wins; a label matching no rule is returned unchanged.
    """
    rules = (
        (('三开',), '三开'),
        (('小红书',), '小红书'),
        # The combined label must be tested before either single platform.
        (('抖音', '视频号'), '抖音+视频号'),
        (('视频号',), '视频号'),
        (('抖音',), '抖音'),
        (('分销',), '分销'),
        (('万物',), '万物'),
        (('公众号',), '公众号'),
        (('社群',), '社群'),
        (('微信小店',), '微信小店'),
    )
    for keywords, merged in rules:
        if all(keyword in p for keyword in keywords):
            return merged
    return p
# === Prepare cross-dimensional data ===
# Both tables are nested defaultdicts: outer key -> month key -> running totals
# of GMV, GSV, order count and refunded-order count.  Records without a month
# key are ignored.

# 1. By influencer × month
inf_monthly = defaultdict(lambda: defaultdict(lambda: {'gmv': 0, 'gsv': 0, 'orders': 0, 'ref_ords': 0}))
for r in data:
    m = parse_month(r['date'])
    if not m:
        continue
    cell = inf_monthly[r['norm_name']][m]
    cell['gmv'] += r['gmv']
    cell['gsv'] += r['gsv']
    cell['orders'] += r['orders']
    cell['ref_ords'] += r['refund_orders']

# 2. By platform × month
plat_monthly = defaultdict(lambda: defaultdict(lambda: {'gmv': 0, 'gsv': 0, 'orders': 0, 'ref_ords': 0}))
for r in data:
    m = parse_month(r['date'])
    p = merge_platform(r['platform'])
    if not m:
        continue
    cell = plat_monthly[p][m]
    cell['gmv'] += r['gmv']
    cell['gsv'] += r['gsv']
    cell['orders'] += r['orders']
    cell['ref_ords'] += r['refund_orders']
# Rank influencers by total GMV over all records and keep the first TOP_N.
inf_total = defaultdict(lambda: {'gmv': 0})
for r in data:
    inf_total[r['norm_name']]['gmv'] += r['gmv']
TOP_N = 8
# Blank or whitespace-only names are excluded from the ranking.
inf_filtered = [(inf, totals) for inf, totals in inf_total.items() if inf and inf.strip()]
ranked_inf = sorted(inf_filtered, key=lambda pair: pair[1]['gmv'], reverse=True)
top_inf = [pair[0] for pair in ranked_inf[:TOP_N]]

# Rank merged platforms by total GMV; zero-GMV and blank-named ones are dropped.
plat_total = defaultdict(lambda: {'gmv': 0})
for r in data:
    plat_total[merge_platform(r['platform'])]['gmv'] += r['gmv']
ranked_plat = sorted(plat_total.items(), key=lambda pair: pair[1]['gmv'], reverse=True)
top_plat = [pair[0] for pair in ranked_plat if pair[1]['gmv'] > 0 and pair[0].strip()]

print(f"Top influencers: {top_inf}")
print(f"Top platforms: {top_plat}")
# ============================================================
# FIGURE A: influencer-dimension analysis (2 rows × 3 cols)
# ============================================================
figA, axesA = plt.subplots(2, 3, figsize=(22, 14))
figA.patch.set_facecolor('#FAFBFC')

# A1: heatmap of monthly GMV per top influencer (unit: 10,000 yuan)
ax = axesA[0, 0]
heat = np.zeros((len(top_inf), len(MONTH_ORDER)))
for i, name in enumerate(top_inf):
    monthly = inf_monthly[name]
    for j, m in enumerate(MONTH_ORDER):
        heat[i, j] = monthly[m]['gmv'] / 10000
# Cells with no GMV are masked so they are not coloured.
masked = np.ma.masked_equal(heat, 0)
im = ax.imshow(masked, cmap='YlOrRd', aspect='auto', vmin=0, vmax=150)
# Print the value inside every non-empty cell; white text on the dark cells.
for (i, j), v in np.ndenumerate(heat):
    if not v > 0:
        continue
    col = 'white' if v > 80 else C_DARK
    ax.text(j, i, f'{v:.0f}', ha='center', va='center', fontsize=8, fontweight='bold', color=col)
ax.set_xticks(range(len(MONTH_ORDER)))
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_yticks(range(len(top_inf)))
ax.set_yticklabels(top_inf, fontsize=9)
ax.set_title('达人月度 GMV 热力图 (万元)', fontsize=13, fontweight='bold', pad=10)
colorbar = plt.colorbar(im, ax=ax, shrink=0.8, pad=0.02)
colorbar.set_label('万元', fontsize=9)
# A2: monthly refund-rate trend per top influencer
ax = axesA[0, 1]
month_positions = range(len(MONTH_ORDER))
for idx, name in enumerate(top_inf):
    month_stats = [inf_monthly[name][m] for m in MONTH_ORDER]
    # Refunded orders / orders in percent; NaN leaves a gap for months without orders.
    rates = [
        (s['ref_ords'] / s['orders'] * 100) if s['orders'] > 0 else np.nan
        for s in month_stats
    ]
    ax.plot(month_positions, rates, 'o-', color=PALETTE[idx], linewidth=2,
            markersize=7, markerfacecolor='white', markeredgewidth=2, label=name, alpha=0.9)
# Dashed reference line at a 40% refund rate.
ax.axhline(y=40, color=C_RED, linestyle='--', alpha=0.3, linewidth=1)
ax.set_xticks(range(len(MONTH_ORDER)))
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('达人月度退款率走势', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('退款率 (%)', fontsize=10)
ax.legend(loc='upper left', fontsize=7.5, ncol=2, framealpha=0.9)
ax.grid(alpha=0.3)
ax.set_ylim(0, 85)
# A3: capability radar for the top 6 influencers
# (GMV, GSV, order count, retention rate, average order value, active months)
# A radar chart needs polar coordinates, so the Cartesian axes created by
# plt.subplots is replaced by a polar axes occupying the same grid cell.
radar_cell = axesA[0, 2].get_subplotspec()
axesA[0, 2].remove()
ax = figA.add_subplot(radar_cell, projection='polar')
axesA[0, 2] = ax

categories = ['GMV', 'GSV', '订单量', '留存率', '均单额', '场次']
metrics = {}
for name in top_inf[:6]:
    total_s = inf_monthly[name]
    all_orders = sum(v['orders'] for v in total_s.values())
    all_gmv = sum(v['gmv'] for v in total_s.values())
    all_gsv = sum(v['gsv'] for v in total_s.values())
    all_ref = sum(v['ref_ords'] for v in total_s.values())
    # '场次' is counted as the number of months with at least one order.
    sessions = sum(1 for v in total_s.values() if v['orders'] > 0)
    metrics[name] = {
        'GMV': all_gmv,
        'GSV': all_gsv,
        '订单量': all_orders,
        # Share of orders that were not refunded, in percent.
        '留存率': (1 - all_ref / all_orders) * 100 if all_orders > 0 else 100,
        '均单额': all_gmv / all_orders if all_orders > 0 else 0,
        '场次': sessions,
    }

# Each metric is scaled to 0-100 relative to the best influencer on that metric.
# default=0 keeps this safe when there are no influencers at all.
maxes = {k: max((vals[k] for vals in metrics.values()), default=0) for k in categories}
N_cat = len(categories)
angles = np.linspace(0, 2 * np.pi, N_cat, endpoint=False).tolist()
angles += angles[:1]  # repeat the first angle to close the polygon
for idx, (name, m) in enumerate(metrics.items()):
    # A metric whose maximum is 0 is drawn as 0 instead of dividing by zero.
    values = [m[k] / maxes[k] * 100 if maxes[k] else 0 for k in categories]
    values += values[:1]
    ax.fill(angles, values, alpha=0.08, color=PALETTE[idx])
    ax.plot(angles, values, 'o-', color=PALETTE[idx], linewidth=2, markersize=5, label=name)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, fontsize=9, fontweight='bold')
ax.set_title('达人能力雷达图 (TOP6)', fontsize=13, fontweight='bold', pad=15)
ax.legend(loc='lower right', fontsize=7.5, bbox_to_anchor=(1.3, 0))
ax.set_ylim(0, 110)
# A4: monthly GSV stacked by influencer (top 5 plus everyone else)
ax = axesA[1, 0]
top5_names = top_inf[:5]
stack_names = top5_names + ['其他']
# month key -> series name -> GSV in units of 10,000 yuan
monthly_stacked = defaultdict(lambda: defaultdict(float))
for r in data:
    m = parse_month(r['date'])
    n = r['norm_name'] if r['norm_name'] in top5_names else '其他'
    if m:
        monthly_stacked[m][n] += r['gsv'] / 10000
areas = {n: [monthly_stacked[m].get(n, 0) for m in MONTH_ORDER] for n in stack_names}
# Stack the series: each band sits on the running total of the bands below it.
bottom = np.zeros(len(MONTH_ORDER))
for idx, name in enumerate(stack_names):
    layer = np.array(areas[name])
    ax.fill_between(range(len(MONTH_ORDER)), bottom, bottom + layer,
                    color=PALETTE[idx], alpha=0.8, label=name, linewidth=0.5, edgecolor='white')
    bottom += layer
ax.set_xticks(range(len(MONTH_ORDER)))
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度 GSV 达人贡献占比 (万元)', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('GSV (万元)', fontsize=10)
ax.legend(loc='upper left', fontsize=8, ncol=2, framealpha=0.9)
ax.grid(axis='y', alpha=0.3)
# A5: bubble scatter of GMV per order vs refund rate; one bubble per
# influencer-month, bubble area proportional to that month's order count
ax = axesA[1, 1]
for idx, name in enumerate(top_inf[:6]):
    points = []
    for m in MONTH_ORDER:
        s = inf_monthly[name][m]
        if s['orders'] > 0:
            points.append((
                s['gmv'] / s['orders'],              # GMV per order
                s['ref_ords'] / s['orders'] * 100,   # refund rate in percent
                s['orders'] * 12,                    # marker area
            ))
    if not points:
        continue
    x_vals = [pt[0] for pt in points]
    y_vals = [pt[1] for pt in points]
    s_vals = [pt[2] for pt in points]
    ax.scatter(x_vals, y_vals, s=s_vals, color=PALETTE[idx], alpha=0.7, edgecolors='white', linewidth=1.2, label=name)
    # Tag every bubble with the first three characters of the influencer name.
    for px, py in zip(x_vals, y_vals):
        ax.annotate(name[:3], (px, py), fontsize=6.5, ha='center', va='bottom', color=C_GRAY, alpha=0.7)
ax.axhline(y=40, color=C_RED, linestyle='--', alpha=0.3)
ax.set_xlabel('单均GMV (元)', fontsize=10)
ax.set_ylabel('退款率 (%)', fontsize=10)
ax.set_title('达人效率矩阵单均GMV × 退款率\n(气泡=月订单量, 每点=一个月)', fontsize=12, fontweight='bold', pad=10)
ax.legend(fontsize=7.5, loc='upper right', framealpha=0.9)
ax.grid(alpha=0.3)
# A6: monthly activity grid -- which months each top influencer had orders
ax = axesA[1, 2]
# 1 where the influencer has at least one order in the month, otherwise 0.
active = np.zeros((len(top_inf), len(MONTH_ORDER)))
for i, name in enumerate(top_inf):
    for j, m in enumerate(MONTH_ORDER):
        if inf_monthly[name][m]['orders'] > 0:
            active[i, j] = 1
ax.imshow(active, cmap='RdYlGn', aspect='auto', alpha=0.8)
# NOTE(review): both marker strings below are empty in this copy of the file,
# so nothing visible is drawn; presumably the original glyphs were lost --
# confirm against the upstream source.
for (i, j), flag in np.ndenumerate(active):
    if flag == 1:
        ax.text(j, i, '', ha='center', va='center', fontsize=14, color='#065F46', fontweight='bold')
    else:
        ax.text(j, i, '', ha='center', va='center', fontsize=10, color='#D1D5DB')
# Faint per-month background tint, one colour per entry in phase_colors.
phase_colors = ['#FEF3C7', '#FEF3C7', '#FEF3C7', '#FEF3C7', '#DBEAFE', '#DBEAFE', '#FEE2E2', '#FEE2E2', '#D1FAE5']
for j in range(len(MONTH_ORDER)):
    ax.axvspan(j - 0.5, j + 0.5, alpha=0.12, color=phase_colors[j], zorder=0)
ax.set_xticks(range(len(MONTH_ORDER)))
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_yticks(range(len(top_inf)))
ax.set_yticklabels(top_inf, fontsize=9)
ax.set_title('达人月度开播活跃度', fontsize=13, fontweight='bold', pad=10)
# Finalise Figure A and write it to disk.
figA.suptitle('维度一:达人维度深度分析', fontsize=17, fontweight='bold', y=1.01, color=C_DARK)
# Layout and close are tied to figA explicitly instead of the "current figure".
figA.tight_layout(pad=3.5)
outA = '/root/.openclaw/workspace/output/daren_dim_people.png'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(outA), exist_ok=True)
figA.savefig(outA, dpi=180, bbox_inches='tight', facecolor=figA.get_facecolor())
plt.close(figA)
print(f"✅ Figure A: {outA} ({os.path.getsize(outA)/1024:.0f} KB)")
# ============================================================
# FIGURE B: month-dimension analysis (2 rows × 3 cols)
# ============================================================
figB, axesB = plt.subplots(2, 3, figsize=(22, 14))
figB.patch.set_facecolor('#FAFBFC')

# Per-month totals over all records; 'sessions' collects the distinct date
# strings seen in the month.  Records without a month key are skipped.
m_totals = {}
for r in data:
    m = parse_month(r['date'])
    if not m:
        continue
    month_total = m_totals.setdefault(
        m, {'gmv': 0, 'gsv': 0, 'orders': 0, 'ref_ords': 0, 'ref_amt': 0, 'sessions': set()})
    month_total['gmv'] += r['gmv']
    month_total['gsv'] += r['gsv']
    month_total['orders'] += r['orders']
    month_total['ref_ords'] += r['refund_orders']
    month_total['ref_amt'] += r['refund_amount']
    month_total['sessions'].add(r['date'])
# B1: monthly GMV split into GSV (bottom) and the GMV-GSV remainder (top),
# labelled with total GMV and order count
ax = axesB[0, 0]
x = np.arange(len(MONTH_ORDER))
# Months missing from m_totals contribute 0.  Money is in units of 10,000 yuan.
month_rows = [m_totals.get(m) for m in MONTH_ORDER]
gmv_arr = [row['gmv'] / 10000 if row else 0 for row in month_rows]
gsv_arr = [row['gsv'] / 10000 if row else 0 for row in month_rows]
refund_arr = np.array(gmv_arr) - np.array(gsv_arr)
order_arr = [row['orders'] if row else 0 for row in month_rows]
bars_b1 = ax.bar(x, gsv_arr, color=C_GREEN, alpha=0.85, label='GSV', zorder=3)
bars_b2 = ax.bar(x, refund_arr, bottom=gsv_arr, color=C_RED, alpha=0.5, label='退款', zorder=3)
# Two-line label above each non-empty bar: GMV, then order count.
for i, (month_gmv, month_orders) in enumerate(zip(gmv_arr, order_arr)):
    if month_gmv > 0:
        ax.text(i, month_gmv + 1, f'{month_gmv:.0f}\n{month_orders}', ha='center', fontsize=8, fontweight='bold', color=C_DARK)
ax.set_xticks(x)
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度 GMV 构成 & 订单量', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('万元', fontsize=10)
ax.legend(fontsize=8)
ax.grid(axis='y', alpha=0.3, zorder=0)
# B2: monthly refund rate (left axis) and GSV/GMV ratio (right axis)
ax = axesB[0, 1]
ax2 = ax.twinx()
ref_rates = []
gsv_rates = []
for m in MONTH_ORDER:
    tot = m_totals.get(m)
    # A month without data, or with a zero denominator, is plotted as 0.
    ref_rates.append((tot['ref_ords'] / tot['orders'] * 100) if tot and tot['orders'] > 0 else 0)
    gsv_rates.append((tot['gsv'] / tot['gmv'] * 100) if tot and tot['gmv'] > 0 else 0)
ax.bar(x, ref_rates, 0.5, color=C_RED, alpha=0.25, zorder=2)
ax.plot(x, ref_rates, 'o-', color=C_RED, linewidth=2.5, markersize=9, zorder=4)
ax2.plot(x, gsv_rates, 's--', color=C_GREEN, linewidth=2.5, markersize=9, zorder=4)
# Value labels: refund rate above its marker, GSV ratio below its marker.
for i, (r, g) in enumerate(zip(ref_rates, gsv_rates)):
    if r > 0:
        ax.annotate(f'{r:.0f}%', (i, r), textcoords="offset points", xytext=(0, 12), ha='center', fontsize=9, fontweight='bold', color=C_RED)
    if g > 0:
        ax2.annotate(f'{g:.0f}%', (i, g), textcoords="offset points", xytext=(0, -16), ha='center', fontsize=9, fontweight='bold', color=C_GREEN)
ax.set_xticks(x)
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度退款率 & GSV率 (净收入率)', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('退款率 (%)', color=C_RED, fontsize=10)
ax2.set_ylabel('GSV率 (%)', color=C_GREEN, fontsize=10)
ax.tick_params(axis='y', colors=C_RED)
ax2.tick_params(axis='y', colors=C_GREEN)
ax.grid(axis='y', alpha=0.3, zorder=0)
ax.set_ylim(0, 80)
ax2.set_ylim(0, 100)
# B3: sessions per month with average GMV and average orders per session.
# A "session" here is one distinct date string within the month.
ax = axesB[0, 2]
sessions_arr = []
avg_gmv_session = []
avg_orders_session = []
for m in MONTH_ORDER:
    tot = m_totals.get(m)
    n_sessions = len(tot['sessions']) if tot else 0
    sessions_arr.append(n_sessions)
    if tot and n_sessions > 0:
        avg_gmv_session.append(tot['gmv'] / n_sessions / 10000)
        avg_orders_session.append(tot['orders'] / n_sessions)
    else:
        avg_gmv_session.append(0)
        avg_orders_session.append(0)
ax3_twin = ax.twinx()
bars_b3 = ax.bar(x, sessions_arr, 0.5, color=C_PURPLE, alpha=0.3, label='直播场次', zorder=2)
# The average-GMV line shares the left axis with the session bars; the
# average-orders line uses the right axis.
ax.plot(x, avg_gmv_session, 'D-', color=C_BLUE, linewidth=2.5, markersize=10, zorder=4, label='场均GMV(万)')
ax3_twin.plot(x, avg_orders_session, '^--', color=C_ORANGE, linewidth=2, markersize=9, zorder=4, label='场均单量')
for i, (s, a, o) in enumerate(zip(sessions_arr, avg_gmv_session, avg_orders_session)):
    if s > 0:
        ax.text(i - 0.25, s + 0.3, str(s), fontsize=9, color=C_PURPLE, fontweight='bold')
        ax.annotate(f'¥{a:.0f}', (i, a), textcoords="offset points", xytext=(0, 12), ha='center', fontsize=8.5, color=C_BLUE, fontweight='bold')
        ax3_twin.annotate(f'{o:.0f}', (i, o), textcoords="offset points", xytext=(0, -16), ha='center', fontsize=8.5, color=C_ORANGE, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度场次 & 场均效率', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('场次', color=C_PURPLE, fontsize=10)
ax3_twin.set_ylabel('场均单量', color=C_ORANGE, fontsize=10)
ax.tick_params(axis='y', colors=C_PURPLE)
ax3_twin.tick_params(axis='y', colors=C_ORANGE)
ax.grid(axis='y', alpha=0.3, zorder=0)
# One legend covering the artists of both y-axes.
l1, l1a = ax.get_legend_handles_labels()
l2, l2a = ax3_twin.get_legend_handles_labels()
ax.legend(l1 + l2, l1a + l2a, loc='upper left', fontsize=7.5)
# B4: monthly GMV stacked by platform
ax = axesB[1, 0]
# Fixed stacking order; a platform's colour is its position in this list.
plat_order = ['小红书', '三开', '抖音+视频号', '视频号', '抖音', '分销', '微信小店', '公众号', '万物', '社群']
stack_bottom = np.zeros(len(MONTH_ORDER))
for pidx, p in enumerate(plat_order):
    per_month = plat_monthly[p]
    vals = [per_month[m]['gmv'] / 10000 for m in MONTH_ORDER]
    # Platforms whose GMV does not sum to a positive value are left out.
    if sum(vals) > 0:
        ax.bar(x, vals, bottom=stack_bottom, color=PALETTE[pidx % len(PALETTE)], alpha=0.85, label=p, zorder=3)
        stack_bottom += np.array(vals)
ax.set_xticks(x)
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度平台 GMV 分布', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('GMV (万元)', fontsize=10)
ax.legend(loc='upper left', fontsize=8, ncol=2, framealpha=0.9)
ax.grid(axis='y', alpha=0.3, zorder=0)
# B5: monthly GMV from new vs returning influencers.  A record counts as
# "new" when its month equals the influencer's earliest month in the data.
ax = axesB[1, 1]
first_appear = {}
for r in data:
    n = r['norm_name']
    m = parse_month(r['date'])
    if m and (n not in first_appear or m < first_appear[n]):
        first_appear[n] = m
# Counts are numbers of records; GMV is in units of 10,000 yuan.
new_count = defaultdict(int)
old_count = defaultdict(int)
new_gmv = defaultdict(float)
old_gmv = defaultdict(float)
for r in data:
    n = r['norm_name']
    m = parse_month(r['date'])
    if not m:
        continue
    is_first_month = first_appear.get(n) == m
    count_bucket = new_count if is_first_month else old_count
    gmv_bucket = new_gmv if is_first_month else old_gmv
    count_bucket[m] += 1
    gmv_bucket[m] += r['gmv'] / 10000
new_arr = [new_gmv[m] for m in MONTH_ORDER]
old_arr = [old_gmv[m] for m in MONTH_ORDER]
ax.bar(x, old_arr, color=C_BLUE, alpha=0.85, label='复播达人', zorder=3)
ax.bar(x, new_arr, bottom=old_arr, color=C_GOLD, alpha=0.85, label='新达人', zorder=3)
# Above each non-empty bar: new record count, then returning record count.
for i, m in enumerate(MONTH_ORDER):
    bar_top = new_arr[i] + old_arr[i]
    if bar_top > 0:
        ax.text(i, bar_top + 3, f'{new_count[m]}\n{old_count[m]}',
                ha='center', fontsize=7.5, color=C_DARK)
ax.set_xticks(x)
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度新老达人 GMV 贡献\n(按首月开播分类)', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('GMV (万元)', fontsize=10)
ax.legend(fontsize=8)
ax.grid(axis='y', alpha=0.3, zorder=0)
# B6: month-over-month GMV growth (bars, %) with the monthly GMV level
# (line, 10,000 yuan).  The two series have different units, so the GMV line
# is drawn on its own right-hand axis instead of the percent axis.
ax = axesB[1, 2]
ax_gmv = ax.twinx()
m_gmv_arr = [m_totals[m]['gmv'] / 10000 if m in m_totals else 0 for m in MONTH_ORDER]
mom_growth = []
for i, g in enumerate(m_gmv_arr):
    # The first month, and any month following a zero-GMV month, shows 0%.
    prev = m_gmv_arr[i - 1] if i > 0 else 0
    mom_growth.append((g - prev) / prev * 100 if prev > 0 else 0)
growth_colors = [C_GREEN if v >= 0 else C_RED for v in mom_growth]
ax.bar(x, mom_growth, color=growth_colors, alpha=0.7, zorder=3)
ax_gmv.plot(x, m_gmv_arr, 'D-', color=C_DARK, linewidth=2, markersize=10, markerfacecolor=C_GOLD, zorder=4)
for i, (g, gm) in enumerate(zip(mom_growth, m_gmv_arr)):
    # No growth label for the first month, which has no predecessor.
    label = f'{g:+.0f}%' if i > 0 else ''
    ax.text(i, g + (8 if g >= 0 else -12), label, ha='center', fontsize=8, fontweight='bold', color=C_DARK)
    ax_gmv.text(i, gm + 3, f'{gm:.0f}', ha='center', fontsize=8, color=C_BLUE, fontweight='bold')
ax.axhline(y=0, color=C_DARK, linewidth=0.5, alpha=0.5)
ax.set_xticks(x)
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('月度 GMV 环比增长率', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('环比增长 (%)', fontsize=10)
ax_gmv.set_ylabel('GMV (万元)', fontsize=10)
ax.grid(axis='y', alpha=0.3, zorder=0)
# Finalise Figure B and write it to disk.
figB.suptitle('维度二:月度维度深度分析', fontsize=17, fontweight='bold', y=1.01, color=C_DARK)
# Layout and close are tied to figB explicitly instead of the "current figure".
figB.tight_layout(pad=3.5)
outB = '/root/.openclaw/workspace/output/daren_dim_month.png'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(outB), exist_ok=True)
figB.savefig(outB, dpi=180, bbox_inches='tight', facecolor=figB.get_facecolor())
plt.close(figB)
print(f"✅ Figure B: {outB} ({os.path.getsize(outB)/1024:.0f} KB)")
# ============================================================
# FIGURE C: platform/channel-dimension analysis (2 rows × 3 cols)
# ============================================================
figC, axesC = plt.subplots(2, 3, figsize=(22, 14))
figC.patch.set_facecolor('#FAFBFC')

# Keep only platforms whose GMV over the reporting months exceeds 50,000;
# the order of plat_order is preserved.
meaningful_plats = []
for p in plat_order:
    window_gmv = sum(plat_monthly[p][m]['gmv'] for m in MONTH_ORDER)
    if window_gmv > 50000:
        meaningful_plats.append(p)
# C1: pie chart of each platform's share of GMV
ax = axesC[0, 0]
plat_pie_gmv = []
for p in meaningful_plats:
    per_month = plat_monthly[p]
    plat_pie_gmv.append(sum(per_month[m]['gmv'] for m in MONTH_ORDER) / 10000)
wedges, texts, autotexts = ax.pie(
    plat_pie_gmv,
    labels=meaningful_plats,
    autopct='%1.1f%%',
    colors=PALETTE[:len(meaningful_plats)],
    startangle=90,
    pctdistance=0.82,
    textprops={'fontsize': 8.5},
)
# Make the percentage labels bold and slightly smaller than the names.
for at in autotexts:
    at.set_fontweight('bold')
    at.set_fontsize(8)
ax.set_title('平台 GMV 占比', fontsize=13, fontweight='bold', pad=10)
# C2: refund rate per platform (horizontal bars, ascending), annotated with
# the platform's total GMV
ax = axesC[1, 0]
plat_ref = []
for p in meaningful_plats:
    total_o = sum(plat_monthly[p][m]['orders'] for m in MONTH_ORDER)
    total_r = sum(plat_monthly[p][m]['ref_ords'] for m in MONTH_ORDER)
    plat_ref.append({
        'name': p,
        'orders': total_o,
        # Refunded orders / orders in percent; 0 when there are no orders.
        'ref_rate': total_r / total_o * 100 if total_o > 0 else 0,
        # Total GMV in units of 10,000 yuan.
        'gmv': sum(plat_monthly[p][m]['gmv'] for m in MONTH_ORDER) / 10000,
    })
plat_ref.sort(key=lambda x: x['ref_rate'])
names_r = [x['name'] for x in plat_ref]
rates_r = [x['ref_rate'] for x in plat_ref]
gmv_r = [x['gmv'] for x in plat_ref]
# Green below 25%, gold below 40%, red otherwise.
rate_colors = [C_GREEN if r < 25 else C_GOLD if r < 40 else C_RED for r in rates_r]
bars_c2 = ax.barh(range(len(names_r)), rates_r, color=rate_colors, height=0.6, zorder=3)
for bar, r, g in zip(bars_c2, rates_r, gmv_r):
    # g is in units of 10,000 yuan, so the label carries the 万 unit; without
    # it the amount reads four orders of magnitude too small.
    ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2,
            f'{r:.1f}% | GMV ¥{g:.0f}万', va='center', fontsize=8.5, color=C_DARK)
ax.axvline(x=40, color=C_RED, linestyle='--', alpha=0.3, linewidth=1)
ax.set_yticks(range(len(names_r)))
ax.set_yticklabels(names_r, fontsize=9)
ax.set_title('平台退款率对比\n(按退款率升序)', fontsize=13, fontweight='bold', pad=10)
ax.set_xlabel('退款率 (%)', fontsize=10)
ax.invert_yaxis()
ax.grid(axis='x', alpha=0.3, zorder=0)
# C3: influencer diversity per platform -- number of distinct influencers
# (bars, left axis) against total GMV (line, right axis)
ax = axesC[0, 1]
plat_inf = defaultdict(set)
for r in data:
    plat_inf[merge_platform(r['platform'])].add(r['norm_name'])
# One (platform, influencer count, GMV in 10,000 yuan) tuple per platform
# in meaningful_plats.
plat_div = []
for p, inf_set in plat_inf.items():
    if p not in meaningful_plats:
        continue
    window_gmv = sum(plat_monthly[p][m]['gmv'] for m in MONTH_ORDER) / 10000
    plat_div.append((p, len(inf_set), window_gmv))
# Most diverse platform first.
plat_div.sort(key=lambda x: x[1], reverse=True)
names_d = [entry[0] for entry in plat_div]
divers = [entry[1] for entry in plat_div]
gmv_d = [entry[2] for entry in plat_div]
ax_twin = ax.twinx()
positions_d = range(len(names_d))
bars_d = ax.bar(positions_d, divers, color=C_PURPLE, alpha=0.7, label='合作达人数', zorder=3)
ax_twin.plot(positions_d, gmv_d, 'D-', color=C_BLUE, linewidth=2.5, markersize=10, zorder=4, label='GMV(万)')
for i, (d, g) in enumerate(zip(divers, gmv_d)):
    ax.text(i, d + 0.3, str(d), ha='center', fontsize=10, fontweight='bold', color=C_PURPLE)
    ax_twin.annotate(f'¥{g:.0f}', (i, g), textcoords="offset points", xytext=(0, 12), ha='center', fontsize=8.5, color=C_BLUE, fontweight='bold')
ax.set_xticks(range(len(names_d)))
ax.set_xticklabels(names_d, fontsize=8.5, rotation=20)
ax.set_title('平台达人多样性\n(达人数 vs GMV)', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('合作达人数', color=C_PURPLE, fontsize=10)
ax_twin.set_ylabel('GMV (万元)', color=C_BLUE, fontsize=10)
ax.tick_params(axis='y', colors=C_PURPLE)
ax_twin.tick_params(axis='y', colors=C_BLUE)
ax.grid(axis='y', alpha=0.3)
# One legend covering the artists of both y-axes.
l1, l1a = ax.get_legend_handles_labels()
l2, l2a = ax_twin.get_legend_handles_labels()
ax.legend(l1 + l2, l1a + l2a, loc='upper right', fontsize=7.5)
# C4: monthly GMV trend of the four largest platforms.
# The panel is titled "TOP4 ... GMV", so platforms are ranked by their total
# GMV over the reporting months rather than by influencer count.
ax = axesC[0, 2]
plat_window_gmv = {
    p: sum(plat_monthly[p][m]['gmv'] for m in MONTH_ORDER) for p in meaningful_plats
}
top_4_plats = sorted(meaningful_plats, key=plat_window_gmv.get, reverse=True)[:4]
for idx, p in enumerate(top_4_plats):
    # GMV per month in units of 10,000 yuan.
    vals = [plat_monthly[p][m]['gmv'] / 10000 for m in MONTH_ORDER]
    ax.plot(range(len(MONTH_ORDER)), vals, 'o-', color=PALETTE[idx], linewidth=2.5, markersize=9,
            markerfacecolor='white', markeredgewidth=2, label=p)
ax.set_xticks(range(len(MONTH_ORDER)))
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_title('TOP4 平台月度 GMV 趋势', fontsize=13, fontweight='bold', pad=10)
ax.set_ylabel('GMV (万元)', fontsize=10)
ax.legend(fontsize=9, framealpha=0.9)
ax.grid(alpha=0.3)
# C5: heatmap of refund rate (%) by platform and month
ax = axesC[1, 1]
heat_plat = np.zeros((len(meaningful_plats), len(MONTH_ORDER)))
for i, p in enumerate(meaningful_plats):
    per_month = plat_monthly[p]
    for j, m in enumerate(MONTH_ORDER):
        s = per_month[m]
        # NaN marks platform-months without orders.
        heat_plat[i, j] = (s['ref_ords'] / s['orders'] * 100) if s['orders'] > 0 else np.nan
# NaN cells are masked so they are not coloured.
masked_p = np.ma.masked_invalid(heat_plat)
im = ax.imshow(masked_p, cmap='RdYlGn_r', aspect='auto', vmin=0, vmax=80)
# Print the rate inside every populated cell; white text on the dark cells.
for (i, j), v in np.ndenumerate(heat_plat):
    if np.isnan(v):
        continue
    col = 'white' if v > 45 else C_DARK
    ax.text(j, i, f'{v:.0f}%', ha='center', va='center', fontsize=8.5, fontweight='bold', color=col)
ax.set_xticks(range(len(MONTH_ORDER)))
ax.set_xticklabels(MONTH_LABELS, fontsize=9)
ax.set_yticks(range(len(meaningful_plats)))
ax.set_yticklabels(meaningful_plats, fontsize=9)
ax.set_title('平台退款率月份热力图', fontsize=13, fontweight='bold', pad=10)
colorbar_p = plt.colorbar(im, ax=ax, shrink=0.8, pad=0.02)
colorbar_p.set_label('退款率 (%)', fontsize=9)
# C6: mean GMV per record for each platform, with the standard deviation
# drawn as error bars.  Each data row is treated as one session.
ax = axesC[1, 2]
plat_session_stats = defaultdict(list)
for r in data:
    plat_session_stats[merge_platform(r['platform'])].append(r['gmv'] / 10000)
plat_eff = []
for p in meaningful_plats:
    vals = plat_session_stats[p]
    if not vals:
        continue
    plat_eff.append({'name': p, 'avg': np.mean(vals), 'std': np.std(vals), 'count': len(vals)})
# Highest average first.
plat_eff.sort(key=lambda x: x['avg'], reverse=True)
eff_names = [entry['name'] for entry in plat_eff]
eff_avg = [entry['avg'] for entry in plat_eff]
eff_std = [entry['std'] for entry in plat_eff]
y_pos = np.arange(len(eff_names))
bars_c6 = ax.barh(y_pos, eff_avg, xerr=eff_std, color=C_BLUE, alpha=0.75, height=0.6,
                  ecolor=C_DARK, capsize=4, zorder=3)
for bar, avg, std in zip(bars_c6, eff_avg, eff_std):
    # Coefficient of variation in percent; 0 when the mean is not positive.
    cv = std / avg * 100 if avg > 0 else 0
    ax.text(bar.get_width() + 0.3, bar.get_y() + bar.get_height() / 2,
            f'场均 ¥{avg:.1f}万 ±{std:.1f}万 (CV:{cv:.0f}%)',
            va='center', fontsize=8, color=C_DARK)
ax.set_yticks(y_pos)
ax.set_yticklabels(eff_names, fontsize=9)
ax.set_title('平台场均 GMV & 稳定性\n(误差线=标准差)', fontsize=13, fontweight='bold', pad=10)
ax.set_xlabel('场均 GMV (万元)', fontsize=10)
ax.invert_yaxis()
ax.grid(axis='x', alpha=0.3, zorder=0)
# Finalise Figure C and write it to disk.
figC.suptitle('维度三:平台/渠道维度深度分析', fontsize=17, fontweight='bold', y=1.01, color=C_DARK)
# Layout and close are tied to figC explicitly instead of the "current figure".
figC.tight_layout(pad=3.5)
outC = '/root/.openclaw/workspace/output/daren_dim_platform.png'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(outC), exist_ok=True)
figC.savefig(outC, dpi=180, bbox_inches='tight', facecolor=figC.get_facecolor())
plt.close(figC)
print(f"✅ Figure C: {outC} ({os.path.getsize(outC)/1024:.0f} KB)")
print("\n🎉 All 3 dimensional analysis charts generated!")