# ai_member_xiaoxi/scripts/revenue_forecast_2025.py

#!/usr/bin/env python3
"""
瓦拉英语 2026年6-12月收入预测
基于：历史趋势 + 教育行业季节性 + 达人复播频次 + 新达人拓展
"""
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
from matplotlib.patches import FancyBboxPatch
import warnings
warnings.filterwarnings('ignore')

# ============================================================
# 1. Historical data (2025-09 through 2026-05, one entry per month)
# ============================================================
months_hist = [f'{month}月' for month in (9, 10, 11, 12, 1, 2, 3, 4, 5)]
# Running month index anchored at 2025-09 = 9, so 2026-05 = 17.
months_hist_num = list(range(9, 18))

# All channels, one row per month: (GMV, GSV, refunds, orders, users).
# In every row GSV equals GMV minus refunds.
_all_channel_rows = [
    (617592,  369716,  247876,  313,  308),   # 2025-09
    (812179,  506534,  305645,  421,  412),   # 2025-10
    (493983,  294240,  199743,  462,  400),   # 2025-11
    (395373,  301432,   93941,  233,  229),   # 2025-12
    (397533,  311279,   86254,  222,  216),   # 2026-01
    (415884,  205390,  210494,  216,  212),   # 2026-02
    (2879026, 2136625,  742401,  958,  906),  # 2026-03
    (4970605, 2919674, 2050931, 1665, 1552),  # 2026-04
    (1894740, 1390617,  504123,  623,  588),  # 2026-05
]
gmv_hist, gsv_hist, refund_hist, orders_hist, users_hist = (
    list(column) for column in zip(*_all_channel_rows)
)

# Daren (influencer) livestreams, one row per month:
# (GMV, orders, darens who streamed, new darens, repeat darens, refund rate).
# The refund rate is plotted on a percent axis further down, so it is read
# as a percentage. Darens who streamed = new + repeat in every row.
_daren_rows = [
    (598899,   301,  7,  7,  0, 39.5),   # 2025-09
    (765617,   383, 10,  7,  3, 38.9),   # 2025-10
    (452373,   227, 15,  8,  7, 43.6),   # 2025-11
    (177911,    89,  4,  0,  4, 25.8),   # 2025-12
    (217891,   109,  6,  2,  4, 29.4),   # 2026-01
    (353823,   177,  3,  2,  1, 55.9),   # 2026-02
    (2521415,  767, 10,  2,  8, 27.9),   # 2026-03
    (4497095, 1429, 25, 20,  5, 44.8),   # 2026-04
    (1604719,  490, 17,  7, 10, 30.6),   # 2026-05
]
daren_gmv, daren_orders, daren_count, daren_new, daren_repeat, daren_refund = (
    list(column) for column in zip(*_daren_rows)
)

# Non-daren channels: whatever part of total GMV did not come from darens.
# = [18693, 46562, 41610, 217462, 179642, 62061, 357611, 473510, 290021]
non_daren_gmv = [total_gmv - live_gmv for total_gmv, live_gmv in zip(gmv_hist, daren_gmv)]

# Newly registered users per month.
new_users_hist = [
    1529, 2412, 2971, 3523,   # 2025-09 .. 2025-12
    1938, 1743, 4166, 5668,   # 2026-01 .. 2026-04
    2609,                     # 2026-05
]

# ============================================================
# 2. Education-industry seasonality factors
# ============================================================
# Multipliers for the forecast months, keyed by calendar month (1.0 = neutral).
# They encode the assumed yearly rhythm of China's online-education market:
#   Jun  0.75  exam season (finals, zhongkao/gaokao): parents focus on exams
#   Jul  1.40  summer holiday starts, demand surges
#   Aug  1.50  summer holiday peak
#   Sep  1.00  back to school, steady demand
#   Oct  1.15  National Day golden week promotions / Double 11 warm-up
#   Nov  1.25  Double 11 sales event
#   Dec  1.05  year-end push plus winter-holiday presales
seasonal_factor = dict(zip(
    range(6, 13),
    (0.75, 1.40, 1.50, 1.00, 1.15, 1.25, 1.05),
))

# ============================================================
# 3. 预测模型
# ============================================================
def forecast_revenue(rng=None, seasonality=None):
    """Forecast monthly GMV and GSV for June-December 2026 under three scenarios.

    Each month's GMV is the sum of six components, every one of them scaled
    by that month's seasonality factor:

    * core darens and "xueba"-group darens: a flat monthly base;
    * other repeat darens: a base that grows by 5% of itself per month;
    * newly signed darens: head-count times the average first-show GMV;
    * returning new darens: a random simulation (see below);
    * non-daren channels: the May baseline compounded month over month.

    GSV is GMV net of the scenario's target refund rate, which is raised by
    5 points in June-August and lowered by 3 points in November.

    Returning-daren simulation: every daren signed in an earlier forecast
    month sits in a pool. Each month, each pooled daren returns with
    probability ``new_daren_retention``; a returning daren sells 60-80% of
    its previous GMV and stays in the pool at that reduced level, while a
    daren that skips a month is dropped for good. The pool starts empty for
    every scenario, so darens signed before June are not simulated. The
    three scenarios consume draws from the same ``rng`` one after another.

    Args:
        rng: Source of uniform [0, 1) draws exposing a ``random()`` method,
            e.g. ``numpy.random.default_rng(seed)``. Defaults to the global
            ``numpy.random`` module, so callers that rely on
            ``numpy.random.seed`` keep getting the same numbers.
        seasonality: Mapping from calendar month (6-12) to GMV multiplier.
            Defaults to the module-level ``seasonal_factor``.

    Returns:
        A ``(results, months_fc)`` tuple. ``results`` maps each scenario name
        (``'conservative'``, ``'base'``, ``'optimistic'``) to a dict with
        ``'gmv'`` and ``'gsv'`` lists of seven amounts rounded to integers;
        ``months_fc`` is the matching list of month labels.

    Raises:
        KeyError: If ``seasonality`` lacks one of the months 6-12.
    """
    if rng is None:
        rng = np.random
    if seasonality is None:
        seasonality = seasonal_factor

    months_fc = ['6月', '7月', '8月', '9月', '10月', '11月', '12月']
    months_fc_num = [18, 19, 20, 21, 22, 23, 24]  # 2026-06 = 18

    scenarios = {
        'conservative': {
            'core_daren_base': 650000,     # monthly GMV of the core darens (晚柠 + 念妈)
            'star_daren_base': 250000,     # monthly GMV of the "xueba" group
            'other_repeat_base': 80000,    # monthly GMV of other repeat darens
            'new_daren_per_month': 4,      # new darens targeted per month
            'new_daren_first_gmv': 60000,  # average first-show GMV of a new daren
            'new_daren_retention': 0.25,   # chance a pooled new daren returns next month
            'non_daren_growth': 1.05,      # month-over-month growth of non-daren channels
            'non_daren_base': 290000,      # non-daren GMV baseline (May)
            'refund_rate_target': 0.38,    # target refund rate
        },
        'base': {
            'core_daren_base': 750000,
            'star_daren_base': 350000,
            'other_repeat_base': 150000,
            'new_daren_per_month': 6,
            'new_daren_first_gmv': 80000,
            'new_daren_retention': 0.35,
            'non_daren_growth': 1.10,
            'non_daren_base': 290000,
            'refund_rate_target': 0.35,
        },
        'optimistic': {
            'core_daren_base': 900000,
            'star_daren_base': 500000,
            'other_repeat_base': 250000,
            'new_daren_per_month': 8,
            'new_daren_first_gmv': 100000,
            'new_daren_retention': 0.45,
            'non_daren_growth': 1.15,
            'non_daren_base': 290000,
            'refund_rate_target': 0.32,
        },
    }

    results = {}
    for scenario, params in scenarios.items():
        retention = params['new_daren_retention']
        first_gmv = params['new_daren_first_gmv']
        refund_target = params['refund_rate_target']

        # Head-count of new darens actually booked each month: 80% of the
        # target, rounded half-up, never fewer than two. Seasonality only
        # scales GMV per daren, not the head-count, so this is constant.
        new_this_month = max(2, int(params['new_daren_per_month'] * 0.8 + 0.5))

        # Current GMV level of every new daren still eligible to return.
        new_daren_pool = []

        gmv_pred = []
        gsv_pred = []

        for month_idx, m in enumerate(months_fc_num, start=1):
            actual_month = m % 12 or 12  # running index -> calendar month
            season = seasonality[actual_month]

            # === Daren livestream forecast ===
            core = params['core_daren_base'] * season
            star = params['star_daren_base'] * season
            other_repeat = params['other_repeat_base'] * (1 + 0.05 * month_idx) * season
            new_gmv = new_this_month * first_gmv * season

            # Returning darens. The draw order (one draw to decide the return,
            # a second one only when the daren does return) is kept stable so
            # that seeded runs stay reproducible.
            returning_gmv = 0
            still_active = []
            for previous_gmv in new_daren_pool:
                if rng.random() < retention:
                    retention_factor = 0.6 + 0.2 * rng.random()
                    returning_gmv += previous_gmv * retention_factor * season
                    still_active.append(previous_gmv * retention_factor)

            # This month's new darens become eligible to return next month.
            new_daren_pool = still_active + [first_gmv] * new_this_month

            daren_gmv_pred = core + star + other_repeat + new_gmv + returning_gmv

            # === Non-daren channel forecast ===
            non_daren_pred = (
                params['non_daren_base']
                * (params['non_daren_growth'] ** month_idx)
                * season
            )

            # === Totals ===
            total_gmv = daren_gmv_pred + non_daren_pred

            # Refund rate: +5 points in June-August, -3 points in November,
            # the scenario target otherwise.
            if actual_month in (6, 7, 8):
                refund_adj = refund_target + 0.05
            elif actual_month == 11:
                refund_adj = refund_target - 0.03
            else:
                refund_adj = refund_target

            total_gsv = total_gmv * (1 - refund_adj)

            gmv_pred.append(round(total_gmv))
            gsv_pred.append(round(total_gsv))

        results[scenario] = {
            'gmv': gmv_pred,
            'gsv': gsv_pred,
        }

    return results, months_fc

# ============================================================
# 4. Run the forecast and print the summary tables
# ============================================================
# forecast_revenue() draws from NumPy's global RNG; a fixed seed keeps the
# printed numbers and the charts reproducible between runs.
np.random.seed(42)
results, months_fc = forecast_revenue()

heavy_rule = "=" * 90
light_rule = "-" * 90
scenario_labels = [('conservative', '保守'), ('base', '基准'), ('optimistic', '乐观')]

# Forecast table: one row per month, GMV then GSV for each scenario.
print(heavy_rule)
print("📊 瓦拉英语 2026年6-12月收入预测")
print(heavy_rule)
print()
column_titles = [
    f"{label}-{metric}" for _, label in scenario_labels for metric in ('GMV', 'GSV')
]
print(f"{'月份':<8} " + " ".join(f"{title:>12}" for title in column_titles))
print(light_rule)

# Running totals keyed '<scenario initial>_<metric>' (c_gmv, c_gsv, b_gmv, ...);
# the summary panel of the chart reads these keys later.
total = {
    f'{name[0]}_{metric}': 0 for name, _ in scenario_labels for metric in ('gmv', 'gsv')
}
monthly_rows = zip(
    *(results[name][metric] for name, _ in scenario_labels for metric in ('gmv', 'gsv'))
)
for m, row in zip(months_fc, monthly_rows):
    print(f"{m:<8} " + " ".join(f"¥{amount:>10,}" for amount in row))
    for key, amount in zip(total, row):
        total[key] += amount
print(light_rule)
print(f"{'合计':<8} " + " ".join(f"¥{amount:>10,}" for amount in total.values()))
print()

# Comparison against the nine months of history.
hist_total_gmv = sum(gmv_hist)
hist_total_gsv = sum(gsv_hist)
print(f"历史累计 (2025.09-2026.05):  GMV ¥{hist_total_gmv:,.0f}  |  GSV ¥{hist_total_gsv:,.0f}")
print(f"月均 (9个月):                 GMV ¥{hist_total_gmv/9:,.0f}  |  GSV ¥{hist_total_gsv/9:,.0f}")
print()
print("预测 vs 历史对比:")
for s, label in scenario_labels:
    t_gmv = total[f'{s[0]}_gmv']
    t_gsv = total[f'{s[0]}_gsv']
    monthly_avg = t_gmv / 7
    share_of_history = t_gmv / hist_total_gmv * 100
    print(f"  {label}: 月均 GMV ¥{monthly_avg:,.0f} | 7个月 GMV ¥{t_gmv:,.0f} ({share_of_history:.0f}% of 历史9个月)")

# ============================================================
# 5. Build the charts
# ============================================================
# Every title, axis label and legend entry below is Chinese, and DejaVu Sans
# ships no CJK glyphs, so with DejaVu Sans alone that text renders as empty
# boxes. Prefer whichever CJK-capable fonts are installed and keep DejaVu Sans
# as the last fallback; if none is installed the setting is what it was before.
# Text that explicitly requests another family is not affected by this.
_cjk_font_candidates = [
    'Noto Sans CJK SC', 'Noto Sans SC', 'Noto Sans CJK JP', 'Source Han Sans SC',
    'WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'SimHei', 'Microsoft YaHei',
    'PingFang SC', 'Heiti TC', 'Arial Unicode MS',
]
# Only list fonts Matplotlib has actually registered, to avoid a
# "font family not found" warning for every missing candidate.
_installed_fonts = {entry.name for entry in matplotlib.font_manager.fontManager.ttflist}
plt.rcParams['font.family'] = [
    name for name in _cjk_font_candidates if name in _installed_fonts
] + ['DejaVu Sans']
# Draw the minus sign as an ASCII hyphen rather than U+2212.
plt.rcParams['axes.unicode_minus'] = False

# Color palette shared by the charts.
colors = {
    'hist': '#5B9BD5',
    'cons': '#ED7D31',
    'base': '#4472C4',
    'opt': '#70AD47',
    'gsv': '#A5A5A5',
    'bg': '#F8F9FA',
    'grid': '#E0E0E0',
}

fig = plt.figure(figsize=(20, 24))
fig.suptitle('瓦拉英语 2026年6-12月 收入预测报告', fontsize=22, fontweight='bold', y=0.98)

# -------------------------------------------------------
# Chart 1: historical trend plus the three forecast scenarios
# -------------------------------------------------------
ax1 = fig.add_subplot(3, 2, 1)
all_months = months_hist + months_fc
all_x = list(range(1, len(all_months) + 1))
# Derive the bar positions from the series lengths instead of hard-coding
# 1..9 and 10..16, so the chart stays aligned if either series changes.
hist_x = all_x[:len(months_hist)]
fc_x = all_x[len(months_hist):]
boundary_x = len(months_hist) + 0.5

# Historical GMV bars (amounts are shown in units of 10,000 yuan).
bars_hist = ax1.bar(hist_x, [g/10000 for g in gmv_hist], color=colors['hist'],
                     alpha=0.8, label='历史 GMV', width=0.6)
# Historical GSV line
ax1.plot(hist_x, [g/10000 for g in gsv_hist], 'o-', color=colors['gsv'],
         linewidth=2, markersize=6, label='历史 GSV')

# Forecast GMV and GSV (base scenario)
ax1.bar(fc_x, [g/10000 for g in results['base']['gmv']], color='#FFC000',
        alpha=0.6, label='预测 GMV(基准)', width=0.6)
ax1.plot(fc_x, [g/10000 for g in results['base']['gsv']], 's--', color='#C55A11',
         linewidth=2, markersize=6, label='预测 GSV(基准)')

# Separator between history and forecast. The label's y position is given in
# axes coordinates (x stays in data coordinates), so it sits at 95% of the
# axis height even if artists added later rescale the y-axis.
ax1.axvline(x=boundary_x, color='red', linestyle='--', linewidth=1.5, alpha=0.5)
ax1.text(boundary_x, 0.95, '← 历史 | 预测 →', ha='center', fontsize=9, color='red',
         transform=ax1.get_xaxis_transform())

# Shaded band between the conservative and optimistic GMV forecasts
ax1.fill_between(fc_x,
                 [g/10000 for g in results['conservative']['gmv']],
                 [g/10000 for g in results['optimistic']['gmv']],
                 alpha=0.15, color='green', label='预测区间(保守-乐观)')

ax1.set_xticks(all_x)
ax1.set_xticklabels(all_months, rotation=0)
ax1.set_ylabel('万元', fontsize=11)
ax1.set_title('月度 GMV/GSV 趋势与预测', fontsize=14, fontweight='bold')
ax1.legend(loc='upper left', fontsize=8)
ax1.grid(axis='y', alpha=0.3)
ax1.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'¥{x:.0f}万'))

# Value labels just above the historical bars
for bar, val in zip(bars_hist, gmv_hist):
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
             f'¥{val/10000:.0f}万', ha='center', fontsize=7, fontweight='bold')

# -------------------------------------------------------
# Chart 2: month-by-month comparison of the three scenarios
# -------------------------------------------------------
ax2 = fig.add_subplot(3, 2, 2)
x = np.arange(len(months_fc))
width = 0.25

# Grouped GMV bars, one bar per scenario within each month
# (amounts are shown in units of 10,000 yuan).
bars1, bars2, bars3 = [
    ax2.bar(positions, [amount / 10000 for amount in results[scenario_key]['gmv']],
            width, color=bar_color, alpha=0.85, label=bar_label)
    for positions, scenario_key, bar_color, bar_label in (
        (x - width, 'conservative', colors['cons'], '保守 GMV'),
        (x, 'base', colors['base'], '基准 GMV'),
        (x + width, 'optimistic', colors['opt'], '乐观 GMV'),
    )
]

# GSV lines drawn through the center of the matching bars
for positions, scenario_key, line_fmt, line_color, line_label in (
    (x - width, 'conservative', 'v-', '#C55A11', '保守 GSV'),
    (x, 'base', 's-', '#2F5496', '基准 GSV'),
    (x + width, 'optimistic', '^-', '#375623', '乐观 GSV'),
):
    ax2.plot(positions, [amount / 10000 for amount in results[scenario_key]['gsv']],
             line_fmt, color=line_color, markersize=6, linewidth=1.5, label=line_label)

ax2.set_xticks(x)
ax2.set_xticklabels(months_fc)
ax2.set_ylabel('万元', fontsize=11)
ax2.set_title('6-12月 三场景月度对比', fontsize=14, fontweight='bold')
ax2.legend(loc='upper left', fontsize=7, ncol=2)
ax2.grid(axis='y', alpha=0.3)
ax2.yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda value, _pos: f'¥{value:.0f}万')
)

# -------------------------------------------------------
# Chart 3: GMV breakdown by channel (base scenario)
# -------------------------------------------------------
ax3 = fig.add_subplot(3, 2, 3)

# Per-channel GMV in units of 10,000 yuan, one value per forecast month.
channels_data = {
    '核心达人\n(晚柠+念妈)': [],
    '学霸系达人': [],
    '其他复发达人': [],
    '新拓达人': [],
    '新达人复播': [],
    '非达人渠道': [],
}

season = [seasonal_factor.get(m % 12 or 12, 1.0) for m in [18, 19, 20, 21, 22, 23, 24]]
# These values must mirror the 'base' scenario inside forecast_revenue().
base_params = {
    'core_daren_base': 750000,
    'star_daren_base': 350000,
    'other_repeat_base': 150000,
    'new_daren_per_month': 6,
    'new_daren_first_gmv': 80000,
    'non_daren_growth': 1.10,
    'non_daren_base': 290000,
}
# forecast_revenue() books 80% of the new-daren target, rounded half-up with a
# floor of two. Using the raw target here would overstate the new-daren
# segment compared with the forecast itself.
new_daren_booked = max(2, int(base_params['new_daren_per_month'] * 0.8 + 0.5))

for i, s in enumerate(season):
    core = base_params['core_daren_base'] * s / 10000
    star = base_params['star_daren_base'] * s / 10000
    other = base_params['other_repeat_base'] * (1 + 0.05 * (i+1)) * s / 10000
    new_d = new_daren_booked * base_params['new_daren_first_gmv'] * s / 10000
    non_d = base_params['non_daren_base'] * (base_params['non_daren_growth'] ** (i+1)) * s / 10000
    # GMV from returning new darens comes out of a random simulation and is
    # not returned separately, so recover it as whatever part of the forecast
    # base GMV the deterministic channels do not explain. This makes each
    # stack add up to the base-scenario GMV shown elsewhere. Clamped at zero
    # because the forecast is rounded to whole yuan.
    returning = max(0.0, results['base']['gmv'][i] / 10000 - (core + star + other + new_d + non_d))

    channels_data['核心达人\n(晚柠+念妈)'].append(core)
    channels_data['学霸系达人'].append(star)
    channels_data['其他复发达人'].append(other)
    channels_data['新拓达人'].append(new_d)
    channels_data['新达人复播'].append(returning)
    channels_data['非达人渠道'].append(non_d)

x_fc = np.arange(len(months_fc))
bottom = np.zeros(len(months_fc))
channel_colors = ['#1F4E79', '#2E75B6', '#9DC3E6', '#BDD7EE', '#DEEBF7', '#F4B183']

# Stack the channels in dict order; `bottom` accumulates the running height.
for i, (name, vals) in enumerate(channels_data.items()):
    ax3.bar(x_fc, vals, 0.6, bottom=bottom, label=name, color=channel_colors[i], alpha=0.85)
    bottom += np.array(vals)

# Total label above each stack
for i, total_val in enumerate(bottom):
    ax3.text(i, total_val + 2, f'¥{total_val:.0f}万', ha='center', fontsize=8, fontweight='bold')

ax3.set_xticks(x_fc)
ax3.set_xticklabels(months_fc)
ax3.set_ylabel('万元', fontsize=11)
ax3.set_title('基准场景：GMV 渠道贡献拆解', fontsize=14, fontweight='bold')
ax3.legend(loc='upper left', fontsize=8)
ax3.grid(axis='y', alpha=0.3)

# -------------------------------------------------------
# Chart 4: seasonality factors and daren operating metrics
# -------------------------------------------------------
ax4 = fig.add_subplot(3, 2, 4)

# Seasonality factor for each forecast month
months_all_label = ['6月', '7月', '8月', '9月', '10月', '11月', '12月']
sf_values = [seasonal_factor[m % 12 or 12] for m in [18, 19, 20, 21, 22, 23, 24]]
# Deviation from the neutral factor 1.0, in percent
sf_pct = [(v - 1) * 100 for v in sf_values]
# Red: below neutral; blue: neutral up to a +15% uplift; green: above +15%.
# sf_pct is in percent, so the cut-off is 15 (not 0.15); the small tolerance
# absorbs float error in (1.15 - 1) * 100.
bar_colors_sf = ['#D64545' if v < 0 else '#4472C4' if v <= 15 + 1e-9 else '#70AD47' for v in sf_pct]

ax4_2 = ax4.twinx()

bars_sf = ax4.bar(months_all_label, sf_values, color=bar_colors_sf, alpha=0.3, width=0.6, label='季节性因子')
ax4.axhline(y=1.0, color='gray', linestyle='--', linewidth=1, alpha=0.5)

# Daren operating metrics on the twin y-axis (hard-coded projections).
# NOTE(review): the twin axes share one categorical x-axis, so these four bars
# presumably land to the right of the month bars rather than in a separate
# panel, and the four values do not look like they share a unit - confirm the
# intended layout.
daren_metrics_label = ['达人\n总开播数', '复发达人\n占比', '新达人\n首场GMV', '非达人\n渠道占比']
daren_metrics_value = [25, 55, 8, 18]  # projections
ax4_2.bar(daren_metrics_label, daren_metrics_value, color=['#2E75B6', '#70AD47', '#FFC000', '#ED7D31'],
          alpha=0.7, width=0.5)

ax4.set_ylabel('季节性因子', fontsize=11, color='#4472C4')
ax4_2.set_ylabel('运营指标', fontsize=11, color='#ED7D31')
ax4.set_title('季节性因子 & 关键运营指标', fontsize=14, fontweight='bold')
ax4.tick_params(axis='y', labelcolor='#4472C4')
ax4_2.tick_params(axis='y', labelcolor='#ED7D31')
ax4.set_ylim(0, 2.0)

# Value label above each seasonality bar
for bar, val in zip(bars_sf, sf_values):
    ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
             f'{val:.2f}', ha='center', fontsize=8, fontweight='bold')

# -------------------------------------------------------
# Chart 5: historical refund rate and forecast targets
# -------------------------------------------------------
ax5 = fig.add_subplot(3, 2, 5)

# Historical refund rate across all channels, in percent of GMV
hist_refund_rate = [
    refunded * 100 / gross for refunded, gross in zip(refund_hist, gmv_hist)
]
hist_daren_refund = daren_refund

x_hist = range(1, 10)
ax5.plot(x_hist, hist_refund_rate, 'o-', color='#D64545',
         linewidth=2, markersize=8, label='全渠道退款率')
ax5.plot(x_hist, hist_daren_refund, 's--', color='#ED7D31',
         linewidth=2, markersize=6, label='达人直播退款率')

# Forecast refund-rate targets. The same monthly adjustment as in the revenue
# forecast is applied: +5 points in June-August, -3 points in November.
x_fc_all = range(10, 17)
refund_shift_by_month = {6: 0.05, 7: 0.05, 8: 0.05, 11: -0.03}
forecast_calendar_months = [m % 12 or 12 for m in [18, 19, 20, 21, 22, 23, 24]]
# Only the base scenario gets a solid line. The lines take their colors from
# the default color cycle, so the plotting order matters.
for scenario_name, target, ls in [('保守', 0.38, '--'), ('基准', 0.35, '-'), ('乐观', 0.32, '--')]:
    adj_rates = [
        (target + refund_shift_by_month.get(actual, 0.0)) * 100
        for actual in forecast_calendar_months
    ]
    ax5.plot(x_fc_all, adj_rates, ls, marker='s', linewidth=2, markersize=5,
             label=f'预测退款率({scenario_name})', alpha=0.8)

# Divider between history and forecast
ax5.axvline(x=9.5, color='gray', linestyle='--', linewidth=1, alpha=0.5)
ax5.set_xticks([*x_hist, *x_fc_all])
ax5.set_xticklabels(months_hist + months_fc, rotation=0)
ax5.set_ylabel('%', fontsize=11)
ax5.set_title('退款率趋势 & 预测目标', fontsize=14, fontweight='bold')
ax5.legend(loc='upper right', fontsize=8)
ax5.grid(alpha=0.3)
ax5.set_ylim(0, 65)

# -------------------------------------------------------
# Chart 6: text panel with cumulative totals, assumptions, risks and advice
# -------------------------------------------------------
import os

ax6 = fig.add_subplot(3, 2, 6)
ax6.axis('off')

# Summary text; the cumulative figures come from the `total` dict filled while
# printing the forecast table.
summary_text = f"""
═══════════════════════════════════════════════════════
              📊 预测总结与关键假设
═══════════════════════════════════════════════════════

  📈 预测区间（2026年6-12月，7个月）
     ┌──────────┬──────────────┬──────────────┐
     │   场景   │   累计 GMV   │   累计 GSV   │
     ├──────────┼──────────────┼──────────────┤
     │  保守    │ ¥{total['c_gmv']:>10,}  │ ¥{total['c_gsv']:>10,}  │
     │  基准    │ ¥{total['b_gmv']:>10,}  │ ¥{total['b_gsv']:>10,}  │
     │  乐观    │ ¥{total['o_gmv']:>10,}  │ ¥{total['o_gsv']:>10,}  │
     └──────────┴──────────────┴──────────────┘

  🔑 关键假设（基准场景）
     • 核心达人：晚柠/念妈 月均 GMV ¥75万
     • 学霸系：学霸老王/三人行 月均 GMV ¥35万
     • 每月新拓达人 6 位，首场均价 ¥8万
     • 新达人次月留存率 35%
     • 非达人渠道（端内+销售）月环比 +10%
     • 目标退费率 35%（当前波动 30-45%）

  📅 季节性峰值
     • 7-8月暑假高峰（因子 1.4-1.5）
     • 11月双11大促（因子 1.25）
     • 6月考试季低谷（因子 0.75）

  ⚠️ 风险因素
     • 达人退款率波动（2月曾达 55.9%）
     • 学霸系达人合作可持续性不确定
     • 行业淡季（6月）收入可能低于预测
     • 新达人质量参差不齐

  🎯 建议
     • 6月重点：储备暑假达人资源，减少开播
     • 7-8月重点：全力投放，学霸系+新达人密集排期
     • 9-10月：保持节奏，端内渠道发力
     • 11月：双11大促，全渠道协同
     • 加强达人退款管控，目标控制在35%以内
═══════════════════════════════════════════════════════
"""

# The panel is mostly Chinese, which the default monospace font cannot draw.
# Keep 'monospace' first and append any installed CJK-capable fonts: Matplotlib
# versions with per-glyph font fallback (3.6+) then take the missing glyphs
# from those fonts, while older versions simply keep using the monospace font.
_installed_fonts = {entry.name for entry in matplotlib.font_manager.fontManager.ttflist}
summary_fonts = ['monospace'] + [
    name for name in (
        'Noto Sans CJK SC', 'Noto Sans SC', 'Noto Sans CJK JP', 'Source Han Sans SC',
        'WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'SimHei', 'Microsoft YaHei',
        'PingFang SC', 'Heiti TC', 'Arial Unicode MS',
    )
    if name in _installed_fonts
]

ax6.text(0.02, 0.98, summary_text, transform=ax6.transAxes, fontsize=9,
         verticalalignment='top', fontfamily=summary_fonts,
         bbox=dict(boxstyle='round', facecolor='#F0F4F8', alpha=0.8))

# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])

output_path = '/root/.openclaw/workspace/output/revenue_forecast_2026.png'
# savefig() does not create missing directories; make sure the target exists
# so the run does not fail after all the work has been done.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print(f"\n✅ 图表已保存: {output_path}")