ai_member_xiaoxi/scripts/revenue_forecast_2025.py
2026-05-27 08:00:01 +08:00

513 lines
22 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
瓦拉英语 2026年6-12月收入预测
基于:历史趋势 + 教育行业季节性 + 达人复播频次 + 新达人拓展
"""
import os
import warnings

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
from matplotlib.patches import FancyBboxPatch

warnings.filterwarnings('ignore')
# ============================================================
# 1. Historical data (one entry per month, 2025-09 .. 2026-05)
# ============================================================
months_hist = ['9月', '10月', '11月', '12月', '1月', '2月', '3月', '4月', '5月']
# Running month index: 2025-09 -> 9, ..., 2026-05 -> 17
months_hist_num = list(range(9, 18))

# --- All channels ---
gmv_hist = [617592, 812179, 493983, 395373, 397533, 415884, 2879026, 4970605, 1894740]
gsv_hist = [369716, 506534, 294240, 301432, 311279, 205390, 2136625, 2919674, 1390617]
refund_hist = [247876, 305645, 199743, 93941, 86254, 210494, 742401, 2050931, 504123]
orders_hist = [313, 421, 462, 233, 222, 216, 958, 1665, 623]
users_hist = [308, 412, 400, 229, 216, 212, 906, 1552, 588]

# --- Daren (influencer) livestream channel ---
daren_gmv = [598899, 765617, 452373, 177911, 217891, 353823, 2521415, 4497095, 1604719]
daren_orders = [301, 383, 227, 89, 109, 177, 767, 1429, 490]
daren_count = [7, 10, 15, 4, 6, 3, 10, 25, 17]
daren_new = [7, 7, 8, 0, 2, 2, 2, 20, 7]
daren_repeat = [0, 3, 7, 4, 4, 1, 8, 5, 10]
# Daren refund rate; plotted later on a percent axis
daren_refund = [39.5, 38.9, 43.6, 25.8, 29.4, 55.9, 27.9, 44.8, 30.6]

# --- Non-daren channels: total GMV minus daren GMV, month by month ---
# -> [18693, 46562, 41610, 217462, 179642, 62061, 357611, 473510, 290021]
non_daren_gmv = [total - live for total, live in zip(gmv_hist, daren_gmv)]

# --- Newly registered users ---
new_users_hist = [1529, 2412, 2971, 3523, 1938, 1743, 4166, 5668, 2609]
# ============================================================
# 2. Education-industry seasonality factors
# ============================================================
# Multipliers keyed by calendar month, following the seasonal pattern the
# author attributes to China's online-education market. 1.00 is the
# back-to-school baseline (September).
_seasonal_pairs = [
    (6, 0.75),   # exam season (finals / entrance exams): spending dips
    (7, 1.40),   # summer holiday starts: demand surges
    (8, 1.50),   # summer holiday peak
    (9, 1.00),   # back-to-school: steady demand
    (10, 1.15),  # National Day / Mid-Autumn promotions, Double-11 warm-up
    (11, 1.25),  # Double-11 sales event
    (12, 1.05),  # year-end push + winter-holiday pre-sales
]
seasonal_factor = dict(_seasonal_pairs)
# ============================================================
# 3. 预测模型
# ============================================================
def forecast_revenue(seasonal=None, rng=None):
    """Forecast monthly GMV and GSV for June-December under three scenarios.

    For every scenario (conservative / base / optimistic) and every forecast
    month the model adds up:

    * core darens, star darens and other repeat darens (monthly base x season;
      "other repeat" additionally grows 5% of its base per forecast month),
    * first-show GMV of newly recruited darens,
    * GMV of previously recruited darens that come back (random retention),
    * non-daren channels (May baseline compounded month over month x season),

    and then converts GMV to GSV with a month-adjusted refund rate.

    Args:
        seasonal: Optional mapping ``calendar month (6..12) -> multiplier``.
            Defaults to the module-level ``seasonal_factor`` table.
        rng: Optional object exposing ``random()`` that returns a float in
            [0, 1) (e.g. ``numpy.random.default_rng(seed)``). When omitted the
            global ``np.random`` state is used, so callers that call
            ``np.random.seed(...)`` beforehand get the same numbers as before.

    Returns:
        ``(results, months_fc)`` where ``results[scenario]`` is
        ``{'gmv': [...], 'gsv': [...]}`` (seven rounded ints each) and
        ``months_fc`` is the list of month labels.

    Raises:
        KeyError: if ``seasonal`` has no entry for one of the months 6..12.
    """
    if seasonal is None:
        seasonal = seasonal_factor
    draw = np.random.random if rng is None else rng.random

    months_fc = ['6月', '7月', '8月', '9月', '10月', '11月', '12月']
    months_fc_num = [18, 19, 20, 21, 22, 23, 24]  # running index, 2026-06 = 18

    scenarios = [
        ('conservative', {
            'core_daren_base': 650000,      # core darens (晚柠 + 念妈), monthly contribution
            'star_daren_base': 250000,      # 学霸系 darens, monthly contribution
            'other_repeat_base': 80000,     # other repeat darens, monthly
            'new_daren_per_month': 4,       # new darens recruited per month
            'new_daren_first_gmv': 60000,   # average GMV of a new daren's first show
            'new_daren_retention': 0.25,    # chance a pooled new daren returns in a month
            'non_daren_growth': 1.05,       # non-daren channels, month-over-month factor
            'non_daren_base': 290000,       # non-daren May baseline
            'refund_rate_target': 0.38,     # target refund rate
        }),
        ('base', {
            'core_daren_base': 750000,
            'star_daren_base': 350000,
            'other_repeat_base': 150000,
            'new_daren_per_month': 6,
            'new_daren_first_gmv': 80000,
            'new_daren_retention': 0.35,
            'non_daren_growth': 1.10,
            'non_daren_base': 290000,
            'refund_rate_target': 0.35,
        }),
        ('optimistic', {
            'core_daren_base': 900000,
            'star_daren_base': 500000,
            'other_repeat_base': 250000,
            'new_daren_per_month': 8,
            'new_daren_first_gmv': 100000,
            'new_daren_retention': 0.45,
            'non_daren_growth': 1.15,
            'non_daren_base': 290000,
            'refund_rate_target': 0.32,
        }),
    ]

    results = {'conservative': {}, 'base': {}, 'optimistic': {}}
    for scenario, params in scenarios:
        # Effective number of first-show darens per month: 80% of the
        # configured count, rounded half-up, never below 2. Seasonality scales
        # GMV per daren, not the head count, so this is constant per scenario.
        # (The original multiplied and divided by the same seasonal factor
        # here, which cancelled out.)
        new_this_month = max(2, int(params['new_daren_per_month'] * 0.8 + 0.5))

        # First-show GMV levels of recruited darens that are still "alive".
        # A daren that fails one monthly retention draw leaves the pool for good.
        new_daren_pool = []
        gmv_pred = []
        gsv_pred = []

        for month_idx, m in enumerate(months_fc_num, start=1):  # month_idx 1..7
            actual_month = m % 12 or 12  # running index -> calendar month
            season = seasonal[actual_month]

            # === Daren livestream ===
            core = params['core_daren_base'] * season
            star = params['star_daren_base'] * season
            other_repeat = params['other_repeat_base'] * (1 + 0.05 * month_idx) * season
            new_gmv = new_this_month * params['new_daren_first_gmv'] * season

            # Returning darens: one draw decides whether the daren comes back,
            # a second draw sets this show's GMV to 60-80% of its previous
            # level. The decayed level is what stays in the pool.
            returning_gmv = 0
            still_active = []
            for gmv_first in new_daren_pool:
                if draw() < params['new_daren_retention']:
                    retention_factor = 0.6 + 0.2 * draw()
                    returning_gmv += gmv_first * retention_factor * season
                    still_active.append(gmv_first * retention_factor)
            # This month's recruits become eligible to return from next month.
            new_daren_pool = still_active
            new_daren_pool.extend([params['new_daren_first_gmv']] * new_this_month)

            daren_gmv_pred = core + star + other_repeat + new_gmv + returning_gmv

            # === Non-daren channels ===
            non_daren_pred = (
                params['non_daren_base']
                * (params['non_daren_growth'] ** month_idx)
                * season
            )

            # === Totals ===
            total_gmv = daren_gmv_pred + non_daren_pred

            # Refund adjustment: +5 points in Jun-Aug, -3 points in November,
            # target rate otherwise.
            # NOTE(review): the original comment described November refunds as
            # "slightly higher but controllable" while the code lowers the
            # rate - confirm the intended sign.
            if actual_month in (6, 7, 8):
                refund_adj = params['refund_rate_target'] + 0.05
            elif actual_month == 11:
                refund_adj = params['refund_rate_target'] - 0.03
            else:
                refund_adj = params['refund_rate_target']
            total_gsv = total_gmv * (1 - refund_adj)

            gmv_pred.append(round(total_gmv))
            gsv_pred.append(round(total_gsv))

        results[scenario] = {
            'gmv': gmv_pred,
            'gsv': gsv_pred,
        }
    return results, months_fc
# ============================================================
# 4. Run the forecast and print the summary tables
# ============================================================
np.random.seed(42)  # seed the global NumPy RNG before the forecast runs
results, months_fc = forecast_revenue()

# --- Forecast table ---
heavy_rule = "=" * 90
light_rule = "-" * 90
print(heavy_rule)
print("📊 瓦拉英语 2026年6-12月收入预测")
print(heavy_rule)
print()
print(f"{'月份':<8} {'保守-GMV':>12} {'保守-GSV':>12} {'基准-GMV':>12} {'基准-GSV':>12} {'乐观-GMV':>12} {'乐观-GSV':>12}")
print(light_rule)

# Column order of the table: conservative, base, optimistic; GMV then GSV.
scenario_prefixes = [('c', 'conservative'), ('b', 'base'), ('o', 'optimistic')]
total = {'c_gmv': 0, 'c_gsv': 0, 'b_gmv': 0, 'b_gsv': 0, 'o_gmv': 0, 'o_gsv': 0}
for i, m in enumerate(months_fc):
    cells = []
    for prefix, scenario in scenario_prefixes:
        for metric in ('gmv', 'gsv'):
            amount = results[scenario][metric][i]
            total[f'{prefix}_{metric}'] += amount  # running 7-month sum
            cells.append(f"¥{amount:>10,}")
    print(f"{m:<8} " + " ".join(cells))
print(light_rule)
# `total` was created in column order, so iterating it yields the row as-is.
print(f"{'合计':<8} " + " ".join(f"¥{total[key]:>10,}" for key in total))
print()

# --- Comparison with history ---
hist_total_gmv = sum(gmv_hist)
hist_total_gsv = sum(gsv_hist)
hist_avg_gmv = hist_total_gmv / 9
hist_avg_gsv = hist_total_gsv / 9
print(f"历史累计 (2025.09-2026.05): GMV ¥{hist_total_gmv:,.0f} | GSV ¥{hist_total_gsv:,.0f}")
print(f"月均 (9个月): GMV ¥{hist_avg_gmv:,.0f} | GSV ¥{hist_avg_gsv:,.0f}")
print()
print("预测 vs 历史对比:")
for prefix, label in [('c', '保守'), ('b', '基准'), ('o', '乐观')]:
    t_gmv = total[f'{prefix}_gmv']
    monthly_avg = t_gmv / 7
    share_of_hist = t_gmv / hist_total_gmv * 100
    print(f" {label}: 月均 GMV ¥{monthly_avg:,.0f} | 7个月 GMV ¥{t_gmv:,.0f} ({share_of_hist:.0f}% of 历史9个月)")
# ============================================================
# 5. Build the charts
# ============================================================
# Every title, label and legend entry in the figure is Chinese. DejaVu Sans
# and DejaVu Sans Mono contain no CJK glyphs, so pinning the font family to
# DejaVu alone renders that text as missing-glyph boxes. List CJK-capable
# fonts first; whichever is installed is picked, and DejaVu stays as the last
# resort, so a machine without any CJK font behaves as before.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = [
    'Noto Sans CJK SC',
    'Source Han Sans SC',
    'WenQuanYi Micro Hei',
    'WenQuanYi Zen Hei',
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Arial Unicode MS',
    'DejaVu Sans',
]
# The summary panel is drawn with fontfamily='monospace'; give that generic
# family a CJK-capable candidate list as well.
plt.rcParams['font.monospace'] = [
    'Noto Sans Mono CJK SC',
    'WenQuanYi Zen Hei Mono',
    'SimHei',
    'DejaVu Sans Mono',
]
# Draw minus signs with the ASCII hyphen instead of U+2212.
plt.rcParams['axes.unicode_minus'] = False

# Color palette shared by the charts
colors = {
    'hist': '#5B9BD5',
    'cons': '#ED7D31',
    'base': '#4472C4',
    'opt': '#70AD47',
    'gsv': '#A5A5A5',
    'bg': '#F8F9FA',
    'grid': '#E0E0E0',
}
fig = plt.figure(figsize=(20, 24))
fig.suptitle('瓦拉英语 2026年6-12月 收入预测报告', fontsize=22, fontweight='bold', y=0.98)
# -------------------------------------------------------
# Chart 1: historical trend plus the three forecast scenarios
# -------------------------------------------------------


def _in_wan(values):
    """Convert amounts in yuan to units of 10,000 yuan (万元)."""
    return [amount / 10000 for amount in values]


ax1 = fig.add_subplot(3, 2, 1)
all_months = months_hist + months_fc
all_x = list(range(1, len(all_months) + 1))
hist_x = range(1, 10)       # x slots 1..9: historical months
fc_x = list(range(10, 17))  # x slots 10..16: forecast months

# Historical GMV as bars, historical GSV as a line
bars_hist = ax1.bar(hist_x, _in_wan(gmv_hist), color=colors['hist'],
                    alpha=0.8, label='历史 GMV', width=0.6)
ax1.plot(hist_x, _in_wan(gsv_hist), 'o-', color=colors['gsv'],
         linewidth=2, markersize=6, label='历史 GSV')

# Base-scenario forecast: GMV bars and GSV line
base_forecast = results['base']
ax1.bar(fc_x, _in_wan(base_forecast['gmv']), color='#FFC000',
        alpha=0.6, label='预测 GMV(基准)', width=0.6)
ax1.plot(fc_x, _in_wan(base_forecast['gsv']), 's--', color='#C55A11',
         linewidth=2, markersize=6, label='预测 GSV(基准)')

# Divider between history and forecast; the caption height is taken from the
# y-limit as it stands at this point, so this must stay before fill_between.
ax1.axvline(x=9.5, color='red', linestyle='--', linewidth=1.5, alpha=0.5)
caption_y = ax1.get_ylim()[1] * 0.95
ax1.text(9.5, caption_y, '← 历史 | 预测 →', ha='center', fontsize=9, color='red')

# Shaded band between the conservative and optimistic GMV forecasts
ax1.fill_between(fc_x,
                 _in_wan(results['conservative']['gmv']),
                 _in_wan(results['optimistic']['gmv']),
                 alpha=0.15, color='green', label='预测区间(保守-乐观)')

ax1.set_xticks(all_x)
ax1.set_xticklabels(all_months, rotation=0)
ax1.set_ylabel('万元', fontsize=11)
ax1.set_title('月度 GMV/GSV 趋势与预测', fontsize=14, fontweight='bold')
ax1.legend(loc='upper left', fontsize=8)
ax1.grid(axis='y', alpha=0.3)
ax1.yaxis.set_major_formatter(mticker.FuncFormatter(lambda value, _pos: f'¥{value:.0f}'))

# Value label above every historical bar
for rect, amount in zip(bars_hist, gmv_hist):
    label_x = rect.get_x() + rect.get_width() / 2
    label_y = rect.get_height() + 1
    ax1.text(label_x, label_y, f'¥{amount/10000:.0f}',
             ha='center', fontsize=7, fontweight='bold')
# -------------------------------------------------------
# Chart 2: month-by-month comparison of the three scenarios
# -------------------------------------------------------
ax2 = fig.add_subplot(3, 2, 2)
x = np.arange(len(months_fc))
width = 0.25
# One x position per scenario inside each month group
group_positions = [x - width, x, x + width]

# GMV: grouped bars (drawn first so the legend order is bars, then lines)
bar_specs = [
    ('conservative', colors['cons'], '保守 GMV'),
    ('base', colors['base'], '基准 GMV'),
    ('optimistic', colors['opt'], '乐观 GMV'),
]
bars1, bars2, bars3 = [
    ax2.bar(pos, [g / 10000 for g in results[key]['gmv']],
            width, color=bar_color, alpha=0.85, label=bar_label)
    for pos, (key, bar_color, bar_label) in zip(group_positions, bar_specs)
]

# GSV: one marker line per scenario, centred on its bars
line_specs = [
    ('conservative', 'v-', '#C55A11', '保守 GSV'),
    ('base', 's-', '#2F5496', '基准 GSV'),
    ('optimistic', '^-', '#375623', '乐观 GSV'),
]
for pos, (key, fmt, line_color, line_label) in zip(group_positions, line_specs):
    ax2.plot(pos, [g / 10000 for g in results[key]['gsv']],
             fmt, color=line_color, markersize=6, linewidth=1.5, label=line_label)

ax2.set_xticks(x)
ax2.set_xticklabels(months_fc)
ax2.set_ylabel('万元', fontsize=11)
ax2.set_title('6-12月 三场景月度对比', fontsize=14, fontweight='bold')
ax2.legend(loc='upper left', fontsize=7, ncol=2)
ax2.grid(axis='y', alpha=0.3)
ax2.yaxis.set_major_formatter(mticker.FuncFormatter(lambda value, _pos: f'¥{value:.0f}'))
# -------------------------------------------------------
# Chart 3: channel contribution breakdown (base scenario)
# -------------------------------------------------------
ax3 = fig.add_subplot(3, 2, 3)
# Stacked layers, bottom to top (values in 万元 = 10,000 yuan)
channels_data = {
    '核心达人\n(晚柠+念妈)': [],
    '学霸系达人': [],
    '其他复发达人': [],
    '新拓达人': [],
    '非达人渠道': [],
}
season = [seasonal_factor.get(m % 12 or 12, 1.0) for m in [18, 19, 20, 21, 22, 23, 24]]
# Copy of the base-scenario inputs used by forecast_revenue(); keep in sync.
base_params = {
    'core_daren_base': 750000,
    'star_daren_base': 350000,
    'other_repeat_base': 150000,
    'new_daren_per_month': 6,
    'new_daren_first_gmv': 80000,
    'non_daren_growth': 1.10,
    'non_daren_base': 290000,
}
for i, s in enumerate(season):
    core = base_params['core_daren_base'] * s / 10000
    star = base_params['star_daren_base'] * s / 10000
    other = base_params['other_repeat_base'] * (1 + 0.05 * (i+1)) * s / 10000
    non_d = base_params['non_daren_base'] * (base_params['non_daren_growth'] ** (i+1)) * s / 10000
    # New-daren layer = forecast base GMV minus the four deterministic layers.
    # Recomputing it as 6 darens x first-show GMV did not reconcile with the
    # model: forecast_revenue() counts fewer first-show darens than the
    # configured 6 and also adds GMV from returning new darens, so the stack
    # height differed from the base GMV drawn in charts 1 and 2. Taking the
    # residual makes the stack total equal results['base']['gmv'] and folds
    # both first-show and returning GMV of newly recruited darens into this
    # layer. Clamp at 0 to absorb the rounding of the stored GMV.
    model_total = results['base']['gmv'][i] / 10000
    new_d = max(0.0, model_total - (core + star + other + non_d))
    channels_data['核心达人\n(晚柠+念妈)'].append(core)
    channels_data['学霸系达人'].append(star)
    channels_data['其他复发达人'].append(other)
    channels_data['新拓达人'].append(new_d)
    channels_data['非达人渠道'].append(non_d)
x_fc = np.arange(len(months_fc))
bottom = np.zeros(len(months_fc))  # running stack height per month
channel_colors = ['#1F4E79', '#2E75B6', '#9DC3E6', '#BDD7EE', '#F4B183']
for i, (name, vals) in enumerate(channels_data.items()):
    ax3.bar(x_fc, vals, 0.6, bottom=bottom, label=name, color=channel_colors[i], alpha=0.85)
    bottom += np.array(vals)
# After the loop `bottom` holds each month's stacked total; label it.
for i, (m, total_val) in enumerate(zip(months_fc, bottom)):
    ax3.text(i, total_val + 2, f'¥{total_val:.0f}', ha='center', fontsize=8, fontweight='bold')
ax3.set_xticks(x_fc)
ax3.set_xticklabels(months_fc)
ax3.set_ylabel('万元', fontsize=11)
ax3.set_title('基准场景GMV 渠道贡献拆解', fontsize=14, fontweight='bold')
ax3.legend(loc='upper left', fontsize=8)
ax3.grid(axis='y', alpha=0.3)
# -------------------------------------------------------
# Chart 4: seasonality factors & daren operating metrics
# -------------------------------------------------------
ax4 = fig.add_subplot(3, 2, 4)
# Left axis: seasonal factors for the forecast months
months_all_label = ['6月', '7月', '8月', '9月', '10月', '11月', '12月']
sf_values = [seasonal_factor[m % 12 or 12] for m in [18, 19, 20, 21, 22, 23, 24]]
# Deviation from the 1.0 baseline, in percent (e.g. 1.25 -> about 25)
sf_pct = [(v - 1) * 100 for v in sf_values]
# Red: below baseline; blue: uplift up to 15%; green: uplift above 15%.
# sf_pct is already in percent, so the cut-off must be 15 - the previous
# cut-off of 0.15 was in fraction units and coloured every uplift green.
bar_colors_sf = ['#D64545' if v < 0 else '#4472C4' if v <= 15 else '#70AD47' for v in sf_pct]
# NOTE(review): ax4_2 is a twinx() of ax4 (shared x-axis) but is given a
# different set of category labels below - confirm the rendered layout is the
# intended one.
ax4_2 = ax4.twinx()
bars_sf = ax4.bar(months_all_label, sf_values, color=bar_colors_sf, alpha=0.3, width=0.6, label='季节性因子')
ax4.axhline(y=1.0, color='gray', linestyle='--', linewidth=1, alpha=0.5)
# Right (twin) axis: daren operating metrics
daren_metrics_label = ['达人\n总开播数', '复发达人\n占比', '新达人\n首场GMV', '非达人\n渠道占比']
daren_metrics_value = [25, 55, 8, 18]  # projections
ax4_2.bar(daren_metrics_label, daren_metrics_value, color=['#2E75B6', '#70AD47', '#FFC000', '#ED7D31'],
          alpha=0.7, width=0.5)
ax4.set_ylabel('季节性因子', fontsize=11, color='#4472C4')
ax4_2.set_ylabel('运营指标', fontsize=11, color='#ED7D31')
ax4.set_title('季节性因子 & 关键运营指标', fontsize=14, fontweight='bold')
ax4.tick_params(axis='y', labelcolor='#4472C4')
ax4_2.tick_params(axis='y', labelcolor='#ED7D31')
ax4.set_ylim(0, 2.0)
# Value label above every seasonal-factor bar
for bar, val in zip(bars_sf, sf_values):
    ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
             f'{val:.2f}', ha='center', fontsize=8, fontweight='bold')
# -------------------------------------------------------
# Chart 5: historical refund rate & forecast targets
# -------------------------------------------------------
ax5 = fig.add_subplot(3, 2, 5)

# Historical refund rate across all channels, in percent of GMV
hist_refund_rate = [r*100/g for r, g in zip(refund_hist, gmv_hist)]
hist_daren_refund = daren_refund
x_hist = range(1, 10)
ax5.plot(x_hist, hist_refund_rate, 'o-', color='#D64545',
         linewidth=2, markersize=8, label='全渠道退款率')
ax5.plot(x_hist, hist_daren_refund, 's--', color='#ED7D31',
         linewidth=2, markersize=6, label='达人直播退款率')

# Forecast refund-rate targets per scenario: +5 points in Jun-Aug,
# -3 points in November, the plain target in every other month.
x_fc_all = range(10, 17)
forecast_calendar_months = [idx % 12 or 12 for idx in [18, 19, 20, 21, 22, 23, 24]]
for scenario_name, target in [('保守', 0.38), ('基准', 0.35), ('乐观', 0.32)]:
    adj_rates = [
        (target + 0.05) * 100 if month in [6, 7, 8]
        else (target - 0.03) * 100 if month == 11
        else target * 100
        for month in forecast_calendar_months
    ]
    # Solid line for the base scenario, dashed for the other two
    ls = '-' if scenario_name == '基准' else '--'
    ax5.plot(x_fc_all, adj_rates, ls, marker='s', linewidth=2, markersize=5,
             label=f'预测退款率({scenario_name})', alpha=0.8)

# Divider between history and forecast
ax5.axvline(x=9.5, color='gray', linestyle='--', linewidth=1, alpha=0.5)
ax5.set_xticks(list(x_hist) + list(x_fc_all))
ax5.set_xticklabels(months_hist + months_fc, rotation=0)
ax5.set_ylabel('%', fontsize=11)
ax5.set_title('退款率趋势 & 预测目标', fontsize=14, fontweight='bold')
ax5.legend(loc='upper right', fontsize=8)
ax5.grid(alpha=0.3)
ax5.set_ylim(0, 65)
# -------------------------------------------------------
# Chart 6: text panel with cumulative totals, key assumptions, risks and advice
# -------------------------------------------------------
ax6 = fig.add_subplot(3, 2, 6)
ax6.axis('off')  # text-only panel: hide ticks and frame
# Summary text. The cumulative figures come from `total`; the assumption
# bullets are written by hand and must be kept in line with the base-scenario
# parameters. Compared with the previous version, the three table rows now
# carry their closing right border and the refund-rate bullet closes its
# parenthesis.
summary_text = f"""
═══════════════════════════════════════════════════════
📊 预测总结与关键假设
═══════════════════════════════════════════════════════
📈 预测区间2026年6-12月7个月
┌──────────┬──────────────┬──────────────┐
│ 场景 │ 累计 GMV │ 累计 GSV │
├──────────┼──────────────┼──────────────┤
│ 保守 │ ¥{total['c_gmv']:>10,} │ ¥{total['c_gsv']:>10,} │
│ 基准 │ ¥{total['b_gmv']:>10,} │ ¥{total['b_gsv']:>10,} │
│ 乐观 │ ¥{total['o_gmv']:>10,} │ ¥{total['o_gsv']:>10,} │
└──────────┴──────────────┴──────────────┘
🔑 关键假设(基准场景)
• 核心达人:晚柠/念妈 月均 GMV ¥75万
• 学霸系:学霸老王/三人行 月均 GMV ¥35万
• 每月新拓达人 6 位,首场均价 ¥8万
• 新达人次月留存率 35%
• 非达人渠道(端内+销售)月环比 +10%
• 目标退费率 35%(当前波动 30-45%)
📅 季节性峰值
• 7-8月暑假高峰因子 1.4-1.5
• 11月双11大促因子 1.25
• 6月考试季低谷因子 0.75
⚠️ 风险因素
• 达人退款率波动2月曾达 55.9%
• 学霸系达人合作可持续性不确定
• 行业淡季6月收入可能低于预测
• 新达人质量参差不齐
🎯 建议
• 6月重点储备暑假达人资源减少开播
• 7-8月重点全力投放学霸系+新达人密集排期
• 9-10月保持节奏端内渠道发力
• 11月双11大促全渠道协同
• 加强达人退款管控目标控制在35%以内
═══════════════════════════════════════════════════════
"""
# Anchor the text at the panel's top-left corner, in axes coordinates
ax6.text(0.02, 0.98, summary_text, transform=ax6.transAxes, fontsize=9,
         verticalalignment='top', fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='#F0F4F8', alpha=0.8))
# Leave the top 5% of the figure free for the suptitle
plt.tight_layout(rect=[0, 0, 1, 0.95])
# Single source of truth for the output location (also used in the message)
output_path = '/root/.openclaw/workspace/output/revenue_forecast_2026.png'
# savefig does not create missing directories; make sure the target exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print(f"\n✅ 图表已保存: {output_path}")