#!/usr/bin/env python3
"""Revenue forecast for 瓦拉英语, June-December 2026.

The forecast combines the historical trend, education-industry seasonality,
the repeat-stream frequency of existing influencers ("daren", 达人) and the
expansion of new influencers.  Running the script prints a forecast table
and renders a six-panel PNG report.
"""

import os
import warnings

import matplotlib

matplotlib.use('Agg')  # headless backend; must be selected before pyplot is imported

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
from matplotlib import font_manager
from matplotlib.patches import FancyBboxPatch

warnings.filterwarnings('ignore')

# ============================================================
# 1. Historical data
# ============================================================
months_hist = ['9月', '10月', '11月', '12月', '1月', '2月', '3月', '4月', '5月']
# Running month index: 2025-09 = 9, ... 2026-05 = 17
months_hist_num = [9, 10, 11, 12, 13, 14, 15, 16, 17]

# All channels
gmv_hist = [617592, 812179, 493983, 395373, 397533, 415884, 2879026, 4970605, 1894740]
gsv_hist = [369716, 506534, 294240, 301432, 311279, 205390, 2136625, 2919674, 1390617]
refund_hist = [247876, 305645, 199743, 93941, 86254, 210494, 742401, 2050931, 504123]
orders_hist = [313, 421, 462, 233, 222, 216, 958, 1665, 623]
users_hist = [308, 412, 400, 229, 216, 212, 906, 1552, 588]

# Influencer live streams
daren_gmv = [598899, 765617, 452373, 177911, 217891, 353823, 2521415, 4497095, 1604719]
daren_orders = [301, 383, 227, 89, 109, 177, 767, 1429, 490]
daren_count = [7, 10, 15, 4, 6, 3, 10, 25, 17]
daren_new = [7, 7, 8, 0, 2, 2, 2, 20, 7]
daren_repeat = [0, 3, 7, 4, 4, 1, 8, 5, 10]
daren_refund = [39.5, 38.9, 43.6, 25.8, 29.4, 55.9, 27.9, 44.8, 30.6]  # percent

# Non-influencer channels
non_daren_gmv = [g - d for g, d in zip(gmv_hist, daren_gmv)]
# = [18693, 46562, 41610, 217462, 179642, 62061, 357611, 473510, 290021]

# Newly registered users
new_users_hist = [1529, 2412, 2971, 3523, 1938, 1743, 4166, 5668, 2609]

# ============================================================
# 2. Education-industry seasonality factors
# ============================================================
# Assumptions, keyed by calendar month:
# - Jun: finals / entrance exams, spending on education drops (0.75)
# - Jul: summer holiday starts, demand surges (1.4)
# - Aug: summer holiday peak (1.5)
# - Sep: back-to-school, steady demand (1.0)
# - Oct: National Day golden week + Mid-Autumn promotions (1.15)
# - Nov: Double-11 sales event (1.25)
# - Dec: year-end push + winter-holiday pre-sales (1.05)
seasonal_factor = {
    6: 0.75,   # exam season
    7: 1.40,   # summer holiday starts
    8: 1.50,   # summer holiday peak
    9: 1.00,   # back-to-school
    10: 1.15,  # National Day + Double-11 warm-up
    11: 1.25,  # Double-11
    12: 1.05,  # year-end / winter-holiday pre-sales
}

# ============================================================
# 3. Forecast model
# ============================================================
# Running month indices of the forecast window: 2026-06 = 18 ... 2026-12 = 24
FORECAST_MONTH_NUMS = [18, 19, 20, 21, 22, 23, 24]

# Display labels of the scenarios (also fixes the scenario order).
SCENARIO_LABELS = {'conservative': '保守', 'base': '基准', 'optimistic': '乐观'}

# Scenario assumptions.  The insertion order matters: scenarios are simulated
# in this order and share one random stream.
SCENARIO_PARAMS = {
    'conservative': {
        'core_daren_base': 650000,     # monthly GMV of the core influencers (晚柠+念妈)
        'star_daren_base': 250000,     # monthly GMV of the 学霸 group
        'other_repeat_base': 80000,    # monthly GMV of other repeat influencers
        'new_daren_per_month': 4,      # configured new influencers per month
        'new_daren_first_gmv': 60000,  # average GMV of a new influencer's first stream
        'new_daren_retention': 0.25,   # chance that a pooled influencer streams again next month
        'non_daren_growth': 1.05,      # month-over-month growth of non-influencer channels
        'non_daren_base': 290000,      # May baseline of non-influencer channels
        'refund_rate_target': 0.38,    # target refund rate
    },
    'base': {
        'core_daren_base': 750000,
        'star_daren_base': 350000,
        'other_repeat_base': 150000,
        'new_daren_per_month': 6,
        'new_daren_first_gmv': 80000,
        'new_daren_retention': 0.35,
        'non_daren_growth': 1.10,
        'non_daren_base': 290000,
        'refund_rate_target': 0.35,
    },
    'optimistic': {
        'core_daren_base': 900000,
        'star_daren_base': 500000,
        'other_repeat_base': 250000,
        'new_daren_per_month': 8,
        'new_daren_first_gmv': 100000,
        'new_daren_retention': 0.45,
        'non_daren_growth': 1.15,
        'non_daren_base': 290000,
        'refund_rate_target': 0.32,
    },
}


def _calendar_month(month_num):
    """Map a running month index (e.g. 18) to a calendar month 1-12."""
    return month_num % 12 or 12


def _refund_rate(target, calendar_month):
    """Return the refund rate applied in ``calendar_month`` for a scenario target.

    June-August adds 5 points to the target, November subtracts 3 points,
    every other month uses the target unchanged.
    """
    if calendar_month in (6, 7, 8):
        return target + 0.05
    if calendar_month == 11:
        return target - 0.03
    return target


def forecast_revenue():
    """Forecast monthly GMV/GSV for June-December 2026 under three scenarios.

    For every scenario in ``SCENARIO_PARAMS`` and every forecast month, GMV is
    the sum of six channels, each scaled by that month's seasonal factor:
    core influencers, star influencers, other repeat influencers (growing 5%
    of the base per month), first streams of new influencers, repeat streams
    of previously new influencers (simulated with random draws), and
    non-influencer channels (compounding month over month).  GSV is GMV
    reduced by the month's refund rate.

    The simulation draws from NumPy's global random state, so call
    ``np.random.seed(...)`` beforehand for reproducible results.

    Returns:
        tuple: ``(results, months_fc)`` where ``months_fc`` is the list of
        month labels and ``results[scenario]`` is a dict with

        * ``'gmv'``: rounded monthly GMV (list of int),
        * ``'gsv'``: rounded monthly GSV (list of int),
        * ``'channels'``: unrounded monthly GMV per channel, keyed by
          ``'core'``, ``'star'``, ``'other_repeat'``, ``'new'``,
          ``'returning'`` and ``'non_daren'``.
    """
    months_fc = ['6月', '7月', '8月', '9月', '10月', '11月', '12月']
    results = {'conservative': {}, 'base': {}, 'optimistic': {}}

    for scenario, params in SCENARIO_PARAMS.items():
        # First-stream GMV levels of influencers that may stream again.
        new_daren_pool = []
        gmv_pred = []
        gsv_pred = []
        channels = {
            'core': [], 'star': [], 'other_repeat': [],
            'new': [], 'returning': [], 'non_daren': [],
        }

        for month_idx, m in enumerate(FORECAST_MONTH_NUMS, start=1):
            actual_month = _calendar_month(m)
            season = seasonal_factor[actual_month]

            # === Influencer live streams ===
            core = params['core_daren_base'] * season
            star = params['star_daren_base'] * season
            other_repeat = params['other_repeat_base'] * (1 + 0.05 * month_idx) * season

            # Seasonality affects GMV per influencer, not the head count.
            # NOTE(review): the effective count is about 80% of the configured
            # `new_daren_per_month` (rounded, at least 2), e.g. 5 rather than 6
            # in the base scenario - confirm this discount is intended.
            new_this_month = max(2, int(params['new_daren_per_month'] * 0.8 + 0.5))
            new_gmv = new_this_month * params['new_daren_first_gmv'] * season

            # Repeat streams from the pool.  An influencer that does not
            # return this month leaves the pool for good.
            returning_gmv = 0
            still_active = []
            for gmv_first in new_daren_pool:
                if np.random.random() < params['new_daren_retention']:
                    # A repeat stream reaches 60-80% of the previous level.
                    retention_factor = 0.6 + 0.2 * np.random.random()
                    returning_gmv += gmv_first * retention_factor * season
                    still_active.append(gmv_first * retention_factor)

            # This month's new influencers become candidates for next month.
            new_daren_pool = still_active
            new_daren_pool.extend([params['new_daren_first_gmv']] * new_this_month)

            daren_gmv_pred = core + star + other_repeat + new_gmv + returning_gmv

            # === Non-influencer channels ===
            non_daren_pred = (
                params['non_daren_base'] * (params['non_daren_growth'] ** month_idx) * season
            )

            # === Totals ===
            total_gmv = daren_gmv_pred + non_daren_pred
            total_gsv = total_gmv * (1 - _refund_rate(params['refund_rate_target'], actual_month))

            gmv_pred.append(round(total_gmv))
            gsv_pred.append(round(total_gsv))
            channels['core'].append(core)
            channels['star'].append(star)
            channels['other_repeat'].append(other_repeat)
            channels['new'].append(new_gmv)
            channels['returning'].append(returning_gmv)
            channels['non_daren'].append(non_daren_pred)

        results[scenario] = {
            'gmv': gmv_pred,
            'gsv': gsv_pred,
            'channels': channels,
        }

    return results, months_fc


# ============================================================
# 4. Run the forecast and print the table
# ============================================================
np.random.seed(42)
results, months_fc = forecast_revenue()

print("=" * 90)
print("📊 瓦拉英语 2026年6-12月收入预测")
print("=" * 90)
print()
print(f"{'月份':<8} {'保守-GMV':>12} {'保守-GSV':>12} {'基准-GMV':>12} {'基准-GSV':>12} {'乐观-GMV':>12} {'乐观-GSV':>12}")
print("-" * 90)

for i, m in enumerate(months_fc):
    c_gmv = results['conservative']['gmv'][i]
    c_gsv = results['conservative']['gsv'][i]
    b_gmv = results['base']['gmv'][i]
    b_gsv = results['base']['gsv'][i]
    o_gmv = results['optimistic']['gmv'][i]
    o_gsv = results['optimistic']['gsv'][i]
    print(f"{m:<8} ¥{c_gmv:>10,} ¥{c_gsv:>10,} ¥{b_gmv:>10,} ¥{b_gsv:>10,} ¥{o_gmv:>10,} ¥{o_gsv:>10,}")

# Seven-month totals, keyed '<scenario initial>_<metric>' (e.g. 'b_gmv').
total = {
    f'{scenario[0]}_{metric}': sum(results[scenario][metric])
    for scenario in SCENARIO_PARAMS
    for metric in ('gmv', 'gsv')
}

print("-" * 90)
print(f"{'合计':<8} ¥{total['c_gmv']:>10,} ¥{total['c_gsv']:>10,} ¥{total['b_gmv']:>10,} ¥{total['b_gsv']:>10,} ¥{total['o_gmv']:>10,} ¥{total['o_gsv']:>10,}")
print()

# Comparison with history
hist_total_gmv = sum(gmv_hist)
hist_total_gsv = sum(gsv_hist)
n_hist = len(gmv_hist)
n_fc = len(months_fc)
print(f"历史累计 (2025.09-2026.05): GMV ¥{hist_total_gmv:,.0f} | GSV ¥{hist_total_gsv:,.0f}")
print(f"月均 (9个月): GMV ¥{hist_total_gmv/n_hist:,.0f} | GSV ¥{hist_total_gsv/n_hist:,.0f}")
print()
print("预测 vs 历史对比:")
for s, label in SCENARIO_LABELS.items():
    t_gmv = total[f'{s[0]}_gmv']
    print(f" {label}: 月均 GMV ¥{t_gmv/n_fc:,.0f} | 7个月 GMV ¥{t_gmv:,.0f} ({t_gmv/hist_total_gmv*100:.0f}% of 历史9个月)")

# ============================================================
# 5. Charts
# ============================================================
# DejaVu Sans has no CJK glyphs, so prefer whichever CJK-capable font is
# installed and keep DejaVu Sans as the last resort.
_CJK_FONT_CANDIDATES = [
    'Noto Sans CJK SC', 'Noto Sans SC', 'Source Han Sans SC',
    'WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'SimHei',
    'Microsoft YaHei', 'PingFang SC', 'Heiti SC', 'Arial Unicode MS',
]
_installed_fonts = {f.name for f in font_manager.fontManager.ttflist}
cjk_fonts = [name for name in _CJK_FONT_CANDIDATES if name in _installed_fonts]
if not cjk_fonts:
    print("⚠️ 未找到中文字体,图表中的中文可能显示为方块")

plt.rcParams['font.family'] = cjk_fonts + ['DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

# Color palette
colors = {
    'hist': '#5B9BD5',
    'cons': '#ED7D31',
    'base': '#4472C4',
    'opt': '#70AD47',
    'gsv': '#A5A5A5',
    'bg': '#F8F9FA',
    'grid': '#E0E0E0',
}

fig = plt.figure(figsize=(20, 24))
fig.suptitle('瓦拉英语 2026年6-12月 收入预测报告', fontsize=22, fontweight='bold', y=0.98)

# -------------------------------------------------------
# Chart 1: history + forecast scenarios
# -------------------------------------------------------
ax1 = fig.add_subplot(3, 2, 1)

all_months = months_hist + months_fc
all_x = list(range(1, len(all_months) + 1))

# Historical GMV bars
bars_hist = ax1.bar(range(1, 10), [g/10000 for g in gmv_hist],
                    color=colors['hist'], alpha=0.8, label='历史 GMV', width=0.6)
# Historical GSV line
ax1.plot(range(1, 10), [g/10000 for g in gsv_hist], 'o-',
         color=colors['gsv'], linewidth=2, markersize=6, label='历史 GSV')

# Forecast GMV/GSV (base scenario)
fc_x = list(range(10, 17))
ax1.bar(fc_x, [g/10000 for g in results['base']['gmv']],
        color='#FFC000', alpha=0.6, label='预测 GMV(基准)', width=0.6)
ax1.plot(fc_x, [g/10000 for g in results['base']['gsv']], 's--',
         color='#C55A11', linewidth=2, markersize=6, label='预测 GSV(基准)')

# Separator between history and forecast
ax1.axvline(x=9.5, color='red', linestyle='--', linewidth=1.5, alpha=0.5)

# Conservative-to-optimistic band
ax1.fill_between(fc_x,
                 [g/10000 for g in results['conservative']['gmv']],
                 [g/10000 for g in results['optimistic']['gmv']],
                 alpha=0.15, color='green', label='预测区间(保守-乐观)')

# Placed after every data artist so the final y-limits are used.
ax1.text(9.5, ax1.get_ylim()[1]*0.95, '← 历史 | 预测 →',
         ha='center', fontsize=9, color='red')

ax1.set_xticks(all_x)
ax1.set_xticklabels(all_months, rotation=0)
ax1.set_ylabel('万元', fontsize=11)
ax1.set_title('月度 GMV/GSV 趋势与预测', fontsize=14, fontweight='bold')
ax1.legend(loc='upper left', fontsize=8)
ax1.grid(axis='y', alpha=0.3)
ax1.yaxis.set_major_formatter(mticker.FuncFormatter(lambda v, _: f'¥{v:.0f}万'))

# Value labels on the historical bars
for bar, val in zip(bars_hist, gmv_hist):
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
             f'¥{val/10000:.0f}万', ha='center', fontsize=7, fontweight='bold')

# -------------------------------------------------------
# Chart 2: monthly comparison of the three scenarios
# -------------------------------------------------------
ax2 = fig.add_subplot(3, 2, 2)

x = np.arange(len(months_fc))
width = 0.25

bars1 = ax2.bar(x - width, [g/10000 for g in results['conservative']['gmv']], width,
                color=colors['cons'], alpha=0.85, label='保守 GMV')
bars2 = ax2.bar(x, [g/10000 for g in results['base']['gmv']], width,
                color=colors['base'], alpha=0.85, label='基准 GMV')
bars3 = ax2.bar(x + width, [g/10000 for g in results['optimistic']['gmv']], width,
                color=colors['opt'], alpha=0.85, label='乐观 GMV')

# GSV lines
ax2.plot(x - width, [g/10000 for g in results['conservative']['gsv']], 'v-',
         color='#C55A11', markersize=6, linewidth=1.5, label='保守 GSV')
ax2.plot(x, [g/10000 for g in results['base']['gsv']], 's-',
         color='#2F5496', markersize=6, linewidth=1.5, label='基准 GSV')
ax2.plot(x + width, [g/10000 for g in results['optimistic']['gsv']], '^-',
         color='#375623', markersize=6, linewidth=1.5, label='乐观 GSV')

ax2.set_xticks(x)
ax2.set_xticklabels(months_fc)
ax2.set_ylabel('万元', fontsize=11)
ax2.set_title('6-12月 三场景月度对比', fontsize=14, fontweight='bold')
ax2.legend(loc='upper left', fontsize=7, ncol=2)
ax2.grid(axis='y', alpha=0.3)
ax2.yaxis.set_major_formatter(mticker.FuncFormatter(lambda v, _: f'¥{v:.0f}万'))

# -------------------------------------------------------
# Chart 3: channel breakdown (base scenario)
# -------------------------------------------------------
ax3 = fig.add_subplot(3, 2, 3)

# Taken from the model output so the stacked totals equal the base GMV.
base_channels = results['base']['channels']
channels_data = {
    '核心达人\n(晚柠+念妈)': base_channels['core'],
    '学霸系达人': base_channels['star'],
    '其他复发达人': base_channels['other_repeat'],
    '新拓达人': base_channels['new'],
    '新达人复播': base_channels['returning'],
    '非达人渠道': base_channels['non_daren'],
}

x_fc = np.arange(len(months_fc))
bottom = np.zeros(len(months_fc))
channel_colors = ['#1F4E79', '#2E75B6', '#9DC3E6', '#BDD7EE', '#DEEBF7', '#F4B183']

for (name, vals), seg_color in zip(channels_data.items(), channel_colors):
    vals_wan = np.array(vals) / 10000
    ax3.bar(x_fc, vals_wan, 0.6, bottom=bottom, label=name, color=seg_color, alpha=0.85)
    bottom += vals_wan

# Total labels above each stack
for i, total_val in enumerate(bottom):
    ax3.text(i, total_val + 2, f'¥{total_val:.0f}万',
             ha='center', fontsize=8, fontweight='bold')

ax3.set_xticks(x_fc)
ax3.set_xticklabels(months_fc)
ax3.set_ylabel('万元', fontsize=11)
ax3.set_title('基准场景:GMV 渠道贡献拆解', fontsize=14, fontweight='bold')
ax3.legend(loc='upper left', fontsize=8)
ax3.grid(axis='y', alpha=0.3)

# -------------------------------------------------------
# Chart 4: seasonal factors & influencer operating metrics
# -------------------------------------------------------
ax4 = fig.add_subplot(3, 2, 4)

# Seasonal factors (left axis)
months_all_label = ['6月', '7月', '8月', '9月', '10月', '11月', '12月']
sf_values = [seasonal_factor[_calendar_month(m)] for m in FORECAST_MONTH_NUMS]
# Deviation from 1.0 in percent; colors: below 0 red, up to +15% blue, above green.
sf_pct = [(v - 1) * 100 for v in sf_values]
bar_colors_sf = ['#D64545' if v < 0 else '#4472C4' if v <= 15 else '#70AD47'
                 for v in sf_pct]

# Operating metrics (right axis); hard-coded projections in mixed units.
daren_metrics_label = ['达人\n总开播数', '复发达人\n占比', '新达人\n首场GMV', '非达人\n渠道占比']
daren_metrics_value = [25, 55, 8, 18]  # projections

# Explicit x positions: months first, metrics to their right.  This avoids
# relying on implicit category handling across the twinned axes.
month_pos = list(range(len(months_all_label)))
metric_pos = [len(months_all_label) + k for k in range(len(daren_metrics_label))]

ax4_2 = ax4.twinx()
bars_sf = ax4.bar(month_pos, sf_values, color=bar_colors_sf,
                  alpha=0.3, width=0.6, label='季节性因子')
ax4.axhline(y=1.0, color='gray', linestyle='--', linewidth=1, alpha=0.5)

ax4_2.bar(metric_pos, daren_metrics_value,
          color=['#2E75B6', '#70AD47', '#FFC000', '#ED7D31'], alpha=0.7, width=0.5)

ax4.set_xticks(month_pos + metric_pos)
ax4.set_xticklabels(months_all_label + daren_metrics_label)
ax4.set_ylabel('季节性因子', fontsize=11, color='#4472C4')
ax4_2.set_ylabel('运营指标', fontsize=11, color='#ED7D31')
ax4.set_title('季节性因子 & 关键运营指标', fontsize=14, fontweight='bold')
ax4.tick_params(axis='y', labelcolor='#4472C4')
ax4_2.tick_params(axis='y', labelcolor='#ED7D31')
ax4.set_ylim(0, 2.0)

# Value labels on the seasonal-factor bars
for bar, val in zip(bars_sf, sf_values):
    ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
             f'{val:.2f}', ha='center', fontsize=8, fontweight='bold')

# -------------------------------------------------------
# Chart 5: historical refund rate & forecast targets
# -------------------------------------------------------
ax5 = fig.add_subplot(3, 2, 5)

# Historical refund rate across all channels, in percent
hist_refund_rate = [r*100/g for r, g in zip(refund_hist, gmv_hist)]
hist_daren_refund = daren_refund

x_hist = range(1, 10)
ax5.plot(x_hist, hist_refund_rate, 'o-', color='#D64545', linewidth=2,
         markersize=8, label='全渠道退款率')
ax5.plot(x_hist, hist_daren_refund, 's--', color='#ED7D31', linewidth=2,
         markersize=6, label='达人直播退款率')

# Forecast refund rates: the same per-month rates the model applies.
x_fc_all = range(10, 17)
for scenario_key, scenario_name in SCENARIO_LABELS.items():
    target = SCENARIO_PARAMS[scenario_key]['refund_rate_target']
    adj_rates = [_refund_rate(target, _calendar_month(m)) * 100
                 for m in FORECAST_MONTH_NUMS]
    ls = '-' if scenario_name == '基准' else '--'
    ax5.plot(x_fc_all, adj_rates, ls, marker='s', linewidth=2, markersize=5,
             label=f'预测退款率({scenario_name})', alpha=0.8)

ax5.axvline(x=9.5, color='gray', linestyle='--', linewidth=1, alpha=0.5)
ax5.set_xticks(list(x_hist) + list(x_fc_all))
ax5.set_xticklabels(months_hist + months_fc, rotation=0)
ax5.set_ylabel('%', fontsize=11)
ax5.set_title('退款率趋势 & 预测目标', fontsize=14, fontweight='bold')
ax5.legend(loc='upper right', fontsize=8)
ax5.grid(alpha=0.3)
ax5.set_ylim(0, 65)

# -------------------------------------------------------
# Chart 6: summary panel (totals, assumptions, risks, advice)
# -------------------------------------------------------
ax6 = fig.add_subplot(3, 2, 6)
ax6.axis('off')

# NOTE(review): the assumption figures quoted below are literal text, not
# derived from SCENARIO_PARAMS - keep them in sync by hand.
summary_text = f"""
═══════════════════════════════════════════════════════
  📊 预测总结与关键假设
═══════════════════════════════════════════════════════

📈 预测区间(2026年6-12月,7个月)
┌──────────┬──────────────┬──────────────┐
│ 场景     │ 累计 GMV     │ 累计 GSV     │
├──────────┼──────────────┼──────────────┤
│ 保守     │ ¥{total['c_gmv']:>10,}  │ ¥{total['c_gsv']:>10,}  │
│ 基准     │ ¥{total['b_gmv']:>10,}  │ ¥{total['b_gsv']:>10,}  │
│ 乐观     │ ¥{total['o_gmv']:>10,}  │ ¥{total['o_gsv']:>10,}  │
└──────────┴──────────────┴──────────────┘

🔑 关键假设(基准场景)
  • 核心达人:晚柠/念妈 月均 GMV ¥75万
  • 学霸系:学霸老王/三人行 月均 GMV ¥35万
  • 每月新拓达人 6 位,首场均价 ¥8万
  • 新达人次月留存率 35%
  • 非达人渠道(端内+销售)月环比 +10%
  • 目标退费率 35%(当前波动 30-45%)

📅 季节性峰值
  • 7-8月暑假高峰(因子 1.4-1.5)
  • 11月双11大促(因子 1.25)
  • 6月考试季低谷(因子 0.75)

⚠️ 风险因素
  • 达人退款率波动(2月曾达 55.9%)
  • 学霸系达人合作可持续性不确定
  • 行业淡季(6月)收入可能低于预测
  • 新达人质量参差不齐

🎯 建议
  • 6月重点:储备暑假达人资源,减少开播
  • 7-8月重点:全力投放,学霸系+新达人密集排期
  • 9-10月:保持节奏,端内渠道发力
  • 11月:双11大促,全渠道协同
  • 加强达人退款管控,目标控制在35%以内
═══════════════════════════════════════════════════════
"""

# Monospace for the table, with CJK fonts as glyph fallback (fallback across
# a font list needs a Matplotlib version that supports it).
ax6.text(0.02, 0.98, summary_text, transform=ax6.transAxes,
         fontsize=9, verticalalignment='top', fontfamily=['monospace'] + cjk_fonts,
         bbox=dict(boxstyle='round', facecolor='#F0F4F8', alpha=0.8))

plt.tight_layout(rect=[0, 0, 1, 0.95])

OUTPUT_PATH = '/root/.openclaw/workspace/output/revenue_forecast_2026.png'
# savefig does not create missing directories.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
plt.savefig(OUTPUT_PATH, dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print(f"\n✅ 图表已保存: {OUTPUT_PATH}")