#!/usr/bin/env python3
"""Scatter plot with one point per character.

The x axis is the number of days since the course purchase and the y axis is
the cumulative number of lessons completed (first completion only). A mean
trend line over fixed-width day bins is drawn on top of the scatter, and a
small statistics box is placed in the upper-right corner.
"""
import glob
from pathlib import Path

import numpy as np
import pandas as pd

CSV_PATH = '/tmp/char_snapshot.csv'
OUTPUT_PATH = '/root/.openclaw/workspace/output/char_snapshot_scatter.png'

# Width (in days) of each trend bucket, and the minimum number of characters
# a bucket needs before its mean is drawn on the trend line.
DAY_BIN_WIDTH = 30
MIN_BIN_COUNT = 3

# Left-closed lesson-count ranges. The last edge is open-ended so that very
# large counts still land in the final bucket instead of becoming NaN.
LESSON_BIN_EDGES = [0, 1, 6, 11, 21, 31, 51, np.inf]
LESSON_BIN_LABELS = ['0节', '1-5节', '6-10节', '11-20节', '21-30节', '31-50节', '50节以上']


def lesson_ranges(completed: pd.Series) -> pd.Series:
    """Label each lesson count with its left-closed range bucket."""
    return pd.cut(completed, bins=LESSON_BIN_EDGES, labels=LESSON_BIN_LABELS,
                  right=False)


def day_bin_index(days: pd.Series, width: int = DAY_BIN_WIDTH) -> pd.Series:
    """Return the zero-based index of the ``width``-day bucket for each value.

    Bucket ``k`` covers ``[k * width, (k + 1) * width)``. Floor division is
    used instead of explicit bin edges so the largest value always falls in a
    bucket, even when it is an exact multiple of ``width``. Negative or
    missing day counts get no bucket (NaN).
    """
    return (days // width).where(days >= 0)


def trend_by_day_bin(df: pd.DataFrame, width: int = DAY_BIN_WIDTH,
                     min_count: int = MIN_BIN_COUNT) -> pd.DataFrame:
    """Compute the mean completed lessons per day bucket.

    Returns a frame with columns ``day_bin``, ``mean``, ``count`` and
    ``bin_center`` (the midpoint of the bucket on the x axis). Buckets with
    fewer than ``min_count`` rows are left out; rows without a bucket are
    ignored by the group-by.
    """
    bucket = day_bin_index(df['days_since_purchase'], width).rename('day_bin')
    trend = (
        df.groupby(bucket)['completed_lessons']
        .agg(['mean', 'count'])
        .reset_index()
    )
    trend['bin_center'] = trend['day_bin'] * width + width / 2
    return trend[trend['count'] >= min_count]


def build_stats_text(df: pd.DataFrame) -> str:
    """Build the multi-line summary shown in the statistics box."""
    days = df['days_since_purchase']
    lessons = df['completed_lessons']
    zero_count = (lessons == 0).sum()
    return "\n".join([
        f"Characters: {len(df)}",
        f"Days range: {days.min()}~{days.max()}d",
        f"Median days: {days.median():.0f}d",
        f"Median lessons: {lessons.median():.0f}",
        f"0 lessons: {zero_count} chars ({zero_count / len(df) * 100:.1f}%)",
    ])


def _load_pyplot():
    """Import pyplot with the Agg backend and register a CJK font if found.

    Matplotlib is imported here rather than at module level so the backend is
    selected before pyplot loads and so the data helpers above can be
    imported without matplotlib being available.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.font_manager as fm
    import matplotlib.pyplot as plt

    # Prefer Noto Sans CJK; fall back to the Arphic fonts.
    cjk_fonts = glob.glob('/usr/share/fonts/opentype/noto/NotoSans*CJK*.ttc')
    if not cjk_fonts:
        cjk_fonts = glob.glob('/usr/share/fonts/truetype/arphic/*.ttc')
    if cjk_fonts:
        font_path = cjk_fonts[0]
        fm.fontManager.addfont(font_path)
        plt.rcParams['font.family'] = fm.FontProperties(fname=font_path).get_name()
    plt.rcParams['axes.unicode_minus'] = False
    return plt


def main() -> None:
    """Read the snapshot CSV, print summary statistics and save the chart."""
    plt = _load_pyplot()

    df = pd.read_csv(CSV_PATH)
    if df.empty:
        # Every statistic below is undefined without at least one row.
        raise SystemExit(f"数据为空,无法绘图: {CSV_PATH}")

    days = df['days_since_purchase']
    lessons = df['completed_lessons']
    print(f"角色数: {len(df)}")
    print(f"days_since_purchase: min={days.min()}, max={days.max()}, median={days.median():.0f}")
    print(f"completed_lessons: min={lessons.min()}, max={lessons.max()}, median={lessons.median():.0f}")

    # Distribution of completed-lesson counts.
    print("\n完课分布:")
    df['range'] = lesson_ranges(lessons)
    print(df['range'].value_counts().sort_index())

    fig, ax = plt.subplots(figsize=(14, 8))

    # Scatter: one point per character.
    ax.scatter(days, lessons, alpha=0.5, s=25, c='#3b82f6',
               edgecolors='white', linewidth=0.5)

    # Trend line: mean completed lessons per day bucket.
    trend = trend_by_day_bin(df)
    ax.plot(trend['bin_center'], trend['mean'], 'o-', color='#ef4444',
            linewidth=2.5, markersize=6,
            label=f'Mean Trend ({DAY_BIN_WIDTH}-day bins)', zorder=5)

    ax.set_xlabel('Days Since Purchase', fontsize=13)
    ax.set_ylabel('Cumulative Lessons Completed', fontsize=13)
    ax.set_title('599 RMB Non-repeat Buyers: Current State per Character\n(First Completion Only, Post-Purchase)',
                 fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, linestyle='--')
    ax.set_xlim(left=0)
    ax.set_ylim(bottom=0)

    ax.text(0.98, 0.97, build_stats_text(df), transform=ax.transAxes,
            fontsize=10, verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))
    ax.legend(loc='upper left', fontsize=11)

    plt.tight_layout()
    # savefig does not create missing directories, so make sure it exists.
    Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(OUTPUT_PATH, dpi=150, bbox_inches='tight')
    plt.close(fig)
    print("\n图表已保存到 output/char_snapshot_scatter.png")


if __name__ == "__main__":
    main()