ai_member_xiaoxi/scripts/lesson_scatter.py
2026-06-17 08:00:01 +08:00

73 lines
3.3 KiB
Python

#!/usr/bin/env python3
"""散点图:每个角色一个点,横轴=购课天数,纵轴=累计完课节数(首次完课)"""
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import numpy as np
import glob
# Chinese font setup: try the Noto Sans CJK collections first and fall back to
# the Arphic TrueType collections; if neither is installed, matplotlib's
# default font family is left untouched.
cjk_fonts = (
    glob.glob('/usr/share/fonts/opentype/noto/NotoSans*CJK*.ttc')
    or glob.glob('/usr/share/fonts/truetype/arphic/*.ttc')
)
font_path = next(iter(cjk_fonts), None)
if font_path:
    # Register the font file, then select it through the family name it reports.
    fm.fontManager.addfont(font_path)
    font_name = fm.FontProperties(fname=font_path).get_name()
    plt.rcParams['font.family'] = font_name
# Render minus signs with the ASCII hyphen instead of U+2212
# (presumably because the CJK font may lack that glyph).
plt.rcParams['axes.unicode_minus'] = False
# Load the per-character snapshot. The script reads two columns from it:
# 'days_since_purchase' and 'completed_lessons'.
df = pd.read_csv('/tmp/char_snapshot.csv')
print(f"角色数: {len(df)}")
print(f"days_since_purchase: min={df['days_since_purchase'].min()}, max={df['days_since_purchase'].max()}, median={df['days_since_purchase'].median():.0f}")
print(f"completed_lessons: min={df['completed_lessons'].min()}, max={df['completed_lessons'].max()}, median={df['completed_lessons'].median():.0f}")
# Distribution of completed-lesson counts.
print("\n完课分布:")
# Left-closed buckets (right=False): [0, 1) -> 0 lessons, [1, 6) -> 1-5,
# [6, 11) -> 6-10, ..., [51, inf) -> more than 50.
# The top edge is open-ended: a finite cap would turn any count at or above
# the cap into NaN, silently dropping that character from the tally.
bins = [0, 1, 6, 11, 21, 31, 51, np.inf]
labels = ['0节', '1-5节', '6-10节', '11-20节', '21-30节', '31-50节', '50节以上']
df['range'] = pd.cut(df['completed_lessons'], bins=bins, labels=labels, right=False)
# sort_index() keeps the buckets in their defined order rather than by frequency.
print(df['range'].value_counts().sort_index())
fig, ax = plt.subplots(figsize=(14, 8))
# Scatter: one marker per character (x = days since purchase,
# y = lessons completed). Semi-transparent fill with a thin white outline.
marker_style = dict(alpha=0.5, s=25, c='#3b82f6', edgecolors='white', linewidth=0.5)
ax.scatter(df['days_since_purchase'], df['completed_lessons'], **marker_style)
# Trend line: mean completed lessons per 30-day bucket of days since purchase.
bin_width = 30
# Buckets are left-closed: [0, 30), [30, 60), ...  The last edge therefore has
# to lie strictly above the maximum; otherwise a maximum that is an exact
# multiple of the width falls outside every bucket, gets a NaN bucket id and
# is dropped from the group means.
n_bins = int(df['days_since_purchase'].max() // bin_width) + 1
day_bins = np.arange(n_bins + 1) * bin_width
# labels=False yields the integer bucket index instead of an Interval.
df['day_bin'] = pd.cut(df['days_since_purchase'], bins=day_bins, labels=False, right=False)
bin_means = df.groupby('day_bin')['completed_lessons'].agg(['mean', 'count']).reset_index()
# Plot each bucket at its midpoint on the x axis.
bin_means['bin_center'] = bin_means['day_bin'] * bin_width + bin_width // 2
# Skip buckets with fewer than 3 characters.
bin_means = bin_means[bin_means['count'] >= 3]
ax.plot(bin_means['bin_center'], bin_means['mean'], 'o-', color='#ef4444', linewidth=2.5,
        markersize=6, label='Mean Trend (30-day bins)', zorder=5)
# Axis labels share one font size; the title is larger and bold.
axis_label_style = {'fontsize': 13}
ax.set_xlabel('Days Since Purchase', **axis_label_style)
ax.set_ylabel('Cumulative Lessons Completed', **axis_label_style)
chart_title = '599 RMB Non-repeat Buyers: Current State per Character\n(First Completion Only, Post-Purchase)'
ax.set_title(chart_title, fontsize=14, fontweight='bold')
# Light dashed grid.
ax.grid(True, alpha=0.3, linestyle='--')
# Pin the lower limit of both axes to zero; upper limits stay automatic.
ax.set_xlim(left=0)
ax.set_ylim(bottom=0)
# Summary box: one line per statistic, joined with newlines
# (no trailing newline after the last line).
days_col = df['days_since_purchase']
lessons_col = df['completed_lessons']
zero_lesson_count = (lessons_col == 0).sum()
stats_lines = [
    f"Characters: {len(df)}",
    f"Days range: {days_col.min()}~{days_col.max()}d",
    f"Median days: {days_col.median():.0f}d",
    f"Median lessons: {lessons_col.median():.0f}",
    f"0 lessons: {zero_lesson_count} chars ({zero_lesson_count / len(df) * 100:.1f}%)",
]
stats_text = "\n".join(stats_lines)
# Place the box in the top-right corner using axes-fraction coordinates.
box_style = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
ax.text(
    0.98, 0.97, stats_text,
    transform=ax.transAxes,
    fontsize=10,
    verticalalignment='top',
    horizontalalignment='right',
    bbox=box_style,
)
# The legend goes top-left, opposite the summary box.
ax.legend(loc='upper left', fontsize=11)
# Tighten the layout, then write the chart as a 150-dpi PNG.
fig.tight_layout()
output_png = '/root/.openclaw/workspace/output/char_snapshot_scatter.png'
fig.savefig(output_png, dpi=150, bbox_inches='tight')
print("\n图表已保存到 output/char_snapshot_scatter.png")