# ai_member_xiaoxi/scripts/repeat_distribution_chart.py

import os

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np

# Data - monthly distribution.
# Each row pairs a month with its counts, ordered to match `categories`:
# [2次, 3次, 4次, 5次+].
_MONTHLY_ROWS = (
    ('2026-03', [474, 299, 42, 42]),
    ('2026-04', [1274, 232, 60, 64]),
    ('2026-05', [931, 174, 58, 46]),
)

# month -> per-category counts; dict insertion order keeps the months chronological
data = {month: counts for month, counts in _MONTHLY_ROWS}
months = list(data)
categories = ['2次', '3次', '4次', '5次+']

# One fill colour per entry of `categories`, in the same order
colors = ['#4472C4', '#ED7D31', '#A5A5A5', '#FFC000']

# Font setup for the Chinese labels used throughout the figure.
# Assigning to rcParams does not raise when a font is not installed, so a
# try/except loop over candidates would always stop at the first name and never
# reach the fallbacks. Matplotlib resolves 'font.sans-serif' as an ordered list
# and uses the first family it can find, so the full candidate list is passed.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = [
    'WenQuanYi Micro Hei',
    'Noto Sans CJK SC',
    'SimHei',
    'DejaVu Sans',
]
# Render negative signs with an ASCII hyphen instead of the Unicode minus
# (presumably because the CJK fonts above may lack that glyph).
plt.rcParams['axes.unicode_minus'] = False

# One figure with two side-by-side panels: ax1 (left) holds the stacked bars,
# ax2 (right) is filled in by the pie-chart section.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

# === Left: Monthly stacked bar ===
x = np.arange(len(months))
width = 0.55
# Running top edge of each month's stack. Sized from `months` rather than a
# literal 3 so that adding or removing a month cannot cause a shape mismatch.
bottom = np.zeros(len(months))

for i, cat in enumerate(categories):
    # i-th category's count for every month, in month order
    vals = [data[m][i] for m in months]
    ax1.bar(x, vals, width, bottom=bottom, color=colors[i], label=cat, edgecolor='white', linewidth=0.5)
    bottom += vals
    # Data labels for significant segments only (small slivers cannot fit text)
    for j, v in enumerate(vals):
        if v > 40:
            # `bottom[j]` is now the top of this segment, so top - v/2 is its centre
            ax1.text(x[j], bottom[j] - v/2, str(v), ha='center', va='center', fontsize=9, fontweight='bold', color='white')

ax1.set_xticks(x)
ax1.set_xticklabels(months, fontsize=12)
ax1.set_ylabel('重复学习组合数', fontsize=13)
ax1.set_title('各月重复学习次数分布（user×chapter组合）', fontsize=14, fontweight='bold')
ax1.legend(loc='upper right', fontsize=10, title='完成次数')
# 15% headroom above the tallest stack leaves space for the total labels
ax1.set_ylim(0, max(bottom) * 1.15)
ax1.grid(axis='y', alpha=0.2, linestyle='--')

# Annotations: integer total per month, placed slightly above each finished stack
totals = [sum(data[m]) for m in months]
for xi, t, top in zip(x, totals, bottom):
    ax1.text(xi, top + max(bottom)*0.02, f'合计 {t}', ha='center', fontsize=10, fontweight='bold', color='#333')

# === Right: Overall pie ===
# Overall number of combinations per completion count: {times completed: combinations}.
# This table is the single source for the slice sizes and the legend percentages.
completion_counts = {
    2: 4136, 3: 653, 4: 130, 5: 51,
    6: 14, 7: 17, 8: 5, 9: 5, 10: 2, 11: 2, 12: 2, 13: 2, 14: 1, 19: 1,
}

# Grouped for clarity: 2..5 completions each get a slice, 6 and above share one
labels = ['2次', '3次', '4次', '5次', '6-19次']
sizes = [completion_counts[times] for times in (2, 3, 4, 5)]
sizes.append(sum(n for times, n in completion_counts.items() if times >= 6))  # = 51
# Denominator for the legend percentages, derived instead of hard-coded (= 5021)
total_combos = sum(sizes)
# Pull only the last slice (6-19次) away from the centre
explode = (0, 0, 0, 0, 0.1)

wedges, texts, autotexts = ax2.pie(sizes, explode=explode, labels=None,
                                     colors=['#4472C4','#ED7D31','#A5A5A5','#FFC000','#C00000'],
                                     autopct='%1.1f%%', startangle=90,
                                     textprops={'fontsize': 11})
ax2.set_title('整体分布占比', fontsize=14, fontweight='bold')

# Custom legend: slice name, absolute count and share of all combinations
legend_labels = [f'{label}: {size}个组合 ({size / total_combos * 100:.1f}%)'
                 for label, size in zip(labels, sizes)]
ax2.legend(wedges, legend_labels, loc='lower center', fontsize=10, ncol=1)

# Summary text: figure-level title above both panels
fig.suptitle('最近3个月（2026.03-05）重复学习次数分布\n（同一角色×同一课时完成≥2次）',
             fontsize=16, fontweight='bold', y=1.02)
# Footer line with headline statistics; static text, so a plain string literal
fig.text(0.5, 0.02,
         '活跃角色: 10,409人 | 重复学习角色: 1,317人 (12.7%) | 重复学习组合: 5,021个 | 平均重复: 2.3次',
         ha='center', fontsize=11, color='#555')

# Fit the panels into a sub-rectangle (left, bottom, right, top in figure
# fractions), keeping a band free at the bottom and top of the figure
plt.tight_layout(rect=[0, 0.05, 1, 0.95])

output_path = '/root/.openclaw/workspace/output/repeat_distribution_3m.png'
# savefig does not create missing directories, so make sure the target exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print('Saved.')