ai_member_xiaoxi/scripts/repeat_distribution_chart.py
2026-05-29 08:00:01 +08:00

93 lines
3.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
# Chart inputs.
# Each row of `data` holds one month's counts, positionally aligned with
# `categories` (and therefore with `colors`).
categories = ['2次', '3次', '4次', '5次+']
colors = ['#4472C4', '#ED7D31', '#A5A5A5', '#FFC000']
data = {
    '2026-03': [474, 299, 42, 42],
    '2026-04': [1274, 232, 60, 64],
    '2026-05': [931, 174, 58, 46],
}
# Month labels in plotting order; dicts preserve insertion order, so this
# is the same sequence as the keys written above.
months = list(data)
# Font setup for the Chinese labels used throughout the figure.
# Assigning a font name to rcParams never raises when that font is not
# installed, so a try/except loop over candidates always stops at the first
# name and provides no fallback. Instead, hand Matplotlib the whole priority
# list: font lookup walks the 'font.sans-serif' entries in order and uses the
# first family it can actually find.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = [
    'WenQuanYi Micro Hei',
    'Noto Sans CJK SC',
    'SimHei',
    'DejaVu Sans',
]
# Draw the minus sign as ASCII '-' rather than U+2212, which some CJK fonts
# do not include.
plt.rcParams['axes.unicode_minus'] = False
# One figure with two side-by-side panels: ax1 (left) gets the monthly
# stacked bars, ax2 (right) is filled in by the pie-chart section.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

# === Left: Monthly stacked bar ===
x = np.arange(len(months))
width = 0.55
# Running top of each month's stack. Sized from `months` (not a literal 3)
# so adding or removing a month does not break the `bottom += vals` update.
bottom = np.zeros(len(months))
for i, cat in enumerate(categories):
    vals = [data[m][i] for m in months]
    ax1.bar(x, vals, width, bottom=bottom, color=colors[i], label=cat,
            edgecolor='white', linewidth=0.5)
    bottom += vals
    # Data labels for significant segments: `bottom` already includes this
    # segment, so its vertical midpoint is bottom - v/2. Segments of 40 or
    # fewer are left unlabeled.
    for j, v in enumerate(vals):
        if v > 40:
            ax1.text(x[j], bottom[j] - v / 2, str(v), ha='center', va='center',
                     fontsize=9, fontweight='bold', color='white')

ax1.set_xticks(x)
ax1.set_xticklabels(months, fontsize=12)
ax1.set_ylabel('重复学习组合数', fontsize=13)
ax1.set_title('各月重复学习次数分布user×chapter组合', fontsize=14, fontweight='bold')
ax1.legend(loc='upper right', fontsize=10, title='完成次数')

# Height of the tallest stack; used for axis headroom and label offset.
tallest = max(bottom)
ax1.set_ylim(0, tallest * 1.15)
ax1.grid(axis='y', alpha=0.2, linestyle='--')

# Annotations: per-month total, placed slightly above each finished stack.
totals = [sum(data[m]) for m in months]
for j, (xi, t) in enumerate(zip(x, totals)):
    ax1.text(xi, bottom[j] + tallest * 0.02, f'合计 {t}', ha='center',
             fontsize=10, fontweight='bold', color='#333')
# === Right: Overall pie ===
# Number of combinations per completion count over the whole period.
# The last bucket merges the long tail; its per-count breakdown is
# 6:14, 7:17, 8:5, 9:5, 10:2, 11:2, 12:2, 13:2, 14:1, 19:1 (sum = 51).
labels = ['2次', '3次', '4次', '5次', '6-19次']
sizes = [4136, 653, 130, 51, 14 + 17 + 5 + 5 + 2 + 2 + 2 + 2 + 1 + 1]
# Pull only the long-tail wedge out of the pie.
explode = (0, 0, 0, 0, 0.1)
wedges, _texts, _autotexts = ax2.pie(
    sizes,
    explode=explode,
    labels=None,  # wedge names are shown in the legend instead
    colors=['#4472C4', '#ED7D31', '#A5A5A5', '#FFC000', '#C00000'],
    autopct='%1.1f%%',
    startangle=90,
    textprops={'fontsize': 11},
)
ax2.set_title('整体分布占比', fontsize=14, fontweight='bold')

# Custom legend: the share is computed against the actual total of `sizes`
# (5021 for the values above) rather than a separately hard-coded constant,
# so the percentages stay correct if the counts are edited.
total = sum(sizes)
legend_labels = [
    f'{label}: {size}个组合 ({size / total * 100:.1f}%)'
    for label, size in zip(labels, sizes)
]
ax2.legend(wedges, legend_labels, loc='lower center', fontsize=10, ncol=1)
# Figure-level headline (two lines), placed just above the axes area.
headline = '最近3个月2026.03-05重复学习次数分布\n同一角色×同一课时完成≥2次'
fig.suptitle(headline, fontsize=16, fontweight='bold', y=1.02)

# Footer line with fixed summary statistics, centered near the bottom edge.
footer = '活跃角色: 10,409人 | 重复学习角色: 1,317人 (12.7%) | 重复学习组合: 5,021个 | 平均重复: 2.3次'
fig.text(0.5, 0.02, footer, ha='center', fontsize=11, color='#555')

# Keep the subplots inside the given rectangle, leaving a strip at the
# bottom (for the footer) and at the top of the figure.
plt.tight_layout(rect=[0, 0.05, 1, 0.95])

# bbox_inches='tight' crops to the drawn artists, so the suptitle at
# y=1.02 is still included in the saved image.
output_path = '/root/.openclaw/workspace/output/repeat_distribution_3m.png'
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print('Saved.')