ai_member_xiaoxi/scripts/repeat_duration_l1l2.py
2026-05-29 08:00:01 +08:00

83 lines
3.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
# Font configuration so the Chinese (CJK) labels used in this script can render.
#
# `font.sans-serif` is a priority list: matplotlib walks it in order and uses
# the first family that is actually installed. The earlier try/except loop
# assigned a single candidate and broke out at once; assigning to rcParams does
# not raise for a missing font, so only the first name was ever configured and
# the remaining candidates were never tried.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = [
    'WenQuanYi Micro Hei',
    'Noto Sans CJK SC',
    'SimHei',
    'DejaVu Sans',
]
# Render minus signs as ASCII '-' rather than U+2212, which the selected CJK
# font may not provide a glyph for.
plt.rcParams['axes.unicode_minus'] = False
# Source table, one row per duration bucket: (x-axis label, L1 count, L2 count).
# Keeping the three values in one row makes the index alignment between
# `ranges`, `l1` and `l2` explicit. The first bucket is labelled as
# "0 minutes (no duration data)".
_BUCKETS = (
    ('0分钟\n(无时长数据)', 0, 1403),
    ('5-10', 0, 4),
    ('10-15', 7, 14),
    ('15-20', 195, 45),
    ('20-30', 1036, 172),
    ('30-45', 749, 582),
    ('45-60', 225, 238),
    ('60-90', 118, 115),
    ('90-120', 37, 31),
    ('120+', 28, 22),
)
# Column views consumed by the plotting code.
ranges = [label for label, _, _ in _BUCKETS]
l1 = [l1_count for _, l1_count, _ in _BUCKETS]
l2 = [l2_count for _, _, l2_count in _BUCKETS]
# Width of a single bar; the two series of a bucket are drawn at x - w/2 and
# x + w/2.
w = 0.35
# One x position per duration bucket.
x = np.arange(len(ranges))
# One row with two panels side by side.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 7))
# === Left panel: side-by-side bars of absolute counts per duration bucket ===
left_ax = axes[0]
# Legend totals are derived from the data so they cannot drift away from it.
b1 = left_ax.bar(x - w/2, l1, w, color='#4472C4', label=f'L1 ({sum(l1):,}组合)',
                 edgecolor='white', linewidth=0.5)
b2 = left_ax.bar(x + w/2, l2, w, color='#ED7D31', label=f'L2 ({sum(l2):,}组合)',
                 edgecolor='white', linewidth=0.5)
# Tallest bar across both series: it sets both the gap between a bar and its
# value label and the headroom left above the bars.
count_peak = max(max(l1), max(l2))
label_gap = count_peak * 0.02
# Value labels are drawn only for bars with more than 20 combinations; smaller
# bars stay unlabelled. Each label uses its series colour.
for bars, counts, colour in ((b1, l1, '#4472C4'), (b2, l2, '#ED7D31')):
    for bar, v in zip(bars, counts):
        if v > 20:
            left_ax.text(bar.get_x() + bar.get_width()/2, v + label_gap,
                         str(v), ha='center', fontsize=8, color=colour)
left_ax.set_xticks(x)
left_ax.set_xticklabels(ranges, fontsize=10, rotation=0)
left_ax.set_ylabel('重复学习组合数', fontsize=12)
left_ax.set_title('L1 vs L2 总学习时长分布对比', fontsize=14, fontweight='bold')
left_ax.legend(fontsize=11)
left_ax.grid(axis='y', alpha=0.2, linestyle='--')
# 18% headroom above the tallest bar so its value label fits inside the axes.
left_ax.set_ylim(0, count_peak * 1.18)
# === Right panel: each bucket as a percentage of its own level's total ===
right_ax = axes[1]
l1_total = sum(l1)
l2_total = sum(l2)
# The `> 0` guard makes an all-zero series plot as 0% instead of dividing by
# zero.
l1_pct = [v/l1_total*100 if l1_total > 0 else 0 for v in l1]
l2_pct = [v/l2_total*100 if l2_total > 0 else 0 for v in l2]
# Totals are formatted with a thousands separator so this legend reads the
# same way as the left panel's legend and the figure footnote.
bp1 = right_ax.bar(x - w/2, l1_pct, w, color='#4472C4', label=f'L1 ({l1_total:,}组合)',
                   edgecolor='white', linewidth=0.5)
bp2 = right_ax.bar(x + w/2, l2_pct, w, color='#ED7D31', label=f'L2 ({l2_total:,}组合)',
                   edgecolor='white', linewidth=0.5)
# Largest percentage across both series: it sets both the gap between a bar
# and its label and the headroom left above the bars.
pct_peak = max(max(l1_pct), max(l2_pct))
pct_gap = pct_peak * 0.02
# Percentage labels are drawn only for bars above 2%; each label uses its
# series colour.
for bars, pcts, colour in ((bp1, l1_pct, '#4472C4'), (bp2, l2_pct, '#ED7D31')):
    for bar, v in zip(bars, pcts):
        if v > 2:
            right_ax.text(bar.get_x() + bar.get_width()/2, v + pct_gap,
                          f'{v:.1f}%', ha='center', fontsize=8, color=colour)
right_ax.set_xticks(x)
right_ax.set_xticklabels(ranges, fontsize=10, rotation=0)
right_ax.set_ylabel('占比 (%)', fontsize=12)
right_ax.set_title('L1 vs L2 总学习时长占比对比', fontsize=14, fontweight='bold')
right_ax.legend(fontsize=11)
right_ax.grid(axis='y', alpha=0.2, linestyle='--')
# 18% headroom above the tallest bar so its label fits inside the axes.
right_ax.set_ylim(0, pct_peak * 1.18)
# Figure-level title. y=1.02 puts it just above the top edge of the figure;
# it still ends up in the saved image because savefig uses bbox_inches='tight'.
fig.suptitle(
    '最近3个月2026.03-05重复学习时长分布 L1 vs L2',
    y=1.02,
    fontweight='bold',
    fontsize=16,
)
# Red footnote centred near the bottom edge of the figure.
# NOTE(review): the counts and percentages in this text are hard-coded, not
# computed from l1/l2 -- update them by hand whenever the data changes.
footnote = (
    '⚠️ L2有1,403个组合(53.4%)无组件级时长数据(0分钟) | L1数据完整 | '
    'L1峰值20-30分钟(43%), L2峰值30-45分钟(去除零值后占48%)'
)
fig.text(0.5, 0.01, footnote, color='#C0392B', fontsize=10, ha='center')
# Fit the two panels into the figure area between 6% and 95% of its height.
plt.tight_layout(rect=[0, 0.06, 1, 0.95])
out_path = '/root/.openclaw/workspace/output/repeat_duration_l1l2_3m.png'
plt.savefig(out_path, bbox_inches='tight', dpi=150)
print('Saved.')