ai_member_xiaoban/skills/studytime-analysis/scripts/studytime_analysis.py

#!/usr/bin/env python3
"""
studytime-analysis — 角色学习时间分析工具
用法: python3 studytime_analysis.py <role_id>
输出: Markdown 格式的分析报告

数据源: PostgreSQL Online (vala 库)
核心表: user_chapter_play_record_0~7
"""

import os
import sys
import psycopg2
import psycopg2.extras
from datetime import datetime, timedelta
from collections import defaultdict, OrderedDict

# ── Configuration ─────────────────────────────────────
# PostgreSQL connection settings. Each value is read from the named
# environment variable and falls back to the literal default given here.
PG_CONFIG = {
    "host": os.environ.get("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    "port": int(os.environ.get("PG_DB_PORT", "28591")),
    "user": os.environ.get("PG_DB_USER", "ai_member"),
    "password": os.environ.get("PG_DB_PASSWORD", ""),
    "dbname": os.environ.get("PG_DB_DATABASE", "vala"),
}

# Months treated as school vacation: winter break (Jan-Feb), summer break (Jul-Aug).
EXCLUDED_MONTHS = (1, 2, 7, 8)

# Display names indexed by datetime.weekday(): 0 = Monday ... 6 = Sunday.
WEEKDAY_NAMES = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]

# Named periods of the day mapped to half-open hour ranges [start, end).
PERIODS = OrderedDict([
    ("凌晨", (0, 6)),    # early morning
    ("上午", (6, 12)),   # morning
    ("中午", (12, 14)),  # midday
    ("下午", (14, 18)),  # afternoon
    ("晚上", (18, 24)),  # evening
])


# ── 数据库查询 ────────────────────────────────────────

def get_connection():
    """Open and return a new psycopg2 connection built from ``PG_CONFIG``.

    The caller owns the returned connection and is responsible for closing it.
    """
    wanted = ("host", "port", "user", "password", "dbname")
    settings = {key: PG_CONFIG[key] for key in wanted}
    return psycopg2.connect(**settings)


def fetch_completion_records(role_id):
    """Fetch every completed-lesson row for one role, skipping vacation months.

    The same SELECT is issued against each of the eight shard tables
    ``user_chapter_play_record_0`` .. ``_7`` and combined with UNION ALL.
    Rows are kept when ``play_status = 1`` and the month of ``updated_at``
    (as extracted by the database) is not listed in ``EXCLUDED_MONTHS``.

    Args:
        role_id: Value matched against the ``user_id`` column.

    Returns:
        A list of dict-like rows (``RealDictCursor``) with the keys
        ``user_id``, ``chapter_id``, ``chapter_unique_id``, ``level`` and
        ``updated_at``, ordered by ``updated_at`` ascending.
    """
    # One named placeholder is enough: psycopg2 lets a named parameter be
    # referenced any number of times in a query.
    params = {"role_id": role_id}

    # Drive the filter from EXCLUDED_MONTHS so the SQL cannot drift from the
    # module constant. psycopg2 renders a Python tuple as "(1, 2, 7, 8)".
    # An empty tuple would render as the invalid "IN ()", so omit the clause.
    month_filter = ""
    if EXCLUDED_MONTHS:
        params["excluded_months"] = tuple(EXCLUDED_MONTHS)
        month_filter = "AND EXTRACT(MONTH FROM updated_at) NOT IN %(excluded_months)s"

    shard_queries = [
        f"""
            SELECT user_id, chapter_id, chapter_unique_id, level, updated_at
            FROM user_chapter_play_record_{shard}
            WHERE user_id = %(role_id)s
              AND play_status = 1
              {month_filter}
        """
        for shard in range(8)
    ]
    union_sql = " UNION ALL ".join(shard_queries)
    sql = f"""
        SELECT * FROM (
            {union_sql}
        ) t
        ORDER BY updated_at ASC
    """

    conn = get_connection()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, params)
            return cur.fetchall()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()


def count_excluded_records(role_id):
    """Count completed-lesson rows that fall inside the vacation months.

    This is the complement of the vacation filter: rows with
    ``play_status = 1`` whose ``updated_at`` month is listed in
    ``EXCLUDED_MONTHS``, summed over the eight shard tables.

    Args:
        role_id: Value matched against the ``user_id`` column.

    Returns:
        The number of excluded rows as a plain ``int`` (0 when there are none).
    """
    if not EXCLUDED_MONTHS:
        # Nothing is excluded, and "IN ()" would be invalid SQL anyway.
        return 0

    # A single named placeholder can be reused in every shard query; the
    # tuple is rendered by psycopg2 as a parenthesised SQL list.
    params = {"role_id": role_id, "excluded_months": tuple(EXCLUDED_MONTHS)}

    shard_queries = [
        f"""
            SELECT COUNT(*) as cnt
            FROM user_chapter_play_record_{shard}
            WHERE user_id = %(role_id)s
              AND play_status = 1
              AND EXTRACT(MONTH FROM updated_at) IN %(excluded_months)s
        """
        for shard in range(8)
    ]
    union_sql = " UNION ALL ".join(shard_queries)
    sql = f"SELECT SUM(cnt) as total FROM ({union_sql}) t"

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(sql, params)
            row = cur.fetchone()
    finally:
        conn.close()

    total = row[0] if row else None
    # SUM() over bigint comes back as Decimal; normalise to int for callers.
    return int(total) if total else 0


# ── 分析函数 ──────────────────────────────────────────

def classify_period(hour):
    """Map an hour of the day to the name of the period that contains it.

    Periods come from ``PERIODS`` and are half-open ranges ``[start, end)``.
    The first matching period wins; "未知" is returned when none matches.
    """
    matching = (
        label
        for label, (start, end) in PERIODS.items()
        if start <= hour < end
    )
    return next(matching, "未知")


def analyze_weekly_distribution(records):
    """Tally completions per weekday and, for Mon-Fri, per period of the day.

    Each record's ``updated_at`` is used exactly as supplied: its own
    ``weekday()`` and ``hour`` are read and no time-zone conversion is done.
    Records whose ``updated_at`` is None are skipped.

    Args:
        records: Iterable of mappings that have an ``updated_at`` key holding
            a datetime (or None).

    Returns:
        A ``(day_counts, weekday_periods)`` tuple where

        * ``day_counts[weekday]`` is the number of completions on that
          weekday (0 = Monday ... 6 = Sunday), and
        * ``weekday_periods[period][weekday]`` is the number of completions
          in that period, counted for Monday-Friday only.
    """
    day_counts = defaultdict(int)
    weekday_periods = defaultdict(lambda: defaultdict(int))

    for record in records:
        completed_at = record["updated_at"]
        if completed_at is None:
            continue

        weekday = completed_at.weekday()  # 0 = Monday
        day_counts[weekday] += 1

        # The period breakdown only covers working days, so the period is
        # looked up only when it is actually needed.
        if weekday < 5:
            period = classify_period(completed_at.hour)
            weekday_periods[period][weekday] += 1

    return day_counts, weekday_periods


def analyze_weekly_trend(records, excluded_months=None):
    """Aggregate completions by ISO week and summarise the trend.

    Args:
        records: Sequence of mappings with an ``updated_at`` datetime (or
            None, in which case the record is skipped).
        excluded_months: Month numbers regarded as vacation. Defaults to the
            module-level ``EXCLUDED_MONTHS``. A calendar week whose seven
            days all fall in these months is not reported as an empty week,
            because records from those months are filtered out upstream and
            such a week can never contain one.

    Returns:
        ``(weeks_data, analysis)``. ``weeks_data`` is a list of
        ``(iso_year, iso_week, count)`` tuples for weeks that have at least
        one record, in chronological order. ``analysis`` is a dict with:

        * ``total_weeks``: number of weeks that have records
        * ``total_span_weeks``: calendar weeks from the first to the last
          active week, inclusive
        * ``total_lessons``: number of dated records
        * ``avg_per_week``: lessons per active week, rounded to 1 decimal
        * ``empty_weeks``: sorted ``(iso_year, iso_week)`` tuples inside the
          span that have no record and are not vacation-only weeks
        * ``consecutive``: True when ``empty_weeks`` is empty
        * ``first_half_avg`` / ``second_half_avg``: mean count over the
          first and last ``total_weeks // 2`` active weeks (for an odd
          number of weeks the middle week belongs to neither half)
        * ``trend``: "上涨 ↑" when second/first > 1.15, "下降 ↓" when the
          ratio is < 0.85, otherwise "持平"

        For empty ``records`` the result is ``([], {})``.
    """
    if not records:
        return [], {}

    vacation_months = frozenset(
        EXCLUDED_MONTHS if excluded_months is None else excluded_months
    )

    week_counts = defaultdict(int)
    for record in records:
        completed_at = record["updated_at"]
        if completed_at is None:
            continue
        iso = completed_at.isocalendar()
        week_counts[(iso[0], iso[1])] += 1

    sorted_weeks = sorted(week_counts)
    weeks_data = [(y, w, week_counts[(y, w)]) for y, w in sorted_weeks]

    total_weeks = len(weeks_data)
    total_lessons = sum(week_counts.values())
    avg_per_week = round(total_lessons / total_weeks, 1) if total_weeks else 0

    # Walk every calendar week between the first and last active week and
    # collect the ones without records.
    total_span_weeks = 0
    empty_weeks = []
    if sorted_weeks:
        first_year, first_week = sorted_weeks[0]
        last_year, last_week = sorted_weeks[-1]
        first_monday = datetime.fromisocalendar(first_year, first_week, 1)
        last_monday = datetime.fromisocalendar(last_year, last_week, 1)
        total_span_weeks = (last_monday - first_monday).days // 7 + 1

        monday = first_monday
        while monday <= last_monday:
            iso = monday.isocalendar()
            key = (iso[0], iso[1])
            # `in` on a defaultdict does not create the key.
            if key not in week_counts and not _week_is_all_vacation(monday, vacation_months):
                empty_weeks.append(key)
            monday += timedelta(days=7)

    consecutive = not empty_weeks

    # Trend: compare the first and last `mid` active weeks. Slicing from
    # `total_weeks - mid` leaves the middle week out when the count is odd.
    mid = total_weeks // 2
    first_half = weeks_data[:mid]
    second_half = weeks_data[total_weeks - mid:]
    first_half_avg = sum(c for _, _, c in first_half) / mid if mid > 0 else 0
    second_half_avg = (
        sum(c for _, _, c in second_half) / len(second_half) if second_half else 0
    )

    trend = "持平"
    if first_half_avg > 0:
        ratio = second_half_avg / first_half_avg
        if ratio > 1.15:
            trend = "上涨 ↑"
        elif ratio < 0.85:
            trend = "下降 ↓"

    return weeks_data, {
        "total_weeks": total_weeks,
        "total_span_weeks": total_span_weeks,
        "total_lessons": total_lessons,
        "avg_per_week": avg_per_week,
        "consecutive": consecutive,
        "empty_weeks": empty_weeks,
        "first_half_avg": round(first_half_avg, 1),
        "second_half_avg": round(second_half_avg, 1),
        "trend": trend,
    }


def _week_is_all_vacation(monday, vacation_months):
    """Return True when all seven days of the week starting at *monday* fall in *vacation_months*."""
    return all(
        (monday + timedelta(days=offset)).month in vacation_months
        for offset in range(7)
    )


# ── 输出格式化 ────────────────────────────────────────

def format_report(role_id, records, excluded_count, day_counts, weekday_periods, weeks_data, analysis):
    """Render the study-time analysis as a Markdown document.

    Args:
        role_id: Identifier shown in the report title.
        records: Completion records (mappings with ``updated_at`` and,
            optionally, ``level`` and ``chapter_id``).
        excluded_count: Number of vacation records left out; a note is added
            when it is greater than zero.
        day_counts: Mapping weekday index (0 = Monday) -> completion count.
        weekday_periods: Mapping period name -> {weekday index -> count}.
        weeks_data: List of ``(iso_year, iso_week, count)`` tuples.
        analysis: Summary dict with the keys ``total_span_weeks``,
            ``total_weeks``, ``total_lessons``, ``avg_per_week``,
            ``consecutive``, ``empty_weeks``, ``first_half_avg``,
            ``second_half_avg`` and ``trend``.

    Returns:
        The report as one string with lines joined by "\\n". When ``records``
        is empty only the header and a "no data" notice are produced.
    """
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [
        f"# 📊 学习时间分析报告 — 角色 {role_id}",
        "",
        f"**分析时间**: {generated_at}",
        f"**有效完课记录**: {len(records)} 条",
    ]
    if excluded_count > 0:
        lines.append(f"**已排除寒暑假记录**: {excluded_count} 条（寒假1-2月、暑假7-8月，不算入分析）")
    lines.append("")

    if not records:
        lines.append("> ⚠️ 该角色没有非寒暑假期间的完课记录，无法进行分析。")
        return "\n".join(lines)

    lines.extend(_render_weekly_distribution(day_counts, weekday_periods))
    lines.extend(_render_weekly_trend(weeks_data, analysis))
    lines.extend(_render_record_details(records))
    return "\n".join(lines)


def _count_bar(count, peak, width):
    """Return a bar of "█" scaled to *count*/*peak* (at least one cell), or "" for a zero count."""
    if count <= 0:
        return ""
    return "█" * max(1, int(count / peak * width))


def _render_weekly_distribution(day_counts, weekday_periods):
    """Build section 1: per-weekday totals, weekend summary and the Mon-Fri period tables."""
    out = ["---", "## 一、一周时间分布", "", "### 各天完课数量", ""]

    # Per-day table.
    total = sum(day_counts.values())
    max_day = max(day_counts.values()) if day_counts else 1
    out.append("| 星期 | 完课数 | 占比 |")
    out.append("|------|--------|------|")
    for index, name in enumerate(WEEKDAY_NAMES):
        cnt = day_counts.get(index, 0)
        pct = f"{cnt / total * 100:.1f}%" if total > 0 else "0%"
        out.append(f"| {name} | {cnt} {_count_bar(cnt, max_day, 20)} | {pct} |")
    out.append("")

    # Weekend summary.
    weekday_total = sum(day_counts.get(d, 0) for d in range(5))
    sat = day_counts.get(5, 0)
    sun = day_counts.get(6, 0)
    out += ["### 规律小结", ""]
    if sat + sun > 0:
        out.append(f"- **周末上课**: ✅ 是 — 周六 {sat} 节，周日 {sun} 节")
    else:
        out.append("- **周末上课**: ❌ 否 — 周末无完课记录")

    # Period-by-weekday table (Monday to Friday); periods with no lessons are omitted.
    out += [
        "",
        "### 周一至周五上课时段分布",
        "",
        "| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |",
        "|------|------|------|------|------|------|------|",
    ]
    for period in ["上午", "中午", "下午", "晚上", "凌晨"]:
        period_data = weekday_periods.get(period, {})
        period_total = sum(period_data.values())
        if period_total == 0:
            continue
        cells = [period]
        for d in range(5):
            cnt = period_data.get(d, 0)
            cells.append(str(cnt) if cnt > 0 else "-")
        cells.append(str(period_total))
        out.append(f"| {' | '.join(cells)} |")
    out.append("")

    # One bullet per non-empty period with its share of Mon-Fri lessons.
    out.append("**时段规律分析**:")
    for period in ["上午", "中午", "下午", "晚上"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        pct = period_sum / weekday_total * 100 if weekday_total > 0 else 0
        active_days = [WEEKDAY_NAMES[d] for d in range(5) if period_data.get(d, 0) > 0]
        if active_days:
            out.append(f"- **{period}**（{period_sum}节, {pct:.0f}%）→ 集中在 {'、'.join(active_days)}")
        else:
            out.append(f"- **{period}**（{period_sum}节, {pct:.0f}%）")
    out.append("")
    return out


def _render_weekly_trend(weeks_data, analysis):
    """Build section 2: key figures, gap weeks, the per-week table and trend notes."""
    continuity = '✅ 每周连续上课，无中断' if analysis['consecutive'] else '⚠️ 存在中断周（见下方）'
    out = [
        "---",
        "## 二、跨周学习趋势",
        "",
        "### 基本数据",
        f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周（含空周），有课周数 **{analysis['total_weeks']}** 周",
        f"- 有效完课总数 **{analysis['total_lessons']}** 节",
        f"- 平均每周完课 **{analysis['avg_per_week']}** 节",
        f"- 连续性: {continuity}",
        "",
    ]

    if analysis["empty_weeks"]:
        gaps = []
        for y, w in sorted(analysis["empty_weeks"]):
            monday = datetime.fromisocalendar(y, w, 1)
            gaps.append(f"{y}年W{w:02d}（{monday.strftime('%m/%d')}起）")
        out += ["### 中断周明细", f"- {', '.join(gaps)}", ""]

    out += [
        "### 各周完课详情",
        "",
        "| 周次 | 起止日期 | 完课数 | 趋势 |",
        "|------|----------|--------|------|",
    ]
    max_count = max(c for _, _, c in weeks_data) if weeks_data else 1
    prev_cnt = None
    for y, w, cnt in weeks_data:
        monday = datetime.fromisocalendar(y, w, 1)
        sunday = monday + timedelta(days=6)
        date_range = f"{monday.strftime('%m/%d')}-{sunday.strftime('%m/%d')}"

        # Flag a week relative to the previous *listed* week.
        marker = ""
        if prev_cnt is not None:
            if prev_cnt > 0 and cnt >= prev_cnt * 2:
                marker = "📈 突增"
            elif cnt > prev_cnt * 1.3:
                marker = "📈"
            elif prev_cnt > 0 and cnt < prev_cnt * 0.7:
                marker = "📉"

        out.append(f"| {y}W{w:02d} | {date_range} | {cnt} {_count_bar(cnt, max_count, 15)} | {marker} |")
        prev_cnt = cnt
    out.append("")

    # Both averages are computed over len(weeks_data) // 2 weeks (the middle
    # week is left out for an odd count), so both labels use that number.
    half_weeks = len(weeks_data) // 2
    out += [
        "### 趋势分析",
        f"- **整体趋势**: {analysis['trend']}",
        f"  - 前半段（前 {half_weeks} 周）平均: {analysis['first_half_avg']} 节/周",
        f"  - 后半段（后 {half_weeks} 周）平均: {analysis['second_half_avg']} 节/周",
        "",
    ]

    # Notable changes: the first week that at least doubles its predecessor,
    # and the first week that falls to <= 1 after a week with >= 3 lessons.
    pairs = list(zip(weeks_data, weeks_data[1:]))
    surge = next(
        ((prev, cur) for prev, cur in pairs if prev[2] > 0 and cur[2] >= prev[2] * 2),
        None,
    )
    drop = next(
        ((prev, cur) for prev, cur in pairs if prev[2] >= 3 and cur[2] <= 1),
        None,
    )
    events = []
    for found, icon, label in ((surge, "⚡", "完课量突增"), (drop, "🔻", "完课量骤降")):
        if found is None:
            continue
        (_, _, before), (y, w, after) = found
        monday = datetime.fromisocalendar(y, w, 1)
        events.append(f"{icon} **{y}年W{w:02d}周（{monday.strftime('%m/%d')}起）{label}**：{before}→{after} 节")

    if events:
        out.append("**值得关注的变化**:")
        out.extend(f"- {event}" for event in events)
        out.append("")
    return out


def _render_record_details(records):
    """Build section 3: one table row per record that has a timestamp."""
    out = [
        "---",
        "## 三、完课记录明细",
        "",
        "| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |",
        "|------|------|------|------|------|------|--------|",
    ]
    # Number only the rows that are rendered so the sequence has no gaps.
    dated = (r for r in records if r["updated_at"] is not None)
    for seq, record in enumerate(dated, 1):
        dt = record["updated_at"]
        date_str = dt.strftime("%Y-%m-%d")
        time_str = dt.strftime("%H:%M")
        weekday = WEEKDAY_NAMES[dt.weekday()]
        period = classify_period(dt.hour)
        level = record.get("level") or "-"
        chapter_id = record.get("chapter_id") or "-"
        out.append(f"| {seq} | {date_str} | {time_str} | {weekday} | {period} | {level} | {chapter_id} |")
    out.append("")
    return out


# ── 主函数 ────────────────────────────────────────────

def main():
    """Command-line entry point: ``studytime_analysis.py <role_id>``.

    Validates the single positional argument, runs both database queries and
    the two analyses, then prints the Markdown report to stdout. Usage and
    validation errors are written to stderr and end the process with exit
    status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python3 studytime_analysis.py <role_id>", file=sys.stderr)
        sys.exit(1)

    raw_role_id = cli_args[0]
    try:
        role_id = int(raw_role_id)
    except ValueError:
        print(f"错误: 角色ID必须是数字，收到: {raw_role_id}", file=sys.stderr)
        sys.exit(1)

    records = fetch_completion_records(role_id)
    excluded_count = count_excluded_records(role_id)
    distribution = analyze_weekly_distribution(records)
    trend = analyze_weekly_trend(records)

    print(format_report(role_id, records, excluded_count, *distribution, *trend))


if __name__ == "__main__":
    main()