#!/usr/bin/env python3
"""studytime-analysis: study-time analysis tool for a single role.

Usage:  python3 studytime_analysis.py <role_id>
Output: an analysis report in Markdown, printed to stdout.

Data source: PostgreSQL Online (``vala`` database).
Core tables: user_chapter_play_record_0 .. user_chapter_play_record_7
"""

import os
import sys
from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta

# NOTE: psycopg2 is imported inside the two database functions below rather
# than here, so the pure analysis/formatting functions stay importable (and
# testable) on machines that do not have the database driver installed.

# ── Configuration ────────────────────────────────────
PG_CONFIG = {
    "host": os.environ.get("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    "port": int(os.environ.get("PG_DB_PORT", "28591")),
    "user": os.environ.get("PG_DB_USER", "ai_member"),
    "password": os.environ.get("PG_DB_PASSWORD", ""),
    "dbname": os.environ.get("PG_DB_DATABASE", "vala"),
}

# Number of user_chapter_play_record_<n> tables that are queried (0..7).
RECORD_TABLE_COUNT = 8

EXCLUDED_MONTHS = (1, 2, 7, 8)  # winter break Jan-Feb, summer break Jul-Aug

WEEKDAY_NAMES = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]

# Period name -> half-open hour range [lo, hi).
PERIODS = OrderedDict([
    ("凌晨", (0, 6)),
    ("上午", (6, 12)),
    ("中午", (12, 14)),
    ("下午", (14, 18)),
    ("晚上", (18, 24)),
])


# ── Database access ──────────────────────────────────
def get_connection():
    """Open and return a new PostgreSQL connection built from ``PG_CONFIG``.

    Raises:
        ImportError: if psycopg2 is not installed.
        psycopg2.Error: if the connection cannot be established.
    """
    import psycopg2

    return psycopg2.connect(**PG_CONFIG)


def fetch_completion_records(role_id):
    """Fetch every completed-lesson record for ``role_id`` (holidays included).

    All ``RECORD_TABLE_COUNT`` record tables are queried with ``UNION ALL`` and
    the combined result is ordered by ``updated_at`` ascending.

    Args:
        role_id: value matched against the ``user_id`` column.

    Returns:
        A list of dict-like rows with the keys ``user_id``, ``chapter_id``,
        ``chapter_unique_id``, ``level`` and ``updated_at``.
    """
    import psycopg2.extras

    # The table suffix comes from range(), never from user input, so
    # interpolating it is safe; the role id itself is a bound parameter that
    # every sub-query reuses.
    union_sql = " UNION ALL ".join(
        f"""
            SELECT user_id, chapter_id, chapter_unique_id, level, updated_at
            FROM user_chapter_play_record_{table_no}
            WHERE user_id = %(role_id)s AND play_status = 1
        """
        for table_no in range(RECORD_TABLE_COUNT)
    )
    sql = f"""
        SELECT * FROM (
            {union_sql}
        ) t
        ORDER BY updated_at ASC
    """

    conn = get_connection()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, {"role_id": role_id})
            rows = cur.fetchall()
    finally:
        # ``with conn`` would only end the transaction, so close explicitly.
        conn.close()
    return rows


def is_holiday(dt):
    """Return True when ``dt`` falls in a school-holiday month.

    Holiday months are listed in ``EXCLUDED_MONTHS``; ``None`` is never a
    holiday.
    """
    if dt is None:
        return False
    return dt.month in EXCLUDED_MONTHS


def split_records(records):
    """Split records into non-holiday and holiday groups.

    Records whose ``updated_at`` is ``None`` are dropped from both groups.

    Returns:
        ``(non_holiday, holiday)`` — two lists preserving the input order.
    """
    non_holiday = []
    holiday = []
    for record in records:
        dt = record["updated_at"]
        if dt is None:
            continue
        (holiday if is_holiday(dt) else non_holiday).append(record)
    return non_holiday, holiday


# ── Analysis ─────────────────────────────────────────
def classify_period(hour):
    """Return the name of the day period containing ``hour``.

    Falls back to ``"未知"`` when the hour is outside every range in
    ``PERIODS``.
    """
    for name, (lo, hi) in PERIODS.items():
        if lo <= hour < hi:
            return name
    return "未知"


def analyze_weekly_distribution(records):
    """Count completed lessons per weekday and, for Mon-Fri, per day period.

    Records whose ``updated_at`` is ``None`` are ignored.

    Returns:
        ``(day_counts, weekday_periods)`` where ``day_counts`` maps weekday
        index (0 = Monday) to a count, and ``weekday_periods`` maps
        period name -> weekday index (0..4 only) -> count.
    """
    day_counts = defaultdict(int)
    weekday_periods = defaultdict(lambda: defaultdict(int))

    for record in records:
        dt = record["updated_at"]
        if dt is None:
            continue
        # NOTE(review): weekday and hour are read from the datetime exactly as
        # the driver returned it; presumably that is already local time —
        # confirm against the session time zone.
        weekday = dt.weekday()
        day_counts[weekday] += 1
        if weekday < 5:
            weekday_periods[classify_period(dt.hour)][weekday] += 1

    return day_counts, weekday_periods


def analyze_weekly_trend(records):
    """Aggregate completed lessons per ISO week and summarise the trend.

    Records whose ``updated_at`` is ``None`` are ignored.

    Returns:
        ``(weeks_data, analysis)``. ``weeks_data`` is a list of
        ``(iso_year, iso_week, count)`` sorted chronologically and containing
        only weeks with at least one lesson. ``analysis`` is a dict of summary
        figures (see the return statement for its keys). An empty ``records``
        yields ``([], {})``.
    """
    if not records:
        return [], {}

    week_counts = defaultdict(int)
    for record in records:
        dt = record["updated_at"]
        if dt is None:
            continue
        iso = dt.isocalendar()
        week_counts[(iso[0], iso[1])] += 1

    sorted_weeks = sorted(week_counts)
    weeks_data = [(y, w, week_counts[(y, w)]) for y, w in sorted_weeks]

    total_weeks = len(weeks_data)
    total_lessons = sum(week_counts.values())
    avg_per_week = round(total_lessons / total_weeks, 1) if total_weeks > 0 else 0

    # Calendar span, counting weeks without any lesson as well.
    if sorted_weeks:
        first_year, first_week = sorted_weeks[0]
        last_year, last_week = sorted_weeks[-1]
        first = datetime.fromisocalendar(first_year, first_week, 1)
        last = datetime.fromisocalendar(last_year, last_week, 1)
        total_span_weeks = ((last - first).days // 7) + 1

        weeks_in_span = set()
        cursor = first
        while cursor <= last:
            iso = cursor.isocalendar()
            weeks_in_span.add((iso[0], iso[1]))
            cursor += timedelta(days=7)
        empty_weeks = sorted(weeks_in_span - set(sorted_weeks))
    else:
        total_span_weeks = 0
        empty_weeks = []

    # Trend: compare two equally sized halves of the active weeks. With an
    # odd number of weeks the middle week belongs to neither half.
    half = total_weeks // 2
    first_half = weeks_data[:half]
    second_half = weeks_data[total_weeks - half:]
    first_half_avg = sum(c for _, _, c in first_half) / half if half > 0 else 0
    second_half_avg = (
        sum(c for _, _, c in second_half) / len(second_half) if second_half else 0
    )

    trend = "持平"
    if first_half_avg > 0:
        ratio = second_half_avg / first_half_avg
        if ratio > 1.15:
            trend = "上涨 ↑"
        elif ratio < 0.85:
            trend = "下降 ↓"

    return weeks_data, {
        "total_weeks": total_weeks,
        "total_span_weeks": total_span_weeks,
        "total_lessons": total_lessons,
        "avg_per_week": avg_per_week,
        "consecutive": len(empty_weeks) == 0,
        "empty_weeks": empty_weeks,
        "first_half_avg": round(first_half_avg, 1),
        "second_half_avg": round(second_half_avg, 1),
        # Number of weeks each average was actually computed over, so the
        # report can label the halves truthfully.
        "first_half_weeks": len(first_half),
        "second_half_weeks": len(second_half),
        "trend": trend,
    }


# ── Report formatting ────────────────────────────────
def _bar(count, peak, width):
    """Return a bar of at most ``width`` cells; at least one cell if count > 0."""
    if count <= 0:
        return ""
    return "█" * max(1, int(count / peak * width))


def _render_weekly_distribution(non_holiday_records, day_counts, weekday_periods):
    """Render section 1 (per-weekday and per-period distribution) as lines."""
    lines = [
        "---",
        f"## 一、一周时间分布(仅非寒暑假,{len(non_holiday_records)} 条记录)",
        "",
        "### 各天完课数量",
        "",
    ]

    # Per-weekday table.
    total = sum(day_counts.values())
    max_day = max(day_counts.values()) if day_counts else 1
    lines.append("| 星期 | 完课数 | 占比 |")
    lines.append("|------|--------|------|")
    for day_idx, name in enumerate(WEEKDAY_NAMES):
        cnt = day_counts.get(day_idx, 0)
        pct = f"{cnt / total * 100:.1f}%" if total > 0 else "0%"
        bar = _bar(cnt, max_day, 20)
        lines.append(f"| {name} | {cnt} {bar} | {pct} |")
    lines.append("")

    # Weekend summary.
    weekday_total = sum(day_counts.get(d, 0) for d in range(5))
    weekend_total = sum(day_counts.get(d, 0) for d in range(5, 7))
    lines.append("### 规律小结")
    lines.append("")
    if weekend_total > 0:
        sat = day_counts.get(5, 0)
        sun = day_counts.get(6, 0)
        lines.append(f"- **周末上课**: ✅ 是 — 周六 {sat} 节,周日 {sun} 节")
    else:
        lines.append("- **周末上课**: ❌ 否 — 周末无完课记录")

    # Period table, Monday to Friday; periods without lessons are omitted.
    lines.append("")
    lines.append("### 周一至周五上课时段分布")
    lines.append("")
    lines.append("| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |")
    lines.append("|------|------|------|------|------|------|------|")
    for period in ["上午", "中午", "下午", "晚上", "凌晨"]:
        period_data = weekday_periods.get(period, {})
        period_total = sum(period_data.values())
        if period_total == 0:
            continue
        row = [period]
        for d in range(5):
            cnt = period_data.get(d, 0)
            row.append(str(cnt) if cnt > 0 else "-")
        row.append(str(period_total))
        lines.append(f"| {' | '.join(row)} |")
    lines.append("")

    # Period pattern bullets (the early-morning period is not listed here).
    lines.append("**时段规律分析**:")
    for period in ["上午", "中午", "下午", "晚上"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        pct = period_sum / weekday_total * 100 if weekday_total > 0 else 0
        active_days = [WEEKDAY_NAMES[d] for d in range(5) if period_data.get(d, 0) > 0]
        if active_days:
            lines.append(
                f"- **{period}**({period_sum}节, {pct:.0f}%)→ 集中在 {'、'.join(active_days)}"
            )
        else:
            lines.append(f"- **{period}**({period_sum}节, {pct:.0f}%)")
    lines.append("")
    return lines


def _render_weekly_trend(weeks_data, analysis):
    """Render section 2 (week-over-week trend) as lines."""
    lines = [
        "---",
        "## 二、跨周学习趋势",
        "",
        "### 基本数据",
        f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周(含空周),有课周数 **{analysis['total_weeks']}** 周",
        f"- 有效完课总数 **{analysis['total_lessons']}** 节",
        f"- 平均每周完课 **{analysis['avg_per_week']}** 节",
        f"- 连续性: {'✅ 每周连续上课,无中断' if analysis['consecutive'] else '⚠️ 存在中断周(见下方)'}",
        "",
    ]

    # Weeks inside the span that have no lesson at all.
    if analysis["empty_weeks"]:
        lines.append("### 中断周明细")
        empty_list = []
        for y, w in sorted(analysis["empty_weeks"]):
            monday = datetime.fromisocalendar(y, w, 1)
            empty_list.append(f"{y}年W{w:02d}({monday.strftime('%m/%d')}起)")
        lines.append(f"- {', '.join(empty_list)}")
        lines.append("")

    # Per-week table; the marker compares each week with the previous
    # *active* week, because weeks_data holds no empty weeks.
    lines.append("### 各周完课详情")
    lines.append("")
    lines.append("| 周次 | 起止日期 | 完课数 | 趋势 |")
    lines.append("|------|----------|--------|------|")
    max_count = max(c for _, _, c in weeks_data) if weeks_data else 1
    for idx, (y, w, cnt) in enumerate(weeks_data):
        monday = datetime.fromisocalendar(y, w, 1)
        sunday = monday + timedelta(days=6)
        date_range = f"{monday.strftime('%m/%d')}-{sunday.strftime('%m/%d')}"
        marker = ""
        if idx > 0:
            prev_cnt = weeks_data[idx - 1][2]
            if prev_cnt > 0 and cnt >= prev_cnt * 2:
                marker = "📈 突增"
            elif cnt > prev_cnt * 1.3:
                marker = "📈"
            elif prev_cnt > 0 and cnt < prev_cnt * 0.7:
                marker = "📉"
        bar = _bar(cnt, max_count, 15)
        lines.append(f"| {y}W{w:02d} | {date_range} | {cnt} {bar} | {marker} |")
    lines.append("")

    # Trend summary. The week counts come from the analysis itself so the
    # labels match the averages; the fallback covers analysis dicts that do
    # not carry these keys.
    fallback_half = len(weeks_data) // 2
    first_half_weeks = analysis.get("first_half_weeks", fallback_half)
    second_half_weeks = analysis.get("second_half_weeks", fallback_half)
    lines.append("### 趋势分析")
    lines.append(f"- **整体趋势**: {analysis['trend']}")
    lines.append(f" - 前半段(前 {first_half_weeks} 周)平均: {analysis['first_half_avg']} 节/周")
    lines.append(f" - 后半段(后 {second_half_weeks} 周)平均: {analysis['second_half_avg']} 节/周")
    lines.append("")

    # Notable events: the first surge (count at least doubled) and the first
    # sharp drop (from >= 3 lessons down to <= 1).
    if len(weeks_data) >= 2:
        counts = [c for _, _, c in weeks_data]
        events_found = []
        for i in range(1, len(counts)):
            if counts[i - 1] > 0 and counts[i] >= counts[i - 1] * 2:
                y, w, _ = weeks_data[i]
                monday = datetime.fromisocalendar(y, w, 1)
                events_found.append(
                    f"⚡ **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量突增**:{counts[i-1]}→{counts[i]} 节"
                )
                break
        for i in range(1, len(counts)):
            if counts[i - 1] >= 3 and counts[i] <= 1:
                y, w, _ = weeks_data[i]
                monday = datetime.fromisocalendar(y, w, 1)
                events_found.append(
                    f"🔻 **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量骤降**:{counts[i-1]}→{counts[i]} 节"
                )
                break
        if events_found:
            lines.append("**值得关注的变化**:")
            lines.extend(f"- {ev}" for ev in events_found)
            lines.append("")
    return lines


def _render_record_details(all_records):
    """Render section 3 (one table row per completed lesson) as lines."""
    lines = [
        "---",
        f"## 三、完课记录明细(全部 {len(all_records)} 条记录)",
        "",
        "| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |",
        "|------|------|------|------|------|------|--------|",
    ]
    for seq, record in enumerate(all_records, 1):
        dt = record["updated_at"]
        if dt is None:
            # Rows without a timestamp are skipped but keep their sequence number.
            continue
        date_str = dt.strftime("%Y-%m-%d")
        time_str = dt.strftime("%H:%M")
        weekday = WEEKDAY_NAMES[dt.weekday()]
        period = classify_period(dt.hour)
        level = record.get("level") or "-"
        chapter_id = record.get("chapter_id") or "-"
        lines.append(
            f"| {seq} | {date_str} | {time_str} | {weekday} | {period} | {level} | {chapter_id} |"
        )
    lines.append("")
    return lines


def format_report(role_id, all_records, non_holiday_records, holiday_count,
                  day_counts, weekday_periods, weeks_data, analysis):
    """Build the Markdown analysis report.

    Args:
        role_id: role identifier shown in the title.
        all_records: every completed-lesson record (used for the detail table).
        non_holiday_records: records outside holiday months (used for the
            weekly-distribution section).
        holiday_count: number of holiday records (reported only).
        day_counts, weekday_periods: result of ``analyze_weekly_distribution``
            on ``non_holiday_records``.
        weeks_data, analysis: result of ``analyze_weekly_trend`` on
            ``all_records``.

    Returns:
        The report as one newline-joined string. With no records at all only
        the header and a notice are returned; with no non-holiday records the
        weekly-distribution section is replaced by a notice.
    """
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [
        f"# 📊 学习时间分析报告 — 角色 {role_id}",
        "",
        f"**分析时间**: {now_str}",
        f"**完课记录总数**: {len(all_records)} 条",
    ]
    if holiday_count > 0:
        lines.append(f"**其中寒暑假记录**: {holiday_count} 条(寒假1-2月、暑假7-8月)")
        lines.append(f"**非寒暑假记录**: {len(non_holiday_records)} 条")
    lines.append("")
    lines.append(f"> ⚠️ **说明**: 「一周时间分布」仅基于非寒暑假数据(共 {len(non_holiday_records)} 条),排除寒暑假作息差异的干扰。")
    lines.append(f"> 「跨周趋势」和「完课明细」包含全部数据(共 {len(all_records)} 条),反映完整学习轨迹。")
    lines.append("")

    if not all_records:
        lines.append("> ⚠️ 该角色没有任何完课记录。")
        return "\n".join(lines)

    if non_holiday_records:
        lines.extend(
            _render_weekly_distribution(non_holiday_records, day_counts, weekday_periods)
        )
    else:
        # Without data the section would only show zeros and a misleading
        # "no weekend classes" conclusion, so emit the notice alone.
        lines.append("> ⚠️ 该角色在非寒暑假期间没有完课记录,一周时间分布无法分析。")
        lines.append("")

    lines.extend(_render_weekly_trend(weeks_data, analysis))
    lines.extend(_render_record_details(all_records))
    return "\n".join(lines)


# ── Entry point ──────────────────────────────────────
def main():
    """Parse the role id from ``sys.argv``, run the analysis, print the report.

    Exits with status 1 when the role id is missing or not an integer.
    """
    if len(sys.argv) < 2:
        print("用法: python3 studytime_analysis.py <role_id>", file=sys.stderr)
        sys.exit(1)

    try:
        role_id = int(sys.argv[1])
    except ValueError:
        print(f"错误: 角色ID必须是数字,收到: {sys.argv[1]}", file=sys.stderr)
        sys.exit(1)

    all_records = fetch_completion_records(role_id)
    non_holiday_records, holiday_records = split_records(all_records)

    # Weekly distribution uses non-holiday data only.
    day_counts, weekday_periods = analyze_weekly_distribution(non_holiday_records)

    # The week-over-week trend uses every record.
    weeks_data, analysis = analyze_weekly_trend(all_records)

    report = format_report(
        role_id, all_records, non_holiday_records, len(holiday_records),
        day_counts, weekday_periods, weeks_data, analysis,
    )
    print(report)


if __name__ == "__main__":
    main()