#!/usr/bin/env python3
"""studytime-analysis: per-role study-time analysis tool.

Usage:
    python3 studytime_analysis.py <role_id> [--format html] [--output <path>]

By default a Markdown report is printed to stdout.

    --format html   emit the detailed HTML report instead
    --output PATH   write the HTML report to PATH (html mode only; stdout
                    when omitted)
"""

import os
import sys
import json
import argparse
import html
import re
from datetime import datetime, timedelta
from collections import defaultdict, OrderedDict

# The database drivers are only needed when a connection is actually opened.
# Guarding the imports keeps `--help` and the pure analysis/formatting
# functions usable on machines without the drivers installed.
try:
    import psycopg2
    import psycopg2.extras
except ImportError:
    psycopg2 = None
try:
    import pymysql
except ImportError:
    pymysql = None

# ── Configuration ─────────────────────────────────────
PG_CONFIG = {
    "host": os.environ.get("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    "port": int(os.environ.get("PG_DB_PORT", "28591")),
    "user": os.environ.get("PG_DB_USER", "ai_member"),
    "password": os.environ.get("PG_DB_PASSWORD", ""),
    "dbname": os.environ.get("PG_DB_DATABASE", "vala"),
}

MYSQL_CONFIG = {
    "host": os.environ.get("MYSQL_HOST_online", "bj-cdb-dh2fkqa0.sql.tencentcdb.com"),
    "port": int(os.environ.get("MYSQL_PORT_online", "27751")),
    "user": os.environ.get("MYSQL_USERNAME_online", "read_only"),
    "password": os.environ.get("MYSQL_PASSWORD_online", ""),
    "charset": "utf8mb4",
}

# Months treated as winter/summer holiday; records completed in these months
# are left out of the weekday distribution.
EXCLUDED_MONTHS = (1, 2, 7, 8)

# Indexed by datetime.weekday() (0 = Monday).
WEEKDAY_NAMES = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]

# Day periods as half-open hour ranges [lo, hi).
PERIODS = OrderedDict([
    ("凌晨", (0, 6)),
    ("上午", (6, 12)),
    ("中午", (12, 14)),
    ("下午", (14, 18)),
    ("晚上", (18, 24)),
])


# ── Database connections ──────────────────────────────
def get_pg_connection():
    """Open a PostgreSQL connection using PG_CONFIG.

    Raises:
        ImportError: if psycopg2 is not installed.
    """
    if psycopg2 is None:
        raise ImportError("psycopg2 is required to open the PostgreSQL connection")
    return psycopg2.connect(
        host=PG_CONFIG["host"],
        port=PG_CONFIG["port"],
        user=PG_CONFIG["user"],
        password=PG_CONFIG["password"],
        dbname=PG_CONFIG["dbname"],
    )


def get_mysql_connection(db="vala_user"):
    """Open a MySQL connection to database *db* using MYSQL_CONFIG.

    Raises:
        ImportError: if pymysql is not installed.
    """
    if pymysql is None:
        raise ImportError("pymysql is required to open the MySQL connection")
    return pymysql.connect(
        host=MYSQL_CONFIG["host"],
        port=MYSQL_CONFIG["port"],
        user=MYSQL_CONFIG["user"],
        password=MYSQL_CONFIG["password"],
        db=db,
        charset=MYSQL_CONFIG["charset"],
    )


# ── Chapter mapping (MySQL `vala` database) ───────────
_chapter_info_cache = None


def fetch_chapter_info_map():
    """Return a mapping chapter_id -> chapter info, loaded once from MySQL.

    Each value is a dict with the keys ``level``, ``unit_name``, ``unit_num``,
    ``lesson_index`` and ``lesson_type``. The result is memoised in the
    module-level ``_chapter_info_cache`` so the query runs at most once per
    process.
    """
    global _chapter_info_cache
    if _chapter_info_cache is not None:
        return _chapter_info_cache

    conn = get_mysql_connection("vala")
    try:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT gc.id AS chapter_id,
                       IFNULL(sp.level, '') AS level,
                       IFNULL(sp.cn_name, '') AS unit_name,
                       IFNULL(sp.season_of_quarter, -1) AS unit_num,
                       gc.`index` AS lesson_index,
                       gc.lesson_type
                FROM vala_game_chapter gc
                LEFT JOIN vala_game_season_package sp
                       ON gc.season_package_id = sp.id
            """)
            rows = cur.fetchall()
    finally:
        conn.close()

    cache = {}
    for chapter_id, level, unit_name, unit_num, lesson_index, lesson_type in rows:
        cache[int(chapter_id)] = {
            "level": level or "",
            "unit_name": unit_name or "",
            "unit_num": int(unit_num) if unit_num is not None else -1,
            "lesson_index": int(lesson_index) if lesson_index is not None else 0,
            "lesson_type": int(lesson_type) if lesson_type is not None else 1,
        }
    _chapter_info_cache = cache
    return _chapter_info_cache


# ── Role information ──────────────────────────────────
def fetch_role_info(role_id):
    """Load basic role information (including registration time) from MySQL.

    Returns:
        A dict with the keys ``role_id``, ``account_id``, ``nickname``,
        ``gender``, ``age``, ``phone_tail`` and ``reg_time``, or ``None`` when
        no character row matches *role_id*. Missing values are empty strings.
    """
    sql = """
        SELECT c.id AS role_id, c.account_id, c.nickname, c.gender,
               c.birthday, c.created_at, a.tel
        FROM vala_app_character c
        LEFT JOIN vala_app_account a ON c.account_id = a.id
        WHERE c.id = %s
    """
    conn = get_mysql_connection("vala_user")
    try:
        with conn.cursor() as cur:
            cur.execute(sql, (role_id,))
            row = cur.fetchone()
    finally:
        conn.close()

    if not row:
        return None

    role_id_val, account_id, nickname, gender, birthday, reg_time, tel = row

    if gender == 0:
        gender_str = "女"
    elif gender == 1:
        gender_str = "男"
    elif gender is not None:
        gender_str = str(gender)
    else:
        gender_str = ""

    # Age is "current year minus birth year"; only the leading year component
    # of the birthday is used.
    age = ""
    if birthday:
        year_part = str(birthday).split("-")[0]
        if year_part.isdecimal():
            age = datetime.now().year - int(year_part)

    # Only the last four digits of the phone number are exposed.
    phone_tail = ""
    if tel:
        digits = "".join(ch for ch in str(tel) if ch.isdigit())
        phone_tail = digits[-4:]

    reg_time_str = ""
    if reg_time:
        if isinstance(reg_time, datetime):
            reg_time_str = reg_time.strftime("%Y-%m-%d %H:%M")
        else:
            reg_time_str = str(reg_time)[:16]

    return {
        "role_id": role_id_val,
        "account_id": account_id,
        "nickname": nickname or "",
        "gender": gender_str,
        "age": age,
        "phone_tail": phone_tail,
        "reg_time": reg_time_str,
    }


def check_retention(records, cutoff_days=14):
    """Classify retention from the completion timestamps.

    Returns "无完课记录" for an empty record list, "正常" when at least one
    record was completed within the last *cutoff_days* days, otherwise "流失".
    Records without an ``updated_at`` value are ignored.
    """
    if not records:
        return "无完课记录"
    cutoff = datetime.now() - timedelta(days=cutoff_days)
    has_recent = any(
        # tzinfo is dropped so aware timestamps compare against the naive cutoff.
        r["updated_at"] is not None and r["updated_at"].replace(tzinfo=None) >= cutoff
        for r in records
    )
    return "正常" if has_recent else "流失"


# ── Completion records ────────────────────────────────
def fetch_completion_records(role_id):
    """Fetch every completed-lesson record of *role_id* from PostgreSQL.

    The eight shard tables ``user_chapter_play_record_0`` .. ``_7`` are
    combined with UNION ALL and filtered on ``play_status = 1``. Rows are
    dict-like and ordered by ``updated_at`` ascending; ``created_at`` is
    included so the time spent per lesson can be derived.
    """
    params = {}
    union_parts = []
    for i in range(8):
        pn = f"rid_{i}"
        params[pn] = role_id
        union_parts.append(f"""
            SELECT user_id, chapter_id, chapter_unique_id, level,
                   created_at, updated_at
            FROM user_chapter_play_record_{i}
            WHERE user_id = %({pn})s AND play_status = 1
        """)
    sql = f"SELECT * FROM ({' UNION ALL '.join(union_parts)}) t ORDER BY updated_at ASC"

    conn = get_pg_connection()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, params)
            rows = cur.fetchall()
    finally:
        conn.close()
    return rows


def is_holiday(dt):
    """Return True when *dt* falls in one of EXCLUDED_MONTHS (False for None)."""
    if dt is None:
        return False
    return dt.month in EXCLUDED_MONTHS


def split_records(records):
    """Split records into ``(non_holiday, holiday)`` lists by ``updated_at``.

    Records whose ``updated_at`` is None are dropped from both lists.
    """
    non_holiday, holiday = [], []
    for r in records:
        dt = r["updated_at"]
        if dt is None:
            continue
        (holiday if is_holiday(dt) else non_holiday).append(r)
    return non_holiday, holiday


# ── Analysis ──────────────────────────────────────────
def classify_period(hour):
    """Map an hour (0-23) to its PERIODS name; "未知" when out of range."""
    for name, (lo, hi) in PERIODS.items():
        if lo <= hour < hi:
            return name
    return "未知"


def analyze_weekly_distribution(records):
    """Count completions per weekday and per (period, weekday).

    Returns:
        ``(day_counts, weekday_periods)`` where ``day_counts[weekday]`` covers
        all seven days and ``weekday_periods[period][weekday]`` covers Monday
        to Friday only.
    """
    day_counts = defaultdict(int)
    weekday_periods = defaultdict(lambda: defaultdict(int))
    for r in records:
        dt = r["updated_at"]
        if dt is None:
            continue
        weekday = dt.weekday()
        day_counts[weekday] += 1
        if weekday < 5:
            weekday_periods[classify_period(dt.hour)][weekday] += 1
    return day_counts, weekday_periods


def analyze_weekly_trend(records):
    """Aggregate completions per ISO week and derive trend statistics.

    Returns:
        ``(weeks_data, analysis)``. ``weeks_data`` is a chronologically sorted
        list of ``(iso_year, iso_week, count)`` for weeks that have at least
        one completion. ``analysis`` is a dict of summary figures; it is empty
        when *records* is empty.

        The trend compares the average of the first half of the active weeks
        with the second half. With an odd number of active weeks the middle
        week belongs to neither half, so both halves always contain
        ``len(weeks_data) // 2`` weeks (reported as ``first_half_weeks`` and
        ``second_half_weeks``).
    """
    if not records:
        return [], {}

    week_counts = defaultdict(int)
    for r in records:
        dt = r["updated_at"]
        if dt is None:
            continue
        iso_year, iso_week, _ = dt.isocalendar()
        week_counts[(iso_year, iso_week)] += 1

    sorted_weeks = sorted(week_counts)
    weeks_data = [(y, w, week_counts[(y, w)]) for y, w in sorted_weeks]

    total_weeks = len(weeks_data)
    total_lessons = sum(c for _, _, c in weeks_data)
    avg_per_week = round(total_lessons / total_weeks, 1) if total_weeks > 0 else 0

    if sorted_weeks:
        first = datetime.fromisocalendar(sorted_weeks[0][0], sorted_weeks[0][1], 1)
        last = datetime.fromisocalendar(sorted_weeks[-1][0], sorted_weeks[-1][1], 1)
        total_span_weeks = ((last - first).days // 7) + 1
        # Walk Monday by Monday to enumerate every ISO week in the span.
        all_weeks_in_span = set()
        cur = first
        while cur <= last:
            iso_year, iso_week, _ = cur.isocalendar()
            all_weeks_in_span.add((iso_year, iso_week))
            cur += timedelta(days=7)
        empty_weeks = sorted(all_weeks_in_span - set(sorted_weeks))
    else:
        total_span_weeks = 0
        empty_weeks = []

    mid = len(weeks_data) // 2
    first_half_data = weeks_data[:mid]
    # Skip the middle week when the number of active weeks is odd.
    second_half_start = mid if len(weeks_data) % 2 == 0 else mid + 1
    second_half_data = weeks_data[second_half_start:]
    first_half_avg = (
        sum(c for _, _, c in first_half_data) / len(first_half_data)
        if first_half_data else 0
    )
    second_half_avg = (
        sum(c for _, _, c in second_half_data) / len(second_half_data)
        if second_half_data else 0
    )

    trend = "持平"
    if first_half_avg > 0:
        ratio = second_half_avg / first_half_avg
        if ratio > 1.15:
            trend = "上涨 ↑"
        elif ratio < 0.85:
            trend = "下降 ↓"

    return weeks_data, {
        "total_weeks": total_weeks,
        "total_span_weeks": total_span_weeks,
        "total_lessons": total_lessons,
        "avg_per_week": avg_per_week,
        "consecutive": not empty_weeks,
        "empty_weeks": empty_weeks,
        "first_half_avg": round(first_half_avg, 1),
        "second_half_avg": round(second_half_avg, 1),
        "first_half_weeks": len(first_half_data),
        "second_half_weeks": len(second_half_data),
        "trend": trend,
    }


# ── Markdown output ───────────────────────────────────
def format_markdown(role_id, role_info, retention_status, all_records,
                    non_holiday_records, holiday_count, day_counts,
                    weekday_periods, weeks_data, analysis):
    """Render the full analysis as a Markdown report and return it as a string.

    The weekday distribution section is based on *non_holiday_records* only;
    the cross-week trend and the record listing use *all_records*.
    """
    lines = []
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')

    lines.append(f"# 📊 学习时间分析报告 — 角色 {role_id}")
    lines.append("")

    if role_info:
        lines.append("## 基本信息")
        lines.append("")
        lines.append("| 项目 | 详情 |")
        lines.append("|------|------|")
        lines.append(f"| 角色ID | {role_info['role_id']} |")
        lines.append(f"| 账号ID | {role_info['account_id']} |")
        if role_info['nickname']:
            lines.append(f"| 角色名字 | {role_info['nickname']} |")
        lines.append(f"| 性别 | {role_info['gender']} |")
        if role_info['age']:
            lines.append(f"| 年龄 | {role_info['age']} 岁 |")
        if role_info['phone_tail']:
            lines.append(f"| 账号手机号后4位 | {role_info['phone_tail']} |")
        if retention_status:
            lines.append(f"| 最近留存状态 | {retention_status} |")
        lines.append("")

    lines.append(f"**分析时间**: {now_str}")
    lines.append(f"**完课记录总数**: {len(all_records)} 条")
    if holiday_count > 0:
        lines.append(f"**其中寒暑假记录**: {holiday_count} 条")
        lines.append(f"**非寒暑假记录**: {len(non_holiday_records)} 条")
    lines.append("")
    lines.append(f"> ⚠️ 一周时间分布仅基于非寒暑假数据({len(non_holiday_records)} 条),跨周趋势和完课明细包含全部数据({len(all_records)} 条)。")
    lines.append("")

    if not all_records:
        lines.append("> ⚠️ 该角色没有任何完课记录。")
        return "\n".join(lines)

    if not non_holiday_records:
        lines.append("> ⚠️ 该角色在非寒暑假期间没有完课记录,一周时间分布无法分析。")

    # ── Section 1: weekday distribution ──
    lines.append("---")
    lines.append(f"## 一、一周时间分布(仅非寒暑假,{len(non_holiday_records)} 条记录)")
    lines.append("")
    lines.append("### 各天完课数量")
    lines.append("")

    total = sum(day_counts.values())
    max_day = max(day_counts.values()) if day_counts else 1
    lines.append("| 星期 | 完课数 | 占比 |")
    lines.append("|------|--------|------|")
    for i, name in enumerate(WEEKDAY_NAMES):
        cnt = day_counts.get(i, 0)
        pct = f"{cnt / total * 100:.1f}%" if total > 0 else "0%"
        # Bar scaled to 20 cells; any non-zero count gets at least one cell.
        bar = "█" * max(1, int(cnt / max_day * 20)) if cnt > 0 else ""
        lines.append(f"| {name} | {cnt} {bar} | {pct} |")
    lines.append("")

    weekday_total = sum(day_counts.get(i, 0) for i in range(5))
    weekend_total = sum(day_counts.get(i, 0) for i in range(5, 7))

    lines.append("### 规律小结")
    lines.append("")
    if weekend_total > 0:
        sat = day_counts.get(5, 0)
        sun = day_counts.get(6, 0)
        lines.append(f"- **周末上课**: ✅ 是 — 周六 {sat} 节,周日 {sun} 节")
    else:
        lines.append("- **周末上课**: ❌ 否 — 周末无完课记录")
    lines.append("")

    lines.append("### 周一至周五上课时段分布")
    lines.append("")
    lines.append("| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |")
    lines.append("|------|------|------|------|------|------|------|")
    for period in ["上午", "中午", "下午", "晚上", "凌晨"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        row = [period]
        for d in range(5):
            cnt = period_data.get(d, 0)
            row.append(str(cnt) if cnt > 0 else "-")
        row.append(str(period_sum))
        lines.append(f"| {' | '.join(row)} |")
    lines.append("")

    lines.append("**时段规律分析**:")
    for period in ["上午", "中午", "下午", "晚上"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        pct = period_sum / weekday_total * 100 if weekday_total > 0 else 0
        active_days = [WEEKDAY_NAMES[d] for d in range(5) if period_data.get(d, 0) > 0]
        if active_days:
            lines.append(f"- **{period}**({period_sum}节, {pct:.0f}%)→ 集中在 {'、'.join(active_days)}")
        else:
            lines.append(f"- **{period}**({period_sum}节, {pct:.0f}%)")
    lines.append("")

    # ── Section 2: cross-week trend ──
    lines.append("---")
    lines.append("## 二、跨周学习趋势")
    lines.append("")
    lines.append("### 基本数据")
    lines.append(f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周(含空周),有课周数 **{analysis['total_weeks']}** 周")
    lines.append(f"- 有效完课总数 **{analysis['total_lessons']}** 节")
    lines.append(f"- 平均每周完课 **{analysis['avg_per_week']}** 节")
    con_str = "✅ 每周连续上课,无中断" if analysis['consecutive'] else "⚠️ 存在中断周(见下方)"
    lines.append(f"- 连续性: {con_str}")
    lines.append("")

    if analysis["empty_weeks"]:
        lines.append("### 中断周明细")
        empty_list = []
        for y, w in sorted(analysis["empty_weeks"]):
            monday = datetime.fromisocalendar(y, w, 1)
            empty_list.append(f"{y}年W{w:02d}({monday.strftime('%m/%d')}起)")
        lines.append(f"- {', '.join(empty_list)}")
        lines.append("")

    lines.append("### 各周完课详情")
    lines.append("")
    lines.append("| 周次 | 起止日期 | 完课数 | 趋势 |")
    lines.append("|------|----------|--------|------|")
    max_count = max(c for _, _, c in weeks_data) if weeks_data else 1
    for i, (y, w, cnt) in enumerate(weeks_data):
        monday = datetime.fromisocalendar(y, w, 1)
        sunday = monday + timedelta(days=6)
        date_range = f"{monday.strftime('%m/%d')}-{sunday.strftime('%m/%d')}"
        # Marker compares against the previous *active* week.
        marker = ""
        if i > 0:
            prev_cnt = weeks_data[i - 1][2]
            if prev_cnt > 0 and cnt >= prev_cnt * 2:
                marker = "📈 突增"
            elif cnt > prev_cnt * 1.3:
                marker = "📈"
            elif prev_cnt > 0 and cnt < prev_cnt * 0.7:
                marker = "📉"
        bar = "█" * max(1, int(cnt / max_count * 15)) if cnt > 0 else ""
        lines.append(f"| {y}W{w:02d} | {date_range} | {cnt} {bar} | {marker} |")
    lines.append("")

    lines.append("### 趋势分析")
    lines.append(f"- **整体趋势**: {analysis['trend']}")
    # Both halves hold len // 2 weeks: with an odd count the middle week is
    # excluded from the averages, so it must not be counted in the labels.
    half = len(weeks_data) // 2
    fhw = analysis.get("first_half_weeks", half)
    shw = analysis.get("second_half_weeks", half)
    lines.append(f"  - 前半段(前 {fhw} 周)平均: {analysis['first_half_avg']} 节/周")
    lines.append(f"  - 后半段(后 {shw} 周)平均: {analysis['second_half_avg']} 节/周")
    lines.append("")

    if len(weeks_data) >= 2:
        counts = [c for _, _, c in weeks_data]
        events_found = []
        # First week whose count at least doubled.
        for i in range(1, len(counts)):
            if counts[i - 1] > 0 and counts[i] >= counts[i - 1] * 2:
                y, w, _ = weeks_data[i]
                monday = datetime.fromisocalendar(y, w, 1)
                events_found.append(f"⚡ **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量突增**:{counts[i-1]}→{counts[i]} 节")
                break
        # First week that fell from three or more lessons to at most one.
        for i in range(1, len(counts)):
            if counts[i - 1] >= 3 and counts[i] <= 1:
                y, w, _ = weeks_data[i]
                monday = datetime.fromisocalendar(y, w, 1)
                events_found.append(f"🔻 **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量骤降**:{counts[i-1]}→{counts[i]} 节")
                break
        if events_found:
            lines.append("**值得关注的变化**:")
            for ev in events_found:
                lines.append(f"- {ev}")
            lines.append("")

    # ── Section 3: record listing ──
    lines.append("---")
    lines.append(f"## 三、完课记录明细(全部 {len(all_records)} 条记录)")
    lines.append("")
    lines.append("| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |")
    lines.append("|------|------|------|------|------|------|--------|")
    for i, r in enumerate(all_records, 1):
        dt = r["updated_at"]
        if dt is None:
            continue
        date_str = dt.strftime("%Y-%m-%d")
        time_str = dt.strftime("%H:%M")
        weekday = WEEKDAY_NAMES[dt.weekday()]
        period = classify_period(dt.hour)
        level = r.get("level") or "-"
        chapter_id = r.get("chapter_id") or "-"
        lines.append(f"| {i} | {date_str} | {time_str} | {weekday} | {period} | {level} | {chapter_id} |")
    lines.append("")

    # ── HTML hint ──
    lines.append("---")
    lines.append("> 💡 是否需要将以上所有详细信息生成为一个 HTML 文件?回复「是」或「需要」即可。")

    return "\n".join(lines)


# ── HTML output ───────────────────────────────────────
def _fmt_dt(dt_val):
    """Format a datetime as 'YYYY-MM-DD HH:MM'; '-' for None."""
    if dt_val is None:
        return "-"
    return dt_val.strftime('%Y-%m-%d %H:%M')


def _fmt_date(dt_val):
    """Format a datetime as 'YYYY-MM-DD'; '-' for None."""
    if dt_val is None:
        return "-"
    return dt_val.strftime('%Y-%m-%d')


def _format_duration(seconds):
    """Format a duration in seconds as 时/分/秒 text; '-' for None or negative."""
    if seconds is None or seconds < 0:
        return "-"
    m, s = divmod(int(seconds), 60)
    if m >= 60:
        h, m = divmod(m, 60)
        return f"{h}时{m}分{s}秒"
    if m > 0:
        return f"{m}分{s}秒"
    return f"{s}秒"


def _unit_display(unit_name, unit_num):
    """Return the unit label: its name, else 'Unit<n>' for n >= 0, else '-'."""
    if unit_name:
        return unit_name
    return f"Unit{unit_num}" if unit_num >= 0 else "-"


def _esc(value):
    """HTML-escape *value* after converting it to a string."""
    return html.escape(str(value))


_BOLD_RE = re.compile(r"\*\*(.+?)\*\*")


def _to_html_text(text):
    """Escape *text* for HTML and turn Markdown ``**bold**`` into <strong>."""
    return _BOLD_RE.sub(r"<strong>\1</strong>", _esc(text))


def _build_weekly_text(all_records, non_holiday_records, day_counts, weekday_periods):
    """Build the prose summary of the weekday/period distribution.

    *all_records* is accepted for signature compatibility and is not used.
    """
    total_non = len(non_holiday_records)
    if total_non == 0:
        return "该角色在非寒暑假期间没有完课记录,无法分析周上课时间分布。"

    parts = []

    # Most active weekday.
    max_day_idx = max(day_counts, key=day_counts.get, default=-1)
    if max_day_idx >= 0:
        parts.append(f"非寒暑假期间共完成 {total_non} 节课,主要集中在 **{WEEKDAY_NAMES[max_day_idx]}**({day_counts[max_day_idx]} 节,占 {day_counts[max_day_idx]/total_non*100:.0f}%)。")

    # Preferred period on working days; ties keep the earlier list entry.
    weekday_total = sum(day_counts.get(i, 0) for i in range(5))
    top_period = None
    top_period_cnt = 0
    for period in ["晚上", "上午", "下午", "中午"]:
        cnt = sum(weekday_periods.get(period, {}).values())
        if cnt > top_period_cnt:
            top_period_cnt = cnt
            top_period = period
    if top_period and top_period_cnt > 0:
        pct = top_period_cnt / weekday_total * 100 if weekday_total > 0 else 0
        parts.append(f"工作日上课集中在 **{top_period}**时段({top_period_cnt} 节,占 {pct:.0f}%)。")

    # Weekend activity.
    sat = day_counts.get(5, 0)
    sun = day_counts.get(6, 0)
    if sat + sun > 0:
        parts.append(f"周末也保持上课节奏,周六 {sat} 节、周日 {sun} 节。")
    else:
        parts.append("周末无上课记录。")

    return " ".join(parts)


def _build_trend_text(weeks_data, analysis):
    """Build the prose summary of the cross-week trend."""
    if not weeks_data:
        return "无完课记录,无法分析趋势。"

    parts = []
    parts.append(f"完课跨越 {analysis['total_span_weeks']} 周(有课 {analysis['total_weeks']} 周),共 {analysis['total_lessons']} 节,周均 {analysis['avg_per_week']} 节。")

    if analysis['consecutive']:
        parts.append("学习连续性良好,无中断周。")
    else:
        empty_list = []
        for y, w in sorted(analysis['empty_weeks']):
            monday = datetime.fromisocalendar(y, w, 1)
            empty_list.append(f"{y}年W{w:02d}({monday.strftime('%m/%d')}起)")
        parts.append(f"存在间断:{'、'.join(empty_list)}。")

    parts.append(f"整体趋势:{analysis['trend']},前半段平均 {analysis['first_half_avg']} 节/周 → 后半段 {analysis['second_half_avg']} 节/周。")

    return " ".join(parts)


def _build_summary(role_info, retention_status, all_records, non_holiday_records,
                   day_counts, weekday_periods, weeks_data, analysis):
    """Build the list of key-characteristic bullet strings.

    *role_info* is accepted for signature compatibility and is not used.
    """
    items = []
    total = len(all_records)
    total_non = len(non_holiday_records)

    if total == 0:
        items.append("暂无完课记录。")
        return items

    # 1. Volume and retention.
    if retention_status == "流失":
        items.append(f"⚠️ 近14天无完课,已**流失**。历史共 {total} 节完课记录。")
    elif retention_status == "正常":
        items.append(f"✅ 状态**正常**,近14天内有完课。累计 {total} 节完课。")
    else:
        items.append(f"累计 {total} 节完课记录。")

    # 2. Learning span, in calendar months between first and last active week.
    if weeks_data:
        first_week = datetime.fromisocalendar(weeks_data[0][0], weeks_data[0][1], 1)
        last_week = datetime.fromisocalendar(weeks_data[-1][0], weeks_data[-1][1], 1)
        span_months = (last_week.year - first_week.year) * 12 + (last_week.month - first_week.month) + 1
        if span_months >= 6:
            items.append(f"📅 长期用户,学习跨度约 {span_months} 个月。")
        elif span_months >= 2:
            items.append(f"📅 中期用户,学习跨度约 {span_months} 个月。")
        else:
            items.append(f"🆕 新用户,学习跨度约 {span_months} 个月,尚在形成学习习惯阶段。")

    # 3. Intensity.
    if analysis['avg_per_week'] >= 6:
        items.append(f"🔥 高强度学习,周均 {analysis['avg_per_week']} 节。")
    elif analysis['avg_per_week'] >= 4:
        items.append(f"📚 稳定学习,周均 {analysis['avg_per_week']} 节。")
    elif analysis['avg_per_week'] > 0:
        items.append(f"🐢 低频学习,周均 {analysis['avg_per_week']} 节。")

    # 4. Time-of-day profile (working days only).
    if total_non > 0:
        weekday_total = sum(day_counts.get(i, 0) for i in range(5))
        evening_cnt = sum(weekday_periods.get("晚上", {}).values())
        if evening_cnt > weekday_total * 0.6 and weekday_total > 0:
            items.append(f"🌙 晚间学习型,{evening_cnt / weekday_total * 100:.0f}% 的课在晚上。")
        morning_cnt = sum(weekday_periods.get("上午", {}).values())
        if morning_cnt > weekday_total * 0.4 and weekday_total > 0:
            items.append(f"☀️ 上午学习型,{morning_cnt / weekday_total * 100:.0f}% 的课在上午。")

    # 5. Trend.
    if analysis['trend'] == "下降 ↓":
        items.append("📉 学习频率呈下降趋势,需关注。")
    elif analysis['trend'] == "上涨 ↑":
        items.append("📈 学习频率呈上升趋势,势头良好。")

    # 6. Holiday activity.
    holiday_cnt = total - total_non
    if holiday_cnt > 0:
        items.append(f"🏖️ 寒暑假期间也有坚持学习({holiday_cnt} 节)。")

    return items


_HTML_STYLE = """
body { font-family: -apple-system, "PingFang SC", "Microsoft YaHei", sans-serif;
       margin: 24px auto; max-width: 1100px; padding: 0 16px; color: #222; }
h1 { font-size: 22px; }
h2 { font-size: 18px; margin-top: 28px; border-left: 4px solid #4a90d9; padding-left: 8px; }
table { border-collapse: collapse; width: 100%; margin: 12px 0; }
th, td { border: 1px solid #ddd; padding: 6px 10px; text-align: left; font-size: 14px; }
th { background: #f5f7fa; }
table.info th { width: 180px; }
footer { margin-top: 32px; color: #888; font-size: 12px; }
"""

# Column order of the completion-detail table.
_DETAIL_COLUMNS = ("seq", "level", "unit", "lesson", "weekday", "period",
                   "start_time", "end_time", "duration")


def format_html(role_id, role_info, retention_status, all_records,
                non_holiday_records, holiday_count, day_counts,
                weekday_periods, weeks_data, analysis):
    """Render the analysis as a standalone HTML document (five sections).

    Chapter ids are resolved through fetch_chapter_info_map(). Every
    data-derived value is HTML-escaped before it is placed in the page.
    """
    chapter_map = fetch_chapter_info_map()
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
    info = role_info or {}

    def field(key):
        """Return the role field *key*, or '-' when missing or empty."""
        value = info.get(key)
        return "-" if value is None or value == "" else value

    # ── Part 1: extended basic information ──
    first_time_str = "-"
    last_time_str = "-"
    last_level = "-"
    last_unit = "-"
    last_lesson = "-"

    if all_records:
        first_time_str = _fmt_dt(all_records[0]["updated_at"])
        last_time_str = _fmt_dt(all_records[-1]["updated_at"])
        last_chapter_id = all_records[-1].get("chapter_id")
        if last_chapter_id and last_chapter_id in chapter_map:
            ci = chapter_map[last_chapter_id]
            last_level = ci["level"] or "-"
            last_unit = _unit_display(ci["unit_name"], ci["unit_num"])
            last_lesson = str(ci["lesson_index"]) if ci["lesson_index"] > 0 else "-"

    info_rows = [
        ("角色ID", field("role_id")),
        ("账号ID", field("account_id")),
        ("角色姓名", field("nickname")),
        ("角色性别", field("gender")),
        ("角色年龄", f"{info['age']} 岁" if info.get("age") else "-"),
        ("手机号后4位", field("phone_tail")),
        ("最近留存状态", retention_status),
        ("注册时间", field("reg_time")),
        ("第一次完课时间", first_time_str),
        ("最后一次完课时间", last_time_str),
        ("最后一次完课", f"Level {last_level} / {last_unit} / Lesson {last_lesson}"),
    ]
    info_html = "\n".join(
        f"<tr><th>{_esc(label)}</th><td>{_esc(value)}</td></tr>"
        for label, value in info_rows
    )

    # ── Part 2: completion table (with chapter mapping) ──
    detail_rows = []
    for i, r in enumerate(all_records, 1):
        chapter_id = r.get("chapter_id")
        ci = chapter_map.get(chapter_id, {}) if chapter_id else {}
        lesson_index = ci.get("lesson_index", 0)

        start_dt = r.get("created_at")
        end_dt = r["updated_at"]
        if start_dt and end_dt:
            duration_str = _format_duration((end_dt - start_dt).total_seconds())
        else:
            duration_str = "-"

        detail_rows.append({
            "seq": i,
            "level": ci.get("level") or r.get("level") or "-",
            "unit": _unit_display(ci.get("unit_name", ""), ci.get("unit_num", -1)),
            "lesson": str(lesson_index) if lesson_index > 0 else "-",
            "start_time": _fmt_dt(start_dt),
            "end_time": _fmt_dt(end_dt),
            "duration": duration_str,
            "weekday": WEEKDAY_NAMES[end_dt.weekday()] if end_dt else "-",
            "period": classify_period(end_dt.hour) if end_dt else "-",
        })
    rows_html = "\n".join(
        "<tr>" + "".join(f"<td>{_esc(row[col])}</td>" for col in _DETAIL_COLUMNS) + "</tr>"
        for row in detail_rows
    )

    # ── Parts 3-5: prose summaries ──
    weekly_text = _build_weekly_text(all_records, non_holiday_records, day_counts, weekday_periods)
    trend_text = _build_trend_text(weeks_data, analysis)
    summary_items = _build_summary(role_info, retention_status, all_records,
                                   non_holiday_records, day_counts, weekday_periods,
                                   weeks_data, analysis)
    summary_items_html = "\n".join(
        f"<li>{_to_html_text(item)}</li>" for item in summary_items
    )

    nickname = info.get("nickname") or ""
    heading = f"📊 学习时间分析报告 — 角色 {role_id}"
    if nickname:
        heading += f"({nickname})"

    document = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<title>学习时间分析报告 — 角色 {_esc(role_id)}</title>
<style>{_HTML_STYLE}</style>
</head>
<body>
<h1>{_esc(heading)}</h1>

<h2>一、基本信息</h2>
<table class="info">
{info_html}
</table>

<h2>二、完课记录明细(共 {len(all_records)} 条)</h2>
<table>
<thead>
<tr><th>序号</th><th>Level</th><th>Unit(单元)</th><th>Lesson</th><th>星期</th><th>时段</th><th>开始上课时间</th><th>完课时间</th><th>完课耗时</th></tr>
</thead>
<tbody>
{rows_html}
</tbody>
</table>

<h2>三、周上课时间分布分析</h2>
<p>{_to_html_text(weekly_text)}</p>

<h2>四、跨周趋势分析</h2>
<p>{_to_html_text(trend_text)}</p>

<h2>五、关键特征总结</h2>
<ul>
{summary_items_html}
</ul>

<footer>分析时间:{_esc(now_str)} | 完课总数:{len(all_records)} 条 | 非寒暑假:{len(non_holiday_records)} 条 | 寒暑假:{_esc(holiday_count)} 条</footer>
</body>
</html>
"""
    return document


# ── Entry point ───────────────────────────────────────
def main():
    """Parse the command line, run the analysis and emit the chosen report."""
    parser = argparse.ArgumentParser(description="角色学习时间分析工具")
    parser.add_argument("role_id", type=int, help="角色ID")
    parser.add_argument("--format", choices=["md", "html"], default="md",
                        help="输出格式(默认 md)")
    parser.add_argument("--output", "-o", type=str, default=None,
                        help="输出文件路径(仅在 html 模式下生效,默认 stdout)")
    args = parser.parse_args()
    role_id = args.role_id

    all_records = fetch_completion_records(role_id)
    non_holiday_records, holiday_records = split_records(all_records)
    holiday_count = len(holiday_records)

    role_info = fetch_role_info(role_id)
    retention_status = check_retention(all_records)
    day_counts, weekday_periods = analyze_weekly_distribution(non_holiday_records)
    weeks_data, analysis = analyze_weekly_trend(all_records)

    if args.format == "html":
        output = format_html(role_id, role_info, retention_status, all_records,
                             non_holiday_records, holiday_count, day_counts,
                             weekday_periods, weeks_data, analysis)
        if args.output:
            os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            print(f"HTML 报告已保存到: {args.output}")
        else:
            print(output)
    else:
        output = format_markdown(role_id, role_info, retention_status, all_records,
                                 non_holiday_records, holiday_count, day_counts,
                                 weekday_periods, weeks_data, analysis)
        print(output)


if __name__ == "__main__":
    main()