ai_member_xiaoban/skills/studytime-analysis/scripts/studytime_analysis.py

922 lines
36 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
studytime-analysis — 角色学习时间分析工具
用法: python3 studytime_analysis.py <role_id> [--format html] [--output <path>]
默认输出: Markdown 格式的分析报告
--format html: 输出 HTML 格式的详细报告
--output: 指定输出文件路径(仅在 html 模式下生效,默认输出到 stdout)
"""
import os
import sys
import json
import argparse
import psycopg2
import psycopg2.extras
import pymysql
from datetime import datetime, timedelta
from collections import defaultdict, OrderedDict
# ── Configuration ─────────────────────────────────────
# PostgreSQL connection settings; each value can be overridden through the
# PG_DB_* environment variables.
PG_CONFIG = dict(
    host=os.getenv("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    port=int(os.getenv("PG_DB_PORT", "28591")),
    user=os.getenv("PG_DB_USER", "ai_member"),
    password=os.getenv("PG_DB_PASSWORD", ""),
    dbname=os.getenv("PG_DB_DATABASE", "vala"),
)

# MySQL connection settings; each value (except the charset) can be overridden
# through the MYSQL_*_online environment variables.
MYSQL_CONFIG = dict(
    host=os.getenv("MYSQL_HOST_online", "bj-cdb-dh2fkqa0.sql.tencentcdb.com"),
    port=int(os.getenv("MYSQL_PORT_online", "27751")),
    user=os.getenv("MYSQL_USERNAME_online", "read_only"),
    password=os.getenv("MYSQL_PASSWORD_online", ""),
    charset="utf8mb4",
)

# Calendar months whose records are classified as "holiday" records.
EXCLUDED_MONTHS = (1, 2, 7, 8)

# Display names indexed by datetime.weekday() (0 = Monday … 6 = Sunday).
WEEKDAY_NAMES = ["周" + day for day in "一二三四五六日"]

# Day periods as half-open hour ranges [start, end).
PERIODS = OrderedDict()
PERIODS["凌晨"] = (0, 6)
PERIODS["上午"] = (6, 12)
PERIODS["中午"] = (12, 14)
PERIODS["下午"] = (14, 18)
PERIODS["晚上"] = (18, 24)
# ── Database connections ──────────────────────────────
def get_pg_connection():
    """Open and return a new psycopg2 connection built from PG_CONFIG."""
    connect_kwargs = {
        key: PG_CONFIG[key]
        for key in ("host", "port", "user", "password", "dbname")
    }
    return psycopg2.connect(**connect_kwargs)
def get_mysql_connection(db="vala_user"):
    """Open and return a new pymysql connection to database *db*.

    Host, port, credentials and charset are taken from MYSQL_CONFIG.
    """
    connect_kwargs = {
        key: MYSQL_CONFIG[key]
        for key in ("host", "port", "user", "password", "charset")
    }
    connect_kwargs["db"] = db
    return pymysql.connect(**connect_kwargs)
# ── Chapter mapping (MySQL `vala` database) ───────────
# Filled on the first call to fetch_chapter_info_map() and reused afterwards.
_chapter_info_cache = None


def fetch_chapter_info_map():
    """Return a mapping of chapter_id to chapter metadata.

    Each value is a dict with the keys ``level``, ``unit_name``, ``unit_num``,
    ``lesson_index`` and ``lesson_type``.  The rows are read from the MySQL
    ``vala`` database on the first call; later calls return the cached dict.
    """
    global _chapter_info_cache
    if _chapter_info_cache is not None:
        return _chapter_info_cache

    query = """
        SELECT
            gc.id AS chapter_id,
            IFNULL(sp.level, '') AS level,
            IFNULL(sp.cn_name, '') AS unit_name,
            IFNULL(sp.season_of_quarter, -1) AS unit_num,
            gc.`index` AS lesson_index,
            gc.lesson_type
        FROM vala_game_chapter gc
        LEFT JOIN vala_game_season_package sp ON gc.season_package_id = sp.id
    """
    connection = get_mysql_connection("vala")
    try:
        with connection.cursor() as cursor:
            cursor.execute(query)
            fetched = cursor.fetchall()
    finally:
        connection.close()

    def _as_int(value, fallback):
        # NULL columns fall back to a sentinel instead of failing in int().
        return fallback if value is None else int(value)

    _chapter_info_cache = {}
    for chapter_id, level, unit_name, unit_num, lesson_index, lesson_type in fetched:
        entry = {
            "level": level or "",
            "unit_name": unit_name or "",
            "unit_num": _as_int(unit_num, -1),
            "lesson_index": _as_int(lesson_index, 0),
            "lesson_type": _as_int(lesson_type, 1),
        }
        _chapter_info_cache[int(chapter_id)] = entry
    return _chapter_info_cache
# ── Role information ──────────────────────────────────
def fetch_role_info(role_id):
    """Look up one role in the MySQL ``vala_user`` database.

    Returns ``None`` when no row matches *role_id*; otherwise a dict with the
    keys ``role_id``, ``account_id``, ``nickname``, ``gender``, ``age``,
    ``phone_tail`` and ``reg_time`` (all display-ready values).
    """
    query = """
        SELECT
            c.id AS role_id,
            c.account_id,
            c.nickname,
            c.gender,
            c.birthday,
            c.created_at,
            a.tel
        FROM vala_app_character c
        LEFT JOIN vala_app_account a ON c.account_id = a.id
        WHERE c.id = %s
    """
    connection = get_mysql_connection("vala_user")
    try:
        with connection.cursor() as cursor:
            cursor.execute(query, (role_id,))
            record = cursor.fetchone()
    finally:
        connection.close()
    if not record:
        return None

    role_id_val, account_id, nickname, gender, birthday, reg_time, tel = record

    # Gender codes 0 and 1 (and NULL) are shown as an empty string; any other
    # value is shown as its string form.
    # NOTE(review): 0 and 1 presumably had distinct labels at some point —
    # confirm the intended display text.
    if gender is None or gender == 0 or gender == 1:
        gender_label = ""
    else:
        gender_label = str(gender)

    # Age is the difference between the current year and the birth year.
    age = ""
    if birthday:
        birth_year = str(birthday).split("-")[0]
        try:
            if birth_year.isdigit():
                age = datetime.now().year - int(birth_year)
        except (ValueError, IndexError):
            pass

    # Only the last four digits of the phone number are exposed (fewer when
    # the number has fewer digits).
    phone_tail = ""
    if tel:
        only_digits = "".join(ch for ch in str(tel) if ch.isdigit())
        phone_tail = only_digits[-4:]

    if not reg_time:
        registered = ""
    elif isinstance(reg_time, datetime):
        registered = reg_time.strftime("%Y-%m-%d %H:%M")
    else:
        registered = str(reg_time)[:16]

    return {
        "role_id": role_id_val,
        "account_id": account_id,
        "nickname": nickname or "",
        "gender": gender_label,
        "age": age,
        "phone_tail": phone_tail,
        "reg_time": registered,
    }
def check_retention(records, cutoff_days=14):
    """Classify a role's retention from its completion records.

    Args:
        records: iterable of mappings with an ``updated_at`` datetime (or None).
        cutoff_days: size of the "recent" window, in days.

    Returns:
        "无完课记录" when *records* is empty, "正常" when at least one record
        was updated within the last *cutoff_days* days, otherwise "流失".
    """
    if not records:
        return "无完课记录"
    cutoff = datetime.now() - timedelta(days=cutoff_days)
    # Records without a timestamp are skipped (as the other analysis helpers
    # do) instead of raising AttributeError on None.  tzinfo is dropped so an
    # aware timestamp can be compared with the naive local "now".
    has_recent = any(
        rec["updated_at"].replace(tzinfo=None) >= cutoff
        for rec in records
        if rec["updated_at"] is not None
    )
    return "正常" if has_recent else "流失"
# ── Completion-record query ───────────────────────────
def fetch_completion_records(role_id):
    """Fetch every completed-lesson record of *role_id* from PostgreSQL.

    The eight ``user_chapter_play_record_{0..7}`` tables are queried with
    ``play_status = 1`` and combined with UNION ALL.  Rows are returned as
    dict-like objects ordered by ``updated_at`` ascending; ``created_at`` is
    included so callers can compute how long a lesson took.
    """
    table_indexes = range(8)
    # One named placeholder per sub-query, all bound to the same role id.
    params = {f"rid_{idx}": role_id for idx in table_indexes}
    subqueries = [
        f"""
        SELECT user_id, chapter_id, chapter_unique_id, level,
               created_at, updated_at
        FROM user_chapter_play_record_{idx}
        WHERE user_id = %(rid_{idx})s AND play_status = 1
        """
        for idx in table_indexes
    ]
    combined = " UNION ALL ".join(subqueries)
    sql = f"SELECT * FROM ({combined}) t ORDER BY updated_at ASC"

    connection = get_pg_connection()
    try:
        with connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
            cursor.execute(sql, params)
            return cursor.fetchall()
    finally:
        connection.close()
def is_holiday(dt):
    """Return True when *dt* falls in one of EXCLUDED_MONTHS; None is never a holiday."""
    return dt is not None and dt.month in EXCLUDED_MONTHS
def split_records(records):
    """Partition *records* into ``(non_holiday, holiday)`` lists.

    The decision is made by ``is_holiday`` on each record's ``updated_at``;
    records whose ``updated_at`` is None are dropped from both lists.
    """
    regular_term = []
    vacation = []
    for record in records:
        finished_at = record["updated_at"]
        if finished_at is None:
            continue
        if is_holiday(finished_at):
            vacation.append(record)
        else:
            regular_term.append(record)
    return regular_term, vacation
# ── Analysis functions ────────────────────────────────
def classify_period(hour):
    """Return the name of the PERIODS entry whose [start, end) range contains *hour*.

    Returns "未知" when no range matches.
    """
    matches = (
        label
        for label, (start, end) in PERIODS.items()
        if start <= hour < end
    )
    return next(matches, "未知")
def analyze_weekly_distribution(records):
    """Count completions per weekday and, for Monday–Friday, per day period.

    Returns:
        ``(day_counts, weekday_periods)`` where ``day_counts`` maps
        ``datetime.weekday()`` (0–6) to a count, and ``weekday_periods`` maps
        a period name to ``{weekday: count}`` for weekdays 0–4 only.
        Records with ``updated_at`` of None are ignored.
    """
    per_day = defaultdict(int)
    per_period = defaultdict(lambda: defaultdict(int))
    timestamps = (rec["updated_at"] for rec in records)
    for stamp in timestamps:
        if stamp is None:
            continue
        day_index = stamp.weekday()
        period_name = classify_period(stamp.hour)
        per_day[day_index] += 1
        # Saturday (5) and Sunday (6) are left out of the period breakdown.
        if day_index <= 4:
            per_period[period_name][day_index] += 1
    return per_day, per_period
def analyze_weekly_trend(records):
    """Aggregate completions per ISO week and derive trend statistics.

    Returns:
        ``(weeks_data, analysis)``.  ``weeks_data`` is a list of
        ``(iso_year, iso_week, count)`` tuples in chronological order.
        ``analysis`` is a dict with the keys ``total_weeks``,
        ``total_span_weeks``, ``total_lessons``, ``avg_per_week``,
        ``consecutive``, ``empty_weeks``, ``first_half_avg``,
        ``second_half_avg`` and ``trend``.  For empty *records* the result is
        ``([], {})``.
    """
    if not records:
        return [], {}

    # Count completions per (ISO year, ISO week), ignoring missing timestamps.
    per_week = defaultdict(int)
    for rec in records:
        stamp = rec["updated_at"]
        if stamp is None:
            continue
        iso_year, iso_week = stamp.isocalendar()[:2]
        per_week[(iso_year, iso_week)] += 1

    ordered_keys = sorted(per_week)
    weeks_data = [(yr, wk, per_week[(yr, wk)]) for yr, wk in ordered_keys]
    total_weeks = len(weeks_data)
    total_lessons = sum(per_week.values())
    if total_weeks > 0:
        avg_per_week = round(total_lessons / total_weeks, 1)
    else:
        avg_per_week = 0

    # Walk Monday by Monday from the first to the last active week to find
    # the calendar span and the weeks without any completion.
    empty_weeks = []
    total_span_weeks = 0
    if ordered_keys:
        span_start = datetime.fromisocalendar(*ordered_keys[0], 1)
        span_end = datetime.fromisocalendar(*ordered_keys[-1], 1)
        total_span_weeks = (span_end - span_start).days // 7 + 1
        active = set(ordered_keys)
        monday = span_start
        while monday <= span_end:
            key = tuple(monday.isocalendar()[:2])
            if key not in active:
                empty_weeks.append(key)
            monday += timedelta(days=7)

    # Compare the first and last `half` active weeks; with an odd number of
    # weeks the middle one belongs to neither half.
    half = total_weeks // 2
    front = weeks_data[:half]
    back = weeks_data[total_weeks - half:]
    first_half_avg = sum(item[2] for item in front) / half if half > 0 else 0
    second_half_avg = sum(item[2] for item in back) / len(back) if back else 0

    trend = "持平"
    if first_half_avg > 0:
        ratio = second_half_avg / first_half_avg
        if ratio > 1.15:
            trend = "上涨 ↑"
        elif ratio < 0.85:
            trend = "下降 ↓"

    analysis = {
        "total_weeks": total_weeks,
        "total_span_weeks": total_span_weeks,
        "total_lessons": total_lessons,
        "avg_per_week": avg_per_week,
        "consecutive": not empty_weeks,
        "empty_weeks": empty_weeks,
        "first_half_avg": round(first_half_avg, 1),
        "second_half_avg": round(second_half_avg, 1),
        "trend": trend,
    }
    return weeks_data, analysis
# ── Markdown output ───────────────────────────────────
def format_markdown(role_id, role_info, retention_status, all_records,
                    non_holiday_records, holiday_count,
                    day_counts, weekday_periods, weeks_data, analysis):
    """Render the complete analysis as a Markdown report.

    Args:
        role_id: role identifier shown in the title.
        role_info: dict as returned by ``fetch_role_info`` or None.
        retention_status: retention label shown in the basic-info table.
        all_records: every completion record (mappings with ``updated_at``).
        non_holiday_records: the subset used for the weekly distribution.
        holiday_count: number of holiday records.
        day_counts: ``{weekday: count}`` for the non-holiday records.
        weekday_periods: ``{period: {weekday: count}}`` for Monday–Friday.
        weeks_data: list of ``(iso_year, iso_week, count)`` tuples.
        analysis: statistics dict produced by ``analyze_weekly_trend``.

    Returns:
        The report as a single newline-joined string.  When *all_records* is
        empty only the header and a notice are produced.
    """
    lines = []
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
    lines.append(f"# 📊 学习时间分析报告 — 角色 {role_id}")
    lines.append("")

    if role_info:
        lines.append("## 基本信息")
        lines.append("")
        lines.append("| 项目 | 详情 |")
        lines.append("|------|------|")
        lines.append(f"| 角色ID | {role_info['role_id']} |")
        lines.append(f"| 账号ID | {role_info['account_id']} |")
        if role_info['nickname']:
            lines.append(f"| 角色名字 | {role_info['nickname']} |")
        lines.append(f"| 性别 | {role_info['gender']} |")
        if role_info['age']:
            lines.append(f"| 年龄 | {role_info['age']} 岁 |")
        if role_info['phone_tail']:
            lines.append(f"| 账号手机号后4位 | {role_info['phone_tail']} |")
        if retention_status:
            lines.append(f"| 最近留存状态 | {retention_status} |")
        lines.append("")

    lines.append(f"**分析时间**: {now_str}")
    lines.append(f"**完课记录总数**: {len(all_records)}")
    if holiday_count > 0:
        lines.append(f"**其中寒暑假记录**: {holiday_count}")
        lines.append(f"**非寒暑假记录**: {len(non_holiday_records)}")
    lines.append("")
    lines.append(f"> ⚠️ 一周时间分布仅基于非寒暑假数据({len(non_holiday_records)} 条),跨周趋势和完课明细包含全部数据({len(all_records)} 条)。")
    lines.append("")

    if not all_records:
        lines.append("> ⚠️ 该角色没有任何完课记录。")
        return "\n".join(lines)
    if not non_holiday_records:
        lines.append("> ⚠️ 该角色在非寒暑假期间没有完课记录,一周时间分布无法分析。")

    # ── Section 1: distribution over the week ──
    lines.append("---")
    lines.append(f"## 一、一周时间分布(仅非寒暑假,{len(non_holiday_records)} 条记录)")
    lines.append("")
    lines.append("### 各天完课数量")
    lines.append("")
    total = sum(day_counts.values())
    max_day = max(day_counts.values()) if day_counts else 1
    lines.append("| 星期 | 完课数 | 占比 |")
    lines.append("|------|--------|------|")
    for i, name in enumerate(WEEKDAY_NAMES):
        cnt = day_counts.get(i, 0)
        pct = f"{cnt / total * 100:.1f}%" if total > 0 else "0%"
        # Bar scaled to at most 20 cells; any non-zero count gets at least one.
        bar = "█" * max(1, int(cnt / max_day * 20)) if cnt > 0 else ""
        lines.append(f"| {name} | {cnt} {bar} | {pct} |")
    lines.append("")

    weekday_total = sum(day_counts.get(i, 0) for i in range(5))
    weekend_total = sum(day_counts.get(i, 0) for i in range(5, 7))
    lines.append("### 规律小结")
    lines.append("")
    if weekend_total > 0:
        sat = day_counts.get(5, 0)
        sun = day_counts.get(6, 0)
        lines.append(f"- **周末上课**: ✅ 是 — 周六 {sat} 节,周日 {sun} 节")
    else:
        lines.append("- **周末上课**: ❌ 否 — 周末无完课记录")
    lines.append("")

    lines.append("### 周一至周五上课时段分布")
    lines.append("")
    lines.append("| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |")
    lines.append("|------|------|------|------|------|------|------|")
    for period in ["上午", "中午", "下午", "晚上", "凌晨"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        row = [period]
        for d in range(5):
            cnt = period_data.get(d, 0)
            row.append(str(cnt) if cnt > 0 else "-")
        row.append(str(period_sum))
        lines.append(f"| {' | '.join(row)} |")
    lines.append("")

    lines.append("**时段规律分析**:")
    for period in ["上午", "中午", "下午", "晚上"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        pct = period_sum / weekday_total * 100 if weekday_total > 0 else 0
        active_days = [WEEKDAY_NAMES[d] for d in range(5) if period_data.get(d, 0) > 0]
        summary = f"- **{period}**({period_sum}节, {pct:.0f}%)"
        if active_days:
            summary += f"→ 集中在 {'、'.join(active_days)}"
        lines.append(summary)
    lines.append("")

    # ── Section 2: week-over-week trend ──
    lines.append("---")
    lines.append("## 二、跨周学习趋势")
    lines.append("")
    lines.append("### 基本数据")
    lines.append(f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周(含空周),有课周数 **{analysis['total_weeks']}** 周")
    lines.append(f"- 有效完课总数 **{analysis['total_lessons']}** 节")
    lines.append(f"- 平均每周完课 **{analysis['avg_per_week']}** 节")
    con_str = "✅ 每周连续上课,无中断" if analysis['consecutive'] else "⚠️ 存在中断周(见下方)"
    lines.append(f"- 连续性: {con_str}")
    lines.append("")

    if analysis["empty_weeks"]:
        lines.append("### 中断周明细")
        empty_list = []
        for y, w in sorted(analysis["empty_weeks"]):
            monday = datetime.fromisocalendar(y, w, 1)
            empty_list.append(f"{y}年W{w:02d}({monday.strftime('%m/%d')}起)")
        lines.append(f"- {', '.join(empty_list)}")
        lines.append("")

    lines.append("### 各周完课详情")
    lines.append("")
    lines.append("| 周次 | 起止日期 | 完课数 | 趋势 |")
    lines.append("|------|----------|--------|------|")
    max_count = max(c for _, _, c in weeks_data) if weeks_data else 1
    for i, (y, w, cnt) in enumerate(weeks_data):
        monday = datetime.fromisocalendar(y, w, 1)
        sunday = monday + timedelta(days=6)
        date_range = f"{monday.strftime('%m/%d')}-{sunday.strftime('%m/%d')}"
        marker = ""
        if i > 0:
            prev_cnt = weeks_data[i - 1][2]
            if prev_cnt > 0 and cnt >= prev_cnt * 2:
                marker = "📈 突增"
            elif cnt > prev_cnt * 1.3:
                marker = "📈"
            elif prev_cnt > 0 and cnt < prev_cnt * 0.7:
                marker = "📉"
        bar = "█" * max(1, int(cnt / max_count * 15)) if cnt > 0 else ""
        lines.append(f"| {y}W{w:02d} | {date_range} | {cnt} {bar} | {marker} |")
    lines.append("")

    lines.append("### 趋势分析")
    lines.append(f"- **整体趋势**: {analysis['trend']}")
    # Both averages in `analysis` cover len(weeks_data) // 2 weeks; with an
    # odd number of weeks the middle week is in neither half, so the same
    # count is reported for both halves.
    half_weeks = len(weeks_data) // 2
    lines.append(f"  - 前半段(前 {half_weeks} 周)平均: {analysis['first_half_avg']} 节/周")
    lines.append(f"  - 后半段(后 {half_weeks} 周)平均: {analysis['second_half_avg']} 节/周")
    lines.append("")

    if len(weeks_data) >= 2:
        events_found = []
        consecutive_pairs = list(zip(weeks_data, weeks_data[1:]))
        # First week whose count at least doubled compared with the previous one.
        for (_, _, before), (y, w, after) in consecutive_pairs:
            if before > 0 and after >= before * 2:
                monday = datetime.fromisocalendar(y, w, 1)
                events_found.append(f"⚡ **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量突增**({before} → {after})")
                break
        # First week that fell to at most one lesson after a week with three or more.
        for (_, _, before), (y, w, after) in consecutive_pairs:
            if before >= 3 and after <= 1:
                monday = datetime.fromisocalendar(y, w, 1)
                events_found.append(f"🔻 **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量骤降**({before} → {after})")
                break
        if events_found:
            lines.append("**值得关注的变化**:")
            for ev in events_found:
                lines.append(f"- {ev}")
            lines.append("")

    # ── Section 3: per-record details ──
    lines.append("---")
    lines.append(f"## 三、完课记录明细(全部 {len(all_records)} 条记录)")
    lines.append("")
    lines.append("| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |")
    lines.append("|------|------|------|------|------|------|--------|")
    for i, r in enumerate(all_records, 1):
        dt = r["updated_at"]
        if dt is None:
            continue
        date_str = dt.strftime("%Y-%m-%d")
        time_str = dt.strftime("%H:%M")
        weekday = WEEKDAY_NAMES[dt.weekday()]
        period = classify_period(dt.hour)
        level = r.get("level") or "-"
        chapter_id = r.get("chapter_id") or "-"
        lines.append(f"| {i} | {date_str} | {time_str} | {weekday} | {period} | {level} | {chapter_id} |")
    lines.append("")

    # ── HTML follow-up prompt ──
    lines.append("---")
    lines.append("> 💡 是否需要将以上所有详细信息生成为一个 HTML 文件?回复「是」或「需要」即可。")
    return "\n".join(lines)
# ── HTML 输出 ──────────────────────────────────────────
def _fmt_dt(dt_val):
"""安全格式化 datetime"""
if dt_val is None:
return "-"
return dt_val.strftime('%Y-%m-%d %H:%M')
def _fmt_date(dt_val):
if dt_val is None:
return "-"
return dt_val.strftime('%Y-%m-%d')
def _format_duration(seconds):
if seconds is None or seconds < 0:
return "-"
m, s = divmod(int(seconds), 60)
if m >= 60:
h, m = divmod(m, 60)
return f"{h}{m}{s}"
if m > 0:
return f"{m}{s}"
return f"{s}"
def _build_weekly_text(all_records, non_holiday_records, day_counts, weekday_periods):
"""生成周上课时间分布的文字总结"""
total_non = len(non_holiday_records)
if total_non == 0:
return "该角色在非寒暑假期间没有完课记录,无法分析周上课时间分布。"
parts = []
# 最活跃的星期
max_day_idx = max(day_counts, key=day_counts.get, default=-1)
if max_day_idx >= 0:
parts.append(f"非寒暑假期间共完成 {total_non} 节课,主要集中在 **{WEEKDAY_NAMES[max_day_idx]}**{day_counts[max_day_idx]} 节,占 {day_counts[max_day_idx]/total_non*100:.0f}%)。")
# 时段偏好
weekday_total = sum(day_counts.get(i, 0) for i in range(5))
top_period = None
top_period_cnt = 0
for period in ["晚上", "上午", "下午", "中午"]:
cnt = sum(weekday_periods.get(period, {}).values())
if cnt > top_period_cnt:
top_period_cnt = cnt
top_period = period
if top_period and top_period_cnt > 0:
pct = top_period_cnt / weekday_total * 100 if weekday_total > 0 else 0
parts.append(f"工作日上课集中在 **{top_period}**时段({top_period_cnt} 节,占 {pct:.0f}%)。")
# 周末情况
sat = day_counts.get(5, 0)
sun = day_counts.get(6, 0)
if sat + sun > 0:
parts.append(f"周末也保持上课节奏,周六 {sat} 节、周日 {sun} 节。")
else:
parts.append("周末无上课记录。")
return " ".join(parts)
def _build_trend_text(weeks_data, analysis):
"""生成跨周趋势分析的总结文字"""
if not weeks_data:
return "无完课记录,无法分析趋势。"
parts = []
parts.append(f"完课跨越 {analysis['total_span_weeks']} 周(有课 {analysis['total_weeks']} 周),共 {analysis['total_lessons']} 节,周均 {analysis['avg_per_week']} 节。")
if analysis['consecutive']:
parts.append("学习连续性良好,无中断周。")
else:
empty_list = []
for y, w in sorted(analysis['empty_weeks']):
monday = datetime.fromisocalendar(y, w, 1)
empty_list.append(f"{y}年W{w:02d}{monday.strftime('%m/%d')}起)")
parts.append(f"存在间断:{''.join(empty_list)}")
parts.append(f"整体趋势:{analysis['trend']},前半段平均 {analysis['first_half_avg']} 节/周 → 后半段 {analysis['second_half_avg']} 节/周。")
return " ".join(parts)
def _build_summary(role_info, retention_status, all_records, non_holiday_records,
day_counts, weekday_periods, weeks_data, analysis):
"""生成关键特征总结"""
items = []
total = len(all_records)
total_non = len(non_holiday_records)
if total == 0:
items.append("暂无完课记录。")
return items
# 1. 学习规模与留存
if retention_status == "流失":
items.append(f"⚠️ 近14天无完课已**流失**。历史共 {total} 节完课记录。")
elif retention_status == "正常":
items.append(f"✅ 状态**正常**近14天内有完课。累计 {total} 节完课。")
else:
items.append(f"累计 {total} 节完课记录。")
# 2. 学习时长判断
if weeks_data:
first_week = datetime.fromisocalendar(weeks_data[0][0], weeks_data[0][1], 1)
last_week = datetime.fromisocalendar(weeks_data[-1][0], weeks_data[-1][1], 1)
span_months = (last_week.year - first_week.year) * 12 + (last_week.month - first_week.month) + 1
if span_months >= 6:
items.append(f"📅 长期用户,学习跨度约 {span_months} 个月。")
elif span_months >= 2:
items.append(f"📅 中期用户,学习跨度约 {span_months} 个月。")
else:
items.append(f"🆕 新用户,学习跨度约 {span_months} 个月,尚在形成学习习惯阶段。")
# 3. 强度
if analysis['avg_per_week'] >= 6:
items.append(f"🔥 高强度学习,周均 {analysis['avg_per_week']} 节。")
elif analysis['avg_per_week'] >= 4:
items.append(f"📚 稳定学习,周均 {analysis['avg_per_week']} 节。")
elif analysis['avg_per_week'] > 0:
items.append(f"🐢 低频学习,周均 {analysis['avg_per_week']} 节。")
# 4. 时段特征
if total_non > 0:
max_day_idx = max(day_counts, key=day_counts.get)
weekday_total = sum(day_counts.get(i, 0) for i in range(5))
evening_cnt = sum(weekday_periods.get("晚上", {}).values())
if evening_cnt > weekday_total * 0.6 and weekday_total > 0:
items.append(f"🌙 晚间学习型,{evening_cnt / weekday_total * 100:.0f}% 的课在晚上。")
morning_cnt = sum(weekday_periods.get("上午", {}).values())
if morning_cnt > weekday_total * 0.4 and weekday_total > 0:
items.append(f"☀️ 上午学习型,{morning_cnt / weekday_total * 100:.0f}% 的课在上午。")
# 5. 趋势
if analysis['trend'] == "下降 ↓":
items.append(f"📉 学习频率呈下降趋势,需关注。")
elif analysis['trend'] == "上涨 ↑":
items.append(f"📈 学习频率呈上升趋势,势头良好。")
# 6. 寒暑假
holiday_cnt = total - total_non
if holiday_cnt > 0:
items.append(f"🏖️ 寒暑假期间也有坚持学习({holiday_cnt} 节)。")
return items
def format_html(role_id, role_info, retention_status, all_records,
                non_holiday_records, holiday_count,
                day_counts, weekday_periods, weeks_data, analysis):
    """Render the analysis as a standalone HTML report with five sections.

    The sections are: basic information, per-record details (enriched with
    the chapter mapping), weekly distribution summary, week-over-week trend
    summary and key characteristics.

    Args:
        role_id: role identifier shown in the title.
        role_info: dict as returned by ``fetch_role_info`` or None.
        retention_status: retention label as returned by ``check_retention``.
        all_records: every completion record, ordered by ``updated_at``.
        non_holiday_records: the non-holiday subset.
        holiday_count: number of holiday records.
        day_counts: ``{weekday: count}`` for the non-holiday records.
        weekday_periods: ``{period: {weekday: count}}`` for Monday–Friday.
        weeks_data: list of ``(iso_year, iso_week, count)`` tuples.
        analysis: statistics dict produced by ``analyze_weekly_trend``.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from html import escape

    def esc(value):
        # Database/user supplied text must not be able to inject markup.
        return escape(str(value))

    def md_inline(text):
        # The summary helpers emit Markdown **bold**; escape first, then
        # translate the markers so they render as bold text in HTML.
        return re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", escape(str(text)))

    def info(key):
        # Display value of a role_info field, "-" when missing or empty.
        value = role_info.get(key) if role_info else None
        return "-" if value is None or value == "" else esc(value)

    def unit_label(chapter):
        # Prefer the unit name, fall back to "Unit<n>", then to "-".
        if chapter.get("unit_name"):
            return chapter["unit_name"]
        unit_num = chapter.get("unit_num", -1)
        return f"Unit{unit_num}" if unit_num >= 0 else "-"

    def lesson_label(chapter):
        lesson_index = chapter.get("lesson_index", 0)
        return str(lesson_index) if lesson_index > 0 else "-"

    chapter_map = fetch_chapter_info_map()
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')

    # ── Part 1: extended basic information ──
    first_time_str = last_time_str = "-"
    last_level = last_unit = last_lesson = "-"
    if all_records:
        first_time_str = _fmt_dt(all_records[0]["updated_at"])
        last_record = all_records[-1]
        last_time_str = _fmt_dt(last_record["updated_at"])
        last_chapter_id = last_record.get("chapter_id")
        if last_chapter_id and last_chapter_id in chapter_map:
            last_chapter = chapter_map[last_chapter_id]
            last_level = last_chapter["level"] or "-"
            # Same rule as the per-record rows: a unit name is shown even
            # when the unit number is unknown.
            last_unit = unit_label(last_chapter)
            last_lesson = lesson_label(last_chapter)

    nickname = (role_info.get("nickname") or "") if role_info else ""
    title_suffix = f"({esc(nickname)})" if nickname else ""
    age_text = f"{esc(role_info['age'])} 岁" if role_info and role_info.get("age") else "-"
    status_class = "status-normal" if retention_status == "正常" else "status-lost"

    # ── Part 2: per-record table rows ──
    row_chunks = []
    for seq, rec in enumerate(all_records, 1):
        chapter_id = rec.get("chapter_id")
        chapter = chapter_map.get(chapter_id, {}) if chapter_id else {}
        start_dt = rec.get("created_at")
        end_dt = rec["updated_at"]
        if start_dt and end_dt:
            duration = _format_duration((end_dt - start_dt).total_seconds())
        else:
            duration = "-"
        cells = (
            seq,
            chapter.get("level") or rec.get("level") or "-",
            unit_label(chapter),
            lesson_label(chapter),
            WEEKDAY_NAMES[end_dt.weekday()] if end_dt else "-",
            classify_period(end_dt.hour) if end_dt else "-",
            _fmt_dt(start_dt),
            _fmt_dt(end_dt),
            duration,
        )
        row_chunks.append(
            "\n<tr>\n"
            + "".join(f"<td>{esc(cell)}</td>\n" for cell in cells)
            + "</tr>"
        )
    rows_html = "".join(row_chunks)

    # ── Parts 3–5: text summaries ──
    weekly_text = md_inline(
        _build_weekly_text(all_records, non_holiday_records, day_counts, weekday_periods))
    trend_text = md_inline(_build_trend_text(weeks_data, analysis))
    summary_items = _build_summary(role_info, retention_status, all_records,
                                   non_holiday_records, day_counts, weekday_periods,
                                   weeks_data, analysis)
    summary_items_html = "".join(f"<li>{md_inline(item)}</li>" for item in summary_items)

    # Plain (non-f) string so the CSS braces need no doubling.
    css = """* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif; background: #f5f7fa; color: #333; line-height: 1.6; padding: 20px; }
.container { max-width: 1100px; margin: 0 auto; }
h1 { text-align: center; color: #1a1a2e; margin: 20px 0 30px; font-size: 24px; }
h2 { color: #2d3436; font-size: 20px; margin: 30px 0 15px; padding-bottom: 8px; border-bottom: 2px solid #0984e3; }
.card { background: #fff; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.08); padding: 24px; margin-bottom: 20px; }
.info-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); gap: 12px; }
.info-item { display: flex; padding: 8px 0; border-bottom: 1px solid #f0f0f0; }
.info-label { color: #636e72; min-width: 120px; font-weight: 500; }
.info-value { color: #2d3436; }
table { width: 100%; border-collapse: collapse; font-size: 13px; }
thead { background: #0984e3; color: #fff; }
th { padding: 10px 8px; text-align: center; font-weight: 600; white-space: nowrap; }
td { padding: 8px; text-align: center; border-bottom: 1px solid #eee; }
tr:hover td { background: #f0f7ff; }
.summary-list { padding-left: 20px; }
.summary-list li { margin-bottom: 8px; line-height: 1.8; }
.meta { text-align: center; color: #999; font-size: 12px; margin-top: 30px; }
.text-block { background: #f8f9fd; border-left: 4px solid #0984e3; padding: 12px 16px; border-radius: 0 8px 8px 0; margin: 10px 0; }
.status-normal { color: #00b894; font-weight: 600; }
.status-lost { color: #e17055; font-weight: 600; }
@media print { body { background: #fff; } .card { box-shadow: none; border: 1px solid #ddd; } }"""

    document = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>学习时间分析报告 — 角色 {esc(role_id)}</title>
<style>
{css}
</style>
</head>
<body>
<div class="container">
<h1>📊 学习时间分析报告 — 角色 {esc(role_id)}{title_suffix}</h1>
<!-- 第一部分:基本信息 -->
<h2>一、基本信息</h2>
<div class="card">
<div class="info-grid">
<div class="info-item"><span class="info-label">角色ID</span><span class="info-value">{info('role_id')}</span></div>
<div class="info-item"><span class="info-label">账号ID</span><span class="info-value">{info('account_id')}</span></div>
<div class="info-item"><span class="info-label">角色姓名</span><span class="info-value">{info('nickname')}</span></div>
<div class="info-item"><span class="info-label">角色性别</span><span class="info-value">{info('gender')}</span></div>
<div class="info-item"><span class="info-label">角色年龄</span><span class="info-value">{age_text}</span></div>
<div class="info-item"><span class="info-label">手机号后4位</span><span class="info-value">{info('phone_tail')}</span></div>
<div class="info-item"><span class="info-label">最近留存状态</span><span class="info-value"><span class="{status_class}">{esc(retention_status)}</span></span></div>
<div class="info-item"><span class="info-label">注册时间</span><span class="info-value">{info('reg_time')}</span></div>
<div class="info-item"><span class="info-label">第一次完课时间</span><span class="info-value">{first_time_str}</span></div>
<div class="info-item"><span class="info-label">最后一次完课时间</span><span class="info-value">{last_time_str}</span></div>
<div class="info-item"><span class="info-label">最后一次完课</span><span class="info-value">Level {esc(last_level)} / {esc(last_unit)} / Lesson {esc(last_lesson)}</span></div>
</div>
</div>
<!-- 第二部分:完课记录表格 -->
<h2>二、完课记录明细(共 {len(all_records)} 条)</h2>
<div class="card" style="overflow-x: auto;">
<table>
<thead>
<tr>
<th>序号</th>
<th>Level</th>
<th>Unit单元</th>
<th>Lesson</th>
<th>星期</th>
<th>时段</th>
<th>开始上课时间</th>
<th>完课时间</th>
<th>完课耗时</th>
</tr>
</thead>
<tbody>
{rows_html}
</tbody>
</table>
</div>
<!-- 第三部分:周上课时间分布总结 -->
<h2>三、周上课时间分布分析</h2>
<div class="card">
<div class="text-block">{weekly_text}</div>
</div>
<!-- 第四部分:跨周趋势分析 -->
<h2>四、跨周趋势分析</h2>
<div class="card">
<div class="text-block">{trend_text}</div>
</div>
<!-- 第五部分:关键特征总结 -->
<h2>五、关键特征总结</h2>
<div class="card">
<ul class="summary-list">
{summary_items_html}
</ul>
</div>
<div class="meta">分析时间:{now_str} | 完课总数:{len(all_records)} 条 | 非寒暑假:{len(non_holiday_records)} 条 | 寒暑假:{esc(holiday_count)} 条</div>
</div>
</body>
</html>"""
    return document
# ── Entry point ───────────────────────────────────────
def main(argv=None):
    """Parse the command line, run the analysis and emit the report.

    Args:
        argv: optional list of command-line arguments; when None, argparse
            reads ``sys.argv[1:]``.

    Markdown output always goes to stdout.  HTML output is written to
    ``--output`` when given (parent directories are created), otherwise to
    stdout.
    """
    parser = argparse.ArgumentParser(description="角色学习时间分析工具")
    parser.add_argument("role_id", type=int, help="角色ID")
    parser.add_argument("--format", choices=["md", "html"], default="md",
                        help="输出格式(默认 md)")
    parser.add_argument("--output", "-o", type=str, default=None,
                        help="输出文件路径(仅在 html 模式下生效,默认 stdout)")
    args = parser.parse_args(argv)
    role_id = args.role_id

    # Load the data and compute every statistic once; both renderers take
    # the same arguments.
    all_records = fetch_completion_records(role_id)
    non_holiday_records, holiday_records = split_records(all_records)
    holiday_count = len(holiday_records)
    role_info = fetch_role_info(role_id)
    retention_status = check_retention(all_records)
    day_counts, weekday_periods = analyze_weekly_distribution(non_holiday_records)
    weeks_data, analysis = analyze_weekly_trend(all_records)

    render = format_html if args.format == "html" else format_markdown
    output = render(role_id, role_info, retention_status, all_records,
                    non_holiday_records, holiday_count,
                    day_counts, weekday_periods, weeks_data, analysis)

    if args.format == "html" and args.output:
        # dirname is "" for a bare file name; fall back to the current directory.
        os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(output)
        print(f"HTML 报告已保存到: {args.output}")
    else:
        print(output)


if __name__ == "__main__":
    main()