#!/usr/bin/env python3
"""
studytime-analysis — study-time analysis tool for a single role (character).

Usage:
    python3 studytime_analysis.py ROLE_ID [--format {md,html}] [--output PATH]
"""
import argparse
import json
import os
import sys
from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta

import psycopg2
import psycopg2.extras
import pymysql

# ── Configuration ──
# Every connection setting can be overridden through the environment; the
# literals below are only the fallbacks used when a variable is unset.
PG_CONFIG = {
    "host": os.environ.get("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    "port": int(os.environ.get("PG_DB_PORT", "28591")),
    "user": os.environ.get("PG_DB_USER", "ai_member"),
    "password": os.environ.get("PG_DB_PASSWORD", ""),
    "dbname": os.environ.get("PG_DB_DATABASE", "vala"),
}
MYSQL_HOST = os.environ.get("MYSQL_HOST_online", "bj-cdb-dh2fkqa0.sql.tencentcdb.com")
MYSQL_PORT = int(os.environ.get("MYSQL_PORT_online", "27751"))
MYSQL_USER = os.environ.get("MYSQL_USERNAME_online", "read_only")
MYSQL_PASS = os.environ.get("MYSQL_PASSWORD_online", "")

# Months treated as school holidays by is_holiday().
EXCLUDED_MONTHS = (1, 2, 7, 8)
# Indexed by datetime.weekday(): 0 = Monday … 6 = Sunday.
WEEKDAY_NAMES = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]
# Period name -> half-open hour range [lo, hi).
PERIODS = OrderedDict([
    ("凌晨", (0, 6)),
    ("上午", (6, 12)),
    ("中午", (12, 14)),
    ("下午", (14, 18)),
    ("晚上", (18, 24)),
])


# ── Database connections ──
def pg_conn():
    """Open a new PostgreSQL connection using PG_CONFIG."""
    return psycopg2.connect(**PG_CONFIG)


def my_conn(db="vala_user"):
    """Open a new MySQL connection to database *db* (utf8mb4)."""
    return pymysql.connect(
        host=MYSQL_HOST,
        port=MYSQL_PORT,
        user=MYSQL_USER,
        password=MYSQL_PASS,
        db=db,
        charset="utf8mb4",
    )


# ── Global chapter → unit mapping ──
_chapter_map = None


def fetch_chapter_info_map():
    """Return chapter_id -> {level, unit_index, lesson_index, lesson_type}.

    The mapping is loaded from MySQL once and cached in the module-level
    ``_chapter_map``; later calls return the cached dict.

    Unit rule (per (level, season_of_quarter) group, chapters ordered by id):
    every 5 consecutive regular chapters (lesson_type == 1) form one unit.
    For season_of_quarter <= 0 (including chapters with no season package,
    which the query reports as -1) every regular chapter gets unit 0.
    For season s >= 1 the units start at base = 1 + 12 * (s - 1).
    Chapters whose lesson_type is not 1 get unit_index -1.
    """
    global _chapter_map
    if _chapter_map is not None:
        return _chapter_map

    conn = my_conn("vala")
    try:
        with conn.cursor() as cur:
            cur.execute(
                """SELECT gc.id, IFNULL(sp.level,''), IFNULL(sp.season_of_quarter,-1),
                          gc.`index`, gc.lesson_type
                   FROM vala_game_chapter gc
                   LEFT JOIN vala_game_season_package sp ON gc.season_package_id=sp.id
                   ORDER BY sp.season_of_quarter, gc.id"""
            )
            rows = cur.fetchall()
    finally:
        conn.close()

    # Group by (level, season) so that different levels sharing the same
    # season number are not mixed when counting chapters into units.
    seasons = OrderedDict()
    for ch_id, level, season, lesson_index, lesson_type in rows:
        season = int(season) if season is not None else -1
        lesson_type = int(lesson_type) if lesson_type is not None else 1
        level = level or ""
        seasons.setdefault((level, season), []).append(
            (int(ch_id), level, int(lesson_index or 0), lesson_type)
        )

    # Build into a local dict and publish it only once complete, so an error
    # half-way through never leaves a partially filled cache behind.
    chapter_map = {}
    for (_level_key, season), chapters in seasons.items():
        base = 0 if season <= 0 else 1 + 12 * (season - 1)
        regular_ids = [cid for cid, _lv, _li, lt in chapters if lt == 1]
        if season <= 0:
            unit_offset = {cid: 0 for cid in regular_ids}
        else:
            unit_offset = {cid: pos // 5 for pos, cid in enumerate(regular_ids)}

        for cid, level, lesson_index, lesson_type in chapters:
            offset = unit_offset.get(cid, -1)
            chapter_map[cid] = dict(
                level=level,
                unit_index=base + offset if offset >= 0 else -1,
                lesson_index=lesson_index,
                lesson_type=lesson_type,
            )

    _chapter_map = chapter_map
    return _chapter_map


# ── Role information ──
def fetch_role_info(role_id):
    """Load display information for one role from MySQL.

    Returns None when no character row matches *role_id*; otherwise a dict
    with keys role_id, account_id, nickname, gender, age, phone_tail and
    reg_time. Missing values are represented by empty strings.
    """
    conn = my_conn("vala_user")
    try:
        with conn.cursor() as cur:
            cur.execute(
                """SELECT c.id, c.account_id, c.nickname, c.gender, c.birthday,
                          c.created_at, a.tel
                   FROM vala_app_character c
                   LEFT JOIN vala_app_account a ON c.account_id=a.id
                   WHERE c.id=%s""",
                (role_id,),
            )
            row = cur.fetchone()
    finally:
        conn.close()

    if not row:
        return None
    rid, account_id, nickname, gender_code, birthday, created_at, tel = row

    # Unknown gender codes are shown verbatim; NULL becomes an empty string.
    gender = {0: "女", 1: "男"}.get(
        gender_code, str(gender_code) if gender_code is not None else ""
    )

    # Age is best-effort: current year minus the year prefix of the birthday.
    age = ""
    if birthday:
        try:
            age = datetime.now().year - int(str(birthday).split("-")[0])
        except (TypeError, ValueError):
            # Unparseable birthday: leave the age blank rather than failing.
            age = ""

    # Only the last four digits of the phone number are ever exposed.
    phone_tail = ""
    if tel:
        digits = "".join(ch for ch in str(tel) if ch.isdigit())
        phone_tail = digits[-4:] if len(digits) >= 4 else digits

    reg_time = ""
    if created_at:
        if isinstance(created_at, datetime):
            reg_time = created_at.strftime("%Y-%m-%d %H:%M")
        else:
            reg_time = str(created_at)[:16]

    return dict(
        role_id=rid,
        account_id=account_id,
        nickname=nickname or "",
        gender=gender,
        age=age,
        phone_tail=phone_tail,
        reg_time=reg_time,
    )


def check_retention(records, days=14):
    """Classify recent activity from completion records.

    Returns "无完课记录" when *records* is empty, "正常" when at least one
    record has ``updated_at`` within the last *days* days, else "流失".
    Records whose ``updated_at`` is None are ignored.
    """
    if not records:
        return "无完课记录"
    cutoff = datetime.now() - timedelta(days=days)
    for rec in records:
        stamp = rec["updated_at"]
        # tzinfo is dropped so aware timestamps compare with the naive cutoff.
        if stamp is not None and stamp.replace(tzinfo=None) >= cutoff:
            return "正常"
    return "流失"


# ── Completion records ──
def fetch_completion_records(role_id): parts, params = [], {} for i in range(8): pn = f"r{i}" params[pn] = role_id parts.append(f"""SELECT user_id, chapter_id, chapter_unique_id, level, created_at, updated_at FROM user_chapter_play_record_{i} WHERE user_id=%({pn})s AND play_status=1""") sql = f"SELECT * FROM ({' UNION ALL '.join(parts)}) t ORDER BY updated_at ASC" conn = pg_conn() try: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute(sql, params) return cur.fetchall() finally: conn.close() def is_holiday(dt): return dt is not None and dt.month in EXCLUDED_MONTHS def split_records(records): nh, h = [], [] for r in records: dt = r["updated_at"] if dt is not None: (h if is_holiday(dt) else nh).append(r) return nh, h # ── 分析 ── def classify_period(h): for n, (lo, hi) in PERIODS.items(): if lo <= h < hi: return n return "未知" def analyze_weekly_distribution(records): dc = defaultdict(int) wp = defaultdict(lambda: defaultdict(int)) for r in records: dt = r["updated_at"] if dt is None: continue wd = dt.weekday() dc[wd] += 1 if wd < 5: wp[classify_period(dt.hour)][wd] += 1 return dc, wp def analyze_weekly_trend(records): if not records: return [], {} wc = defaultdict(int) for r in records: dt = r["updated_at"] if dt is None: continue iso = dt.isocalendar() wc[(iso[0], iso[1])] += 1 sw = sorted(wc.keys()) wd = [(y, w, wc[(y, w)]) for y, w in sw] tw = len(wd) tl = sum(c for _, _, c in wd) apw = round(tl / tw, 1) if tw > 0 else 0 if sw: first = datetime.fromisocalendar(sw[0][0], sw[0][1], 1) last = datetime.fromisocalendar(sw[-1][0], sw[-1][1], 1) tsw = ((last - first).days // 7) + 1 aws = set() cur = first while cur <= last: aws.add((cur.isocalendar()[0], cur.isocalendar()[1])) cur += timedelta(days=7) ew = sorted(aws - set(sw)) else: tsw, ew = 0, [] cons = len(ew) == 0 mid = len(wd) // 2 fha = sum(c for _, _, c in wd[:mid]) / mid if mid > 0 else 0 shs = mid if len(wd) % 2 == 0 else mid + 1 shd = wd[shs:] sha = sum(c for _, _, c in shd) / 
len(shd) if shd else 0 trend = "持平" if fha > 0: r = sha / fha if r > 1.15: trend = "上涨 ↑" elif r < 0.85: trend = "下降 ↓" return wd, dict(total_weeks=tw, total_span_weeks=tsw, total_lessons=tl, avg_per_week=apw, consecutive=cons, empty_weeks=ew, first_half_avg=round(fha, 1), second_half_avg=round(sha, 1), trend=trend) # ── Markdown ── def format_markdown(role_id, role_info, retention_status, all_records, non_holiday_records, holiday_count, day_counts, weekday_periods, weeks_data, analysis): L = [] ns = datetime.now().strftime('%Y-%m-%d %H:%M') L.append(f"# 📊 学习时间分析报告 — 角色 {role_id}\n") if role_info: L.append("## 基本信息\n") L.append("| 项目 | 详情 |") L.append("|------|------|") ri = role_info L.append(f"| 角色ID | {ri['role_id']} |") L.append(f"| 账号ID | {ri['account_id']} |") if ri['nickname']: L.append(f"| 角色名字 | {ri['nickname']} |") L.append(f"| 性别 | {ri['gender']} |") if ri['age']: L.append(f"| 年龄 | {ri['age']} 岁 |") if ri['phone_tail']: L.append(f"| 账号手机号后4位 | {ri['phone_tail']} |") if retention_status: L.append(f"| 最近留存状态 | {retention_status} |") L.append("") L.append(f"**分析时间**: {ns}") L.append(f"**完课记录总数**: {len(all_records)} 条") if holiday_count > 0: L.append(f"**其中寒暑假记录**: {holiday_count} 条") L.append(f"**非寒暑假记录**: {len(non_holiday_records)} 条") L.append(f"\n> ⚠️ 一周时间分布仅基于非寒暑假数据({len(non_holiday_records)} 条),跨周趋势和完课明细包含全部数据({len(all_records)} 条)。\n") if not all_records: L.append("> ⚠️ 该角色没有任何完课记录。") return "\n".join(L) if not non_holiday_records: L.append("> ⚠️ 该角色在非寒暑假期间没有完课记录,一周时间分布无法分析。") L.append("---") L.append(f"## 一、一周时间分布(仅非寒暑假,{len(non_holiday_records)} 条记录)\n") L.append("### 各天完课数量\n") total = sum(day_counts.values()) md = max(day_counts.values()) if day_counts else 1 L.append("| 星期 | 完课数 | 占比 |") L.append("|------|--------|------|") for i, nm in enumerate(WEEKDAY_NAMES): c = day_counts.get(i, 0) pct = f"{c/total*100:.1f}%" if total > 0 else "0%" bar = "█" * max(1, int(c / md * 20)) if c > 0 else "" L.append(f"| {nm} | {c} {bar} | {pct} |") L.append("") wt 
= sum(day_counts.get(i, 0) for i in range(5)) wet = sum(day_counts.get(i, 0) for i in range(5, 7)) L.append("### 规律小结\n") if wet > 0: L.append(f"- **周末上课**: ✅ 是 — 周六 {day_counts.get(5,0)} 节,周日 {day_counts.get(6,0)} 节") else: L.append("- **周末上课**: ❌ 否 — 周末无完课记录") L.append("\n### 周一至周五上课时段分布\n") L.append("| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |") L.append("|------|------|------|------|------|------|------|") for pd in ["上午", "中午", "下午", "晚上", "凌晨"]: pdata = weekday_periods.get(pd, {}) if sum(pdata.values()) == 0: continue row = [pd] + [str(pdata.get(d, 0)) if pdata.get(d, 0) > 0 else "-" for d in range(5)] + [str(sum(pdata.values()))] L.append(f"| {' | '.join(row)} |") L.append("") L.append("**时段规律分析**:") for pd in ["上午", "中午", "下午", "晚上"]: pdata = weekday_periods.get(pd, {}) ps = sum(pdata.values()) if ps == 0: continue pct = ps / wt * 100 if wt > 0 else 0 ad = [WEEKDAY_NAMES[d] for d in range(5) if pdata.get(d, 0) > 0] L.append(f"- **{pd}**({ps}节, {pct:.0f}%)→ 集中在 {'、'.join(ad)}" if ad else f"- **{pd}**({ps}节, {pct:.0f}%)") L.append("") L.append("---\n## 二、跨周学习趋势\n") L.append("### 基本数据") L.append(f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周(含空周),有课周数 **{analysis['total_weeks']}** 周") L.append(f"- 有效完课总数 **{analysis['total_lessons']}** 节") L.append(f"- 平均每周完课 **{analysis['avg_per_week']}** 节") cs = "✅ 每周连续上课,无中断" if analysis['consecutive'] else "⚠️ 存在中断周(见下方)" L.append(f"- 连续性: {cs}\n") if analysis["empty_weeks"]: L.append("### 中断周明细") el = [] for y, w in sorted(analysis["empty_weeks"]): mon = datetime.fromisocalendar(y, w, 1) el.append(f"{y}年W{w:02d}({mon.strftime('%m/%d')}起)") L.append(f"- {', '.join(el)}\n") L.append("### 各周完课详情\n") L.append("| 周次 | 起止日期 | 完课数 | 趋势 |") L.append("|------|----------|--------|------|") mc = max(c for _, _, c in weeks_data) if weeks_data else 1 for i, (y, w, c) in enumerate(weeks_data): mon = datetime.fromisocalendar(y, w, 1) sun = mon + timedelta(days=6) dr = f"{mon.strftime('%m/%d')}-{sun.strftime('%m/%d')}" mk = "" if i > 0: pc = 
weeks_data[i-1][2] if pc > 0 and c >= pc * 2: mk = "📈 突增" elif c > pc * 1.3: mk = "📈" elif pc > 0 and c < pc * 0.7: mk = "📉" bl = max(1, int(c / mc * 15)) if c > 0 else 0 bar = "█" * bl if bl > 0 else "" L.append(f"| {y}W{w:02d} | {dr} | {c} {bar} | {mk} |") L.append("") L.append("### 趋势分析") L.append(f"- **整体趋势**: {analysis['trend']}") fhw = len(weeks_data) // 2 shw = len(weeks_data) - fhw L.append(f" - 前半段(前 {fhw} 周)平均: {analysis['first_half_avg']} 节/周") L.append(f" - 后半段(后 {shw} 周)平均: {analysis['second_half_avg']} 节/周") L.append("") if len(weeks_data) >= 2: cnts = [c for _, _, c in weeks_data] ev = [] for i in range(1, len(cnts)): if cnts[i-1] > 0 and cnts[i] >= cnts[i-1] * 2: y, w, _ = weeks_data[i] mon = datetime.fromisocalendar(y, w, 1) ev.append(f"⚡ **{y}年W{w:02d}周({mon.strftime('%m/%d')}起)完课量突增**:{cnts[i-1]}→{cnts[i]} 节") break for i in range(1, len(cnts)): if cnts[i-1] >= 3 and cnts[i] <= 1: y, w, _ = weeks_data[i] mon = datetime.fromisocalendar(y, w, 1) ev.append(f"🔻 **{y}年W{w:02d}周({mon.strftime('%m/%d')}起)完课量骤降**:{cnts[i-1]}→{cnts[i]} 节") break if ev: L.append("**值得关注的变化**:") for e in ev: L.append(f"- {e}") L.append("") L.append("---") L.append(f"## 三、完课记录明细(全部 {len(all_records)} 条记录)\n") L.append("| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |") L.append("|------|------|------|------|------|------|--------|") for i, r in enumerate(all_records, 1): dt = r["updated_at"] if dt is None: continue L.append(f"| {i} | {dt.strftime('%Y-%m-%d')} | {dt.strftime('%H:%M')} | {WEEKDAY_NAMES[dt.weekday()]} | {classify_period(dt.hour)} | {r.get('level') or '-'} | {r.get('chapter_id') or '-'} |") L.append("") L.append("---") L.append("> 💡 是否需要将以上所有详细信息生成为一个 HTML 文件?回复「是」或「需要」即可。") return "\n".join(L) # ── HTML 辅助 ── def _td(dt): return dt.strftime('%Y-%m-%d %H:%M') if dt else "-" def _ts(dt): return dt.strftime('%Y-%m-%d') if dt else "-" def _dur(secs): if secs is None or secs < 0: return "-" m, s = divmod(int(secs), 60) if m >= 60: h, m = divmod(m, 60) return f"{h}时{m}分{s}秒" 
return f"{m}分{s}秒" if m > 0 else f"{s}秒" def _weekly_text(nh_records, day_counts, weekday_periods): tn = len(nh_records) if tn == 0: return "该角色在非寒暑假期间没有完课记录,无法分析周上课时间分布。" parts = [] mdi = max(day_counts, key=day_counts.get, default=-1) if mdi >= 0: parts.append(f"非寒暑假期间共完成 {tn} 节课,主要集中在 **{WEEKDAY_NAMES[mdi]}**({day_counts[mdi]} 节,占 {day_counts[mdi]/tn*100:.0f}%)。") wt = sum(day_counts.get(i, 0) for i in range(5)) tp, tpc = None, 0 for p in ["晚上", "上午", "下午", "中午"]: c = sum(weekday_periods.get(p, {}).values()) if c > tpc: tpc, tp = c, p if tp and tpc > 0: parts.append(f"工作日上课集中在 **{tp}**时段({tpc} 节,占 {tpc/wt*100:.0f}%)。" if wt > 0 else "") sat, sun = day_counts.get(5, 0), day_counts.get(6, 0) parts.append(f"周末也保持上课节奏,周六 {sat} 节、周日 {sun} 节。" if sat + sun > 0 else "周末无上课记录。") return " ".join(parts) def _trend_text(weeks_data, analysis): if not weeks_data: return "无完课记录,无法分析趋势。" parts = [f"完课跨越 {analysis['total_span_weeks']} 周(有课 {analysis['total_weeks']} 周),共 {analysis['total_lessons']} 节,周均 {analysis['avg_per_week']} 节。"] if analysis['consecutive']: parts.append("学习连续性良好,无中断周。") else: el = [] for y, w in sorted(analysis['empty_weeks']): mon = datetime.fromisocalendar(y, w, 1) el.append(f"{y}年W{w:02d}({mon.strftime('%m/%d')}起)") parts.append(f"存在间断:{'、'.join(el)}。") parts.append(f"整体趋势:{analysis['trend']},前半段平均 {analysis['first_half_avg']} 节/周 → 后半段 {analysis['second_half_avg']} 节/周。") return " ".join(parts) def _summary_items(role_info, retention_status, all_records, non_holiday_records, day_counts, weekday_periods, weeks_data, analysis): items = [] total = len(all_records) tn = len(non_holiday_records) if total == 0: items.append("暂无完课记录。") return items if retention_status == "流失": items.append(f"⚠️ 近14天无完课,已**流失**。历史共 {total} 节完课记录。") elif retention_status == "正常": items.append(f"✅ 状态**正常**,近14天内有完课。累计 {total} 节完课。") else: items.append(f"累计 {total} 节完课记录。") if weeks_data: first = datetime.fromisocalendar(weeks_data[0][0], weeks_data[0][1], 1) last = 
datetime.fromisocalendar(weeks_data[-1][0], weeks_data[-1][1], 1) sm = (last.year - first.year) * 12 + (last.month - first.month) + 1 if sm >= 6: items.append(f"📅 长期用户,学习跨度约 {sm} 个月。") elif sm >= 2: items.append(f"📅 中期用户,学习跨度约 {sm} 个月。") else: items.append(f"🆕 新用户,学习跨度约 {sm} 个月,尚在形成学习习惯阶段。") apw = analysis['avg_per_week'] if apw >= 6: items.append(f"🔥 高强度学习,周均 {apw} 节。") elif apw >= 4: items.append(f"📚 稳定学习,周均 {apw} 节。") elif apw > 0: items.append(f"🐢 低频学习,周均 {apw} 节。") if tn > 0: mdi = max(day_counts, key=day_counts.get) wt = sum(day_counts.get(i, 0) for i in range(5)) ec = sum(weekday_periods.get("晚上", {}).values()) if ec > wt * 0.6 and wt > 0: items.append(f"🌙 晚间学习型,{ec/wt*100:.0f}% 的课在晚上。") mc = sum(weekday_periods.get("上午", {}).values()) if mc > wt * 0.4 and wt > 0: items.append(f"☀️ 上午学习型,{mc/wt*100:.0f}% 的课在上午。") if analysis['trend'] == "下降 ↓": items.append("📉 学习频率呈下降趋势,需关注。") elif analysis['trend'] == "上涨 ↑": items.append("📈 学习频率呈上升趋势,势头良好。") hc = total - tn if hc > 0: items.append(f"🏖️ 寒暑假期间也有坚持学习({hc} 节)。") return items # ── HTML ── def format_html(role_id, role_info, retention_status, all_records, non_holiday_records, holiday_count, day_counts, weekday_periods, weeks_data, analysis): cm = fetch_chapter_info_map() ns = datetime.now().strftime('%Y-%m-%d %H:%M') # Part 1: extended basic info rt = role_info.get("reg_time", "-") if role_info else "-" ft_str = lt_str = llv = lu = lls = "-" if all_records: ft_str = _td(all_records[0]["updated_at"]) lt_str = _td(all_records[-1]["updated_at"]) lcid = all_records[-1].get("chapter_id") if lcid and lcid in cm: ci = cm[lcid] llv = ci["level"] or "-" lu = str(ci["unit_index"]) if ci["unit_index"] >= 0 else "-" lls = str(ci["lesson_index"]) if ci["lesson_index"] > 0 else "-" # Part 2: detail rows rows_html = "" for i, r in enumerate(all_records, 1): cid = r.get("chapter_id") ci = cm.get(cid, {}) if cid else {} lv = ci.get("level") or r.get("level") or "-" ui = str(ci.get("unit_index", "-")) if ci.get("unit_index", -1) 
>= 0 else "-" lsn = str(ci.get("lesson_index", "-")) if ci.get("lesson_index", 0) > 0 else "-" sd = r.get("created_at") ed = r["updated_at"] dur = (ed - sd).total_seconds() if sd and ed else None rows_html += f""" {i}{lv}{ui}{lsn} {WEEKDAY_NAMES[ed.weekday()] if ed else '-'} {classify_period(ed.hour) if ed else '-'} {_td(sd)}{_td(ed)}{_dur(dur)} """ wt_text = _weekly_text(non_holiday_records, day_counts, weekday_periods) tt_text = _trend_text(weeks_data, analysis) si = _summary_items(role_info, retention_status, all_records, non_holiday_records, day_counts, weekday_periods, weeks_data, analysis) si_html = "".join(f"
  • {it}
  • " for it in si) ri = role_info or {} return f""" 学习时间分析 — 角色 {role_id}

    📊 学习时间分析报告 — 角色 {role_id}({ri.get('nickname','')})

    一、基本信息

    角色ID{ri.get('role_id','-')}
    账号ID{ri.get('account_id','-')}
    角色姓名{ri.get('nickname','-')}
    角色性别{ri.get('gender','-')}
    角色年龄{f"{ri['age']} 岁" if ri.get('age') else '-'}
    手机号后4位{ri.get('phone_tail','-')}
    最近留存状态{retention_status}
    注册时间{rt}
    第一次完课时间{ft_str}
    最后一次完课时间{lt_str}
    最后一次完课Level {llv} / Unit {lu} / Lesson {lls}

    二、完课记录明细(共 {len(all_records)} 条)

    {rows_html}
    序号LevelUnitLesson星期时段开始上课时间完课时间完课耗时

    三、周上课时间分布分析

    {wt_text}

    四、跨周趋势分析

    {tt_text}

    五、关键特征总结

      {si_html}
    分析时间:{ns} | 完课总数:{len(all_records)} 条 | 非寒暑假:{len(non_holiday_records)} 条 | 寒暑假:{holiday_count} 条
    """ # ── main ── def main(): ap = argparse.ArgumentParser(description="角色学习时间分析工具") ap.add_argument("role_id", type=int) ap.add_argument("--format", choices=["md","html"], default="md") ap.add_argument("--output","-o", default=None) args = ap.parse_args() all_rec = fetch_completion_records(args.role_id) nh, ho = split_records(all_rec) ri = fetch_role_info(args.role_id) rs = check_retention(all_rec) dc, wp = analyze_weekly_distribution(nh) wd, an = analyze_weekly_trend(all_rec) if args.format == "html": out = format_html(args.role_id, ri, rs, all_rec, nh, len(ho), dc, wp, wd, an) if args.output: os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True) with open(args.output, "w", encoding="utf-8") as f: f.write(out) print(f"HTML 报告已保存到: {args.output}") else: print(out) else: print(format_markdown(args.role_id, ri, rs, all_rec, nh, len(ho), dc, wp, wd, an)) if __name__ == "__main__": main()