ai_member_xiaoban/skills/studytime-analysis/scripts/studytime_analysis.py

601 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
studytime-analysis — 角色学习时间分析工具
用法: python3 studytime_analysis.py <role_id>
输出: Markdown 格式的分析报告
数据源: PostgreSQL Online (vala 库)
核心表: user_chapter_play_record_0~7
"""
import os
import sys
import psycopg2
import psycopg2.extras
import pymysql
from datetime import datetime, timedelta, timezone
from collections import defaultdict, OrderedDict
# ── Configuration ─────────────────────────────────────
# PostgreSQL connection settings. Every value is read from a PG_DB_* environment
# variable and falls back to the literal default given here.
PG_CONFIG = {
    "host": os.environ.get("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    "port": int(os.environ.get("PG_DB_PORT", "28591")),
    "user": os.environ.get("PG_DB_USER", "ai_member"),
    "password": os.environ.get("PG_DB_PASSWORD", ""),
    "dbname": os.environ.get("PG_DB_DATABASE", "vala"),
}
# MySQL Online (vala_user database — basic role information)
MYSQL_CONFIG = {
    "host": os.environ.get("MYSQL_HOST_online", "bj-cdb-dh2fkqa0.sql.tencentcdb.com"),
    "port": int(os.environ.get("MYSQL_PORT_online", "27751")),
    "user": os.environ.get("MYSQL_USERNAME_online", "read_only"),
    "password": os.environ.get("MYSQL_PASSWORD_online", ""),
    "db": "vala_user",
    "charset": "utf8mb4",
}
EXCLUDED_MONTHS = (1, 2, 7, 8)  # winter break Jan-Feb, summer break Jul-Aug
# Display labels indexed by datetime.weekday() (0 = Monday ... 6 = Sunday).
WEEKDAY_NAMES = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]
# Time-of-day buckets: label -> (start_hour, end_hour). classify_period treats
# each range as half-open, i.e. start_hour <= hour < end_hour.
PERIODS = OrderedDict([
    ("凌晨", (0, 6)),
    ("上午", (6, 12)),
    ("中午", (12, 14)),
    ("下午", (14, 18)),
    ("晚上", (18, 24)),
])
# ── 数据库查询 ────────────────────────────────────────
def get_connection():
    """Open a new PostgreSQL connection using the values in ``PG_CONFIG``.

    Returns:
        The psycopg2 connection object. It is returned open; the caller is
        expected to close it.
    """
    connect_kwargs = {
        field: PG_CONFIG[field]
        for field in ("host", "port", "user", "password", "dbname")
    }
    return psycopg2.connect(**connect_kwargs)
def get_mysql_connection():
    """Open a new MySQL connection (``vala_user`` database) from ``MYSQL_CONFIG``.

    Returns:
        The pymysql connection object. It is returned open; the caller is
        expected to close it.
    """
    connect_kwargs = {
        field: MYSQL_CONFIG[field]
        for field in ("host", "port", "user", "password", "db", "charset")
    }
    return pymysql.connect(**connect_kwargs)
def fetch_role_info(role_id):
    """Load a role's basic profile from the MySQL ``vala_user`` database.

    Args:
        role_id: Value matched against ``vala_app_character.id``.

    Returns:
        ``None`` when no row matches. Otherwise a dict with the keys
        ``role_id``, ``account_id``, ``nickname``, ``gender``, ``age`` and
        ``phone_tail``. ``age`` is the current year minus the birth year (an
        int), or ``""`` when no year can be read from the birthday.
        ``phone_tail`` holds at most the last four digits found in the
        account's phone number, or ``""`` when there are none.
    """
    query = """
        SELECT
            c.id AS role_id,
            c.account_id,
            c.nickname,
            c.gender,
            c.birthday,
            a.tel
        FROM vala_app_character c
        LEFT JOIN vala_app_account a ON c.account_id = a.id
        WHERE c.id = %s
    """
    connection = get_mysql_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute(query, (role_id,))
            record = cursor.fetchone()
    finally:
        connection.close()

    if not record:
        return None

    role_id_val, account_id, nickname, gender, birthday, tel = record

    # Gender label: codes 0 and 1 (and NULL) yield an empty label; any other
    # value is shown verbatim.
    # NOTE(review): the labels for 0 and 1 are empty strings in this file —
    # confirm whether display text was intended for them.
    if gender is None or gender in (0, 1):
        gender_label = ""
    else:
        gender_label = str(gender)

    # Age is derived from the year component only; the birthday may be
    # formatted like "2015-5-28" or "2015-05-28".
    age = ""
    if birthday:
        try:
            year_text = str(birthday).split("-")[0]
            if year_text.isdigit():
                age = datetime.now().year - int(year_text)
        except (ValueError, IndexError):
            # Best effort: an unparseable birthday simply leaves age blank.
            pass

    # Keep only digit characters (the number may be masked, e.g.
    # "186****1625") and take the trailing four; slicing also covers the
    # fewer-than-four and no-digit cases.
    phone_tail = ""
    if tel:
        phone_tail = "".join(filter(str.isdigit, str(tel)))[-4:]

    return {
        "role_id": role_id_val,
        "account_id": account_id,
        "nickname": nickname or "",
        "gender": gender_label,
        "age": age,
        "phone_tail": phone_tail,
    }
def check_retention(records, cutoff_days=14):
    """Classify a role's retention from its completion timestamps.

    Args:
        records: Sequence of mappings, each with an ``updated_at`` entry that
            is a ``datetime`` (timezone-aware or naive) or ``None``.
        cutoff_days: Size of the "recent" window in days.

    Returns:
        ``"无完课记录"`` when ``records`` is empty, ``"正常"`` when at least one
        record falls inside the last ``cutoff_days`` days, otherwise ``"流失"``.
    """
    if not records:
        return "无完课记录"

    window = timedelta(days=cutoff_days)
    naive_cutoff = datetime.now() - window
    aware_cutoff = datetime.now(timezone.utc) - window

    for record in records:
        stamp = record["updated_at"]
        if stamp is None:
            # Records without a timestamp are skipped, matching the other
            # analysis functions in this file.
            continue
        if stamp.utcoffset() is None:
            # Naive timestamp: compare against the naive local clock.
            if stamp >= naive_cutoff:
                return "正常"
        elif stamp >= aware_cutoff:
            # Aware timestamp: compare as absolute instants so the result does
            # not depend on the timezone of the machine running the script.
            return "正常"
    return "流失"
def fetch_completion_records(role_id):
    """Fetch every completion record of a role, vacation months included.

    Rows with ``play_status = 1`` and ``user_id = role_id`` are selected from
    each of the eight tables ``user_chapter_play_record_0`` through
    ``user_chapter_play_record_7`` and combined with ``UNION ALL``.

    Args:
        role_id: Value bound to the ``user_id`` filter of every table.

    Returns:
        The rows from ``fetchall()`` on a ``RealDictCursor``, carrying the
        columns ``user_id``, ``chapter_id``, ``chapter_unique_id``, ``level``
        and ``updated_at``, ordered by ``updated_at`` ascending.
    """
    shard_ids = range(8)
    # One named placeholder per table, all bound to the same role id.
    params = {f"rid_{shard}": role_id for shard in shard_ids}
    shard_queries = [
        f"""
        SELECT user_id, chapter_id, chapter_unique_id, level, updated_at
        FROM user_chapter_play_record_{shard}
        WHERE user_id = %(rid_{shard})s
          AND play_status = 1
        """
        for shard in shard_ids
    ]
    combined = " UNION ALL ".join(shard_queries)
    sql = f"""
        SELECT * FROM (
            {combined}
        ) t
        ORDER BY updated_at ASC
    """

    connection = get_connection()
    try:
        with connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
            cursor.execute(sql, params)
            return cursor.fetchall()
    finally:
        connection.close()
def is_holiday(dt):
    """Return True when ``dt`` falls in a month listed in ``EXCLUDED_MONTHS``.

    The excluded months are the winter break (Jan-Feb) and the summer break
    (Jul-Aug). ``None`` is never a holiday.
    """
    return dt is not None and dt.month in EXCLUDED_MONTHS
def split_records(records):
    """Partition records into term-time and vacation groups.

    Records whose ``updated_at`` is ``None`` are dropped from both groups.

    Returns:
        A ``(non_holiday, holiday)`` pair of lists: ``non_holiday`` feeds the
        weekly distribution analysis, ``holiday`` holds the winter/summer
        break records.
    """
    term_time = []
    vacation = []
    for record in records:
        stamp = record["updated_at"]
        if stamp is not None:
            bucket = vacation if is_holiday(stamp) else term_time
            bucket.append(record)
    return term_time, vacation
# ── 分析函数 ──────────────────────────────────────────
def classify_period(hour):
    """Map an hour of the day to its period label from ``PERIODS``.

    Each period covers the half-open range ``start <= hour < end``. An hour
    that matches no range yields ``"未知"``.
    """
    matching_labels = (
        label for label, (start, end) in PERIODS.items() if start <= hour < end
    )
    return next(matching_labels, "未知")
def analyze_weekly_distribution(records):
    """Count completions per weekday and, for Monday-Friday, per time period.

    Weekday and hour are read directly from each ``updated_at`` value; no
    timezone conversion is applied. Records whose ``updated_at`` is ``None``
    are ignored.

    Args:
        records: Iterable of mappings with an ``updated_at`` datetime.

    Returns:
        ``(day_counts, weekday_periods)`` where ``day_counts`` maps weekday
        index (0 = Monday ... 6 = Sunday) to a count, and ``weekday_periods``
        maps period label -> weekday index (0-4 only) -> count. Both are
        ``defaultdict`` instances.
    """
    day_counts = defaultdict(int)
    weekday_periods = defaultdict(lambda: defaultdict(int))
    for record in records:
        moment = record["updated_at"]
        if moment is None:
            continue
        weekday = moment.weekday()  # 0 = Monday
        day_counts[weekday] += 1
        if weekday < 5:
            # The period breakdown only covers Monday-Friday.
            weekday_periods[classify_period(moment.hour)][weekday] += 1
    return day_counts, weekday_periods
def analyze_weekly_trend(records):
    """Aggregate completions per ISO week and summarise the trend.

    Args:
        records: Sequence of mappings with an ``updated_at`` datetime; entries
            whose ``updated_at`` is ``None`` are ignored.

    Returns:
        ``(weeks_data, analysis)``. ``weeks_data`` is a chronologically sorted
        list of ``(iso_year, iso_week, count)`` for weeks with at least one
        record. ``analysis`` is a dict with ``total_weeks``,
        ``total_span_weeks`` (first to last active week, gaps included),
        ``total_lessons``, ``avg_per_week``, ``consecutive``, ``empty_weeks``,
        ``first_half_avg``, ``second_half_avg`` and ``trend``. For an empty
        ``records`` the result is ``([], {})``.
    """
    if not records:
        return [], {}

    per_week = defaultdict(int)
    for record in records:
        stamp = record["updated_at"]
        if stamp is not None:
            iso_year, iso_week = stamp.isocalendar()[:2]
            per_week[(iso_year, iso_week)] += 1

    active_keys = sorted(per_week)
    weeks_data = [(yr, wk, per_week[(yr, wk)]) for yr, wk in active_keys]
    lesson_counts = [count for _, _, count in weeks_data]

    total_weeks = len(weeks_data)
    total_lessons = sum(lesson_counts)
    avg_per_week = round(total_lessons / total_weeks, 1) if total_weeks > 0 else 0

    # Calendar span from the first to the last active week, gaps included.
    if active_keys:
        first_year, first_week = active_keys[0]
        last_year, last_week = active_keys[-1]
        first_monday = datetime.fromisocalendar(first_year, first_week, 1)
        last_monday = datetime.fromisocalendar(last_year, last_week, 1)
        total_span_weeks = (last_monday - first_monday).days // 7 + 1
        calendar_weeks = {
            tuple((first_monday + timedelta(weeks=offset)).isocalendar()[:2])
            for offset in range(total_span_weeks)
        }
        empty_weeks = sorted(calendar_weeks - set(active_keys))
    else:
        total_span_weeks = 0
        empty_weeks = []

    # Trend compares the first half of the active weeks with the second half;
    # with an odd number of weeks the middle one belongs to neither half.
    half = total_weeks // 2
    leading = lesson_counts[:half]
    trailing = lesson_counts[half + total_weeks % 2:]
    first_half_avg = sum(leading) / half if half > 0 else 0
    second_half_avg = sum(trailing) / len(trailing) if trailing else 0

    trend = "持平"
    if first_half_avg > 0:
        ratio = second_half_avg / first_half_avg
        if ratio > 1.15:
            trend = "上涨 ↑"
        elif ratio < 0.85:
            trend = "下降 ↓"

    analysis = {
        "total_weeks": total_weeks,
        "total_span_weeks": total_span_weeks,
        "total_lessons": total_lessons,
        "avg_per_week": avg_per_week,
        "consecutive": len(empty_weeks) == 0,
        "empty_weeks": empty_weeks,
        "first_half_avg": round(first_half_avg, 1),
        "second_half_avg": round(second_half_avg, 1),
        "trend": trend,
    }
    return weeks_data, analysis
# ── 输出格式化 ────────────────────────────────────────
def format_report(role_id, role_info, retention_status, all_records, non_holiday_records, holiday_count, day_counts, weekday_periods, weeks_data, analysis):
    """Render the study-time analysis as a Markdown report.

    Args:
        role_id: Role identifier shown in the report title.
        role_info: Dict with ``role_id``, ``account_id``, ``nickname``,
            ``gender``, ``age`` and ``phone_tail``, or a falsy value to omit
            the profile table.
        retention_status: Retention label shown in the profile table (omitted
            when falsy).
        all_records: Every completion record (used for the detail table).
        non_holiday_records: Records outside vacation months (basis of the
            weekly distribution section).
        holiday_count: Number of vacation records (reported, not analysed).
        day_counts, weekday_periods: Weekly distribution results computed from
            ``non_holiday_records``.
        weeks_data, analysis: Weekly trend results computed from
            ``all_records``.

    Returns:
        The report as a single string with lines joined by newlines. When
        ``all_records`` is empty the report ends after the overview with a
        notice, and ``analysis`` is not read.
    """
    lines = [f"# 📊 学习时间分析报告 — 角色 {role_id}", ""]
    if role_info:
        lines.extend(_report_role_info(role_info, retention_status))
    lines.extend(_report_overview(all_records, non_holiday_records, holiday_count))

    if not all_records:
        lines.append("> ⚠️ 该角色没有任何完课记录。")
        return "\n".join(lines)
    if not non_holiday_records:
        lines.append("> ⚠️ 该角色在非寒暑假期间没有完课记录,一周时间分布无法分析。")

    lines.extend(_report_weekly_distribution(len(non_holiday_records), day_counts, weekday_periods))
    lines.extend(_report_weekly_trend(weeks_data, analysis))
    lines.extend(_report_record_details(all_records))
    return "\n".join(lines)


def _text_bar(count, peak, width):
    """Return a block-character bar scaled to ``width`` cells ('' for a zero count)."""
    if count <= 0:
        return ""
    return "█" * max(1, int(count / peak * width))


def _iso_monday(year, week):
    """Return the Monday (as a datetime) that starts the given ISO week."""
    return datetime.fromisocalendar(year, week, 1)


def _report_role_info(role_info, retention_status):
    """Build the profile table lines; optional rows are skipped when empty."""
    out = [
        "## 基本信息",
        "",
        "| 项目 | 详情 |",
        "|------|------|",
        f"| 角色ID | {role_info['role_id']} |",
        f"| 账号ID | {role_info['account_id']} |",
    ]
    if role_info['nickname']:
        out.append(f"| 角色名字 | {role_info['nickname']} |")
    out.append(f"| 性别 | {role_info['gender']} |")
    if role_info['age']:
        out.append(f"| 年龄 | {role_info['age']} 岁 |")
    if role_info['phone_tail']:
        out.append(f"| 账号手机号后4位 | {role_info['phone_tail']} |")
    if retention_status:
        out.append(f"| 最近留存状态 | {retention_status} |")
    out.append("")
    return out


def _report_overview(all_records, non_holiday_records, holiday_count):
    """Build the data overview lines (timestamp, record totals, scope note)."""
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
    out = [
        f"**分析时间**: {now_str}",
        f"**完课记录总数**: {len(all_records)}",
    ]
    if holiday_count > 0:
        out.append(f"**其中寒暑假记录**: {holiday_count} 条(寒假1-2月、暑假7-8月)")
        out.append(f"**非寒暑假记录**: {len(non_holiday_records)}")
    out.append("")
    out.append(f"> ⚠️ **说明**: 「一周时间分布」仅基于非寒暑假数据(共 {len(non_holiday_records)} 条),排除寒暑假作息差异的干扰。")
    out.append(f"> 「跨周趋势」和「完课明细」包含全部数据(共 {len(all_records)} 条),反映完整学习轨迹。")
    out.append("")
    return out


def _report_weekly_distribution(record_count, day_counts, weekday_periods):
    """Build section one: per-weekday counts and the Mon-Fri period breakdown."""
    out = [
        "---",
        f"## 一、一周时间分布(仅非寒暑假,{record_count} 条记录)",
        "",
        "### 各天完课数量",
        "",
    ]

    # Per-day table.
    total = sum(day_counts.values())
    max_day = max(day_counts.values()) if day_counts else 1
    out.append("| 星期 | 完课数 | 占比 |")
    out.append("|------|--------|------|")
    for index, name in enumerate(WEEKDAY_NAMES):
        cnt = day_counts.get(index, 0)
        pct = f"{cnt / total * 100:.1f}%" if total > 0 else "0%"
        bar = _text_bar(cnt, max_day, 20)
        out.append(f"| {name} | {cnt} {bar} | {pct} |")
    out.append("")

    # Weekend summary.
    weekday_total = sum(day_counts.get(i, 0) for i in range(5))
    sat = day_counts.get(5, 0)
    sun = day_counts.get(6, 0)
    out.append("### 规律小结")
    out.append("")
    if sat + sun > 0:
        out.append(f"- **周末上课**: ✅ 是 — 周六 {sat} 节,周日 {sun} 节")
    else:
        out.append("- **周末上课**: ❌ 否 — 周末无完课记录")

    # Period table (Monday to Friday); periods without records are omitted.
    out.append("")
    out.append("### 周一至周五上课时段分布")
    out.append("")
    out.append("| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |")
    out.append("|------|------|------|------|------|------|------|")
    for period in ["上午", "中午", "下午", "晚上", "凌晨"]:
        period_data = weekday_periods.get(period, {})
        period_total = sum(period_data.values())
        if period_total == 0:
            continue
        cells = [period]
        for day in range(5):
            cnt = period_data.get(day, 0)
            cells.append(str(cnt) if cnt > 0 else "-")
        cells.append(str(period_total))
        out.append(f"| {' | '.join(cells)} |")
    out.append("")

    # Period pattern bullets.
    out.append("**时段规律分析**:")
    for period in ["上午", "中午", "下午", "晚上"]:
        period_data = weekday_periods.get(period, {})
        period_sum = sum(period_data.values())
        if period_sum == 0:
            continue
        pct = period_sum / weekday_total * 100 if weekday_total > 0 else 0
        active_days = [WEEKDAY_NAMES[d] for d in range(5) if period_data.get(d, 0) > 0]
        summary = f"- **{period}**({period_sum}节, {pct:.0f}%)"
        if active_days:
            days_text = "、".join(active_days)
            summary += f"→ 集中在 {days_text}"
        out.append(summary)
    out.append("")
    return out


def _report_weekly_trend(weeks_data, analysis):
    """Build section two: span figures, gap weeks, per-week table and trend notes."""
    continuity = '✅ 每周连续上课,无中断' if analysis['consecutive'] else '⚠️ 存在中断周(见下方)'
    out = [
        "---",
        "## 二、跨周学习趋势",
        "",
        "### 基本数据",
        f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周(含空周),有课周数 **{analysis['total_weeks']}** 周",
        f"- 有效完课总数 **{analysis['total_lessons']}** 节",
        f"- 平均每周完课 **{analysis['avg_per_week']}** 节",
        f"- 连续性: {continuity}",
        "",
    ]

    if analysis["empty_weeks"]:
        out.append("### 中断周明细")
        gap_labels = []
        for year, week in sorted(analysis["empty_weeks"]):
            start = _iso_monday(year, week).strftime('%m/%d')
            gap_labels.append(f"{year}年W{week:02d}({start}起)")
        out.append(f"- {', '.join(gap_labels)}")
        out.append("")

    out.append("### 各周完课详情")
    out.append("")
    out.append("| 周次 | 起止日期 | 完课数 | 趋势 |")
    out.append("|------|----------|--------|------|")
    max_count = max(c for _, _, c in weeks_data) if weeks_data else 1
    prev_cnt = None
    for year, week, cnt in weeks_data:
        monday = _iso_monday(year, week)
        sunday = monday + timedelta(days=6)
        date_range = f"{monday.strftime('%m/%d')}-{sunday.strftime('%m/%d')}"
        # Marker compares this week with the previous *active* week.
        marker = ""
        if prev_cnt is not None:
            if prev_cnt > 0 and cnt >= prev_cnt * 2:
                marker = "📈 突增"
            elif cnt > prev_cnt * 1.3:
                marker = "📈"
            elif prev_cnt > 0 and cnt < prev_cnt * 0.7:
                marker = "📉"
        bar = _text_bar(cnt, max_count, 15)
        out.append(f"| {year}W{week:02d} | {date_range} | {cnt} {bar} | {marker} |")
        prev_cnt = cnt
    out.append("")

    # Trend summary. Both averages in `analysis` are taken over len // 2
    # weeks (the middle week is left out when the number of weeks is odd), so
    # the same count is reported for both halves.
    half_weeks = len(weeks_data) // 2
    out.append("### 趋势分析")
    out.append(f"- **整体趋势**: {analysis['trend']}")
    out.append(f" - 前半段(前 {half_weeks} 周)平均: {analysis['first_half_avg']} 节/周")
    out.append(f" - 后半段(后 {half_weeks} 周)平均: {analysis['second_half_avg']} 节/周")
    out.append("")

    # Notable changes: the first surge (count at least doubled) and the first
    # sharp drop (from 3 or more down to at most 1).
    events_found = []
    week_pairs = list(zip(weeks_data, weeks_data[1:]))
    for (_, _, before), (year, week, after) in week_pairs:
        if before > 0 and after >= before * 2:
            start = _iso_monday(year, week).strftime('%m/%d')
            events_found.append(f"⚡ **{year}年W{week:02d}周({start}起)完课量突增**({before} → {after} 节)")
            break
    for (_, _, before), (year, week, after) in week_pairs:
        if before >= 3 and after <= 1:
            start = _iso_monday(year, week).strftime('%m/%d')
            events_found.append(f"🔻 **{year}年W{week:02d}周({start}起)完课量骤降**({before} → {after} 节)")
            break
    if events_found:
        out.append("**值得关注的变化**:")
        out.extend(f"- {event}" for event in events_found)
        out.append("")
    return out


def _report_record_details(all_records):
    """Build section three: one table row per record that has a timestamp."""
    out = [
        "---",
        f"## 三、完课记录明细(全部 {len(all_records)} 条记录)",
        "",
        "| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |",
        "|------|------|------|------|------|------|--------|",
    ]
    for index, record in enumerate(all_records, 1):
        dt = record["updated_at"]
        if dt is None:
            continue
        date_str = dt.strftime("%Y-%m-%d")
        time_str = dt.strftime("%H:%M")
        weekday = WEEKDAY_NAMES[dt.weekday()]
        period = classify_period(dt.hour)
        level = record.get("level") or "-"
        chapter_id = record.get("chapter_id") or "-"
        out.append(f"| {index} | {date_str} | {time_str} | {weekday} | {period} | {level} | {chapter_id} |")
    out.append("")
    return out
# ── 主函数 ────────────────────────────────────────────
def main():
    """Command-line entry point: analyse one role and print its report.

    Expects the role id as the first command-line argument. Exits with
    status 1 (after writing a message to stderr) when the argument is missing
    or is not an integer.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python3 studytime_analysis.py <role_id>", file=sys.stderr)
        sys.exit(1)

    raw_role_id = cli_args[0]
    try:
        role_id = int(raw_role_id)
    except ValueError:
        print(f"错误: 角色ID必须是数字收到: {raw_role_id}", file=sys.stderr)
        sys.exit(1)

    # Completion records (PostgreSQL), split into term-time and vacation sets.
    all_records = fetch_completion_records(role_id)
    non_holiday_records, holiday_records = split_records(all_records)

    # Basic role information (MySQL).
    role_info = fetch_role_info(role_id)

    # Retention status is judged on every record.
    retention_status = check_retention(all_records)

    # Weekly distribution uses term-time data only; the weekly trend uses
    # every record.
    day_counts, weekday_periods = analyze_weekly_distribution(non_holiday_records)
    weeks_data, analysis = analyze_weekly_trend(all_records)

    print(
        format_report(
            role_id,
            role_info,
            retention_status,
            all_records,
            non_holiday_records,
            len(holiday_records),
            day_counts,
            weekday_periods,
            weeks_data,
            analysis,
        )
    )


if __name__ == "__main__":
    main()