444 lines
16 KiB
Python
444 lines
16 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
studytime-analysis — 角色学习时间分析工具
|
||
用法: python3 studytime_analysis.py <role_id>
|
||
输出: Markdown 格式的分析报告
|
||
|
||
数据源: PostgreSQL Online (vala 库)
|
||
核心表: user_chapter_play_record_0~7
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import psycopg2
|
||
import psycopg2.extras
|
||
from datetime import datetime, timedelta
|
||
from collections import defaultdict, OrderedDict
|
||
|
||
# ── Configuration ─────────────────────────────────────
# Connection settings; each value can be overridden through the matching
# PG_DB_* environment variable, otherwise the default below is used.
PG_CONFIG = dict(
    host=os.getenv("PG_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    port=int(os.getenv("PG_DB_PORT", "28591")),
    user=os.getenv("PG_DB_USER", "ai_member"),
    password=os.getenv("PG_DB_PASSWORD", ""),
    dbname=os.getenv("PG_DB_DATABASE", "vala"),
)

# Calendar months treated as school vacation: winter (Jan-Feb), summer (Jul-Aug).
EXCLUDED_MONTHS = (1, 2, 7, 8)

# Display names indexed by datetime.weekday() (0 = Monday ... 6 = Sunday).
WEEKDAY_NAMES = [
    "周一",
    "周二",
    "周三",
    "周四",
    "周五",
    "周六",
    "周日",
]

# Period name -> half-open hour range [start, end), in chronological order.
PERIODS = OrderedDict(
    (label, (start_hour, end_hour))
    for label, start_hour, end_hour in (
        ("凌晨", 0, 6),
        ("上午", 6, 12),
        ("中午", 12, 14),
        ("下午", 14, 18),
        ("晚上", 18, 24),
    )
)
|
||
|
||
|
||
# ── 数据库查询 ────────────────────────────────────────
|
||
|
||
def get_connection():
    """Open and return a new psycopg2 connection built from ``PG_CONFIG``.

    The caller owns the returned connection and is responsible for closing it.
    """
    connect_keys = ("host", "port", "user", "password", "dbname")
    connect_kwargs = {key: PG_CONFIG[key] for key in connect_keys}
    return psycopg2.connect(**connect_kwargs)
|
||
|
||
|
||
def fetch_completion_records(role_id):
    """Fetch a role's completion records, leaving out the vacation months.

    Queries the eight tables ``user_chapter_play_record_0`` .. ``_7`` for rows
    with ``user_id = role_id`` and ``play_status = 1`` whose ``updated_at``
    month is not listed in ``EXCLUDED_MONTHS``, and returns them ordered by
    ``updated_at`` ascending.

    Args:
        role_id: Value matched against the ``user_id`` column.

    Returns:
        A list of ``RealDictCursor`` rows with the keys ``user_id``,
        ``chapter_id``, ``chapter_unique_id``, ``level`` and ``updated_at``.

    Raises:
        psycopg2.Error: Propagated unchanged on connection or query failure.
    """
    excluded_months = tuple(EXCLUDED_MONTHS)
    params = {"role_id": role_id}

    # The month list comes from EXCLUDED_MONTHS instead of a second hard-coded
    # copy in the SQL, so the filter and the constant cannot drift apart.
    # psycopg2 renders a bound tuple as "(1, 2, 7, 8)"; an empty tuple would be
    # invalid SQL, so the condition is dropped entirely in that case.
    # NOTE(review): if updated_at is a timestamptz column, EXTRACT(MONTH ...)
    # follows the session time zone — confirm that is the intended calendar.
    month_filter = ""
    if excluded_months:
        month_filter = "AND EXTRACT(MONTH FROM updated_at) NOT IN %(excluded_months)s"
        params["excluded_months"] = excluded_months

    # Only the numeric table suffix is interpolated into the SQL text; every
    # value goes through bound parameters (a named parameter may be reused).
    shard_queries = [
        f"""
        SELECT user_id, chapter_id, chapter_unique_id, level, updated_at
        FROM user_chapter_play_record_{shard}
        WHERE user_id = %(role_id)s
          AND play_status = 1
          {month_filter}
        """
        for shard in range(8)
    ]
    union_sql = " UNION ALL ".join(shard_queries)
    sql = f"""
        SELECT * FROM (
            {union_sql}
        ) t
        ORDER BY updated_at ASC
    """

    conn = get_connection()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, params)
            rows = cur.fetchall()
    finally:
        conn.close()
    return rows
|
||
|
||
|
||
def count_excluded_records(role_id):
    """Count a role's completion records that fall in the vacation months.

    Sums, over the eight tables ``user_chapter_play_record_0`` .. ``_7``, the
    rows with ``user_id = role_id`` and ``play_status = 1`` whose ``updated_at``
    month is listed in ``EXCLUDED_MONTHS``.

    Args:
        role_id: Value matched against the ``user_id`` column.

    Returns:
        The number of matching rows as an ``int`` (0 when there are none).

    Raises:
        psycopg2.Error: Propagated unchanged on connection or query failure.
    """
    excluded_months = tuple(EXCLUDED_MONTHS)
    if not excluded_months:
        # Nothing is excluded, and "IN ()" would be invalid SQL anyway.
        return 0

    # The month list is bound from EXCLUDED_MONTHS rather than repeated as a
    # literal, keeping this count consistent with the constant.
    params = {"role_id": role_id, "excluded_months": excluded_months}
    shard_counts = [
        f"""
        SELECT COUNT(*) as cnt
        FROM user_chapter_play_record_{shard}
        WHERE user_id = %(role_id)s
          AND play_status = 1
          AND EXTRACT(MONTH FROM updated_at) IN %(excluded_months)s
        """
        for shard in range(8)
    ]
    union_sql = " UNION ALL ".join(shard_counts)
    sql = f"SELECT SUM(cnt) as total FROM ({union_sql}) t"

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(sql, params)
            result = cur.fetchone()
    finally:
        conn.close()

    # SUM over bigint counts is delivered as a Decimal; normalise to a plain
    # int so both the "some" and the "none" paths return the same type.
    total = result[0] if result else None
    return int(total) if total else 0
|
||
|
||
|
||
# ── 分析函数 ──────────────────────────────────────────
|
||
|
||
def classify_period(hour):
    """Return the name of the ``PERIODS`` entry whose hour range contains ``hour``.

    Ranges are half-open (``start <= hour < end``). The first matching entry
    wins; ``"未知"`` is returned when no range matches.
    """
    matching_names = (
        label
        for label, (start_hour, end_hour) in PERIODS.items()
        if start_hour <= hour < end_hour
    )
    return next(matching_names, "未知")
|
||
|
||
|
||
def analyze_weekly_distribution(records):
    """Tally completions per weekday and, for Monday-Friday, per time period.

    Args:
        records: Iterable of mappings with an ``"updated_at"`` datetime (or
            ``None``); records whose timestamp is ``None`` are skipped.

    Returns:
        A ``(day_counts, weekday_periods)`` tuple where

        * ``day_counts`` maps ``weekday()`` (0 = Monday .. 6 = Sunday) to the
          number of completions on that day, and
        * ``weekday_periods`` maps period name -> weekday (0-4 only) -> count.

        Both are ``defaultdict`` instances, so missing keys read as zero.
    """
    day_counts = defaultdict(int)
    weekday_periods = defaultdict(lambda: defaultdict(int))

    for record in records:
        completed_at = record["updated_at"]
        if completed_at is None:
            continue

        # Timestamps are used exactly as delivered; no time-zone conversion is
        # applied, so an aware value keeps whatever offset it arrived with.
        weekday = completed_at.weekday()  # 0 = Monday
        day_counts[weekday] += 1

        # The period breakdown only covers Monday through Friday.
        if weekday < 5:
            period = classify_period(completed_at.hour)
            weekday_periods[period][weekday] += 1

    return day_counts, weekday_periods
|
||
|
||
|
||
def _week_is_fully_excluded(monday, excluded_months):
    """Return True when all seven days of the week starting at ``monday`` fall in ``excluded_months``."""
    return all(
        (monday + timedelta(days=offset)).month in excluded_months
        for offset in range(7)
    )


def analyze_weekly_trend(records, excluded_months=None):
    """Aggregate completions per ISO week and summarise the trend.

    Args:
        records: Sequence of mappings with an ``"updated_at"`` datetime (or
            ``None``); records whose timestamp is ``None`` are skipped.
        excluded_months: Calendar months (1-12) that are outside the analysis.
            A week lying entirely inside these months is not reported as an
            interruption. Defaults to the module-level ``EXCLUDED_MONTHS``.

    Returns:
        ``([], {})`` when ``records`` is empty. Otherwise a tuple
        ``(weeks_data, analysis)`` where ``weeks_data`` is a list of
        ``(iso_year, iso_week, count)`` sorted chronologically (active weeks
        only) and ``analysis`` is a dict with the keys:

        * ``total_weeks``: number of weeks with at least one completion
        * ``total_span_weeks``: calendar weeks from first to last active week
        * ``total_lessons``: total completions counted
        * ``avg_per_week``: ``total_lessons / total_weeks``, 1 decimal
        * ``consecutive``: True when ``empty_weeks`` is empty
        * ``empty_weeks``: sorted ``(iso_year, iso_week)`` gaps inside the span
        * ``first_half_avg`` / ``second_half_avg``: mean completions per
          active week in each half, 1 decimal
        * ``trend``: ``"上涨 ↑"``, ``"下降 ↓"`` or ``"持平"``
    """
    if not records:
        return [], {}

    if excluded_months is None:
        excluded_months = EXCLUDED_MONTHS
    vacation_months = frozenset(excluded_months)

    week_counts = defaultdict(int)
    for record in records:
        completed_at = record["updated_at"]
        if completed_at is None:
            continue
        iso = completed_at.isocalendar()
        week_counts[(iso[0], iso[1])] += 1

    sorted_weeks = sorted(week_counts)
    weeks_data = [(year, week, week_counts[(year, week)]) for year, week in sorted_weeks]
    counts = [count for _, _, count in weeks_data]

    total_weeks = len(weeks_data)
    total_lessons = sum(counts)
    avg_per_week = round(total_lessons / total_weeks, 1) if total_weeks > 0 else 0

    # Span from the first to the last active week, empty weeks included.
    total_span_weeks = 0
    empty_weeks = []
    if sorted_weeks:
        first_year, first_week = sorted_weeks[0]
        last_year, last_week = sorted_weeks[-1]
        first_monday = datetime.fromisocalendar(first_year, first_week, 1)
        last_monday = datetime.fromisocalendar(last_year, last_week, 1)
        total_span_weeks = ((last_monday - first_monday).days // 7) + 1

        active_weeks = set(sorted_weeks)
        monday = first_monday
        while monday <= last_monday:
            iso = monday.isocalendar()
            week_key = (iso[0], iso[1])
            # A week that lies completely inside the excluded months cannot
            # contain any analysed record, so it is not an interruption.
            if week_key not in active_weeks and not _week_is_fully_excluded(
                monday, vacation_months
            ):
                empty_weeks.append(week_key)
            monday += timedelta(days=7)

    consecutive = not empty_weeks

    # Trend: first half vs. second half of the active weeks. Both halves have
    # len // 2 weeks; with an odd count the middle week belongs to neither.
    half = total_weeks // 2
    first_half = counts[:half]
    second_half = counts[total_weeks - half:]
    first_half_avg = sum(first_half) / half if half > 0 else 0
    second_half_avg = sum(second_half) / len(second_half) if second_half else 0

    trend = "持平"
    if first_half_avg > 0:
        ratio = second_half_avg / first_half_avg
        if ratio > 1.15:
            trend = "上涨 ↑"
        elif ratio < 0.85:
            trend = "下降 ↓"

    return weeks_data, {
        "total_weeks": total_weeks,
        "total_span_weeks": total_span_weeks,
        "total_lessons": total_lessons,
        "avg_per_week": avg_per_week,
        "consecutive": consecutive,
        "empty_weeks": empty_weeks,
        "first_half_avg": round(first_half_avg, 1),
        "second_half_avg": round(second_half_avg, 1),
        "trend": trend,
    }
|
||
|
||
|
||
# ── 输出格式化 ────────────────────────────────────────
|
||
|
||
def _bar(count, peak, width):
    """Return a block-character bar scaled so ``peak`` spans ``width``; empty for a zero count."""
    if count <= 0:
        return ""
    return "█" * max(1, int(count / peak * width))


def _render_header(role_id, records, excluded_count):
    """Build the report title and the summary lines above the first section."""
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [
        f"# 📊 学习时间分析报告 — 角色 {role_id}",
        "",
        f"**分析时间**: {now_str}",
        f"**有效完课记录**: {len(records)} 条",
    ]
    if excluded_count > 0:
        lines.append(f"**已排除寒暑假记录**: {excluded_count} 条(寒假1-2月、暑假7-8月,不算入分析)")
    lines.append("")
    return lines


def _render_day_distribution(day_counts):
    """Build section 1's per-weekday table and the weekend summary bullet."""
    lines = ["---", "## 一、一周时间分布", "", "### 各天完课数量", ""]

    total = sum(day_counts.values())
    peak = max(day_counts.values()) if day_counts else 1
    lines.append("| 星期 | 完课数 | 占比 |")
    lines.append("|------|--------|------|")
    for weekday, name in enumerate(WEEKDAY_NAMES):
        cnt = day_counts.get(weekday, 0)
        pct = f"{cnt / total * 100:.1f}%" if total > 0 else "0%"
        lines.append(f"| {name} | {cnt} {_bar(cnt, peak, 20)} | {pct} |")
    lines.append("")

    lines.append("### 规律小结")
    lines.append("")
    sat = day_counts.get(5, 0)
    sun = day_counts.get(6, 0)
    if sat + sun > 0:
        lines.append(f"- **周末上课**: ✅ 是 — 周六 {sat} 节,周日 {sun} 节")
    else:
        lines.append("- **周末上课**: ❌ 否 — 周末无完课记录")
    return lines


def _render_weekday_periods(day_counts, weekday_periods):
    """Build the Monday-Friday period table and the per-period summary bullets."""
    lines = [
        "",
        "### 周一至周五上课时段分布",
        "",
        "| 时段 | 周一 | 周二 | 周三 | 周四 | 周五 | 合计 |",
        "|------|------|------|------|------|------|------|",
    ]

    # Table rows; periods without any weekday completion are left out.
    for period in ("上午", "中午", "下午", "晚上", "凌晨"):
        per_day = weekday_periods.get(period, {})
        period_total = sum(per_day.values())
        if period_total == 0:
            continue
        row = [period]
        for weekday in range(5):
            cnt = per_day.get(weekday, 0)
            row.append(str(cnt) if cnt > 0 else "-")
        row.append(str(period_total))
        lines.append(f"| {' | '.join(row)} |")
    lines.append("")

    # Summary bullets; the early-morning period is listed in the table only.
    weekday_total = sum(day_counts.get(weekday, 0) for weekday in range(5))
    lines.append("**时段规律分析**:")
    for period in ("上午", "中午", "下午", "晚上"):
        per_day = weekday_periods.get(period, {})
        period_sum = sum(per_day.values())
        if period_sum == 0:
            continue
        pct = period_sum / weekday_total * 100 if weekday_total > 0 else 0
        active_days = [WEEKDAY_NAMES[d] for d in range(5) if per_day.get(d, 0) > 0]
        if active_days:
            lines.append(f"- **{period}**({period_sum}节, {pct:.0f}%)→ 集中在 {'、'.join(active_days)}")
        else:
            lines.append(f"- **{period}**({period_sum}节, {pct:.0f}%)")
    lines.append("")
    return lines


def _week_marker(prev_cnt, cnt):
    """Return the week-over-week marker for a table row ('' when unremarkable)."""
    if prev_cnt > 0 and cnt >= prev_cnt * 2:
        return "📈 突增"
    if cnt > prev_cnt * 1.3:
        return "📈"
    if prev_cnt > 0 and cnt < prev_cnt * 0.7:
        return "📉"
    return ""


def _render_weekly_trend(weeks_data, analysis):
    """Build section 2: key figures, interruption weeks, per-week table and trend summary."""
    continuity = '✅ 每周连续上课,无中断' if analysis['consecutive'] else '⚠️ 存在中断周(见下方)'
    lines = [
        "---",
        "## 二、跨周学习趋势",
        "",
        "### 基本数据",
        f"- 完课跨越 **{analysis['total_span_weeks']}** 个自然周(含空周),有课周数 **{analysis['total_weeks']}** 周",
        f"- 有效完课总数 **{analysis['total_lessons']}** 节",
        f"- 平均每周完课 **{analysis['avg_per_week']}** 节",
        f"- 连续性: {continuity}",
        "",
    ]

    if analysis["empty_weeks"]:
        lines.append("### 中断周明细")
        empty_list = []
        for y, w in sorted(analysis["empty_weeks"]):
            monday = datetime.fromisocalendar(y, w, 1)
            empty_list.append(f"{y}年W{w:02d}({monday.strftime('%m/%d')}起)")
        lines.append(f"- {', '.join(empty_list)}")
        lines.append("")

    lines.append("### 各周完课详情")
    lines.append("")
    lines.append("| 周次 | 起止日期 | 完课数 | 趋势 |")
    lines.append("|------|----------|--------|------|")

    max_count = max(c for _, _, c in weeks_data) if weeks_data else 1
    for idx, (y, w, cnt) in enumerate(weeks_data):
        monday = datetime.fromisocalendar(y, w, 1)
        sunday = monday + timedelta(days=6)
        date_range = f"{monday.strftime('%m/%d')}-{sunday.strftime('%m/%d')}"
        # The first row has no predecessor to compare against.
        marker = _week_marker(weeks_data[idx - 1][2], cnt) if idx > 0 else ""
        lines.append(f"| {y}W{w:02d} | {date_range} | {cnt} {_bar(cnt, max_count, 15)} | {marker} |")
    lines.append("")

    lines.append("### 趋势分析")
    lines.append(f"- **整体趋势**: {analysis['trend']}")
    # Each half average covers len // 2 active weeks (with an odd number of
    # weeks the middle one is in neither half), so both labels use that size.
    half_weeks = len(weeks_data) // 2
    lines.append(f" - 前半段(前 {half_weeks} 周)平均: {analysis['first_half_avg']} 节/周")
    lines.append(f" - 后半段(后 {half_weeks} 周)平均: {analysis['second_half_avg']} 节/周")
    lines.append("")
    return lines


def _render_notable_changes(weeks_data):
    """Build the block naming the first surge (>= 2x) and the first collapse (>= 3 down to <= 1)."""
    if len(weeks_data) < 2:
        return []

    counts = [c for _, _, c in weeks_data]
    events_found = []

    for idx in range(1, len(counts)):
        prev_cnt, cnt = counts[idx - 1], counts[idx]
        if prev_cnt > 0 and cnt >= prev_cnt * 2:
            y, w, _ = weeks_data[idx]
            monday = datetime.fromisocalendar(y, w, 1)
            events_found.append(f"⚡ **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量突增**:{prev_cnt}→{cnt} 节")
            break

    for idx in range(1, len(counts)):
        prev_cnt, cnt = counts[idx - 1], counts[idx]
        if prev_cnt >= 3 and cnt <= 1:
            y, w, _ = weeks_data[idx]
            monday = datetime.fromisocalendar(y, w, 1)
            events_found.append(f"🔻 **{y}年W{w:02d}周({monday.strftime('%m/%d')}起)完课量骤降**:{prev_cnt}→{cnt} 节")
            break

    if not events_found:
        return []
    lines = ["**值得关注的变化**:"]
    lines.extend(f"- {event}" for event in events_found)
    lines.append("")
    return lines


def _render_record_details(records):
    """Build section 3: one table row per record that has a timestamp."""
    lines = [
        "---",
        "## 三、完课记录明细",
        "",
        "| 序号 | 日期 | 时间 | 星期 | 时段 | 级别 | 课程ID |",
        "|------|------|------|------|------|------|--------|",
    ]
    # The sequence number is the record's 1-based position in ``records``.
    for seq, record in enumerate(records, 1):
        dt = record["updated_at"]
        if dt is None:
            continue
        date_str = dt.strftime("%Y-%m-%d")
        time_str = dt.strftime("%H:%M")
        weekday = WEEKDAY_NAMES[dt.weekday()]
        period = classify_period(dt.hour)
        level = record.get("level") or "-"
        chapter_id = record.get("chapter_id") or "-"
        lines.append(f"| {seq} | {date_str} | {time_str} | {weekday} | {period} | {level} | {chapter_id} |")
    lines.append("")
    return lines


def format_report(role_id, records, excluded_count, day_counts, weekday_periods, weeks_data, analysis):
    """Render the study-time analysis as a Markdown document.

    Args:
        role_id: Identifier shown in the report title.
        records: Completion records (mappings with ``updated_at``, ``level``
            and ``chapter_id``); when empty, only the header and a notice are
            produced.
        excluded_count: Number of records left out as vacation records; the
            corresponding line is shown only when it is greater than zero.
        day_counts: Mapping weekday (0-6) -> completion count.
        weekday_periods: Mapping period name -> weekday (0-4) -> count.
        weeks_data: List of ``(iso_year, iso_week, count)`` in chronological
            order.
        analysis: Summary dict with the keys ``total_span_weeks``,
            ``total_weeks``, ``total_lessons``, ``avg_per_week``,
            ``consecutive``, ``empty_weeks``, ``trend``, ``first_half_avg``
            and ``second_half_avg``; only read when ``records`` is non-empty.

    Returns:
        The report as a single string with lines joined by ``"\\n"``.
    """
    lines = _render_header(role_id, records, excluded_count)

    if not records:
        lines.append("> ⚠️ 该角色没有非寒暑假期间的完课记录,无法进行分析。")
        return "\n".join(lines)

    lines.extend(_render_day_distribution(day_counts))
    lines.extend(_render_weekday_periods(day_counts, weekday_periods))
    lines.extend(_render_weekly_trend(weeks_data, analysis))
    lines.extend(_render_notable_changes(weeks_data))
    lines.extend(_render_record_details(records))
    return "\n".join(lines)
|
||
|
||
|
||
# ── 主函数 ────────────────────────────────────────────
|
||
|
||
def main():
    """Command-line entry point: analyse one role and print the Markdown report.

    Expects the role id as the first command-line argument. Exits with
    status 1 (message on stderr) when the argument is missing or is not an
    integer.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python3 studytime_analysis.py <role_id>", file=sys.stderr)
        sys.exit(1)

    raw_role_id = cli_args[0]
    try:
        role_id = int(raw_role_id)
    except ValueError:
        print(f"错误: 角色ID必须是数字,收到: {raw_role_id}", file=sys.stderr)
        sys.exit(1)

    # Load the data first, then derive both analyses from the same records.
    records = fetch_completion_records(role_id)
    excluded_count = count_excluded_records(role_id)
    day_counts, weekday_periods = analyze_weekly_distribution(records)
    weeks_data, analysis = analyze_weekly_trend(records)

    print(
        format_report(
            role_id,
            records,
            excluded_count,
            day_counts,
            weekday_periods,
            weeks_data,
            analysis,
        )
    )


if __name__ == "__main__":
    main()
|