#!/usr/bin/env python3 """ 导出每个角色每节课的完课记录(含耗时、是否首通) 输出:用户ID、角色ID、课程名称、完课耗时(分钟)、是否首通 """ import os import sys import pandas as pd import psycopg2 from openpyxl import Workbook from openpyxl.utils.dataframe import dataframe_to_rows # 数据库连接 conn = psycopg2.connect( host=os.environ.get('PG_ONLINE_HOST', 'bj-postgres-16pob4sg.sql.tencentcdb.com'), port=int(os.environ.get('PG_ONLINE_PORT', 28591)), dbname='vala_bi', user='ai_member', password=os.environ['PG_ONLINE_PASSWORD'] ) print("开始查询...") # 构建 8 个分表的 UNION ALL 查询 chapter_unions = " UNION ALL ".join( f"SELECT * FROM bi_user_chapter_play_record_{i}" for i in range(8) ) component_unions = " UNION ALL ".join( f"SELECT * FROM bi_user_component_play_record_{i}" for i in range(8) ) query = f""" WITH -- 1. 聚合组件耗时(按 chapter_unique_id) component_agg AS ( SELECT chapter_unique_id, SUM(interval_time) AS total_interval_ms FROM ({component_unions}) AS comp GROUP BY chapter_unique_id ), -- 2. 有效课时完成记录(关联有效账号 + 角色 + 课程) chapter_data AS ( SELECT cpr.id AS record_id, cpr.user_id AS character_id, chr.account_id, cpr.chapter_id, cpr.chapter_unique_id, cpr.created_at, lul.course_level || ' ' || lul.course_season || ' ' || lul.course_unit || ' ' || lul.course_lesson AS course_name FROM ({chapter_unions}) AS cpr INNER JOIN bi_vala_app_character chr ON cpr.user_id = chr.id INNER JOIN bi_vala_app_account acc ON chr.account_id = acc.id AND acc.status = 1 AND acc.deleted_at IS NULL INNER JOIN bi_level_unit_lesson lul ON cpr.chapter_id = lul.id WHERE cpr.play_status = 1 ), -- 3. 关联耗时并标记首通 final AS ( SELECT cd.account_id, cd.character_id, cd.course_name, ROUND(COALESCE(ca.total_interval_ms, 0) / 60000.0, 1) AS duration_min, CASE WHEN cd.created_at = MIN(cd.created_at) OVER ( PARTITION BY cd.character_id, cd.chapter_id ) THEN '是' ELSE '否' END AS is_first_complete FROM chapter_data cd LEFT JOIN component_agg ca ON cd.chapter_unique_id = ca.chapter_unique_id ) SELECT account_id, character_id, course_name, duration_min, is_first_complete FROM final ORDER BY account_id, character_id, course_name, is_first_complete DESC, duration_min """ print("执行 SQL 查询...") df = pd.read_sql_query(query, conn) conn.close() print(f"查询完成,共 {len(df)} 条记录") print(f"列:{list(df.columns)}") print(f"前5行预览:\n{df.head()}") # 导出 Excel output_path = '/root/.openclaw/workspace/output/chapter_completion_all.xlsx' print(f"正在导出到 {output_path}...") # 重命名列 df.columns = ['用户ID', '角色ID', '课程名称', '完课耗时(分钟)', '是否首通'] # 使用 openpyxl 写入(更好的格式) wb = Workbook() ws = wb.active ws.title = '完课记录' # 写入表头 for col_idx, col_name in enumerate(df.columns, 1): ws.cell(row=1, column=col_idx, value=col_name) # 批量写入数据 for row_idx, row in enumerate(df.itertuples(index=False), 2): ws.cell(row=row_idx, column=1, value=row[0]) # 用户ID ws.cell(row=row_idx, column=2, value=row[1]) # 角色ID ws.cell(row=row_idx, column=3, value=row[2]) # 课程名称 ws.cell(row=row_idx, column=4, value=row[3]) # 完课耗时 ws.cell(row=row_idx, column=5, value=row[4]) # 是否首通 # 调整列宽 ws.column_dimensions['A'].width = 12 ws.column_dimensions['B'].width = 12 ws.column_dimensions['C'].width = 22 ws.column_dimensions['D'].width = 18 ws.column_dimensions['E'].width = 12 wb.save(output_path) print(f"✅ 导出完成!文件: {output_path}") print(f" 总记录数: {len(df)}") print(f" 首通记录: {len(df[df['是否首通'] == '是'])}") print(f" 非首通记录: {len(df[df['是否首通'] == '否'])}") # 文件大小 size_mb = os.path.getsize(output_path) / (1024 * 1024) print(f" 文件大小: {size_mb:.1f} MB")