ai_member_xiaoxi/scripts/export_chapter_completion.py
2026-05-23 08:00:01 +08:00

135 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
导出每个角色每节课的完课记录(含耗时、是否首通)
输出用户ID、角色ID、课程名称、完课耗时(分钟)、是否首通
"""
import os
import sys
import pandas as pd
import psycopg2
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
# 数据库连接
conn = psycopg2.connect(
host=os.environ.get('PG_ONLINE_HOST', 'bj-postgres-16pob4sg.sql.tencentcdb.com'),
port=int(os.environ.get('PG_ONLINE_PORT', 28591)),
dbname='vala_bi',
user='ai_member',
password=os.environ['PG_ONLINE_PASSWORD']
)
print("开始查询...")
# 构建 8 个分表的 UNION ALL 查询
chapter_unions = " UNION ALL ".join(
f"SELECT * FROM bi_user_chapter_play_record_{i}" for i in range(8)
)
component_unions = " UNION ALL ".join(
f"SELECT * FROM bi_user_component_play_record_{i}" for i in range(8)
)
query = f"""
WITH
-- 1. 聚合组件耗时(按 chapter_unique_id
component_agg AS (
SELECT
chapter_unique_id,
SUM(interval_time) AS total_interval_ms
FROM ({component_unions}) AS comp
GROUP BY chapter_unique_id
),
-- 2. 有效课时完成记录(关联有效账号 + 角色 + 课程)
chapter_data AS (
SELECT
cpr.id AS record_id,
cpr.user_id AS character_id,
chr.account_id,
cpr.chapter_id,
cpr.chapter_unique_id,
cpr.created_at,
lul.course_level || ' ' || lul.course_season || ' ' || lul.course_unit || ' ' || lul.course_lesson AS course_name
FROM ({chapter_unions}) AS cpr
INNER JOIN bi_vala_app_character chr ON cpr.user_id = chr.id
INNER JOIN bi_vala_app_account acc ON chr.account_id = acc.id
AND acc.status = 1
AND acc.deleted_at IS NULL
INNER JOIN bi_level_unit_lesson lul ON cpr.chapter_id = lul.id
WHERE cpr.play_status = 1
),
-- 3. 关联耗时并标记首通
final AS (
SELECT
cd.account_id,
cd.character_id,
cd.course_name,
ROUND(COALESCE(ca.total_interval_ms, 0) / 60000.0, 1) AS duration_min,
CASE
WHEN cd.created_at = MIN(cd.created_at) OVER (
PARTITION BY cd.character_id, cd.chapter_id
) THEN ''
ELSE ''
END AS is_first_complete
FROM chapter_data cd
LEFT JOIN component_agg ca ON cd.chapter_unique_id = ca.chapter_unique_id
)
SELECT
account_id,
character_id,
course_name,
duration_min,
is_first_complete
FROM final
ORDER BY account_id, character_id, course_name, is_first_complete DESC, duration_min
"""
print("执行 SQL 查询...")
df = pd.read_sql_query(query, conn)
conn.close()
print(f"查询完成,共 {len(df)} 条记录")
print(f"列:{list(df.columns)}")
print(f"前5行预览\n{df.head()}")
# 导出 Excel
output_path = '/root/.openclaw/workspace/output/chapter_completion_all.xlsx'
print(f"正在导出到 {output_path}...")
# 重命名列
df.columns = ['用户ID', '角色ID', '课程名称', '完课耗时(分钟)', '是否首通']
# 使用 openpyxl 写入(更好的格式)
wb = Workbook()
ws = wb.active
ws.title = '完课记录'
# 写入表头
for col_idx, col_name in enumerate(df.columns, 1):
ws.cell(row=1, column=col_idx, value=col_name)
# 批量写入数据
for row_idx, row in enumerate(df.itertuples(index=False), 2):
ws.cell(row=row_idx, column=1, value=row[0]) # 用户ID
ws.cell(row=row_idx, column=2, value=row[1]) # 角色ID
ws.cell(row=row_idx, column=3, value=row[2]) # 课程名称
ws.cell(row=row_idx, column=4, value=row[3]) # 完课耗时
ws.cell(row=row_idx, column=5, value=row[4]) # 是否首通
# 调整列宽
ws.column_dimensions['A'].width = 12
ws.column_dimensions['B'].width = 12
ws.column_dimensions['C'].width = 22
ws.column_dimensions['D'].width = 18
ws.column_dimensions['E'].width = 12
wb.save(output_path)
print(f"✅ 导出完成!文件: {output_path}")
print(f" 总记录数: {len(df)}")
print(f" 首通记录: {len(df[df['是否首通'] == ''])}")
print(f" 非首通记录: {len(df[df['是否首通'] == ''])}")
# 文件大小
size_mb = os.path.getsize(output_path) / (1024 * 1024)
print(f" 文件大小: {size_mb:.1f} MB")