135 lines
4.1 KiB
Python
135 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
导出每个角色每节课的完课记录(含耗时、是否首通)
|
||
输出:用户ID、角色ID、课程名称、完课耗时(分钟)、是否首通
|
||
"""
|
||
import os
|
||
import sys
|
||
import pandas as pd
|
||
import psycopg2
|
||
from openpyxl import Workbook
|
||
from openpyxl.utils.dataframe import dataframe_to_rows
|
||
|
||
# 数据库连接
|
||
conn = psycopg2.connect(
|
||
host=os.environ.get('PG_ONLINE_HOST', 'bj-postgres-16pob4sg.sql.tencentcdb.com'),
|
||
port=int(os.environ.get('PG_ONLINE_PORT', 28591)),
|
||
dbname='vala_bi',
|
||
user='ai_member',
|
||
password=os.environ['PG_ONLINE_PASSWORD']
|
||
)
|
||
|
||
print("开始查询...")
|
||
|
||
# 构建 8 个分表的 UNION ALL 查询
|
||
chapter_unions = " UNION ALL ".join(
|
||
f"SELECT * FROM bi_user_chapter_play_record_{i}" for i in range(8)
|
||
)
|
||
|
||
component_unions = " UNION ALL ".join(
|
||
f"SELECT * FROM bi_user_component_play_record_{i}" for i in range(8)
|
||
)
|
||
|
||
query = f"""
|
||
WITH
|
||
-- 1. 聚合组件耗时(按 chapter_unique_id)
|
||
component_agg AS (
|
||
SELECT
|
||
chapter_unique_id,
|
||
SUM(interval_time) AS total_interval_ms
|
||
FROM ({component_unions}) AS comp
|
||
GROUP BY chapter_unique_id
|
||
),
|
||
-- 2. 有效课时完成记录(关联有效账号 + 角色 + 课程)
|
||
chapter_data AS (
|
||
SELECT
|
||
cpr.id AS record_id,
|
||
cpr.user_id AS character_id,
|
||
chr.account_id,
|
||
cpr.chapter_id,
|
||
cpr.chapter_unique_id,
|
||
cpr.created_at,
|
||
lul.course_level || ' ' || lul.course_season || ' ' || lul.course_unit || ' ' || lul.course_lesson AS course_name
|
||
FROM ({chapter_unions}) AS cpr
|
||
INNER JOIN bi_vala_app_character chr ON cpr.user_id = chr.id
|
||
INNER JOIN bi_vala_app_account acc ON chr.account_id = acc.id
|
||
AND acc.status = 1
|
||
AND acc.deleted_at IS NULL
|
||
INNER JOIN bi_level_unit_lesson lul ON cpr.chapter_id = lul.id
|
||
WHERE cpr.play_status = 1
|
||
),
|
||
-- 3. 关联耗时并标记首通
|
||
final AS (
|
||
SELECT
|
||
cd.account_id,
|
||
cd.character_id,
|
||
cd.course_name,
|
||
ROUND(COALESCE(ca.total_interval_ms, 0) / 60000.0, 1) AS duration_min,
|
||
CASE
|
||
WHEN cd.created_at = MIN(cd.created_at) OVER (
|
||
PARTITION BY cd.character_id, cd.chapter_id
|
||
) THEN '是'
|
||
ELSE '否'
|
||
END AS is_first_complete
|
||
FROM chapter_data cd
|
||
LEFT JOIN component_agg ca ON cd.chapter_unique_id = ca.chapter_unique_id
|
||
)
|
||
SELECT
|
||
account_id,
|
||
character_id,
|
||
course_name,
|
||
duration_min,
|
||
is_first_complete
|
||
FROM final
|
||
ORDER BY account_id, character_id, course_name, is_first_complete DESC, duration_min
|
||
"""
|
||
|
||
print("执行 SQL 查询...")
|
||
df = pd.read_sql_query(query, conn)
|
||
conn.close()
|
||
|
||
print(f"查询完成,共 {len(df)} 条记录")
|
||
print(f"列:{list(df.columns)}")
|
||
print(f"前5行预览:\n{df.head()}")
|
||
|
||
# 导出 Excel
|
||
output_path = '/root/.openclaw/workspace/output/chapter_completion_all.xlsx'
|
||
print(f"正在导出到 {output_path}...")
|
||
|
||
# 重命名列
|
||
df.columns = ['用户ID', '角色ID', '课程名称', '完课耗时(分钟)', '是否首通']
|
||
|
||
# 使用 openpyxl 写入(更好的格式)
|
||
wb = Workbook()
|
||
ws = wb.active
|
||
ws.title = '完课记录'
|
||
|
||
# 写入表头
|
||
for col_idx, col_name in enumerate(df.columns, 1):
|
||
ws.cell(row=1, column=col_idx, value=col_name)
|
||
|
||
# 批量写入数据
|
||
for row_idx, row in enumerate(df.itertuples(index=False), 2):
|
||
ws.cell(row=row_idx, column=1, value=row[0]) # 用户ID
|
||
ws.cell(row=row_idx, column=2, value=row[1]) # 角色ID
|
||
ws.cell(row=row_idx, column=3, value=row[2]) # 课程名称
|
||
ws.cell(row=row_idx, column=4, value=row[3]) # 完课耗时
|
||
ws.cell(row=row_idx, column=5, value=row[4]) # 是否首通
|
||
|
||
# 调整列宽
|
||
ws.column_dimensions['A'].width = 12
|
||
ws.column_dimensions['B'].width = 12
|
||
ws.column_dimensions['C'].width = 22
|
||
ws.column_dimensions['D'].width = 18
|
||
ws.column_dimensions['E'].width = 12
|
||
|
||
wb.save(output_path)
|
||
print(f"✅ 导出完成!文件: {output_path}")
|
||
print(f" 总记录数: {len(df)}")
|
||
print(f" 首通记录: {len(df[df['是否首通'] == '是'])}")
|
||
print(f" 非首通记录: {len(df[df['是否首通'] == '否'])}")
|
||
|
||
# 文件大小
|
||
size_mb = os.path.getsize(output_path) / (1024 * 1024)
|
||
print(f" 文件大小: {size_mb:.1f} MB")
|