ai_member_xiaoxi/scripts/export_103_users.py
2026-06-04 08:00:01 +08:00

272 lines
9.8 KiB
Python

#!/usr/bin/env python3
"""导出2026年付费用户中5月完成>30课时的103人明细"""
import os
import psycopg2
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
# Database password, read from the environment so that no credential is stored
# in the source tree. PGPASSWORD is the variable libpq itself consults, so the
# connection still authenticates when this value is None and the variable (or a
# ~/.pgpass entry) is present.
# NOTE(review): a literal password used to be committed on this line; it should
# be treated as leaked and rotated.
PG_PASSWORD = os.environ.get("PGPASSWORD")
SQL = """
WITH user_orders AS (
SELECT DISTINCT o.account_id, o.goods_id, o.trade_no, o.key_from, o.pay_success_date
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.pay_success_date >= '2026-01-01'
AND o.pay_success_date IS NOT NULL
AND o.order_status IN (3, 4)
AND o.goods_id NOT IN (32)
),
refunded_trades AS (
SELECT DISTINCT r.trade_no
FROM bi_refund_order r
JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4
WHERE r.status = 3
),
user_all_refunded AS (
SELECT uo.account_id
FROM user_orders uo
LEFT JOIN refunded_trades rt ON uo.trade_no = rt.trade_no
GROUP BY uo.account_id
HAVING COUNT(*) = COUNT(rt.trade_no)
),
non_refund_users AS (
SELECT DISTINCT uo.account_id
FROM user_orders uo
WHERE uo.account_id NOT IN (SELECT account_id FROM user_all_refunded)
),
user_level AS (
SELECT
uo.account_id,
CASE
WHEN bool_or(uo.goods_id = 61) THEN 'L1+L2'
WHEN bool_or(uo.goods_id IN (57, 60, 63)) AND bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L1+L2'
WHEN bool_or(uo.goods_id IN (57, 60, 63)) THEN 'L1'
WHEN bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L2'
ELSE '其他'
END AS level
FROM user_orders uo
GROUP BY uo.account_id
),
purchase_info AS (
SELECT account_id,
STRING_AGG(DISTINCT key_from, ', ' ORDER BY key_from) AS channels,
MIN(pay_success_date) AS first_purchase_date
FROM user_orders
GROUP BY account_id
),
target_chars AS (
SELECT c.id AS char_id, c.account_id, c.gender, c.birthday
FROM bi_vala_app_character c
JOIN non_refund_users n ON c.account_id = n.account_id
),
chapter_done AS (
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_0 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 0
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_1 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 1
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_2 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 2
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_3 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 3
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_4 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 4
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_5 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 5
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_6 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 6
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
UNION ALL
SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
FROM bi_user_chapter_play_record_7 cp
JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 7
WHERE cp.play_status = 1
AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
GROUP BY cp.user_id
),
user_done AS (
SELECT tc.account_id, SUM(cd.done_count) AS total_done
FROM target_chars tc
JOIN chapter_done cd ON tc.char_id = cd.char_id
GROUP BY tc.account_id
HAVING SUM(cd.done_count) > 30
),
char_info AS (
SELECT DISTINCT ON (c.account_id)
c.account_id, c.gender, c.birthday
FROM bi_vala_app_character c
JOIN user_done ud ON c.account_id = ud.account_id
WHERE c.deleted_at IS NULL
ORDER BY c.account_id, c.birthday DESC NULLS LAST
),
addr_info AS (
SELECT DISTINCT ON (account_id) account_id, login_address
FROM account_detail_info
WHERE login_address IS NOT NULL AND login_address != ''
ORDER BY account_id, created_time DESC
)
SELECT
ud.account_id,
ul.level,
CASE
WHEN ci.gender = 1 THEN ''
WHEN ci.gender = 2 THEN ''
ELSE '未知'
END AS gender,
CASE
WHEN ci.birthday IS NOT NULL AND ci.birthday != ''
THEN EXTRACT(YEAR FROM AGE(TO_DATE(ci.birthday, 'YYYY-MM-DD')))
ELSE NULL
END AS age,
SPLIT_PART(ai.login_address, '-', 1) AS province,
SPLIT_PART(ai.login_address, '-', 2) AS city,
pi.channels AS purchase_channel,
TO_CHAR(pi.first_purchase_date, 'YYYY-MM-DD') AS first_purchase_date,
ud.total_done AS may_done_count
FROM user_done ud
JOIN user_level ul ON ud.account_id = ul.account_id
LEFT JOIN char_info ci ON ud.account_id = ci.account_id
LEFT JOIN addr_info ai ON ud.account_id = ai.account_id
LEFT JOIN purchase_info pi ON ud.account_id = pi.account_id
ORDER BY ul.level, ud.account_id;
"""
def main():
conn = psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591,
user="ai_member",
password=PG_PASSWORD,
database="vala_bi"
)
df = pd.read_sql(SQL, conn)
conn.close()
# Rename columns
df.columns = ['账号ID', '课程级别', '性别', '年龄', '省份', '城市', '购课渠道', '首次购课日期', '5月完成课时数']
# Output path
output_path = "/root/.openclaw/workspace/output/2026付费用户_5月完成30课时以上_103人.xlsx"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Write with openpyxl for styling
wb = Workbook()
ws = wb.active
ws.title = "5月完成>30课时用户"
# Header style
header_font = Font(name='微软雅黑', bold=True, size=11, color='FFFFFF')
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
# Write headers
for col_idx, col_name in enumerate(df.columns, 1):
cell = ws.cell(row=1, column=col_idx, value=col_name)
cell.font = header_font
cell.fill = header_fill
cell.alignment = header_alignment
cell.border = thin_border
# Write data
data_font = Font(name='微软雅黑', size=10)
data_alignment = Alignment(vertical='center')
for row_idx, row in df.iterrows():
for col_idx, value in enumerate(row, 1):
cell = ws.cell(row=row_idx + 2, column=col_idx, value=value if pd.notna(value) else '')
cell.font = data_font
cell.alignment = data_alignment
cell.border = thin_border
# Column widths
col_widths = [12, 10, 8, 8, 16, 12, 60, 16, 16]
for i, w in enumerate(col_widths, 1):
ws.column_dimensions[get_column_letter(i)].width = w
# Freeze header
ws.freeze_panes = 'A2'
# Auto filter
ws.auto_filter.ref = f"A1:{get_column_letter(len(df.columns))}{len(df)+1}"
# Summary sheet
ws2 = wb.create_sheet("汇总")
summary_data = [
["指标", "数值"],
["总人数", len(df)],
["仅L1", len(df[df['课程级别'] == 'L1'])],
["仅L2", len(df[df['课程级别'] == 'L2'])],
["L1+L2", len(df[df['课程级别'] == 'L1+L2'])],
["平均年龄", round(df['年龄'].mean(), 1)],
["男性", len(df[df['性别'] == ''])],
["女性", len(df[df['性别'] == ''])],
["性别未知", len(df[df['性别'] == '未知'])],
["平均5月完成课时", round(df['5月完成课时数'].mean(), 1)],
["最高5月完成课时", int(df['5月完成课时数'].max())],
]
for row_idx, row_data in enumerate(summary_data, 1):
for col_idx, value in enumerate(row_data, 1):
cell = ws2.cell(row=row_idx, column=col_idx, value=value)
if row_idx == 1:
cell.font = header_font
cell.fill = header_fill
cell.alignment = header_alignment
else:
cell.font = data_font
cell.alignment = data_alignment
cell.border = thin_border
ws2.column_dimensions['A'].width = 20
ws2.column_dimensions['B'].width = 15
wb.save(output_path)
print(f"✅ 已导出: {output_path}")
print(f"{len(df)} 条记录")
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()