#!/usr/bin/env python3
"""Export the details of the 103 users, among 2026 paying users, who completed more than 30 lessons in May."""

import os
from contextlib import closing

import pandas as pd
import psycopg2
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# SECURITY: a database credential is committed in source. The PG_PASSWORD
# environment variable takes precedence when it is set; the literal is kept
# only as a backward-compatible fallback and should be rotated and removed.
PG_PASSWORD = os.environ.get("PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*")

# Report query (PostgreSQL). One CTE per step:
#   user_orders       - orders paid on/after 2026-01-01 with order_status 3 or 4,
#                       goods_id 32 excluded, for accounts with status = 1
#   refunded_trades   - trade numbers with a refund row (status = 3) on an order
#                       whose order_status is 4
#   user_all_refunded - accounts whose every row in user_orders is refunded
#   non_refund_users  - the remaining accounts
#   user_level        - 'L1' / 'L2' / 'L1+L2' label derived from goods_id values
#   purchase_info     - distinct key_from values and earliest pay_success_date
#   target_chars      - characters belonging to non_refund_users
#   chapter_done      - distinct chapter_id count per character for rows with
#                       play_status = 1 (presumably "completed" - confirm) created
#                       in May 2026; the record table _0.._7 is chosen by
#                       char_id % 8
#   user_done         - per-account sum of those counts, kept only when > 30
#   char_info         - one non-deleted character per account (greatest birthday
#                       value first) used for gender and age
#   addr_info         - latest non-empty login_address per account
SQL = """
WITH user_orders AS (
    SELECT DISTINCT o.account_id, o.goods_id, o.trade_no, o.key_from, o.pay_success_date
    FROM bi_vala_order o
    JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
    WHERE o.pay_success_date >= '2026-01-01'
      AND o.pay_success_date IS NOT NULL
      AND o.order_status IN (3, 4)
      AND o.goods_id NOT IN (32)
),
refunded_trades AS (
    SELECT DISTINCT r.trade_no
    FROM bi_refund_order r
    JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4
    WHERE r.status = 3
),
user_all_refunded AS (
    SELECT uo.account_id
    FROM user_orders uo
    LEFT JOIN refunded_trades rt ON uo.trade_no = rt.trade_no
    GROUP BY uo.account_id
    HAVING COUNT(*) = COUNT(rt.trade_no)
),
non_refund_users AS (
    SELECT DISTINCT uo.account_id
    FROM user_orders uo
    WHERE uo.account_id NOT IN (SELECT account_id FROM user_all_refunded)
),
user_level AS (
    SELECT
        uo.account_id,
        CASE
            WHEN bool_or(uo.goods_id = 61) THEN 'L1+L2'
            WHEN bool_or(uo.goods_id IN (57, 60, 63)) AND bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L1+L2'
            WHEN bool_or(uo.goods_id IN (57, 60, 63)) THEN 'L1'
            WHEN bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L2'
            ELSE '其他'
        END AS level
    FROM user_orders uo
    GROUP BY uo.account_id
),
purchase_info AS (
    SELECT account_id,
        STRING_AGG(DISTINCT key_from, ', ' ORDER BY key_from) AS channels,
        MIN(pay_success_date) AS first_purchase_date
    FROM user_orders
    GROUP BY account_id
),
target_chars AS (
    SELECT c.id AS char_id, c.account_id, c.gender, c.birthday
    FROM bi_vala_app_character c
    JOIN non_refund_users n ON c.account_id = n.account_id
),
chapter_done AS (
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_0 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 0
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_1 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 1
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_2 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 2
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_3 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 3
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_4 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 4
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_5 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 5
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_6 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 6
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_7 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 7
    WHERE cp.play_status = 1
      AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
),
user_done AS (
    SELECT tc.account_id, SUM(cd.done_count) AS total_done
    FROM target_chars tc
    JOIN chapter_done cd ON tc.char_id = cd.char_id
    GROUP BY tc.account_id
    HAVING SUM(cd.done_count) > 30
),
char_info AS (
    SELECT DISTINCT ON (c.account_id)
        c.account_id, c.gender, c.birthday
    FROM bi_vala_app_character c
    JOIN user_done ud ON c.account_id = ud.account_id
    WHERE c.deleted_at IS NULL
    ORDER BY c.account_id, c.birthday DESC NULLS LAST
),
addr_info AS (
    SELECT DISTINCT ON (account_id) account_id, login_address
    FROM account_detail_info
    WHERE login_address IS NOT NULL AND login_address != ''
    ORDER BY account_id, created_time DESC
)
SELECT
    ud.account_id,
    ul.level,
    CASE
        WHEN ci.gender = 1 THEN '男'
        WHEN ci.gender = 2 THEN '女'
        ELSE '未知'
    END AS gender,
    CASE
        WHEN ci.birthday IS NOT NULL AND ci.birthday != ''
        THEN EXTRACT(YEAR FROM AGE(TO_DATE(ci.birthday, 'YYYY-MM-DD')))
        ELSE NULL
    END AS age,
    SPLIT_PART(ai.login_address, '-', 1) AS province,
    SPLIT_PART(ai.login_address, '-', 2) AS city,
    pi.channels AS purchase_channel,
    TO_CHAR(pi.first_purchase_date, 'YYYY-MM-DD') AS first_purchase_date,
    ud.total_done AS may_done_count
FROM user_done ud
JOIN user_level ul ON ud.account_id = ul.account_id
LEFT JOIN char_info ci ON ud.account_id = ci.account_id
LEFT JOIN addr_info ai ON ud.account_id = ai.account_id
LEFT JOIN purchase_info pi ON ud.account_id = pi.account_id
ORDER BY ul.level, ud.account_id;
"""

def _fetch_report_frame():
    """Run ``SQL`` against the database and return the rows as a DataFrame.

    Returns:
        The query result with its nine columns renamed to the Chinese
        headers used in the workbook.
    """
    # psycopg2's own ``with conn:`` only ends the transaction, so closing()
    # is used to release the connection even when the query raises.
    with closing(
        psycopg2.connect(
            host="bj-postgres-16pob4sg.sql.tencentcdb.com",
            port=28591,
            user="ai_member",
            password=PG_PASSWORD,
            database="vala_bi",
        )
    ) as conn:
        df = pd.read_sql(SQL, conn)

    df.columns = ['账号ID', '课程级别', '性别', '年龄', '省份', '城市', '购课渠道', '首次购课日期', '5月完成课时数']
    return df


def _apply_style(cell, font, alignment, border, fill=None):
    """Assign font, alignment and border (plus fill, when given) to a cell."""
    cell.font = font
    cell.alignment = alignment
    cell.border = border
    if fill is not None:
        cell.fill = fill


def _rounded_or_blank(value, digits=1):
    """Return ``value`` rounded to ``digits`` places, or '' when it is NaN/None."""
    return '' if pd.isna(value) else round(float(value), digits)


def _summary_rows(df):
    """Build the rows of the summary sheet, header row included.

    Averages and the maximum become '' when there is nothing to aggregate
    (empty result set, or every age missing), so an empty export no longer
    fails on ``int(nan)`` and no NaN is written to the sheet.
    """
    levels = df['课程级别']
    genders = df['性别']
    ages = pd.to_numeric(df['年龄'], errors='coerce')
    done = pd.to_numeric(df['5月完成课时数'], errors='coerce')
    top_done = done.max()

    return [
        ["指标", "数值"],
        ["总人数", len(df)],
        ["仅L1", int((levels == 'L1').sum())],
        ["仅L2", int((levels == 'L2').sum())],
        ["L1+L2", int((levels == 'L1+L2').sum())],
        ["平均年龄", _rounded_or_blank(ages.mean())],
        ["男性", int((genders == '男').sum())],
        ["女性", int((genders == '女').sum())],
        ["性别未知", int((genders == '未知').sum())],
        ["平均5月完成课时", _rounded_or_blank(done.mean())],
        ["最高5月完成课时", '' if pd.isna(top_done) else int(top_done)],
    ]


def _write_detail_sheet(ws, df, header_style, data_style):
    """Write the header row and one styled row per DataFrame record to ``ws``."""
    for col_idx, col_name in enumerate(df.columns, 1):
        _apply_style(ws.cell(row=1, column=col_idx, value=col_name), **header_style)

    # Rows are numbered by position (starting under the header) so the sheet
    # layout does not depend on the DataFrame's index labels.
    for row_idx, record in enumerate(df.itertuples(index=False, name=None), 2):
        for col_idx, value in enumerate(record, 1):
            cell = ws.cell(row=row_idx, column=col_idx, value=value if pd.notna(value) else '')
            _apply_style(cell, **data_style)

    col_widths = [12, 10, 8, 8, 16, 12, 60, 16, 16]
    for col_idx, width in enumerate(col_widths, 1):
        ws.column_dimensions[get_column_letter(col_idx)].width = width

    # Keep the header visible while scrolling and enable filtering on it.
    ws.freeze_panes = 'A2'
    ws.auto_filter.ref = f"A1:{get_column_letter(len(df.columns))}{len(df)+1}"


def _write_summary_sheet(ws, df, header_style, data_style):
    """Write the two-column summary table to ``ws``."""
    for row_idx, row_data in enumerate(_summary_rows(df), 1):
        style = header_style if row_idx == 1 else data_style
        for col_idx, value in enumerate(row_data, 1):
            _apply_style(ws.cell(row=row_idx, column=col_idx, value=value), **style)

    ws.column_dimensions['A'].width = 20
    ws.column_dimensions['B'].width = 15


def main():
    """Query the database, export the result to a styled workbook, and report it.

    The workbook has a detail sheet (one row per account) and a summary
    sheet. The output path and the number of exported rows are printed.
    """
    df = _fetch_report_frame()

    # NOTE: the file name embeds a head count of 103; the actual number of
    # exported rows is whatever the query returns and is printed below.
    output_path = "/root/.openclaw/workspace/output/2026付费用户_5月完成30课时以上_103人.xlsx"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    thin_border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin'),
    )
    header_style = {
        "font": Font(name='微软雅黑', bold=True, size=11, color='FFFFFF'),
        "fill": PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid'),
        "alignment": Alignment(horizontal='center', vertical='center', wrap_text=True),
        "border": thin_border,
    }
    data_style = {
        "font": Font(name='微软雅黑', size=10),
        "alignment": Alignment(vertical='center'),
        "border": thin_border,
    }

    wb = Workbook()
    ws = wb.active
    ws.title = "5月完成>30课时用户"
    _write_detail_sheet(ws, df, header_style, data_style)
    _write_summary_sheet(wb.create_sheet("汇总"), df, header_style, data_style)

    wb.save(output_path)
    print(f"✅ 已导出: {output_path}")
    print(f" 共 {len(df)} 条记录")

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()