#!/usr/bin/env python3
"""Export details of 2026 paying users who completed more than 30 lessons in May.

The script runs one PostgreSQL query (``SQL``), loads the result into a
pandas DataFrame and writes a styled two-sheet Excel workbook: a detail sheet
with one row per account and a summary sheet with aggregate figures.

NOTE(review): the output file name hard-codes "103" users, but the number of
exported rows is simply whatever the query returns at run time.
"""

import os
import psycopg2
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# NOTE(review): a database credential is committed in source. The environment
# variable takes precedence; the literal is kept only so existing invocations
# keep working. Rotate this password and drop the fallback.
PG_PASSWORD = os.environ.get("PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*")

# The statement is executed without bound parameters, so the literal ``%`` in
# the shard predicates (``char_id % 8``) is passed through untouched. If bound
# parameters are ever added, psycopg2 requires those ``%`` to be written ``%%``.
SQL = """
WITH user_orders AS (
    SELECT DISTINCT o.account_id, o.goods_id, o.trade_no, o.key_from, o.pay_success_date
    FROM bi_vala_order o
    JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
    WHERE o.pay_success_date >= '2026-01-01'
      AND o.pay_success_date IS NOT NULL
      AND o.order_status IN (3, 4)
      AND o.goods_id NOT IN (32)
),
refunded_trades AS (
    SELECT DISTINCT r.trade_no
    FROM bi_refund_order r
    JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4
    WHERE r.status = 3
),
user_all_refunded AS (
    SELECT uo.account_id
    FROM user_orders uo
    LEFT JOIN refunded_trades rt ON uo.trade_no = rt.trade_no
    GROUP BY uo.account_id
    HAVING COUNT(*) = COUNT(rt.trade_no)
),
non_refund_users AS (
    SELECT DISTINCT uo.account_id
    FROM user_orders uo
    WHERE uo.account_id NOT IN (SELECT account_id FROM user_all_refunded)
),
user_level AS (
    SELECT uo.account_id,
        CASE
            WHEN bool_or(uo.goods_id = 61) THEN 'L1+L2'
            WHEN bool_or(uo.goods_id IN (57, 60, 63)) AND bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L1+L2'
            WHEN bool_or(uo.goods_id IN (57, 60, 63)) THEN 'L1'
            WHEN bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L2'
            ELSE '其他'
        END AS level
    FROM user_orders uo
    GROUP BY uo.account_id
),
purchase_info AS (
    SELECT account_id,
        STRING_AGG(DISTINCT key_from, ', ' ORDER BY key_from) AS channels,
        MIN(pay_success_date) AS first_purchase_date
    FROM user_orders
    GROUP BY account_id
),
target_chars AS (
    SELECT c.id AS char_id, c.account_id, c.gender, c.birthday
    FROM bi_vala_app_character c
    JOIN non_refund_users n ON c.account_id = n.account_id
),
chapter_done AS (
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_0 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 0
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_1 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 1
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_2 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 2
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_3 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 3
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_4 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 4
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_5 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 5
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_6 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 6
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
    UNION ALL
    SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count
    FROM bi_user_chapter_play_record_7 cp
    JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 7
    WHERE cp.play_status = 1 AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01'
    GROUP BY cp.user_id
),
user_done AS (
    SELECT tc.account_id, SUM(cd.done_count) AS total_done
    FROM target_chars tc
    JOIN chapter_done cd ON tc.char_id = cd.char_id
    GROUP BY tc.account_id
    HAVING SUM(cd.done_count) > 30
),
char_info AS (
    SELECT DISTINCT ON (c.account_id) c.account_id, c.gender, c.birthday
    FROM bi_vala_app_character c
    JOIN user_done ud ON c.account_id = ud.account_id
    WHERE c.deleted_at IS NULL
    ORDER BY c.account_id, c.birthday DESC NULLS LAST
),
addr_info AS (
    SELECT DISTINCT ON (account_id) account_id, login_address
    FROM account_detail_info
    WHERE login_address IS NOT NULL AND login_address != ''
    ORDER BY account_id, created_time DESC
)
SELECT ud.account_id, ul.level,
    CASE WHEN ci.gender = 1 THEN '男' WHEN ci.gender = 2 THEN '女' ELSE '未知' END AS gender,
    CASE WHEN ci.birthday IS NOT NULL AND ci.birthday != '' THEN EXTRACT(YEAR FROM AGE(TO_DATE(ci.birthday, 'YYYY-MM-DD'))) ELSE NULL END AS age,
    SPLIT_PART(ai.login_address, '-', 1) AS province,
    SPLIT_PART(ai.login_address, '-', 2) AS city,
    pi.channels AS purchase_channel,
    TO_CHAR(pi.first_purchase_date, 'YYYY-MM-DD') AS first_purchase_date,
    ud.total_done AS may_done_count
FROM user_done ud
JOIN user_level ul ON ud.account_id = ul.account_id
LEFT JOIN char_info ci ON ud.account_id = ci.account_id
LEFT JOIN addr_info ai ON ud.account_id = ai.account_id
LEFT JOIN purchase_info pi ON ud.account_id = pi.account_id
ORDER BY ul.level, ud.account_id;
"""

# Display names for the query's result columns, in SELECT order.
_COLUMN_NAMES = ['账号ID', '课程级别', '性别', '年龄', '省份', '城市', '购课渠道', '首次购课日期', '5月完成课时数']

# Detail-sheet column widths, positionally matched to _COLUMN_NAMES.
_DETAIL_COLUMN_WIDTHS = [12, 10, 8, 8, 16, 12, 60, 16, 16]

_OUTPUT_PATH = "/root/.openclaw/workspace/output/2026付费用户_5月完成30课时以上_103人.xlsx"

# Cell styles shared by both sheets.
_HEADER_FONT = Font(name='微软雅黑', bold=True, size=11, color='FFFFFF')
_HEADER_FILL = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
_HEADER_ALIGNMENT = Alignment(horizontal='center', vertical='center', wrap_text=True)
_DATA_FONT = Font(name='微软雅黑', size=10)
_DATA_ALIGNMENT = Alignment(vertical='center')
_THIN_BORDER = Border(
    left=Side(style='thin'),
    right=Side(style='thin'),
    top=Side(style='thin'),
    bottom=Side(style='thin'),
)


def _fetch_dataframe():
    """Run ``SQL`` against the BI database and return the result as a DataFrame."""
    conn = psycopg2.connect(
        host="bj-postgres-16pob4sg.sql.tencentcdb.com",
        port=28591,
        user="ai_member",
        password=PG_PASSWORD,
        database="vala_bi",
    )
    try:
        return pd.read_sql(SQL, conn)
    finally:
        # Close even when the query fails, so the connection is not leaked.
        conn.close()


def _style_cell(cell, *, header):
    """Apply the header or data font/alignment (and fill for headers) plus the border."""
    if header:
        cell.font = _HEADER_FONT
        cell.fill = _HEADER_FILL
        cell.alignment = _HEADER_ALIGNMENT
    else:
        cell.font = _DATA_FONT
        cell.alignment = _DATA_ALIGNMENT
    cell.border = _THIN_BORDER


def _write_detail_sheet(ws, df):
    """Fill ``ws`` with one styled header row followed by one row per DataFrame record."""
    ws.title = "5月完成>30课时用户"

    for col_idx, col_name in enumerate(df.columns, 1):
        _style_cell(ws.cell(row=1, column=col_idx, value=col_name), header=True)

    # Enumerate positions rather than trusting index labels, so the sheet row
    # is correct whatever index the DataFrame carries. Data starts on row 2.
    for row_idx, record in enumerate(df.itertuples(index=False, name=None), 2):
        for col_idx, value in enumerate(record, 1):
            cell = ws.cell(
                row=row_idx,
                column=col_idx,
                value=value if pd.notna(value) else '',
            )
            _style_cell(cell, header=False)

    for col_idx, width in enumerate(_DETAIL_COLUMN_WIDTHS, 1):
        ws.column_dimensions[get_column_letter(col_idx)].width = width

    ws.freeze_panes = 'A2'  # keep the header row visible while scrolling
    ws.auto_filter.ref = f"A1:{get_column_letter(len(df.columns))}{len(df) + 1}"


def _count_equal(series, label):
    """Return how many entries of ``series`` equal ``label``."""
    return int((series == label).sum())


def _rounded_mean(series):
    """Return the mean of the non-missing values rounded to 1 decimal, or '' if there are none."""
    present = series.dropna()
    if present.empty:
        return ''
    return round(present.mean(), 1)


def _max_as_int(series):
    """Return the maximum non-missing value as an int, or '' if there are none."""
    present = series.dropna()
    if present.empty:
        # int(NaN) would raise ValueError; write a blank like the detail sheet does.
        return ''
    return int(present.max())


def _summary_rows(df):
    """Build the summary sheet content: a header row followed by [metric, value] rows."""
    level = df['课程级别']
    gender = df['性别']
    done = df['5月完成课时数']
    # NOTE(review): the query can also label an account '其他'; such accounts
    # are counted in the total but have no row of their own here.
    return [
        ["指标", "数值"],
        ["总人数", len(df)],
        ["仅L1", _count_equal(level, 'L1')],
        ["仅L2", _count_equal(level, 'L2')],
        ["L1+L2", _count_equal(level, 'L1+L2')],
        ["平均年龄", _rounded_mean(df['年龄'])],
        ["男性", _count_equal(gender, '男')],
        ["女性", _count_equal(gender, '女')],
        ["性别未知", _count_equal(gender, '未知')],
        ["平均5月完成课时", _rounded_mean(done)],
        ["最高5月完成课时", _max_as_int(done)],
    ]


def _write_summary_sheet(ws, df):
    """Write the aggregate figures for ``df`` into ``ws`` with the shared styling."""
    for row_idx, row_data in enumerate(_summary_rows(df), 1):
        for col_idx, value in enumerate(row_data, 1):
            cell = ws.cell(row=row_idx, column=col_idx, value=value)
            _style_cell(cell, header=(row_idx == 1))

    ws.column_dimensions['A'].width = 20
    ws.column_dimensions['B'].width = 15


def main():
    """Query the database and export the result to a styled Excel workbook.

    Creates the output directory if needed, writes the detail and summary
    sheets, saves the workbook to ``_OUTPUT_PATH`` and prints the path and
    the number of exported records.
    """
    df = _fetch_dataframe()
    df.columns = _COLUMN_NAMES

    output_path = _OUTPUT_PATH
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    wb = Workbook()
    _write_detail_sheet(wb.active, df)
    _write_summary_sheet(wb.create_sheet("汇总"), df)
    wb.save(output_path)

    print(f"✅ 已导出: {output_path}")
    print(f" 共 {len(df)} 条记录")


if __name__ == "__main__":
    main()