#!/usr/bin/env python3 """ 最近14天无完课行为的付费用户占比分析 口径: - 最近14天: 2026-05-08 ~ 2026-05-21(14个完整天) - 付费用户: 有已完成/已退款订单的非测试账号(status=1, deleted_at IS NULL) - 完课行为: bi_user_chapter_play_record.play_status=1 - L1/L2分类: 基于goods_id """ import os, sys sys.path.insert(0, "/root/.openclaw/workspace") import psycopg2 from datetime import datetime SECRETS_FILE = "/root/.openclaw/workspace/secrets.env" DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" DB_PORT = "28591" DB_USER = "ai_member" DB_NAME = "vala_bi" DATE_START = "2026-05-08" DATE_END = "2026-05-22" # exclusive def load_pw(): with open(SECRETS_FILE) as f: for line in f: if line.startswith("PG_ONLINE_PASSWORD="): return line.split("=", 1)[1].strip().strip("'\"") pw = load_pw() conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER, password=pw, dbname=DB_NAME) cur = conn.cursor() print("=" * 60) print("📊 最近14天(2026-05-08~05-21)无完课行为付费用户占比") print("=" * 60) # ── Step 1: 获取所有付费用户及L1/L2分类 ── print("\n[1/3] 查询付费用户及等级分类...") cur.execute(""" WITH user_goods AS ( SELECT o.account_id, o.goods_id, a.id IS NOT NULL AS is_valid FROM bi_vala_order o JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL WHERE o.order_status IN (3, 4) AND o.pay_success_date IS NOT NULL GROUP BY o.account_id, o.goods_id, a.id ), user_level_raw AS ( SELECT account_id, BOOL_OR(goods_id = 61) AS has_both, BOOL_OR(goods_id IN (57, 60, 63)) AS has_l1, BOOL_OR(goods_id IN (31, 32, 33, 54)) AS has_l2, BOOL_OR(goods_id IN (4,5,6,10,13,14,17,20,25,29,30,35,36,37,38)) AS has_other FROM user_goods GROUP BY account_id ) SELECT account_id, CASE WHEN has_both OR (has_l1 AND has_l2) THEN 'L1+L2' WHEN has_l1 AND NOT has_l2 THEN '仅L1' WHEN has_l2 AND NOT has_l1 THEN '仅L2' ELSE '其他' END AS level FROM user_level_raw """) paying_users = {row[0]: row[1] for row in cur.fetchall()} total_paying = len(paying_users) print(f" 付费用户总数: {total_paying}") l1_only = sum(1 for v in paying_users.values() if v == '仅L1') l2_only = sum(1 for v in paying_users.values() if v == '仅L2') l1l2 = sum(1 for v in paying_users.values() if v == 'L1+L2') other = sum(1 for v in paying_users.values() if v == '其他') print(f" 仅L1: {l1_only} | 仅L2: {l2_only} | L1+L2: {l1l2} | 其他: {other}") # ── Step 2: 查询最近14天有完课行为的用户 ── print("\n[2/3] 查询最近14天有完课行为的用户...") # 获取所有付费用户的 character_ids account_ids = list(paying_users.keys()) # 分批查 character char_map = {} # account_id -> [character_ids] for i in range(0, len(account_ids), 500): batch = account_ids[i:i+500] cur.execute(""" SELECT account_id, id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL """, (batch,)) for acc_id, char_id in cur.fetchall(): if acc_id not in char_map: char_map[acc_id] = [] char_map[acc_id].append(char_id) all_char_ids = [] for chars in char_map.values(): all_char_ids.extend(chars) print(f" 角色总数: {len(all_char_ids)}") # 查完课行为 - 分8表UNION ALL learned_chars = set() for shard in range(8): table = f"bi_user_chapter_play_record_{shard}" sql = f""" SELECT DISTINCT user_id FROM {table} WHERE play_status = 1 AND created_at >= %s AND created_at < %s AND user_id = ANY(%s) """ cur.execute(sql, (DATE_START, DATE_END, all_char_ids)) for (uid,) in cur.fetchall(): learned_chars.add(uid) # 有完课行为的 account_id learned_accounts = set() for acc_id, chars in char_map.items(): if any(c in learned_chars for c in chars): learned_accounts.add(acc_id) no_learn_accounts = set(paying_users.keys()) - learned_accounts print(f" 有完课行为的付费用户: {len(learned_accounts)}") print(f" 无完课行为的付费用户: {len(no_learn_accounts)}") # ── Step 3: 分L1/L2统计 ── print("\n[3/3] 汇总统计...") def stats_by_level(label, user_set): total = len(user_set) no_learn = len(user_set & no_learn_accounts) learn = total - no_learn ratio = no_learn / total * 100 if total > 0 else 0 return total, no_learn, learn, ratio all_users = set(paying_users.keys()) l1_set = {k for k, v in paying_users.items() if v == '仅L1'} l2_set = {k for k, v in paying_users.items() if v == '仅L2'} l1l2_set = {k for k, v in paying_users.items() if v == 'L1+L2'} other_set = {k for k, v in paying_users.items() if v == '其他'} print("\n" + "=" * 60) print("📊 结果汇总") print("=" * 60) print(f"{'类别':<12} {'总付费用户':>10} {'无完课':>8} {'无完课占比':>10} {'有完课':>8}") print("-" * 60) for label, uset in [("总体", all_users), ("仅L1", l1_set), ("仅L2", l2_set), ("L1+L2", l1l2_set), ("其他", other_set)]: t, nl, l, r = stats_by_level(label, uset) print(f"{label:<12} {t:>10,} {nl:>8,} {r:>9.1f}% {l:>8,}") # 合并L1相关(仅L1 + L1+L2) l1_related = l1_set | l1l2_set t1, nl1, l1, r1 = stats_by_level("L1相关", l1_related) print(f"{'L1相关(含L1+L2)':<12} {t1:>10,} {nl1:>8,} {r1:>9.1f}% {l1:>8,}") # 合并L2相关(仅L2 + L1+L2) l2_related = l2_set | l1l2_set t2, nl2, l2, r2 = stats_by_level("L2相关", l2_related) print(f"{'L2相关(含L1+L2)':<12} {t2:>10,} {nl2:>8,} {r2:>9.1f}% {l2:>8,}") cur.close() conn.close() print("\n✅ 完成")