#!/usr/bin/env python3
"""Report the share of paying users with no lesson completion in a 14-day window.

Paying accounts are bucketed as L1-only / L2-only / L1+L2 / other according to
the goods_id values of their orders, and the inactive ratio of every bucket is
printed to stdout.

Window: 2026-05-09 ~ 2026-05-22 (both days inclusive).

The database password is NOT stored in this file. Export it before running:

    export VALA_BI_PASSWORD='...'    # PGPASSWORD is accepted as a fallback
"""
import os
from collections import defaultdict
from contextlib import closing
from datetime import datetime  # kept from the original import block (currently unused)

# ── Database connection settings ───────────────────────────
DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
DB_PORT = 28591
DB_USER = "ai_member"
DB_NAME = "vala_bi"
# Environment variables consulted, in order, for the database password.
PASSWORD_ENV_VARS = ("VALA_BI_PASSWORD", "PGPASSWORD")

# ── Report window ──────────────────────────────────────────
START_DATE = "2026-05-09"
END_DATE = "2026-05-22"  # inclusive; the query uses < END_DATE + 1 day

# ── goods_id → course level ────────────────────────────────
L1_GOODS = frozenset({57, 60, 63})
L2_GOODS = frozenset({31, 32, 33, 54})
L1L2_GOODS = frozenset({61})

# Number of bi_user_chapter_play_record_<n> shard tables to scan.
SHARD_COUNT = 8


def get_db_password(environ=None):
    """Return the database password taken from the environment.

    Args:
        environ: Mapping to read from; defaults to ``os.environ``.

    Raises:
        SystemExit: If none of ``PASSWORD_ENV_VARS`` is set to a non-empty value.
    """
    env = os.environ if environ is None else environ
    for name in PASSWORD_ENV_VARS:
        value = env.get(name)
        if value:
            return value
    raise SystemExit(
        "Database password not found; set one of: " + ", ".join(PASSWORD_ENV_VARS)
    )


def percent(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is zero."""
    return part / whole * 100 if whole else 0.0


def classify_accounts(paid_accounts, user_goods_map):
    """Bucket accounts by the course levels they purchased.

    Args:
        paid_accounts: Iterable of account ids to classify.
        user_goods_map: Mapping of account id -> set of purchased goods ids.
            Accounts missing from the mapping are treated as having no goods.

    Returns:
        A tuple ``(only_l1, only_l2, both_l1l2, other)`` of disjoint sets.
    """
    only_l1, only_l2, both_l1l2, other = set(), set(), set(), set()
    for aid in paid_accounts:
        goods = user_goods_map.get(aid, set())
        has_l1 = bool(goods & L1_GOODS)
        has_l2 = bool(goods & L2_GOODS)
        has_l1l2 = bool(goods & L1L2_GOODS)
        # A bundle purchase, or separate L1 and L2 purchases, both count as L1+L2.
        if has_l1l2 or (has_l1 and has_l2):
            both_l1l2.add(aid)
        elif has_l1:
            only_l1.add(aid)
        elif has_l2:
            only_l2.add(aid)
        else:
            other.add(aid)
    return only_l1, only_l2, both_l1l2, other


def show_segment(label, user_set, inactive_all):
    """Print the size of ``user_set`` and how many of its members are inactive."""
    segment_total = len(user_set)
    if segment_total == 0:
        print(f" {label}: 0 人")
        return
    inactive_count = len(user_set & inactive_all)
    print(f" {label}:")
    print(f" 总付费用户: {segment_total}")
    print(
        f" 最近14天无完课: {inactive_count} "
        f"({percent(inactive_count, segment_total):.1f}%)"
    )


def fetch_paid_accounts(cur):
    """Return the set of distinct account ids that have a successful paid order."""
    cur.execute("""
        SELECT DISTINCT o.account_id
        FROM bi_vala_order o
        INNER JOIN bi_vala_app_account a
            ON o.account_id = a.id AND a.status = 1
        WHERE o.order_status IN (3, 4)
          AND o.pay_success_date IS NOT NULL
          AND o.deleted_at IS NULL
    """)
    return {row[0] for row in cur.fetchall()}


def fetch_goods_by_account(cur, account_ids):
    """Return a mapping of account id -> set of goods ids from its orders."""
    # NOTE(review): unlike fetch_paid_accounts, this query does not require
    # pay_success_date IS NOT NULL — confirm whether that is intentional.
    cur.execute("""
        SELECT o.account_id, o.goods_id
        FROM bi_vala_order o
        WHERE o.account_id = ANY(%s)
          AND o.order_status IN (3, 4)
          AND o.deleted_at IS NULL
    """, (list(account_ids),))
    goods_by_account = defaultdict(set)
    for row in cur.fetchall():
        goods_by_account[row[0]].add(row[1])
    return goods_by_account


def fetch_char_to_account(cur, account_ids):
    """Return a mapping of character id -> owning account id."""
    cur.execute("""
        SELECT id, account_id
        FROM bi_vala_app_character
        WHERE account_id = ANY(%s)
          AND deleted_at IS NULL
    """, (list(account_ids),))
    return {row[0]: row[1] for row in cur.fetchall()}


def fetch_active_characters(cur, char_to_account):
    """Return known character ids with a completed play record in the window.

    Every shard table is scanned; ids that are not keys of ``char_to_account``
    are discarded after the per-shard row count has been printed.
    """
    active_char_ids = set()
    for shard in range(SHARD_COUNT):
        # The table name is built from a local integer only, never from input.
        table = f"bi_user_chapter_play_record_{shard}"
        print(f" 查询 {table} ...", end=" ")
        cur.execute(f"""
            SELECT DISTINCT user_id
            FROM {table}
            WHERE play_status = 1
              AND updated_at >= %s
              AND updated_at < %s::date + interval '1 day'
        """, (START_DATE, END_DATE))
        rows = cur.fetchall()
        print(f"找到 {len(rows)} 条记录")
        active_char_ids.update(row[0] for row in rows if row[0] in char_to_account)
    return active_char_ids


def run_report(cur):
    """Run every query on ``cur`` and print the full report."""
    print("=" * 70)
    print("📊 最近14天无完课行为付费用户占比分析")
    print(f" 时间范围: {START_DATE} ~ {END_DATE}")
    print("=" * 70)

    # Step 1: all paying users (distinct account_id).
    print("\n[1/4] 获取付费用户...")
    paid_accounts = fetch_paid_accounts(cur)
    print(f" 付费用户总数(account_id 去重): {len(paid_accounts)}")

    # Step 2: L1 / L2 / L1+L2 classification.
    print("\n[2/4] 按 goods_id 分类用户等级...")
    user_goods_map = fetch_goods_by_account(cur, paid_accounts)
    only_l1, only_l2, both_l1l2, other = classify_accounts(
        paid_accounts, user_goods_map
    )
    print(f" 仅L1: {len(only_l1)}")
    print(f" 仅L2: {len(only_l2)}")
    print(f" L1+L2: {len(both_l1l2)}")
    print(f" 其他: {len(other)}")
    print(f" (合计): {len(only_l1)+len(only_l2)+len(both_l1l2)+len(other)}")

    # Step 3: character ids that belong to the paying accounts.
    print("\n[3/4] 获取付费用户的角色 ID...")
    char_to_account = fetch_char_to_account(cur, paid_accounts)
    print(f" 付费用户角色总数: {len(char_to_account)}")

    # Step 4: characters with a completion inside the window (all shards).
    print("\n[4/4] 查询最近14天完课行为(遍历8张分表)...")
    active_char_ids = fetch_active_characters(cur, char_to_account)
    # Map active characters back to their accounts.
    active_account_ids = {char_to_account[cid] for cid in active_char_ids}

    # Step 5: statistics.
    print("\n" + "=" * 70)
    print("📈 统计结果")
    print("=" * 70)

    inactive_all = paid_accounts - active_account_ids
    total = len(paid_accounts)
    active_total = len(active_account_ids)
    inactive_total = len(inactive_all)

    # percent() guards the total == 0 case, which used to raise ZeroDivisionError.
    print(f"\n 付费用户总数: {total}")
    print(f" 最近14天有完课行为的: {active_total} ({percent(active_total, total):.1f}%)")
    print(f" 最近14天无完课行为的: {inactive_total} ({percent(inactive_total, total):.1f}%)")

    print("\n ─── 按课程等级拆分 ───")
    show_segment("仅L1", only_l1, inactive_all)
    show_segment("仅L2", only_l2, inactive_all)
    show_segment("L1+L2", both_l1l2, inactive_all)
    if other:
        show_segment("其他", other, inactive_all)

    # Cross view: everyone who owns L1 / everyone who owns L2.
    print("\n ─── 按有无 L1/L2 交叉汇总 ───")
    users_with_l1 = only_l1 | both_l1l2
    users_with_l2 = only_l2 | both_l1l2
    show_segment("含L1(仅L1 + L1+L2)", users_with_l1, inactive_all)
    show_segment("含L2(仅L2 + L1+L2)", users_with_l2, inactive_all)

    print("\n" + "=" * 70)
    print("✅ 分析完成")
    print("=" * 70)


def main():
    """Connect to the database, print the report, and always release resources."""
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines that do not have the PostgreSQL driver installed.
    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=get_db_password(),
        dbname=DB_NAME,
    )
    # closing() is used because psycopg2's own `with conn:` only ends the
    # transaction; it does not close the connection.
    with closing(conn):
        conn.set_session(autocommit=True)
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            run_report(cur)


if __name__ == "__main__":
    main()