#!/usr/bin/env python3
"""Report the last-completed Unit of paid users with no recent completions.

For paid users who completed no lesson during the analysis window, find the
most recent completion record of each of their characters, map it to a course
Unit, and print the Unit distribution split by purchase type (L1 only,
L2 only, L1+L2).

Analysis window: 2026-05-09 ~ 2026-05-22 (both days inclusive).

Connection settings may be overridden with the VALA_BI_DB_HOST,
VALA_BI_DB_PORT, VALA_BI_DB_USER, VALA_BI_DB_PASSWORD and VALA_BI_DB_NAME
environment variables.
"""
import os
import re
from collections import defaultdict
from contextlib import contextmanager

START_DATE = "2026-05-09"
END_DATE = "2026-05-22"

# goods_id values that decide which purchase type an account belongs to.
L1_GOODS = {57, 60, 63}
L2_GOODS = {31, 32, 33, 54}
L1L2_GOODS = {61}

# The play-record table is split into bi_user_chapter_play_record_0 .. _7.
SHARD_COUNT = 8

TYPE_L1 = "仅L1"
TYPE_L2 = "仅L2"
TYPE_BOTH = "L1+L2"
TYPE_OTHER = "其他"
# Only these types are printed; TYPE_OTHER is aggregated but never reported.
REPORTED_TYPES = [TYPE_L1, TYPE_L2, TYPE_BOTH]

# Matches unit codes such as "U00", "U01", "S1U00" (optional S<n> prefix).
_UNIT_PATTERN = re.compile(r'(?:S(\d+))?U(\d+)')
# Sort key for units that are missing or do not match the pattern: sort last.
_UNKNOWN_UNIT_KEY = (99, 99)


def unit_sort_key(u):
    """Return a ``(season, unit)`` tuple used to order unit codes.

    Args:
        u: A unit code such as ``"U01"`` or ``"S1U00"``; may be ``None``.

    Returns:
        ``(S number or 0, U number)`` when the code matches at its start,
        otherwise ``(99, 99)`` so unknown values sort after known ones.
    """
    if not isinstance(u, str):
        # Covers None as well as any non-text value coming from the database,
        # which would otherwise make the regex match raise TypeError.
        return _UNKNOWN_UNIT_KEY
    m = _UNIT_PATTERN.match(u)
    if m is None:
        return _UNKNOWN_UNIT_KEY
    season = int(m.group(1)) if m.group(1) else 0
    return (season, int(m.group(2)))


@contextmanager
def _db_cursor():
    """Yield a DictCursor on an autocommit connection; always close both."""
    # Imported here rather than at module top so the pure helpers in this
    # file stay importable on machines without the PostgreSQL driver.
    import psycopg2
    import psycopg2.extras

    # NOTE(review): the fallback values below are the credentials that were
    # hard-coded in this script. The password should be rotated and supplied
    # only through VALA_BI_DB_PASSWORD; the fallback is kept solely so that
    # existing invocations keep working.
    conn = psycopg2.connect(
        host=os.environ.get(
            "VALA_BI_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"
        ),
        port=int(os.environ.get("VALA_BI_DB_PORT", "28591")),
        user=os.environ.get("VALA_BI_DB_USER", "ai_member"),
        password=os.environ.get(
            "VALA_BI_DB_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"
        ),
        dbname=os.environ.get("VALA_BI_DB_NAME", "vala_bi"),
    )
    try:
        conn.set_session(autocommit=True)
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            yield cur
        finally:
            cur.close()
    finally:
        conn.close()


def _fetch_paid_accounts(cur):
    """Return the set of account ids selected by the paid-order query."""
    cur.execute("""
        SELECT DISTINCT o.account_id
        FROM bi_vala_order o
        INNER JOIN bi_vala_app_account a
            ON o.account_id = a.id AND a.status = 1
        WHERE o.order_status IN (3, 4)
          AND o.pay_success_date IS NOT NULL
          AND o.deleted_at IS NULL
    """)
    return {row[0] for row in cur.fetchall()}


def _fetch_user_goods(cur, paid_accounts):
    """Return ``account_id -> set of goods_id`` for the given accounts."""
    cur.execute("""
        SELECT o.account_id, o.goods_id
        FROM bi_vala_order o
        WHERE o.account_id = ANY(%s)
          AND o.order_status IN (3, 4)
          AND o.deleted_at IS NULL
    """, (list(paid_accounts),))
    user_goods = defaultdict(set)
    for account_id, goods_id in cur.fetchall():
        user_goods[account_id].add(goods_id)
    return user_goods


def _classify_accounts(paid_accounts, user_goods):
    """Split accounts into disjoint sets keyed by purchase type.

    An account is L1+L2 when it bought a combined product or both an L1 and
    an L2 product; accounts matching none of the goods sets go to "other".
    """
    groups = {
        TYPE_L1: set(),
        TYPE_L2: set(),
        TYPE_BOTH: set(),
        TYPE_OTHER: set(),
    }
    for aid in paid_accounts:
        goods = user_goods.get(aid, set())
        has_l1 = bool(goods & L1_GOODS)
        has_l2 = bool(goods & L2_GOODS)
        has_combined = bool(goods & L1L2_GOODS)
        if has_combined or (has_l1 and has_l2):
            groups[TYPE_BOTH].add(aid)
        elif has_l1:
            groups[TYPE_L1].add(aid)
        elif has_l2:
            groups[TYPE_L2].add(aid)
        else:
            groups[TYPE_OTHER].add(aid)
    return groups


def _fetch_characters(cur, paid_accounts):
    """Return ``(char_id -> account_id, account_id -> [char_id, ...])``."""
    cur.execute(
        "SELECT id, account_id FROM bi_vala_app_character "
        "WHERE account_id = ANY(%s) AND deleted_at IS NULL",
        (list(paid_accounts),),
    )
    char_to_account = {}
    account_chars = defaultdict(list)
    for char_id, account_id in cur.fetchall():
        char_to_account[char_id] = account_id
        account_chars[account_id].append(char_id)
    return char_to_account, account_chars


def _fetch_active_char_ids(cur, all_char_ids):
    """Return known character ids with a completion inside the window."""
    active = set()
    for shard in range(SHARD_COUNT):
        # The upper bound is END_DATE + 1 day (exclusive) so that the whole
        # of END_DATE is included.
        cur.execute(f"""
            SELECT DISTINCT user_id
            FROM bi_user_chapter_play_record_{shard}
            WHERE play_status = 1
              AND updated_at >= %s
              AND updated_at < %s::date + interval '1 day'
        """, (START_DATE, END_DATE))
        active.update(
            row[0] for row in cur.fetchall() if row[0] in all_char_ids
        )
    return active


def _fetch_latest_completions(cur, inactive_chars):
    """Return ``char_id -> (chapter_id, updated_at)`` of the newest completion.

    Every shard is scanned; the newest record wins if a character shows up
    in more than one shard. Only characters in *inactive_chars* are kept.
    """
    latest = {}
    for shard in range(SHARD_COUNT):
        table = f"bi_user_chapter_play_record_{shard}"
        cur.execute(f"""
            SELECT DISTINCT ON (user_id) user_id, chapter_id, updated_at
            FROM {table}
            WHERE play_status = 1
            ORDER BY user_id, updated_at DESC
        """)
        rows = cur.fetchall()
        print(f" {table}: {len(rows)} 条记录")
        for char_id, chapter_id, updated_at in rows:
            if char_id not in inactive_chars:
                continue
            if char_id not in latest or updated_at > latest[char_id][1]:
                latest[char_id] = (chapter_id, updated_at)
    return latest


def _fetch_chapter_map(cur):
    """Return ``chapter_id -> (course_level, course_unit)``."""
    cur.execute(
        "SELECT id, course_level, course_unit FROM bi_level_unit_lesson"
    )
    return {row[0]: (row[1], row[2]) for row in cur.fetchall()}


def _aggregate_units(latest_completion, inactive_chars, groups, chapter_map):
    """Count last-completed units per purchase type.

    Returns:
        ``(unit_counts, no_chapter)`` where ``unit_counts[type][unit]`` is
        the number of characters whose last completion falls in that unit,
        and ``no_chapter[type]`` counts characters whose chapter_id has no
        entry in *chapter_map*.
    """
    account_type = {
        aid: user_type
        for user_type, accounts in groups.items()
        for aid in accounts
    }
    unit_counts = defaultdict(lambda: defaultdict(int))
    no_chapter = defaultdict(int)
    for char_id, (chapter_id, _updated_at) in latest_completion.items():
        user_type = account_type.get(inactive_chars[char_id], TYPE_OTHER)
        mapped = chapter_map.get(chapter_id)
        if mapped is None:
            no_chapter[user_type] += 1
        else:
            unit_counts[user_type][mapped[1]] += 1
    return unit_counts, no_chapter


def _print_distribution(unit_counts, no_chapter):
    """Print one bar chart of last-completed units per reported type."""
    print("\n" + "=" * 70)
    print("📈 统计结果")
    print("=" * 70)
    for user_type in REPORTED_TYPES:
        data = unit_counts[user_type]
        # NOTE(review): this is a per-character count although the label
        # below says "人"; an account with several characters counts once
        # per character.
        total_with_history = sum(data.values())
        print(f"\n{'─' * 50}")
        print(f" {user_type} 用户")
        print(f" 最后一次完课 Unit 分布(共 {total_with_history} 人有完课记录):")
        print(f"{'─' * 50}")
        peak = max(1, max(data.values(), default=0))
        for unit, cnt in sorted(
            data.items(), key=lambda item: unit_sort_key(item[0])
        ):
            bar = "█" * max(1, int(cnt / peak * 30))
            # str() keeps the line printable when the unit is None, which
            # the ">10s" format spec would otherwise reject.
            print(f" {str(unit):>10s} {cnt:>5d} {bar}")
        if no_chapter.get(user_type, 0) > 0:
            print(
                f" {'(未知)':>10s} {no_chapter[user_type]:>5d}"
                f" (chapter_id 映射失败)"
            )


def _print_summary(
    inactive_accounts, inactive_chars, latest_completion, groups, unit_counts
):
    """Print per-type account totals and the five most common units."""
    print(f"\n{'═' * 50}")
    print("📋 汇总")
    print(f"{'═' * 50}")
    total_inactive = len(inactive_accounts)
    # Accounts owning at least one character with a completion record.
    accounts_with_any_history = {
        inactive_chars[char_id] for char_id in latest_completion
    }
    for user_type in REPORTED_TYPES:
        type_accounts = inactive_accounts & groups[user_type]
        accounts_with_history = type_accounts & accounts_with_any_history
        # Derived from the type's accounts rather than from its characters,
        # so accounts without any character row are counted here and the
        # two sub-totals add up to the type total.
        no_history_accounts = type_accounts - accounts_with_history
        print(f" {user_type}: 共 {len(type_accounts)} 人")
        print(f" 有完课记录: {len(accounts_with_history)} 人")
        print(f" 完全无完课: {len(no_history_accounts)} 人")
        data = unit_counts[user_type]
        if data:
            top = sorted(data.items(), key=lambda item: -item[1])[:5]
            top_str = ", ".join(f"{u}({c})" for u, c in top)
            print(f" Top5 Unit: {top_str}")
    print(f"\n 合计无完课付费用户: {total_inactive}")
    print(f" 注:用户数 {total_inactive} 为上轮口径,本次统计以角色维度查最后完课记录")


def main():
    """Run the six analysis steps against the database and print the report."""
    with _db_cursor() as cur:
        print("=" * 70)
        print("📊 无完课用户最后一次完课 Unit 分布分析")
        print("=" * 70)

        # Step 1: all paid accounts.
        print("\n[1/6] 获取付费用户...")
        paid_accounts = _fetch_paid_accounts(cur)
        print(f" 付费用户总数: {len(paid_accounts)}")

        # Step 2: classify accounts as L1 only / L2 only / L1+L2 / other.
        print("\n[2/6] 分类用户等级...")
        groups = _classify_accounts(
            paid_accounts, _fetch_user_goods(cur, paid_accounts)
        )

        # Step 3: accounts with a completion inside the window are active.
        print("\n[3/6] 查询最近14天完课行为...")
        char_to_account, account_chars = _fetch_characters(cur, paid_accounts)
        active_char_ids = _fetch_active_char_ids(cur, set(char_to_account))
        active_accounts = {char_to_account[c] for c in active_char_ids}
        inactive_accounts = paid_accounts - active_accounts
        print(f" 无完课付费用户: {len(inactive_accounts)}")

        # Step 4: every character that belongs to an inactive account.
        print("\n[4/6] 获取无完课用户的角色...")
        inactive_chars = {
            char_id: aid
            for aid in inactive_accounts
            for char_id in account_chars.get(aid, [])
        }
        print(f" 无完课用户角色数: {len(inactive_chars)}")

        # Step 5: newest completion record per character across all shards.
        print("\n[5/6] 查询各角色最后一次完课记录...")
        latest_completion = _fetch_latest_completions(cur, inactive_chars)
        print(f" 有历史完课记录的角色数: {len(latest_completion)}")
        no_history = len(inactive_chars) - len(latest_completion)
        print(f" 无任何完课记录的角色数: {no_history}")

        # Step 6: map chapter_id to Unit and aggregate per purchase type.
        print("\n[6/6] 映射 chapter_id → Unit 并聚合...")
        chapter_map = _fetch_chapter_map(cur)
        unit_counts, no_chapter = _aggregate_units(
            latest_completion, inactive_chars, groups, chapter_map
        )

        _print_distribution(unit_counts, no_chapter)
        _print_summary(
            inactive_accounts,
            inactive_chars,
            latest_completion,
            groups,
            unit_counts,
        )

    print("\n✅ 完成")


if __name__ == "__main__":
    main()