#!/usr/bin/env python3
"""Compare 14-day lesson-completion frequency of paying users by channel.

Scope, as stated by the original header: orders from March 2026 onwards,
refunds excluded.  In code that is ``order_status = 3`` together with
``pay_success_date >= ORDERS_SINCE`` (presumably status 3 is the paid,
non-refunded state -- confirm against the order schema).

The database password is intentionally not stored in this file.  No
``password`` argument is passed to ``psycopg2.connect``, so libpq resolves it
from the ``PGPASSWORD`` environment variable or ``~/.pgpass``.
"""
from collections import defaultdict
from contextlib import closing
import math

import numpy as np

# Connection settings (no password here, see the module docstring).
DB_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "dbname": "vala_bi",
}

# Reporting window; both dates are inclusive (the query uses `< END + 1 day`).
START = "2026-05-09"
END = "2026-05-22"
# Only orders paid on or after this date are considered.
ORDERS_SINCE = "2026-03-01"
# Number of bi_user_chapter_play_record_<n> shard tables that are scanned.
SHARD_COUNT = 8

# key_from values that are labelled as the in-app channel.
ENDPOINT_INTERNAL = {'app-active-h5-0-0', 'app-sales-bj-qhm-0'}
# Channel labels in the order they are reported.
CHANNELS = ["端内", "销售渠道", "小红书店铺", "达人直播", "万物", "其他端外"]

# goods_id sets that decide the level label returned by level().
L1_G = {57, 60, 63}
L2_G = {31, 32, 33, 54}
L12_G = {61}
# Levels broken out under each channel ("其他" is deliberately not listed).
LEVELS = ["仅L1", "仅L2", "L1+L2"]

# Inclusive (low, high) completion-count buckets; the last one is open-ended
# so that very large counts still land in the "31+" column.
BINS = [(1, 1), (2, 2), (3, 4), (5, 7), (8, 14), (15, 30), (31, math.inf)]

# Module-level state filled by main(); the helpers below default to it.
user_goods = defaultdict(set)          # account_id -> {goods_id}
user_channels = defaultdict(set)       # account_id -> {channel label}
char_to_account = {}                   # character id -> account_id
account_completions = defaultdict(int) # account_id -> completions in window
channel_user_set = defaultdict(set)    # channel label -> {account_id}


def classify(kf):
    """Return the channel label for an order's ``key_from`` value.

    A missing (``None``) or empty value is placed in the catch-all bucket
    rather than raising on ``None.startswith``.
    """
    if not kf:
        return "其他端外"
    if kf in ENDPOINT_INTERNAL:
        return "端内"
    if kf.startswith("sales-adp"):
        return "销售渠道"
    if kf == "newmedia-dianpu-xhs-0-0":
        return "小红书店铺"
    if kf.startswith("newmedia-daren"):
        return "达人直播"
    if kf == "newmedia-dianpu-wwxx-0-0":
        return "万物"
    return "其他端外"


def level(aid, goods_by_user=None):
    """Return the level label of account ``aid`` based on its goods ids.

    Args:
        aid: account id to look up.
        goods_by_user: mapping of account id to a set of goods ids; defaults
            to the module-level ``user_goods``.

    Returns:
        "L1+L2" when the account holds a combined product or both an L1 and
        an L2 product, "仅L1" / "仅L2" for exactly one side, else "其他".
    """
    if goods_by_user is None:
        goods_by_user = user_goods
    goods = goods_by_user.get(aid, set())
    has_l1 = bool(goods & L1_G)
    has_l2 = bool(goods & L2_G)
    has_both = bool(goods & L12_G)
    if has_both or (has_l1 and has_l2):
        return "L1+L2"
    if has_l1:
        return "仅L1"
    if has_l2:
        return "仅L2"
    return "其他"


def stats(aid_set, completions=None):
    """Summarise completions for a set of accounts.

    Args:
        aid_set: account ids to include.
        completions: mapping of account id to completion count; defaults to
            the module-level ``account_completions``.

    Returns:
        ``(user count, users with at least one completion, total
        completions, list of per-user counts)``.  Accounts missing from the
        mapping count as 0.
    """
    if completions is None:
        completions = account_completions
    comps = [completions.get(aid, 0) for aid in aid_set]
    active = sum(1 for c in comps if c > 0)
    return len(aid_set), active, sum(comps), comps


def _bin_label(lo, hi):
    """Return the column label of one BINS entry ("3-4", or "31+" if open)."""
    return f"{lo}+" if hi == math.inf else f"{lo}-{hi}"


def dist(aid_set, completions=None):
    """Bucket the active accounts of ``aid_set`` by completion count.

    Args:
        aid_set: account ids to include.
        completions: mapping of account id to completion count; defaults to
            the module-level ``account_completions``.

    Returns:
        ``(rows, total)`` where ``rows`` holds one ``(label, count,
        percent)`` tuple per BINS entry and ``total`` is the number of
        accounts with at least one completion (the percentage base).
    """
    if completions is None:
        completions = account_completions
    counts = [c for c in (completions.get(a, 0) for a in aid_set) if c > 0]
    total = len(counts)
    rows = []
    for lo, hi in BINS:
        cnt = sum(1 for c in counts if lo <= c <= hi)
        pct = cnt / total * 100 if total else 0
        rows.append((_bin_label(lo, hi), cnt, pct))
    return rows, total


def _summary_row(channel_label, level_label, aid_set):
    """Format one line of the summary table for ``aid_set``."""
    total, active, total_comps, comps = stats(aid_set)
    avg_all = total_comps / total if total else 0
    avg_active = total_comps / active if active else 0
    active_rate = active / total * 100 if total else 0
    if comps:
        med = np.median(comps)
        p75 = np.percentile(comps, 75)
        p90 = np.percentile(comps, 90)
    else:
        med = p75 = p90 = 0
    return (
        f"{channel_label:<12s} {level_label:<8s} {total:>6d} {active:>7d} "
        f"{active_rate:>6.1f}% {total_comps:>7d} {avg_all:>8.1f} "
        f"{avg_active:>9.1f} {med:>6.0f} {p75:>5.0f} {p90:>5.0f}"
    )


def _load_paid_users(cur):
    """Fill user_goods / user_channels from qualifying orders."""
    cur.execute(
        """
        SELECT o.account_id, o.goods_id, o.key_from
        FROM bi_vala_order o
        INNER JOIN bi_vala_app_account a
            ON o.account_id = a.id AND a.status = 1
        WHERE o.order_status = 3
          AND o.pay_success_date >= %s
          AND o.deleted_at IS NULL
        """,
        (ORDERS_SINCE,),
    )
    for account_id, goods_id, key_from in cur.fetchall():
        user_goods[account_id].add(goods_id)
        user_channels[account_id].add(classify(key_from))


def _load_characters(cur):
    """Fill char_to_account for every account found in user_goods."""
    cur.execute(
        "SELECT id, account_id FROM bi_vala_app_character "
        "WHERE account_id = ANY(%s) AND deleted_at IS NULL",
        (list(user_goods),),
    )
    for char_id, account_id in cur.fetchall():
        char_to_account[char_id] = account_id


def _load_completions(cur):
    """Sum completions in [START, END] per account over all shard tables."""
    for shard in range(SHARD_COUNT):
        # The table name cannot be a bound parameter; `shard` is an int
        # produced by range(), never external input.
        cur.execute(
            f"""
            SELECT user_id, COUNT(*) AS cnt
            FROM bi_user_chapter_play_record_{shard}
            WHERE play_status = 1
              AND updated_at >= %s
              AND updated_at < %s::date + interval '1 day'
            GROUP BY user_id
            """,
            (START, END),
        )
        for char_id, cnt in cur.fetchall():
            # The shard query is not restricted to paying users, so rows for
            # characters outside char_to_account are dropped here.
            if char_id in char_to_account:
                account_completions[char_to_account[char_id]] += cnt


def _index_channels():
    """Fill channel_user_set; an account may appear under several channels."""
    for aid, labels in user_channels.items():
        for label in labels:
            channel_user_set[label].add(aid)


def _print_summary_table():
    """Print per-channel totals followed by the per-level breakdown."""
    header = f"{'渠道':<12s} {'等级':<8s} {'总付费':>6s} {'活跃人数':>7s} {'活跃率':>7s} {'总完课次':>7s} {'人均(全)':>8s} {'人均(活跃)':>9s} {'中位数':>6s} {'P75':>5s} {'P90':>5s}"
    print(f"\n{header}")
    print("-" * 105)
    for ch in CHANNELS:
        users = channel_user_set.get(ch, set())
        if not users:
            continue
        print(_summary_row(ch, "合计", users))
        # Group once per channel instead of re-scanning users for each level.
        by_level = defaultdict(set)
        for aid in users:
            by_level[level(aid)].add(aid)
        for lv in LEVELS:
            subset = by_level.get(lv)
            if subset:
                print(_summary_row("", lv, subset))
        print("-" * 105)


def _print_distribution():
    """Print, per channel, the share of active users in each BINS bucket."""
    print(f"\n{'═' * 70}")
    print("📋 完课频次分布(活跃用户)")
    print(f"{'═' * 70}")
    columns = "".join(f" {_bin_label(lo, hi):>7s}" for lo, hi in BINS)
    print(f"\n{'渠道':<12s} {'活跃':>5s}" + columns)
    for ch in CHANNELS:
        users = channel_user_set.get(ch, set())
        if not users:
            continue
        rows, total = dist(users)
        if total == 0:
            continue
        cells = "".join(f" {pct:>6.1f}%" for _label, _cnt, pct in rows)
        print(f"{ch:<12s} {total:>5d}" + cells)


def _print_bar_chart():
    """Print a text bar per channel for completions per active user."""
    print(f"\n{'═' * 70}")
    print("📊 人均完课次数对比(活跃用户)")
    print(f"{'═' * 70}")
    for ch in CHANNELS:
        users = channel_user_set.get(ch, set())
        if not users:
            continue
        _, active, total_comps, _ = stats(users)
        avg = total_comps / active if active else 0
        # Two bar cells per completion, never shorter than one cell.
        bar = "█" * max(1, int(avg * 2))
        print(f" {ch:<12s} 人均 {avg:5.1f} 次 {bar}")


def main():
    """Load the data from PostgreSQL and print the three reports."""
    # Imported here rather than at module top so the pure helpers above can
    # be imported (and tested) on machines without the database driver.
    import psycopg2
    import psycopg2.extras

    # Start from a clean slate in case main() is called more than once.
    for container in (user_goods, user_channels, char_to_account,
                      account_completions, channel_user_set):
        container.clear()

    # closing() guarantees the connection is released even if a query fails;
    # psycopg2's own `with conn:` only scopes a transaction.
    with closing(psycopg2.connect(**DB_CONFIG)) as conn:
        conn.set_session(autocommit=True)
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            print("=" * 80)
            print("📊 各渠道付费用户近14天完课频次差异")
            print(" 限定:3月后订单 + 剔除退费")
            print("=" * 80)

            print("\n[1/4] 获取付费用户...")
            _load_paid_users(cur)

            print("[2/4] 获取角色映射...")
            _load_characters(cur)

            print("[3/4] 统计各角色近14天完课次数(8张分表)...")
            _load_completions(cur)

    print("[4/4] 按渠道聚合...")
    _index_channels()

    _print_summary_table()
    _print_distribution()
    _print_bar_chart()

    print("\n✅ 完成")


if __name__ == "__main__":
    main()