#!/usr/bin/env python3 """ 按 key_from 渠道拆分,统计各渠道付费用户在最近14天内的完课行为差异。 时间范围:2026-05-09 ~ 2026-05-22 """ import psycopg2 import psycopg2.extras from collections import defaultdict conn = psycopg2.connect( host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591, user="ai_member", password="LdfjdjL83h3h3^$&**YGG*", dbname="vala_bi" ) conn.set_session(autocommit=True) cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) START = "2026-05-09" END = "2026-05-22" print("=" * 80) print("📊 各渠道付费用户近14天完课行为差异分析") print("=" * 80) # ── 渠道分类规则 ───────────────────────────────────────────── ENDPOINT_INTERNAL = {'app-active-h5-0-0', 'app-sales-bj-qhm-0'} def classify_channel(kf): """将单个 key_from 归入渠道大类""" if kf in ENDPOINT_INTERNAL: return "端内" if kf.startswith("sales-adp"): return "销售渠道" if kf == "newmedia-dianpu-xhs-0-0": return "小红书店铺" if kf.startswith("newmedia-daren"): return "达人直播" if kf == "newmedia-dianpu-wwxx-0-0": return "万物" return "其他端外" # ═══════════════════════════════════════════════════════════ # Step 1: 付费用户 + goods 分类 # ═══════════════════════════════════════════════════════════ print("\n[1/5] 获取付费用户...") cur.execute(""" SELECT o.account_id, o.goods_id, o.key_from FROM bi_vala_order o INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 WHERE o.order_status = 3 AND o.pay_success_date >= '2026-03-01' AND o.deleted_at IS NULL """) rows = cur.fetchall() print(f" 订单记录: {len(rows)}") # account → {goods_ids}, {channels} user_goods = defaultdict(set) user_channels = defaultdict(set) for row in rows: aid, gid, kf = row[0], row[1], row[2] user_goods[aid].add(gid) user_channels[aid].add(classify_channel(kf)) # L1/L2 分类 L1_GOODS = {57, 60, 63} L2_GOODS = {31, 32, 33, 54} L1L2_GOODS = {61} def user_level(aid): g = user_goods.get(aid, set()) h1, h2, h12 = bool(g & L1_GOODS), bool(g & L2_GOODS), bool(g & L1L2_GOODS) if h12 or (h1 and h2): return "L1+L2" if h1: return "仅L1" if h2: return "仅L2" return "其他" # ═══════════════════════════════════════════════════════════ # Step 2: 角色映射 # ═══════════════════════════════════════════════════════════ print("\n[2/5] 获取角色映射...") all_accounts = list(user_goods.keys()) cur.execute("SELECT id, account_id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL", (all_accounts,)) char_to_account = {} account_chars = defaultdict(list) for row in cur.fetchall(): char_to_account[row[0]] = row[1] account_chars[row[1]].append(row[0]) all_char_ids = set(char_to_account.keys()) # ═══════════════════════════════════════════════════════════ # Step 3: 最近14天完课用户 # ═══════════════════════════════════════════════════════════ print("\n[3/5] 查询近14天完课行为(8张分表)...") active_char_ids = set() for shard in range(8): cur.execute(f""" SELECT DISTINCT user_id FROM bi_user_chapter_play_record_{shard} WHERE play_status = 1 AND updated_at >= %s AND updated_at < %s::date + interval '1 day' """, (START, END)) for r in cur.fetchall(): if r[0] in all_char_ids: active_char_ids.add(r[0]) active_accounts = {char_to_account[c] for c in active_char_ids} # ═══════════════════════════════════════════════════════════ # Step 4: 按渠道 + 等级 聚合 # ═══════════════════════════════════════════════════════════ print("\n[4/5] 按渠道聚合...") # 每个渠道大类独立统计 # 渠道统计维度:用户可能属于多个渠道(多笔订单不同渠道),在各自渠道下独立计数 CHANNELS = ["端内", "销售渠道", "小红书店铺", "达人直播", "万物", "其他端外"] # 每个渠道下的用户集合 channel_users = defaultdict(set) for aid, chs in user_channels.items(): for ch in chs: channel_users[ch].add(aid) # ═══════════════════════════════════════════════════════════ # Step 5: 输出 # ═══════════════════════════════════════════════════════════ print("\n[5/5] 生成报表...\n") def calc(channel, aid_set): if not aid_set: return None # 按等级拆分 split = {"仅L1": set(), "仅L2": set(), "L1+L2": set(), "其他": set()} for aid in aid_set: split[user_level(aid)].add(aid) total = len(aid_set) inactive_all = aid_set - active_accounts rows_out = [] for lv in ["仅L1", "仅L2", "L1+L2", "其他"]: u = split[lv] if not u: continue active_u = u & active_accounts inactive_u = u - active_accounts rows_out.append((lv, len(u), len(active_u), len(inactive_u), len(inactive_u)/len(u)*100 if u else 0)) inactive_total = len(inactive_all) rows_out.append(("合计", total, total - inactive_total, inactive_total, inactive_total/total*100 if total else 0)) return rows_out # ── 打印每个渠道 ── print(f"{'渠道':<14s} {'等级':<8s} {'总付费':>6s} {'有完课':>6s} {'无完课':>6s} {'无完课占比':>10s}") print("-" * 64) all_channels_data = {} for ch in CHANNELS: if ch not in channel_users: continue data = calc(ch, channel_users[ch]) if not data: continue all_channels_data[ch] = data first = True for row in data: lv, tot, act, inact, pct = row if lv == "合计": print(f"{ch:<14s} {'':<8s} {tot:>6d} {act:>6d} {inact:>6d} {pct:>9.1f}%") else: label = ch if first else "" print(f"{label:<14s} {lv:<8s} {tot:>6d} {act:>6d} {inact:>6d} {pct:>9.1f}%") first = False print("-" * 64) # ── 跨渠道对比表 ── print(f"\n{'═' * 64}") print("📋 渠道对比总览(仅合计行)") print(f"{'═' * 64}") print(f"{'渠道':<14s} {'总付费':>6s} {'无完课':>6s} {'无完课占比':>10s}") print("-" * 42) for ch in CHANNELS: if ch not in all_channels_data: continue row = all_channels_data[ch][-1] # 合计行 _, tot, _, inact, pct = row bar = "█" * max(1, int(pct / 100 * 20)) print(f"{ch:<14s} {tot:>6d} {inact:>6d} {pct:>9.1f}% {bar}") # ── 端内按 sale_channel 拆分 ── print(f"\n{'═' * 64}") print("📋 端内渠道 — 按 sale_channel 细分") print(f"{'═' * 64}") SALE_CHANNEL_MAP = { 11: "苹果", 12: "华为", 13: "小米", 14: "荣耀", 15: "应用宝", 17: "魅族", 18: "VIVO", 19: "OPPO", 21: "学而思", 22: "讯飞", 23: "步步高", 24: "作业帮", 25: "小度", 26: "希沃", 27: "京东方", 41: "官网", 71: "小程序" } cur.execute(""" SELECT o.account_id, o.sale_channel FROM bi_vala_order o INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 WHERE o.key_from = 'app-active-h5-0-0' AND o.order_status = 3 AND o.pay_success_date >= '2026-03-01' AND o.deleted_at IS NULL """) sale_rows = cur.fetchall() # account → set of sale_channels user_sale_channels = defaultdict(set) for row in sale_rows: sc = row[1] name = SALE_CHANNEL_MAP.get(sc, f"站外({sc})") user_sale_channels[row[0]].add(name) print(f"{'子渠道':<12s} {'总付费':>6s} {'有完课':>6s} {'无完课':>6s} {'无完课占比':>10s}") print("-" * 52) sale_stats = defaultdict(lambda: {"total": 0, "inactive": 0}) for aid, ch_names in user_sale_channels.items(): if aid not in active_accounts: for nm in ch_names: sale_stats[nm]["total"] += 1 sale_stats[nm]["inactive"] += 1 else: for nm in ch_names: sale_stats[nm]["total"] += 1 for nm in sorted(sale_stats.keys(), key=lambda x: -sale_stats[x]["total"]): s = sale_stats[nm] pct = s["inactive"] / s["total"] * 100 if s["total"] else 0 bar = "█" * max(1, int(pct / 100 * 20)) print(f"{nm:<12s} {s['total']:>6d} {s['total']-s['inactive']:>6d} {s['inactive']:>6d} {pct:>9.1f}% {bar}") cur.close() conn.close() print("\n✅ 完成")