207 lines
8.0 KiB
Python
207 lines
8.0 KiB
Python
#!/usr/bin/env python3
"""
Compare how often paying users completed lessons over the last 14 days, by channel.

Scope: orders paid from March 2026 onward, with refunded orders excluded.
"""
|
||
import psycopg2
|
||
import psycopg2.extras
|
||
from collections import defaultdict
|
||
import numpy as np
|
||
|
||
import os

# Connection settings are read from the standard libpq environment variables
# so that no credential is committed with this script. Host, port, user and
# database fall back to the values this report has always used; the password
# deliberately has no in-source fallback and must be supplied via PGPASSWORD.
# NOTE(review): the password that used to be hard-coded here should be
# rotated, since it has been stored in plain text.
_db_password = os.environ.get("PGPASSWORD")
if not _db_password:
    raise SystemExit(
        "PGPASSWORD is not set; export the database password before running this report"
    )

conn = psycopg2.connect(
    host=os.environ.get("PGHOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    port=int(os.environ.get("PGPORT", "28591")),
    user=os.environ.get("PGUSER", "ai_member"),
    password=_db_password,
    dbname=os.environ.get("PGDATABASE", "vala_bi"),
)
# Every statement issued below is a SELECT, so run in autocommit mode rather
# than keeping a transaction open for the whole report.
conn.set_session(autocommit=True)
# DictCursor rows can be read by position (as the code below does) or by name.
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
|
||
|
||
# Report window as ISO dates. Both ends are inclusive: the completion query
# below adds one day to END for its exclusive upper bound.
START, END = "2026-05-09", "2026-05-22"

# Report banner: a title and the scope note framed by two rules.
_banner_rule = "=" * 80
print(
    _banner_rule,
    "📊 各渠道付费用户近14天完课频次差异",
    " 限定:3月后订单 + 剔除退费",
    _banner_rule,
    sep="\n",
)
|
||
|
||
# ── Channel classification ──
# key_from values that are reported under the "端内" label.
ENDPOINT_INTERNAL = {'app-active-h5-0-0', 'app-sales-bj-qhm-0'}


def classify(kf):
    """Map an order's ``key_from`` value to a reporting channel label.

    Args:
        kf: The ``key_from`` string of an order. May be ``None`` or empty,
            e.g. when the database column is NULL.

    Returns:
        One of "端内", "销售渠道", "小红书店铺", "达人直播", "万物" or
        "其他端外". Values that match no rule, including ``None`` and the
        empty string, fall into the catch-all "其他端外".
    """
    # A NULL key_from arrives as None; without this guard the startswith()
    # calls below would raise AttributeError and abort the whole report.
    if not kf:
        return "其他端外"
    if kf in ENDPOINT_INTERNAL:
        return "端内"
    if kf.startswith("sales-adp"):
        return "销售渠道"
    if kf == "newmedia-dianpu-xhs-0-0":
        return "小红书店铺"
    if kf.startswith("newmedia-daren"):
        return "达人直播"
    if kf == "newmedia-dianpu-wwxx-0-0":
        return "万物"
    return "其他端外"
|
||
|
||
# ═══════════════════════════════════════════════════════════
# Step 1: paying users, with their channels and purchased goods
# ═══════════════════════════════════════════════════════════
print("\n[1/4] 获取付费用户...")
# One row per qualifying order: non-deleted orders with order_status = 3 paid
# on or after 2026-03-01, joined to accounts whose status is 1.
# NOTE(review): order_status = 3 presumably means "paid, not refunded" — confirm.
cur.execute("""
    SELECT o.account_id, o.goods_id, o.key_from
    FROM bi_vala_order o
    INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
    WHERE o.order_status = 3 AND o.pay_success_date >= '2026-03-01' AND o.deleted_at IS NULL
""")
rows = cur.fetchall()

# account_id -> set of goods_id values the account bought
user_goods = defaultdict(set)
# account_id -> set of channel labels across all of the account's orders
user_channels = defaultdict(set)
for account_id, goods_id, key_from in rows:
    user_goods[account_id].add(goods_id)
    user_channels[account_id].add(classify(key_from))
|
||
|
||
# goods_id groups used to derive an account's level label.
# NOTE(review): L12_G presumably holds goods that grant both levels — confirm.
L1_G = {57, 60, 63}
L2_G = {31, 32, 33, 54}
L12_G = {61}


def level(aid):
    """Return the level label for account ``aid`` based on its purchased goods.

    Looks the account up in the module-level ``user_goods`` mapping; an
    unknown account is treated as having bought nothing.

    Returns:
        "L1+L2" if the account owns any L12_G item, or items from both L1_G
        and L2_G; "仅L1" or "仅L2" if it owns items from only one of those
        groups; "其他" otherwise.
    """
    owned = user_goods.get(aid, set())
    if not owned.isdisjoint(L12_G):
        return "L1+L2"
    has_l1 = not owned.isdisjoint(L1_G)
    has_l2 = not owned.isdisjoint(L2_G)
    if has_l1:
        return "L1+L2" if has_l2 else "仅L1"
    return "仅L2" if has_l2 else "其他"
|
||
|
||
# ═══════════════════════════════════════════════════════════
# Step 2: character -> account mapping
# ═══════════════════════════════════════════════════════════
print("[2/4] 获取角色映射...")
all_aids = list(user_goods)
# Fetch every non-deleted character that belongs to one of the paying accounts.
cur.execute(
    "SELECT id, account_id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL",
    (all_aids,),
)
# character id -> owning account id
char_to_account = {char_id: owner_id for char_id, owner_id in cur.fetchall()}
all_char_ids = set(char_to_account)
|
||
|
||
# ═══════════════════════════════════════════════════════════
# Step 3: completions per character within the report window
# ═══════════════════════════════════════════════════════════
print("[3/4] 统计各角色近14天完课次数(8张分表)...")
# character id -> number of play records with play_status = 1 in the window
char_completion_count = defaultdict(int)
for shard in range(8):
    # The play-record table is split into 8 shards, suffixed _0 .. _7. The
    # shard number comes from range(), so formatting it into the SQL is safe;
    # the dates are passed as bound parameters.
    cur.execute(f"""
        SELECT user_id, COUNT(*) as cnt
        FROM bi_user_chapter_play_record_{shard}
        WHERE play_status = 1
        AND updated_at >= %s AND updated_at < %s::date + interval '1 day'
        GROUP BY user_id
    """, (START, END))
    for character_id, n_records in cur.fetchall():
        # The query returns every user in the shard; keep only characters
        # that belong to the paying accounts.
        if character_id not in all_char_ids:
            continue
        char_completion_count[character_id] += n_records

# Roll the per-character counts up to the owning account.
account_completions = defaultdict(int)
for character_id, n_records in char_completion_count.items():
    account_completions[char_to_account[character_id]] += n_records
|
||
|
||
# ═══════════════════════════════════════════════════════════
# Step 4: aggregate by channel
# ═══════════════════════════════════════════════════════════
print("[4/4] 按渠道聚合...")

# Display order of the channels in every report section below.
CHANNELS = ["端内", "销售渠道", "小红书店铺", "达人直播", "万物", "其他端外"]

# channel label -> set of account ids. An account with orders from several
# channels is a member of each of them.
channel_user_set = defaultdict(set)
for account_id, labels in user_channels.items():
    for label in labels:
        channel_user_set[label].add(account_id)
|
||
|
||
def stats(aid_set):
    """Summarise completions for a set of account ids.

    Completion counts are read from the module-level ``account_completions``
    mapping; accounts missing from it count as zero.

    Returns:
        A tuple ``(n_accounts, n_active, total_completions, counts)`` where
        ``n_active`` is the number of accounts with at least one completion
        and ``counts`` lists the per-account completion counts of those
        active accounts only.
    """
    per_account = (account_completions.get(aid, 0) for aid in aid_set)
    positive_counts = [count for count in per_account if count > 0]
    return len(aid_set), len(positive_counts), sum(positive_counts), positive_counts
|
||
|
||
# ── Table output ──
header = f"{'渠道':<12s} {'等级':<8s} {'总付费':>6s} {'活跃人数':>7s} {'活跃率':>7s} {'总完课次':>7s} {'人均(全)':>8s} {'人均(活跃)':>9s} {'中位数':>6s} {'P75':>5s} {'P90':>5s}"
print(f"\n{header}")
print("-" * 105)


def _print_stats_row(channel_label, level_label, aid_set):
    """Print one table row of completion statistics for a non-empty ``aid_set``.

    The median, P75 and P90 are taken over the per-account counts returned
    by ``stats()``, i.e. over accounts with at least one completion.
    """
    n_users, n_active, n_comps, counts = stats(aid_set)
    mean_all = n_comps / n_users if n_users else 0
    mean_active = n_comps / n_active if n_active else 0
    if counts:
        ordered = sorted(counts)
        q50 = np.median(ordered)
        q75 = np.percentile(ordered, 75)
        q90 = np.percentile(ordered, 90)
    else:
        # No active account: report zeros instead of calling numpy on an empty list.
        q50 = q75 = q90 = 0
    print(f"{channel_label:<12s} {level_label:<8s} {n_users:>6d} {n_active:>7d} {n_active/n_users*100:>6.1f}% {n_comps:>7d} {mean_all:>8.1f} {mean_active:>9.1f} {q50:>6.0f} {q75:>5.0f} {q90:>5.0f}")


for ch in CHANNELS:
    users = channel_user_set.get(ch, set())
    if not users:
        continue
    # Channel total
    _print_stats_row(ch, "合计", users)

    # Breakdown by level; accounts whose level is "其他" get no row of their own.
    users_by_level = defaultdict(set)
    for aid in users:
        users_by_level[level(aid)].add(aid)
    for lv in ("仅L1", "仅L2", "L1+L2"):
        subset = users_by_level.get(lv)
        if subset:
            _print_stats_row("", lv, subset)
    print("-" * 105)
|
||
|
||
# ── Frequency distribution ──
_dist_rule = "═" * 70
print(f"\n{_dist_rule}")
print("📋 完课频次分布(活跃用户)")
print(_dist_rule)
|
||
# Completion-count buckets as inclusive (low, high) pairs. A high bound of 999
# is a sentinel that marks the last bucket as open-ended; it is labelled "31+".
BINS = [(1,1), (2,2), (3,4), (5,7), (8,14), (15, 30), (31, 999)]


def dist(aid_set):
    """Bucket the active accounts of ``aid_set`` by completion count.

    Counts are read from the module-level ``account_completions`` mapping;
    accounts with no completions are left out.

    Returns:
        A tuple ``(rows, total)``. ``rows`` holds one ``(label, count, pct)``
        entry per bucket in ``BINS``, where ``pct`` is the bucket's share of
        ``total`` in percent (0 when ``total`` is 0). ``total`` is the number
        of active accounts.
    """
    comps = [c for c in (account_completions.get(a, 0) for a in aid_set) if c > 0]
    total = len(comps)
    d = []
    for lo, hi in BINS:
        # The sentinel bucket has no upper limit. Testing c <= 999 here would
        # drop larger counts from every bucket, so the percentages would no
        # longer add up to 100 under a label that promises "31+".
        open_ended = hi >= 999
        cnt = sum(1 for c in comps if c >= lo and (open_ended or c <= hi))
        pct = cnt / total * 100 if total else 0
        d.append((f"{lo}+" if open_ended else f"{lo}-{hi}", cnt, pct))
    return d, total
|
||
|
||
# Header line: channel, number of active accounts, then one column per bucket.
_bin_labels = [f"{lo}-{hi}" if hi < 999 else f"{lo}+" for lo, hi in BINS]
print(f"\n{'渠道':<12s} {'活跃':>5s}" + "".join(f" {label:>7s}" for label in _bin_labels))

# One line per channel that has at least one active account.
for ch in CHANNELS:
    users = channel_user_set.get(ch, set())
    if not users:
        continue
    d, tot = dist(users)
    if tot == 0:
        continue
    pct_cells = "".join(f" {pct:>6.1f}%" for _, _, pct in d)
    print(f"{ch:<12s} {tot:>5d}{pct_cells}")
|
||
|
||
# ── Channel comparison bar chart ──
_chart_rule = "═" * 70
print(f"\n{_chart_rule}")
print("📊 人均完课次数对比(活跃用户)")
print(_chart_rule)
for ch in CHANNELS:
    members = channel_user_set.get(ch)
    if not members:
        continue
    _, n_active, n_comps, _ = stats(members)
    avg = n_comps / n_active if n_active else 0
    # Two bar cells per completion, and never fewer than one cell.
    bar_cells = max(1, int(avg * 2))
    bar = "█" * bar_cells
    print(f" {ch:<12s} 人均 {avg:5.1f} 次 {bar}")

# Release the database resources before reporting success.
cur.close()
conn.close()
print("\n✅ 完成")
|