ai_member_xiaoxi/scripts/channel_frequency_diff.py
2026-05-23 08:00:01 +08:00

207 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
各渠道付费用户近14天完课频次差异分析。
限定2026年3月后订单、剔除退费。
"""
import os
from collections import defaultdict

import numpy as np
import psycopg2
import psycopg2.extras
# Database connection.
# The password is read from the environment rather than being embedded in the
# source, so the credential is no longer stored alongside the code.
# NOTE(review): the password that used to be committed here should be rotated.
_db_password = os.environ.get("VALA_BI_DB_PASSWORD") or os.environ.get("PGPASSWORD")
if not _db_password:
    raise SystemExit(
        "Database password not set: export VALA_BI_DB_PASSWORD (or PGPASSWORD) "
        "before running this script."
    )
conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591,
    user="ai_member",
    password=_db_password,
    dbname="vala_bi",
)
# Autocommit: every statement below runs outside an explicit transaction.
conn.set_session(autocommit=True)
# DictCursor rows can be read by position (as done below) or by column name.
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
# Analysis window as ISO dates; these are passed to the completion queries
# as the lower and upper date parameters.
START, END = "2026-05-09", "2026-05-22"

# Report banner.
print(
    "=" * 80,
    "📊 各渠道付费用户近14天完课频次差异",
    " 限定3月后订单 + 剔除退费",
    "=" * 80,
    sep="\n",
)
# ── Channel classification ───────────────────────────────────
# key_from values that are attributed to the in-app ("端内") channel.
ENDPOINT_INTERNAL = {'app-active-h5-0-0', 'app-sales-bj-qhm-0'}


def classify(kf):
    """Map an order's ``key_from`` value to a channel label.

    Args:
        kf: The order's ``key_from`` string. May be ``None`` or empty
            (e.g. a NULL column value).

    Returns:
        One of "端内", "销售渠道", "小红书店铺", "达人直播", "万物" or the
        catch-all "其他端外". Missing values fall into the catch-all instead
        of raising ``AttributeError`` on ``None.startswith``.
    """
    if not kf:
        # NULL / empty source: nothing to match on, use the catch-all bucket.
        return "其他端外"
    if kf in ENDPOINT_INTERNAL:
        return "端内"
    if kf.startswith("sales-adp"):
        return "销售渠道"
    if kf == "newmedia-dianpu-xhs-0-0":
        return "小红书店铺"
    if kf.startswith("newmedia-daren"):
        return "达人直播"
    if kf == "newmedia-dianpu-wwxx-0-0":
        return "万物"
    return "其他端外"
# ═══════════════════════════════════════════════════════════
# Step 1: paying users, with their channels and purchased goods
# ═══════════════════════════════════════════════════════════
print("\n[1/4] 获取付费用户...")
# NOTE(review): order_status = 3 and a.status = 1 are presumably "paid" and
# "active account" — confirm against the schema.
cur.execute("""
SELECT o.account_id, o.goods_id, o.key_from
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.order_status = 3 AND o.pay_success_date >= '2026-03-01' AND o.deleted_at IS NULL
""")
rows = cur.fetchall()

user_goods = defaultdict(set)     # account_id -> set of goods_id
user_channels = defaultdict(set)  # account_id -> set of channel labels
for account_id, goods_id, key_from in rows:
    user_goods[account_id].add(goods_id)
    user_channels[account_id].add(classify(key_from))
# goods_id groups used to derive a user's course level.
# NOTE(review): L12_G presumably holds bundles covering both levels — confirm.
L1_G = {57, 60, 63}
L2_G = {31, 32, 33, 54}
L12_G = {61}


def level(aid):
    """Return the course-level label for account ``aid``.

    The label is derived from the goods ids recorded in ``user_goods``:
    "L1+L2" when the account owns an L12_G item or items from both L1_G and
    L2_G, "仅L1" / "仅L2" when it owns items from only one group, and "其他"
    when it owns none of them (or is unknown).
    """
    owned = user_goods.get(aid, set())
    has_l1 = not owned.isdisjoint(L1_G)
    has_l2 = not owned.isdisjoint(L2_G)
    if (has_l1 and has_l2) or not owned.isdisjoint(L12_G):
        return "L1+L2"
    if has_l1:
        return "仅L1"
    return "仅L2" if has_l2 else "其他"
# ═══════════════════════════════════════════════════════════
# Step 2: character -> account mapping
# ═══════════════════════════════════════════════════════════
print("[2/4] 获取角色映射...")
all_aids = list(user_goods)
cur.execute("SELECT id, account_id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL", (all_aids,))
# Each row is (character id, owning account id).
char_to_account = {char_id: account_id for char_id, account_id in cur.fetchall()}
all_char_ids = set(char_to_account)
# ═══════════════════════════════════════════════════════════
# Step 3: per-character completion counts inside the window
# ═══════════════════════════════════════════════════════════
print("[3/4] 统计各角色近14天完课次数8张分表...")
char_completion_count = defaultdict(int)
# The play records are spread over tables with suffixes _0 .. _7.
for shard in range(8):
    # END is inclusive: the upper bound is END + 1 day, exclusive.
    cur.execute(f"""
SELECT user_id, COUNT(*) as cnt
FROM bi_user_chapter_play_record_{shard}
WHERE play_status = 1
AND updated_at >= %s AND updated_at < %s::date + interval '1 day'
GROUP BY user_id
""", (START, END))
    for record_user_id, shard_cnt in cur.fetchall():
        # Keep only characters that belong to a paying account.
        if record_user_id not in all_char_ids:
            continue
        char_completion_count[record_user_id] += shard_cnt

# Roll the per-character counts up to their owning account.
account_completions = defaultdict(int)
for cid, cnt in char_completion_count.items():
    owner = char_to_account[cid]
    account_completions[owner] += cnt
# ═══════════════════════════════════════════════════════════
# Step 4: aggregate by channel
# ═══════════════════════════════════════════════════════════
print("[4/4] 按渠道聚合...")
# Display order of the channels in every report section.
CHANNELS = [
    "端内",
    "销售渠道",
    "小红书店铺",
    "达人直播",
    "万物",
    "其他端外",
]

# channel label -> set of account ids; an account with orders from several
# channels is counted in each of them.
channel_user_set = defaultdict(set)
for account_id, labels in user_channels.items():
    for channel_label in labels:
        channel_user_set[channel_label].add(account_id)
def stats(aid_set):
    """Summarise completions for a set of account ids.

    Returns:
        A 4-tuple ``(total_users, active_users, total_completions, comps)``
        where ``comps`` lists the completion count of every account in
        ``aid_set`` whose count is greater than zero.
    """
    per_account = (account_completions.get(aid, 0) for aid in aid_set)
    # Only accounts with at least one completion count as active.
    comps = [count for count in per_account if count > 0]
    return len(aid_set), len(comps), sum(comps), comps
# ── Table output ──
header = f"{'渠道':<12s} {'等级':<8s} {'总付费':>6s} {'活跃人数':>7s} {'活跃率':>7s} {'总完课次':>7s} {'人均(全)':>8s} {'人均(活跃)':>9s} {'中位数':>6s} {'P75':>5s} {'P90':>5s}"
print(f"\n{header}")
print("-" * 105)


def _print_stats_row(channel_label, level_label, aid_set):
    """Print one table row for a non-empty ``aid_set``.

    Median / P75 / P90 are computed over the active accounts' counts only,
    because that is what ``stats`` returns in its fourth element.
    """
    total, active, total_comps, comps = stats(aid_set)
    avg_all = total_comps / total if total else 0
    avg_active = total_comps / active if active else 0
    ordered = sorted(comps)
    if ordered:
        med = np.median(ordered)
        p75 = np.percentile(ordered, 75)
        p90 = np.percentile(ordered, 90)
    else:
        med = p75 = p90 = 0
    print(f"{channel_label:<12s} {level_label:<8s} {total:>6d} {active:>7d} {active/total*100:>6.1f}% {total_comps:>7d} {avg_all:>8.1f} {avg_active:>9.1f} {med:>6.0f} {p75:>5.0f} {p90:>5.0f}")


for ch in CHANNELS:
    users = channel_user_set.get(ch, set())
    if not users:
        continue
    # Channel total row.
    _print_stats_row(ch, "合计", users)
    # Breakdown by level; accounts whose level is "其他" get no row.
    users_by_level = defaultdict(set)
    for aid in users:
        users_by_level[level(aid)].add(aid)
    for lv in ("仅L1", "仅L2", "L1+L2"):
        subset = users_by_level.get(lv)
        if subset:
            _print_stats_row("", lv, subset)
    # NOTE(review): separator assumed to follow each channel group — the
    # original nesting of this line should be confirmed.
    print("-" * 105)
# ── Frequency distribution ──
# The rule used to be built from an empty string ('' * 70), which prints
# nothing; use a visible line glyph so the section is actually delimited.
print(f"\n{'─' * 70}")
print("📋 完课频次分布(活跃用户)")
print(f"{'─' * 70}")
# Inclusive (lo, hi) completion-count bins. hi >= 999 marks the open-ended
# top bin, which is labelled "<lo>+".
BINS = [(1,1), (2,2), (3,4), (5,7), (8,14), (15, 30), (31, 999)]


def dist(aid_set):
    """Bucket the active accounts of ``aid_set`` by completion count.

    Only accounts with at least one completion are considered.

    Returns:
        ``(rows, total)`` where ``rows`` is a list of
        ``(label, count, percent)`` tuples, one per entry of ``BINS`` and in
        the same order, and ``total`` is the number of active accounts.
        Percentages are 0 when there are no active accounts.
    """
    comps = [
        c for c in (account_completions.get(a, 0) for a in aid_set) if c > 0
    ]
    total = len(comps)
    d = []
    for lo, hi in BINS:
        open_ended = hi >= 999
        # The top bin is displayed as "<lo>+", so it must not be capped at
        # 999; otherwise larger counts would silently fall out of every bin.
        cnt = sum(1 for c in comps if c >= lo and (open_ended or c <= hi))
        pct = cnt / total * 100 if total else 0
        d.append((f"{lo}+" if open_ended else f"{lo}-{hi}", cnt, pct))
    return d, total
# Header row: channel, active-user count, then one column per bin.
bin_labels = [f"{lo}-{hi}" if hi < 999 else f"{lo}+" for lo, hi in BINS]
print(
    f"\n{'渠道':<12s} {'活跃':>5s}"
    + "".join(f" {bin_label:>7s}" for bin_label in bin_labels)
)

# One row per channel with at least one active user; cells are percentages.
for ch in CHANNELS:
    users = channel_user_set.get(ch, set())
    if not users:
        continue
    d, tot = dist(users)
    if tot == 0:
        continue
    cells = "".join(f" {pct:>6.1f}%" for _, _, pct in d)
    print(f"{ch:<12s} {tot:>5d}" + cells)
# ── Per-channel comparison bar chart ──
# Both the rules and the bars used to be built by repeating an empty string,
# so nothing was drawn; visible glyphs are used instead.
print(f"\n{'─' * 70}")
print("📊 人均完课次数对比(活跃用户)")
print(f"{'─' * 70}")
for ch in CHANNELS:
    users = channel_user_set.get(ch, set())
    if not users:
        continue
    _, act, tc, _ = stats(users)
    # Average completions per active user (0 when the channel has none).
    avg = tc / act if act else 0
    # Two bar cells per completion, with a minimum of one cell.
    bar = "█" * max(1, int(avg * 2))
    print(f" {ch:<12s} 人均 {avg:5.1f}{bar}")
# Release the cursor and the connection, then report completion.
# NOTE(review): these calls are only reached when every step above succeeded;
# an earlier exception leaves the connection to be closed at process exit.
cur.close()
conn.close()
print("\n✅ 完成")