ai_member_xiaoxi/scripts/channel_completion_diff.py
2026-05-23 08:00:01 +08:00

250 lines
9.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
按 key_from 渠道拆分统计各渠道付费用户在最近14天内的完课行为差异。
时间范围2026-05-09 ~ 2026-05-22
"""
import psycopg2
import psycopg2.extras
from collections import defaultdict
conn = psycopg2.connect(
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
port=28591,
user="ai_member",
password="LdfjdjL83h3h3^$&**YGG*",
dbname="vala_bi"
)
conn.set_session(autocommit=True)
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
START = "2026-05-09"
END = "2026-05-22"
print("=" * 80)
print("📊 各渠道付费用户近14天完课行为差异分析")
print("=" * 80)
# ── 渠道分类规则 ─────────────────────────────────────────────
ENDPOINT_INTERNAL = {'app-active-h5-0-0', 'app-sales-bj-qhm-0'}
def classify_channel(kf):
"""将单个 key_from 归入渠道大类"""
if kf in ENDPOINT_INTERNAL:
return "端内"
if kf.startswith("sales-adp"):
return "销售渠道"
if kf == "newmedia-dianpu-xhs-0-0":
return "小红书店铺"
if kf.startswith("newmedia-daren"):
return "达人直播"
if kf == "newmedia-dianpu-wwxx-0-0":
return "万物"
return "其他端外"
# ═══════════════════════════════════════════════════════════
# Step 1: 付费用户 + goods 分类
# ═══════════════════════════════════════════════════════════
print("\n[1/5] 获取付费用户...")
cur.execute("""
SELECT o.account_id, o.goods_id, o.key_from
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.order_status = 3
AND o.pay_success_date >= '2026-03-01'
AND o.deleted_at IS NULL
""")
rows = cur.fetchall()
print(f" 订单记录: {len(rows)}")
# account → {goods_ids}, {channels}
user_goods = defaultdict(set)
user_channels = defaultdict(set)
for row in rows:
aid, gid, kf = row[0], row[1], row[2]
user_goods[aid].add(gid)
user_channels[aid].add(classify_channel(kf))
# L1/L2 分类
L1_GOODS = {57, 60, 63}
L2_GOODS = {31, 32, 33, 54}
L1L2_GOODS = {61}
def user_level(aid):
g = user_goods.get(aid, set())
h1, h2, h12 = bool(g & L1_GOODS), bool(g & L2_GOODS), bool(g & L1L2_GOODS)
if h12 or (h1 and h2):
return "L1+L2"
if h1:
return "仅L1"
if h2:
return "仅L2"
return "其他"
# ═══════════════════════════════════════════════════════════
# Step 2: 角色映射
# ═══════════════════════════════════════════════════════════
print("\n[2/5] 获取角色映射...")
all_accounts = list(user_goods.keys())
cur.execute("SELECT id, account_id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL",
(all_accounts,))
char_to_account = {}
account_chars = defaultdict(list)
for row in cur.fetchall():
char_to_account[row[0]] = row[1]
account_chars[row[1]].append(row[0])
all_char_ids = set(char_to_account.keys())
# ═══════════════════════════════════════════════════════════
# Step 3: 最近14天完课用户
# ═══════════════════════════════════════════════════════════
print("\n[3/5] 查询近14天完课行为8张分表...")
active_char_ids = set()
for shard in range(8):
cur.execute(f"""
SELECT DISTINCT user_id FROM bi_user_chapter_play_record_{shard}
WHERE play_status = 1 AND updated_at >= %s AND updated_at < %s::date + interval '1 day'
""", (START, END))
for r in cur.fetchall():
if r[0] in all_char_ids:
active_char_ids.add(r[0])
active_accounts = {char_to_account[c] for c in active_char_ids}
# ═══════════════════════════════════════════════════════════
# Step 4: 按渠道 + 等级 聚合
# ═══════════════════════════════════════════════════════════
print("\n[4/5] 按渠道聚合...")
# 每个渠道大类独立统计
# 渠道统计维度:用户可能属于多个渠道(多笔订单不同渠道),在各自渠道下独立计数
CHANNELS = ["端内", "销售渠道", "小红书店铺", "达人直播", "万物", "其他端外"]
# 每个渠道下的用户集合
channel_users = defaultdict(set)
for aid, chs in user_channels.items():
for ch in chs:
channel_users[ch].add(aid)
# ═══════════════════════════════════════════════════════════
# Step 5: 输出
# ═══════════════════════════════════════════════════════════
print("\n[5/5] 生成报表...\n")
def calc(channel, aid_set):
if not aid_set:
return None
# 按等级拆分
split = {"仅L1": set(), "仅L2": set(), "L1+L2": set(), "其他": set()}
for aid in aid_set:
split[user_level(aid)].add(aid)
total = len(aid_set)
inactive_all = aid_set - active_accounts
rows_out = []
for lv in ["仅L1", "仅L2", "L1+L2", "其他"]:
u = split[lv]
if not u:
continue
active_u = u & active_accounts
inactive_u = u - active_accounts
rows_out.append((lv, len(u), len(active_u), len(inactive_u),
len(inactive_u)/len(u)*100 if u else 0))
inactive_total = len(inactive_all)
rows_out.append(("合计", total, total - inactive_total, inactive_total,
inactive_total/total*100 if total else 0))
return rows_out
# ── 打印每个渠道 ──
print(f"{'渠道':<14s} {'等级':<8s} {'总付费':>6s} {'有完课':>6s} {'无完课':>6s} {'无完课占比':>10s}")
print("-" * 64)
all_channels_data = {}
for ch in CHANNELS:
if ch not in channel_users:
continue
data = calc(ch, channel_users[ch])
if not data:
continue
all_channels_data[ch] = data
first = True
for row in data:
lv, tot, act, inact, pct = row
if lv == "合计":
print(f"{ch:<14s} {'':<8s} {tot:>6d} {act:>6d} {inact:>6d} {pct:>9.1f}%")
else:
label = ch if first else ""
print(f"{label:<14s} {lv:<8s} {tot:>6d} {act:>6d} {inact:>6d} {pct:>9.1f}%")
first = False
print("-" * 64)
# ── 跨渠道对比表 ──
print(f"\n{'' * 64}")
print("📋 渠道对比总览(仅合计行)")
print(f"{'' * 64}")
print(f"{'渠道':<14s} {'总付费':>6s} {'无完课':>6s} {'无完课占比':>10s}")
print("-" * 42)
for ch in CHANNELS:
if ch not in all_channels_data:
continue
row = all_channels_data[ch][-1] # 合计行
_, tot, _, inact, pct = row
bar = "" * max(1, int(pct / 100 * 20))
print(f"{ch:<14s} {tot:>6d} {inact:>6d} {pct:>9.1f}% {bar}")
# ── 端内按 sale_channel 拆分 ──
print(f"\n{'' * 64}")
print("📋 端内渠道 — 按 sale_channel 细分")
print(f"{'' * 64}")
SALE_CHANNEL_MAP = {
11: "苹果", 12: "华为", 13: "小米", 14: "荣耀", 15: "应用宝",
17: "魅族", 18: "VIVO", 19: "OPPO",
21: "学而思", 22: "讯飞", 23: "步步高", 24: "作业帮", 25: "小度",
26: "希沃", 27: "京东方",
41: "官网", 71: "小程序"
}
cur.execute("""
SELECT o.account_id, o.sale_channel
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.key_from = 'app-active-h5-0-0'
AND o.order_status = 3
AND o.pay_success_date >= '2026-03-01'
AND o.deleted_at IS NULL
""")
sale_rows = cur.fetchall()
# account → set of sale_channels
user_sale_channels = defaultdict(set)
for row in sale_rows:
sc = row[1]
name = SALE_CHANNEL_MAP.get(sc, f"站外({sc})")
user_sale_channels[row[0]].add(name)
print(f"{'子渠道':<12s} {'总付费':>6s} {'有完课':>6s} {'无完课':>6s} {'无完课占比':>10s}")
print("-" * 52)
sale_stats = defaultdict(lambda: {"total": 0, "inactive": 0})
for aid, ch_names in user_sale_channels.items():
if aid not in active_accounts:
for nm in ch_names:
sale_stats[nm]["total"] += 1
sale_stats[nm]["inactive"] += 1
else:
for nm in ch_names:
sale_stats[nm]["total"] += 1
for nm in sorted(sale_stats.keys(), key=lambda x: -sale_stats[x]["total"]):
s = sale_stats[nm]
pct = s["inactive"] / s["total"] * 100 if s["total"] else 0
bar = "" * max(1, int(pct / 100 * 20))
print(f"{nm:<12s} {s['total']:>6d} {s['total']-s['inactive']:>6d} {s['inactive']:>6d} {pct:>9.1f}% {bar}")
cur.close()
conn.close()
print("\n✅ 完成")