250 lines
9.3 KiB
Python
250 lines
9.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
按 key_from 渠道拆分,统计各渠道付费用户在最近14天内的完课行为差异。
|
||
时间范围:2026-05-09 ~ 2026-05-22
|
||
"""
|
||
|
||
import psycopg2
|
||
import psycopg2.extras
|
||
from collections import defaultdict
|
||
|
||
conn = psycopg2.connect(
|
||
host="bj-postgres-16pob4sg.sql.tencentcdb.com",
|
||
port=28591,
|
||
user="ai_member",
|
||
password="LdfjdjL83h3h3^$&**YGG*",
|
||
dbname="vala_bi"
|
||
)
|
||
conn.set_session(autocommit=True)
|
||
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
|
||
|
||
START = "2026-05-09"
|
||
END = "2026-05-22"
|
||
|
||
print("=" * 80)
|
||
print("📊 各渠道付费用户近14天完课行为差异分析")
|
||
print("=" * 80)
|
||
|
||
# ── 渠道分类规则 ─────────────────────────────────────────────
|
||
ENDPOINT_INTERNAL = {'app-active-h5-0-0', 'app-sales-bj-qhm-0'}
|
||
|
||
def classify_channel(kf):
|
||
"""将单个 key_from 归入渠道大类"""
|
||
if kf in ENDPOINT_INTERNAL:
|
||
return "端内"
|
||
if kf.startswith("sales-adp"):
|
||
return "销售渠道"
|
||
if kf == "newmedia-dianpu-xhs-0-0":
|
||
return "小红书店铺"
|
||
if kf.startswith("newmedia-daren"):
|
||
return "达人直播"
|
||
if kf == "newmedia-dianpu-wwxx-0-0":
|
||
return "万物"
|
||
return "其他端外"
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Step 1: 付费用户 + goods 分类
|
||
# ═══════════════════════════════════════════════════════════
|
||
print("\n[1/5] 获取付费用户...")
|
||
cur.execute("""
|
||
SELECT o.account_id, o.goods_id, o.key_from
|
||
FROM bi_vala_order o
|
||
INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
|
||
WHERE o.order_status = 3
|
||
AND o.pay_success_date >= '2026-03-01'
|
||
AND o.deleted_at IS NULL
|
||
""")
|
||
rows = cur.fetchall()
|
||
print(f" 订单记录: {len(rows)}")
|
||
|
||
# account → {goods_ids}, {channels}
|
||
user_goods = defaultdict(set)
|
||
user_channels = defaultdict(set)
|
||
for row in rows:
|
||
aid, gid, kf = row[0], row[1], row[2]
|
||
user_goods[aid].add(gid)
|
||
user_channels[aid].add(classify_channel(kf))
|
||
|
||
# L1/L2 分类
|
||
L1_GOODS = {57, 60, 63}
|
||
L2_GOODS = {31, 32, 33, 54}
|
||
L1L2_GOODS = {61}
|
||
|
||
def user_level(aid):
|
||
g = user_goods.get(aid, set())
|
||
h1, h2, h12 = bool(g & L1_GOODS), bool(g & L2_GOODS), bool(g & L1L2_GOODS)
|
||
if h12 or (h1 and h2):
|
||
return "L1+L2"
|
||
if h1:
|
||
return "仅L1"
|
||
if h2:
|
||
return "仅L2"
|
||
return "其他"
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Step 2: 角色映射
|
||
# ═══════════════════════════════════════════════════════════
|
||
print("\n[2/5] 获取角色映射...")
|
||
all_accounts = list(user_goods.keys())
|
||
cur.execute("SELECT id, account_id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL",
|
||
(all_accounts,))
|
||
char_to_account = {}
|
||
account_chars = defaultdict(list)
|
||
for row in cur.fetchall():
|
||
char_to_account[row[0]] = row[1]
|
||
account_chars[row[1]].append(row[0])
|
||
all_char_ids = set(char_to_account.keys())
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Step 3: 最近14天完课用户
|
||
# ═══════════════════════════════════════════════════════════
|
||
print("\n[3/5] 查询近14天完课行为(8张分表)...")
|
||
active_char_ids = set()
|
||
for shard in range(8):
|
||
cur.execute(f"""
|
||
SELECT DISTINCT user_id FROM bi_user_chapter_play_record_{shard}
|
||
WHERE play_status = 1 AND updated_at >= %s AND updated_at < %s::date + interval '1 day'
|
||
""", (START, END))
|
||
for r in cur.fetchall():
|
||
if r[0] in all_char_ids:
|
||
active_char_ids.add(r[0])
|
||
|
||
active_accounts = {char_to_account[c] for c in active_char_ids}
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Step 4: 按渠道 + 等级 聚合
|
||
# ═══════════════════════════════════════════════════════════
|
||
print("\n[4/5] 按渠道聚合...")
|
||
|
||
# 每个渠道大类独立统计
|
||
# 渠道统计维度:用户可能属于多个渠道(多笔订单不同渠道),在各自渠道下独立计数
|
||
CHANNELS = ["端内", "销售渠道", "小红书店铺", "达人直播", "万物", "其他端外"]
|
||
|
||
# 每个渠道下的用户集合
|
||
channel_users = defaultdict(set)
|
||
for aid, chs in user_channels.items():
|
||
for ch in chs:
|
||
channel_users[ch].add(aid)
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Step 5: 输出
|
||
# ═══════════════════════════════════════════════════════════
|
||
print("\n[5/5] 生成报表...\n")
|
||
|
||
def calc(channel, aid_set):
|
||
if not aid_set:
|
||
return None
|
||
# 按等级拆分
|
||
split = {"仅L1": set(), "仅L2": set(), "L1+L2": set(), "其他": set()}
|
||
for aid in aid_set:
|
||
split[user_level(aid)].add(aid)
|
||
|
||
total = len(aid_set)
|
||
inactive_all = aid_set - active_accounts
|
||
|
||
rows_out = []
|
||
for lv in ["仅L1", "仅L2", "L1+L2", "其他"]:
|
||
u = split[lv]
|
||
if not u:
|
||
continue
|
||
active_u = u & active_accounts
|
||
inactive_u = u - active_accounts
|
||
rows_out.append((lv, len(u), len(active_u), len(inactive_u),
|
||
len(inactive_u)/len(u)*100 if u else 0))
|
||
|
||
inactive_total = len(inactive_all)
|
||
rows_out.append(("合计", total, total - inactive_total, inactive_total,
|
||
inactive_total/total*100 if total else 0))
|
||
return rows_out
|
||
|
||
# ── 打印每个渠道 ──
|
||
print(f"{'渠道':<14s} {'等级':<8s} {'总付费':>6s} {'有完课':>6s} {'无完课':>6s} {'无完课占比':>10s}")
|
||
print("-" * 64)
|
||
|
||
all_channels_data = {}
|
||
for ch in CHANNELS:
|
||
if ch not in channel_users:
|
||
continue
|
||
data = calc(ch, channel_users[ch])
|
||
if not data:
|
||
continue
|
||
all_channels_data[ch] = data
|
||
first = True
|
||
for row in data:
|
||
lv, tot, act, inact, pct = row
|
||
if lv == "合计":
|
||
print(f"{ch:<14s} {'':<8s} {tot:>6d} {act:>6d} {inact:>6d} {pct:>9.1f}%")
|
||
else:
|
||
label = ch if first else ""
|
||
print(f"{label:<14s} {lv:<8s} {tot:>6d} {act:>6d} {inact:>6d} {pct:>9.1f}%")
|
||
first = False
|
||
print("-" * 64)
|
||
|
||
# ── 跨渠道对比表 ──
|
||
print(f"\n{'═' * 64}")
|
||
print("📋 渠道对比总览(仅合计行)")
|
||
print(f"{'═' * 64}")
|
||
print(f"{'渠道':<14s} {'总付费':>6s} {'无完课':>6s} {'无完课占比':>10s}")
|
||
print("-" * 42)
|
||
for ch in CHANNELS:
|
||
if ch not in all_channels_data:
|
||
continue
|
||
row = all_channels_data[ch][-1] # 合计行
|
||
_, tot, _, inact, pct = row
|
||
bar = "█" * max(1, int(pct / 100 * 20))
|
||
print(f"{ch:<14s} {tot:>6d} {inact:>6d} {pct:>9.1f}% {bar}")
|
||
|
||
# ── 端内按 sale_channel 拆分 ──
|
||
print(f"\n{'═' * 64}")
|
||
print("📋 端内渠道 — 按 sale_channel 细分")
|
||
print(f"{'═' * 64}")
|
||
|
||
SALE_CHANNEL_MAP = {
|
||
11: "苹果", 12: "华为", 13: "小米", 14: "荣耀", 15: "应用宝",
|
||
17: "魅族", 18: "VIVO", 19: "OPPO",
|
||
21: "学而思", 22: "讯飞", 23: "步步高", 24: "作业帮", 25: "小度",
|
||
26: "希沃", 27: "京东方",
|
||
41: "官网", 71: "小程序"
|
||
}
|
||
|
||
cur.execute("""
|
||
SELECT o.account_id, o.sale_channel
|
||
FROM bi_vala_order o
|
||
INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
|
||
WHERE o.key_from = 'app-active-h5-0-0'
|
||
AND o.order_status = 3
|
||
AND o.pay_success_date >= '2026-03-01'
|
||
AND o.deleted_at IS NULL
|
||
""")
|
||
sale_rows = cur.fetchall()
|
||
|
||
# account → set of sale_channels
|
||
user_sale_channels = defaultdict(set)
|
||
for row in sale_rows:
|
||
sc = row[1]
|
||
name = SALE_CHANNEL_MAP.get(sc, f"站外({sc})")
|
||
user_sale_channels[row[0]].add(name)
|
||
|
||
print(f"{'子渠道':<12s} {'总付费':>6s} {'有完课':>6s} {'无完课':>6s} {'无完课占比':>10s}")
|
||
print("-" * 52)
|
||
|
||
sale_stats = defaultdict(lambda: {"total": 0, "inactive": 0})
|
||
for aid, ch_names in user_sale_channels.items():
|
||
if aid not in active_accounts:
|
||
for nm in ch_names:
|
||
sale_stats[nm]["total"] += 1
|
||
sale_stats[nm]["inactive"] += 1
|
||
else:
|
||
for nm in ch_names:
|
||
sale_stats[nm]["total"] += 1
|
||
|
||
for nm in sorted(sale_stats.keys(), key=lambda x: -sale_stats[x]["total"]):
|
||
s = sale_stats[nm]
|
||
pct = s["inactive"] / s["total"] * 100 if s["total"] else 0
|
||
bar = "█" * max(1, int(pct / 100 * 20))
|
||
print(f"{nm:<12s} {s['total']:>6d} {s['total']-s['inactive']:>6d} {s['inactive']:>6d} {pct:>9.1f}% {bar}")
|
||
|
||
cur.close()
|
||
conn.close()
|
||
print("\n✅ 完成")
|