#!/usr/bin/env python3
"""
Report the share of paying users with no lesson-completion activity in the
last 14 days, broken down by L1 / L2 / L1+L2.

Date range: 2026-05-09 ~ 2026-05-22 (both days inclusive).
"""

import os
from collections import defaultdict
from datetime import datetime

import psycopg2
import psycopg2.extras

# ── Database connection ─────────────────────────────────────
# Connection settings are read from the environment so that credentials are
# never committed to source control. Host/port/user/dbname fall back to the
# values this script has always used; the password has no fallback.
# NOTE(review): the password that used to be hard-coded here has been exposed
# in source and should be rotated.
_db_password = os.environ.get("VALA_BI_DB_PASSWORD")
if not _db_password:
    raise SystemExit(
        "VALA_BI_DB_PASSWORD is not set; export the database password before running."
    )

conn = psycopg2.connect(
    host=os.environ.get("VALA_BI_DB_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    port=int(os.environ.get("VALA_BI_DB_PORT", "28591")),
    user=os.environ.get("VALA_BI_DB_USER", "ai_member"),
    password=_db_password,
    dbname=os.environ.get("VALA_BI_DB_NAME", "vala_bi"),
)
# The script only reads, so run every statement in autocommit mode instead of
# holding one long transaction open.
conn.set_session(autocommit=True)
# DictCursor rows support both positional (row[0]) and by-name access.
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Reporting window as ISO calendar dates. Both bounds are inclusive; the
# activity query turns END_DATE into an exclusive bound of END_DATE + 1 day.
START_DATE = "2026-05-09"
END_DATE = "2026-05-22"  # inclusive, i.e. everything before 2026-05-23 00:00

print("=" * 70)
print("📊 最近14天无完课行为付费用户占比分析")
print(f" 时间范围: {START_DATE} ~ {END_DATE}")
print("=" * 70)

# ═══════════════════════════════════════════════════════════
# Step 1: collect every paying account (distinct account_id)
# ═══════════════════════════════════════════════════════════
print("\n[1/4] 获取付费用户...")
# An account counts as paying when it has at least one non-deleted order with
# order_status 3 or 4 and a pay_success_date, and the account row has status = 1.
# NOTE(review): the meaning of order_status 3/4 and account status 1 is not
# visible in this file — confirm against the schema.
cur.execute("""
    SELECT DISTINCT o.account_id
    FROM bi_vala_order o
    INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
    WHERE o.order_status IN (3, 4)
      AND o.pay_success_date IS NOT NULL
      AND o.deleted_at IS NULL
""")
paid_accounts = {row[0] for row in cur.fetchall()}
print(f" 付费用户总数(account_id 去重): {len(paid_accounts)}")

# ═══════════════════════════════════════════════════════════
# Step 2: classify accounts as L1-only / L2-only / L1+L2 / other
# ═══════════════════════════════════════════════════════════
print("\n[2/4] 按 goods_id 分类用户等级...")

# goods_id values that grant each course level; goods 61 is the L1+L2 bundle.
L1_GOODS = {57, 60, 63}
L2_GOODS = {31, 32, 33, 54}
L1L2_GOODS = {61}

# Collect every goods_id each paying account has bought. The order filter
# mirrors Step 1 (including pay_success_date IS NOT NULL) so that an order
# which does not qualify as paid cannot influence the classification.
cur.execute("""
    SELECT o.account_id, o.goods_id
    FROM bi_vala_order o
    WHERE o.account_id = ANY(%s)
      AND o.order_status IN (3, 4)
      AND o.pay_success_date IS NOT NULL
      AND o.deleted_at IS NULL
""", (list(paid_accounts),))

# account_id → set of purchased goods_id
user_goods_map = defaultdict(set)
for aid, gid in cur.fetchall():
    user_goods_map[aid].add(gid)

only_l1 = set()
only_l2 = set()
both_l1l2 = set()
other = set()

for aid in paid_accounts:
    # .get() avoids inserting empty entries for accounts without matching rows.
    goods = user_goods_map.get(aid, set())
    has_l1 = bool(goods & L1_GOODS)
    has_l2 = bool(goods & L2_GOODS)
    has_l1l2 = bool(goods & L1L2_GOODS)

    # The bundle, or separate L1 and L2 purchases, both count as L1+L2.
    if has_l1l2 or (has_l1 and has_l2):
        both_l1l2.add(aid)
    elif has_l1:
        only_l1.add(aid)
    elif has_l2:
        only_l2.add(aid)
    else:
        other.add(aid)

print(f" 仅L1: {len(only_l1)}")
print(f" 仅L2: {len(only_l2)}")
print(f" L1+L2: {len(both_l1l2)}")
print(f" 其他: {len(other)}")
print(f" (合计): {len(only_l1)+len(only_l2)+len(both_l1l2)+len(other)}")

# ═══════════════════════════════════════════════════════════
# Step 3: fetch every character id (character.id) owned by the paying accounts
# ═══════════════════════════════════════════════════════════
print("\n[3/4] 获取付费用户的角色 ID...")
cur.execute("""
    SELECT id, account_id
    FROM bi_vala_app_character
    WHERE account_id = ANY(%s)
      AND deleted_at IS NULL
""", (list(paid_accounts),))

# character_id → account_id, used later to map activity back to accounts.
char_to_account = {}
# account_id → [character_id, ...]
account_chars = defaultdict(list)
for cid, aid in cur.fetchall():
    char_to_account[cid] = aid
    account_chars[aid].append(cid)

all_char_ids = list(char_to_account)
print(f" 付费用户角色总数: {len(all_char_ids)}")

# ═══════════════════════════════════════════════════════════
# Step 4: find characters with a completed lesson in the window (8 shard tables)
# ═══════════════════════════════════════════════════════════
print("\n[4/4] 查询最近14天完课行为(遍历8张分表)...")

active_char_ids = set()
for shard in range(8):
    # The table name is built from a fixed integer range, never from user
    # input, so interpolating it into the SQL text is safe.
    table = f"bi_user_chapter_play_record_{shard}"
    print(f" 查询 {table} ...", end=" ")
    # NOTE(review): play_status = 1 is presumably "completed" — confirm.
    # END_DATE is inclusive, hence the exclusive upper bound of END_DATE + 1 day.
    cur.execute(f"""
        SELECT DISTINCT user_id
        FROM {table}
        WHERE play_status = 1
          AND updated_at >= %s
          AND updated_at < %s::date + interval '1 day'
    """, (START_DATE, END_DATE))
    results = cur.fetchall()
    count = len(results)
    print(f"找到 {count} 条记录")

    # user_id is matched against character ids; keep only characters that
    # belong to a paying account.
    active_char_ids.update(row[0] for row in results if row[0] in char_to_account)

# Map active characters back to their owning accounts.
active_account_ids = {char_to_account[cid] for cid in active_char_ids}

# ═══════════════════════════════════════════════════════════
# Step 5: compute and print the summary
# ═══════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("📈 统计结果")
print("=" * 70)

# Paying accounts with no completed lesson in the window, overall and per level.
inactive_all = paid_accounts - active_account_ids
inactive_only_l1 = inactive_all & only_l1
inactive_only_l2 = inactive_all & only_l2
inactive_both_l1l2 = inactive_all & both_l1l2
inactive_other = inactive_all & other

total = len(paid_accounts)
active_total = len(active_account_ids)
inactive_total = len(inactive_all)


def _pct(part, whole):
    """Return part/whole as a percentage, or 0.0 when whole is zero."""
    # Guards the report against ZeroDivisionError when no paying account exists.
    return part / whole * 100 if whole else 0.0


print(f"\n 付费用户总数: {total}")
print(f" 最近14天有完课行为的: {active_total} ({_pct(active_total, total):.1f}%)")
print(f" 最近14天无完课行为的: {inactive_total} ({_pct(inactive_total, total):.1f}%)")

print("\n ─── 按课程等级拆分 ───")


def show_cat(label, cat_set, inactive=None):
    """Print the size of one user category and its inactive share.

    Args:
        label: Heading printed for the category.
        cat_set: Set of account ids that belong to the category.
        inactive: Set of inactive account ids to intersect with. Defaults to
            the module-level ``inactive_all`` when omitted.

    Prints a single "0 人" line for an empty category (which also avoids a
    division by zero); otherwise prints the category total and the count and
    percentage of its members that are inactive.
    """
    if inactive is None:
        inactive = inactive_all
    inactive_cat = cat_set & inactive
    cat_total = len(cat_set)
    if cat_total == 0:
        print(f" {label}: 0 人")
        return
    print(f" {label}:")
    print(f" 总付费用户: {cat_total}")
    print(f" 最近14天无完课: {len(inactive_cat)} ({len(inactive_cat)/cat_total*100:.1f}%)")


show_cat("仅L1", only_l1)
show_cat("仅L2", only_l2)
show_cat("L1+L2", both_l1l2)
# The "other" bucket is only reported when it is non-empty.
if other:
    show_cat("其他", other)

# ─── Cross summary: everyone holding L1, and everyone holding L2 ───
print("\n ─── 按有无 L1/L2 交叉汇总 ───")
users_with_l1 = only_l1 | both_l1l2  # every account that holds L1
users_with_l2 = only_l2 | both_l1l2  # every account that holds L2


def show_cross(label, user_set, inactive=None):
    """Print the size of a cross-level user set and its inactive share.

    Produces the same report layout as ``show_cat``.

    Args:
        label: Heading printed for the set.
        user_set: Set of account ids to report on.
        inactive: Set of inactive account ids to intersect with. Defaults to
            the module-level ``inactive_all`` when omitted.

    Prints a single "0 人" line for an empty set (which also avoids a division
    by zero); otherwise prints the set total and the count and percentage of
    its members that are inactive.
    """
    if inactive is None:
        inactive = inactive_all
    inactive_users = user_set & inactive
    total_u = len(user_set)
    if total_u == 0:
        print(f" {label}: 0 人")
        return
    print(f" {label}:")
    print(f" 总付费用户: {total_u}")
    print(f" 最近14天无完课: {len(inactive_users)} ({len(inactive_users)/total_u*100:.1f}%)")


show_cross("含L1(仅L1 + L1+L2)", users_with_l1)
show_cross("含L2(仅L2 + L1+L2)", users_with_l2)

print("\n" + "=" * 70)
print("✅ 分析完成")
print("=" * 70)

# Release the cursor and the connection now that the report is printed.
cur.close()
conn.close()