ai_member_xiaoxi/scripts/inactive_paid_users_14d.py
2026-05-23 08:00:01 +08:00

216 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
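Report the share of paid users with no lesson-completion activity in the last 14 days, broken down by L1 / L2 / L1+L2.
Time range: 2026-05-09 ~ 2026-05-22 (end date inclusive).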
"""
import psycopg2
import psycopg2.extras
from datetime import datetime
# ── Database connection ─────────────────────────────────────
# SECURITY: the password is intentionally NOT passed here. Credentials must
# not live in source control. With no `password` argument, libpq (which
# psycopg2 wraps) falls back to the PGPASSWORD environment variable or a
# matching ~/.pgpass entry, so supply the secret through one of those.
# NOTE(review): the password that was previously committed in this file
# should be treated as leaked and rotated.
conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591,
    user="ai_member",
    dbname="vala_bi",
)
# The script only reads, so run every statement in autocommit mode.
conn.set_session(autocommit=True)
# DictCursor rows support both positional (row[0]) and by-name access.
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Reporting window. END_DATE is inclusive: the completion query compares
# against END_DATE + 1 day with a strict "<".
START_DATE = "2026-05-09"
END_DATE = "2026-05-22"

print("=" * 70)
print("📊 最近14天无完课行为付费用户占比分析")
print(f" 时间范围: {START_DATE} ~ {END_DATE}")
print("=" * 70)
# ═══════════════════════════════════════════════════════════
# Step 1: collect every paid user (distinct account_id)
# ═══════════════════════════════════════════════════════════
print("\n[1/4] 获取付费用户...")
# A paid account is one whose bi_vala_app_account row has status = 1 and that
# owns at least one non-deleted order with order_status 3 or 4 and a
# non-null pay_success_date.
PAID_ACCOUNTS_SQL = """
SELECT DISTINCT o.account_id
FROM bi_vala_order o
INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
WHERE o.order_status IN (3, 4)
AND o.pay_success_date IS NOT NULL
AND o.deleted_at IS NULL
"""
cur.execute(PAID_ACCOUNTS_SQL)
# Keep the ids in a set: later steps rely on set algebra and O(1) lookups.
paid_accounts = set()
for record in cur.fetchall():
    paid_accounts.add(record[0])
print(f" 付费用户总数account_id 去重): {len(paid_accounts)}")
# ═══════════════════════════════════════════════════════════
# Step 2: classify paid users as L1-only / L2-only / L1+L2 / other
# ═══════════════════════════════════════════════════════════
print("\n[2/4] 按 goods_id 分类用户等级...")
# goods_id values that grant each course level; 61 is a bundle that counts
# as both L1 and L2.
L1_GOODS = {57, 60, 63}
L2_GOODS = {31, 32, 33, 54}
L1L2_GOODS = {61}

# Gather every goods_id each paid account has bought. The order filter
# mirrors the Step 1 definition of a paid order (including
# pay_success_date IS NOT NULL) so that an order which never paid
# successfully cannot change a user's category.
cur.execute("""
SELECT o.account_id, o.goods_id
FROM bi_vala_order o
WHERE o.account_id = ANY(%s)
AND o.order_status IN (3, 4)
AND o.pay_success_date IS NOT NULL
AND o.deleted_at IS NULL
""", (list(paid_accounts),))

# account_id -> set of purchased goods_id
user_goods_map = {}
for aid, gid in cur.fetchall():
    user_goods_map.setdefault(aid, set()).add(gid)

only_l1 = set()
only_l2 = set()
both_l1l2 = set()
other = set()
for aid in paid_accounts:
    goods = user_goods_map.get(aid, set())
    has_l1 = bool(goods & L1_GOODS)
    has_l2 = bool(goods & L2_GOODS)
    has_l1l2 = bool(goods & L1L2_GOODS)
    # The bundle, or separate L1 and L2 purchases, both land in "L1+L2".
    if has_l1l2 or (has_l1 and has_l2):
        both_l1l2.add(aid)
    elif has_l1:
        only_l1.add(aid)
    elif has_l2:
        only_l2.add(aid)
    else:
        # Paid, but for goods outside the three known level sets.
        other.add(aid)

print(f" 仅L1: {len(only_l1)}")
print(f" 仅L2: {len(only_l2)}")
print(f" L1+L2: {len(both_l1l2)}")
print(f" 其他: {len(other)}")
# Sanity line: the four buckets partition paid_accounts, so this should
# equal the paid-user total printed in Step 1.
print(f" (合计): {len(only_l1)+len(only_l2)+len(both_l1l2)+len(other)}")
# ═══════════════════════════════════════════════════════════
# Step 3: load every character id (character.id) owned by the paid users
# ═══════════════════════════════════════════════════════════
print("\n[3/4] 获取付费用户的角色 ID...")
CHARACTERS_SQL = """
SELECT id, account_id
FROM bi_vala_app_character
WHERE account_id = ANY(%s)
AND deleted_at IS NULL
"""
cur.execute(CHARACTERS_SQL, (list(paid_accounts),))

char_to_account = {}  # character id -> owning account_id
account_chars = {}  # account_id -> [character id, ...]
for char_id, owner_id in cur.fetchall():
    char_to_account[char_id] = owner_id
    account_chars.setdefault(owner_id, []).append(char_id)

all_char_ids = list(char_to_account)
print(f" 付费用户角色总数: {len(all_char_ids)}")
# ═══════════════════════════════════════════════════════════
# Step 4: find characters with a completion record inside the window
#         (the play records are spread over 8 shard tables)
# ═══════════════════════════════════════════════════════════
print("\n[4/4] 查询最近14天完课行为遍历8张分表...")
# The table name is filled in from range(8) below, never from external input.
# NOTE(review): presumably play_status = 1 marks a completed chapter — confirm.
SHARD_SQL_TEMPLATE = """
SELECT DISTINCT user_id
FROM {table}
WHERE play_status = 1
AND updated_at >= %s
AND updated_at < %s::date + interval '1 day'
"""
active_char_ids = set()
for shard_no in range(8):
    shard_table = f"bi_user_chapter_play_record_{shard_no}"
    print(f" 查询 {shard_table} ...", end=" ")
    cur.execute(SHARD_SQL_TEMPLATE.format(table=shard_table), (START_DATE, END_DATE))
    shard_rows = cur.fetchall()
    # The printed count covers every user in the shard, not only paid ones.
    print(f"找到 {len(shard_rows)} 条记录")
    # user_id in the shard tables is matched against character ids; keep only
    # the ones that belong to a paid account.
    active_char_ids.update(
        rec[0] for rec in shard_rows if rec[0] in char_to_account
    )

# Map the active characters back to their owning accounts.
active_account_ids = {char_to_account[char_id] for char_id in active_char_ids}
# ═══════════════════════════════════════════════════════════
# Step 5: compute and print the headline statistics
# ═══════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("📈 统计结果")
print("=" * 70)

# Paid users with no completion record in the window, overall and per bucket.
inactive_all = paid_accounts - active_account_ids
inactive_only_l1 = inactive_all & only_l1
inactive_only_l2 = inactive_all & only_l2
inactive_both_l1l2 = inactive_all & both_l1l2
inactive_other = inactive_all & other

total = len(paid_accounts)
active_total = len(active_account_ids)
inactive_total = len(inactive_all)

# Guard the percentages: with zero paid users the plain division would raise
# ZeroDivisionError and abort the report before the cursor is closed.
active_pct = active_total / total * 100 if total else 0.0
inactive_pct = inactive_total / total * 100 if total else 0.0

print(f"\n 付费用户总数: {total}")
print(f" 最近14天有完课行为的: {active_total} ({active_pct:.1f}%)")
print(f" 最近14天无完课行为的: {inactive_total} ({inactive_pct:.1f}%)")
print(f"\n ─── 按课程等级拆分 ───")
def show_cat(label, cat_set):
    """Print one category's size and its share of 14-day inactive users.

    Args:
        label: Heading printed for the category.
        cat_set: Set of account ids that belong to the category.

    Reads the module-level ``inactive_all`` set. Prints to stdout and
    returns None. An empty category prints a single "0" line so that no
    percentage is computed against a zero denominator.
    """
    idle_members = cat_set & inactive_all
    member_count = len(cat_set)
    if member_count != 0:
        idle_count = len(idle_members)
        print(f" {label}:")
        print(f" 总付费用户: {member_count}")
        print(f" 最近14天无完课: {idle_count} ({idle_count/member_count*100:.1f}%)")
    else:
        print(f" {label}: 0 人")
# Per-level breakdown; the three main buckets are always reported.
for cat_label, cat_members in (
    ("仅L1", only_l1),
    ("仅L2", only_l2),
    ("L1+L2", both_l1l2),
):
    show_cat(cat_label, cat_members)
# The "other" bucket is only reported when it is non-empty.
if other:
    show_cat("其他", other)

# ─── Roll-up view: users who hold L1 at all / L2 at all ───
print(f"\n ─── 按有无 L1/L2 交叉汇总 ───")
users_with_l1 = only_l1.union(both_l1l2)  # every user who bought L1
users_with_l2 = only_l2.union(both_l1l2)  # every user who bought L2
def show_cross(label, user_set):
    """Print the size and 14-day inactive share of a roll-up user set.

    Args:
        label: Heading printed for the group.
        user_set: Set of account ids in the group.

    The report format is exactly the one ``show_cat`` produces, so this
    delegates to it rather than keeping a second copy of the same printing
    logic that could drift out of sync. Prints to stdout and returns None.
    """
    show_cat(label, user_set)
# Roll-up rows: everyone holding L1, then everyone holding L2.
for cross_label, cross_members in (
    ("含L1仅L1 + L1+L2", users_with_l1),
    ("含L2仅L2 + L1+L2", users_with_l2),
):
    show_cross(cross_label, cross_members)

footer_rule = "=" * 70
print("\n" + footer_rule)
print("✅ 分析完成")
print(footer_rule)

# Release the cursor first, then the connection.
cur.close()
conn.close()