172 lines
5.8 KiB
Python
172 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
最近14天无完课行为的付费用户占比分析
|
||
口径:
|
||
- 最近14天: 2026-05-08 ~ 2026-05-21(14个完整天)
|
||
- 付费用户: 有已完成/已退款订单的非测试账号(status=1, deleted_at IS NULL)
|
||
- 完课行为: bi_user_chapter_play_record.play_status=1
|
||
- L1/L2分类: 基于goods_id
|
||
"""
|
||
import os, sys
|
||
sys.path.insert(0, "/root/.openclaw/workspace")
|
||
import psycopg2
|
||
from datetime import datetime
|
||
|
||
SECRETS_FILE = "/root/.openclaw/workspace/secrets.env"
|
||
DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
|
||
DB_PORT = "28591"
|
||
DB_USER = "ai_member"
|
||
DB_NAME = "vala_bi"
|
||
|
||
DATE_START = "2026-05-08"
|
||
DATE_END = "2026-05-22" # exclusive
|
||
|
||
def load_pw():
|
||
with open(SECRETS_FILE) as f:
|
||
for line in f:
|
||
if line.startswith("PG_ONLINE_PASSWORD="):
|
||
return line.split("=", 1)[1].strip().strip("'\"")
|
||
|
||
pw = load_pw()
|
||
conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER, password=pw, dbname=DB_NAME)
|
||
cur = conn.cursor()
|
||
|
||
print("=" * 60)
|
||
print("📊 最近14天(2026-05-08~05-21)无完课行为付费用户占比")
|
||
print("=" * 60)
|
||
|
||
# ── Step 1: 获取所有付费用户及L1/L2分类 ──
|
||
print("\n[1/3] 查询付费用户及等级分类...")
|
||
cur.execute("""
|
||
WITH user_goods AS (
|
||
SELECT
|
||
o.account_id,
|
||
o.goods_id,
|
||
a.id IS NOT NULL AS is_valid
|
||
FROM bi_vala_order o
|
||
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
|
||
WHERE o.order_status IN (3, 4)
|
||
AND o.pay_success_date IS NOT NULL
|
||
GROUP BY o.account_id, o.goods_id, a.id
|
||
),
|
||
user_level_raw AS (
|
||
SELECT
|
||
account_id,
|
||
BOOL_OR(goods_id = 61) AS has_both,
|
||
BOOL_OR(goods_id IN (57, 60, 63)) AS has_l1,
|
||
BOOL_OR(goods_id IN (31, 32, 33, 54)) AS has_l2,
|
||
BOOL_OR(goods_id IN (4,5,6,10,13,14,17,20,25,29,30,35,36,37,38)) AS has_other
|
||
FROM user_goods
|
||
GROUP BY account_id
|
||
)
|
||
SELECT
|
||
account_id,
|
||
CASE
|
||
WHEN has_both OR (has_l1 AND has_l2) THEN 'L1+L2'
|
||
WHEN has_l1 AND NOT has_l2 THEN '仅L1'
|
||
WHEN has_l2 AND NOT has_l1 THEN '仅L2'
|
||
ELSE '其他'
|
||
END AS level
|
||
FROM user_level_raw
|
||
""")
|
||
paying_users = {row[0]: row[1] for row in cur.fetchall()}
|
||
total_paying = len(paying_users)
|
||
print(f" 付费用户总数: {total_paying}")
|
||
|
||
l1_only = sum(1 for v in paying_users.values() if v == '仅L1')
|
||
l2_only = sum(1 for v in paying_users.values() if v == '仅L2')
|
||
l1l2 = sum(1 for v in paying_users.values() if v == 'L1+L2')
|
||
other = sum(1 for v in paying_users.values() if v == '其他')
|
||
print(f" 仅L1: {l1_only} | 仅L2: {l2_only} | L1+L2: {l1l2} | 其他: {other}")
|
||
|
||
# ── Step 2: 查询最近14天有完课行为的用户 ──
|
||
print("\n[2/3] 查询最近14天有完课行为的用户...")
|
||
|
||
# 获取所有付费用户的 character_ids
|
||
account_ids = list(paying_users.keys())
|
||
# 分批查 character
|
||
char_map = {} # account_id -> [character_ids]
|
||
for i in range(0, len(account_ids), 500):
|
||
batch = account_ids[i:i+500]
|
||
cur.execute("""
|
||
SELECT account_id, id FROM bi_vala_app_character
|
||
WHERE account_id = ANY(%s) AND deleted_at IS NULL
|
||
""", (batch,))
|
||
for acc_id, char_id in cur.fetchall():
|
||
if acc_id not in char_map:
|
||
char_map[acc_id] = []
|
||
char_map[acc_id].append(char_id)
|
||
|
||
all_char_ids = []
|
||
for chars in char_map.values():
|
||
all_char_ids.extend(chars)
|
||
|
||
print(f" 角色总数: {len(all_char_ids)}")
|
||
|
||
# 查完课行为 - 分8表UNION ALL
|
||
learned_chars = set()
|
||
for shard in range(8):
|
||
table = f"bi_user_chapter_play_record_{shard}"
|
||
sql = f"""
|
||
SELECT DISTINCT user_id
|
||
FROM {table}
|
||
WHERE play_status = 1
|
||
AND created_at >= %s
|
||
AND created_at < %s
|
||
AND user_id = ANY(%s)
|
||
"""
|
||
cur.execute(sql, (DATE_START, DATE_END, all_char_ids))
|
||
for (uid,) in cur.fetchall():
|
||
learned_chars.add(uid)
|
||
|
||
# 有完课行为的 account_id
|
||
learned_accounts = set()
|
||
for acc_id, chars in char_map.items():
|
||
if any(c in learned_chars for c in chars):
|
||
learned_accounts.add(acc_id)
|
||
|
||
no_learn_accounts = set(paying_users.keys()) - learned_accounts
|
||
|
||
print(f" 有完课行为的付费用户: {len(learned_accounts)}")
|
||
print(f" 无完课行为的付费用户: {len(no_learn_accounts)}")
|
||
|
||
# ── Step 3: 分L1/L2统计 ──
|
||
print("\n[3/3] 汇总统计...")
|
||
|
||
def stats_by_level(label, user_set):
|
||
total = len(user_set)
|
||
no_learn = len(user_set & no_learn_accounts)
|
||
learn = total - no_learn
|
||
ratio = no_learn / total * 100 if total > 0 else 0
|
||
return total, no_learn, learn, ratio
|
||
|
||
all_users = set(paying_users.keys())
|
||
l1_set = {k for k, v in paying_users.items() if v == '仅L1'}
|
||
l2_set = {k for k, v in paying_users.items() if v == '仅L2'}
|
||
l1l2_set = {k for k, v in paying_users.items() if v == 'L1+L2'}
|
||
other_set = {k for k, v in paying_users.items() if v == '其他'}
|
||
|
||
print("\n" + "=" * 60)
|
||
print("📊 结果汇总")
|
||
print("=" * 60)
|
||
print(f"{'类别':<12} {'总付费用户':>10} {'无完课':>8} {'无完课占比':>10} {'有完课':>8}")
|
||
print("-" * 60)
|
||
|
||
for label, uset in [("总体", all_users), ("仅L1", l1_set), ("仅L2", l2_set), ("L1+L2", l1l2_set), ("其他", other_set)]:
|
||
t, nl, l, r = stats_by_level(label, uset)
|
||
print(f"{label:<12} {t:>10,} {nl:>8,} {r:>9.1f}% {l:>8,}")
|
||
|
||
# 合并L1相关(仅L1 + L1+L2)
|
||
l1_related = l1_set | l1l2_set
|
||
t1, nl1, l1, r1 = stats_by_level("L1相关", l1_related)
|
||
print(f"{'L1相关(含L1+L2)':<12} {t1:>10,} {nl1:>8,} {r1:>9.1f}% {l1:>8,}")
|
||
|
||
# 合并L2相关(仅L2 + L1+L2)
|
||
l2_related = l2_set | l1l2_set
|
||
t2, nl2, l2, r2 = stats_by_level("L2相关", l2_related)
|
||
print(f"{'L2相关(含L1+L2)':<12} {t2:>10,} {nl2:>8,} {r2:>9.1f}% {l2:>8,}")
|
||
|
||
cur.close()
|
||
conn.close()
|
||
print("\n✅ 完成")
|