ai_member_xiaoxi/scripts/no_learn_paying_users.py
2026-05-23 08:00:01 +08:00

172 lines
5.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
最近14天无完课行为的付费用户占比分析
口径:
- 最近14天: 2026-05-08 ~ 2026-05-2114个完整天
- 付费用户: 有已完成/已退款订单的非测试账号(status=1, deleted_at IS NULL)
- 完课行为: bi_user_chapter_play_record.play_status=1
- L1/L2分类: 基于goods_id
"""
import os, sys
sys.path.insert(0, "/root/.openclaw/workspace")
import psycopg2
from datetime import datetime
SECRETS_FILE = "/root/.openclaw/workspace/secrets.env"
DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
DB_PORT = "28591"
DB_USER = "ai_member"
DB_NAME = "vala_bi"
DATE_START = "2026-05-08"
DATE_END = "2026-05-22" # exclusive
def load_pw():
with open(SECRETS_FILE) as f:
for line in f:
if line.startswith("PG_ONLINE_PASSWORD="):
return line.split("=", 1)[1].strip().strip("'\"")
pw = load_pw()
conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER, password=pw, dbname=DB_NAME)
cur = conn.cursor()
print("=" * 60)
print("📊 最近14天(2026-05-08~05-21)无完课行为付费用户占比")
print("=" * 60)
# ── Step 1: 获取所有付费用户及L1/L2分类 ──
print("\n[1/3] 查询付费用户及等级分类...")
cur.execute("""
WITH user_goods AS (
SELECT
o.account_id,
o.goods_id,
a.id IS NOT NULL AS is_valid
FROM bi_vala_order o
JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
WHERE o.order_status IN (3, 4)
AND o.pay_success_date IS NOT NULL
GROUP BY o.account_id, o.goods_id, a.id
),
user_level_raw AS (
SELECT
account_id,
BOOL_OR(goods_id = 61) AS has_both,
BOOL_OR(goods_id IN (57, 60, 63)) AS has_l1,
BOOL_OR(goods_id IN (31, 32, 33, 54)) AS has_l2,
BOOL_OR(goods_id IN (4,5,6,10,13,14,17,20,25,29,30,35,36,37,38)) AS has_other
FROM user_goods
GROUP BY account_id
)
SELECT
account_id,
CASE
WHEN has_both OR (has_l1 AND has_l2) THEN 'L1+L2'
WHEN has_l1 AND NOT has_l2 THEN '仅L1'
WHEN has_l2 AND NOT has_l1 THEN '仅L2'
ELSE '其他'
END AS level
FROM user_level_raw
""")
paying_users = {row[0]: row[1] for row in cur.fetchall()}
total_paying = len(paying_users)
print(f" 付费用户总数: {total_paying}")
l1_only = sum(1 for v in paying_users.values() if v == '仅L1')
l2_only = sum(1 for v in paying_users.values() if v == '仅L2')
l1l2 = sum(1 for v in paying_users.values() if v == 'L1+L2')
other = sum(1 for v in paying_users.values() if v == '其他')
print(f" 仅L1: {l1_only} | 仅L2: {l2_only} | L1+L2: {l1l2} | 其他: {other}")
# ── Step 2: 查询最近14天有完课行为的用户 ──
print("\n[2/3] 查询最近14天有完课行为的用户...")
# 获取所有付费用户的 character_ids
account_ids = list(paying_users.keys())
# 分批查 character
char_map = {} # account_id -> [character_ids]
for i in range(0, len(account_ids), 500):
batch = account_ids[i:i+500]
cur.execute("""
SELECT account_id, id FROM bi_vala_app_character
WHERE account_id = ANY(%s) AND deleted_at IS NULL
""", (batch,))
for acc_id, char_id in cur.fetchall():
if acc_id not in char_map:
char_map[acc_id] = []
char_map[acc_id].append(char_id)
all_char_ids = []
for chars in char_map.values():
all_char_ids.extend(chars)
print(f" 角色总数: {len(all_char_ids)}")
# 查完课行为 - 分8表UNION ALL
learned_chars = set()
for shard in range(8):
table = f"bi_user_chapter_play_record_{shard}"
sql = f"""
SELECT DISTINCT user_id
FROM {table}
WHERE play_status = 1
AND created_at >= %s
AND created_at < %s
AND user_id = ANY(%s)
"""
cur.execute(sql, (DATE_START, DATE_END, all_char_ids))
for (uid,) in cur.fetchall():
learned_chars.add(uid)
# 有完课行为的 account_id
learned_accounts = set()
for acc_id, chars in char_map.items():
if any(c in learned_chars for c in chars):
learned_accounts.add(acc_id)
no_learn_accounts = set(paying_users.keys()) - learned_accounts
print(f" 有完课行为的付费用户: {len(learned_accounts)}")
print(f" 无完课行为的付费用户: {len(no_learn_accounts)}")
# ── Step 3: 分L1/L2统计 ──
print("\n[3/3] 汇总统计...")
def stats_by_level(label, user_set):
total = len(user_set)
no_learn = len(user_set & no_learn_accounts)
learn = total - no_learn
ratio = no_learn / total * 100 if total > 0 else 0
return total, no_learn, learn, ratio
all_users = set(paying_users.keys())
l1_set = {k for k, v in paying_users.items() if v == '仅L1'}
l2_set = {k for k, v in paying_users.items() if v == '仅L2'}
l1l2_set = {k for k, v in paying_users.items() if v == 'L1+L2'}
other_set = {k for k, v in paying_users.items() if v == '其他'}
print("\n" + "=" * 60)
print("📊 结果汇总")
print("=" * 60)
print(f"{'类别':<12} {'总付费用户':>10} {'无完课':>8} {'无完课占比':>10} {'有完课':>8}")
print("-" * 60)
for label, uset in [("总体", all_users), ("仅L1", l1_set), ("仅L2", l2_set), ("L1+L2", l1l2_set), ("其他", other_set)]:
t, nl, l, r = stats_by_level(label, uset)
print(f"{label:<12} {t:>10,} {nl:>8,} {r:>9.1f}% {l:>8,}")
# 合并L1相关仅L1 + L1+L2
l1_related = l1_set | l1l2_set
t1, nl1, l1, r1 = stats_by_level("L1相关", l1_related)
print(f"{'L1相关(含L1+L2)':<12} {t1:>10,} {nl1:>8,} {r1:>9.1f}% {l1:>8,}")
# 合并L2相关仅L2 + L1+L2
l2_related = l2_set | l1l2_set
t2, nl2, l2, r2 = stats_by_level("L2相关", l2_related)
print(f"{'L2相关(含L1+L2)':<12} {t2:>10,} {nl2:>8,} {r2:>9.1f}% {l2:>8,}")
cur.close()
conn.close()
print("\n✅ 完成")