ai_member_xiaoxi/scripts/no_learn_paying_users.py

#!/usr/bin/env python3
"""
最近14天无完课行为的付费用户占比分析
口径：
  - 最近14天: 2026-05-08 ~ 2026-05-21（14个完整天）
  - 付费用户: 有已完成/已退款订单的非测试账号(status=1, deleted_at IS NULL)
  - 完课行为: bi_user_chapter_play_record.play_status=1
  - L1/L2分类: 基于goods_id
"""
import os, sys
sys.path.insert(0, "/root/.openclaw/workspace")
import psycopg2
from datetime import datetime

SECRETS_FILE = "/root/.openclaw/workspace/secrets.env"
DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
DB_PORT = "28591"
DB_USER = "ai_member"
DB_NAME = "vala_bi"

DATE_START = "2026-05-08"
DATE_END = "2026-05-22"  # exclusive

def load_pw():
    with open(SECRETS_FILE) as f:
        for line in f:
            if line.startswith("PG_ONLINE_PASSWORD="):
                return line.split("=", 1)[1].strip().strip("'\"")

pw = load_pw()
conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER, password=pw, dbname=DB_NAME)
cur = conn.cursor()

print("=" * 60)
print("📊 最近14天(2026-05-08~05-21)无完课行为付费用户占比")
print("=" * 60)

# ── Step 1: 获取所有付费用户及L1/L2分类 ──
print("\n[1/3] 查询付费用户及等级分类...")
cur.execute("""
    WITH user_goods AS (
        SELECT
            o.account_id,
            o.goods_id,
            a.id IS NOT NULL AS is_valid
        FROM bi_vala_order o
        JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
        WHERE o.order_status IN (3, 4)
          AND o.pay_success_date IS NOT NULL
        GROUP BY o.account_id, o.goods_id, a.id
    ),
    user_level_raw AS (
        SELECT
            account_id,
            BOOL_OR(goods_id = 61) AS has_both,
            BOOL_OR(goods_id IN (57, 60, 63)) AS has_l1,
            BOOL_OR(goods_id IN (31, 32, 33, 54)) AS has_l2,
            BOOL_OR(goods_id IN (4,5,6,10,13,14,17,20,25,29,30,35,36,37,38)) AS has_other
        FROM user_goods
        GROUP BY account_id
    )
    SELECT
        account_id,
        CASE
            WHEN has_both OR (has_l1 AND has_l2) THEN 'L1+L2'
            WHEN has_l1 AND NOT has_l2 THEN '仅L1'
            WHEN has_l2 AND NOT has_l1 THEN '仅L2'
            ELSE '其他'
        END AS level
    FROM user_level_raw
""")
paying_users = {row[0]: row[1] for row in cur.fetchall()}
total_paying = len(paying_users)
print(f"   付费用户总数: {total_paying}")

l1_only = sum(1 for v in paying_users.values() if v == '仅L1')
l2_only = sum(1 for v in paying_users.values() if v == '仅L2')
l1l2 = sum(1 for v in paying_users.values() if v == 'L1+L2')
other = sum(1 for v in paying_users.values() if v == '其他')
print(f"   仅L1: {l1_only} | 仅L2: {l2_only} | L1+L2: {l1l2} | 其他: {other}")

# ── Step 2: 查询最近14天有完课行为的用户 ──
print("\n[2/3] 查询最近14天有完课行为的用户...")

# 获取所有付费用户的 character_ids
account_ids = list(paying_users.keys())
# 分批查 character
char_map = {}  # account_id -> [character_ids]
for i in range(0, len(account_ids), 500):
    batch = account_ids[i:i+500]
    cur.execute("""
        SELECT account_id, id FROM bi_vala_app_character
        WHERE account_id = ANY(%s) AND deleted_at IS NULL
    """, (batch,))
    for acc_id, char_id in cur.fetchall():
        if acc_id not in char_map:
            char_map[acc_id] = []
        char_map[acc_id].append(char_id)

all_char_ids = []
for chars in char_map.values():
    all_char_ids.extend(chars)

print(f"   角色总数: {len(all_char_ids)}")

# 查完课行为 - 分8表UNION ALL
learned_chars = set()
for shard in range(8):
    table = f"bi_user_chapter_play_record_{shard}"
    sql = f"""
        SELECT DISTINCT user_id
        FROM {table}
        WHERE play_status = 1
          AND created_at >= %s
          AND created_at < %s
          AND user_id = ANY(%s)
    """
    cur.execute(sql, (DATE_START, DATE_END, all_char_ids))
    for (uid,) in cur.fetchall():
        learned_chars.add(uid)

# 有完课行为的 account_id
learned_accounts = set()
for acc_id, chars in char_map.items():
    if any(c in learned_chars for c in chars):
        learned_accounts.add(acc_id)

no_learn_accounts = set(paying_users.keys()) - learned_accounts

print(f"   有完课行为的付费用户: {len(learned_accounts)}")
print(f"   无完课行为的付费用户: {len(no_learn_accounts)}")

# ── Step 3: 分L1/L2统计 ──
print("\n[3/3] 汇总统计...")

def stats_by_level(label, user_set):
    total = len(user_set)
    no_learn = len(user_set & no_learn_accounts)
    learn = total - no_learn
    ratio = no_learn / total * 100 if total > 0 else 0
    return total, no_learn, learn, ratio

all_users = set(paying_users.keys())
l1_set = {k for k, v in paying_users.items() if v == '仅L1'}
l2_set = {k for k, v in paying_users.items() if v == '仅L2'}
l1l2_set = {k for k, v in paying_users.items() if v == 'L1+L2'}
other_set = {k for k, v in paying_users.items() if v == '其他'}

print("\n" + "=" * 60)
print("📊 结果汇总")
print("=" * 60)
print(f"{'类别':<12} {'总付费用户':>10} {'无完课':>8} {'无完课占比':>10} {'有完课':>8}")
print("-" * 60)

for label, uset in [("总体", all_users), ("仅L1", l1_set), ("仅L2", l2_set), ("L1+L2", l1l2_set), ("其他", other_set)]:
    t, nl, l, r = stats_by_level(label, uset)
    print(f"{label:<12} {t:>10,} {nl:>8,} {r:>9.1f}% {l:>8,}")

# 合并L1相关（仅L1 + L1+L2）
l1_related = l1_set | l1l2_set
t1, nl1, l1, r1 = stats_by_level("L1相关", l1_related)
print(f"{'L1相关(含L1+L2)':<12} {t1:>10,} {nl1:>8,} {r1:>9.1f}% {l1:>8,}")

# 合并L2相关（仅L2 + L1+L2）
l2_related = l2_set | l1l2_set
t2, nl2, l2, r2 = stats_by_level("L2相关", l2_related)
print(f"{'L2相关(含L1+L2)':<12} {t2:>10,} {nl2:>8,} {r2:>9.1f}% {l2:>8,}")

cur.close()
conn.close()
print("\n✅ 完成")