ai_member_xiaoxi/scripts/activated_users_study_14d.py

#!/usr/bin/env python3
"""
最近14天内无完课行为的激活用户占比
口径：
- 激活用户：bi_vala_seasonal_ticket, status=1, deleted_at IS NULL, season_package_level IN ('A1','A2')
- 完课行为：bi_user_chapter_play_record_{0-7}, play_status=1, created_at 在最近14天内
- L1用户只看L1课程，L2用户只看L2课程，L1+L2用户看所有课程
"""

import psycopg2
from collections import defaultdict

PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': "LdfjdjL83h3h3^$&**YGG*",
    'dbname': 'vala_bi'
}

WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'  # exclusive

conn = psycopg2.connect(**PG_CONFIG)
cur = conn.cursor()

# ===== 1. Get activated users =====
print("1. 查询激活用户...")
cur.execute("""
    SELECT account_id, bool_or(season_package_level = 'A1') as has_l1,
           bool_or(season_package_level = 'A2') as has_l2
    FROM bi_vala_seasonal_ticket
    WHERE status = 1 AND deleted_at IS NULL
      AND season_package_level IN ('A1', 'A2')
    GROUP BY account_id
""")
activated = {}
for row in cur.fetchall():
    aid, has_l1, has_l2 = row
    if has_l1 and has_l2:
        activated[aid] = 'L1+L2'
    elif has_l1:
        activated[aid] = 'L1'
    else:
        activated[aid] = 'L2'

print(f"   激活用户总数: {len(activated)}")
for cat in ['L1', 'L2', 'L1+L2']:
    print(f"   {cat}: {sum(1 for v in activated.values() if v == cat)}")

# ===== 2. Get character_id mapping =====
print("2. 查询用户角色映射...")
aid_list = list(activated.keys())
aid_to_user_ids = defaultdict(set)
batch_size = 500
for i in range(0, len(aid_list), batch_size):
    batch = aid_list[i:i+batch_size]
    cur.execute(
        "SELECT account_id, id FROM bi_vala_app_character WHERE account_id = ANY(%s)",
        (batch,)
    )
    for row in cur.fetchall():
        aid_to_user_ids[row[0]].add(row[1])

print(f"   有角色的激活用户数: {len(aid_to_user_ids)}")

# ===== 3. Get course level mapping =====
print("3. 查询课程等级映射...")
cur.execute("SELECT id, course_level FROM bi_level_unit_lesson WHERE course_level IN ('L1', 'L2')")
chapter_level = {row[0]: row[1] for row in cur.fetchall()}

# ===== 4. Get users with study records in last 14 days =====
print("4. 查询最近14天完课记录...")

all_user_ids = set()
for uids in aid_to_user_ids.values():
    all_user_ids.update(uids)

mod_buckets = defaultdict(set)
for uid in all_user_ids:
    mod_buckets[uid % 8].add(uid)

user_level_study = defaultdict(set)  # user_id -> set of course_level studied

for mod_val in range(8):
    uids_in_mod = mod_buckets.get(mod_val, set())
    if not uids_in_mod:
        continue
    uid_list = list(uids_in_mod)
    for j in range(0, len(uid_list), 500):
        batch = uid_list[j:j+500]
        cur.execute(f"""
            SELECT DISTINCT pr.user_id, cl.course_level
            FROM bi_user_chapter_play_record_{mod_val} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND pr.created_at >= %s
              AND pr.created_at < %s
              AND cl.course_level IN ('L1', 'L2')
        """, (batch, WINDOW_START, WINDOW_END))
        for row in cur.fetchall():
            user_level_study[row[0]].add(row[1])

print(f"   最近14天有完课记录的用户数: {len(user_level_study)}")

# ===== 5. Calculate results =====
print("\n===== 计算结果 =====\n")

# Category config: (key, label, allowed_levels, track_detail)
categories = [
    ('L1',     '仅激活L1', {'L1'}),
    ('L2',     '仅激活L2', {'L2'}),
    ('L1+L2',  'L1+L2',   {'L1', 'L2'}),
]

overall_results = {}
for cat, label, levels in categories:
    cat_users = [aid for aid, c in activated.items() if c == cat]
    no_character = 0
    no_study = 0
    has_study = 0
    # L1+L2 detail
    l1l2_only_l1 = 0
    l1l2_only_l2 = 0
    l1l2_both = 0

    for aid in cat_users:
        uids = aid_to_user_ids.get(aid, set())
        if not uids:
            no_character += 1
            continue

        studied_levels = set()
        for uid in uids:
            studied_levels |= user_level_study.get(uid, set())

        matched = studied_levels & levels
        if matched:
            has_study += 1
            if cat == 'L1+L2':
                has_l1 = 'L1' in studied_levels
                has_l2 = 'L2' in studied_levels
                if has_l1 and has_l2:
                    l1l2_both += 1
                elif has_l1:
                    l1l2_only_l1 += 1
                elif has_l2:
                    l1l2_only_l2 += 1
        else:
            no_study += 1

    total_with_char = has_study + no_study
    overall_results[cat] = {
        'label': label,
        'total_with_char': total_with_char,
        'no_character': no_character,
        'no_study': no_study,
        'has_study': has_study,
        'no_study_pct': no_study / total_with_char * 100 if total_with_char else 0,
        'l1l2_only_l1': l1l2_only_l1,
        'l1l2_only_l2': l1l2_only_l2,
        'l1l2_both': l1l2_both,
    }

# Print
total_activated = len(activated)
total_no_study = sum(r['no_study'] for r in overall_results.values())
total_has_study = sum(r['has_study'] for r in overall_results.values())
total_no_char = sum(r['no_character'] for r in overall_results.values())

print(f"激活用户总数: {total_activated}")
print(f"  无角色数据: {total_no_char}")
print(f"  近14天无完课: {total_no_study} ({total_no_study/total_activated*100:.1f}%)")
print(f"  近14天有完课: {total_has_study} ({total_has_study/total_activated*100:.1f}%)")
print()

for cat, label, _ in categories:
    r = overall_results[cat]
    print(f"【{label}】 共{r['total_with_char']}人 (另有{r['no_character']}人无角色)")
    print(f"  无完课: {r['no_study']}人 ({r['no_study_pct']:.1f}%)")
    print(f"  有完课: {r['has_study']}人 ({100-r['no_study_pct']:.1f}%)")
    if cat == 'L1+L2':
        print(f"    其中:")
        print(f"      仅L1有完课: {r['l1l2_only_l1']}人")
        print(f"      仅L2有完课: {r['l1l2_only_l2']}人")
        print(f"      L1+L2都有: {r['l1l2_both']}人")
        total_detail = r['l1l2_only_l1'] + r['l1l2_only_l2'] + r['l1l2_both']
        print(f"      (合计有完课: {total_detail}人)")
    print()

cur.close()
conn.close()