# ai_member_xiaoxi/scripts/activated_courses_study_14d.py

#!/usr/bin/env python3
"""
Activated-course statistics: one character + one level = one course.
- Activated: bi_vala_seasonal_ticket, status=1, deleted_at IS NULL, character_id IS NOT NULL
- Completed: bi_user_chapter_play_record, play_status=1, created_at within the last 14 days, course level matches
"""

import os
from collections import defaultdict

import psycopg2

# Connection settings for the vala_bi PostgreSQL database.
# Every field can be overridden through a VALA_BI_PG_* environment variable;
# the literals below are kept only as backward-compatible fallbacks.
# NOTE(review): the fallback password is a credential committed to source.
# It should be rotated and removed, making VALA_BI_PG_PASSWORD mandatory.
PG_CONFIG = {
    'host': os.environ.get('VALA_BI_PG_HOST', 'bj-postgres-16pob4sg.sql.tencentcdb.com'),
    'port': int(os.environ.get('VALA_BI_PG_PORT', '28591')),
    'user': os.environ.get('VALA_BI_PG_USER', 'ai_member'),
    'password': os.environ.get('VALA_BI_PG_PASSWORD', "LdfjdjL83h3h3^$&**YGG*"),
    'dbname': os.environ.get('VALA_BI_PG_DBNAME', 'vala_bi'),
}

# Completion window, applied later in the script as
# created_at >= WINDOW_START AND created_at < WINDOW_END (14 days, end excluded).
WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'

# One connection and one cursor are shared by every query in the script.
conn = psycopg2.connect(**PG_CONFIG)
cur = conn.cursor()

# ===== 1. Activated courses, one per (character_id, level) pair =====
print("1. 查询被激活的课程...")
cur.execute("""
    SELECT DISTINCT character_id, season_package_level
    FROM bi_vala_seasonal_ticket
    WHERE status = 1
      AND deleted_at IS NULL
      AND season_package_level IN ('A1', 'A2')
      AND character_id IS NOT NULL
""")

# season_package_level ('A1' / 'A2') -> set of character_ids activated at it.
activated_courses = defaultdict(set)
for character_id, package_level in cur.fetchall():
    activated_courses[package_level].add(character_id)

# Indexing the defaultdict yields an empty set when a level has no rows.
l1_chars = activated_courses['A1']
l2_chars = activated_courses['A2']
both_chars = l1_chars.intersection(l2_chars)

l1_count = len(l1_chars)
l2_count = len(l2_chars)
# NOTE(review): this is the number of distinct character_ids across both
# levels, although the label below calls it a course count.
distinct_count = len(l1_chars.union(l2_chars))
print(f"   L1激活课程数 (character+level): {l1_count}")
print(f"   L2激活课程数 (character+level): {l2_count}")
print(f"   去重后总激活课程数: {distinct_count}")

# Overlap between the two levels.
only_l1 = l1_chars.difference(l2_chars)
only_l2 = l2_chars.difference(l1_chars)
print(f"   仅L1激活的角色: {len(only_l1)}")
print(f"   仅L2激活的角色: {len(only_l2)}")
print(f"   L1+L2都激活的角色: {len(both_chars)}")

# ===== 2. Course level mapping: lesson id -> course_level ('L1' / 'L2') =====
print("\n2. 查询课程等级映射...")
cur.execute("SELECT id, course_level FROM bi_level_unit_lesson WHERE course_level IN ('L1', 'L2')")
# NOTE(review): chapter_level is not read again in the visible script; the
# completion query joins bi_level_unit_lesson directly instead.
chapter_level = {
    lesson_id: course_level
    for lesson_id, course_level in cur.fetchall()
}

# ===== 3. Completed-lesson records per character =====
print("3. 查询最近14天完课记录...")

# Group ids by id % 8, which selects the bi_user_chapter_play_record_<n>
# table queried for them.
# NOTE(review): character ids are matched against pr.user_id below — confirm
# that both refer to the same id space.
all_chars = l1_chars | l2_chars
mod_buckets = defaultdict(set)
for character_id in all_chars:
    mod_buckets[character_id % 8].add(character_id)

# id -> set of course_level values ('L1' / 'L2') completed inside the window.
char_level_study = defaultdict(set)

for mod_val in range(8):
    shard_ids = list(mod_buckets.get(mod_val, ()))
    # An empty shard gives an empty range, so no query is issued for it.
    for offset in range(0, len(shard_ids), 500):
        batch = shard_ids[offset:offset + 500]
        cur.execute(f"""
            SELECT DISTINCT pr.user_id, cl.course_level
            FROM bi_user_chapter_play_record_{mod_val} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND pr.created_at >= %s
              AND pr.created_at < %s
              AND cl.course_level IN ('L1', 'L2')
        """, (batch, WINDOW_START, WINDOW_END))
        for studied_id, course_level in cur.fetchall():
            char_level_study[studied_id].add(course_level)

print(f"   最近14天有完课的角色数: {len(char_level_study)}")

# ===== 4. Calculate =====
print("\n===== 计算结果 =====\n")

# Ticket level code (season_package_level) -> course_level label used by the
# completion records.
level_map = {'A1': 'L1', 'A2': 'L2'}

# For each activated course (character, level), check whether that level was
# completed inside the window.  totals[db_level] holds the counts and the
# completion percentage.
totals = {}
for db_level, label in level_map.items():
    chars = activated_courses[db_level]
    # .get() rather than indexing, so the defaultdict is not populated with
    # empty entries for characters that have no completion records.
    studied = sum(
        1 for cid in chars if label in char_level_study.get(cid, ())
    )
    total = len(chars)
    not_studied = total - studied

    # A level with no activated characters reports 0% instead of raising
    # ZeroDivisionError.
    studied_pct = studied / total * 100 if total else 0
    not_studied_pct = not_studied / total * 100 if total else 0

    totals[db_level] = {
        'total': total,
        'studied': studied,
        'not_studied': not_studied,
        'pct': studied_pct,
    }
    print(f"【{label} 激活课程】 共 {total} 门")
    print(f"  近14天有完课: {studied} ({studied_pct:.1f}%)")
    print(f"  近14天无完课: {not_studied} ({not_studied_pct:.1f}%)")
    print()

# Combined
# Number of distinct characters that have at least one activated level.
total_courses = len(l1_chars | l2_chars)
# A character can have both L1 and L2, so the number of course units
# (L1 courses + L2 courses) is >= the number of distinct characters.
total_units = totals['A1']['total'] + totals['A2']['total']
total_studied = totals['A1']['studied'] + totals['A2']['studied']
total_not_studied = total_units - total_studied

# With no activated course units at all, report 0% instead of raising
# ZeroDivisionError.
studied_pct = total_studied / total_units * 100 if total_units else 0
not_studied_pct = total_not_studied / total_units * 100 if total_units else 0

print("【总体】")
print(f"  激活课程总数（去重角色+去重等级）: {total_courses} 门课程")
print(f"  激活课程总数（L1+L2独立计数）: {total_units} 个课程单位")
print(f"  近14天有完课: {total_studied} ({studied_pct:.1f}%)")
print(f"  近14天无完课: {total_not_studied} ({not_studied_pct:.1f}%)")

# Cross-tab for characters that have both L1 and L2 activated: which of the
# two levels they completed inside the window.
print(f"\n【L1+L2双激活角色】 共 {len(both_chars)} 个角色")
both_l1_studied = both_l2_studied = both_both = both_neither = 0
for cid in both_chars:
    levels = char_level_study.get(cid, set())
    has_l1 = 'L1' in levels
    has_l2 = 'L2' in levels
    # Booleans add as 0 / 1, so each counter grows only when its flag holds.
    both_l1_studied += has_l1
    both_l2_studied += has_l2
    both_both += has_l1 and has_l2
    both_neither += not (has_l1 or has_l2)

# Denominator falls back to 1 so an empty set prints 0.0% everywhere.
n = len(both_chars) or 1
# The per-level counters include characters that completed both levels;
# subtract those to get the "only this level" figures.
l1_only_n = both_l1_studied - both_both
l2_only_n = both_l2_studied - both_both
print(f"  仅L1有完课: {l1_only_n} 个 ({l1_only_n/n*100:.1f}%)")
print(f"  仅L2有完课: {l2_only_n} 个 ({l2_only_n/n*100:.1f}%)")
print(f"  L1+L2都有: {both_both} 个 ({both_both/n*100:.1f}%)")
print(f"  都无完课: {both_neither} 个 ({both_neither/n*100:.1f}%)")

# Release the cursor and the connection opened at the top of the script.
# NOTE(review): there is no try/finally around the script body, so these
# lines are reached only when nothing above raised.
cur.close()
conn.close()