#!/usr/bin/env python3
"""Lesson distribution of stalled U00/U01 courses.

Among activated courses that have no completed lesson inside the reporting
window, take those whose most recent completed chapter belongs to unit U00 or
U01 and report how they are distributed across lessons (L01~L05).
"""

import os
from collections import Counter, defaultdict
from contextlib import closing

import psycopg2

# NOTE(security): the password is kept here only as a backward-compatible
# fallback. Set VALA_PG_PASSWORD in the environment and remove the literal
# from source control as soon as practical.
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': os.environ.get('VALA_PG_PASSWORD', "LdfjdjL83h3h3^$&**YGG*"),
    'dbname': 'vala_bi'
}

# Half-open reporting window: created_at >= WINDOW_START and < WINDOW_END.
WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'

# Play records are spread over tables bi_user_chapter_play_record_0..7; the
# script selects the table by user id modulo SHARD_COUNT.
SHARD_COUNT = 8
# Maximum number of ids sent in a single ANY(%s) array parameter.
BATCH_SIZE = 500

# season_package_level (ticket table) -> course_level (course structure table).
LEVEL_MAP = {'A1': 'L1', 'A2': 'L2'}
LEVELS = ('L1', 'L2')
UNITS = ('U00', 'U01')

# Expected per-(level, unit) totals taken from a previously run query. The
# report uses them as denominators and shows the shortfall as "no record".
TARGETS = {
    ('L1', 'U00'): 149,
    ('L1', 'U01'): 94,
    ('L2', 'U00'): 351,
    ('L2', 'U01'): 242,
}


def _iter_shard_batches(ids):
    """Yield ``(shard, batch)`` pairs for the given ids.

    Ids are grouped by ``id % SHARD_COUNT`` and each group is cut into lists
    of at most ``BATCH_SIZE`` ids. Shards without ids yield nothing.
    """
    by_shard = defaultdict(list)
    for one_id in ids:
        by_shard[one_id % SHARD_COUNT].append(one_id)
    for shard in range(SHARD_COUNT):
        shard_ids = by_shard.get(shard, [])
        for start in range(0, len(shard_ids), BATCH_SIZE):
            yield shard, shard_ids[start:start + BATCH_SIZE]


def fetch_activated(cur):
    """Return the set of ``(character_id, season_package_level)`` pairs that
    have an active, non-deleted A1/A2 ticket."""
    cur.execute("""
        SELECT DISTINCT character_id, season_package_level
        FROM bi_vala_seasonal_ticket
        WHERE status = 1 AND deleted_at IS NULL
          AND season_package_level IN ('A1', 'A2')
          AND character_id IS NOT NULL
    """)
    return {(row[0], row[1]) for row in cur.fetchall()}


def fetch_chapter_info(cur):
    """Return ``{chapter_id: (course_level, course_unit, course_lesson)}`` for
    every L1/L2 row of the course structure table."""
    cur.execute("""
        SELECT id, course_level, course_unit, course_lesson
        FROM bi_level_unit_lesson
        WHERE course_level IN ('L1', 'L2')
    """)
    return {row[0]: (row[1], row[2], row[3]) for row in cur.fetchall()}


def fetch_studied(cur, char_ids):
    """Return the set of ``(user_id, course_level)`` pairs that have at least
    one play record with ``play_status = 1`` inside the reporting window."""
    studied = set()
    for shard, batch in _iter_shard_batches(char_ids):
        # Only the table suffix is interpolated, and it is an int produced by
        # range(SHARD_COUNT); every value is passed as a bound parameter.
        cur.execute(f"""
            SELECT DISTINCT pr.user_id, cl.course_level
            FROM bi_user_chapter_play_record_{shard} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND pr.created_at >= %s AND pr.created_at < %s
              AND cl.course_level IN ('L1','L2')
        """, (batch, WINDOW_START, WINDOW_END))
        studied.update((row[0], row[1]) for row in cur.fetchall())
    return studied


def fetch_last_chapters(cur, char_ids):
    """Return ``{(user_id, course_level): chapter_id}`` holding, per pair, the
    chapter of the newest play record with ``play_status = 1`` (any date)."""
    last_chapter = {}
    for shard, batch in _iter_shard_batches(char_ids):
        # DISTINCT ON keeps the first row per (user, level); the ORDER BY puts
        # the most recently created record first.
        cur.execute(f"""
            SELECT DISTINCT ON (pr.user_id, cl.course_level)
                   pr.user_id, cl.course_level, pr.chapter_id
            FROM bi_user_chapter_play_record_{shard} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND cl.course_level IN ('L1','L2')
            ORDER BY pr.user_id, cl.course_level, pr.created_at DESC
        """, (batch,))
        for row in cur.fetchall():
            last_chapter[(row[0], row[1])] = row[2]
    return last_chapter


def tally_by_lesson(no_study, last_chapter, ch_info):
    """Count no-study courses per lesson for units U00/U01.

    Args:
        no_study: iterable of ``(character_id, db_level, level_label)``.
        last_chapter: ``{(character_id, level_label): chapter_id}``.
        ch_info: ``{chapter_id: (level, unit, lesson)}``.

    Returns:
        ``{level: {unit: Counter({lesson: courses})}}`` for L1/L2 x U00/U01.
    """
    stats = {level: {unit: Counter() for unit in UNITS} for level in LEVELS}
    for cid, _db_level, label in no_study:
        ch_id = last_chapter.get((cid, label))
        if ch_id is None or ch_id not in ch_info:
            # Without a known last chapter the unit cannot be determined, so
            # the course cannot be attributed to any lesson.
            continue
        level, unit, lesson = ch_info[ch_id]
        if unit in UNITS:
            stats[level][unit][lesson] += 1
    return stats


def print_report(stats):
    """Print one lesson table per (level, unit).

    Percentages use the expected total from ``TARGETS`` as denominator. The
    "无记录" row is the gap between that expected total and the number of
    courses actually attributed to a lesson; it is printed only when positive.
    """
    for label in LEVELS:
        for unit in UNITS:
            target = TARGETS[(label, unit)]
            by_lesson = stats[label][unit]
            counted = sum(by_lesson.values())
            # May be negative if TARGETS is stale relative to the live data.
            no_rec = target - counted

            print(f"【{label} {unit}】 共 {target} 门(有lesson数据 {counted} + 无记录 {no_rec})")
            print(f" {'Lesson':<8} {'课程数':<8} {'占比':<10}")
            for les in sorted(by_lesson):
                cnt = by_lesson[les]
                pct = cnt / target * 100
                print(f" {les:<8} {cnt:<8} {pct:>6.1f}%")
            if no_rec > 0:
                print(f" {'无记录':<8} {no_rec:<8} {no_rec/target*100:>6.1f}%")
            print()


def main():
    """Run all queries and print the report.

    The cursor and the connection are closed even when a query raises.
    """
    with closing(psycopg2.connect(**PG_CONFIG)) as conn, \
            closing(conn.cursor()) as cur:
        # ===== 1. Activated courses =====
        print("1. 查询激活课程...")
        activated = fetch_activated(cur)
        print(f" 总激活课程: {len(activated)}")

        # ===== 2. chapter -> (level, unit, lesson) =====
        print("2. 查询课程结构...")
        ch_info = fetch_chapter_info(cur)

        # ===== 3. Courses studied inside the window =====
        print("3. 查询近14天完课...")
        studied = fetch_studied(cur, {cid for cid, _ in activated})

        # ===== 4. Activated courses with no completion in the window =====
        no_study = [
            (cid, db_level, LEVEL_MAP[db_level])
            for (cid, db_level) in activated
            if (cid, LEVEL_MAP[db_level]) not in studied
        ]
        print(f" 无完课课程: {len(no_study)}")

        # ===== 5. Last completed chapter per (character, level) =====
        print("4. 查询最后一次完课 chapter...")
        last_chapter = fetch_last_chapters(
            cur, {cid for cid, _, _ in no_study})
        print(f" 有历史记录的组合数: {len(last_chapter)}")

    # ===== 6. Stats: U00/U01 by lesson =====
    print("\n===== 结果 =====\n")
    print_report(tally_by_lesson(no_study, last_chapter, ch_info))


if __name__ == "__main__":
    main()