#!/usr/bin/env python3
"""
Report, for the activated courses with no lesson completion in the last
14 days (1880 of them when this script was written), which Unit their last
completion record falls in.

- Activated course: a unique (character_id, level) combination.
- Last completion: the latest created_at among the character's records with
  play_status=1 in the lessons of the matching level.
- The distribution is grouped by bi_level_unit_lesson.course_unit.
"""
import re
from collections import Counter, defaultdict
from contextlib import closing

# SECURITY NOTE(review): database credentials are hard-coded in source control.
# They are kept unchanged so the script still runs as before, but they should
# be moved to environment variables / a secret store and the password rotated.
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': "LdfjdjL83h3h3^$&**YGG*",
    'dbname': 'vala_bi',
}

# Half-open window [WINDOW_START, WINDOW_END): 14 calendar days.
WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'

# Ticket level (season_package_level) -> lesson level (course_level).
LEVEL_MAP = {'A1': 'L1', 'A2': 'L2'}

# Play records live in bi_user_chapter_play_record_<id % SHARD_COUNT>.
SHARD_COUNT = 8
# Maximum number of ids sent in one ANY(%s) array parameter.
BATCH_SIZE = 500

_DIGIT_RUN = re.compile(r'([0-9]+)')


def _natural_key(unit):
    """Return a sort key that compares embedded digit runs numerically.

    ``None`` sorts after every real unit so a missing course_unit cannot
    raise ``TypeError`` when mixed with strings.
    """
    if unit is None:
        return (1, (), '')
    text = str(unit)
    # re.split with a capturing group alternates text / digits, so odd
    # indices are always the digit runs.
    chunks = tuple(
        (0, int(chunk), '') if idx % 2 else (1, 0, chunk)
        for idx, chunk in enumerate(_DIGIT_RUN.split(text))
        if chunk
    )
    # The raw text is a tie-breaker for keys such as 'U01' vs 'U1'.
    return (0, chunks, text)


def sort_units(counter):
    """Return ``[(unit, count), ...]`` in natural unit order.

    Natural order means 'U2' comes before 'U10'; plain lexicographic sorting
    would interleave them and scramble the cumulative percentage column.
    """
    return [(unit, counter[unit]) for unit in sorted(counter, key=_natural_key)]


def iter_shard_batches(ids, shard_count=SHARD_COUNT, batch_size=BATCH_SIZE):
    """Yield ``(shard, batch)`` pairs for querying the sharded record tables.

    Ids are de-duplicated, grouped by ``id % shard_count`` and emitted in
    ascending shard order as lists of at most ``batch_size`` ids.
    """
    shards = defaultdict(list)
    for ident in sorted(set(ids)):
        shards[ident % shard_count].append(ident)
    for shard in range(shard_count):
        members = shards.get(shard, [])
        for start in range(0, len(members), batch_size):
            yield shard, members[start:start + batch_size]


def fetch_activated(cur):
    """Return the set of activated ``(character_id, season_package_level)``."""
    cur.execute("""
        SELECT DISTINCT character_id, season_package_level
        FROM bi_vala_seasonal_ticket
        WHERE status = 1
          AND deleted_at IS NULL
          AND season_package_level IN ('A1', 'A2')
          AND character_id IS NOT NULL
    """)
    return {(row[0], row[1]) for row in cur.fetchall()}


def fetch_chapter_info(cur):
    """Return ``{chapter_id: (course_level, course_unit, course_season)}``."""
    cur.execute("""
        SELECT id, course_level, course_unit, course_season
        FROM bi_level_unit_lesson
        WHERE course_level IN ('L1', 'L2')
    """)
    return {row[0]: (row[1], row[2], row[3]) for row in cur.fetchall()}


def fetch_studied_courses(cur, character_ids):
    """Return ``(id, course_level)`` pairs with a completion inside the window.

    NOTE(review): play records are keyed by ``user_id`` but are filtered with
    character ids here; this assumes both identify the same entity - confirm.
    """
    studied = set()
    for shard, batch in iter_shard_batches(character_ids):
        # `shard` is an int produced by range(), so formatting it into the
        # table name cannot inject SQL; all values go through parameters.
        cur.execute(f"""
            SELECT DISTINCT pr.user_id, cl.course_level
            FROM bi_user_chapter_play_record_{shard} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND pr.created_at >= %s
              AND pr.created_at < %s
              AND cl.course_level IN ('L1', 'L2')
        """, (batch, WINDOW_START, WINDOW_END))
        studied.update((row[0], row[1]) for row in cur.fetchall())
    return studied


def fetch_last_chapters(cur, character_ids):
    """Return ``{(id, course_level): chapter_id}`` of the latest completion."""
    last_chapter = {}
    for shard, batch in iter_shard_batches(character_ids):
        # DISTINCT ON keeps the first row per (user, level); ordering by
        # created_at DESC makes that row the most recent completion.
        cur.execute(f"""
            SELECT DISTINCT ON (pr.user_id, cl.course_level)
                   pr.user_id, cl.course_level, pr.chapter_id
            FROM bi_user_chapter_play_record_{shard} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND cl.course_level IN ('L1', 'L2')
            ORDER BY pr.user_id, cl.course_level, pr.created_at DESC
        """, (batch,))
        for uid, level, chapter_id in cur.fetchall():
            last_chapter[(uid, level)] = chapter_id
    return last_chapter


def find_no_study_courses(activated, studied_courses):
    """Return ``{(character_id, db_level): label}`` for courses not studied.

    ``activated`` holds ``(character_id, db_level)`` pairs and
    ``studied_courses`` holds ``(character_id, label)`` pairs, where
    ``label`` is ``LEVEL_MAP[db_level]``.
    """
    return {
        (cid, db_level): LEVEL_MAP[db_level]
        for cid, db_level in activated
        if (cid, LEVEL_MAP[db_level]) not in studied_courses
    }


def build_distribution(no_study_courses, last_chapter, chapter_info):
    """Count the Unit of the last completed chapter for each course.

    Returns ``(counters, no_history)`` where ``counters`` maps 'L1' / 'L2' to
    a ``Counter`` of units and ``no_history`` is the number of courses with
    no usable last-completion record.
    """
    counters = {'L1': Counter(), 'L2': Counter()}
    no_history = 0
    for (cid, _db_level), label in no_study_courses.items():
        info = chapter_info.get(last_chapter.get((cid, label)))
        if info is None:
            no_history += 1
            continue
        course_level, unit, _season = info
        counters['L1' if course_level == 'L1' else 'L2'][unit] += 1
    return counters, no_history


def format_distribution(title, unit_counter, total):
    """Return the printable lines of one per-level distribution table.

    ``total`` is the number of no-study courses of that level; courses not
    present in ``unit_counter`` are reported on a final '无记录' row.
    """
    lines = [
        title,
        f"{'Unit':<8} {'课程数':<8} {'占比':<10} {'累计占比':<10}",
    ]
    cum = 0
    for unit, cnt in sort_units(unit_counter):
        pct = cnt / total * 100 if total else 0
        cum += pct
        # str() keeps the row printable even when course_unit is None.
        lines.append(f"{str(unit):<8} {cnt:<8} {pct:>6.1f}% {cum:>6.1f}%")
    missing = total - sum(unit_counter.values())
    if missing > 0:  # implies total > 0, so the division below is safe
        lines.append(f"{'无记录':<8} {missing:<8} {missing / total * 100:>6.1f}%")
    return lines


def main():
    """Run the queries and print the last-completion Unit distribution."""
    # Imported lazily so the pure helpers above remain importable (e.g. for
    # tests) on machines without the database driver installed.
    import psycopg2

    # closing() guarantees the cursor and connection are released even when
    # a query fails; psycopg2's own `with conn:` only ends the transaction.
    with closing(psycopg2.connect(**PG_CONFIG)) as conn:
        with closing(conn.cursor()) as cur:
            # ===== 1. Get activated courses =====
            print("1. 查询被激活的课程...")
            activated = fetch_activated(cur)
            print(f" 总激活课程数: {len(activated)}")

            # ===== 2. Get chapter -> (level, unit, season) mapping =====
            print("2. 查询课程结构...")
            chapter_info = fetch_chapter_info(cur)
            print(f" L1/L2 课时数: {len(chapter_info)}")

            # ===== 3. Find which courses had study in last 14 days =====
            print("3. 查询最近14天有完课的...")
            studied_courses = fetch_studied_courses(
                cur, {cid for cid, _ in activated})

            # ===== 4. Identify courses with NO study in last 14 days =====
            print("4. 找出无完课的激活课程...")
            no_study_courses = find_no_study_courses(activated, studied_courses)
            print(f" 无完课激活课程数: {len(no_study_courses)}")

            # ===== 5. For each no-study course, find the LAST study record =====
            print("5. 查询最后一次完课记录...")
            last_chapter = fetch_last_chapters(
                cur, {cid for cid, _ in no_study_courses})
            print(f" 有历史完课记录的 (角色,level) 组合数: {len(last_chapter)}")

    # ===== 6. Build distribution (no database access needed from here) =====
    print("\n===== 最后一次完课 Unit 分布 =====\n")
    counters, no_history = build_distribution(
        no_study_courses, last_chapter, chapter_info)

    print(f"总无完课激活课程: {len(no_study_courses)}")
    print(f" 从未有过任何完课记录: {no_history}")
    print(f" 有历史完课记录: {len(no_study_courses) - no_history}")

    for db_level, label in LEVEL_MAP.items():
        print()
        total = sum(1 for _, level in no_study_courses if level == db_level)
        title = f"【{label} 激活课程 - 最后一次完课 Unit 分布】"
        for line in format_distribution(title, counters[label], total):
            print(line)


if __name__ == "__main__":
    main()