#!/usr/bin/env python3
"""Share of activated users with no completed lesson inside the 14-day window.

Definitions used by this report:
- Activated user: a row in bi_vala_seasonal_ticket with status = 1,
  deleted_at IS NULL and season_package_level IN ('A1', 'A2').
- Completed lesson: a row in bi_user_chapter_play_record_{0-7} with
  play_status = 1 and created_at inside [WINDOW_START, WINDOW_END).
- Users activated for L1 only are checked against L1 lessons, L2-only users
  against L2 lessons, and L1+L2 users against lessons of either level.
"""
import os
from collections import Counter, defaultdict
from contextlib import closing

# NOTE(review): a live credential is committed here. The environment variable
# VALA_BI_PG_PASSWORD now takes precedence; the literal fallback is kept only
# so existing invocations keep working. TODO: rotate the password and drop
# the fallback.
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': os.environ.get('VALA_BI_PG_PASSWORD', "LdfjdjL83h3h3^$&**YGG*"),
    'dbname': 'vala_bi',
}

WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'  # exclusive

# Maximum number of ids sent in one `= ANY(%s)` parameter.
BATCH_SIZE = 500
# Play records are split over tables bi_user_chapter_play_record_0 .. _7,
# selected by user_id % SHARD_COUNT.
SHARD_COUNT = 8

# Category config: (key, label, course levels that count as "studied").
CATEGORIES = [
    ('L1', '仅激活L1', {'L1'}),
    ('L2', '仅激活L2', {'L2'}),
    ('L1+L2', 'L1+L2', {'L1', 'L2'}),
]


def _percent(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def _batched(items, size):
    """Yield consecutive slices of the list ``items`` holding at most ``size`` entries."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def classify_package(has_l1, has_l2):
    """Map the two activation flags of an account to its category key.

    The activation query only returns accounts holding an A1 and/or A2
    package, so an account without L1 is treated as L2-only.
    """
    if has_l1 and has_l2:
        return 'L1+L2'
    return 'L1' if has_l1 else 'L2'


def fetch_activated(cur):
    """Return ``{account_id: category}`` for every activated account."""
    cur.execute("""
        SELECT account_id,
               bool_or(season_package_level = 'A1') as has_l1,
               bool_or(season_package_level = 'A2') as has_l2
        FROM bi_vala_seasonal_ticket
        WHERE status = 1
          AND deleted_at IS NULL
          AND season_package_level IN ('A1', 'A2')
        GROUP BY account_id
    """)
    return {
        aid: classify_package(has_l1, has_l2)
        for aid, has_l1, has_l2 in cur.fetchall()
    }


def fetch_character_ids(cur, account_ids):
    """Return ``{account_id: set of character ids}`` for the given accounts.

    Accounts without any row in bi_vala_app_character are absent from the
    result. ``account_ids`` must be a list; it is queried in batches.
    """
    aid_to_user_ids = defaultdict(set)
    for batch in _batched(account_ids, BATCH_SIZE):
        cur.execute(
            "SELECT account_id, id FROM bi_vala_app_character WHERE account_id = ANY(%s)",
            (batch,)
        )
        for account_id, character_id in cur.fetchall():
            aid_to_user_ids[account_id].add(character_id)
    return aid_to_user_ids


def fetch_studied_levels(cur, user_ids, window_start=WINDOW_START,
                         window_end=WINDOW_END):
    """Return ``{user_id: set of course levels}`` completed inside the window.

    Only users with at least one completed L1/L2 lesson whose created_at lies
    in ``[window_start, window_end)`` appear in the result.
    """
    # Group the ids by the shard table that holds their play records.
    shards = defaultdict(list)
    for uid in user_ids:
        shards[uid % SHARD_COUNT].append(uid)

    user_level_study = defaultdict(set)
    for shard in range(SHARD_COUNT):
        # The table name cannot be a bound parameter; `shard` is an int taken
        # from range(), so interpolating it is safe.
        table = f"bi_user_chapter_play_record_{shard}"
        for batch in _batched(shards.get(shard, []), BATCH_SIZE):
            cur.execute(f"""
                SELECT DISTINCT pr.user_id, cl.course_level
                FROM {table} pr
                JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
                WHERE pr.user_id = ANY(%s)
                  AND pr.play_status = 1
                  AND pr.created_at >= %s
                  AND pr.created_at < %s
                  AND cl.course_level IN ('L1', 'L2')
            """, (batch, window_start, window_end))
            for uid, level in cur.fetchall():
                user_level_study[uid].add(level)
    return user_level_study


def summarize(activated, aid_to_user_ids, user_level_study):
    """Aggregate per-category study statistics.

    Args:
        activated: ``{account_id: category key}``.
        aid_to_user_ids: ``{account_id: set of character ids}``.
        user_level_study: ``{character id: set of course levels studied}``.

    Returns:
        ``{category key: stats dict}`` with one entry per row of CATEGORIES.
        Percentages are relative to the accounts that have a character and
        are 0.0 for a category without such accounts.
    """
    allowed_levels = {cat: levels for cat, _, levels in CATEGORIES}
    results = {
        cat: {
            'label': label,
            'total_with_char': 0,
            'no_character': 0,
            'no_study': 0,
            'has_study': 0,
            'no_study_pct': 0.0,
            'has_study_pct': 0.0,
            'l1l2_only_l1': 0,
            'l1l2_only_l2': 0,
            'l1l2_both': 0,
        }
        for cat, label, _ in CATEGORIES
    }

    for aid, cat in activated.items():
        stats = results[cat]
        uids = aid_to_user_ids.get(aid)
        if not uids:
            stats['no_character'] += 1
            continue

        # An account counts as studying if any of its characters did.
        studied_levels = set()
        for uid in uids:
            studied_levels |= user_level_study.get(uid, set())

        if not studied_levels & allowed_levels[cat]:
            stats['no_study'] += 1
            continue

        stats['has_study'] += 1
        if cat == 'L1+L2':
            has_l1 = 'L1' in studied_levels
            has_l2 = 'L2' in studied_levels
            if has_l1 and has_l2:
                stats['l1l2_both'] += 1
            elif has_l1:
                stats['l1l2_only_l1'] += 1
            elif has_l2:
                stats['l1l2_only_l2'] += 1

    for stats in results.values():
        total_with_char = stats['has_study'] + stats['no_study']
        stats['total_with_char'] = total_with_char
        stats['no_study_pct'] = _percent(stats['no_study'], total_with_char)
        # An empty category must report 0%, not "100% of nobody".
        stats['has_study_pct'] = (
            100 - stats['no_study_pct'] if total_with_char else 0.0
        )
    return results


def format_report(total_activated, results):
    """Render the summary as a list of output lines ('' is a blank line).

    Args:
        total_activated: number of activated accounts; the denominator of the
            overall percentages (accounts without a character included).
        results: the mapping returned by ``summarize``.
    """
    total_no_study = sum(r['no_study'] for r in results.values())
    total_has_study = sum(r['has_study'] for r in results.values())
    total_no_char = sum(r['no_character'] for r in results.values())

    lines = [
        f"激活用户总数: {total_activated}",
        f" 无角色数据: {total_no_char}",
        f" 近14天无完课: {total_no_study} ({_percent(total_no_study, total_activated):.1f}%)",
        f" 近14天有完课: {total_has_study} ({_percent(total_has_study, total_activated):.1f}%)",
        "",
    ]
    for cat, label, _ in CATEGORIES:
        r = results[cat]
        lines.append(f"【{label}】 共{r['total_with_char']}人 (另有{r['no_character']}人无角色)")
        lines.append(f" 无完课: {r['no_study']}人 ({r['no_study_pct']:.1f}%)")
        lines.append(f" 有完课: {r['has_study']}人 ({r['has_study_pct']:.1f}%)")
        if cat == 'L1+L2':
            total_detail = r['l1l2_only_l1'] + r['l1l2_only_l2'] + r['l1l2_both']
            lines.append(f" 其中:")
            lines.append(f" 仅L1有完课: {r['l1l2_only_l1']}人")
            lines.append(f" 仅L2有完课: {r['l1l2_only_l2']}人")
            lines.append(f" L1+L2都有: {r['l1l2_both']}人")
            lines.append(f" (合计有完课: {total_detail}人)")
        lines.append("")
    return lines


def load_data(cur, window_start=WINDOW_START, window_end=WINDOW_END):
    """Run every query, printing progress, and return the three lookup maps.

    Returns:
        ``(activated, aid_to_user_ids, user_level_study)`` as consumed by
        ``summarize``.
    """
    # ===== 1. Get activated users =====
    print("1. 查询激活用户...")
    activated = fetch_activated(cur)
    print(f" 激活用户总数: {len(activated)}")
    per_category = Counter(activated.values())
    for cat in ['L1', 'L2', 'L1+L2']:
        print(f" {cat}: {per_category[cat]}")

    # ===== 2. Get character_id mapping =====
    print("2. 查询用户角色映射...")
    aid_to_user_ids = fetch_character_ids(cur, list(activated))
    print(f" 有角色的激活用户数: {len(aid_to_user_ids)}")

    # ===== 3. Course level mapping =====
    # This step used to pre-load an id -> course_level map that nothing read;
    # the level is resolved by the JOIN in step 4. The progress line is kept
    # so the console output stays the same.
    print("3. 查询课程等级映射...")

    # ===== 4. Get users with study records in the window =====
    print("4. 查询最近14天完课记录...")
    all_user_ids = set().union(*aid_to_user_ids.values())
    user_level_study = fetch_studied_levels(
        cur, all_user_ids, window_start, window_end
    )
    print(f" 最近14天有完课记录的用户数: {len(user_level_study)}")

    return activated, aid_to_user_ids, user_level_study


def main():
    """Query the database and print the report."""
    # Imported here so the pure helpers above stay importable (e.g. for unit
    # tests) on machines without the PostgreSQL driver.
    import psycopg2

    # closing() guarantees cursor and connection are released even when a
    # query raises.
    with closing(psycopg2.connect(**PG_CONFIG)) as conn:
        with closing(conn.cursor()) as cur:
            activated, aid_to_user_ids, user_level_study = load_data(cur)

    # ===== 5. Calculate results =====
    print("\n===== 计算结果 =====\n")
    overall_results = summarize(activated, aid_to_user_ids, user_level_study)
    for line in format_report(len(activated), overall_results):
        print(line)


if __name__ == "__main__":
    main()