ai_member_xiaoxi/scripts/activated_courses_study_14d.py
2026-05-23 08:00:01 +08:00

166 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
被激活的课程数统计:一个 character + 一个 level = 一个课程
- 激活bi_vala_seasonal_ticket, status=1, deleted_at IS NULL, character_id IS NOT NULL
- 完课bi_user_chapter_play_record, play_status=1, created_at 在最近14天, 课程level匹配
"""
import os
from collections import defaultdict

import psycopg2
# Connection settings for the vala_bi PostgreSQL database.
# SECURITY: the password is deliberately NOT stored in this file; it is read
# from the PGPASSWORD environment variable.  The credential that used to be
# committed here must be treated as leaked and rotated.
PG_PASSWORD_ENV = 'PGPASSWORD'
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': os.environ.get(PG_PASSWORD_ENV),
    'dbname': 'vala_bi',
}

# Half-open study window [WINDOW_START, WINDOW_END) used for "last 14 days".
WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'

# Play records live in tables bi_user_chapter_play_record_0 .. _7.
SHARD_COUNT = 8
# Maximum number of ids sent in one ANY(%s) array parameter.
BATCH_SIZE = 500
# (ticket level code, course level label) pairs reported by this script.
LEVEL_LABELS = (('A1', 'L1'), ('A2', 'L2'))


def pct(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is zero."""
    return part / whole * 100 if whole else 0.0


def bucket_by_shard(ids, shard_count=SHARD_COUNT):
    """Group integer ids into ``{id % shard_count: set_of_ids}``."""
    buckets = defaultdict(set)
    for item in ids:
        buckets[item % shard_count].add(item)
    return buckets


def chunked(items, size=BATCH_SIZE):
    """Yield consecutive slices of ``items`` holding at most ``size`` elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def fetch_activated_courses(cur):
    """Return ``{level_code: set(character_id)}`` for active A1/A2 tickets."""
    cur.execute("""
        SELECT DISTINCT character_id, season_package_level
        FROM bi_vala_seasonal_ticket
        WHERE status = 1
          AND deleted_at IS NULL
          AND season_package_level IN ('A1', 'A2')
          AND character_id IS NOT NULL
    """)
    activated = defaultdict(set)
    for cid, level in cur.fetchall():
        activated[level].add(cid)
    return activated


def fetch_recent_study(cur, char_ids):
    """Return ``{id: set(course_level)}`` for completions inside the window.

    Ids are routed to the shard table ``id % SHARD_COUNT`` and queried in
    batches of ``BATCH_SIZE``.
    """
    # NOTE(review): activated character ids are matched against pr.user_id and
    # sharded by id % 8 -- confirm that play-record user_id carries the same
    # identifier as ticket character_id.
    studied = defaultdict(set)
    buckets = bucket_by_shard(char_ids)
    for shard in range(SHARD_COUNT):
        ids = buckets.get(shard)
        if not ids:
            continue
        for batch in chunked(list(ids)):
            # The table suffix comes from range(SHARD_COUNT), never from
            # external input, so interpolating it into the SQL text is safe;
            # all values go through bound parameters.
            cur.execute(f"""
                SELECT DISTINCT pr.user_id, cl.course_level
                FROM bi_user_chapter_play_record_{shard} pr
                JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
                WHERE pr.user_id = ANY(%s)
                  AND pr.play_status = 1
                  AND pr.created_at >= %s
                  AND pr.created_at < %s
                  AND cl.course_level IN ('L1', 'L2')
            """, (batch, WINDOW_START, WINDOW_END))
            for uid, level in cur.fetchall():
                studied[uid].add(level)
    return studied


def summarize_levels(activated_courses, char_level_study):
    """Count studied / not-studied activated courses per level.

    Returns ``{level_code: {'total', 'studied', 'not_studied', 'pct'}}`` with
    an entry for every level in ``LEVEL_LABELS``; ``pct`` is 0.0 for a level
    without activated courses.
    """
    totals = {}
    for db_level, label in LEVEL_LABELS:
        chars = activated_courses.get(db_level, set())
        total = len(chars)
        studied = sum(
            1 for cid in chars if label in char_level_study.get(cid, ())
        )
        totals[db_level] = {
            'total': total,
            'studied': studied,
            'not_studied': total - studied,
            'pct': pct(studied, total),
        }
    return totals


def cross_tab_both(both_chars, char_level_study):
    """Classify characters that have both levels activated.

    Returns ``(only_l1, only_l2, both, neither)`` counts according to which
    levels each character studied inside the window.
    """
    only_l1 = only_l2 = both = neither = 0
    for cid in both_chars:
        levels = char_level_study.get(cid, ())
        has_l1 = 'L1' in levels
        has_l2 = 'L2' in levels
        if has_l1 and has_l2:
            both += 1
        elif has_l1:
            only_l1 += 1
        elif has_l2:
            only_l2 += 1
        else:
            neither += 1
    return only_l1, only_l2, both, neither


def main():
    """Query the BI database and print the 14-day activated-course report."""
    if not PG_CONFIG['password']:
        raise SystemExit(
            f"Set the {PG_PASSWORD_ENV} environment variable to the "
            "database password before running this script."
        )

    conn = psycopg2.connect(**PG_CONFIG)
    try:
        # Both the cursor and the connection are released even if a query
        # or a later computation raises.
        with conn.cursor() as cur:
            # ===== 1. Get activated courses: (character_id, level) =====
            print("1. 查询被激活的课程...")
            activated_courses = fetch_activated_courses(cur)
            l1_chars = activated_courses['A1']
            l2_chars = activated_courses['A2']
            both_chars = l1_chars & l2_chars
            all_chars = l1_chars | l2_chars
            print(f" L1激活课程数 (character+level): {len(l1_chars)}")
            print(f" L2激活课程数 (character+level): {len(l2_chars)}")
            print(f" 去重后总激活课程数: {len(all_chars)}")
            print(f" 仅L1激活的角色: {len(l1_chars - l2_chars)}")
            print(f" 仅L2激活的角色: {len(l2_chars - l1_chars)}")
            print(f" L1+L2都激活的角色: {len(both_chars)}")

            # ===== 2. Get study records by character =====
            # The former separate "course level mapping" query was dropped:
            # its result was never read, and the JOIN in the play-record
            # query already resolves chapter -> course level.
            print("\n2. 查询最近14天完课记录...")
            char_level_study = fetch_recent_study(cur, all_chars)
            print(f" 最近14天有完课的角色数: {len(char_level_study)}")
    finally:
        conn.close()

    # ===== 3. Calculate =====
    print("\n===== 计算结果 =====\n")
    totals = summarize_levels(activated_courses, char_level_study)
    for db_level, label in LEVEL_LABELS:
        stats = totals[db_level]
        total = stats['total']
        studied = stats['studied']
        not_studied = stats['not_studied']
        print(f"【{label} 激活课程】 共 {total}")
        print(f" 近14天有完课: {studied} ({pct(studied, total):.1f}%)")
        print(f" 近14天无完课: {not_studied} ({pct(not_studied, total):.1f}%)")
        print()

    # A character can hold both L1 and L2, so the number of course units
    # (L1 courses + L2 courses) is >= the number of unique characters.
    total_courses = len(all_chars)
    total_units = totals['A1']['total'] + totals['A2']['total']
    total_studied = totals['A1']['studied'] + totals['A2']['studied']
    total_idle = total_units - total_studied
    print("【总体】")
    print(f" 激活课程总数(去重角色+去重等级): {total_courses} 门课程")
    print(f" 激活课程总数L1+L2独立计数: {total_units} 个课程单位")
    print(f" 近14天有完课: {total_studied} ({pct(total_studied, total_units):.1f}%)")
    print(f" 近14天无完课: {total_idle} ({pct(total_idle, total_units):.1f}%)")

    # Cross-tab for characters with both L1 and L2 activated.
    n_both = len(both_chars)
    only_l1, only_l2, both, neither = cross_tab_both(both_chars, char_level_study)
    print(f"\n【L1+L2双激活角色】 共 {n_both} 个角色")
    print(f" 仅L1有完课: {only_l1} 个 ({pct(only_l1, n_both):.1f}%)")
    print(f" 仅L2有完课: {only_l2} 个 ({pct(only_l2, n_both):.1f}%)")
    print(f" L1+L2都有: {both} 个 ({pct(both, n_both):.1f}%)")
    print(f" 都无完课: {neither} 个 ({pct(neither, n_both):.1f}%)")


if __name__ == '__main__':
    main()