198 lines
6.5 KiB
Python
198 lines
6.5 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
1880个近14天无完课的激活课程,最后一次完课记录分布在哪个Unit
|
||
- 激活课程:(character_id, level) 唯一组合
|
||
- 最后一次完课:该角色在对应level课程中 play_status=1 的最晚 created_at
|
||
- 分布按 bi_level_unit_lesson.course_unit
|
||
"""
|
||
|
||
import os
import re
from collections import defaultdict, Counter
from datetime import datetime, timedelta

import psycopg2
|
||
|
||
# Keyword arguments passed verbatim to psycopg2.connect().
#
# The password is deliberately not kept in the source file: it is read from
# the PGPASSWORD environment variable. When the variable is unset the value is
# None; psycopg2 drops None-valued connection arguments, which leaves libpq
# free to use its own fallbacks such as ~/.pgpass.
# NOTE(review): the literal password that used to live here has been exposed
# in version control / shared copies and should be rotated.
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': os.environ.get('PGPASSWORD'),
    'dbname': 'vala_bi',
}
|
||
|
||
# Look-back window used by the "recent completion" query. It is half-open:
# WINDOW_START <= created_at < WINDOW_END.
# The start is derived from the end date and the window length so the two
# bounds cannot drift apart when the end date is changed.
WINDOW_DAYS = 14
WINDOW_END = '2026-05-23'
WINDOW_START = (
    datetime.strptime(WINDOW_END, '%Y-%m-%d') - timedelta(days=WINDOW_DAYS)
).strftime('%Y-%m-%d')
|
||
|
||
# One connection and one cursor are shared by every query in this script.
conn = psycopg2.connect(**PG_CONFIG)
cur = conn.cursor()

# ===== 1. Get activated courses =====
# An activated course is a distinct (character_id, season_package_level) pair
# among A1/A2 tickets with status = 1 and no deleted_at.
print("1. 查询被激活的课程...")
cur.execute(
    """
    SELECT DISTINCT character_id, season_package_level
    FROM bi_vala_seasonal_ticket
    WHERE status = 1 AND deleted_at IS NULL
      AND season_package_level IN ('A1', 'A2')
      AND character_id IS NOT NULL
    """
)

# (character_id, db_level) -> True. Kept as a dict because later steps of the
# script call .keys() / .items() on it.
activated = {
    (character_id, db_level): True
    for character_id, db_level in cur.fetchall()
}

print(f" 总激活课程数: {len(activated)}")
|
||
|
||
# ===== 2. Get chapter -> (level, unit, season) mapping =====
print("2. 查询课程结构...")
cur.execute(
    """
    SELECT id, course_level, course_unit, course_season
    FROM bi_level_unit_lesson
    WHERE course_level IN ('L1', 'L2')
    """
)

# chapter_id -> (course_level, course_unit, course_season) for L1/L2 lessons.
chapter_info = {
    chapter_id: (course_level, course_unit, course_season)
    for chapter_id, course_level, course_unit, course_season in cur.fetchall()
}

print(f" L1/L2 课时数: {len(chapter_info)}")
|
||
|
||
# ===== 3. Find which courses had study in last 14 days =====
print("3. 查询最近14天有完课的...")

# Ticket levels (A1/A2) expressed as lesson-table course levels (L1/L2).
level_map = {'A1': 'L1', 'A2': 'L2'}

# The play-record data lives in eight tables suffixed 0..7; ids are routed to
# a table by `id % 8`, so group the ids the same way before querying.
# NOTE(review): character_id values are matched against pr.user_id below --
# confirm that the play-record tables store character ids in user_id.
all_chars = {cid for cid, _ in activated}
mod_buckets = defaultdict(set)
for cid in all_chars:
    mod_buckets[cid % 8].add(cid)

# (character_id, level_label) pairs with at least one completed play inside
# the [WINDOW_START, WINDOW_END) window.
studied_courses = set()

for shard in range(8):
    shard_ids = list(mod_buckets.get(shard, ()))
    # Query in slices of 500 ids; an empty shard yields no slices at all.
    for offset in range(0, len(shard_ids), 500):
        cur.execute(
            f"""
            SELECT DISTINCT pr.user_id, cl.course_level
            FROM bi_user_chapter_play_record_{shard} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND pr.created_at >= %s
              AND pr.created_at < %s
              AND cl.course_level IN ('L1', 'L2')
            """,
            (shard_ids[offset:offset + 500], WINDOW_START, WINDOW_END),
        )
        studied_courses.update(
            (user_id, course_level)
            for user_id, course_level in cur.fetchall()
        )
|
||
|
||
# ===== 4. Identify courses with NO study in last 14 days =====
print("4. 找出无完课的激活课程...")

# (character_id, db_level) -> level label, restricted to activated courses
# whose (character_id, label) pair never showed up in the window query.
no_study_courses = {
    (cid, db_level): level_map[db_level]
    for cid, db_level in activated
    if (cid, level_map[db_level]) not in studied_courses
}

print(f" 无完课激活课程数: {len(no_study_courses)}")
|
||
|
||
# ===== 5. For each no-study course, find the LAST study record =====
print("5. 查询最后一次完课记录...")

# Route the characters of the no-study courses to their play-record table
# (table suffix = id % 8), exactly as the table names below expect.
no_study_chars = {cid for cid, _ in no_study_courses}
mod_buckets_no = defaultdict(set)
for cid in no_study_chars:
    mod_buckets_no[cid % 8].add(cid)

# (character_id, level_label) -> chapter_id of the most recent completed play.
last_chapter = {}

for shard in range(8):
    shard_ids = list(mod_buckets_no.get(shard, ()))
    # Slices of 500 ids per query; nothing is executed for an empty shard.
    for offset in range(0, len(shard_ids), 500):
        # DISTINCT ON + ORDER BY created_at DESC keeps only the newest
        # completed row for each (user_id, course_level) pair.
        cur.execute(
            f"""
            SELECT DISTINCT ON (pr.user_id, cl.course_level)
                   pr.user_id, cl.course_level, pr.chapter_id
            FROM bi_user_chapter_play_record_{shard} pr
            JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
            WHERE pr.user_id = ANY(%s)
              AND pr.play_status = 1
              AND cl.course_level IN ('L1', 'L2')
            ORDER BY pr.user_id, cl.course_level, pr.created_at DESC
            """,
            (shard_ids[offset:offset + 500],),
        )
        for uid, level, ch_id in cur.fetchall():
            last_chapter[(uid, level)] = ch_id

print(f" 有历史完课记录的 (角色,level) 组合数: {len(last_chapter)}")
|
||
|
||
# ===== 6. Build distribution =====
print("\n===== 最后一次完课 Unit 分布 =====\n")

# Courses whose last completion could not be resolved to a known chapter.
no_history = 0
# unit -> number of no-study courses whose last completed chapter is in it.
l1_unit_counter = Counter()
l2_unit_counter = Counter()

for (cid, _db_level), label in no_study_courses.items():
    ch_id = last_chapter.get((cid, label))
    # A missing chapter id and an id absent from chapter_info are both
    # counted as "no history".
    info = None if ch_id is None else chapter_info.get(ch_id)
    if info is None:
        no_history += 1
        continue

    course_level, unit, _season = info
    bucket = l1_unit_counter if course_level == 'L1' else l2_unit_counter
    bucket[unit] += 1
|
||
|
||
# Sort units naturally
def sort_units(counter):
    """Return ``(unit, count)`` pairs from *counter* in natural unit order.

    Digit runs inside a unit label are compared as numbers, so ``'U2'`` sorts
    before ``'U10'``; integer units sort numerically. Labels are compared via
    their ``str()`` form, which also lets mixed or ``None`` labels be ordered
    without raising ``TypeError``.

    Args:
        counter: Mapping of unit label to count (e.g. a ``Counter``).

    Returns:
        A list of ``(unit, count)`` tuples; empty when *counter* is empty.
    """
    def natural_key(unit):
        # re.split with a capturing group alternates text / digit-run pieces,
        # so the odd positions are always the digit runs. Tagged tuples keep
        # numbers and text from ever being compared with each other.
        pieces = re.split(r'(\d+)', str(unit))
        return [
            (0, int(piece), '') if index % 2 else (1, 0, piece)
            for index, piece in enumerate(pieces)
        ]

    return [(unit, counter[unit]) for unit in sorted(counter, key=natural_key)]
|
||
|
||
def _print_unit_distribution(title, db_level, unit_counter):
    """Print one level's table of last-completion units.

    Args:
        title: Heading line printed above the table.
        db_level: Ticket level ('A1' or 'A2') used to count that level's
            no-study courses, which is the denominator for the percentages.
        unit_counter: Counter of unit -> number of courses for that level.
    """
    print(title)
    print(f"{'Unit':<8} {'课程数':<8} {'占比':<10} {'累计占比':<10}")

    level_total = sum(1 for (_cid, level) in no_study_courses if level == db_level)
    running_pct = 0
    for unit, cnt in sort_units(unit_counter):
        pct = cnt / level_total * 100 if level_total else 0
        running_pct += pct
        print(f"{unit:<8} {cnt:<8} {pct:>6.1f}% {running_pct:>6.1f}%")

    # Courses of this level that never made it into the unit counter.
    without_record = level_total - sum(unit_counter.values())
    if without_record > 0:
        print(f"{'无记录':<8} {without_record:<8} {without_record/level_total*100:>6.1f}%")


# Overall summary of the no-study courses.
print(f"总无完课激活课程: {len(no_study_courses)}")
print(f" 从未有过任何完课记录: {no_history}")
print(f" 有历史完课记录: {len(no_study_courses) - no_history}")
print()

# Per-level tables; a blank line separates the L1 table from the L2 table.
_print_unit_distribution("【L1 激活课程 - 最后一次完课 Unit 分布】", 'A1', l1_unit_counter)
print()
_print_unit_distribution("【L2 激活课程 - 最后一次完课 Unit 分布】", 'A2', l2_unit_counter)

# Release the database resources opened at the top of the script.
cur.close()
conn.close()
|