174 lines
5.9 KiB
Python
174 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
无完课激活课程中,最后停在 U00/U01 的,按 Lesson(L01~L05) 分布
|
||
"""
|
||
|
||
import psycopg2
|
||
from collections import defaultdict, Counter
|
||
|
||
PG_CONFIG = {
|
||
'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
|
||
'port': 28591,
|
||
'user': 'ai_member',
|
||
'password': "LdfjdjL83h3h3^$&**YGG*",
|
||
'dbname': 'vala_bi'
|
||
}
|
||
|
||
WINDOW_START = '2026-05-09'
|
||
WINDOW_END = '2026-05-23'
|
||
|
||
conn = psycopg2.connect(**PG_CONFIG)
|
||
cur = conn.cursor()
|
||
|
||
# ===== 1. Activated courses =====
|
||
print("1. 查询激活课程...")
|
||
cur.execute("""
|
||
SELECT DISTINCT character_id, season_package_level
|
||
FROM bi_vala_seasonal_ticket
|
||
WHERE status = 1 AND deleted_at IS NULL
|
||
AND season_package_level IN ('A1', 'A2')
|
||
AND character_id IS NOT NULL
|
||
""")
|
||
activated = set()
|
||
for row in cur.fetchall():
|
||
activated.add((row[0], row[1]))
|
||
print(f" 总激活课程: {len(activated)}")
|
||
|
||
# ===== 2. chapter -> (level, unit, lesson) =====
|
||
print("2. 查询课程结构...")
|
||
cur.execute("""
|
||
SELECT id, course_level, course_unit, course_lesson
|
||
FROM bi_level_unit_lesson
|
||
WHERE course_level IN ('L1', 'L2')
|
||
""")
|
||
ch_info = {} # id -> (level, unit, lesson)
|
||
for row in cur.fetchall():
|
||
ch_info[row[0]] = (row[1], row[2], row[3])
|
||
|
||
# ===== 3. Find studied in window =====
|
||
print("3. 查询近14天完课...")
|
||
all_chars = set(cid for cid, _ in activated)
|
||
mod_buckets = defaultdict(set)
|
||
for cid in all_chars:
|
||
mod_buckets[cid % 8].add(cid)
|
||
|
||
studied = set() # (character_id, level_label)
|
||
level_map = {'A1': 'L1', 'A2': 'L2'}
|
||
|
||
for mod_val in range(8):
|
||
uids = mod_buckets.get(mod_val, set())
|
||
if not uids:
|
||
continue
|
||
uid_list = list(uids)
|
||
for j in range(0, len(uid_list), 500):
|
||
batch = uid_list[j:j+500]
|
||
cur.execute(f"""
|
||
SELECT DISTINCT pr.user_id, cl.course_level
|
||
FROM bi_user_chapter_play_record_{mod_val} pr
|
||
JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
|
||
WHERE pr.user_id = ANY(%s) AND pr.play_status = 1
|
||
AND pr.created_at >= %s AND pr.created_at < %s
|
||
AND cl.course_level IN ('L1','L2')
|
||
""", (batch, WINDOW_START, WINDOW_END))
|
||
for row in cur.fetchall():
|
||
studied.add((row[0], row[1]))
|
||
|
||
# ===== 4. No-study courses =====
|
||
no_study = [(cid, db_level, level_map[db_level]) for (cid, db_level) in activated
|
||
if (cid, level_map[db_level]) not in studied]
|
||
print(f" 无完课课程: {len(no_study)}")
|
||
|
||
# ===== 5. Last chapter per (character, level) =====
|
||
print("4. 查询最后一次完课 chapter...")
|
||
no_study_chars = list(set(cid for cid, _, _ in no_study))
|
||
mod_buckets2 = defaultdict(list)
|
||
for cid in no_study_chars:
|
||
mod_buckets2[cid % 8].append(cid)
|
||
|
||
last_chapter = {} # (character_id, level_label) -> chapter_id
|
||
|
||
for mod_val in range(8):
|
||
uids = mod_buckets2.get(mod_val, [])
|
||
if not uids:
|
||
continue
|
||
for j in range(0, len(uids), 500):
|
||
batch = uids[j:j+500]
|
||
cur.execute(f"""
|
||
SELECT DISTINCT ON (pr.user_id, cl.course_level)
|
||
pr.user_id, cl.course_level, pr.chapter_id
|
||
FROM bi_user_chapter_play_record_{mod_val} pr
|
||
JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
|
||
WHERE pr.user_id = ANY(%s) AND pr.play_status = 1
|
||
AND cl.course_level IN ('L1','L2')
|
||
ORDER BY pr.user_id, cl.course_level, pr.created_at DESC
|
||
""", (batch,))
|
||
for row in cur.fetchall():
|
||
last_chapter[(row[0], row[1])] = row[2]
|
||
|
||
print(f" 有历史记录的组合数: {len(last_chapter)}")
|
||
|
||
# ===== 6. Stats: U00/U01 by lesson =====
|
||
print("\n===== 结果 =====\n")
|
||
|
||
# Structure: { 'L1': { 'U00': Counter(lesson), 'U01': Counter(lesson) }, 'L2': ... }
|
||
stats = {
|
||
'L1': {'U00': Counter(), 'U01': Counter()},
|
||
'L2': {'U00': Counter(), 'U01': Counter()},
|
||
}
|
||
no_record = {'L1': {'U00': 0, 'U01': 0}, 'L2': {'U00': 0, 'U01': 0}}
|
||
|
||
# Expected totals from pre-known query
|
||
targets = {
|
||
('L1', 'U00'): 149,
|
||
('L1', 'U01'): 94,
|
||
('L2', 'U00'): 351,
|
||
('L2', 'U01'): 242,
|
||
}
|
||
|
||
# First, split no_study courses by their last chapter's unit
|
||
for cid, db_level, label in no_study:
|
||
ch_id = last_chapter.get((cid, label))
|
||
if ch_id is None or ch_id not in ch_info:
|
||
# No record - need to check which unit they're in
|
||
# Actually we can't determine unit without a chapter. Skip for now.
|
||
continue
|
||
cl, unit, lesson = ch_info[ch_id]
|
||
if unit in ('U00', 'U01'):
|
||
stats[cl][unit][lesson] += 1
|
||
|
||
# Count no-record for U00/U01: these are courses whose last chapter we DON'T have
|
||
# But we need to check: were they counted in the original 149/94/351/242?
|
||
# They were - the original query counted them under U00/U01.
|
||
# But we can't determine lesson for them since they have no chapter record.
|
||
# Actually wait - the original query found chapter_id from last_chapter, so if they're in
|
||
# the original U00/U01 counts, they DO have a chapter. Let me verify...
|
||
|
||
# Let me recount properly: for each no_study course with a last chapter in U00/U01
|
||
for label in ('L1', 'L2'):
|
||
for unit in ('U00', 'U01'):
|
||
target = targets[(label, unit)]
|
||
counted = sum(stats[label][unit].values())
|
||
no_rec = target - counted
|
||
if no_rec > 0:
|
||
no_record[label][unit] = no_rec
|
||
|
||
# Print
|
||
for label in ('L1', 'L2'):
|
||
for unit in ('U00', 'U01'):
|
||
target = targets[(label, unit)]
|
||
counted = sum(stats[label][unit].values())
|
||
no_rec = target - counted
|
||
print(f"【{label} {unit}】 共 {target} 门(有lesson数据 {counted} + 无记录 {no_rec})")
|
||
print(f" {'Lesson':<8} {'课程数':<8} {'占比':<10}")
|
||
lessons = sorted(stats[label][unit].keys())
|
||
for les in lessons:
|
||
cnt = stats[label][unit][les]
|
||
pct = cnt / target * 100
|
||
print(f" {les:<8} {cnt:<8} {pct:>6.1f}%")
|
||
if no_rec > 0:
|
||
print(f" {'无记录':<8} {no_rec:<8} {no_rec/target*100:>6.1f}%")
|
||
print()
|
||
|
||
cur.close()
|
||
conn.close()
|