ai_member_xiaoxi/scripts/last_study_lesson_distribution.py
2026-05-23 08:00:01 +08:00

174 lines
5.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
无完课激活课程中,最后停在 U00/U01 的,按 Lesson(L01~L05) 分布
"""
import psycopg2
from collections import defaultdict, Counter
PG_CONFIG = {
'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
'port': 28591,
'user': 'ai_member',
'password': "LdfjdjL83h3h3^$&**YGG*",
'dbname': 'vala_bi'
}
WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23'
conn = psycopg2.connect(**PG_CONFIG)
cur = conn.cursor()
# ===== 1. Activated courses =====
print("1. 查询激活课程...")
cur.execute("""
SELECT DISTINCT character_id, season_package_level
FROM bi_vala_seasonal_ticket
WHERE status = 1 AND deleted_at IS NULL
AND season_package_level IN ('A1', 'A2')
AND character_id IS NOT NULL
""")
activated = set()
for row in cur.fetchall():
activated.add((row[0], row[1]))
print(f" 总激活课程: {len(activated)}")
# ===== 2. chapter -> (level, unit, lesson) =====
print("2. 查询课程结构...")
cur.execute("""
SELECT id, course_level, course_unit, course_lesson
FROM bi_level_unit_lesson
WHERE course_level IN ('L1', 'L2')
""")
ch_info = {} # id -> (level, unit, lesson)
for row in cur.fetchall():
ch_info[row[0]] = (row[1], row[2], row[3])
# ===== 3. Find studied in window =====
print("3. 查询近14天完课...")
all_chars = set(cid for cid, _ in activated)
mod_buckets = defaultdict(set)
for cid in all_chars:
mod_buckets[cid % 8].add(cid)
studied = set() # (character_id, level_label)
level_map = {'A1': 'L1', 'A2': 'L2'}
for mod_val in range(8):
uids = mod_buckets.get(mod_val, set())
if not uids:
continue
uid_list = list(uids)
for j in range(0, len(uid_list), 500):
batch = uid_list[j:j+500]
cur.execute(f"""
SELECT DISTINCT pr.user_id, cl.course_level
FROM bi_user_chapter_play_record_{mod_val} pr
JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
WHERE pr.user_id = ANY(%s) AND pr.play_status = 1
AND pr.created_at >= %s AND pr.created_at < %s
AND cl.course_level IN ('L1','L2')
""", (batch, WINDOW_START, WINDOW_END))
for row in cur.fetchall():
studied.add((row[0], row[1]))
# ===== 4. No-study courses =====
no_study = [(cid, db_level, level_map[db_level]) for (cid, db_level) in activated
if (cid, level_map[db_level]) not in studied]
print(f" 无完课课程: {len(no_study)}")
# ===== 5. Last chapter per (character, level) =====
print("4. 查询最后一次完课 chapter...")
no_study_chars = list(set(cid for cid, _, _ in no_study))
mod_buckets2 = defaultdict(list)
for cid in no_study_chars:
mod_buckets2[cid % 8].append(cid)
last_chapter = {} # (character_id, level_label) -> chapter_id
for mod_val in range(8):
uids = mod_buckets2.get(mod_val, [])
if not uids:
continue
for j in range(0, len(uids), 500):
batch = uids[j:j+500]
cur.execute(f"""
SELECT DISTINCT ON (pr.user_id, cl.course_level)
pr.user_id, cl.course_level, pr.chapter_id
FROM bi_user_chapter_play_record_{mod_val} pr
JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
WHERE pr.user_id = ANY(%s) AND pr.play_status = 1
AND cl.course_level IN ('L1','L2')
ORDER BY pr.user_id, cl.course_level, pr.created_at DESC
""", (batch,))
for row in cur.fetchall():
last_chapter[(row[0], row[1])] = row[2]
print(f" 有历史记录的组合数: {len(last_chapter)}")
# ===== 6. Stats: U00/U01 by lesson =====
print("\n===== 结果 =====\n")
# Structure: { 'L1': { 'U00': Counter(lesson), 'U01': Counter(lesson) }, 'L2': ... }
stats = {
'L1': {'U00': Counter(), 'U01': Counter()},
'L2': {'U00': Counter(), 'U01': Counter()},
}
no_record = {'L1': {'U00': 0, 'U01': 0}, 'L2': {'U00': 0, 'U01': 0}}
# Expected totals from pre-known query
targets = {
('L1', 'U00'): 149,
('L1', 'U01'): 94,
('L2', 'U00'): 351,
('L2', 'U01'): 242,
}
# First, split no_study courses by their last chapter's unit
for cid, db_level, label in no_study:
ch_id = last_chapter.get((cid, label))
if ch_id is None or ch_id not in ch_info:
# No record - need to check which unit they're in
# Actually we can't determine unit without a chapter. Skip for now.
continue
cl, unit, lesson = ch_info[ch_id]
if unit in ('U00', 'U01'):
stats[cl][unit][lesson] += 1
# Count no-record for U00/U01: these are courses whose last chapter we DON'T have
# But we need to check: were they counted in the original 149/94/351/242?
# They were - the original query counted them under U00/U01.
# But we can't determine lesson for them since they have no chapter record.
# Actually wait - the original query found chapter_id from last_chapter, so if they're in
# the original U00/U01 counts, they DO have a chapter. Let me verify...
# Let me recount properly: for each no_study course with a last chapter in U00/U01
for label in ('L1', 'L2'):
for unit in ('U00', 'U01'):
target = targets[(label, unit)]
counted = sum(stats[label][unit].values())
no_rec = target - counted
if no_rec > 0:
no_record[label][unit] = no_rec
# Print
for label in ('L1', 'L2'):
for unit in ('U00', 'U01'):
target = targets[(label, unit)]
counted = sum(stats[label][unit].values())
no_rec = target - counted
print(f"{label} {unit}】 共 {target}有lesson数据 {counted} + 无记录 {no_rec}")
print(f" {'Lesson':<8} {'课程数':<8} {'占比':<10}")
lessons = sorted(stats[label][unit].keys())
for les in lessons:
cnt = stats[label][unit][les]
pct = cnt / target * 100
print(f" {les:<8} {cnt:<8} {pct:>6.1f}%")
if no_rec > 0:
print(f" {'无记录':<8} {no_rec:<8} {no_rec/target*100:>6.1f}%")
print()
cur.close()
conn.close()