ai_member_xiaoxi/scripts/activated_users_study_14d.py
2026-05-23 08:00:01 +08:00

194 lines
6.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
最近14天内无完课行为的激活用户占比
口径:
- 激活用户bi_vala_seasonal_ticket, status=1, deleted_at IS NULL, season_package_level IN ('A1','A2')
- 完课行为bi_user_chapter_play_record_{0-7}, play_status=1, created_at 在最近14天内
- L1用户只看L1课程L2用户只看L2课程L1+L2用户看所有课程
"""
import psycopg2
from collections import defaultdict
PG_CONFIG = {
'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
'port': 28591,
'user': 'ai_member',
'password': "LdfjdjL83h3h3^$&**YGG*",
'dbname': 'vala_bi'
}
WINDOW_START = '2026-05-09'
WINDOW_END = '2026-05-23' # exclusive
conn = psycopg2.connect(**PG_CONFIG)
cur = conn.cursor()
# ===== 1. Get activated users =====
print("1. 查询激活用户...")
cur.execute("""
SELECT account_id, bool_or(season_package_level = 'A1') as has_l1,
bool_or(season_package_level = 'A2') as has_l2
FROM bi_vala_seasonal_ticket
WHERE status = 1 AND deleted_at IS NULL
AND season_package_level IN ('A1', 'A2')
GROUP BY account_id
""")
activated = {}
for row in cur.fetchall():
aid, has_l1, has_l2 = row
if has_l1 and has_l2:
activated[aid] = 'L1+L2'
elif has_l1:
activated[aid] = 'L1'
else:
activated[aid] = 'L2'
print(f" 激活用户总数: {len(activated)}")
for cat in ['L1', 'L2', 'L1+L2']:
print(f" {cat}: {sum(1 for v in activated.values() if v == cat)}")
# ===== 2. Get character_id mapping =====
print("2. 查询用户角色映射...")
aid_list = list(activated.keys())
aid_to_user_ids = defaultdict(set)
batch_size = 500
for i in range(0, len(aid_list), batch_size):
batch = aid_list[i:i+batch_size]
cur.execute(
"SELECT account_id, id FROM bi_vala_app_character WHERE account_id = ANY(%s)",
(batch,)
)
for row in cur.fetchall():
aid_to_user_ids[row[0]].add(row[1])
print(f" 有角色的激活用户数: {len(aid_to_user_ids)}")
# ===== 3. Get course level mapping =====
print("3. 查询课程等级映射...")
cur.execute("SELECT id, course_level FROM bi_level_unit_lesson WHERE course_level IN ('L1', 'L2')")
chapter_level = {row[0]: row[1] for row in cur.fetchall()}
# ===== 4. Get users with study records in last 14 days =====
print("4. 查询最近14天完课记录...")
all_user_ids = set()
for uids in aid_to_user_ids.values():
all_user_ids.update(uids)
mod_buckets = defaultdict(set)
for uid in all_user_ids:
mod_buckets[uid % 8].add(uid)
user_level_study = defaultdict(set) # user_id -> set of course_level studied
for mod_val in range(8):
uids_in_mod = mod_buckets.get(mod_val, set())
if not uids_in_mod:
continue
uid_list = list(uids_in_mod)
for j in range(0, len(uid_list), 500):
batch = uid_list[j:j+500]
cur.execute(f"""
SELECT DISTINCT pr.user_id, cl.course_level
FROM bi_user_chapter_play_record_{mod_val} pr
JOIN bi_level_unit_lesson cl ON pr.chapter_id = cl.id
WHERE pr.user_id = ANY(%s)
AND pr.play_status = 1
AND pr.created_at >= %s
AND pr.created_at < %s
AND cl.course_level IN ('L1', 'L2')
""", (batch, WINDOW_START, WINDOW_END))
for row in cur.fetchall():
user_level_study[row[0]].add(row[1])
print(f" 最近14天有完课记录的用户数: {len(user_level_study)}")
# ===== 5. Calculate results =====
print("\n===== 计算结果 =====\n")
# Category config: (key, label, allowed_levels, track_detail)
categories = [
('L1', '仅激活L1', {'L1'}),
('L2', '仅激活L2', {'L2'}),
('L1+L2', 'L1+L2', {'L1', 'L2'}),
]
overall_results = {}
for cat, label, levels in categories:
cat_users = [aid for aid, c in activated.items() if c == cat]
no_character = 0
no_study = 0
has_study = 0
# L1+L2 detail
l1l2_only_l1 = 0
l1l2_only_l2 = 0
l1l2_both = 0
for aid in cat_users:
uids = aid_to_user_ids.get(aid, set())
if not uids:
no_character += 1
continue
studied_levels = set()
for uid in uids:
studied_levels |= user_level_study.get(uid, set())
matched = studied_levels & levels
if matched:
has_study += 1
if cat == 'L1+L2':
has_l1 = 'L1' in studied_levels
has_l2 = 'L2' in studied_levels
if has_l1 and has_l2:
l1l2_both += 1
elif has_l1:
l1l2_only_l1 += 1
elif has_l2:
l1l2_only_l2 += 1
else:
no_study += 1
total_with_char = has_study + no_study
overall_results[cat] = {
'label': label,
'total_with_char': total_with_char,
'no_character': no_character,
'no_study': no_study,
'has_study': has_study,
'no_study_pct': no_study / total_with_char * 100 if total_with_char else 0,
'l1l2_only_l1': l1l2_only_l1,
'l1l2_only_l2': l1l2_only_l2,
'l1l2_both': l1l2_both,
}
# Print
total_activated = len(activated)
total_no_study = sum(r['no_study'] for r in overall_results.values())
total_has_study = sum(r['has_study'] for r in overall_results.values())
total_no_char = sum(r['no_character'] for r in overall_results.values())
print(f"激活用户总数: {total_activated}")
print(f" 无角色数据: {total_no_char}")
print(f" 近14天无完课: {total_no_study} ({total_no_study/total_activated*100:.1f}%)")
print(f" 近14天有完课: {total_has_study} ({total_has_study/total_activated*100:.1f}%)")
print()
for cat, label, _ in categories:
r = overall_results[cat]
print(f"{label}】 共{r['total_with_char']}人 (另有{r['no_character']}人无角色)")
print(f" 无完课: {r['no_study']}人 ({r['no_study_pct']:.1f}%)")
print(f" 有完课: {r['has_study']}人 ({100-r['no_study_pct']:.1f}%)")
if cat == 'L1+L2':
print(f" 其中:")
print(f" 仅L1有完课: {r['l1l2_only_l1']}")
print(f" 仅L2有完课: {r['l1l2_only_l2']}")
print(f" L1+L2都有: {r['l1l2_both']}")
total_detail = r['l1l2_only_l1'] + r['l1l2_only_l2'] + r['l1l2_both']
print(f" (合计有完课: {total_detail}人)")
print()
cur.close()
conn.close()