#!/usr/bin/env python3
"""
For paying users with no lesson completion in the last 14 days, report the
distribution of the Unit in which their last completion happened
(split by L1/L2).

Date range: 2026-05-09 ~ 2026-05-22
"""

import os
import re
from collections import defaultdict

import psycopg2
import psycopg2.extras

# Open the PostgreSQL connection and cursor shared by every query in this script.
# NOTE(review): the password literal below is committed to source control; it
# should be rotated and supplied only through the environment. The
# VALA_BI_DB_PASSWORD variable takes precedence when set; the literal is kept
# solely as a backward-compatible fallback until it can be removed.
conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591,
    user="ai_member",
    password=os.environ.get("VALA_BI_DB_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    dbname="vala_bi",
)
# Every statement issued below is a SELECT, so run in autocommit mode.
conn.set_session(autocommit=True)
# Rows are read by position (row[0], row[1], ...) throughout the script.
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Reporting window as YYYY-MM-DD strings. Both ends are inclusive: the activity
# query compares against END_DATE plus one day with a strict "<".
START_DATE = "2026-05-09"
END_DATE = "2026-05-22"

print("=" * 70)
print("📊 无完课用户最后一次完课 Unit 分布分析")
print("=" * 70)

# ═══════════════════════════════════════════════════════════
# Step 1: collect every paying account
# ═══════════════════════════════════════════════════════════
print("\n[1/6] 获取付费用户...")
# An account qualifies when its account row has status = 1 and it owns at least
# one non-deleted order in order_status 3 or 4 with a recorded payment date.
cur.execute("""
    SELECT DISTINCT o.account_id
    FROM bi_vala_order o
    INNER JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
    WHERE o.order_status IN (3, 4)
      AND o.pay_success_date IS NOT NULL
      AND o.deleted_at IS NULL
""")
# Set of account ids; used below for set arithmetic and as a query parameter.
paid_accounts = {row[0] for row in cur.fetchall()}
print(f" 付费用户总数: {len(paid_accounts)}")

# ═══════════════════════════════════════════════════════════
# Step 2: classify accounts as L1-only / L2-only / L1+L2
# ═══════════════════════════════════════════════════════════
print("\n[2/6] 分类用户等级...")
# goods_id values that place an order in each level bucket.
L1_GOODS = {57, 60, 63}
L2_GOODS = {31, 32, 33, 54}
# A single purchase of these goods counts as owning both levels.
L1L2_GOODS = {61}

# NOTE(review): unlike Step 1, this query does not require
# pay_success_date IS NOT NULL, so an order without a payment date can still
# influence the classification — confirm whether that is intended.
cur.execute("""
    SELECT o.account_id, o.goods_id
    FROM bi_vala_order o
    WHERE o.account_id = ANY(%s)
      AND o.order_status IN (3, 4)
      AND o.deleted_at IS NULL
""", (list(paid_accounts),))

# account_id → set of goods_id purchased by that account
user_goods = defaultdict(set)
for account_id, goods_id in cur.fetchall():
    user_goods[account_id].add(goods_id)

# Disjoint buckets; every paying account lands in exactly one of them.
only_l1, only_l2, both_l1l2, other = set(), set(), set(), set()
for aid in paid_accounts:
    goods = user_goods.get(aid, set())
    has_l1 = not goods.isdisjoint(L1_GOODS)
    has_l2 = not goods.isdisjoint(L2_GOODS)
    has_bundle = not goods.isdisjoint(L1L2_GOODS)
    if has_bundle or (has_l1 and has_l2):
        both_l1l2.add(aid)
    elif has_l1:
        only_l1.add(aid)
    elif has_l2:
        only_l2.add(aid)
    else:
        # Bought none of the goods listed above.
        other.add(aid)

# ═══════════════════════════════════════════════════════════
# Step 3: find accounts with a completion inside the date window
# ═══════════════════════════════════════════════════════════
print("\n[3/6] 查询最近14天完课行为...")
cur.execute(
    "SELECT id, account_id FROM bi_vala_app_character WHERE account_id = ANY(%s) AND deleted_at IS NULL",
    (list(paid_accounts),),
)
char_to_account = {}                # character id → owning account id
account_chars = defaultdict(list)   # account id → list of its character ids
for char_id, account_id in cur.fetchall():
    char_to_account[char_id] = account_id
    account_chars[account_id].append(char_id)
all_char_ids = set(char_to_account)

# The play-record data is spread over tables suffixed _0 .. _7. Their user_id
# column is matched against character ids here.
active_char_ids = set()
for shard in range(8):
    # "< END_DATE + 1 day" makes END_DATE itself part of the window.
    cur.execute(f"""
        SELECT DISTINCT user_id FROM bi_user_chapter_play_record_{shard}
        WHERE play_status = 1 AND updated_at >= %s AND updated_at < %s::date + interval '1 day'
    """, (START_DATE, END_DATE))
    # Keep only characters that belong to paying accounts.
    active_char_ids.update(r[0] for r in cur.fetchall() if r[0] in all_char_ids)

active_accounts = {char_to_account[c] for c in active_char_ids}
# Paying accounts without any character also end up in this set.
inactive_accounts = paid_accounts - active_accounts
print(f" 无完课付费用户: {len(inactive_accounts)}")

# ═══════════════════════════════════════════════════════════
# Step 4: collect every character owned by an inactive account
# ═══════════════════════════════════════════════════════════
print("\n[4/6] 获取无完课用户的角色...")
# character id → owning account id, restricted to inactive accounts.
# Accounts without characters contribute nothing here.
inactive_chars = {
    cid: aid
    for aid in inactive_accounts
    for cid in account_chars.get(aid, [])
}
print(f" 无完课用户角色数: {len(inactive_chars)}")

# ═══════════════════════════════════════════════════════════
# Step 5: find each character's most recent completion (scans all 8 shard tables)
# ═══════════════════════════════════════════════════════════
print("\n[5/6] 查询各角色最后一次完课记录...")

# char_id → (chapter_id, updated_at)
latest_completion = {}

for shard in range(8):
    table = f"bi_user_chapter_play_record_{shard}"
    # DISTINCT ON keeps the first row per user_id in ORDER BY order. PostgreSQL
    # sorts NULLs first for DESC by default, so NULLS LAST is required to stop
    # a row with a NULL updated_at from being chosen as the "latest" one.
    cur.execute(f"""
        SELECT DISTINCT ON (user_id) user_id, chapter_id, updated_at
        FROM {table}
        WHERE play_status = 1
        ORDER BY user_id, updated_at DESC NULLS LAST
    """)
    rows = cur.fetchall()
    print(f" {table}: {len(rows)} 条记录")
    for cid, ch_id, ts in rows:
        if cid not in inactive_chars:
            continue
        best = latest_completion.get(cid)
        if best is None:
            latest_completion[cid] = (ch_id, ts)
        elif ts is not None and (best[1] is None or ts > best[1]):
            # The same character may show up in more than one shard; keep the
            # newest row and never let a missing timestamp win the comparison.
            latest_completion[cid] = (ch_id, ts)

print(f" 有历史完课记录的角色数: {len(latest_completion)}")
# Character-level count (not accounts) of inactive characters with no record.
no_history = len(inactive_chars) - len(latest_completion)
print(f" 无任何完课记录的角色数: {no_history}")

# ═══════════════════════════════════════════════════════════
# Step 6: map chapter_id → Unit and aggregate by user level type
# ═══════════════════════════════════════════════════════════
print("\n[6/6] 映射 chapter_id → Unit 并聚合...")

# Load the course structure lookup.
cur.execute("SELECT id, course_level, course_unit FROM bi_level_unit_lesson")
# chapter_id → (course_level, course_unit)
chapter_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}

# Aggregate: user_level_type → { unit → count }
# user_level_type is one of "仅L1", "仅L2", "L1+L2", "其他".
# Each entry of latest_completion is one character, so the counts are per
# character rather than per account.
# NOTE(review): buckets are keyed by course_unit only; course_level is read but
# not used, so equal unit labels from different levels share one bucket.
unit_counts = defaultdict(lambda: defaultdict(int))
no_chapter = defaultdict(int)  # completions whose chapter_id has no mapping

for cid, (ch_id, _) in latest_completion.items():
    aid = inactive_chars[cid]
    if aid in only_l1:
        user_type = "仅L1"
    elif aid in only_l2:
        user_type = "仅L2"
    elif aid in both_l1l2:
        user_type = "L1+L2"
    else:
        user_type = "其他"

    mapped = chapter_map.get(ch_id)
    if mapped is None:
        no_chapter[user_type] += 1
    else:
        unit_counts[user_type][mapped[1]] += 1

# ═══════════════════════════════════════════════════════════
# Print results
# ═══════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("📈 统计结果")
print("=" * 70)

# Ordering of Unit labels.
# Labels look like "U00", "U01" or "S1U00": an optional "S<number>" prefix
# followed by "U<number>". Compiled once instead of on every key computation.
_UNIT_LABEL_RE = re.compile(r'(?:S(\d+))?U(\d+)')


def unit_sort_key(u):
    """Return an ``(s, unit)`` integer tuple that orders Unit labels numerically.

    Args:
        u: A Unit label such as ``"U00"`` or ``"S1U00"``; may be ``None``.

    Returns:
        ``(s, unit)`` where ``s`` is the number after ``S`` (0 when the prefix
        is absent) and ``unit`` is the number after ``U``. ``None``, non-string
        values and labels that do not start with the expected pattern all map
        to ``(99, 99)``, which places them after any label whose numbers are
        below 99.
    """
    # Non-strings (including None) cannot be matched; treat them as unparseable
    # instead of letting the regex call raise TypeError.
    if not isinstance(u, str):
        return (99, 99)
    m = _UNIT_LABEL_RE.match(u)
    if m is None:
        return (99, 99)
    s_part, u_part = m.groups()
    return (int(s_part) if s_part else 0, int(u_part))


# Print, for each reported user type, a text bar chart of the Units in which
# the last completion happened. unit_counts tallies characters, so the totals
# shown here are character counts.
for user_type in ["仅L1", "仅L2", "L1+L2"]:
    data = unit_counts[user_type]
    total_with_history = sum(data.values())
    print(f"\n{'─' * 50}")
    print(f" {user_type} 用户")
    print(f" 最后一次完课 Unit 分布(共 {total_with_history} 人有完课记录):")
    print(f"{'─' * 50}")

    # Bars are scaled against the largest bucket (at most 30 cells, at least 1).
    peak = max(data.values(), default=0)
    # Rows are listed in Unit order.
    for unit, cnt in sorted(data.items(), key=lambda item: unit_sort_key(item[0])):
        bar = "█" * max(1, int(cnt / max(1, peak) * 30))
        # str() keeps a None or non-string unit from breaking the "s" format spec.
        print(f" {str(unit):>10s} {cnt:>5d} {bar}")

    if no_chapter.get(user_type, 0) > 0:
        print(f" {'(未知)':>10s} {no_chapter[user_type]:>5d} (chapter_id 映射失败)")

# ─── Overall summary ───
print(f"\n{'═' * 50}")
print("📋 汇总")
print(f"{'═' * 50}")
total_inactive = len(inactive_accounts)
# Report label → account-id set for that label, in display order.
type_members = {"仅L1": only_l1, "仅L2": only_l2, "L1+L2": both_l1l2}
for user_type, members in type_members.items():
    # Inactive accounts of this type, including accounts that own no character.
    type_accounts = inactive_accounts & members
    # An account has history as soon as ANY of its characters has a completion.
    accounts_with_history = {
        aid
        for cid, aid in inactive_chars.items()
        if aid in members and cid in latest_completion
    }
    # Everything else has no completion at all. Subtracting from type_accounts
    # (rather than from accounts seen via characters) also counts accounts with
    # zero characters, so the two sub-counts add up to the total.
    no_history_accounts = type_accounts - accounts_with_history

    print(f" {user_type}: 共 {len(type_accounts)} 人")
    print(f" 有完课记录: {len(accounts_with_history)} 人")
    print(f" 完全无完课: {len(no_history_accounts)} 人")
    # Five most frequent Units for this type.
    data = unit_counts[user_type]
    if data:
        top = sorted(data.items(), key=lambda x: -x[1])[:5]
        top_str = ", ".join(f"{u}({c})" for u, c in top)
        print(f" Top5 Unit: {top_str}")

print(f"\n 合计无完课付费用户: {total_inactive}")
print(f" 注:用户数 {total_inactive} 为上轮口径,本次统计以角色维度查最后完课记录")

# Release the cursor and the connection once all output has been printed.
cur.close()
conn.close()
print("\n✅ 完成")