ai_member_xiaoxi/scripts/challenge_funnel.py
2026-06-24 08:00:01 +08:00

111 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""单元挑战漏斗按用户交叉匹配不依赖ex3区分路径"""
import base64
import json
import os
import ssl
import sys
import urllib.request
# Connection settings for the Elasticsearch cluster. Every value can be
# overridden through an environment variable of the same name; the literals
# are fallbacks that keep the script runnable exactly as before.
ES_HOST = os.environ.get("ES_HOST", "es-7vd7jcu9.public.tencentelasticsearch.com")
ES_PORT = int(os.environ.get("ES_PORT", "9200"))
ES_USER = os.environ.get("ES_USER", "elastic")
# SECURITY NOTE(review): a live credential is committed to the repository.
# Rotate it, supply ES_PASS from the environment, then delete this fallback.
ES_PASS = os.environ.get("ES_PASS", "F%?QDcWes7N2WTuiYD11")

# Reporting window, inclusive on both ends. Interpreted as epoch seconds the
# values are 2026-05-23 00:00:00 and 2026-06-21 23:59:59 at UTC+08:00.
START_TS = 1779465600
END_TS = 1782057599

# TLS context shared by every request.
ctx = ssl.create_default_context()
if os.environ.get("ES_VERIFY_TLS", "").strip().lower() not in ("1", "true", "yes"):
    # SECURITY NOTE(review): certificate and hostname checks are disabled by
    # default to preserve the script's historical behaviour, which leaves the
    # connection open to man-in-the-middle attacks. Set ES_VERIFY_TLS=1 to
    # turn verification on. check_hostname must be cleared before verify_mode
    # can be set to CERT_NONE.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
def es_query(body, timeout=60):
    """Run a search against the ``user_behavior_buried_points`` index.

    The request is sent to ``https://ES_HOST:ES_PORT`` with HTTP Basic
    authentication built from ``ES_USER`` / ``ES_PASS`` and uses the
    module-level TLS context ``ctx``.

    Args:
        body: Search request as a JSON-serialisable dict.
        timeout: Socket timeout in seconds, so a stalled connection fails
            instead of hanging the script forever.

    Returns:
        The response body decoded from JSON.

    Raises:
        urllib.error.URLError: On connection failure or timeout; its subclass
            ``HTTPError`` is raised for HTTP error statuses.
    """
    url = f"https://{ES_HOST}:{ES_PORT}/user_behavior_buried_points/_search"
    auth = base64.b64encode(f"{ES_USER}:{ES_PASS}".encode()).decode()
    # Supplying ``data`` makes urllib issue a POST.
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Basic {auth}",
        },
    )
    # The context manager closes the response (and its socket) even when
    # reading or decoding fails.
    with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
        return json.loads(resp.read())
def fetch_users(sub_id):
    """Collect the accounts that fired a given sub-event, split by course level.

    Queries events with ``buryingPointId`` 1300 and the given
    ``buryingPointSubId`` whose ``activeTime`` lies in
    ``[START_TS, END_TS]``.

    Args:
        sub_id: Value matched against ``buryingPointSubId``.

    Returns:
        A tuple ``(a1, a2)`` of sets of account ids whose ``courseLevel`` is
        ``"A1"`` and ``"A2"`` respectively. Records with any other level, or
        with a missing, non-numeric or non-positive ``accountId``, are
        ignored.
    """
    # A single request returns at most this many events.
    # NOTE(review): presumably chosen to match Elasticsearch's default
    # ``index.max_result_window``; larger result sets need real pagination
    # (e.g. ``search_after``), which requires knowing the index mapping.
    max_hits = 10000
    body = {
        "size": max_hits,
        "query": {
            "bool": {
                "must": [
                    {"term": {"buryingPointId": 1300}},
                    {"term": {"buryingPointSubId": sub_id}},
                    {"range": {"activeTime": {"gte": START_TS, "lte": END_TS}}},
                ]
            }
        },
        "_source": ["accountId", "courseLevel"],
    }
    result = es_query(body)
    hits_section = result.get("hits", {})
    hits = hits_section.get("hits", [])

    # ``hits.total`` is either a plain integer or an object of the form
    # {"value": N, "relation": "eq" | "gte"}; "gte" means N is only a lower
    # bound on the number of matches.
    total = hits_section.get("total")
    if isinstance(total, dict):
        matched = total.get("value", 0)
        lower_bound = total.get("relation") == "gte"
    else:
        matched = total or 0
        lower_bound = False
    if lower_bound or matched > len(hits):
        # Surface the truncation instead of silently under-counting users.
        # Written to stderr so the report on stdout is unchanged.
        print(
            f"WARNING: subId={sub_id} matched "
            f"{'at least ' if lower_bound else ''}{matched} events but only "
            f"{len(hits)} were returned; user counts may be incomplete.",
            file=sys.stderr,
        )

    a1 = set()
    a2 = set()
    for hit in hits:
        src = hit.get("_source") or {}
        aid = src.get("accountId")
        # A null or non-numeric id would make the ``<=`` comparison raise
        # TypeError, so such records are skipped along with non-positive ids.
        if not isinstance(aid, (int, float)) or aid <= 0:
            continue
        level = src.get("courseLevel", "")
        if level == "A1":
            a1.add(aid)
        elif level == "A2":
            a2.add(aid)
    return a1, a2
# Fetch the per-level user sets for each funnel stage, in this order:
#   25 -> "start challenge"
#   26 -> "challenge again"
#   32 -> first-question exposure (all users)
#   27 -> settlement-page exposure (all users)
(
    (start_a1, start_a2),
    (restart_a1, restart_a2),
    (first_a1, first_a2),
    (settle_a1, settle_a2),
) = (fetch_users(stage_sub_id) for stage_sub_id in (25, 26, 32, 27))

# Funnel by cross-matching users. Each later stage is intersected with the
# entry cohort on its own: the settlement set is NOT additionally restricted
# to users who were seen at the first question.

# "Start challenge" funnel.
start_first_a1 = start_a1.intersection(first_a1)
start_settle_a1 = start_a1.intersection(settle_a1)
start_first_a2 = start_a2.intersection(first_a2)
start_settle_a2 = start_a2.intersection(settle_a2)

# "Challenge again" funnel.
restart_first_a1 = restart_a1.intersection(first_a1)
restart_settle_a1 = restart_a1.intersection(settle_a1)
restart_first_a2 = restart_a2.intersection(first_a2)
restart_settle_a2 = restart_a2.intersection(settle_a2)
def pct(part, base):
    """Format ``part / base`` as a percentage with one decimal place.

    Returns the string ``"N/A"`` when ``base`` is zero.
    """
    if base == 0:
        return "N/A"
    ratio = part / base
    # The ``%`` presentation type multiplies by 100 and appends the sign.
    return f"{ratio:.1%}"
# Report header.
print("单元挑战漏斗 | 2026-05-23 ~ 2026-06-21")
print("方法按用户交叉匹配不依赖ex3区分路径")
print("=" * 60)

# One row per funnel: course level, entry-step label, entry cohort, and the
# subsets of that cohort seen at the first question / the settlement page.
funnels = (
    ("A1", "开始挑战", start_a1, start_first_a1, start_settle_a1),
    ("A1", "再次挑战", restart_a1, restart_first_a1, restart_settle_a1),
    ("A2", "开始挑战", start_a2, start_first_a2, start_settle_a2),
    ("A2", "再次挑战", restart_a2, restart_first_a2, restart_settle_a2),
)
for level, entry, cohort, reached_first, reached_settle in funnels:
    entered = len(cohort)
    print(f"\n📌 {level} {entry}漏斗:")
    print(f" {entry}: {entered}")
    print(f" → 第一题曝光: {len(reached_first)}人 ({pct(len(reached_first), entered)})")
    print(f" → 结算页曝光: {len(reached_settle)}人 ({pct(len(reached_settle), entered)})")

# Diagnostics: users of the "start challenge" cohort who appear on the
# settlement page but were never seen at the first question.
settle_only_a1 = start_settle_a1.difference(start_first_a1)
settle_only_a2 = start_settle_a2.difference(start_first_a2)
print(f"\n--- 诊断 ---")
print(f"A1 开始挑战→结算页有但第一题没有: {len(settle_only_a1)}")
print(f"A2 开始挑战→结算页有但第一题没有: {len(settle_only_a2)}")