ai_member_xiaoxi/scripts/challenge_click_daily.py
2026-06-24 08:00:01 +08:00

96 lines
2.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Daily active users of the unit-challenge entry click tracking event.

Covers 2026-05-23 ~ 2026-06-21 (CST), split by course level A1/A2.
"""
import base64
import json
import os
import ssl
import urllib.request
from collections import defaultdict
from datetime import datetime, timedelta, timezone
# Elasticsearch connection settings. Each value can be overridden with an
# environment variable of the same name; the literals are fallbacks that keep
# the script runnable exactly as before when nothing is set.
ES_HOST = os.environ.get("ES_HOST", "es-7vd7jcu9.public.tencentelasticsearch.com")
ES_PORT = int(os.environ.get("ES_PORT", "9200"))
ES_USER = os.environ.get("ES_USER", "elastic")
# NOTE(security): this fallback is a credential committed to source control.
# It should be rotated and supplied only through the ES_PASS environment
# variable; the literal is kept here solely for backward compatibility.
ES_PASS = os.environ.get("ES_PASS", "F%?QDcWes7N2WTuiYD11")

# Reporting window as epoch seconds, inclusive on both ends.
START_TS = 1779465600  # 2026-05-23 00:00:00 CST
END_TS = 1782057599  # 2026-06-21 23:59:59 CST

# Fixed UTC+8 offset used to bucket event timestamps into calendar days.
CST = timezone(timedelta(hours=8))

# NOTE(security): hostname checking and certificate verification are both
# switched off, so the HTTPS connection is not authenticated.
# TODO(review): confirm whether this is still required for this endpoint.
ctx = ssl.create_default_context()
# check_hostname must be disabled before verify_mode can be set to CERT_NONE.
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
def es_query(body):
    """Run a ``_search`` request against the ``user_behavior_buried_points`` index.

    Args:
        body: JSON-serialisable Elasticsearch query body.

    Returns:
        The decoded JSON response.

    Raises:
        urllib.error.URLError: On connection or HTTP-level failure
            (``HTTPError`` is a subclass).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = f"https://{ES_HOST}:{ES_PORT}/user_behavior_buried_points/_search"
    token = base64.b64encode(f"{ES_USER}:{ES_PASS}".encode()).decode()
    request = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Basic {token}",
        },
    )
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding fails.
    with urllib.request.urlopen(request, context=ctx) as resp:
        return json.loads(resp.read())
# Step 1: fetch all matching events in a single request.
print("Step 1: 从 ES 获取数据...")
body = {
    # 10000 is Elasticsearch's default index.max_result_window, i.e. the most
    # a single non-paginated search can return.
    "size": 10000,
    # By default the reported total is capped at 10000; ask for the exact
    # count so that a truncated result set can be detected below.
    "track_total_hits": True,
    "query": {
        "bool": {
            "must": [
                {"term": {"buryingPointId": 1300}},
                {"term": {"buryingPointSubId": 19}},
                {"range": {"activeTime": {"gte": START_TS, "lte": END_TS}}},
            ]
        }
    },
    "_source": ["accountId", "activeTime", "courseLevel"],
}
result = es_query(body)
hits = result.get("hits", {}).get("hits", [])
total = result.get("hits", {}).get("total", {}).get("value", 0)
print(f"{total} 条记录, 返回 {len(hits)}")
if total > len(hits):
    # More documents matched than were returned: every figure printed below
    # would undercount, so say so instead of silently reporting partial data.
    print(f"警告: 共命中 {total} 条, 仅返回 {len(hits)} 条, 以下统计不完整")

# Aggregate distinct account ids per day and per course level.
daily_a1 = defaultdict(set)
daily_a2 = defaultdict(set)
all_a1 = set()
all_a2 = set()
for hit in hits:
    src = hit["_source"]
    # `or 0` treats an explicit null the same as a missing field, so the
    # comparison below cannot fail on None.
    aid = src.get("accountId") or 0
    ts = src.get("activeTime") or 0
    level = src.get("courseLevel", "")
    if aid <= 0 or ts <= 0:
        continue
    # Bucket by calendar day in UTC+8.
    date_str = datetime.fromtimestamp(ts, tz=CST).strftime("%Y-%m-%d")
    if level == "A1":
        daily_a1[date_str].add(aid)
        all_a1.add(aid)
    elif level == "A2":
        daily_a2[date_str].add(aid)
        all_a2.add(aid)

# Every date that has activity in at least one level, in ascending order.
all_dates = sorted(daily_a1.keys() | daily_a2.keys())
print(f"\n{'日期':<12} {'A1日活':>6} {'A2日活':>6} {'总计':>6}")
print("-" * 36)
total_a1_daily = 0
total_a2_daily = 0
for d in all_dates:
    a1 = len(daily_a1.get(d, ()))
    a2 = len(daily_a2.get(d, ()))
    total_a1_daily += a1
    total_a2_daily += a2
    print(f"{d:<12} {a1:>6} {a2:>6} {a1+a2:>6}")
print("-" * 36)
print(f"{'合计':<12} {total_a1_daily:>6} {total_a2_daily:>6} {total_a1_daily+total_a2_daily:>6}")
print(f"\n{'='*36}")
# Distinct users over the whole window; the union removes accounts that
# appear under both levels.
print(f"总去重用户数: {len(all_a1 | all_a2)}")
print(f" A1: {len(all_a1)}")
print(f" A2: {len(all_a2)}")
print("注:日活为每日累加值(非去重),同一用户多天活跃会重复计数")