96 lines
2.9 KiB
Python
96 lines
2.9 KiB
Python
#!/usr/bin/env python3
|
||
"""单元挑战入口点击 埋点日活(2026-05-23 ~ 2026-06-21,按A1/A2拆分)"""
|
||
|
||
import base64
import json
import os
import ssl
import urllib.request
from collections import defaultdict
from datetime import datetime, timedelta, timezone
|
||
|
||
# --- Elasticsearch connection settings ---------------------------------
# Every value can be overridden through an environment variable of the same
# name; the literals are fallbacks that keep existing invocations working.
ES_HOST = os.environ.get("ES_HOST") or "es-7vd7jcu9.public.tencentelasticsearch.com"
ES_PORT = int(os.environ.get("ES_PORT") or 9200)
ES_USER = os.environ.get("ES_USER") or "elastic"
# SECURITY: the fallback below is a credential committed in source. Rotate it
# and supply ES_PASS through the environment instead of relying on this value.
ES_PASS = os.environ.get("ES_PASS") or "F%?QDcWes7N2WTuiYD11"

# --- Reporting window --------------------------------------------------
# Day boundaries are evaluated at a fixed UTC+8 offset (China Standard Time).
CST = timezone(timedelta(hours=8))
# Inclusive epoch-second bounds, derived from the calendar dates so the
# numbers cannot drift away from the dates they are meant to represent.
START_TS = int(datetime(2026, 5, 23, 0, 0, 0, tzinfo=CST).timestamp())  # 1779465600
END_TS = int(datetime(2026, 6, 21, 23, 59, 59, tzinfo=CST).timestamp())  # 1782057599

# --- TLS context -------------------------------------------------------
# SECURITY: hostname checking and certificate verification are both turned
# off, so the server's identity is not authenticated. Kept as-is because
# enabling verification may break the connection; review before reuse.
ctx = ssl.create_default_context()
ctx.check_hostname = False  # must be disabled before verify_mode can be CERT_NONE
ctx.verify_mode = ssl.CERT_NONE
|
||
|
||
def es_query(body, timeout=30):
    """Run a search against the ``user_behavior_buried_points`` index.

    Args:
        body: Search request as a JSON-serialisable dict.
        timeout: Socket timeout in seconds, so a stalled connection fails
            instead of blocking the script indefinitely.

    Returns:
        The decoded JSON response.

    Raises:
        urllib.error.URLError: On connection failure, timeout, or (as its
            subclass ``HTTPError``) a non-2xx response.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = f"https://{ES_HOST}:{ES_PORT}/user_behavior_buried_points/_search"
    # HTTP Basic auth: base64("user:password").
    auth = base64.b64encode(f"{ES_USER}:{ES_PASS}".encode()).decode()
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Basic {auth}",
        },
    )
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding raises.
    with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
        return json.loads(resp.read())
|
||
|
||
# Step 1: fetch every matching event in a single request
print("Step 1: 从 ES 获取数据...")
body = {
    # A single page; results beyond this many hits are NOT fetched.
    "size": 10000,
    # Ask for the exact hit count. Without this Elasticsearch may cap the
    # reported total at 10000, which would hide truncation of the page above.
    "track_total_hits": True,
    "query": {
        "bool": {
            "must": [
                {"term": {"buryingPointId": 1300}},
                {"term": {"buryingPointSubId": 19}},
                {"range": {"activeTime": {"gte": START_TS, "lte": END_TS}}},
            ]
        }
    },
    # Only the fields needed for the aggregation below.
    "_source": ["accountId", "activeTime", "courseLevel"],
}

result = es_query(body)
hits = result.get("hits", {}).get("hits", [])
total = result.get("hits", {}).get("total", 0)
# Newer responses wrap the count as {"value": N, "relation": ...}; older
# ones return a bare integer. Accept both.
if isinstance(total, dict):
    total = total.get("value", 0)
print(f" 共 {total} 条记录, 返回 {len(hits)} 条")
if total > len(hits):
    # Make truncation visible instead of silently reporting partial numbers.
    print(f" 警告: 结果被截断 (共 {total} 条, 仅返回 {len(hits)} 条), 以下统计不完整")
|
||
|
||
# Distinct account ids per calendar day and overall, split by course level
daily_a1 = defaultdict(set)
daily_a2 = defaultdict(set)
all_a1 = set()
all_a2 = set()

# (level label, per-day buckets, overall bucket) for each level we report on
level_buckets = (
    ("A1", daily_a1, all_a1),
    ("A2", daily_a2, all_a2),
)

for hit in hits:
    record = hit["_source"]
    account = record.get("accountId", 0)
    stamp = record.get("activeTime", 0)
    label = record.get("courseLevel", "")
    # Skip records without a usable account id or timestamp.
    if account <= 0 or stamp <= 0:
        continue
    # activeTime is interpreted as epoch seconds and bucketed by its CST date.
    day = datetime.fromtimestamp(stamp, tz=CST).strftime("%Y-%m-%d")
    for wanted, per_day, overall in level_buckets:
        if label == wanted:
            per_day[day].add(account)
            overall.add(account)
            break  # a record belongs to at most one level
|
||
|
||
# Every date seen for either level, in ascending order
all_dates = sorted(daily_a1.keys() | daily_a2.keys())

separator = "-" * 36
print(f"\n{'日期':<12} {'A1日活':>6} {'A2日活':>6} {'总计':>6}")
print(separator)

# One (date, A1 count, A2 count) row per day; .get() avoids inserting empty
# entries into the defaultdicts for dates only one level has.
rows = [
    (day, len(daily_a1.get(day, set())), len(daily_a2.get(day, set())))
    for day in all_dates
]
for day, count_a1, count_a2 in rows:
    print(f"{day:<12} {count_a1:>6} {count_a2:>6} {count_a1 + count_a2:>6}")

# Column sums of the daily counts (a user active on several days is counted
# once per day here).
total_a1_daily = sum(row[1] for row in rows)
total_a2_daily = sum(row[2] for row in rows)

print(separator)
print(f"{'合计':<12} {total_a1_daily:>6} {total_a2_daily:>6} {total_a1_daily + total_a2_daily:>6}")

# Distinct users across the whole window
print("\n" + "=" * 36)
print(f"总去重用户数: {len(all_a1 | all_a2)}")
print(f" A1: {len(all_a1)}")
print(f" A2: {len(all_a2)}")
print("注:日活为每日累加值(非去重),同一用户多天活跃会重复计数")
|