#!/usr/bin/env python3
"""Daily active users for the "unit challenge entry click" tracking event.

Covers 2026-05-23 through 2026-06-21 (UTC+8) and splits the numbers by the
``courseLevel`` values ``A1`` and ``A2``.
"""
import base64
import json
import os
import ssl
import urllib.request
from collections import defaultdict
from datetime import datetime, timezone, timedelta

ES_HOST = "es-7vd7jcu9.public.tencentelasticsearch.com"
ES_PORT = 9200
ES_INDEX = "user_behavior_buried_points"
ES_USER = os.environ.get("ES_USER", "elastic")
# SECURITY(review): a credential is committed in source. Prefer supplying
# ES_PASS through the environment; the literal fallback only keeps existing
# invocations working and should be rotated and removed.
ES_PASS = os.environ.get("ES_PASS", "F%?QDcWes7N2WTuiYD11")

CST = timezone(timedelta(hours=8))
# Inclusive query window, expressed as epoch seconds of UTC+8 wall-clock times.
START_TS = int(datetime(2026, 5, 23, 0, 0, 0, tzinfo=CST).timestamp())    # 1779465600
END_TS = int(datetime(2026, 6, 21, 23, 59, 59, tzinfo=CST).timestamp())   # 1782057599

PAGE_SIZE = 10000          # documents per scroll page
SCROLL_KEEPALIVE = "1m"    # how long ES keeps the scroll context between pages
REQUEST_TIMEOUT = 60       # seconds; without it a stalled connection hangs forever

# NOTE(review): certificate and hostname verification are disabled, so the
# Basic-auth header below is exposed to man-in-the-middle interception.
# Kept as-is because the cluster's certificate cannot be checked from here.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


def es_request(path, body=None, method="POST"):
    """Send one JSON request to the cluster and return the decoded response.

    Args:
        path: URL path (and optional query string) starting with ``/``.
        body: JSON-serialisable payload, or ``None`` for no request body.
        method: HTTP verb.

    Raises:
        urllib.error.URLError: on connection failures or non-2xx responses.
    """
    url = f"https://{ES_HOST}:{ES_PORT}{path}"
    auth = base64.b64encode(f"{ES_USER}:{ES_PASS}".encode()).decode()
    data = None if body is None else json.dumps(body).encode()
    req = urllib.request.Request(url, data=data, method=method, headers={
        "Content-Type": "application/json",
        "Authorization": f"Basic {auth}",
    })
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(req, context=ctx, timeout=REQUEST_TIMEOUT) as resp:
        return json.loads(resp.read())


def es_query(body):
    """Run a single ``_search`` against the tracking index and return the JSON."""
    return es_request(f"/{ES_INDEX}/_search", body)


def build_query():
    """Return the search body selecting the tracked click events in the window."""
    return {
        "size": PAGE_SIZE,
        "track_total_hits": True,
        "sort": ["_doc"],  # cheapest order for a full scroll
        "query": {
            "bool": {
                "must": [
                    {"term": {"buryingPointId": 1300}},
                    {"term": {"buryingPointSubId": 19}},
                    {"range": {"activeTime": {"gte": START_TS, "lte": END_TS}}},
                ]
            }
        },
        "_source": ["accountId", "activeTime", "courseLevel"],
    }


def fetch_all_hits(body):
    """Page through every matching document with the scroll API.

    A single ``_search`` returns at most ``size`` documents, so anything
    beyond the first page would be silently dropped from the statistics.

    Returns:
        ``(hits, total)``: the list of raw hit dicts and the match count
        reported by the first response.
    """
    page = es_request(f"/{ES_INDEX}/_search?scroll={SCROLL_KEEPALIVE}", body)
    total = page.get("hits", {}).get("total", 0)
    if isinstance(total, dict):  # ES 7 reports {"value": n, "relation": ...}
        total = total.get("value", 0)

    hits = []
    scroll_id = page.get("_scroll_id")
    try:
        while True:
            batch = page.get("hits", {}).get("hits", [])
            if not batch:
                break
            hits.extend(batch)
            scroll_id = page.get("_scroll_id", scroll_id)
            if scroll_id is None:
                break
            page = es_request(
                "/_search/scroll",
                {"scroll": SCROLL_KEEPALIVE, "scroll_id": scroll_id},
            )
    finally:
        if scroll_id is not None:
            try:
                es_request("/_search/scroll", {"scroll_id": scroll_id}, method="DELETE")
            except OSError:
                # Best-effort cleanup: the scroll context expires by itself
                # after SCROLL_KEEPALIVE, so a failed delete is harmless.
                pass
    return hits, total


def to_int(value):
    """Coerce *value* to ``int``; return 0 (treated as "skip") when impossible."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


def summarize_hits(hits):
    """Group distinct account ids by UTC+8 calendar day and course level.

    Hits with a missing, null, non-numeric or non-positive ``accountId`` /
    ``activeTime`` are skipped, as are course levels other than A1 and A2.

    Returns:
        ``(daily_a1, daily_a2)``: mappings of ``YYYY-MM-DD`` to the set of
        account ids active on that day for each level.
    """
    daily_a1 = defaultdict(set)
    daily_a2 = defaultdict(set)
    for hit in hits:
        src = hit.get("_source") or {}
        aid = to_int(src.get("accountId"))
        ts = to_int(src.get("activeTime"))
        if aid <= 0 or ts <= 0:
            continue
        level = src.get("courseLevel", "")
        if level == "A1":
            bucket = daily_a1
        elif level == "A2":
            bucket = daily_a2
        else:
            continue
        date_str = datetime.fromtimestamp(ts, tz=CST).strftime("%Y-%m-%d")
        bucket[date_str].add(aid)
    return daily_a1, daily_a2


def build_report(daily_a1, daily_a2):
    """Format the per-day table and the de-duplicated totals as output lines."""
    all_a1 = set().union(*daily_a1.values())
    all_a2 = set().union(*daily_a2.values())

    lines = [
        f"\n{'日期':<12} {'A1日活':>6} {'A2日活':>6} {'总计':>6}",
        "-" * 36,
    ]
    total_a1_daily = 0
    total_a2_daily = 0
    for d in sorted(daily_a1.keys() | daily_a2.keys()):
        a1 = len(daily_a1.get(d, ()))
        a2 = len(daily_a2.get(d, ()))
        total_a1_daily += a1
        total_a2_daily += a2
        lines.append(f"{d:<12} {a1:>6} {a2:>6} {a1+a2:>6}")
    lines.append("-" * 36)
    lines.append(
        f"{'合计':<12} {total_a1_daily:>6} {total_a2_daily:>6} "
        f"{total_a1_daily+total_a2_daily:>6}"
    )
    lines.append(f"\n{'='*36}")
    lines.append(f"总去重用户数: {len(all_a1 | all_a2)}")
    lines.append(f" A1: {len(all_a1)}")
    lines.append(f" A2: {len(all_a2)}")
    lines.append("注:日活为每日累加值(非去重),同一用户多天活跃会重复计数")
    return lines


def main():
    """Fetch the events, aggregate them and print the report."""
    # Step 1: pull every matching record
    print("Step 1: 从 ES 获取数据...")
    hits, total = fetch_all_hits(build_query())
    print(f" 共 {total} 条记录, 返回 {len(hits)} 条")

    # Aggregate per day and per course level, then print
    daily_a1, daily_a2 = summarize_hits(hits)
    for line in build_report(daily_a1, daily_a2):
        print(line)


if __name__ == "__main__":
    main()