# ai_member_xiaoxi/scripts/challenge_click_daily.py

#!/usr/bin/env python3
"""单元挑战入口点击 埋点日活（2026-05-23 ~ 2026-06-21，按A1/A2拆分）"""

import json, urllib.request, base64, ssl
from collections import defaultdict
from datetime import datetime, timezone, timedelta

# Elasticsearch connection settings consumed by es_query().
ES_HOST = "es-7vd7jcu9.public.tencentelasticsearch.com"
ES_PORT = 9200
ES_USER = "elastic"
# NOTE(review): the password is committed in plain text. Consider loading it
# from the environment or a secret store, and rotating this value.
ES_PASS = "F%?QDcWes7N2WTuiYD11"

# Query window as Unix timestamps in seconds; both bounds are used inclusively
# (gte / lte) by the range filter on activeTime.
START_TS = 1779465600  # 2026-05-23 00:00:00 CST
END_TS = 1782057599    # 2026-06-21 23:59:59 CST
# Fixed UTC+8 offset used to turn event timestamps into calendar dates.
CST = timezone(timedelta(hours=8))

# TLS context passed to urlopen() in es_query(). Hostname checking and
# certificate verification are both switched off, so the server identity is
# not validated.
# NOTE(review): presumably done to accept the cluster's certificate; confirm
# whether verification can be enabled with the proper CA bundle instead.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

def es_query(body: dict, timeout: float = 60.0) -> dict:
    """Run a search against the ``user_behavior_buried_points`` index.

    The request body is JSON-encoded and sent with HTTP Basic authentication
    built from ``ES_USER`` / ``ES_PASS``, over the module-level TLS context.

    Args:
        body: Elasticsearch search request body.
        timeout: Socket timeout in seconds for the HTTP request, so a stalled
            connection fails instead of hanging the script indefinitely.

    Returns:
        The decoded JSON response.

    Raises:
        urllib.error.URLError: On connection failures or timeouts
            (``HTTPError`` for non-2xx responses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = f"https://{ES_HOST}:{ES_PORT}/user_behavior_buried_points/_search"
    auth = base64.b64encode(f"{ES_USER}:{ES_PASS}".encode()).decode()
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Basic {auth}",
        },
    )
    # The context manager closes the underlying connection even when reading
    # or JSON decoding raises.
    with urllib.request.urlopen(req, context=ctx, timeout=timeout) as resp:
        return json.loads(resp.read())

# Step 1: fetch all matching click events in a single request.
print("Step 1: 从 ES 获取数据...")
body = {
    "size": 10000,
    # Ask for the exact hit count. Without this, Elasticsearch 7+ caps the
    # reported total at 10000, which makes truncation impossible to detect.
    "track_total_hits": True,
    "query": {
        "bool": {
            "must": [
                {"term": {"buryingPointId": 1300}},
                {"term": {"buryingPointSubId": 19}},
                {"range": {"activeTime": {"gte": START_TS, "lte": END_TS}}},
            ]
        }
    },
    "_source": ["accountId", "activeTime", "courseLevel"],
}

result = es_query(body)
hits_section = result.get("hits", {})
hits = hits_section.get("hits", [])
total = hits_section.get("total", {}).get("value", 0)
print(f"  共 {total} 条记录, 返回 {len(hits)} 条")

# A single request returns at most `size` documents; make any shortfall
# visible instead of silently reporting under-counted statistics.
if total > len(hits):
    print(f"  警告: 结果被截断，仅返回 {len(hits)}/{total} 条，以下统计不完整")

# Aggregate distinct account ids per calendar day (UTC+8) and per course level.
daily_a1 = defaultdict(set)
daily_a2 = defaultdict(set)
all_a1 = set()
all_a2 = set()

for record in hits:
    fields = record["_source"]
    account_id = fields.get("accountId", 0)
    event_ts = fields.get("activeTime", 0)
    course_level = fields.get("courseLevel", "")

    # Skip records without a usable account id or timestamp.
    if account_id <= 0 or event_ts <= 0:
        continue

    day = datetime.fromtimestamp(event_ts, tz=CST).date().isoformat()

    # Pick the per-day and overall buckets for this level; other levels are
    # ignored.
    if course_level == "A1":
        per_day, overall = daily_a1, all_a1
    elif course_level == "A2":
        per_day, overall = daily_a2, all_a2
    else:
        continue

    per_day[day].add(account_id)
    overall.add(account_id)

# Every date that has at least one A1 or A2 click, in ascending order.
all_dates = sorted(daily_a1.keys() | daily_a2.keys())

# (date, A1 distinct users, A2 distinct users) for each reported day.
daily_rows = [
    (day, len(daily_a1.get(day, ())), len(daily_a2.get(day, ())))
    for day in all_dates
]

rule = "-" * 36
print(f"\n{'日期':<12} {'A1日活':>6} {'A2日活':>6} {'总计':>6}")
print(rule)
for day, a1_count, a2_count in daily_rows:
    print(f"{day:<12} {a1_count:>6} {a2_count:>6} {a1_count + a2_count:>6}")

# Column sums of the daily figures; a user active on several days is counted
# once per day here.
total_a1_daily = sum(row[1] for row in daily_rows)
total_a2_daily = sum(row[2] for row in daily_rows)

print(rule)
print(f"{'合计':<12} {total_a1_daily:>6} {total_a2_daily:>6} {total_a1_daily+total_a2_daily:>6}")

# Distinct users over the whole window, overall and per level.
print("\n" + "=" * 36)
print(f"总去重用户数: {len(all_a1 | all_a2)}")
print(f"  A1: {len(all_a1)}")
print(f"  A2: {len(all_a2)}")
print("注：日活为每日累加值（非去重），同一用户多天活跃会重复计数")