🤖 每日自动备份 - 2026-05-19 08:00:01

2026-05-19 08:00:01 +08:00 · 2026-05-19 08:00:01 +08:00 · b234be7bba
commit b234be7bba
parent 8d77e036c5
5 changed files with 491 additions and 19 deletions
--- a/memory/2026-05-18-0958.md
+++ b/memory/2026-05-18-0958.md
@ -0,0 +1,15 @@
 # Session: 2026-05-18 09:58:46 GMT+8
 - **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
 - **Session ID**: e42c0253-2c2c-41da-9802-4c980f0fad8d
 - **Source**: feishu
 ## Conversation Summary
 assistant: 承龙，`/eset` 不是我能识别的命令。你是想输入以下哪个吗？
 - `/exec` — 执行命令
 - `/set` — 设置（具体是什么设置？）
 - 或者其他？
 如果这是你自定义的一个指令，请跟我说一下它的用途，我来配合。
--- a/memory/2026-05-18-1804.md
+++ b/memory/2026-05-18-1804.md
@ -0,0 +1,5 @@
 # Session: 2026-05-18 18:04:40 GMT+8
 - **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
 - **Session ID**: 9e66388a-8314-48f3-8131-02d369f1e547
 - **Source**: feishu
--- a/memory/2026-05-18.md
+++ b/memory/2026-05-18.md
@ -0,0 +1,28 @@
 # 2026-05-18 工作日志
 ## 小红书线索学习数据匹配
 [李承龙/陈逸鸫需求] 按CSV文件 `may2026_xhs_leads_user_sales.csv` 匹配小红书线索用户的学习数据。
 ### 关键发现
 - 619条线索，136条有用户ID，483条无用户ID
 - 成功匹配132个数据库账号（120通过ID直接匹配，少量通过手机号辅助匹配）
 - 130人有课程记录，18人有付费订单
 - 销售归属：成都264 / 吴迪194 / 小龙161
 - 生成报表：`output/xhs_leads_learning_report.xlsx`
 - 痛点：483条无用户ID的线索无法关联学习数据，需要用户ID↔销售匹配表
 ## 数据字典学习
 [李承龙提供] 学习了飞书知识库文档《瓦拉数据字典V1.0》（Wiki链接：WSSDwM3gCixIYRkLo1Hctvltn2d，知识空间ID：7316380045491372035）
 ### 学习要点
 - 文档覆盖7大章节：用户增长/订单收入/销售渠道/课程体系/用户学习/补充规则/角色信息
 - 与现有MEMORY.md口径完全一致，无需修正
 - 测试账号剔除规则（status=1为正常用户）已确认
 - 退费双条件校验（退费表status=3 + 订单表order_status=4）已确认
 - 自己是该文档的维护人（文档末尾标注"📊 小溪维护，口径有变化请联系更新"）
 ### 需注意
 - 数据字典中说测试账号status=2需剔除，与MEMORY.md中"仅保留status=1"一致
 - 后续如有口径变更，需同步更新数据字典文档和MEMORY.md
--- a/scripts/build_xhs_learning_report.py
+++ b/scripts/build_xhs_learning_report.py
@ -0,0 +1,413 @@
 #!/usr/bin/env python3
 """Build comprehensive report: 小红书线索 → 用户学习数据匹配"""
 import csv
 import psycopg2
 import psycopg2.extras
 from collections import defaultdict
 from openpyxl import Workbook
 from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
 from openpyxl.utils import get_column_letter
 import os, re
 # The database password is read from the environment (libpq's conventional
 # PGPASSWORD variable) so that no credential is stored in the repository.
 # NOTE(review): a literal password used to be committed on this line; it
 # remains in the git history and should be rotated.
 PG_PASS = os.environ.get("PGPASSWORD", "")
 # Keyword arguments handed to psycopg2.connect().
 PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": PG_PASS,
    "dbname": "vala_bi",
 }
 # Input CSV and output workbook, both resolved relative to this script's directory.
 CSV_PATH = os.path.join(os.path.dirname(__file__), "../tmp/may2026_xhs_leads_user_sales.csv")
 OUTPUT = os.path.join(os.path.dirname(__file__), "../output/xhs_leads_learning_report.xlsx")
 # Load every lead row from the CSV; "utf-8-sig" drops a leading BOM if present.
 with open(CSV_PATH, "r", encoding="utf-8-sig") as csv_file:
    rows = list(csv.DictReader(csv_file))
 print(f"Total rows: {len(rows)}")
 # Distinct, non-blank values of the "用户ID" column.
 all_ids = {
    stripped
    for stripped in (lead["用户ID"].strip() for lead in rows)
    if stripped
 }
 # All-digit values of up to 10 characters are treated as account IDs,
 # all-digit values of exactly 11 characters as phone numbers.
 digit_ids = [value for value in all_ids if value.isdigit()]
 numeric_ids = [value for value in digit_ids if len(value) <= 10]
 phone_ids = [value for value in digit_ids if len(value) == 11]
 print(f"Numeric IDs (<=10 digits): {len(numeric_ids)}")
 print(f"Phone numbers (11 digits): {len(phone_ids)}")
 print(f"Total unique IDs: {len(all_ids)}")
 conn = psycopg2.connect(**PG_CONFIG)
 cur = conn.cursor()
 # Query 1: Account info for numeric IDs.
 # The IDs originate from the CSV, so they are sent as a bound array parameter
 # rather than spliced into the SQL text. `= ANY(%s)` also remains valid when
 # the list is empty, whereas an empty `IN ()` is a SQL syntax error.
 numeric_id_values = [int(uid) for uid in numeric_ids]
 cur.execute(
    """
    SELECT id, tel, status, created_at, download_channel
    FROM bi_vala_app_account
    WHERE id = ANY(%s)
    """,
    (numeric_id_values,),
 )
 # account_id -> {tel, status, created_at, download_channel}
 account_map = {
    account_id: {
        "tel": tel, "status": status,
        "created_at": created_at, "download_channel": download_channel,
    }
    for account_id, tel, status, created_at, download_channel in cur.fetchall()
 }
 print(f"Accounts matched (by id): {len(account_map)}")
 # Phone-number IDs are not resolved in this query: the stored tel is masked
 # (e.g. 137****3958), so a full number cannot be compared against it directly.
 # Query 2: Course details for matched accounts
 matched_account_ids = sorted(account_map.keys())
 # Comma-joined form of the same IDs; kept defined because other parts of the
 # script may reference it.
 matched_str = ",".join(str(x) for x in matched_account_ids)
 # Bound array parameter: stays valid SQL when no account was matched
 # (an empty `IN ()` would be a syntax error).
 cur.execute(
    """
    SELECT cd.account_id, cd.user_id, cd.course_level, cd.expire_time, cd.deleted_at, cd.created_at
    FROM bi_user_course_detail cd
    WHERE cd.account_id = ANY(%s)
    ORDER BY cd.account_id, cd.course_level, cd.created_at
    """,
    (matched_account_ids,),
 )
 course_map = defaultdict(list)  # account_id -> [courses]
 for account_id, user_id, course_level, expire_time, deleted_at, created_at in cur.fetchall():
    course_map[account_id].append({
        "user_id": user_id,
        "course_level": course_level,
        "expire_time": expire_time,
        # A non-null deleted_at marks the course row as deleted.
        "is_deleted": deleted_at is not None,
        "created_at": created_at,
    })
 # Query 3: per-user chapter counts, gathered from the 8 play-record shard tables.
 chapter_stats = {}  # user_id -> (chapters_completed, last_study)
 user_ids_str = ",".join(
    str(course["user_id"])
    for course_list in course_map.values()
    for course in course_list
 )
 # With no user ids there is nothing to look up, so the query is skipped.
 if user_ids_str:
    union_query = " UNION ALL ".join(
        f"""
            SELECT user_id, chapter_id, created_at
            FROM bi_user_chapter_play_record_{shard}
            WHERE user_id IN ({user_ids_str})
        """
        for shard in range(8)
    )
    cur.execute(f"""
        SELECT user_id, COUNT(DISTINCT chapter_id), MAX(created_at)
        FROM ({union_query}) t
        GROUP BY user_id
    """)
    chapter_stats = {record[0]: (record[1], record[2]) for record in cur.fetchall()}
 # Query 4: Orders
 # The account IDs are passed as a bound array parameter so the statement stays
 # valid when no account was matched (an empty `IN ()` is a SQL syntax error).
 cur.execute(
    """
    SELECT o.account_id, o.order_status, o.pay_amount_int, o.goods_name, o.pay_success_date, o.key_from
    FROM bi_vala_order o
    WHERE o.account_id = ANY(%s)
    ORDER BY o.account_id, o.pay_success_date
    """,
    (matched_account_ids,),
 )
 order_map = defaultdict(list)  # account_id -> [orders]
 for account_id, order_status, pay_amount_int, goods_name, pay_success_date, key_from in cur.fetchall():
    order_map[account_id].append({
        "order_status": order_status,
        # pay_amount_int is divided by 100 (presumably cents -> yuan; confirm).
        # A NULL amount is treated as 0 instead of raising TypeError.
        "pay_amount": (pay_amount_int or 0) / 100,
        "goods_name": goods_name,
        "pay_time": pay_success_date,
        "key_from": key_from,
    })
 cur.close()
 conn.close()
 # Build account-level summary
 def get_user_summary(account_id):
    """Summarise courses, study progress and orders for one account.

    Reads the module-level lookups ``account_map``, ``course_map``,
    ``chapter_stats`` and ``order_map``.

    Returns a 12-tuple:
        (match status, registration time, download channel, course type,
         course levels, completed chapter count, last study time,
         days since last study, has purchased, GMV, GSV, order summary)

    When ``account_id`` is not in ``account_map`` the first element is
    "未匹配" and the remaining eleven are empty strings, so both outcomes
    have the same length.
    """
    # Imported locally, as the original function did, so no file-level import
    # has to change.
    from datetime import datetime

    if account_id not in account_map:
        return ("未匹配",) + ("",) * 11
    acc = account_map[account_id]
    # Course rows flagged as deleted are ignored everywhere below.
    valid_courses = [c for c in course_map.get(account_id, []) if not c["is_deleted"]]
    # Course info: any non-deleted course with an expire_time makes it "正式课".
    has_formal = any(c["expire_time"] is not None for c in valid_courses)
    course_type = "正式课" if has_formal else "体验课" if valid_courses else "无课程"
    levels = []
    if any(c["course_level"] == "A1" for c in valid_courses):
        levels.append("L1")
    if any(c["course_level"] == "A2" for c in valid_courses):
        levels.append("L2")
    level_str = "+".join(levels) if levels else "-"
    # Total chapters across all user_ids for this account. Each user_id is
    # counted once even when it appears on several course rows; otherwise its
    # chapter count would be added once per row.
    total_chapters = 0
    latest_study = None
    user_ids_seen = set()
    for c in valid_courses:
        uid = c["user_id"]
        if uid in user_ids_seen:
            continue
        user_ids_seen.add(uid)
        if uid not in chapter_stats:
            continue
        chapters, last = chapter_stats[uid]
        total_chapters += chapters
        if last and (latest_study is None or last > latest_study):
            latest_study = last
    # Days since last study. "Now" is taken with the same tzinfo as the stored
    # timestamp, so an aware and a naive datetime are never subtracted (that
    # raises TypeError).
    # NOTE(review): a naive timestamp is therefore compared against local
    # wall-clock time — confirm the database stores local time in that case.
    days_since = ""
    if latest_study:
        days_since = str((datetime.now(latest_study.tzinfo) - latest_study).days)
    # Orders
    orders = order_map.get(account_id, [])
    valid_orders = [o for o in orders if o["order_status"] in (3, 4)]  # completed or refunded
    completed = [o for o in valid_orders if o["order_status"] == 3]
    has_purchased = "是" if valid_orders else "否"
    # At most three orders are listed; a suffix reports the full count beyond that.
    order_summary = "; ".join([f"{o['goods_name']}(¥{o['pay_amount']})" for o in valid_orders[:3]])
    if len(valid_orders) > 3:
        order_summary += f" ...共{len(valid_orders)}单"
    gmv = sum(o["pay_amount"] for o in valid_orders)  # status 3 and 4
    gsv = sum(o["pay_amount"] for o in completed)  # status 3 only
    return (
        "已匹配",
        acc["created_at"].strftime("%Y-%m-%d %H:%M") if acc["created_at"] else "",
        acc["download_channel"] or "未设置",
        course_type,
        level_str,
        str(total_chapters),
        latest_study.strftime("%Y-%m-%d %H:%M") if latest_study else "未学习",
        days_since,
        has_purchased,
        gmv,
        gsv,
        order_summary,
    )
 # Also try phone number matching via tel last 4
 def match_by_phone(phone):
    """Look up an account whose masked ``tel`` matches an 11-character phone.

    The stored tel is masked (e.g. ``137****3958``), so the lookup compares
    the first three and last four characters of ``phone`` only.

    Args:
        phone: phone number string; anything empty or not exactly 11
            characters long is rejected without touching the database.

    Returns:
        The ``id`` of one matching account with ``status = 1``, or ``None``
        when the input is rejected or no row matches.
    """
    if not phone or len(phone) != 11:
        return None
    # tel is masked like 137****3958, so we can match by first 3 + last 4
    pattern = f"{phone[:3]}****{phone[-4:]}"
    # NOTE(review): several accounts can share the same first-3/last-4 digits;
    # fetchone() returns whichever row the database yields first, so a match is
    # not guaranteed to be the right account.
    conn2 = psycopg2.connect(**PG_CONFIG)
    try:
        cur2 = conn2.cursor()
        try:
            cur2.execute("SELECT id FROM bi_vala_app_account WHERE tel = %s AND status = 1", (pattern,))
            result = cur2.fetchone()
        finally:
            cur2.close()
    finally:
        # Always release the connection, even when the query raises.
        conn2.close()
    return result[0] if result else None
 # Resolve each 11-digit ID to an account id; phones without a hit are left out.
 phone_matches = {}
 for phone in phone_ids:
    resolved = match_by_phone(phone)
    if not resolved:
        continue
    phone_matches[phone] = resolved
 print(f"Phone matches: {len(phone_matches)}")
 # Build Excel
 wb = Workbook()
 # --- Sheet 1: one row per lead, with the matched learning data ---
 ws = wb.active
 ws.title = "小红书线索学习数据"
 # Styles shared by the header rows and the cell grid.
 header_font = Font(name="微软雅黑", bold=True, size=11, color="FFFFFF")
 header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
 header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
 thin_edge = Side(style="thin")
 cell_border = Border(left=thin_edge, right=thin_edge, top=thin_edge, bottom=thin_edge)
 # Column titles: the first nine come from the CSV, the rest from the database.
 headers = [
    "用户ID", "销售归属", "匹配方式", "微伴昵称", "销售表昵称", "手机号",
    "进线日期", "微伴添加时间", "企业标签",
    "账号ID(DB)", "注册时间", "下载渠道", "课程类型", "课程等级",
    "完成课时数", "最后学习时间", "距今日数", "是否付费", "GMV", "GSV", "订单摘要"
 ]
 # Write the styled header into row 1.
 for col_index, title in enumerate(headers, start=1):
    header_cell = ws.cell(row=1, column=col_index, value=title)
    header_cell.border = cell_border
    header_cell.alignment = header_align
    header_cell.fill = header_fill
    header_cell.font = header_font
 # Data rows: one worksheet row per CSV lead, starting below the header.
 # A single Alignment object is reused for every cell instead of building a
 # new one per cell.
 body_align = Alignment(vertical="center")
 row_num = 2
 for r in rows:
    uid = r["用户ID"].strip()
    account_id = None
    # Track how the ID was resolved: a numeric ID that is missing from
    # account_map must not be reported as a phone match.
    matched_by_phone = False
    if uid.isdigit() and len(uid) <= 10:
        account_id = int(uid)
    elif uid.isdigit() and len(uid) == 11:
        # Check phone match
        account_id = phone_matches.get(uid)
        matched_by_phone = account_id is not None
    vals = [
        uid,
        r["销售归属"].strip(),
        r["匹配方式"].strip(),
        r["微伴昵称"].strip(),
        r["销售表昵称"].strip(),
        r["手机号"].strip(),
        r["进线日期"].strip(),
        r["微伴添加时间"].strip(),
        r["企业标签"].strip(),
    ]
    if account_id is not None and account_id in account_map:
        acc = get_user_summary(account_id)
        vals.extend([
            str(account_id), acc[1], acc[2], acc[3], acc[4],
            acc[5], acc[6], acc[7], acc[8],
            # GMV / GSV are shown only when positive, otherwise "-".
            f"¥{acc[9]:.2f}" if isinstance(acc[9], (int, float)) and acc[9] > 0 else "-",
            f"¥{acc[10]:.2f}" if isinstance(acc[10], (int, float)) and acc[10] > 0 else "-",
            acc[11],
        ])
    elif matched_by_phone:
        # Resolved through the masked phone lookup, but the account details
        # were never loaded into account_map, so only the id is reported.
        vals.extend([str(account_id), "手机号匹配(需进一步验证)", "", "", "", "", "", "", "", "", "", ""])
    else:
        # No ID at all, or an ID that is not present in the database.
        vals.extend(["未匹配", "", "", "", "", "", "", "", "", "", "", ""])
    for col, v in enumerate(vals, 1):
        cell = ws.cell(row=row_num, column=col, value=v)
        cell.border = cell_border
        cell.alignment = body_align
    row_num += 1
 # Keep the header row visible while scrolling.
 ws.freeze_panes = "A2"
 # Filter range spans the header plus every data row written so far.
 last_header_col = get_column_letter(len(headers))
 ws.auto_filter.ref = f"A1:{last_header_col}{row_num-1}"
 # Fixed widths, listed in the same order as the header columns.
 col_widths = [10, 10, 22, 22, 18, 14, 12, 20, 18, 12, 18, 14, 10, 8, 10, 18, 8, 8, 10, 10, 30]
 for position, width in enumerate(col_widths, start=1):
    column_letter = get_column_letter(position)
    ws.column_dimensions[column_letter].width = width
 # --- Sheet 2: Summary Stats ---
 ws2 = wb.create_sheet("汇总统计")
 # Stats: one pass over the CSV rows classifies each lead by how its ID resolved.
 total_rows_all = len(rows)
 rows_with_id = 0
 matched_count = 0
 phone_matched_count = 0
 matched_accounts = set()
 for r in rows:
    uid = r["用户ID"].strip()
    if not uid:
        continue
    rows_with_id += 1
    if not uid.isdigit():
        continue
    if len(uid) <= 10:
        aid = int(uid)
        if aid in account_map:
            matched_count += 1
            matched_accounts.add(aid)
    elif len(uid) == 11 and uid in phone_matches:
        phone_matched_count += 1
        matched_accounts.add(phone_matches[uid])
 unmatched = total_rows_all - matched_count - phone_matched_count
 # Learning stats
 accounts_with_courses = [aid for aid in matched_accounts if aid in course_map]
 has_learning = 0
 total_chapters = 0
 purchased = 0
 # Each user_id contributes once, even when it appears on several course rows;
 # counting per row would inflate both the learner count and the chapter total.
 counted_user_ids = set()
 for aid in matched_accounts:
    for c in course_map.get(aid, []):
        uid = c["user_id"]
        if c["is_deleted"] or uid in counted_user_ids or uid not in chapter_stats:
            continue
        counted_user_ids.add(uid)
        chapters = chapter_stats[uid][0]
        total_chapters += chapters
        if chapters > 0:
            has_learning += 1
    # Order statuses 3 and 4 count as a purchase.
    if any(o["order_status"] in (3, 4) for o in order_map.get(aid, [])):
        purchased += 1
 stats_data = [
    ["指标", "数值"],
    ["CSV总行数", str(total_rows_all)],
    ["有用户ID行数", str(rows_with_id)],
    ["已匹配账号(by ID)", str(matched_count)],
    ["已匹配账号(by手机号)", str(phone_matched_count)],
    ["未匹配", str(unmatched)],
    ["匹配账号有课程", str(len(accounts_with_courses))],
    ["完成过课时学习的用户", str(has_learning)],
    ["有付费订单的用户", str(purchased)],
    ["总完成课时数", str(total_chapters)],
 ]
 # Sales breakdown: per sales owner, count leads and how far each one progressed.
 sales_stats = defaultdict(lambda: {"total": 0, "matched": 0, "with_course": 0, "with_learning": 0, "purchased": 0})
 for r in rows:
    sales = r["销售归属"].strip()
    bucket = sales_stats[sales]
    bucket["total"] += 1
    uid = r["用户ID"].strip()
    # Only numeric account IDs are considered here; phone-matched leads are not.
    if not (uid.isdigit() and len(uid) <= 10):
        continue
    aid = int(uid)
    if aid not in account_map:
        continue
    bucket["matched"] += 1
    if aid not in course_map:
        continue
    bucket["with_course"] += 1
    # A lead counts as learning once if any non-deleted course has a user
    # with at least one completed chapter.
    if any(
        not c["is_deleted"]
        and c["user_id"] in chapter_stats
        and chapter_stats[c["user_id"]][0] > 0
        for c in course_map[aid]
    ):
        bucket["with_learning"] += 1
    # NOTE(review): purchases are only counted for accounts that have a course
    # row, unlike the overall purchase figure — confirm this is intended.
    if any(o["order_status"] in (3, 4) for o in order_map.get(aid, [])):
        bucket["purchased"] += 1
 stats_data.append([])
 stats_data.append(["销售归属", "线索总数", "匹配账号", "有课程", "有学习记录", "有付费"])
 # The three known owners keep their fixed order; any other owner found in the
 # CSV is appended afterwards instead of being silently dropped.
 known_sales = ["成都", "小龙", "吴迪"]
 other_sales = sorted(name for name in sales_stats if name not in known_sales)
 for sales in known_sales + other_sales:
    s = sales_stats[sales]
    stats_data.append([sales, s["total"], s["matched"], s["with_course"], s["with_learning"], s["purchased"]])
 # Write the statistics table; only the first row gets the header styling.
 for sheet_row, values in enumerate(stats_data, start=1):
    is_header_row = sheet_row == 1
    for sheet_col, value in enumerate(values, start=1):
        stat_cell = ws2.cell(row=sheet_row, column=sheet_col, value=value)
        stat_cell.border = cell_border
        if is_header_row:
            stat_cell.font = header_font
            stat_cell.fill = header_fill
            stat_cell.alignment = header_align
 # Column widths for the six columns the statistics sheet can use.
 for letter, width in (("A", 25), ("B", 15), ("C", 12), ("D", 10), ("E", 12), ("F", 10)):
    ws2.column_dimensions[letter].width = width
 # Persist the workbook, then echo the headline numbers.
 wb.save(OUTPUT)
 print(f"\nReport saved to: {OUTPUT}")
 print(f"Total matched accounts: {len(matched_accounts)}")
 print(f"With courses: {len(accounts_with_courses)}")
 print(f"With learning: {has_learning}")
 print(f"With purchases: {purchased}")
--- a/skills/phone-chapter-query/scripts/phone_chapter_query.py
+++ b/skills/phone-chapter-query/scripts/phone_chapter_query.py
@ -184,40 +184,51 @@ def step2_query_chapter_play(account_ids, pg_password):
        SHARD_COUNT,
    )
-    comp_union = build_union_sql(
+    # 为每个分表生成带过滤的 component_play_record 查询
-        "bi_user_component_play_record",
+    comp_parts = []
-        "chapter_unique_id, interval_time",
+    for i in range(SHARD_COUNT):
-        "",
+        comp_parts.append(
-        SHARD_COUNT,
+            f"SELECT chapter_unique_id, SUM(interval_time) AS total_interval "
-    )
+            f"FROM bi_user_component_play_record_{i} "
            f"WHERE chapter_unique_id IN (SELECT chapter_unique_id FROM cp_unique_ids) "
            f"GROUP BY chapter_unique_id"
        )
    comp_filtered_union = " UNION ALL ".join(comp_parts)
    sql = f"""
 WITH chapter_play AS (
    {chapter_union}
 ),
 filtered_play AS (
    SELECT cp.user_id, cp.chapter_id, cp.chapter_unique_id, cp.finish_date,
        ROW_NUMBER() OVER (PARTITION BY cp.user_id, cp.chapter_id ORDER BY cp.finish_date) AS rn
    FROM chapter_play cp
    JOIN bi_vala_app_character c ON cp.user_id = c.id
    JOIN bi_vala_app_account a ON c.account_id = a.id
    WHERE a.id IN ({aid_list})
 ),
 cp_unique_ids AS (
    SELECT DISTINCT chapter_unique_id FROM filtered_play WHERE rn = 1
 ),
 comp_time AS (
-    SELECT chapter_unique_id, SUM(interval_time) AS total_interval
+    SELECT chapter_unique_id, SUM(total_interval) AS total_interval
-    FROM ({comp_union}) t
+    FROM ({comp_filtered_union}) t
    GROUP BY chapter_unique_id
 ),
 course_detail AS (
    SELECT
-        cp.user_id,
+        fp.user_id,
-        cp.chapter_id,
+        fp.chapter_id,
        FORMAT('%s-%s-%s-%s', l.course_level, l.course_season, l.course_unit, l.course_lesson) AS course_id,
-        cp.finish_date,
+        fp.finish_date,
        FORMAT('%s:%s',
            FLOOR(ct.total_interval / 1000 / 60),
            LPAD(CAST(MOD(ct.total_interval / 1000, 60) AS TEXT), 2, '0')
        ) AS finish_time
-    FROM (
+    FROM filtered_play fp
-        SELECT user_id, chapter_id, chapter_unique_id, finish_date,
+    LEFT JOIN bi_level_unit_lesson l ON fp.chapter_id = l.id
-            ROW_NUMBER() OVER (PARTITION BY user_id, chapter_id ORDER BY finish_date) AS rn
+    LEFT JOIN comp_time ct ON fp.chapter_unique_id = ct.chapter_unique_id
-        FROM chapter_play
+    WHERE fp.rn = 1
    ) cp
    LEFT JOIN bi_level_unit_lesson l ON cp.chapter_id = l.id
    LEFT JOIN comp_time ct ON cp.chapter_unique_id = ct.chapter_unique_id
    WHERE cp.rn = 1
 )
 SELECT