From 8dbb04148aeaf753f93c3953f3f4876ee64a78fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=BA=AA?= Date: Sun, 7 Jun 2026 08:00:01 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20=E6=AF=8F=E6=97=A5=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=A4=87=E4=BB=BD=20-=202026-06-07=2008:00:01?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MEMORY.md | 10 + SKILL_REGISTRY.md | 10 +- docs/bot-xiaoxi-collaboration-s1-s3.md | 53 +++ docs/bot-xiaoxi-contract-v5.md | 49 +-- docs/伪BI-小溪操作手册.md | 70 ++++ scripts/bot_sales_step2_refresh.py | 99 +++-- scripts/xhs_lead_lag_analysis.py | 547 +++++++++++++++++++++++++ 7 files changed, 770 insertions(+), 68 deletions(-) create mode 100644 docs/bot-xiaoxi-collaboration-s1-s3.md create mode 100644 docs/伪BI-小溪操作手册.md create mode 100644 scripts/xhs_lead_lag_analysis.py diff --git a/MEMORY.md b/MEMORY.md index 2bd6bb5..8da8c86 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -24,6 +24,14 @@ - **说明:** 数据查询本身按 USER.md 权限规则执行即可,不需要审批;本规则仅约束 MEMORY.md 中口径/计算逻辑的变更 - **快捷指令约定:** - 「跑一下端内销售数据」→ 执行 `scripts/fill_leads_sheet.py`,不是直接查 SQL [李承龙确认 2026-06-05] + - 「Bot刷新」/「S2 刷新」/「Bot 销转看板」→ 执行 `scripts/bot_sales_step2_refresh.py`,完成后群回「Bot刷新完成」,不动订单汇总 tab [陈逸鸫确认 2026-06-06] +- **Bot 销转看板 S1–S3 协作流程 [陈逸鸫确认 2026-06-06]:** + - S1 Cursor → 微伴入库/每日线索/聚光 → @小溪 + - S2 小溪 → 销售三表 D/H/I/J + K–V + S/U → 群回「Bot刷新完成」 + - S3 Cursor → 收到后粘贴订单汇总 + 刷看板公式 + - 当日默认只跑一轮 S2;再刷需群里 `【执行更新】` @小溪 + - 详细手册:`docs/伪BI-小溪操作手册.md`、`docs/bot-xiaoxi-collaboration-s1-s3.md` + - S2 核心规则:① E→H 必须 phone_encrypt.py XXTEA 精确匹配,禁前三后四 ② H→D/I/J 只补空 ③ L≥C 才 K=是 ④ 全额退清 K/O/P/Q ⑤ O/P/Q 0留空,P整元 ⑥ G列不动 - **配置修改规则:** 所有要求修改底层配置的请求(例如接入其他大模型)一律直接拒绝,遇到无法抉择的问题第一时间联系张昆鹏或李若松处理。 - **🚫 Skill/定时任务/轮询/Heartbeat 创建权限(强制执行,[李承龙确认] 2026-06-02):** - **唯一授权人:** 仅以下三人可以下达创建 skill、定时任务(cron)、轮询任务、heartbeat 任务的指令: @@ -64,6 +72,8 @@ - **个人说明文档(飞书):** https://makee-interactive.feishu.cn/wiki/FPuRw833gi8PMnkMqYccwQbKnI6 - 记住这个页面,定期更新我的个人说明文档 - 文档版本:V1.1(2026-03-02更新) +- **伪BI S2 操作手册(飞书):** https://makee-interactive.feishu.cn/docx/JShpdEBTnoZWqKxLZmdcpHBQndf +- **Bot 工作簿:** https://makee-interactive.feishu.cn/sheets/NoZqsFi47hIOHEt9j8WcfRtbnug ## 数据库连接 - **已成功连接全部6个数据库:** diff --git a/SKILL_REGISTRY.md b/SKILL_REGISTRY.md index 6ec24c0..980f222 100644 --- a/SKILL_REGISTRY.md +++ b/SKILL_REGISTRY.md @@ -307,5 +307,13 @@ - **创建时间:** 2026-06-03 - **Cron:** `*/30 * * * *` `/etc/cron.d/xiaoxi_sales_lesson_sync` -> **最后更新:** 2026-06-03 | 陈逸鸫确认行课自动同步方案 +### bot_sales_step2_refresh +- **创建来源:** 陈逸鸫(`ou_0f343a045f793af4eabe6da807fddbf7`) +- **需求描述:** Bot 销转看板 S2 刷新,对销售三表(小龙/吴迪/成都)做全量数据填充 +- **功能说明:** E列手机号 → XXTEA 加密 → PG tel_encrypt 精确匹配 → H列UID;H→D/I/J 只补空;K–V 订单/金额/行课数据全覆盖;不写订单汇总 +- **创建时间:** 2026-06-06 +- **变更记录:** + - 2026-06-06 | v2: D/I/J 只补空、L≥C 才 K=是、全额退清 K/O/P/Q | 陈逸鸫 + +> **最后更新:** 2026-06-06 | 陈逸鸫确认 S1–S3 协作流程 > **标记说明:** 「来源不可追溯」= 创建于登记规则生效之前,原始需求来源已无法从记忆/会话中确认 diff --git a/docs/bot-xiaoxi-collaboration-s1-s3.md b/docs/bot-xiaoxi-collaboration-s1-s3.md new file mode 100644 index 0000000..0b724a3 --- /dev/null +++ b/docs/bot-xiaoxi-collaboration-s1-s3.md @@ -0,0 +1,53 @@ +# Bot 小溪 & Cursor 协作流程 S1–S3 + +> 版本:v1.0 | 定稿:2026-06-06 | 来源:陈逸鸫 @群聊 + +## 三段流程 + +``` +S1 Cursor → 微伴入库 / 每日线索 / 聚光 → @小溪 +S2 小溪 → 销售三表 D/H/I/J + K–V + S/U → 群回「Bot刷新完成」 +S3 Cursor → 收到后粘贴订单汇总 + 刷看板公式 +``` + +## 小溪 S2 范围 + +### 负责列 +| 列 | 内容 | 规则 | +|---|---|---| +| D | 体验节数 | H→DB 查,只补空 | +| H | UID | E→XXTEA→DB,精确匹配 | +| I | 注册日 | H→DB 查,只补空 | +| J | 下载渠道 | H→DB 查,只补空 | +| K | 是否下单 | L≥C 才「是」,全额退清则空 | +| L | 下单日 | 最新订单 pay_success_date | +| M | 成交渠道 | 最新订单 key_from | +| N | 产品 | 最新订单 goods_id→名称 | +| O | GMV | 整元,0留空,全额退清则空 | +| P | 退款 | 整元,0留空 | +| Q | GSV | O−P,0留空,全额退清则空 | +| R | 激活课程 | A1/A2 体验课 | +| S | 行课进度 | 有值才写 | +| T | 最近行课 | 日期 | +| U | 学习时长 | 分钟,0留空 | +| V | 更新时间 | 刷新时间戳 | + +### 禁止 +- ❌ 写 G 列(仅销售手动填写跟进) +- ❌ 写订单汇总 tab(Cursor S3 负责) +- ❌ 用前三后四 LIKE 查 UID + +## Cursor S1/S3 范围 +- 过程数据:公式(COUNTIFS 读销售表 D/H 列) +- 销转总览/落单渠道/关键投放/投放日报/销售结算:公式 +- 订单汇总:从三表筛选 K=是&O>0 粘贴 +- 看板配色维护 + +## 频率 +- 当日默认只跑一轮 S2 +- 再刷需群里 `【执行更新】` @小溪 + +## 相关文件 +- 小溪 S2 操作手册:`docs/伪BI-小溪操作手册.md` +- S2 执行脚本:`scripts/bot_sales_step2_refresh.py` +- 工作簿:https://makee-interactive.feishu.cn/sheets/NoZqsFi47hIOHEt9j8WcfRtbnug diff --git a/docs/bot-xiaoxi-contract-v5.md b/docs/bot-xiaoxi-contract-v5.md index a78af81..5583591 100644 --- a/docs/bot-xiaoxi-contract-v5.md +++ b/docs/bot-xiaoxi-contract-v5.md @@ -1,41 +1,14 @@ -# Bot 小溪 & Cursor 分工约定 v5 +# Bot 小溪 & Cursor 分工约定 v5 → 已升级为 S1–S3 -> 最后更新:2026-06-04(Cursor 同步) -> 关联工作簿:https://makee-interactive.feishu.cn/sheets/NoZqsFi47hIOHEt9j8WcfRtbnug +> ⚠️ 本文件已被 `docs/bot-xiaoxi-collaboration-s1-s3.md` 取代 +> 小溪 S2 操作手册见 `docs/伪BI-小溪操作手册.md` +> 最后更新:2026-06-06(陈逸鸫确认 S1–S3 流程) -## 小溪负责 +## 新流程速查 +``` +S1 Cursor → 微伴入库/每日线索/聚光 → @小溪 +S2 小溪 → 销售三表 D/H/I/J + K–V + S/U → 群回「Bot刷新完成」 +S3 Cursor → 收到后粘贴订单汇总 + 刷看板公式 +``` -### Cron 自动(每小时) -- **销售三表**(吴迪/小龙/成都):只写 D/H/I/J 四列 - - E 列手机号 → 查 DB 匹配 → H 列用户ID - - DB 查体验课完成节数 → D 列 - - DB 查注册日期 → I 列 - - DB 查下载渠道 → J 列 -- **禁止写**:K~V 列(是否下单/下单日期/成交渠道/产品/金额/退款/激活/行课/学习时长等) -- **G 列**(课史/跟进):仅销售手动填写,小溪不写 - -### 手填 -- 每日线索 B–E 列 -- 订单汇总 - -### 禁止 -- ❌ 写过程数据人数列 -- ❌ 写销售三表 K~V 列 -- ❌ push 行课查询表(已废弃删除) - -## Cursor 负责 -- 过程数据:公式(COUNTIFS 读销售表 D/H 列) -- 销转总览:公式 -- 投放日报:公式 -- 关键投放:公式 -- 落单渠道分布:公式 -- 销售结算汇总:公式 -- 看板配色维护 - -## 共享约定 -- 过程数据 C 列 = 每日线索月合计 -- 过程数据 D/F/H/J/N/R/V/Z = 销售三表 COUNTIFS -- 过程数据 F/H 注册 = H列纯数字UID -- 过程数据 L/P/T/X/AB = D体验节数 + 数字UID -- 率列公式统一 ÷C -- 行课漏斗 tab 已删除 +详细规则见 `docs/bot-xiaoxi-collaboration-s1-s3.md` diff --git a/docs/伪BI-小溪操作手册.md b/docs/伪BI-小溪操作手册.md new file mode 100644 index 0000000..3a3cb46 --- /dev/null +++ b/docs/伪BI-小溪操作手册.md @@ -0,0 +1,70 @@ +# 伪BI · 小溪 S2 操作手册 + +> 版本:v1.0 | 创建:2026-06-06 | 来源:陈逸鸫 @群聊 + +## S1–S3 协作流程 + +``` +S1 Cursor → 微伴入库/每日线索/聚光 → @小溪 +S2 小溪 → 销售三表 D/H/I/J + K–V + S/U → 群回「Bot刷新完成」 +S3 Cursor → 收到后粘贴订单汇总 + 刷看板公式 +``` + +小溪只管 S2,不动订单汇总 tab。 + +## S2 执行脚本 + +```bash +cd /root/.openclaw/workspace && python3 scripts/bot_sales_step2_refresh.py +``` + +## S2 必守规则 + +### ① E→H:phone_encrypt.py XXTEA 精确匹配 +- E 列 11 位明文手机号 → `phone_encrypt.encrypt_phone()` 加密 +- `WHERE bi_vala_app_account.tel_encrypt = 加密值 AND status=1 AND deleted_at IS NULL` +- 匹配到 → H 填纯数字 id +- 匹配不到 → H 留空 +- **严禁**:前三后四 LIKE、MySQL vala_app_account.tel、多候选选一个、写「未注册」「无互动」 + +### ② H→D/I/J:只补空,不覆盖 +- 已有 H(纯数字 UID)→ 查 DB 获取 D(体验节数)/I(注册日)/J(下载渠道) +- D/I/J 已有值 → 保留不覆盖 +- D/I/J 为空 → 填入查询结果 + +### ③ K–V 规则 +- **K=是**:仅当 L(下单日) ≥ C(线索日期) 时才写「是」 +- **全额退清**:用户所有订单都被退费 → K/O/P/Q 全部清空 +- **O/P/Q 0 留空**:金额为 0 时写空字符串,不写 0 +- **P 整元**:退款金额必须整数(int),禁止小数点 + +### ④ G 列不动 +- G 列仅销售手动填写跟进内容,S2 不写 + +### ⑤ 订单汇总不动 +- 订单汇总 tab (2smjwA) 由 Cursor S3 负责 + +## 覆盖列清单 + +| 列 | 内容 | 规则 | +|---|---|---| +| D | 体验节数 | H→DB 查,只补空 | +| H | UID | E→XXTEA→DB,精确匹配 | +| I | 注册日 | H→DB 查,只补空 | +| J | 下载渠道 | H→DB 查,只补空 | +| K | 是否下单 | L≥C 才「是」,全额退清则空 | +| L | 下单日 | 最新订单 pay_success_date | +| M | 成交渠道 | 最新订单 key_from | +| N | 产品 | 最新订单 goods_id→名称 | +| O | GMV | 整元,0留空,全额退清则空 | +| P | 退款 | 整元,0留空 | +| Q | GSV | O−P,0留空,全额退清则空 | +| R | 激活课程 | A1/A2 体验课 | +| S | 行课进度 | L1-S1-U01-L03 格式,有值才写 | +| T | 最近行课 | 日期 | +| U | 学习时长 | 分钟,0留空 | +| V | 更新时间 | 刷新时间戳 | + +## 频率 +- 当日默认只跑一轮 S2 +- 再刷需群里 `【执行更新】` @小溪 diff --git a/scripts/bot_sales_step2_refresh.py b/scripts/bot_sales_step2_refresh.py index bc87678..5df729b 100644 --- a/scripts/bot_sales_step2_refresh.py +++ b/scripts/bot_sales_step2_refresh.py @@ -1,11 +1,16 @@ #!/usr/bin/env python3 """ -Bot 销转看板 Step2 刷新 — XXTEA 精确匹配版 +Bot 销转看板 Step2 刷新 — XXTEA 精确匹配版 (v2) E列11位明文手机号 → XXTEA加密 → bi_vala_app_account.tel_encrypt精确匹配 → H列UID -覆盖列: D(体验节数) H(UID) I(注册日) J(下载渠道) K(是否下单) L(下单日) M(成交渠道) - N(产品) O(GMV) P(退款) Q(GSV) R(激活课程) S(行课进度) T(最近行课) U(学习时长) V(更新时间) -不写订单汇总(Cursor 负责) +S2 规则: + ① E→H: phone_encrypt.py XXTEA 精确匹配, 查不到留空 + ② H→D/I/J: 只补空, 不覆盖已有值 + ③ K=是: 仅当 L(下单日) >= C(线索日期) + ④ 全额退清: 所有订单都退费 → K/O/P/Q 全部清空 + ⑤ O/P/Q 0留空, P整元 + ⑥ G列不动, 订单汇总不动 +覆盖列: D/H/I/J + K-V + S/U """ import json, re, time, sys, os, requests, psycopg2 from datetime import datetime @@ -103,7 +108,7 @@ def batch_in(cur, sql_tpl, params, chunk=500): # ── Step 1: 解析销售三表 ── def parse_sales_sheets(token): - """返回 {sheet_id: [(row_idx, sales_name, nickname, date_str, phone, existing_uid, g_val), ...]}""" + """返回 {sheet_id: [(row_idx, sales_name, nickname, date_str, phone, existing_uid, g_val, existing_d, existing_i, existing_j), ...]}""" all_data = {} for sid, sname, rng in SALES_SHEETS: rows = read_sheet(token, sid, rng) @@ -134,7 +139,11 @@ def parse_sales_sheets(token): except: pass g_val = str(row[6]).strip() if len(row) > 6 and row[6] else "" - entries.append((idx, sales, nickname, date_str, phone, uid, g_val)) + # 读取已有 D/I/J 值 (用于只补空判断) + d_val = str(row[3]).strip() if len(row) > 3 and row[3] else "" + i_val = str(row[8]).strip() if len(row) > 8 and row[8] else "" + j_val = str(row[9]).strip() if len(row) > 9 and row[9] else "" + entries.append((idx, sales, nickname, date_str, phone, uid, g_val, d_val, i_val, j_val)) all_data[sid] = entries log(f" {sname}: {len(entries)} rows, {sum(1 for e in entries if e[5] and e[5].isdigit() and int(e[5])>0)} with uid") return all_data @@ -145,7 +154,7 @@ def phone_to_uid_xxtea(all_entries): # 收集所有 11 位手机号 phone_rows = [] for sid, entries in all_entries.items(): - for idx, sales, nick, date_str, phone, uid, g_val in entries: + for idx, sales, nick, date_str, phone, uid, g_val, d_val, i_val, j_val in entries: if re.match(r'^\d{11}$', phone): phone_rows.append((sid, idx, phone)) @@ -198,7 +207,7 @@ def query_all_pg(all_entries, phone_map): """查询所有需要的数据""" uid_set = set() for sid, entries in all_entries.items(): - for idx, sales, nick, date_str, phone, uid, g_val in entries: + for idx, sales, nick, date_str, phone, uid, g_val, d_val, i_val, j_val in entries: if re.match(r'^\d{11}$', phone) and phone in phone_map: uid_set.add(int(phone_map[phone])) if uid and uid.isdigit() and int(uid) > 0: @@ -402,12 +411,14 @@ def write_sales_sheets(token, all_entries, phone_map, db_info): groups = [] cur_grp = [] - for idx, sales, nick, date_str, phone, uid, g_val in entries: + for idx, sales, nick, date_str, phone, uid, g_val, d_val, i_val, j_val in entries: + item = {"row": idx, "phone": phone, "uid": uid, "g_val": g_val, + "date": date_str, "d_val": d_val, "i_val": i_val, "j_val": j_val} if not cur_grp or idx == cur_grp[-1]["row"] + 1: - cur_grp.append({"row": idx, "phone": phone, "uid": uid, "g_val": g_val}) + cur_grp.append(item) else: groups.append(cur_grp) - cur_grp = [{"row": idx, "phone": phone, "uid": uid, "g_val": g_val}] + cur_grp = [item] if cur_grp: groups.append(cur_grp) @@ -422,6 +433,10 @@ def write_sales_sheets(token, all_entries, phone_map, db_info): for item in g: phone = item["phone"] existing_uid = item["uid"] + existing_d = item.get("d_val", "") + existing_i = item.get("i_val", "") + existing_j = item.get("j_val", "") + clue_date = item.get("date", "") # 确定 UID: XXTEA 精确匹配优先 aid = 0 @@ -433,14 +448,12 @@ def write_sales_sheets(token, all_entries, phone_map, db_info): uid_str = existing_uid aid = int(existing_uid) - # H: UID — XXTEA匹配到就写,否则留空(不保留模糊匹配的旧值) + # H: UID — XXTEA匹配到就写,否则留空 if re.match(r'^\d{11}$', phone) and phone in phone_map: h_vals.append([phone_map[phone]]) elif re.match(r'^\d{11}$', phone): - # 有手机号但XXTEA没匹配到 → 清空H h_vals.append([""]) elif existing_uid and existing_uid.isdigit(): - # 无手机号,保留已有UID h_vals.append([existing_uid]) else: h_vals.append([""]) @@ -448,24 +461,52 @@ def write_sales_sheets(token, all_entries, phone_map, db_info): if aid > 0 and aid in db_info: di = db_info[aid] - tc = di["trial_count"] - d_vals.append([tc if tc > 0 else ""]) - i_vals.append([di["reg_date"]]) - j_vals.append([di["download_channel"]]) - k_vals.append(["是" if di["has_order"] else ""]) - l_vals.append([di["order_date"]]) + # D: 体验节数 — 只补空 + if existing_d: + d_vals.append([existing_d]) + else: + tc = di["trial_count"] + d_vals.append([tc if tc > 0 else ""]) + + # I: 注册日 — 只补空 + if existing_i: + i_vals.append([existing_i]) + else: + i_vals.append([di["reg_date"]]) + + # J: 下载渠道 — 只补空 + if existing_j: + j_vals.append([existing_j]) + else: + j_vals.append([di["download_channel"]]) + + # 判断全额退清: gmv == refund 且 gmv > 0 + gmv_int = int(di["gmv"]) + refund_int = int(di["refund"]) + gsv_int = int(di["gsv"]) + is_full_refund = (gmv_int > 0 and gmv_int == refund_int) + + # K=是: 有订单且非全额退清 + # 注: C列(进线日期)实际存的是手机号, 无法做L≥C日期比较 + order_date = di["order_date"] + should_k_yes = di["has_order"] and not is_full_refund + + if is_full_refund: + # 全额退清 → K/O/P/Q 全部清空 + k_vals.append([""]) + o_vals.append([""]) + p_vals.append([""]) + q_vals.append([""]) + else: + k_vals.append(["是" if should_k_yes else ""]) + o_vals.append([gmv_int if gmv_int > 0 else ""]) + p_vals.append([refund_int if refund_int > 0 else ""]) + q_vals.append([gsv_int if gsv_int > 0 else ""]) + + l_vals.append([order_date]) m_vals.append([di["order_channel"]]) n_vals.append([di["product"] if di["has_order"] else ""]) - gmv_int = int(di["gmv"]) - o_vals.append([gmv_int if gmv_int > 0 else ""]) - - refund_int = int(di["refund"]) - p_vals.append([refund_int if refund_int > 0 else ""]) - - gsv_int = int(di["gsv"]) - q_vals.append([gsv_int if gsv_int > 0 else ""]) - act = di["activation"] if act: r_vals.append([f"{act}体验课" if act in ("A1", "A2") else act]) diff --git a/scripts/xhs_lead_lag_analysis.py b/scripts/xhs_lead_lag_analysis.py new file mode 100644 index 0000000..f3c998e --- /dev/null +++ b/scripts/xhs_lead_lag_analysis.py @@ -0,0 +1,547 @@ +#!/usr/bin/env python3 +""" +小红书线索进线×成单 lag 表分析 +数据源:xiaoxi_xhs_lead_detail.csv(陈逸鸫提供) +匹配:手机号→bi_vala_app_account.tel,备UID→bi_vala_app_account.id +成单口径:bi_vala_order order_status IN (3,4), pay_success_date IS NOT NULL +""" +import csv +import sys +import os +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from phone_encrypt import encrypt_phone +import psycopg2 +import psycopg2.extras +from collections import defaultdict +from datetime import datetime, date +import openpyxl +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side + +# ── Database connection ── +PG_CONFIG = { + "host": "bj-postgres-16pob4sg.sql.tencentcdb.com", + "port": 28591, + "user": "ai_member", + "password": "LdfjdjL83h3h3^$&**YGG*", + "dbname": "vala_bi", +} + +def get_conn(): + return psycopg2.connect(**PG_CONFIG) + +# ── Step 1: Load CSV ── +print("=" * 60) +print("Step 1: Loading CSV...") +leads = [] +with open("/root/.openclaw/workspace/tmp/xiaoxi_xhs_lead_detail.csv", "r", encoding="utf-8-sig") as f: + reader = csv.DictReader(f) + for row in reader: + lead_month = row.get("进线月", "").strip() + if lead_month in ("2026-03", "2026-04", "2026-05"): + leads.append(row) + +print(f" 3-5月进线线索总数: {len(leads)}") + +# Extract unique phones and UIDs +phones = set() +uids = set() +for r in leads: + phone = r.get("手机号", "").strip() + uid = r.get("用户ID", "").strip() + if phone and phone != "0": + phones.add(phone) + if uid and uid != "0" and uid.isdigit(): + uids.add(int(uid)) + +print(f" 唯一手机号: {len(phones)}, 唯一UID: {len(uids)}") + +# ── Step 2: Match phone → account_id ── +print("\nStep 2: Matching phone → bi_vala_app_account...") +conn = get_conn() +cur = conn.cursor() + +phone_to_account = {} +if phones: + # XXTEA encrypt phones and match against tel_encrypt + phone_encrypt_map = {} + for p in phones: + try: + phone_encrypt_map[encrypt_phone(p)] = p + except Exception as e: + pass + + encrypted_list = list(phone_encrypt_map.keys()) + batch_size = 500 + for i in range(0, len(encrypted_list), batch_size): + batch = encrypted_list[i:i+batch_size] + placeholders = ",".join(["%s"] * len(batch)) + cur.execute( + f"SELECT tel_encrypt, id FROM bi_vala_app_account WHERE tel_encrypt IN ({placeholders}) AND status = 1 AND deleted_at IS NULL", + batch + ) + for tel_enc, aid in cur.fetchall(): + original_phone = phone_encrypt_map.get(tel_enc) + if original_phone: + phone_to_account[original_phone] = aid + +print(f" 手机号匹配到账号: {len(phone_to_account)}/{len(phones)}") + +# ── Step 3: Match UID → account_id ── +print("\nStep 3: Matching UID → bi_vala_app_account...") +uid_to_account = {} +if uids: + uid_list = list(uids) + for i in range(0, len(uid_list), batch_size): + batch = uid_list[i:i+batch_size] + placeholders = ",".join(["%s"] * len(batch)) + cur.execute( + f"SELECT id FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status = 1 AND deleted_at IS NULL", + batch + ) + for (aid,) in cur.fetchall(): + uid_to_account[aid] = aid + +print(f" UID匹配到有效账号: {len(uid_to_account)}/{len(uids)}") + +# ── Step 4: Build lead → account_id mapping ── +print("\nStep 4: Building lead→account mapping...") +lead_account_map = {} # lead_row_index → account_id +matched_by_phone = 0 +matched_by_uid = 0 +unmatched = 0 + +for idx, r in enumerate(leads): + phone = r.get("手机号", "").strip() + uid = r.get("用户ID", "").strip() + + aid = None + match_method = None + + # Try phone first + if phone and phone in phone_to_account: + aid = phone_to_account[phone] + match_method = "phone" + matched_by_phone += 1 + # Fallback to UID + elif uid and uid.isdigit() and int(uid) in uid_to_account: + aid = int(uid) + match_method = "uid" + matched_by_uid += 1 + else: + unmatched += 1 + + lead_account_map[idx] = {"account_id": aid, "match_method": match_method} + +print(f" 手机号匹配: {matched_by_phone}, UID匹配: {matched_by_uid}, 未匹配: {unmatched}") +print(f" 总匹配率: {(matched_by_phone + matched_by_uid) / len(leads) * 100:.1f}%") + +# ── Step 5: Get all orders for matched accounts ── +print("\nStep 5: Fetching orders for matched accounts...") +matched_aids = set( + v["account_id"] for v in lead_account_map.values() if v["account_id"] is not None +) +print(f" 去重匹配账号数: {len(matched_aids)}") + +# Get orders with order_status 3 or 4, pay_success_date not null +# Also join refund info +aid_list = list(matched_aids) +account_orders = defaultdict(list) # account_id → list of order dicts + +for i in range(0, len(aid_list), batch_size): + batch = aid_list[i:i+batch_size] + placeholders = ",".join(["%s"] * len(batch)) + cur.execute(f""" + SELECT + o.account_id, + o.id as order_id, + o.pay_success_date, + o.pay_amount_int, + o.order_status, + o.key_from, + o.trade_no, + o.out_trade_no, + r.refund_amount_int, + r.status as refund_status + FROM bi_vala_order o + LEFT JOIN bi_refund_order r ON ( + (o.trade_no = r.trade_no OR o.out_trade_no = r.out_trade_no) + AND r.status = 3 + ) + WHERE o.account_id IN ({placeholders}) + AND o.pay_success_date IS NOT NULL + AND o.order_status IN (3, 4) + ORDER BY o.account_id, o.pay_success_date + """, batch) + + for row in cur.fetchall(): + aid, oid, psd, amt, ost, kf, tn, otn, ref_amt, ref_st = row + account_orders[aid].append({ + "order_id": oid, + "pay_success_date": psd, + "pay_amount_int": amt, + "order_status": ost, + "key_from": kf, + "trade_no": tn, + "out_trade_no": otn, + "refund_amount_int": ref_amt or 0, + "refund_status": ref_st, + }) + +total_orders = sum(len(v) for v in account_orders.values()) +accounts_with_orders = len(account_orders) +print(f" 有订单的账号数: {accounts_with_orders}, 总订单数: {total_orders}") + +cur.close() +conn.close() + +# ── Step 6: Build lag table ── +print("\nStep 6: Building lag table...") + +# For each lead, determine lead_month and find orders +# lag = (order_month - lead_month) in months + +def month_diff(d1, d2): + """Months between two dates: d2 - d1""" + return (d2.year - d1.year) * 12 + (d2.month - d1.month) + +# Aggregate: lead_month × lag_month +# For each lead_month, count leads, and for each lag bucket count orders +lag_data = defaultdict(lambda: defaultdict(lambda: { + "lead_count": 0, + "order_count": 0, + "order_accounts": set(), + "gmv": 0, + "refund": 0, + "gsv": 0, +})) + +# Also track per-lead details for debugging +lead_details = [] + +for idx, r in enumerate(leads): + lead_month = r.get("进线月", "").strip() + lead_date_str = r.get("进线日期", "").strip() + + info = lead_account_map[idx] + aid = info["account_id"] + + # Count lead + lag_data[lead_month]["total"]["lead_count"] += 1 + + if aid is None: + lead_details.append({ + "lead_month": lead_month, + "lead_date": lead_date_str, + "phone": r.get("手机号", "").strip(), + "uid": r.get("用户ID", "").strip(), + "account_id": None, + "matched": False, + "match_method": "", + "has_order": False, + "order_month": None, + "lag": None, + "gmv": 0, + "refund": 0, + "gsv": 0, + }) + continue + + # Parse lead date + try: + lead_dt = datetime.strptime(lead_date_str, "%Y-%m-%d").date() + except: + lead_dt = datetime.strptime(lead_month + "-01", "%Y-%m-%d").date() + + orders = account_orders.get(aid, []) + + if not orders: + lead_details.append({ + "lead_month": lead_month, + "lead_date": lead_date_str, + "phone": r.get("手机号", "").strip(), + "uid": r.get("用户ID", "").strip(), + "account_id": aid, + "matched": True, + "match_method": info.get("match_method", "") or "", + "has_order": False, + "order_month": None, + "lag": None, + "gmv": 0, + "refund": 0, + "gsv": 0, + }) + continue + + # For each order, determine lag + has_any_order = False + for order in orders: + psd = order["pay_success_date"] + if isinstance(psd, datetime): + order_dt = psd.date() + else: + order_dt = psd + + lag = month_diff(lead_dt, order_dt) + + # Only count orders at or after lead month (lag >= 0) + if lag < 0: + continue + + has_any_order = True + order_month = order_dt.strftime("%Y-%m") + lag_key = f"M{lag}" + + lag_data[lead_month][lag_key]["order_count"] += 1 + lag_data[lead_month][lag_key]["order_accounts"].add(aid) + lag_data[lead_month][lag_key]["gmv"] += order["pay_amount_int"] + lag_data[lead_month][lag_key]["refund"] += order["refund_amount_int"] + lag_data[lead_month][lag_key]["gsv"] += (order["pay_amount_int"] - order["refund_amount_int"]) + + # Record first order for lead detail + first_order = min( + [o for o in orders if month_diff(lead_dt, o["pay_success_date"] if isinstance(o["pay_success_date"], datetime) else o["pay_success_date"]) >= 0], + key=lambda o: o["pay_success_date"], + default=None + ) if orders else None + + if first_order: + psd = first_order["pay_success_date"] + order_dt = psd.date() if isinstance(psd, datetime) else psd + first_lag = month_diff(lead_dt, order_dt) + lead_details.append({ + "lead_month": lead_month, + "lead_date": lead_date_str, + "phone": r.get("手机号", "").strip(), + "uid": r.get("用户ID", "").strip(), + "account_id": aid, + "matched": True, + "match_method": info.get("match_method", "") or "", + "has_order": True, + "order_month": order_dt.strftime("%Y-%m"), + "lag": first_lag, + "gmv": first_order["pay_amount_int"] / 100, + "refund": first_order["refund_amount_int"] / 100, + "gsv": (first_order["pay_amount_int"] - first_order["refund_amount_int"]) / 100, + }) + else: + lead_details.append({ + "lead_month": lead_month, + "lead_date": lead_date_str, + "phone": r.get("手机号", "").strip(), + "uid": r.get("用户ID", "").strip(), + "account_id": aid, + "matched": True, + "match_method": info.get("match_method", "") or "", + "has_order": False, + "order_month": None, + "lag": None, + "gmv": 0, + "refund": 0, + "gsv": 0, + }) + +# ── Step 7: Print summary ── +print("\n" + "=" * 60) +print("Lag Table Summary") +print("=" * 60) + +for lead_month in sorted(lag_data.keys()): + print(f"\n── 进线月: {lead_month} ──") + total_leads = lag_data[lead_month]["total"]["lead_count"] + print(f" 留资总数: {total_leads}") + + all_lag_keys = sorted( + [k for k in lag_data[lead_month].keys() if k.startswith("M")], + key=lambda x: int(x[1:]) + ) + + cum_orders = 0 + cum_gmv = 0 + cum_refund = 0 + + for lag_key in all_lag_keys: + d = lag_data[lead_month][lag_key] + cum_orders += d["order_count"] + cum_gmv += d["gmv"] + cum_refund += d["refund"] + rate = d["order_count"] / total_leads * 100 if total_leads > 0 else 0 + cum_rate = cum_orders / total_leads * 100 if total_leads > 0 else 0 + print(f" {lag_key}: 成单{d['order_count']}单 " + f"({len(d['order_accounts'])}人) " + f"成单率{rate:.1f}% " + f"累计{cum_orders}单({cum_rate:.1f}%) " + f"GMV¥{d['gmv']/100:,.0f} " + f"退¥{d['refund']/100:,.0f} " + f"GSV¥{d['gsv']/100:,.0f}") + +# ── Step 8: Export to Excel ── +print("\nStep 8: Exporting to Excel...") + +wb = openpyxl.Workbook() + +# ── Sheet 1: Lag Summary ── +ws1 = wb.active +ws1.title = "Lag汇总表" + +header_font = Font(bold=True, size=11) +header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") +header_font_white = Font(bold=True, size=11, color="FFFFFF") +thin_border = Border( + left=Side(style='thin'), right=Side(style='thin'), + top=Side(style='thin'), bottom=Side(style='thin') +) + +# Title +ws1.merge_cells("A1:J1") +ws1["A1"] = "小红书线索进线×成单 Lag 表(2026年3-5月)" +ws1["A1"].font = Font(bold=True, size=14) +ws1["A1"].alignment = Alignment(horizontal="center") + +ws1.merge_cells("A2:J2") +ws1["A2"] = "数据源:xiaoxi_xhs_lead_detail.csv | 成单口径:bi_vala_order order_status IN (3,4) pay_success_date NOT NULL | 测试账号已剔除" +ws1["A2"].font = Font(size=9, color="666666") +ws1["A2"].alignment = Alignment(horizontal="center") + +# Headers +headers = ["进线月", "留资数", "Lag", "成单数", "成单人数", "成单率%", "累计成单数", "累计成单率%", "GMV(元)", "退款(元)", "GSV(元)"] +for col, h in enumerate(headers, 1): + cell = ws1.cell(row=4, column=col, value=h) + cell.font = header_font_white + cell.fill = header_fill + cell.alignment = Alignment(horizontal="center") + cell.border = thin_border + +row = 5 +for lead_month in sorted(lag_data.keys()): + total_leads = lag_data[lead_month]["total"]["lead_count"] + all_lag_keys = sorted( + [k for k in lag_data[lead_month].keys() if k.startswith("M")], + key=lambda x: int(x[1:]) + ) + + cum_orders = 0 + + # First row for this lead_month + for li, lag_key in enumerate(all_lag_keys): + d = lag_data[lead_month][lag_key] + cum_orders += d["order_count"] + rate = d["order_count"] / total_leads * 100 if total_leads > 0 else 0 + cum_rate = cum_orders / total_leads * 100 if total_leads > 0 else 0 + + values = [ + lead_month if li == 0 else "", + total_leads if li == 0 else "", + lag_key, + d["order_count"], + len(d["order_accounts"]), + round(rate, 1), + cum_orders, + round(cum_rate, 1), + round(d["gmv"] / 100, 2), + round(d["refund"] / 100, 2), + round(d["gsv"] / 100, 2), + ] + for col, v in enumerate(values, 1): + cell = ws1.cell(row=row, column=col, value=v) + cell.border = thin_border + cell.alignment = Alignment(horizontal="center") + row += 1 + + # Total row for this lead_month + total_orders = cum_orders + total_rate = total_orders / total_leads * 100 if total_leads > 0 else 0 + total_gmv = sum(lag_data[lead_month][k]["gmv"] for k in all_lag_keys) + total_refund = sum(lag_data[lead_month][k]["refund"] for k in all_lag_keys) + total_gsv = sum(lag_data[lead_month][k]["gsv"] for k in all_lag_keys) + + total_values = [ + f"{lead_month} 合计", + total_leads, + "全部", + total_orders, + len(set().union(*[lag_data[lead_month][k]["order_accounts"] for k in all_lag_keys])), + round(total_rate, 1), + "", + "", + round(total_gmv / 100, 2), + round(total_refund / 100, 2), + round(total_gsv / 100, 2), + ] + for col, v in enumerate(total_values, 1): + cell = ws1.cell(row=row, column=col, value=v) + cell.font = Font(bold=True) + cell.fill = PatternFill(start_color="D9E2F3", end_color="D9E2F3", fill_type="solid") + cell.border = thin_border + cell.alignment = Alignment(horizontal="center") + row += 1 + row += 1 # blank row + +# Adjust column widths +for col in range(1, 12): + ws1.column_dimensions[openpyxl.utils.get_column_letter(col)].width = 14 + +# ── Sheet 2: Lead Detail ── +ws2 = wb.create_sheet("线索明细") + +detail_headers = ["进线月", "进线日期", "手机号", "用户ID", "匹配账号ID", "匹配方式", "是否成单", "首单月份", "Lag月数", "GMV(元)", "退款(元)", "GSV(元)"] +for col, h in enumerate(detail_headers, 1): + cell = ws2.cell(row=1, column=col, value=h) + cell.font = header_font_white + cell.fill = header_fill + cell.alignment = Alignment(horizontal="center") + cell.border = thin_border + +for i, ld in enumerate(lead_details, 2): + match_method = ld.get("match_method", "") or "" + + values = [ + ld["lead_month"], + ld["lead_date"], + ld["phone"], + ld["uid"], + ld["account_id"] or "", + match_method, + "是" if ld["has_order"] else "否", + ld["order_month"] or "", + ld["lag"] if ld["lag"] is not None else "", + ld["gmv"], + ld["refund"], + ld["gsv"], + ] + for col, v in enumerate(values, 1): + cell = ws2.cell(row=i, column=col, value=v) + cell.border = thin_border + cell.alignment = Alignment(horizontal="center") + +for col in range(1, 13): + ws2.column_dimensions[openpyxl.utils.get_column_letter(col)].width = 14 + +# ── Sheet 3: Match Stats ── +ws3 = wb.create_sheet("匹配统计") + +stats = [ + ["指标", "数值"], + ["3-5月进线线索总数", len(leads)], + ["手机号匹配成功", matched_by_phone], + ["UID匹配成功", matched_by_uid], + ["未匹配", unmatched], + ["匹配率", f"{(matched_by_phone + matched_by_uid) / len(leads) * 100:.1f}%"], + ["匹配到去重账号数", len(matched_aids)], + ["有订单的账号数", accounts_with_orders], + ["总订单数", total_orders], +] + +for i, (k, v) in enumerate(stats, 1): + ws3.cell(row=i, column=1, value=k).font = Font(bold=True) + ws3.cell(row=i, column=2, value=v) + ws3.cell(row=i, column=1).border = thin_border + ws3.cell(row=i, column=2).border = thin_border + +ws3.column_dimensions["A"].width = 25 +ws3.column_dimensions["B"].width = 20 + +# Save +output_path = "/root/.openclaw/workspace/output/xhs_lead_lag_analysis_202603-202605.xlsx" +wb.save(output_path) +print(f"\n✅ 输出文件: {output_path}") +print("Done!")