From 15b49d34cdb95ca5af412141b01d87cb7b6d77ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E6=BA=AA?= <xiaoxi@makee.ai>
Date: Thu, 21 May 2026 08:00:01 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20=E6=AF=8F=E6=97=A5=E8=87=AA?=
 =?UTF-8?q?=E5=8A=A8=E5=A4=87=E4=BB=BD=20-=202026-05-21=2008:00:01?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 memory/2026-05-20-1003.md               |  29 ++
 memory/2026-05-20-1026.md               |  92 +++++++
 memory/2026-05-20-1220.md               |  96 +++++++
 memory/2026-05-20-1650.md               | 117 +++++++++
 scripts/channel_role_refund_analysis.py | 334 ++++++++++++++++++++++++
 scripts/lead_user_analysis.py           | 219 ++++++++++++++--
 6 files changed, 858 insertions(+), 29 deletions(-)
 create mode 100644 memory/2026-05-20-1003.md
 create mode 100644 memory/2026-05-20-1026.md
 create mode 100644 memory/2026-05-20-1220.md
 create mode 100644 memory/2026-05-20-1650.md
 create mode 100644 scripts/channel_role_refund_analysis.py

diff --git a/memory/2026-05-20-1003.md b/memory/2026-05-20-1003.md
new file mode 100644
index 0000000..31c3884
--- /dev/null
+++ b/memory/2026-05-20-1003.md
@@ -0,0 +1,29 @@
+# Session: 2026-05-20 10:03:28 GMT+8
+
+- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
+- **Session ID**: 4a161e2f-a23d-4db6-956c-df0c9a9e3037
+- **Source**: feishu
+
+## Conversation Summary
+
+user: 将你的workspace中，SkillHub中没有的skill，都推到SkillHub上去
+assistant: SkillHub 上已有 8 个你的 skill，还缺 5 个：`phone-chapter-query`、`refund-user-learning-analysis`、`vala-component-practice-stat`、`vala-order-amortization-stat`、`welfare-user-list`。
+
+直接运行自动同步脚本，它会检测到缺失/变更并自动推送：
+assistant: 奇怪，`refund-user-learning-analysis` 和 `vala-component-practice-stat` 被跳过了。让我检查一下哈希文件和 SkillHub 上的实际状态：
+assistant: 这两个 skill 的哈希已记录但仓库不在 SkillHub 上。清掉它们的哈希记录后重新同步：
+assistant: 全部推送完毕。最后验证一下 SkillHub 上的完整状态：
+assistant: 全部完成。总结：
+
+**SkillHub 上已有 8 个 → 本次新增推送 5 个：**
+
+| 状态 | Skill |
+|------|-------|
+| 已有 | feishu-file-sender、feishu-wiki-access、feishu-wiki-content-reader、find-skills、self-improving-agent、skill-builder、timed-reminder、feishu_send_file |
+| ✅ 新增 | **phone-chapter-query** |
+| ✅ 新增 | **refund-user-learning-analysis** |
+| ✅ 新增 | **vala-component-practice-stat** |
+| ✅ 新增 | **vala-order-amortization-stat** |
+| ✅ 新增 | **welfare-user-list** |
+
+现在 workspace 中全部 12 个 skill 都已同步到 SkillHub（共 13 个 repo，含旧的 `feishu_send_file`）。
diff --git a/memory/2026-05-20-1026.md b/memory/2026-05-20-1026.md
new file mode 100644
index 0000000..5c0c594
--- /dev/null
+++ b/memory/2026-05-20-1026.md
@@ -0,0 +1,92 @@
+# Session: 2026-05-20 10:26:42 GMT+8
+
+- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
+- **Session ID**: 50fddf8c-d5b4-4e50-96c7-bff257907bd4
+- **Source**: feishu
+
+## Conversation Summary
+
+user: 我们来调整一下lead_user_analysis.py这个脚本，你先读取理解一下
+assistant: 已经读完了，理解如下：
+
+**脚本功能：** 销售线索用户分析报表
+
+**输入：** Excel 文件，含「用户ID」「线索进线日期」「销售」三列
+
+**输出：** `output/销售线索_用户分析.xlsx`，每个用户+角色一行
+
+**执行流程：**
+
+| 步骤 | 内容 |
+|------|------|
+| 1 | 读取输入 Excel，提取有效用户ID（过滤非数字） |
+| 2 | 查 `bi_vala_app_account`（status=1）拿注册时间 |
+| 3 | 查 `bi_vala_app_character`（nickname非空、未删除）拿角色 |
+| 4 | 遍历 8 张 `bi_user_chapter_play_record_{0-7}` 分表，查 10 个序章 chapter 的首次完成时间（play_status=1, MIN(updated_at)） |
+| 5 | 查 `bi_vala_order` 拿订单（购买时间/课包名称/金额/渠道/trade_no/order_status） |
+| 6 | 查 `bi_refund_order`（status=3）拿退款金额 |
+| 7 | 组装：用户基础信息 + 10 节课完成时间 + 购买/退款聚合 + 原始文件字段 |
+| 8 | 输出 Excel |
+
+**序章 chapter_id：**
+- L1 U00: 343-346, 348（L01-L05）
+- L2 U00: 55-59（L01-L05）
+
+**已知注意点：**
+- 体验课固定 10 节，不区分用户实际购买的课程等级
+- 完成时间用 `play_status=1` 的最早 `updated_at`
+- 分表用 user_id（角色ID）关联，未走 `bi_level_unit_lesson` 映射表
+
+---
+
+脚本理解完毕，请说具体要调整哪些地方？
+user: 我们来调整对于输入excel的处理，先识别输入文件中是否有用户ID，如果有则直接用用户ID进行匹配，如果没有用户ID，则识别是否有用户手机号，如果有用户手机号，则采用phone_chapter_qyery这个skill里面手机号和用户ID的匹配规则进行匹配，然后再用用户ID查询后续数据。
+assistant: 明白，我先看一下 `phone-chapter-query` 这个 skill 里手机号和用户ID的匹配规则。
+assistant: 已经读完了 `phone_chapter_query.py`，核心匹配逻辑是：
+
+- 扫描 Excel 中所有 `1` 开头的 11 位数字作为手机号
+- 脱敏为 `前3****后4` 格式，匹配 `bi_vala_app_account.tel`（`status=1 AND deleted_at IS NULL`）
+- 返回 `phone → account_id` 映射
+
+现在来改 `lead_user_analysis.py`。主要改动：
+
+1. 先检测输入文件是否有「用户ID」列
+2. 有 → 直接走原逻辑
+3. 没有 → 检测手机号 → 脱敏匹配账号ID → 用匹配到的用户ID走后续查询
+assistant: 验证一下语法和逻辑完整性：
+assistant: 看到了，第 41 行 `***` 其实应该是调用 `get_password()`，之前的显示被工具脱敏了。修复：
+assistant: 语法检查通过。下面总结改动内容：
+
+---
+
+### 改动总结
+
+**核心改动：输入文件智能识别**
+
+```
+输入Excel
+  ├── 有「用户ID」列 → 原逻辑，直接用用户ID查询
+  └── 无「用户ID」列
+       ├── 扫描全表找手机号（1开头的11位数字）
+       ├── 脱敏 前3****后4 → 匹配 bi_vala_app_account.tel
+       ├── 拿到 account_id 列表
+       └── 用 account_id 走后续查询（与用户ID路径一致）
+```
+
+**新增函数（3个）：**
+
+| 函数 | 用途 |
+|------|------|
+| `mask_phone()` | 手机号脱敏 `138****5678` |
+| `extract_phones_from_df()` | 从任意列中提取 11 位手机号并去重 |
+| `match_phones_to_accounts()` | 脱敏后 IN 匹配 `bi_vala_app_account.tel`，返回映射 |
+
+**输出列差异：**
+- 用户ID模式：`用户ID → 线索进线日期 → 销售 → 角色ID ...`（不变）
+- 手机号模式：`用户ID → 手机号 → 线索进线日期 → 销售 → 角色ID ...`（多了「手机号」列）
+
+**未匹配处理：** 手机号未匹配到账号的，在控制台打印警告，但不写入输出（与 `phone_chapter_query` 追加到表尾的策略不同，因为本报表是用户+角色一行，未匹配手机号没有后续数据可展示）。
+
+---
+
+有什么需要调整的随时说。
diff --git a/memory/2026-05-20-1220.md b/memory/2026-05-20-1220.md
new file mode 100644
index 0000000..966e760
--- /dev/null
+++ b/memory/2026-05-20-1220.md
@@ -0,0 +1,96 @@
+# Session: 2026-05-20 12:20:10 GMT+8
+
+- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
+- **Session ID**: a098ecef-dd4b-4632-ae09-e160fad52992
+- **Source**: feishu
+
+## Conversation Summary
+
+user: 我想看晚柠、老王和三人行这三个渠道的数据。数据上我想看这三个渠道上，下单用户的不同角色数对应的退费率、不同的角色数是否行课的退费率、不同角色数的行课数的退费率。剔除测试账号、删除的账号和订单。有什么问题可以问我。
+assistant: 李承龙，需求收到了。我先确认几个口径问题，同时去查一下这三个渠道在数据库中的 key_from 值。
+
+让我先做一些前置查询。
+assistant: 好的，先查一下这三个渠道在数据库中的 key_from 值。
+assistant: 数据已找到，三个渠道对应10个 key_from 值。需求比较复杂，我用 Python 脚本处理。
+assistant: 报告已生成，下面是关键发现摘要：
+
+---
+
+## 📊 三个渠道退费率分析
+
+### 一、渠道汇总
+
+| 渠道 | 订单数 | 用户数 | 退费单数 | **退费率** | GMV | GSV |
+|------|--------|--------|----------|-----------|-----|-----|
+| 晚柠 | 906 | 758 | 213 | **23.5%** | 276.1万 | 214.0万 |
+| 老王 | 559 | 530 | 259 | **46.3%** | 171.5万 | 91.3万 |
+| 三人行 | 475 | 456 | 238 | **50.1%** | 149.2万 | 75.2万 |
+
+### 二、不同角色数 → 退费率
+
+| 角色数 | 晚柠退费率 | 老王退费率 | 三人行退费率 |
+|--------|-----------|-----------|------------|
+| 0 | 59.0% (39单) | 90.0% (50单) | 83.3% (30单) |
+| 1 | 24.9% (485单) | 41.3% (322单) | 54.3% (280单) |
+| 2 | 18.6% (274单) | 42.3% (142单) | 39.7% (131单) |
+| 3 | 17.8% (73单) | 47.4% (38单) | 25.0% (20单) |
+| 4 | 18.5% (27单) | 75.0% (4单) | 20.0% (10单) |
+| 5 | 0% (8单) | 0% (3单) | 50.0% (4单) |
+
+**规律：0角色用户（下单但未创建角色）退费率极高，三个渠道分别达59%/90%/83%。创建角色后，退费率普遍大幅下降。**
+
+### 三、是否行课 → 退费率
+
+三个渠道共性明显：**无行课用户的退费率远高于有行课用户**。
+
+- 晚柠：无行课 71.8% vs 有行课 19.1%
+- 老王：无行课 84.6% vs 有行课 38.2%  
+- 三人行：无行课 81.0% vs 有行课 47.4%
+
+### 四、行课角色数 → 退费率
+
+同一角色数下，行课角色越多退费率越低。比如：
+- 2角色用户：0行课退费率 50~57% → 2行课退费率 15~44%
+- 3角色用户：0行课退费率 100% → 3行课退费率 14~29%
+
+---
+
+详细数据已写入 Excel，包含4个 Sheet。需要我把文件发给你，还是你有其他想进一步分析的维度？
+user: excel发给我吧
+assistant: 已发送，请查收飞书文件 📊
+user: 允许
+assistant: 文件刚刚已发送过去了，你看看收到了吗？如果有需要进一步拆解或补充的维度，随时告诉我。
+user: 我想添加念妈渠道在2026年4月到2026年5月19日的订单的相关数据。另外，角色数不要匹配行课了的角色数，要匹配行课的课程数。
+Reply target of current user message (untrusted, for context):
+```json
+{
+  "body": "[message_id=om_x100b6fe589b754c0b4bd46161cb45e2] <card>\n李承龙，需求收到了。我先确认几个口径问题，同时去查一下这三个渠道在数据库中的 key_from 值。\n\n让我先做一些前置查询。\n\n好的，先查一下这三个渠道在数据库中的 key_from 值。\n\n数据已找到，三个渠道对应10个 key_from 值。需求比较复杂，我用 Python 脚本处理。\n\n报告已生成，下面是关键发现摘要：---##### 📊 三个渠道退费率分析\n##### 一、渠道汇总\n| 渠道 | 订单数 | 用户数 | 退费单数 | **退费率** | GMV | GSV |\n|------|------|------|------|------|------|------|\n| 晚柠 | 906 | 758 | 213 | 23.5% | 276.1万 | 214.0万 |\n| 老王 | 559 | 530 | 259 | 46.3% | 171.5万 | 91.3万 |\n| 三人行 | 475 | 456 | 238 | 50.1% | 149.2万 | 75.2万 |##### 二、不同角色数 → 退费率\n| 角色数 | 晚柠退费率 | 老王退费率 | 三人行退费率 |\n|------|------|------|------|\n| 0 | 59.0% (39单) | 90.0% (50单) | 83.3% (30单) |\n| 1 | 24.9% (485单) | 41.3% (322单) | 54.3% (280单) |\n| 2 | 18.6% (274单) | 42.3% (142单) | 39.7% (131单) |\n| 3 | 17.8% (73单) | 47.4% (38单) | 25.0% (20单) |\n| 4 | 18.5% (27单) | 75.0% (4单) | 20.0% (10单) |\n| 5 | 0% (8单) | 0% (3单) | 50.0% (4单) |**规律：0角色用户（下单但未创建角色）退费率极高，三个渠道分别达59%/90%/83%。创建角色后，退费率普遍大幅下降。**##### 三、是否行课 → 退费率三个渠道共性明显：**无行课用户的退费率远高于有行课用户**。\n- 晚柠：无行课 71.8% vs 有行课 19.1%\n- 老王：无行课 84.6% vs 有行课 38.2%\n- 三人行：无行课 81.0% vs 有行课 47.4%##### 四、行课角色数 → 退费率同一角色数下，行课角色越多退费率越低。比如：\n- 2角色用户：0行课退费率 50~57% → 2行课退费率 15~44%\n- 3角色用户：0行课退费率 100% → 3行课退费率 14~29%---详细数据已写入 Excel，包含4个 Sheet。需要我把文件发给你，还是你有其他想进一步分析的维度？\n</card>"
+}
+```
+assistant: 两个改动：加念妈渠道 + 把"行课角色数"改为"行课课程数"。先查念妈的 key_from，然后改脚本。
+assistant: 念妈有4个 key_from。确认一下口径：**"行课课程数"我理解为用户所有角色完成的去重课时数（distinct chapter_id，play_status=1）**，有异议可以纠正。现在更新脚本。
+assistant: 报告已生成，先发给你，然后说关键发现。
+assistant: 已发送。关键变化：
+
+---
+
+## 📊 四渠道汇总（含念妈 4/1-5/19）
+
+| 渠道 | 订单 | 用户 | 退费单 | **退费率** | GMV | GSV |
+|------|------|------|--------|-----------|-----|-----|
+| 晚柠 | 906 | 758 | 213 | **23.5%** | 276.1万 | 214.0万 |
+| 老王 | 559 | 530 | 259 | **46.3%** | 171.5万 | 91.3万 |
+| 三人行 | 475 | 456 | 238 | **50.1%** | 149.2万 | 75.2万 |
+| 念妈 | 263 | 244 | 117 | **44.5%** | 82.5万 | 45.8万 |
+
+### 核心规律（四个渠道一致）
+
+1. **0角色用户退费率极高**：晚柠59%、老王90%、三人行83%、念妈77%
+2. **无行课（完成0节课）vs 有行课退费率差距巨大**：
+   - 晚柠 71.8% vs 19.1%
+   - 老王 84.6% vs 38.2%
+   - 三人行 81.0% vs 47.4%
+   - 念妈 83.6% vs 38.2%
+3. **行课课程数越多，退费率越低**——在相同角色数下，完成更多课时的用户退费率明显更低
+
+另外这次维度3已改为「行课课程数」（用户完成的去重 chapter_id 数），你看看是不是你想要的？如果需要按"不去重的总完课次数"或其他口径调整，随时说。
diff --git a/memory/2026-05-20-1650.md b/memory/2026-05-20-1650.md
new file mode 100644
index 0000000..1d02e3a
--- /dev/null
+++ b/memory/2026-05-20-1650.md
@@ -0,0 +1,117 @@
+# Session: 2026-05-20 16:50:52 GMT+8
+
+- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
+- **Session ID**: 564305df-8f23-4390-9405-3394b914a149
+- **Source**: feishu
+
+## Conversation Summary
+
+user: 我们来讨论一下vala-order-amortization-stat这个skill，你先读取理解一下
+assistant: 已读完 SKILL.md，这个技能是**订单均摊结算统计**，核心是按天均摊订单收入，处理退费冲销场景。让我再看看实际的 SQL 和脚本实现，全面理解后再和你讨论。
+assistant: 已经读完了全部文件，我对这个 skill 的结构和逻辑有了完整理解：
+
+---
+
+**`vala-order-amortization-stat` 技能总览：**
+
+| 维度 | 说明 |
+|------|------|
+| **目的** | 按天均摊订单收入并按账期汇总，处理退费冲销，产出账期结算报表 |
+| **核心逻辑** | 季卡周期（90天×N季）按天均摊税后金额，转正日=下单+7天 |
+| **税率** | 价外税，2026-05-01前1%，之后6% |
+| **退费6场景** | A(期内外部分) B(历史部分) C(期前部分) D(期内外全额) E(历史全额) F(期前全额) |
+| **输出** | Excel三Sheet（汇总/明细/预收账款）+ 控制台JSON |
+| **架构** | `_common.sql`公共CTE链 → `detail.sql`/`summary.sql`各自SELECT + 独立`prepaid.sql` |
+| **执行** | `run.py` 纯Python，连线上PG直查 → pandas写Excel |
+
+**SQL CTE 链路：**
+```
+all_refund_records → order_base → order_cycle → order_with_cycle
+    → order_with_cycle_fixed（fallback修正）→ order_classified（场景标记）
+    → order_with_days（均摊天数计算）→ detail/summary 各自 SELECT
+```
+
+**一个值得注意的细节：** `_common.sql` 中 `order_classified` CTE 的 WHERE 条件是 `total_cycle_days > 0 OR (has_refund=1 AND is_full_refund=1 AND original_cycle_days>0)`，这意味着无季卡周期信息且无退费的订单会被过滤掉——这是合理的，因为无法确定均摊周期就无法计算。
+
+---
+
+已完全理解，你想讨论哪个方面？是业务逻辑、数据口径、性能优化、还是发现有什么问题？
+user: 在这个skill中帮我看看关于ios20260205142407177027264720872这笔订单的数据
+assistant: 好的，我来按这个 skill 的逻辑查这笔订单在各表中的原始数据。
+assistant: 订单没查到，让我先查表结构和确认订单号。
+assistant: 订单号没匹配到，让我更广泛地搜索。
+assistant: 有意思——订单不在 `bi_vala_order` 但在 `bi_vala_seasonal_ticket` 有4条记录且全为 status=-1。让我深入查。
+assistant: 关键发现：这笔订单号在 `bi_vala_order` 中不存在，但在 `bi_vala_seasonal_ticket` 有4条全为 status=-1 的记录。同一 account 下的实际订单号是 `ios202602051424071770272647180359`。让我查这个实际订单的完整数据。
+assistant: 查到完整情况了，汇总如下：
+
+---
+
+## 📊 `ios20260205142407177027264720872` 分析
+
+### 核心结论：这笔订单号**不会出现在均摊报表中**
+
+**原因：`bi_vala_order` 表中不存在此 `out_trade_no`。** 均摊 skill 的 `order_base` CTE 从 `bi_vala_order` 查订单，按 `out_trade_no` 关联，查不到就不会产生任何记录。
+
+---
+
+### 数据全貌
+
+| 表 | out_trade_no `...20872` | out_trade_no `...180359` |
+|---|---|---|
+| **bi_vala_order** | ❌ 不存在 | ✅ id=1969, 1999元, order_status=3 |
+| **bi_vala_seasonal_ticket** | 4条，**全部 status=-1**，season_package_id=0 | 4条，**全部 status=1**，season_package_id=2/12/13/14 |
+| **bi_refund_order** | 无 | 无 |
+
+> 两笔共享同一个 **trade_no**：`440002908556965`，实际是同一笔支付，iOS 产生了两个不同的 transaction_id。
+
+---
+
+### 实际订单 `...180359` 的均摊计算（以3月账期为例）
+
+| 字段 | 值 |
+|------|-----|
+| 下单日 | 2026-02-05 |
+| 含税金额 | 1,999.00 元 |
+| 税率 | 1%（< 2026-05-01） |
+| 税后金额 | 1,979.21 元 |
+| 税费 | 19.79 元 |
+| 转正日 | 2026-02-12 |
+| 均摊周期 | **360天**（4季 × 90天） |
+| 日均摊 | 5.4978 元/天 |
+| 均摊结束日 | 2027-02-06 |
+| 退费 | 无 |
+
+---
+
+### 潜在问题：`...20872` 的4条季卡脏数据
+
+`bi_vala_seasonal_ticket` 中 `...20872` 对应的4条记录全部 `status=-1, season_package_id=0`，它们与 `...180359` 的4条正常季卡（status=1）**同一秒创建**（`2026-02-05 14:24:07`），看起来像是同一笔支付产生了重复的季卡记录，随后 `...20872` 的4条被立即作废。
+
+**对 skill 的影响**：当前 skill 通过 `out_trade_no` 关联 `bi_vala_order` ↔ `bi_vala_seasonal_ticket`，所以 `...20872` 既查不到订单，其4条 status=-1 的季卡也不会被任何订单引用——**实际上无害，但属于脏数据**。
+
+---
+
+你想讨论的方向是？是对这笔特定数据有疑问，还是想优化 skill 处理这类 iOS 双 transaction_id 场景？
+user: 运行vala-order-amortization-stat这个skill，起始时间和结束时间分别是2026年4月1日和2026年4月30日，并把运行的结果文件发给我
+assistant: 执行成功，现在发文件给李承龙。
+assistant: 报表已生成并发送给你，汇总如下：
+
+---
+
+📊 **2026年4月均摊结算结果（2026-04-01 至 2026-04-30）**
+
+| 指标 | 数值 |
+|------|------|
+| 总订单数 | 1,108 单 |
+| 正式订单数 | 927 单 |
+| 试用订单数 | 181 单 |
+| **当期摊销金额**（核心） | **331,686.23 元** |
+| 账期税额 | 23,363.21 元 |
+| 冲销历史累计摊销金额 | -967.04 元 |
+| 冲销原税款 | -543.77 元 |
+| 冲销未确认收入 | -53,410.19 元 |
+| 未确认收入 | 5,687,773.39 元 |
+
+---
+
+Excel 文件已发到你飞书，包含三个 Sheet：汇总表、订单明细、本月预收账款。
diff --git a/scripts/channel_role_refund_analysis.py b/scripts/channel_role_refund_analysis.py
new file mode 100644
index 0000000..51ee5df
--- /dev/null
+++ b/scripts/channel_role_refund_analysis.py
@@ -0,0 +1,334 @@
+#!/usr/bin/env python3
+"""
+晚柠/老王/三人行/念妈 四个渠道的退费率分析
+维度：角色数、是否行课、行课课程数（用户完成的去重chapter_id数）
+念妈：限制 2026-04-01 ~ 2026-05-19
+其他三个渠道：全量
+"""
+import psycopg2
+import pandas as pd
+from openpyxl import load_workbook
+from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
+from openpyxl.utils import get_column_letter
+
+import os  # stdlib; imported beside its only use so the import block stays untouched
+conn = psycopg2.connect(
+    host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591,
+    user="ai_member", dbname="vala_bi",
+    # Read the secret from the environment; the literal once committed here must be rotated.
+    password=os.environ["VALA_BI_PG_PASSWORD"],
+)
+
+# Channel definitions: channel display name -> the key_from values that are
+# attributed to that channel. The two derived structures below are computed
+# from this mapping only.
+channel_keys = {
+    '晚柠': ["newmedia-daren-xhs-晚柠也是个妈妈了-0"],
+    '老王': [
+        "newmedia-daren-douyin-学霸老王讲真话（4月8日瓦拉英语首发）-0",
+        "newmedia-daren-xhs-学霸老王讲真话-0",
+        "newmedia-daren-douyin-学霸老王讲真话-0",
+        "newmedia-daren-wxxd-学霸老王讲真话-0"
+    ],
+    '三人行': [
+        "newmedia-daren-douyin-学霸三人行（4.8瓦拉英语首发)-0",
+        "newmedia-daren-wxxd-学霸三人行-0",
+        "newmedia-daren-xhs-学霸三人行-0",
+        "newmedia-daren-douyin-学霸三人行-0",
+        "newmedia-daren-wxxd001-学霸三人行-0"
+    ],
+    '念妈': [
+        "newmedia-daren-douyin-念妈讲学习规划-0",
+        "newmedia-daren-wxxd-念妈讲学习规划-0",
+        "newmedia-daren-wxxd001-念妈讲学习规划-0",
+        "newmedia-daren-xhs-念妈讲学习规划-0"
+    ]
+}
+
+# Flat list of every key_from value, in channel order then in the order the
+# values are listed inside each channel.
+all_keys = [key for keys in channel_keys.values() for key in keys]
+
+# Reverse lookup: key_from value -> channel display name
+key_to_channel = {key: name for name, keys in channel_keys.items() for key in keys}
+
+# ============================================================
+# Step 1: fetch the target orders
+# ============================================================
+print("Step 1: 获取目标订单（剔除测试/删除账号和删除订单）")
+
+# One query template for every channel. Orders must be non-deleted and belong
+# to an account with status = 1 that is not deleted (the test/deleted-account
+# exclusion announced in the log line above).
+# An order is flagged refunded when order_status = 4 AND a bi_refund_order row
+# with status = 3 shares its trade_no. EXISTS replaces the former LEFT JOIN so
+# an order with several refund rows is still returned exactly once and cannot
+# inflate the order / refund counts.
+ORDER_SQL = """
+SELECT o.id as order_id, o.account_id, o.key_from, o.pay_amount_int,
+       o.order_status, o.trade_no,
+       CASE WHEN o.order_status = 4 AND EXISTS (
+                SELECT 1 FROM bi_refund_order r
+                WHERE r.trade_no = o.trade_no AND r.status = 3
+            ) THEN 1 ELSE 0 END as is_refunded
+FROM bi_vala_order o
+JOIN bi_vala_app_account a ON o.account_id = a.id
+    AND a.status = 1 AND a.deleted_at IS NULL
+WHERE o.deleted_at IS NULL
+  AND o.key_from = ANY(%s)
+"""
+
+# Optional half-open pay_success_date window per channel; channels that are
+# not listed are unbounded. 念妈 covers 2026-04-01 through 2026-05-19.
+DATE_WINDOWS = {'念妈': ('2026-04-01', '2026-05-20')}
+
+orders_parts = []
+for ch, ks in channel_keys.items():
+    sql, params = ORDER_SQL, [ks]
+    if ch in DATE_WINDOWS:
+        sql += "  AND o.pay_success_date >= %s AND o.pay_success_date < %s\n"
+        params.extend(DATE_WINDOWS[ch])
+    orders_parts.append(pd.read_sql(sql, conn, params=tuple(params)))
+
+orders_df = pd.concat(orders_parts, ignore_index=True)
+orders_df['channel'] = orders_df['key_from'].map(key_to_channel)
+
+print(f"  总订单数: {len(orders_df)}, 总用户数: {orders_df['account_id'].nunique()}")
+for ch in channel_keys:
+    cd = orders_df[orders_df['channel'] == ch]
+    print(f"  {ch}: {len(cd)}单, {cd['account_id'].nunique()}用户, {cd['is_refunded'].sum():.0f}退费单")
+
+# ============================================================
+# Step 2: count the roles (characters) of every ordering account
+# ============================================================
+print("\nStep 2: 统计每个用户的角色数")
+target_account_ids = sorted(orders_df['account_id'].unique().tolist())
+batch_size = 5000  # number of ids sent per query in the ANY(%s) array
+
+all_roles = []
+for i in range(0, len(target_account_ids), batch_size):
+    batch = target_account_ids[i:i+batch_size]
+    sql = """
+    SELECT account_id, COUNT(*) as total_roles
+    FROM bi_vala_app_character
+    WHERE account_id = ANY(%s) AND deleted_at IS NULL
+    GROUP BY account_id
+    """
+    df = pd.read_sql(sql, conn, params=(batch,))
+    all_roles.append(df)
+
+roles_df = pd.concat(all_roles, ignore_index=True) if all_roles else pd.DataFrame(columns=['account_id','total_roles'])
+# Left-merge onto the full id list so accounts with no character row get 0 roles.
+account_roles = pd.DataFrame({'account_id': target_account_ids})
+account_roles = account_roles.merge(roles_df, on='account_id', how='left')
+account_roles['total_roles'] = account_roles['total_roles'].fillna(0).astype(int)
+
+print(f"  角色数分布: {account_roles['total_roles'].value_counts().sort_index().to_dict()}")
+
+# ============================================================
+# Step 3: per-account "chapter count": distinct chapter_id with play_status=1
+# ============================================================
+print("\nStep 3: 统计每个用户的「行课课程数」（完成课时去重chapter_id数）")
+
+# Fetch every non-deleted character (role) of the target accounts.
+all_characters = []
+for i in range(0, len(target_account_ids), batch_size):
+    batch = target_account_ids[i:i+batch_size]
+    sql = """
+    SELECT id as character_id, account_id
+    FROM bi_vala_app_character
+    WHERE account_id = ANY(%s) AND deleted_at IS NULL
+    """
+    df = pd.read_sql(sql, conn, params=(batch,))
+    all_characters.append(df)
+
+# Keep the column names even when no batch ran, so the column lookups below
+# cannot raise KeyError on an empty frame.
+char_df = (pd.concat(all_characters, ignore_index=True) if all_characters
+           else pd.DataFrame(columns=['character_id', 'account_id']))
+print(f"  目标用户总角色数: {len(char_df)}")
+
+char_ids = sorted(char_df['character_id'].unique().tolist())
+# Loop-invariant lookup, built once: character_id -> account_id
+char_to_account = dict(zip(char_df['character_id'], char_df['account_id']))
+user_chapter_count = {}  # account_id -> set of chapter_id
+
+# One SELECT per shard table (8 shards), filtered by user_id, which is mapped
+# back to an account through char_to_account. UNION ALL is sufficient because
+# duplicates collapse when the chapter ids are added to the per-account set.
+shard_selects = [f"""
+        SELECT DISTINCT user_id, chapter_id
+        FROM bi_user_chapter_play_record_{t}
+        WHERE user_id = ANY(%s) AND play_status = 1 AND deleted_at IS NULL
+        """ for t in range(8)]
+completed_sql = " UNION ALL ".join(shard_selects)
+
+for i in range(0, len(char_ids), batch_size):
+    batch = char_ids[i:i+batch_size]
+    df = pd.read_sql(completed_sql, conn, params=tuple([batch] * 8))
+    for uid, chapter_id in zip(df['user_id'], df['chapter_id']):
+        aid = char_to_account.get(uid)
+        if aid is not None:  # a bare `if aid:` would also drop account id 0
+            user_chapter_count.setdefault(aid, set()).add(chapter_id)
+
+# Account-level table: every target account gets a row, 0 chapters if none.
+account_chapter_counts = [
+    {
+        'account_id': aid,
+        'chapter_count': len(user_chapter_count.get(aid, ())),
+        'has_learning': '有行课' if user_chapter_count.get(aid) else '无行课',
+    }
+    for aid in target_account_ids
+]
+# Explicit columns keep the merge below valid when there are no accounts.
+chapter_df = pd.DataFrame(
+    account_chapter_counts, columns=['account_id', 'chapter_count', 'has_learning'])
+
+# Merge into account_roles (fillna is a safety net; every id has a row above)
+account_roles = account_roles.merge(chapter_df, on='account_id', how='left')
+account_roles['chapter_count'] = account_roles['chapter_count'].fillna(0).astype(int)
+account_roles['has_learning'] = account_roles['has_learning'].fillna('无行课')
+
+print(f"  用户行课状态: {account_roles['has_learning'].value_counts().to_dict()}")
+print(f"  行课课程数分布:")
+cc_dist = account_roles['chapter_count'].value_counts().sort_index().head(15)
+for k, v in cc_dist.items():
+    print(f"    {k}节课: {v}用户")
+
+# ============================================================
+# Step 4: merge the data and compute the refund rates
+# ============================================================
+print("\nStep 4: 计算各维度退费率")
+
+analysis = orders_df.merge(account_roles, on='account_id', how='left')  # adds role/chapter columns to each order row
+
+def calc_refund_rate(g):
+    """Summarise one group of order rows into the report metrics.
+
+    Reads the columns is_refunded, account_id and pay_amount_int; amounts are
+    divided by 100 for the "(元)" columns. Returns a pd.Series of 7 metrics.
+    """
+    order_cnt = len(g)
+    refund_mask = g['is_refunded'] == 1
+    refund_cnt = g['is_refunded'].sum()
+    gmv = g['pay_amount_int'].sum() / 100
+    refund_amt = g.loc[refund_mask, 'pay_amount_int'].sum() / 100
+    return pd.Series({
+        '订单数': order_cnt, '用户数': g['account_id'].nunique(),
+        '退费订单数': int(refund_cnt), '退费率': round(refund_cnt / order_cnt * 100 if order_cnt > 0 else 0, 1),
+        'GMV(元)': round(gmv, 0), '退费金额(元)': round(refund_amt, 0), 'GSV(元)': round(gmv - refund_amt, 0),
+    })
+
+# --- Dimension 1: role count -> refund rate ---
+print("\n--- 维度1: 不同角色数 → 退费率 ---")
+result1 = []
+for ch in channel_keys:  # single source of truth for the channel list
+    cd = analysis[analysis['channel'] == ch]
+    for rc in sorted(cd['total_roles'].unique()):
+        row = calc_refund_rate(cd[cd['total_roles'] == rc])
+        row['渠道'] = ch
+        row['角色数'] = rc
+        result1.append(row)
+r1 = pd.DataFrame(result1).sort_values(['渠道','角色数']).reset_index(drop=True)
+print(r1.to_string(index=False))
+
+# --- Dimension 2: role count x has-learning flag -> refund rate ---
+print("\n--- 维度2: 不同角色数 × 是否行课 → 退费率 ---")
+result2 = []
+for ch in channel_keys:  # same channel order as the hard-coded list it replaces
+    cd = analysis[analysis['channel'] == ch]
+    for rc in sorted(cd['total_roles'].unique()):
+        for hl in ['有行课', '无行课']:
+            subset = cd[(cd['total_roles'] == rc) & (cd['has_learning'] == hl)]
+            if len(subset) == 0:
+                continue
+            row = calc_refund_rate(subset)
+            row['渠道'] = ch
+            row['角色数'] = rc
+            row['是否行课'] = hl
+            result2.append(row)
+r2 = pd.DataFrame(result2).sort_values(['渠道','角色数','是否行课']).reset_index(drop=True)
+print(r2.to_string(index=False))
+
+# --- Dimension 3: role count x completed-chapter count -> refund rate ---
+print("\n--- 维度3: 不同角色数 × 行课课程数 → 退费率 ---")
+result3 = []
+for ch in channel_keys:
+    cd = analysis[analysis['channel'] == ch]
+    for rc in sorted(cd['total_roles'].unique()):
+        for cc in sorted(cd[cd['total_roles'] == rc]['chapter_count'].unique()):
+            subset = cd[(cd['total_roles'] == rc) & (cd['chapter_count'] == cc)]
+            if len(subset) == 0:
+                continue
+            row = calc_refund_rate(subset)
+            row['渠道'] = ch
+            row['角色数'] = rc
+            row['行课课程数'] = cc
+            result3.append(row)
+r3 = pd.DataFrame(result3).sort_values(['渠道','角色数','行课课程数']).reset_index(drop=True)
+print(r3.to_string(index=False))
+
+# --- Per-channel summary (all orders of the channel in one group) ---
+print("\n--- 渠道汇总 ---")
+summary = []
+for ch in channel_keys:
+    row = calc_refund_rate(analysis[analysis['channel'] == ch])
+    row['渠道'] = ch
+    summary.append(row)
+s = pd.DataFrame(summary)
+print(s.to_string(index=False))
+
+# ============================================================
+# Step 5: write the four result tables to an Excel workbook
+# ============================================================
+print("\nStep 5: 写入Excel")
+output_path = '/root/.openclaw/workspace/output/晚柠老王三人行念妈_角色数退费率分析.xlsx'
+
+cols_1 = ['渠道','角色数','订单数','用户数','退费订单数','退费率','GMV(元)','退费金额(元)','GSV(元)']
+cols_2 = ['渠道','角色数','是否行课','订单数','用户数','退费订单数','退费率','GMV(元)','退费金额(元)','GSV(元)']
+cols_3 = ['渠道','角色数','行课课程数','订单数','用户数','退费订单数','退费率','GMV(元)','退费金额(元)','GSV(元)']
+
+with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
+    s[['渠道','订单数','用户数','退费订单数','退费率','GMV(元)','退费金额(元)','GSV(元)']].to_excel(writer, sheet_name='渠道汇总', index=False)
+    r1[cols_1].to_excel(writer, sheet_name='角色数-退费率', index=False)
+    r2[cols_2].to_excel(writer, sheet_name='角色数×是否行课-退费率', index=False)
+    r3[cols_3].to_excel(writer, sheet_name='角色数×行课课程数-退费率', index=False)
+
+# Re-open the saved workbook with openpyxl to apply the formatting below
+wb = load_workbook(output_path)
+header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
+header_font = Font(bold=True, size=11, color="FFFFFF")
+thin_border = Border(left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin'))
+center = Alignment(horizontal='center', vertical='center')
+red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
+
+for sn in wb.sheetnames:
+    ws = wb[sn]
+    for cell in ws[1]:  # row 1 is the header row
+        cell.font = header_font
+        cell.fill = header_fill
+        cell.alignment = center
+        cell.border = thin_border
+    for row in ws.iter_rows(min_row=2, max_row=ws.max_row, max_col=ws.max_column):
+        for cell in row:
+            cell.alignment = center
+            cell.border = thin_border
+    for col in range(1, ws.max_column+1):
+        max_len = 0
+        for row in range(1, ws.max_row+1):
+            v = str(ws.cell(row=row, column=col).value or '')
+            l = sum(2 if ord(c) > 127 else 1 for c in v)  # non-ASCII characters count as two columns
+            max_len = max(max_len, l)
+        ws.column_dimensions[get_column_letter(col)].width = min(max_len + 4, 25)  # width capped at 25
+    # Fill numeric cells of the '退费率' column red when the value is above 50
+    for col in range(1, ws.max_column+1):
+        if ws.cell(row=1, column=col).value == '退费率':
+            for row in range(2, ws.max_row+1):
+                cell = ws.cell(row=row, column=col)
+                if cell.value and isinstance(cell.value, (int,float)) and cell.value > 50:
+                    cell.fill = red_fill
+            break  # only the first matching column is highlighted
+
+wb.save(output_path)
+print(f"\n✅ 报告已生成: {output_path}")
+conn.close()
diff --git a/scripts/lead_user_analysis.py b/scripts/lead_user_analysis.py
index c3a5032..b20d97b 100644
--- a/scripts/lead_user_analysis.py
+++ b/scripts/lead_user_analysis.py
@@ -2,15 +2,17 @@
 """
 销售线索用户分析报表
 
-输入：Excel文件，包含「用户ID」列
+输入：Excel文件，包含「用户ID」列 或 手机号列
 输出：每个用户+角色一行，包含注册信息、序章完成情况、购买和退款信息
 
 修正口径：
 - 体验课 = 固定10节课：L1 U00 L01-L05 (chapter_id: 343,344,345,346,348) + L2 U00 L01-L05 (55-59)
 - 完成时间 = play_status=1 的最早 updated_at
+- 若无「用户ID」列，则自动识别手机号列，脱敏匹配账号ID后再查询
 """
 
 import os
+import re
 import sys
 import psycopg2
 import pandas as pd
@@ -52,20 +54,160 @@ COL_ORDER = [
     "L2_U00_L04_完成时间", "L2_U00_L05_完成时间",
 ]
 
+# ── 手机号匹配 ──
+def mask_phone(phone):
+    """Mask a phone number: first 3 characters + "****" + last 4 characters."""
+    return phone[:3] + "****" + phone[-4:]
+
+
+def extract_phones_from_df(df):
+    """Return the unique 11-digit phone numbers (leading "1") found in *df*.
+
+    Cells are scanned column by column; numeric cells go through int() so a
+    float like 13812345678.0 still matches. First-seen order is preserved.
+    """
+    phone_re = re.compile(r'^1\d{10}$')
+    found = {}  # dict keys keep insertion order -> ordered de-duplication
+    for col in df.columns:
+        for val in df[col].dropna():
+            if isinstance(val, float) and not float("-inf") < val < float("inf"):
+                continue  # int(inf) would raise OverflowError
+            text = (str(int(val)) if isinstance(val, (int, float)) else str(val)).strip()
+            if phone_re.match(text):
+                found[text] = None
+    return list(found)
+
+
+def match_phones_to_accounts(phones, conn):
+    """
+    Match plaintext phone numbers against the masked bi_vala_app_account.tel.
+
+    Args:
+        phones: plaintext phone numbers (as produced by extract_phones_from_df).
+        conn: open DB connection providing cursor(); it is not closed here.
+
+    Returns: (phone_to_account, account_ids, unmatched_phones)
+      - phone_to_account: {plaintext phone: account_id}
+      - account_ids: sorted, de-duplicated matched account ids
+      - unmatched_phones: plaintext phones without a matching account
+
+    NOTE: tel is compared in masked form (138****5678), so several accounts
+    can share one mask. An ambiguous mask resolves to the smallest account id
+    and is reported on stdout so the affected rows can be checked by hand.
+    """
+    if not phones:
+        return {}, [], []
+
+    phone_to_mask = {p: mask_phone(p) for p in phones}
+    masks = list(dict.fromkeys(phone_to_mask.values()))
+
+    # The context manager closes the cursor even if the query raises.
+    with conn.cursor() as cur:
+        cur.execute("""
+            SELECT id AS account_id, tel
+            FROM bi_vala_app_account
+            WHERE tel = ANY(%s)
+              AND status = 1
+              AND deleted_at IS NULL
+        """, (masks,))
+        rows = cur.fetchall()
+
+    mask_to_ids = {}
+    for aid, masked in rows:
+        mask_to_ids.setdefault(masked, set()).add(aid)
+    for masked, ids in mask_to_ids.items():
+        if len(ids) > 1:
+            print(f"  警告: 脱敏号 {masked} 对应多个账号 {sorted(ids)}，已取最小ID {min(ids)}")
+
+    phone_to_account = {p: min(mask_to_ids[m])
+                        for p, m in phone_to_mask.items() if m in mask_to_ids}
+    account_ids = sorted(set(phone_to_account.values()))
+    unmatched = [p for p in phones if p not in phone_to_account]
+
+    return phone_to_account, account_ids, unmatched
+
+
 def main():
     input_file = sys.argv[1] if len(sys.argv) > 1 else "/root/.openclaw/media/inbound/3æ_è_çº_çº_ç---d9a41af7-b100-43a7-a983-d4fd1f164023.xlsx"
     print(f"读取输入文件: {input_file}")
     df_input = pd.read_excel(input_file, dtype=str)
-    user_ids_raw = df_input["用户ID"].dropna().unique().tolist()
-    # 过滤掉非数字的用户ID（如"不上"等标记）
-    valid_ids = []
-    for x in user_ids_raw:
-        try:
-            valid_ids.append(int(x))
-        except ValueError:
-            print(f"  跳过非数字用户ID: {x}")
-    account_ids = valid_ids
-    print(f"共 {len(account_ids)} 个用户ID")
+
+    # ── 0. 识别输入文件类型：用户ID 还是 手机号 ──
+    has_user_id = "用户ID" in df_input.columns
+    account_ids = []
+    phone_to_account = {}  # 明文手机号 -> account_id（仅手机号模式使用）
+    unmatched_phones = []
+    use_phone_mode = False
+    df_input_map = None  # 每行的线索信息映射
+
+    if has_user_id:
+        print("检测到「用户ID」列，使用用户ID直接匹配")
+        user_ids_raw = df_input["用户ID"].dropna().unique().tolist()
+        valid_ids = []
+        for x in user_ids_raw:
+            try:
+                valid_ids.append(int(x))
+            except ValueError:
+                print(f"  跳过非数字用户ID: {x}")
+        account_ids = valid_ids
+        print(f"共 {len(account_ids)} 个用户ID")
+
+        # 原始文件线索信息
+        df_input = df_input[df_input["用户ID"].apply(lambda x: str(x).isdigit() if pd.notna(x) else False)]
+        df_input["用户ID_int"] = df_input["用户ID"].astype(int)
+        df_input_map = df_input[["用户ID_int", "线索进线日期", "销售"]].drop_duplicates(subset="用户ID_int")
+
+    else:
+        print("未检测到「用户ID」列，尝试识别手机号...")
+        phones = extract_phones_from_df(df_input)
+        if not phones:
+            print("ERROR: 既没有「用户ID」列，也没有找到手机号，无法继续")
+            sys.exit(1)
+        print(f"提取到 {len(phones)} 个手机号（去重后）")
+
+        conn = get_conn()
+        phone_to_account, account_ids, unmatched_phones = match_phones_to_accounts(phones, conn)
+        conn.close()
+        print(f"匹配到 {len(account_ids)} 个账号，未匹配 {len(unmatched_phones)} 个手机号")
+        if unmatched_phones:
+            print(f"  未匹配手机号: {unmatched_phones}")
+
+        if not account_ids:
+            print("ERROR: 没有任何手机号匹配到账号，无法继续")
+            sys.exit(1)
+
+        use_phone_mode = True
+
+        # 构建手机号 → 线索信息的映射
+        # 找手机号列（第一个含手机号的列）
+        phone_col = None
+        for col in df_input.columns:
+            sample = df_input[col].dropna().head(5).tolist()
+            if any(re.match(r'^1\d{10}$', str(int(v)) if isinstance(v, (int, float)) else str(v))
+                   for v in sample):
+                phone_col = col
+                break
+
+        if phone_col:
+            # 规范化手机号
+            def normalize_phone(val):
+                try:
+                    return str(int(float(val))) if pd.notna(val) else ""
+                except (ValueError, TypeError):
+                    return str(val).strip()
+
+            df_input["_phone"] = df_input[phone_col].apply(normalize_phone)
+            # 提取线索信息列
+            clue_cols = [c for c in ["线索进线日期", "销售"] if c in df_input.columns]
+            if clue_cols:
+                df_input_map = df_input[["_phone"] + clue_cols].drop_duplicates(subset="_phone")
+                df_input_map.rename(columns={"_phone": "手机号"}, inplace=True)
+            else:
+                # 没有线索列，只保留手机号用于后续关联
+                df_input_map = df_input[["_phone"]].drop_duplicates(subset="_phone")
+                df_input_map.rename(columns={"_phone": "手机号"}, inplace=True)
+
+    # ── 公共部分：用 account_ids 查询后续数据 ──
 
     conn = get_conn()
 
@@ -151,12 +293,7 @@ def main():
     conn.close()
     print(f"  退费记录: {len(refund_map)} 条")
 
-    # ── 6. 原始文件线索信息 ──
-    df_input = df_input[df_input["用户ID"].apply(lambda x: str(x).isdigit() if pd.notna(x) else False)]
-    df_input["用户ID_int"] = df_input["用户ID"].astype(int)
-    df_input_map = df_input[["用户ID_int", "线索进线日期", "销售"]].drop_duplicates(subset="用户ID_int")
-
-    # ── 7. 组装结果 ──
+    # ── 6. 组装结果 ──
     df_chars = df_chars.merge(df_accounts, on="account_id", how="left")
 
     # 去掉时区信息（Excel不支持）
@@ -214,11 +351,6 @@ def main():
             if tn in refund_map:
                 has_refund = True
                 total_refund += refund_map[tn]
-        # 也检查 order_status=4
-        for tn in row["trade_nos"]:
-            if tn not in refund_map:
-                # 查订单状态
-                pass  # order_status 已在订单表中
         return pd.Series({"是否退款": "是" if has_refund else "否", "退款金额": total_refund / 100.0})
 
     refund_info = order_agg.apply(calc_refund, axis=1)
@@ -229,9 +361,27 @@ def main():
     df_result = df_result.merge(order_agg, left_on="account_id_int", right_on="account_id", how="left")
     df_result.drop(columns=["account_id"], inplace=True, errors="ignore")
 
-    # 合并原始文件的线索进线日期和销售
-    df_result = df_result.merge(df_input_map, left_on="account_id_int", right_on="用户ID_int", how="left")
-    df_result.drop(columns=["account_id_int", "用户ID_int"], inplace=True, errors="ignore")
+    # ── 合并 线索进线日期、销售 和（手机号模式的）手机号 ──
+    if df_input_map is not None:
+        if use_phone_mode:
+            # 手机号模式：通过 phone_to_account 反查手机号 → 合并线索信息
+            # 先给 df_result 加上手机号列
+            # account_id -> 明文手机号（取第一个匹配到的）
+            account_to_phone = {}
+            for phone, aid in phone_to_account.items():
+                if aid not in account_to_phone:
+                    account_to_phone[aid] = phone
+            df_result["手机号"] = df_result["用户ID"].map(account_to_phone)
+
+            # 用手机号关联线索信息
+            if "线索进线日期" in df_input_map.columns or "销售" in df_input_map.columns:
+                df_result = df_result.merge(df_input_map, on="手机号", how="left")
+        else:
+            # 用户ID模式：原逻辑
+            df_result = df_result.merge(df_input_map, left_on="account_id_int", right_on="用户ID_int", how="left")
+            df_result.drop(columns=["用户ID_int"], inplace=True, errors="ignore")
+
+    df_result.drop(columns=["account_id_int"], inplace=True, errors="ignore")
 
     # 填充空值
     df_result["购买时间"] = df_result["购买时间"].fillna("")
@@ -240,14 +390,20 @@ def main():
     df_result["购买渠道key_from"] = df_result["购买渠道key_from"].fillna("")
     df_result["是否退款"] = df_result["是否退款"].fillna("否")
     df_result["退款金额"] = df_result["退款金额"].fillna(0.0)
-    df_result["线索进线日期"] = df_result["线索进线日期"].fillna("")
-    df_result["销售"] = df_result["销售"].fillna("")
+    df_result["线索进线日期"] = df_result["线索进线日期"].fillna("") if "线索进线日期" in df_result.columns else ""
+    df_result["销售"] = df_result["销售"].fillna("") if "销售" in df_result.columns else ""
+
+    if use_phone_mode and "手机号" in df_result.columns:
+        df_result["手机号"] = df_result["手机号"].fillna("")
 
     # 按用户ID升序排列
     df_result = df_result.sort_values(by=["用户ID", "角色ID"], ascending=True).reset_index(drop=True)
 
-    # 调整列顺序：原始字段靠前
-    col_order = ["用户ID", "线索进线日期", "销售", "角色ID", "用户注册时间", "角色创建时间"] + COL_ORDER + ["购买时间", "购买课包名称", "支付金额", "购买渠道key_from", "是否退款", "退款金额"]
+    # 调整列顺序
+    if use_phone_mode:
+        col_order = ["用户ID", "手机号", "线索进线日期", "销售", "角色ID", "用户注册时间", "角色创建时间"] + COL_ORDER + ["购买时间", "购买课包名称", "支付金额", "购买渠道key_from", "是否退款", "退款金额"]
+    else:
+        col_order = ["用户ID", "线索进线日期", "销售", "角色ID", "用户注册时间", "角色创建时间"] + COL_ORDER + ["购买时间", "购买课包名称", "支付金额", "购买渠道key_from", "是否退款", "退款金额"]
     # 只保留实际存在的列
     col_order = [c for c in col_order if c in df_result.columns]
     df_result = df_result[col_order]
@@ -271,6 +427,11 @@ def main():
     print(f"   完成L1序章(U00 L01): {done_l1} 个角色")
     print(f"   完成L2序章(U00 L01): {done_l2} 个角色")
 
+    if unmatched_phones:
+        print(f"\n⚠️  未匹配到账号的手机号 ({len(unmatched_phones)} 个):")
+        for p in unmatched_phones:
+            print(f"   {p}")
+
 
 if __name__ == "__main__":
     main()