From dc53c4420ebc5dfe510b6ed5fff84fc78aa64eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=BA=AA?= Date: Thu, 4 Jun 2026 08:00:01 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20=E6=AF=8F=E6=97=A5=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=A4=87=E4=BB=BD=20-=202026-06-04=2008:00:01?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vala_skill_hashes | 6 +- AGENTS.md | 47 ++- MEMORY.md | 19 +- SKILL_REGISTRY.md | 34 +- USER.md | 4 +- memory/.dreams/short-term-recall.json | 93 ++++- memory/2026-06-03-1000.md | 24 ++ memory/2026-06-03-1413.md | 105 ++++++ memory/2026-06-03-1421.md | 35 ++ memory/2026-06-03.md | 60 +++ scripts/auto_xingke_query.py | 377 ------------------- scripts/auto_xingke_query_v2.py | 502 -------------------------- scripts/batch_course_records.py | 305 ---------------- scripts/batch_update_sheet.py | 142 -------- scripts/export_103_users.py | 271 ++++++++++++++ scripts/export_b_channel_yifang.py | 330 +++++++++++++++++ scripts/export_phone_md5.py | 135 +++++++ scripts/export_xhs_yifang.py | 358 ++++++++++++++++++ scripts/fill_leads_sheet.py | 501 +++++++++++++++++++++++++ scripts/may230_refresh.py | 280 -------------- scripts/phone_encrypt.py | 17 +- scripts/refresh_may_course_data.py | 397 -------------------- scripts/refresh_may_v2.py | 243 ------------- scripts/sync_sales_lesson_status.py | 360 ++++++++++++++++++ scripts/sync_weiban_to_sales.py | 145 ++++++++ scripts/update_wudi_sales.py | 218 +++++++++++ scripts/write_course_results.py | 105 ------ scripts/write_id_results.py | 70 ---- 28 files changed, 2711 insertions(+), 2472 deletions(-) create mode 100644 memory/2026-06-03-1000.md create mode 100644 memory/2026-06-03-1413.md create mode 100644 memory/2026-06-03-1421.md create mode 100644 memory/2026-06-03.md delete mode 100644 scripts/auto_xingke_query.py delete mode 100644 scripts/auto_xingke_query_v2.py delete mode 100644 scripts/batch_course_records.py delete mode 100644 
scripts/batch_update_sheet.py create mode 100644 scripts/export_103_users.py create mode 100644 scripts/export_b_channel_yifang.py create mode 100644 scripts/export_phone_md5.py create mode 100644 scripts/export_xhs_yifang.py create mode 100644 scripts/fill_leads_sheet.py delete mode 100644 scripts/may230_refresh.py delete mode 100644 scripts/refresh_may_course_data.py delete mode 100644 scripts/refresh_may_v2.py create mode 100644 scripts/sync_sales_lesson_status.py create mode 100644 scripts/sync_weiban_to_sales.py create mode 100644 scripts/update_wudi_sales.py delete mode 100644 scripts/write_course_results.py delete mode 100644 scripts/write_id_results.py diff --git a/.vala_skill_hashes b/.vala_skill_hashes index 8309ce2..8170083 100644 --- a/.vala_skill_hashes +++ b/.vala_skill_hashes @@ -9,8 +9,8 @@ timed-reminder 1280f9abfd1a6ebcc625c71a04505ab0e3ace7366a458e9fef78f87f929232a3 smart-auto-model-switch f94a7b1ff6357c287a374366b339cd06b0dde5ca9e965a71a369ff66b155bf0d vala-component-oops-stat 2e3a3dc8ee252f133256ec5fb8edc031fc25ff602158fd2ddf778ce4d2b961f2 cron-schedule e103cbb1806b28c891b9c856963325086ecaff32edec208f0a841865f26e8f3e -phone-chapter-query 0af8ab324e58838210328b9ff3dad50d812d7b6d980aabb963559b39de1f9616 -vala-order-amortization-stat 3e4b8db169151981349ba40f8155182ea096ddf94fe12f814e761d00c40feb3f -welfare-user-list e4295c144ed52de20241e88374c79e3953153397c59155ba64a40dce9bc0b121 refund-user-learning-analysis 648fd4ae2b29167fd66eab4245bdaaef00242db3131f4919cc02f07ca2a9b59c vala-component-practice-stat 8e768e2641019d27bd41f4647d2d90f24182a0554dad5ad9f4136e9ce0bae147 +phone-chapter-query a28b6bac101d422a5b4f2d0124ada48a14fb9a737da680d5de5501dba4c6b421 +vala-order-amortization-stat c2ba3c2a82cf0c0a43ba9bbb7b2e16b62120f4fe00026212dc04ae4fd45d32ed +welfare-user-list ba2bb1f5bf5a55bfdf852689d90a8d93509dfed2ed74a84226fa5527176fbe28 diff --git a/AGENTS.md b/AGENTS.md index 68a3393..7aaeb33 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -104,6 +104,32 @@ - 
创建/修改飞书文档、多维表格 - 任何你不确定的操作 +## 数字员工团队协作 + +你不是唯一的数字员工。团队中还有以下同事(也是机器人),各有专长: + +| 名字 | 专长 | +|------|------| +| 小编 | 文案写作、内容创作、故事撰写 | +| 小斑 | 数据可视化、报表设计 | +| 小研 | 深度研究、资料分析 | +| 小葵 | 客户服务、用户沟通 | + +### 如何委派任务给其他数字员工 + +当用户请求的任务不属于你的专长范围(数据分析),而属于其他同事的能力范围时,你应该在回复中 **@对方名字** 来委派任务。系统会自动通知对方。 + +**格式:** 在你的回复文本中直接写 `@小编`、`@小斑`、`@小研`、`@小葵` + +**示例场景:** +- 用户说"让小编写个故事" → 你回复中包含 `@小编 请写一个200字的故事` +- 用户说"让小研查一下竞品" → 你回复中包含 `@小研 请帮忙调研一下竞品情况` + +**原则:** +- 只转发明确指向其他同事的任务,不要自作主张替用户决定该找谁 +- 转发时把用户的原始需求清晰传达,不要遗漏关键信息 +- 如果用户的请求属于你自己的能力范围,直接自己做,不要转发 + ## 群聊 在群聊中你是一个参与者,不是任何人的代言人。 @@ -114,18 +140,21 @@ #### 第一步:判断消息来源是否为群聊 -检查消息头部格式。群聊消息的固定格式为: -``` -System: [...] Feishu[xiaobian] group oc_xxx | 发送者 (ou_xxx) [msg:om_xxx] -``` -- 消息头部包含 **`group`** 关键字 → **群聊消息**,进入第二步 -- 消息头部包含 **`DM`** 关键字 → **个人私聊**,正常响应,不受群聊规则限制 +检查消息附带的 **Conversation info** 元数据(JSON格式),查找 `"is_group_chat"` 字段: +- `"is_group_chat": true` → **群聊消息**,进入第二步 +- `"is_group_chat": false` 或无此字段 → **个人私聊**,正常响应,不受群聊规则限制 + +备用判断:如果没有 Conversation info 元数据块,检查消息头部: +- 头部包含 **`group`** 关键字 → 群聊消息 +- 头部包含 **`DM`** 关键字 → 个人私聊 #### 第二步:判断是否被 @ 了 -检查**用户发送的消息正文**(不是消息头部)中是否包含 **`@小溪`** 字样: -- 正文中**包含 `@小溪`** → 被 @ 了,正常回复 -- 正文中**不包含 `@小溪`** → 未被 @,进入**静默模式** +检查消息附带的 **Conversation info** 元数据中的 `"was_mentioned"` 字段: +- `"was_mentioned": true` → 被 @ 了,正常回复 +- `"was_mentioned"` 字段不存在或为 false → 未被 @,进入**静默模式** + +⚠️ **注意:** 飞书会自动将对你的 @提及 从消息正文中删除,所以**不要**通过检查正文中是否有 `@小溪` 来判断是否被 @。必须依赖 `was_mentioned` 元数据字段。 #### 静默模式(群聊中未被 @ 时的行为) diff --git a/MEMORY.md b/MEMORY.md index ed11cab..232e9ed 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -119,7 +119,7 @@ - **转化率 / 7日转化率 / 14日转化率(端内注册转付费,[李承龙确认] 2026-05-11):** - **转化率 = 端内付费用户数 / 注册用户数 × 100%** - **分母:** 按注册日期(`bi_vala_app_account.created_at`)分组,`status=1` 且 `deleted_at IS NULL` 的非测试、未删除账号 - - **分子(含退费):** 分母用户中,在端内(`key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')`)有支付成功订单的去重用户数 + - **分子(含退费):** 分母用户中,在端内(`key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0', 
'app-sales-bj-wd-0')`)有支付成功订单的去重用户数 - **分子(剔除退费):** 同上,但仅剔除端内订单**全部被退费**的用户——即只要用户还有任何一笔未退费的端内订单就保留(退费判定:`bi_refund_order.status=3` 且 `bi_vala_order.order_status=4`) - **订单状态限定:** 端内订单筛选 `order_status IN (3, 4)`,即已完成或已退款 - **时间基准:** 按用户注册日期分组,不限制订单发生时间(7日/14日除外) @@ -130,7 +130,7 @@ - **纯净版新增注册用户数 & 纯净版转化率 [李承龙确认 2026-05-11]:** - **纯净版分母:** 从 `status=1 AND deleted_at IS NULL` 的注册用户中,剔除「只有端外已完成订单(`key_from NOT IN 端内,order_status=3`)且没有任何端内订单」的用户。即:只有那些选择了端外渠道、从未在端内下单的用户才被剔除。 - **保留的用户:** 没有任何订单的纯注册用户 + 有端内订单的用户(无论是否有端外订单) - - **端内订单条件:** `key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')`, `pay_success_date IS NOT NULL`, `order_status IN (3, 4)` + - **端内订单条件:** `key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0', 'app-sales-bj-wd-0')`, `pay_success_date IS NOT NULL`, `order_status IN (3, 4)` - **端外订单条件:** `key_from NOT IN 端内`, `pay_success_date IS NOT NULL`, `order_status = 3` - 基于纯净版分母,转化率 / 7日 / 14日 / 20日转化率的口径不变,只是分母缩小为纯净版用户 - **拟合版转化率 [李承龙确认 2026-05-11,2026-05-12 补充实现细节]:** @@ -158,7 +158,7 @@ - GSV:GMV 减去符合条件的订单中已完成退费的金额总和(单位:元) - 退费率:符合条件的订单中已完成退费的订单数 / 订单总数量 * 100%,保留1位小数 - **渠道映射规则(key_from字段匹配):** - - 端内购买:`app-active-h5-0-0` 或 `app-sales-bj-qhm-0`(两个值匹配任意一个即属于端内购买) + - 端内购买:`app-active-h5-0-0`、`app-sales-bj-qhm-0`、`app-sales-bj-wd-0`(三个值匹配任意一个即属于端内购买) - 端外购买:除上述两个端内匹配值之外的所有`key_from`值均属于端外购买 - 端外销售渠道购买:端外购买中`key_from`以`sales-adp`开头的为销售渠道购买 - 小红书店铺:`newmedia-dianpu-xhs-0-0` @@ -287,17 +287,4 @@ - **原因:** `accountId=0` 代表匿名用户(H5 页面曝光时等步骤),`userId` 为设备/会话级标识,在匿名和登录状态均存在,更适合做漏斗串联 - **例外:** 「领课加微」相关埋点无 `userId` 字段,仅能做事件级统计 -## 行课查询自动回填 [陈逸鸫确认 2026-05-29] -### 触发方式 -1. **定时巡检:** 每天 8:00-22:00 每 30 分钟自动跑一次 `scripts/auto_xingke_query.py` -2. 
**群聊关键词触发:** 在群 `oc_1b01b6f076d39f17be75ca52e585125f` 中任何人说 **「处理行课」** 或 **「跑行课」** 时,立即执行该脚本 - -### 流程 -- **数据来源:** 飞书表格 `RFIJsXT8FhGHhctY4RwczcOfnac`,由大麦(另一个 bot)写入待查询数据 -- **Sheet 2DOxEI — 手机号→ID 匹配:** 读取"待查询ID"行 → `bi_vala_app_account.tel` 脱敏匹配 account_id → 回填 F(用户ID)/G(回填状态)/H(更新时间) 列 → E列标记"已处理" -- **Sheet 55b0eb — 行课记录查询:** 读取"待查询"行 → 查角色→课时完成记录(8张分表)→ 学习总耗时→付费状态→激活状态 → 回填 D(行课记录)/E(更新时间) 列 -- **脚本:** `scripts/auto_xingke_query.py` -- **日志:** `/var/log/xiaoxi_xingke_query.log` -- **匹配规则:** 明文手机号 → XXTEA 加密 → 与 `bi_vala_app_account.tel_encrypt` 精确匹配获取 `id`,仅 `status=1 AND deleted_at IS NULL`。加密实现见 `scripts/phone_encrypt.py`。[李承龙确认 2026-06-01] -- **行课记录格式:** `销售:XXX | 用户:ID | 当前:A1体验课-L1-S0-U00-L01 | 最近行课:2026-05-28 | 学习120min | 已付费` diff --git a/SKILL_REGISTRY.md b/SKILL_REGISTRY.md index c2383c2..6ec24c0 100644 --- a/SKILL_REGISTRY.md +++ b/SKILL_REGISTRY.md @@ -87,18 +87,19 @@ ## Scripts(`~/.openclaw/workspace/scripts/`) -### 行课查询/自动回填 -| 脚本 | 创建来源 | 需求描述 | 创建时间 | -|------|---------|---------|---------| -| `auto_xingke_query.py` | 陈逸鸫确认流程,李承龙确认匹配规则 | 定时巡检飞书表格,手机号→ID匹配→课时记录→回填 | 2026-05-29 | -| `auto_xingke_query_v2.py` | 同上 | V2 版本优化 | 2026-06-01 | +### 行课查询/自动回填(已废弃,2026-06-04 陈逸鸫确认删除) +| 脚本 | 创建来源 | 需求描述 | 创建时间 | 状态 | +|------|---------|---------|---------|------| +| `auto_xingke_query.py` | 陈逸鸫确认流程,李承龙确认匹配规则 | 定时巡检飞书表格,手机号→ID匹配→课时记录→回填 | 2026-05-29 | ❌ 已删除 | +| `auto_xingke_query_v2.py` | 同上 | V2 版本优化 | 2026-06-01 | ❌ 已删除 | +| `batch_course_records.py` | 同上 | 批量课程记录(简化版) | 2026-05-27 | ❌ 已删除 | +| `write_course_results.py` | 行课回填流程 | 写入课程查询结果到飞书表格 | 2026-05-27 | ❌ 已删除 | +| `write_id_results.py` | 行课回填流程 | 写入 ID 匹配结果到飞书表格 | 2026-05-27 | ❌ 已删除 | | `batch_course_record_query.py` | 同上 | 批量课程记录查询 | 2026-06-01 | -| `batch_course_records.py` | 同上 | 批量课程记录(简化版) | 2026-05-27 | | `batch_phone_to_id.py` | 李承龙 | 批量手机号转 account_id | 2026-06-01 | -| `phone_encrypt.py` | 李承龙 | 手机号 XXTEA 加密,用于匹配 tel_encrypt 字段 | 2026-06-01 | +| `phone_encrypt.py` | 李承龙 | 手机号 XXTEA 
加密/解密/MD5,用于匹配 tel_encrypt 字段 | 2026-06-01 | +| `export_phone_md5.py` | 李承龙 | 导出 tel_encrypt → 明文 → MD5 映射 CSV,支持全量/按ID导出,可被其他 skill 调用 | 2026-06-03 | | `phone_match_userid.py` | 李承龙 | 手机号匹配用户 ID | 2026-06-01 | -| `write_course_results.py` | 行课回填流程 | 写入课程查询结果到飞书表格 | 2026-05-27 | -| `write_id_results.py` | 行课回填流程 | 写入 ID 匹配结果到飞书表格 | 2026-05-27 | ### 退费分析 | 脚本 | 创建来源 | 需求描述 | 创建时间 | @@ -293,5 +294,18 @@ --- -> **最后更新:** 2026-06-02 | 李承龙确认登记规则 +### fill_leads_sheet.py +- **创建来源:** 李承龙(`ou_e63ce6b760ad39382852472f28fbe2a2`) +- **需求描述:** 从「端内析出leads数据」飞书表格读取曲慧萌/吴迪填写的手机号 → XXTEA加密匹配 account_id → 查询注册日期、端内转化、退费、U0体验课完成日期 → 自动回填表格 +- **功能说明:** 支持 dry-run 预览,自动匹配 bi_vala_app_account.tel_encrypt,查询端内渠道(含 app-sales-bj-wd-0)的转化/退费状态,以及 L1-U0 和 L2-U0 共10节体验课的完成日期 +- **创建时间:** 2026-06-03 + +### sync_sales_lesson_status.py +- **创建来源:** 陈逸鸫(`8315a98e`) +- **需求描述:** 每30分钟自动从销售表(小龙/吴迪)读取UID → 查DB课时完成数 → 回填D列体验节数 → 汇总写入行课漏斗M/P/S/V/Y列(≥1/≥2/≥3/≥4/≥5节人数) +- **功能说明:** 替代旧的查询表回填流程,直接查DB写销售表+行课漏斗,实现行课数据全自动更新 +- **创建时间:** 2026-06-03 +- **Cron:** `*/30 * * * *` `/etc/cron.d/xiaoxi_sales_lesson_sync` + +> **最后更新:** 2026-06-03 | 陈逸鸫确认行课自动同步方案 > **标记说明:** 「来源不可追溯」= 创建于登记规则生效之前,原始需求来源已无法从记忆/会话中确认 diff --git a/USER.md b/USER.md index eccc4db..da33619 100644 --- a/USER.md +++ b/USER.md @@ -39,6 +39,8 @@ |------|-------------| | 王虹茗 | `ou_7ef247331804edf255f53e3be15963f4` | | 陈逸鸫 | `8315a98e` | +| 曲慧萌 | `8c654e1e` | +| 吴迪 | `3561c777` | > ⚠️ 以上用户拥有全部数据查询权限,但其个人信息、查询内容、对话记录**禁止写入 MEMORY.md(长期记忆)**,仅可记录在短期日记忆中用于会话连续性。 @@ -48,7 +50,7 @@ | 姓名 | 飞书 user_id | 可查询范围 | |------|-------------|-----------| -| 许悦 | `ou_9fa20071e6ee1b3e1e74805616f3f5cb` | 全部业务数据 | +| 许悦 | `ou_9fa20071e6ee1b3e1e74805616f3f5cb` | 除GMV、GSV、退款等金额相关数据外的全部业务数据 | ### 第三级:其他用户 diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index c9f7f64..eed3ab6 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": 
"2026-06-02T04:25:20.147Z", + "updatedAt": "2026-06-03T09:16:13.921Z", "entries": { "memory:memory/2026-05-06.md:1:20": { "key": "memory:memory/2026-05-06.md:1:20", @@ -149,20 +149,23 @@ "endLine": 17, "source": "memory", "snippet": "# 2026-05-09 工作日志 ## 王虹茗 - 销售线索用户分析 - **用户:** 王虹茗(user_id: af61e4gc) - **需求:** 用 `lead_user_analysis.py` 脚本处理线索用户 Excel(659条,2026年3月,销售:姜小龙/Bob/Tom/吴迪) - **权限处理:** 王虹茗不在 USER.md 权限列表,按规则通知业务负责人审批 - 已通知李承龙、刘庆逊、胡陈辰三位业务负责人 - 刘庆逊于 13:29 审批通过,允许查看全部数据 - **结果:** 脚本已执行,报表已发送给王虹茗 - 总线索用户:652人,775行(含多角色) - 姜小龙:163人→32人有购买(19.6%),退费5人 - Bob:202人→3人有购买(1.5%),退费1人 - Tom:171人→5人有购买(2.9%),退费2人 - 吴迪:116人→19人有购买(16.4%),退费2人 - 输出文件:`output/销售线索_用户分析.xlsx`", - "recallCount": 2, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-14T06:31:19.437Z", - "lastRecalledAt": "2026-05-19T11:28:55.602Z", + "lastRecalledAt": "2026-06-03T03:29:20.542Z", "queryHashes": [ "49e79af44bc3", - "154f8d62fd0b" + "154f8d62fd0b", + "5f57ba3c20a6", + "b76e8457b2ee" ], "recallDays": [ "2026-05-14", - "2026-05-19" + "2026-05-19", + "2026-06-03" ], "conceptTags": [ "user-id", @@ -213,18 +216,20 @@ "endLine": 13, "source": "memory", "snippet": "## 行课记录批量查询 (1676条) [陈逸鸫 请求, 李承龙 确认权限] - 表格: https://makee-interactive.feishu.cn/sheets/RFIJsXT8FhGHhctY4RwczcOfnac (Sheet1, sheet_id=55b0eb) - 查询列: D列(小溪返回行课记录) + E列(更新时间), C列(请求状态→已返回) - 手机号匹配(column J): 961行有用户ID直接查, 715行无ID需手机号匹配, 匹配成功119个, 596个未匹配 - 数据库查询: 1077个去重用户, 1065有课程分配, 1192个角色有行课记录, 201已付费 - 输出格式: `销售:xxx | 用户:xxx | 当前:{level}{type}-{chapter} | 最近行课:date | 学习{X}min | {付费状态}` - 脚本: scripts/batch_course_record_query.py - 待查询行1676条全部处理完成, 写回成功", - "recallCount": 1, + "recallCount": 2, "dailyCount": 0, "groundedCount": 0, - "totalScore": 1, + "totalScore": 2, "maxScore": 1, "firstRecalledAt": "2026-05-26T02:40:52.513Z", - "lastRecalledAt": "2026-05-26T02:40:52.513Z", + "lastRecalledAt": "2026-06-03T09:16:13.921Z", "queryHashes": [ - 
"4beee860f635" + "4beee860f635", + "0acdfa16ef1e" ], "recallDays": [ - "2026-05-26" + "2026-05-26", + "2026-06-03" ], "conceptTags": [ "sheet-id", @@ -433,6 +438,72 @@ "计算", "逻辑" ] + }, + "memory:memory/2026-06-02.md:1:30": { + "key": "memory:memory/2026-06-02.md:1:30", + "path": "memory/2026-06-02.md", + "startLine": 1, + "endLine": 30, + "source": "memory", + "snippet": "# 2026-06-02 工作日志 ## 微伴线索更新 - [陈逸鸫] 提供微伴导出数据,更新6/1-6/2线索 - 映射确认:益达老师 = 小龙 - 写入结果:小龙表+50条(6/1 25+6/2 25),吴迪表+15条(6/1) - ⚠️ 首次写入时重复了(小龙24行+吴迪2行),已清理 - 每日线索汇总已修正:6/1 小龙25 吴迪15 / 6/2 小龙25 ## Tom/Bob 6月不接小红书线索 - [陈逸鸫] 告知 Tom 和 Bob 6月开始不接小红书线索了 - 微伴数据确认6/1-6/2 Tom/Bob 新增为0 ## 订单汇总 2smjwA 全量覆盖进展 - Cursor 确认方案:数据库 bi_vala_order 为唯一源,全量覆盖 2smjwA - 数据库匹配到 390 单(销转团队线索关联),按月:3月88/4月158/5月138/6月6 - 看板去重后 406 单,差异 40 单已分类: - 有uid无订单 8单 - 金额对不上 6单 - 未注册用户 10单 - 昵称为空 2单 - 2025年订单 6单(不纳入) - 重复录入 1单(孙婧) - 已有但key比对误差 7单 - 26 单待杨羽确认,清单已发群 - 闸门:杨羽确认 → v2 数据 → 备份→清空→覆盖→挂定时 ## 看板全量审计 - 13 张 sheet 全部扫完,公式引用正确,无硬值 - Cursor 补修:关键投放数据 Row131 裸IF包IFERROR、销售结算 M1/M2 废弃清空、每日线索月合计行金色", + "recallCount": 2, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 2, + "maxScore": 1, + "firstRecalledAt": "2026-06-03T00:01:27.128Z", + "lastRecalledAt": "2026-06-03T03:29:20.542Z", + "queryHashes": [ + "5f57ba3c20a6", + "b76e8457b2ee" + ], + "recallDays": [ + "2026-06-03" + ], + "conceptTags": [ + "备份", + "更新6/1-6/2线索", + "6/1", + "6/2", + "tom/bob", + "微伴数据确认6/1-6/2", + "bi-vala-order", + "3月88/4月158/5月138/6月6" + ] + }, + "memory:memory/2026-06-02.md:26:37": { + "key": "memory:memory/2026-06-02.md:26:37", + "path": "memory/2026-06-02.md", + "startLine": 26, + "endLine": 37, + "source": "memory", + "snippet": "- 闸门:杨羽确认 → v2 数据 → 备份→清空→覆盖→挂定时 ## 看板全量审计 - 13 张 sheet 全部扫完,公式引用正确,无硬值 - Cursor 补修:关键投放数据 Row131 裸IF包IFERROR、销售结算 M1/M2 废弃清空、每日线索月合计行金色 - 27 条脚本重复行仍在,覆盖前不对外用销转合计 ## 分工确认(与 Cursor) - Bot 看板 = 我维护(销转/结算/落单渠道/过程数据) - Base/指挥舱/漏斗 = Cursor 维护 - 日常流程已定稿", + "recallCount": 4, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 
4, + "maxScore": 1, + "firstRecalledAt": "2026-06-03T03:17:27.367Z", + "lastRecalledAt": "2026-06-03T03:29:20.542Z", + "queryHashes": [ + "f747bc3124a0", + "a9c408e7f346", + "6cbc7cc2d6ff", + "b76e8457b2ee" + ], + "recallDays": [ + "2026-06-03" + ], + "conceptTags": [ + "备份", + "m1/m2", + "销转/结算/落单渠道/过程数据", + "base/指挥舱/漏斗", + "闸门", + "确认", + "数据", + "覆盖" + ] } } } diff --git a/memory/2026-06-03-1000.md b/memory/2026-06-03-1000.md new file mode 100644 index 0000000..831f869 --- /dev/null +++ b/memory/2026-06-03-1000.md @@ -0,0 +1,24 @@ +# Session: 2026-06-03 10:00:35 GMT+8 + +- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 +- **Session ID**: 2ac96792-cbd7-4f8b-a15f-d621f7bca95b +- **Source**: feishu + +## Conversation Summary + +user: 运行monthly_channel_order_report.py,跑一下5月份的数据 +assistant: 报表已生成 ✅ + +**2026年5月 学而思/讯飞/希沃/京东方渠道汇总:** + +| 渠道 | 订单数 | GMV(元) | 新增注册 | 分成 | +|------|--------|-----------|----------|------| +| 学而思 | 4 | 6,596 | - | CPS 2,968.2 | +| 讯飞 | 5 | 2,995 | 766 | CPA 5,362 | +| 希沃 | 1 | 1,999 | - | CPS 799.6 | +| 京东方 | 3 | 3,197 | - | CPS 959.1 | +| **合计** | **13** | **14,787** | **766** | **10,088.9** | + +文件路径:`output/学而思讯飞希沃京东方_2026-05_渠道汇总.xlsx`,4个Sheet分别对应4个渠道,每天一行明细。 +user: 把这个excel发给我 +assistant: 已发送 ✅ 文件已通过飞书发给你了。 diff --git a/memory/2026-06-03-1413.md b/memory/2026-06-03-1413.md new file mode 100644 index 0000000..7647f1b --- /dev/null +++ b/memory/2026-06-03-1413.md @@ -0,0 +1,105 @@ +# Session: 2026-06-03 14:13:50 GMT+8 + +- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 +- **Session ID**: db61f0f5-442d-4804-adfa-ad619f609f70 +- **Source**: feishu + +## Conversation Summary + +assistant: 解密结果:**15944281022** +user: 你会手机号MD5加密么 +assistant: 会的。Python 标准库 `hashlib` 直接就能做: + +```python +import hashlib +hashlib.md5("13800138000".encode()).hexdigest() +``` + +需要我加密哪个手机号?发过来就行。 +user: 我需要你帮我写一个脚本,就是可以将bi_vala_app_account表里面的tel_encrypt,通过上面的解密方式解密成手机号明文,再用MD5加密。 +assistant: 
先确认一下:这个脚本的输出你希望是什么形式? + +- **A)** 导出为 CSV 文件(`account_id, tel_encrypt, phone_md5`) +- **B)** 直接更新回 `bi_vala_app_account` 表(新增一个 `phone_md5` 字段) +- **C)** 其他格式 + +另外是全量处理 `tel_encrypt` 不为空的所有记录对吧? +user: 导出为csv文件。另外我是希望可以在其他的skill里面调用这个脚本,来实现对部分特定的用户的手机号的解密加密操作。 +assistant: 明白了,两件事:1)全量导出 CSV;2)提供可被其他 skill 调用的函数。 + +我先在 `phone_encrypt.py` 里加 MD5 函数,再写导出脚本。 + + +assistant: 先验证一下脚本能跑通: + + +assistant: 加密解密 + MD5 都正常。测试一下导出脚本(先取少量数据验证): + + +assistant: 密码变量名是 `PG_ONLINE_PASSWORD`,修正一下: + + +assistant: 验证通过。确认下全量有多少条记录: + + +assistant: 全量 29489 条。现在登记到 SKILL_REGISTRY.md: + + +assistant: 完成了。汇总如下: + +--- + +### 改动内容 + +**1. `scripts/phone_encrypt.py` — 新增两个 MD5 函数** + +```python +phone_md5(phone: str) -> str # 明文 → MD5 +tel_encrypt_to_md5(tel_encrypt: str) # tel_encrypt → 解密 → MD5(一步到位) +``` + +**2. `scripts/export_phone_md5.py` — 导出脚本** + +两种用法: + +| 场景 | 命令 | +|------|------| +| 全量导出 | `python3 export_phone_md5.py` | +| 按 account_id 导出 | `python3 export_phone_md5.py --ids 57,308,100` | +| 指定输出路径 | `python3 export_phone_md5.py --output /path/to/out.csv` | + +**被其他 skill 调用:** + +```python +from scripts.export_phone_md5 import query_phone_md5 + +# 全量 +results = query_phone_md5() +# 指定用户 +results = query_phone_md5([57, 308, 100]) +# 返回: [{account_id, tel_encrypt, phone_plain, phone_md5}, ...] +``` + +**输出 CSV 格式:** `account_id, tel_encrypt, phone_plain, phone_md5` + +--- + +已验证通过,全量 **29,489 条**记录。要不要我现在跑一次全量导出? 
+user: 所以后面我应该用哪个脚本来实现tel_encrypt解密再MD5加密的操作呢 +assistant: 直接用 `phone_encrypt.py`,核心函数都在这里面: + +```python +from scripts.phone_encrypt import tel_encrypt_to_md5 + +md5 = tel_encrypt_to_md5("5WeooSK74KICOTX-LDEGhg..") +# → "ec7df1c501d154c5da6c873d9cb051e4" +``` + +`export_phone_md5.py` 是专门做全量/按ID导出 CSV 用的,它内部也是调 `phone_encrypt.py`。日常在其他 skill 里做解密+MD5,只引 `phone_encrypt.py` 就够了,它现在提供了完整的四个函数: + +| 函数 | 作用 | +|------|------| +| `encrypt_phone(phone)` | 明文 → tel_encrypt | +| `decrypt_phone(tel_encrypt)` | tel_encrypt → 明文 | +| `phone_md5(phone)` | 明文 → MD5 | +| `tel_encrypt_to_md5(tel_encrypt)` | tel_encrypt → 解密 → MD5(一步到位) | diff --git a/memory/2026-06-03-1421.md b/memory/2026-06-03-1421.md new file mode 100644 index 0000000..4babd93 --- /dev/null +++ b/memory/2026-06-03-1421.md @@ -0,0 +1,35 @@ +# Session: 2026-06-03 14:21:44 GMT+8 + +- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 +- **Session ID**: 5cc0ed1a-1d00-4ef6-aaad-59001f1c9e88 +- **Source**: feishu + +## Conversation Summary + +user: 我想看看keyfrom包含xhs的,都有哪些 +assistant: key_from 包含 `xhs` 的共有 16 个渠道,汇总如下: + +| 渠道 | 订单数 | GMV(元) | 退费订单 | 退费金额(元) | GSV(元) | 退费率 | +|------|--------|---------|----------|-------------|---------|--------| +| newmedia-daren-xhs-晚柠也是个妈妈了-0 | 917 | 2,866,442 | 229 | 676,834 | 2,189,608 | 25.0% | +| newmedia-daren-xhs-学霸老王讲真话-0 | 123 | 390,587 | 58 | 184,699 | 205,888 | 47.2% | +| newmedia-dianpu-xhs-0-0 | 96 | 186,692 | 25 | 44,972 | 141,720 | 26.0% | +| newmedia-daren-xhs-学霸三人行-0 | 79 | 238,271 | 33 | 89,952 | 148,319 | 41.8% | +| newmedia-daren-xhs-百克力-0 | 67 | 135,932 | 28 | 55,972 | 79,960 | 41.8% | +| newmedia-daren-xhs-开心妈妈学习宝藏-0 | 62 | 123,938 | 27 | 53,973 | 69,965 | 43.5% | +| newmedia-daren-xhs-宣儿麻麻-0 | 36 | 132,727 | 13 | 41,977 | 90,750 | 36.1% | +| newmedia-daren-xhs-四个娃组合生活-0 | 21 | 69,162 | 11 | 36,380 | 32,782 | 52.4% | +| newmedia-daren-xhs-盈姐-0 | 10 | 19,990 | 5 | 9,995 | 9,995 | 50.0% | +| newmedia-daren-xhs-哈佛亮爸教育规划-0 | 9 | 17,991 | 2 
| 3,998 | 13,993 | 22.2% | +| newmedia-daren-xhs-小暖阿姨慢一点-0 | 8 | 25,586 | 3 | 10,794 | 14,792 | 37.5% | +| stream-xhs-0-0-0 | 2 | 7,196 | 0 | 0 | 7,196 | 0% | +| newmedia-daren-xhs-瓦拉英语-0 | 1 | 3,598 | 1 | 3,598 | 0 | 100% | +| newmedia-daren-xhs-海淀柳老师-0 | 1 | 3,598 | 0 | 0 | 3,598 | 0% | +| newmedia-daren-xhs-念妈讲学习规划-0 | 1 | 3,598 | 1 | 3,598 | 0 | 100% | +| newmedia-daren-xhs-🎀海淀贝妈-0 | 1 | 1,999 | 0 | 0 | 1,999 | 0% | +| **合计** | **1,434** | **4,252,307** | **436** | **1,216,742** | **3,035,565** | **30.4%** | + +主要分三类: +- **达人直播**(`newmedia-daren-xhs-*`):14 个达人,合计 1,336 单,其中「晚柠也是个妈妈了」占绝对大头(917 单 / 286.6 万 GMV) +- **小红书店铺**(`newmedia-dianpu-xhs-0-0`):96 单,18.7 万 GMV +- **直播流**(`stream-xhs-0-0-0`):仅 2 单 diff --git a/memory/2026-06-03.md b/memory/2026-06-03.md new file mode 100644 index 0000000..ccbf3f9 --- /dev/null +++ b/memory/2026-06-03.md @@ -0,0 +1,60 @@ +# 2026-06-03 工作日志 + +## 端内析出leads数据表格 +- **表格链接:** https://makee-interactive.feishu.cn/sheets/FA3xsw3kph4pdatKlUrcyPgInAc +- **Sheet:** 曲慧萌(7f0e35)、吴迪(1K3O6s)、统计(scyF3H) +- **脚本:** `scripts/fill_leads_sheet.py` +- **触发暗号:** 「跑一下端内销售数据」 +- **流程:** 读取手机号 → XXTEA加密匹配 account_id → 回填注册日期/转化/退费/U0完成日期 → 汇总统计 sheet +- **统计口径(李承龙确认):** + - 转化率 = 未退费转化leads / 总leads + - 退费率 = 退费leads / 已转化leads + - 完成率 = 完成该课的leads / 总leads +- **注意:** C列(析出日期)由销售手动填写,脚本不覆盖;统计值写小数配合百分比格式 +- **端内渠道:** app-active-h5-0-0、app-sales-bj-qhm-0、app-sales-bj-wd-0(新增吴迪渠道) + +## 权限变更 +- 曲慧萌(8c654e1e)和吴迪(3561c777)加入 USER.md 第一级完整权限用户 +- 许悦权限调整为:除GMV/GSV/退款等金额相关数据外的全部业务数据 +- 表格编辑权限已授予李承龙、曲慧萌、吴迪 + +## SkillHub 同步 +- 16:40 推送 3 个技能:phone-chapter-query、vala-order-amortization-stat、welfare-user-list +- 已通知李若松 + +## Bot看板全量审计 & 吴迪D列数据修复 [陈逸鸫] + +### 审计范围 +- Bot看板:`NoZqsFi47hIOHEt9j8WcfRtbnug` +- 审计Sheet:每日线索、关键投放、过程数据、销转总览、销售结算汇总、小龙/吴迪/成都销售表 + +### 审计结果 +- 每日线索6月:小龙70+吴迪20=90 ✓,Bob/Tom=0(6月起不接小红书线索,正常) +- 关键投放6月汇总C=90 ✓,日明细49/37/4 ✓ +- 过程数据J/N/R/V/Z COUNTIFS与DB一致 ✓ +- 销转总览公式正确,6月订单=0 ✓ +- 销售结算汇总:成都E=182来自5/21-5/31结算周期,非自然月6月 ✓ + +### 发现的问题 +1. 
关键投放6/3代理明细空(C=4但谦禾/云智/YTL全空)→ Cursor后续修 +2. 关键投放C列是数值非SUM公式 → Cursor后续修 +3. 杨羽26单闸门未开,订单汇总不动不覆盖不删重复行 + +### 吴迪D列数据错乱修复 +- **现象:** 吴迪sheet(f975f0) D列(体验节数)与DB不符,24个有UID行中19个不匹配 +- **根因:** cron日志显示更新行数剧烈波动(1→250→68→189→58→251→56→251→60),19:30 cron更新251行(异常大量),之后D列被覆盖,20:00 cron仅修复60行(增量),剩余错乱行未修复 +- **修复:** 手动跑脚本 `sync_sales_lesson_status.py`,吴迪D列更新265行,全部与DB对齐 +- **后续:** cron每30分钟继续运行,保持同步 + +### Cursor公式变更验证(多次) +- 过程数据L/P/T/X/AB已从读S列改为读D列(体验节数≥N)+H列(有UID) ✓ +- 过程数据F/H注册=H列纯数字UID ✓ +- 过程数据J/N/R/V/Z=COUNTIFS读销售表D列 ✓ +- 小龙J=12/L=12, N=8/P=8, R=3/T=3, V=2/X=2, Z=1/AB=1 ✓ +- 吴迪J=11/L=10(1人D≥1但H非数字,L少1合理)✓ +- 成都Bob/Tom全0 ✓ +- 已删除行课漏斗tab ✓ + +### 待跟进 +- Cursor修复关键投放6/3代理空归因和C列公式化 +- 监控cron持续正常运行 diff --git a/scripts/auto_xingke_query.py b/scripts/auto_xingke_query.py deleted file mode 100644 index 39eb9a9..0000000 --- a/scripts/auto_xingke_query.py +++ /dev/null @@ -1,377 +0,0 @@ -#!/usr/bin/env python3 -""" -行课查询自动回填 — 从飞书表格读取待处理数据,匹配手机号→ID、查询行课记录并回填 -执行频率:每30分钟 cron 巡检 + 群聊关键词触发 -归属 Agent:小溪 (xiaoxi) - -流程: - 1. 读取 Sheet 2DOxEI → 找出"待查询ID"行 → 手机号脱敏匹配 account_id → 回填 F/G/H 列 - 2. 
读取 Sheet 55b0eb → 找出"待查询"行 → 查行课记录(进度/耗时/付费) → 回填 D/E 列 -""" -import json, requests, os, re, sys, psycopg2 -from datetime import datetime -from collections import defaultdict - -SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, SCRIPTS_DIR) -from phone_encrypt import encrypt_phone - -# ── 配置 ── -PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" -PG_PORT = 28591 -PG_USER = "ai_member" -PG_DB = "vala_bi" - -SPREADSHEET_TOKEN = "RFIJsXT8FhGHhctY4RwczcOfnac" -CRED_DIR = "/root/.openclaw/credentials/xiaoxi" - -SHEET_ID_QUERY = "2DOxEI" # 手机号→ID 匹配 -SHEET_COURSE = "55b0eb" # 行课记录查询 - -LOG_FILE = "/var/log/xiaoxi_xingke_query.log" - -def log(msg): - ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - line = f"[{ts}] {msg}" - print(line) - with open(LOG_FILE, "a") as f: - f.write(line + "\n") - -def get_pg_password(): - secrets_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "secrets.env") - with open(secrets_path) as f: - for line in f: - if line.startswith("PG_ONLINE_PASSWORD="): - return line.strip().split("=", 1)[1].strip("'\"") - -def get_fs_token(): - with open(os.path.join(CRED_DIR, "config.json")) as f: - cfg = json.load(f) - resp = requests.post( - "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", - json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, - timeout=15 - ) - return resp.json()["tenant_access_token"] - -def read_sheet(token, sheet_id): - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}" - resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30) - data = resp.json() - if data.get("code") != 0: - raise RuntimeError(f"读取Sheet失败: {data}") - return data["data"]["valueRange"]["values"] - -def put_values(token, sheet_id, range_str, values): - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" - body = {"valueRange": {"range": 
f"{sheet_id}!{range_str}", "values": values}} - resp = requests.put(url, headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - }, json=body, timeout=30) - return resp.json() - -def encrypt_phone_local(phone): - phone = str(phone).strip() - if "." in phone: - parts = phone.split(".") - if parts[1] in ("0", "00"): - phone = parts[0] - if re.match(r"^1\d{10}$", phone): - return encrypt_phone(phone) - return None - -def batch_in(cur, sql_tpl, params, chunk=500): - results = [] - for i in range(0, len(params), chunk): - batch = params[i:i+chunk] - ph = ",".join(["%s"] * len(batch)) - cur.execute(sql_tpl % ph, batch) - results.extend(cur.fetchall()) - return results - -# ── Step 1: 手机号→ID 匹配 ── -def process_id_matching(token, conn): - log("Step 1: 手机号→ID 匹配") - rows = read_sheet(token, SHEET_ID_QUERY) - pending = [] - for idx, row in enumerate(rows[1:], start=1): - if len(row) > 4 and str(row[4]).strip() == "待查询ID": - phone = str(row[1]).strip() if len(row) > 1 and row[1] else "" - pending.append({"row_idx": idx + 1, "phone": phone}) - - log(f" 待查询ID: {len(pending)}") - if not pending: - log(" 无待处理, 跳过") - return {"processed": 0, "matched": 0} - - phones_raw = list(set(r["phone"] for r in pending if r["phone"])) - valid_phones = [(p, encrypt_phone_local(p)) for p in phones_raw if encrypt_phone_local(p)] - enc_list = list(set(m[1] for m in valid_phones)) - - cur = conn.cursor() - enc_to_aid = {} - for i in range(0, len(enc_list), 500): - batch = enc_list[i:i+500] - ph = ",".join(["%s"] * len(batch)) - cur.execute( - f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({ph}) AND status=1 AND deleted_at IS NULL", - batch - ) - for aid, tel_enc in cur.fetchall(): - if tel_enc not in enc_to_aid: - enc_to_aid[tel_enc] = aid - cur.close() - - phone_to_aid = {} - for phone, enc in valid_phones: - if enc in enc_to_aid: - phone_to_aid[phone] = enc_to_aid[enc] - - log(f" 匹配成功: {len(phone_to_aid)}, 未匹配: 
{len(valid_phones)-len(phone_to_aid)}") - - # 组装回填数据 - now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - results = [] - for r in pending: - phone = r["phone"] - if phone in phone_to_aid: - results.append({"row": r["row_idx"], "user_id": str(phone_to_aid[phone]), "status": "已回填"}) - else: - results.append({"row": r["row_idx"], "user_id": "", "status": "未查到"}) - - results.sort(key=lambda x: x["row"]) - - # 分批写入 (按 contiguous rows 分组) - groups = [] - cur_grp = [] - for r in results: - if not cur_grp or r["row"] == cur_grp[-1]["row"] + 1: - cur_grp.append(r) - else: - groups.append(cur_grp) - cur_grp = [r] - if cur_grp: - groups.append(cur_grp) - - for g in groups: - sr, er = g[0]["row"], g[-1]["row"] - f_vals = [[r["user_id"]] for r in g] - g_vals = [[r["status"]] for r in g] - h_vals = [[now_str] for r in g] - put_values(token, SHEET_ID_QUERY, f"F{sr}:F{er}", f_vals) - put_values(token, SHEET_ID_QUERY, f"G{sr}:G{er}", g_vals) - put_values(token, SHEET_ID_QUERY, f"H{sr}:H{er}", h_vals) - # 将"请求状态"列 E 标记为已处理 - e_vals = [["已处理"] for _ in g] - put_values(token, SHEET_ID_QUERY, f"E{sr}:E{er}", e_vals) - - log(f" ID回填完成: {len(results)} 行, 匹配{len(phone_to_aid)}") - return {"processed": len(results), "matched": len(phone_to_aid)} - - -# ── Step 2: 行课记录查询 ── -def process_course_records(token, conn): - log("Step 2: 行课记录查询") - rows = read_sheet(token, SHEET_COURSE) - pending = [] - for idx, row in enumerate(rows[1:], start=1): - if len(row) > 2 and str(row[2]).strip() == "待查询": - uid = str(row[1]).strip() if len(row) > 1 and row[1] else "" - sales = str(row[0]).strip() if len(row) > 0 and row[0] else "" - pending.append({"row_idx": idx + 1, "user_id": uid, "sales": sales}) - - log(f" 待查询: {len(pending)}") - if not pending: - log(" 无待处理, 跳过") - return {"processed": 0, "with_records": 0} - - # 解析有效 user_id - valid = [] - for r in pending: - try: - aid = int(float(r["user_id"])) - if aid > 0: - r["account_id"] = aid - valid.append(r) - except (ValueError, TypeError): - pass 
- - log(f" 有效用户ID: {len(valid)}") - if not valid: - return {"processed": 0, "with_records": 0} - - uid_set = list(set(r["account_id"] for r in valid)) - - # 获取角色 - cur = conn.cursor() - account_chars = defaultdict(list) - char_to_account = {} - rc = batch_in(cur, - "SELECT account_id, id, nickname FROM bi_vala_app_character WHERE account_id IN (%s) AND nickname IS NOT NULL AND nickname != '' AND deleted_at IS NULL", - uid_set - ) - for aid, cid, nick in rc: - account_chars[aid].append(cid) - char_to_account[cid] = aid - char_ids = list(char_to_account.keys()) - - # 课程映射 - cur.execute("SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson") - chapter_map = {} - for ch_id, cl, cs, cu, cl2 in cur.fetchall(): - chapter_map[ch_id] = (cl or "", cs or "", cu or "", cl2 or "") - - # 课时完成记录 - char_plays = defaultdict(lambda: {"latest_time": None, "latest_chapter": None, "total_ms": 0}) - for tbl_idx in range(8): - table = f"bi_user_chapter_play_record_{tbl_idx}" - try: - cur.execute( - f"SELECT user_id, chapter_id, created_at FROM {table} WHERE play_status=1 AND deleted_at IS NULL AND user_id = ANY(%s)", - (char_ids,) - ) - for uid, ch_id, created_at in cur.fetchall(): - ch_data = chapter_map.get(ch_id) - if ch_data: - rec = char_plays[uid] - if rec["latest_time"] is None or created_at > rec["latest_time"]: - rec["latest_time"] = created_at - rec["latest_chapter"] = (ch_id, ch_data) - except Exception as e: - log(f" 警告 {table}: {e}") - - # 学习总耗时 - for tbl_idx in range(8): - table = f"bi_user_component_play_record_{tbl_idx}" - try: - cur.execute( - f"SELECT user_id, SUM(COALESCE(interval_time,0)) FROM {table} WHERE user_id = ANY(%s) AND deleted_at IS NULL GROUP BY user_id", - (char_ids,) - ) - for uid, total_ms in cur.fetchall(): - if uid in char_plays: - char_plays[uid]["total_ms"] += (total_ms or 0) - except Exception as e: - log(f" 警告 {table}: {e}") - - # 付费状态 - ph = ",".join(["%s"] * len(uid_set)) - cur.execute( - f"SELECT 
account_id, COUNT(*) FROM bi_vala_order WHERE account_id IN ({ph}) AND pay_success_date IS NOT NULL AND order_status=3 AND deleted_at IS NULL GROUP BY account_id", - uid_set - ) - paid = {r[0]: r[1] for r in cur.fetchall()} - - # 激活状态 - try: - cur.execute( - f"SELECT t.account_id, t.season_package_level FROM bi_vala_seasonal_ticket t INNER JOIN bi_vala_app_account a ON t.account_id=a.id AND a.status=1 WHERE t.account_id IN ({ph}) AND t.status=1 AND t.deleted_at IS NULL AND t.season_package_level IN ('A1','A2')", - uid_set - ) - activation = {} - for aid, lvl in cur.fetchall(): - if aid not in activation: - activation[aid] = lvl - except: - activation = {} - - cur.close() - - # 组装结果 - now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - results = [] - - for r in valid: - aid = r["account_id"] - chars = account_chars.get(aid, []) - - best_time = None - best_ch = None - total_ms = 0 - - for cid in chars: - play = char_plays.get(cid) - if play and play["latest_chapter"]: - if best_time is None or play["latest_time"] > best_time: - best_time = play["latest_time"] - best_ch = play["latest_chapter"] - total_ms += play["total_ms"] - - if not best_ch: - p = "已付费" if aid in paid else "未付费" - record = f"销售:{r['sales']} | 用户:{aid} | 当前:无行课记录 | 最近行课:无 | 学习0min | {p}" - else: - ch_id, (cl, cs, cu, cl2) = best_ch - full_label = f"{cl}-{cs}-{cu}-{cl2}" - act = activation.get(aid, "") - prefix = f"{act}体验课" if act else f"{cl}体验课" - current = f"{prefix}-{full_label}" - recent = best_time.strftime("%Y-%m-%d") if best_time else "无" - total_min = round(total_ms / 60000, 1) - if total_min == int(total_min): - total_min = int(total_min) - p = "已付费" if aid in paid else "未付费" - record = f"销售:{r['sales']} | 用户:{aid} | 当前:{current} | 最近行课:{recent} | 学习{total_min}min | {p}" - - results.append({"row": r["row_idx"], "record": record}) - - # 回填 - results.sort(key=lambda x: x["row"]) - groups = [] - cur_grp = [] - for r in results: - if not cur_grp or r["row"] == cur_grp[-1]["row"] + 1: - 
cur_grp.append(r) - else: - groups.append(cur_grp) - cur_grp = [r] - if cur_grp: - groups.append(cur_grp) - - for g in groups: - sr, er = g[0]["row"], g[-1]["row"] - d_vals = [[r["record"]] for r in g] - e_vals = [[now_str] for r in g] - put_values(token, SHEET_COURSE, f"D{sr}:D{er}", d_vals) - put_values(token, SHEET_COURSE, f"E{sr}:E{er}", e_vals) - - with_records = sum(1 for r in results if "无行课记录" not in r["record"]) - log(f" 行课回填完成: {len(results)} 行, 有记录: {with_records}") - return {"processed": len(results), "with_records": with_records} - - -# ── Main ── -def main(): - log("=" * 50) - log("行课查询自动回填 启动") - - try: - token = get_fs_token() - conn = psycopg2.connect( - host=PG_HOST, port=PG_PORT, user=PG_USER, - password=get_pg_password(), dbname=PG_DB, connect_timeout=30 - ) - - r1 = process_id_matching(token, conn) - r2 = process_course_records(token, conn) - - conn.close() - - total = r1["processed"] + r2["processed"] - summary = f"ID回填:{r1['processed']}(匹配{r1['matched']}) | 行课:{r2['processed']}(有记录{r2['with_records']})" - log(f"完成: {summary}") - - if total == 0: - log("无待处理任务, 静默退出") - return 0 - return 0 - except Exception as e: - log(f"ERROR: {e}") - import traceback - traceback.print_exc() - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/auto_xingke_query_v2.py b/scripts/auto_xingke_query_v2.py deleted file mode 100644 index 277b824..0000000 --- a/scripts/auto_xingke_query_v2.py +++ /dev/null @@ -1,502 +0,0 @@ -#!/usr/bin/env python3 -""" -行课查询自动回填 V2 — 适配销转客户主表(全量) -数据源: ERCFsFo4MhnF0ytGeCrc0Bb8n5f / 1RFMqc (全量) -输出: 小溪明细 sheet (1zB5Be),全量表通过 VLOOKUP 自动刷新 - -流程: - 1. 读取全量表 → 有手机无UID的 → 匹配 account_id → 回填 X/Y/Z 列 - 2. 
读取全量表 → 所有有UID的 → 查行课数据 → 写入小溪明细 -""" -import json, requests, os, re, sys, time, psycopg2 -from datetime import datetime -from collections import defaultdict - -SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, SCRIPTS_DIR) -from phone_encrypt import encrypt_phone - -# ── 配置 ── -PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" -PG_PORT = 28591 -PG_USER = "ai_member" -PG_DB = "vala_bi" - -SPREADSHEET_TOKEN = "ERCFsFo4MhnF0ytGeCrc0Bb8n5f" -SHEET_ALL = "1RFMqc" # 全量表 -SHEET_DETAIL = "1zB5Be" # 小溪明细 - -CRED_DIR = "/root/.openclaw/credentials/xiaoxi" -LOG_FILE = "/var/log/xiaoxi_xingke_query_v2.log" - -def log(msg): - ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - line = f"[{ts}] {msg}" - print(line) - with open(LOG_FILE, "a") as f: - f.write(line + "\n") - -def get_pg_password(): - secrets_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "secrets.env") - with open(secrets_path) as f: - for line in f: - if line.startswith("PG_ONLINE_PASSWORD="): - return line.strip().split("=", 1)[1].strip("'\"") - -def get_fs_token(): - with open(os.path.join(CRED_DIR, "config.json")) as f: - cfg = json.load(f) - resp = requests.post( - "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", - json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, - timeout=15 - ) - return resp.json()["tenant_access_token"] - -def read_sheet(token, sheet_id, range_str=None): - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}" - if range_str: - url += f"!{range_str}" - for attempt in range(3): - try: - resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=90) - resp.encoding = 'utf-8' - data = json.loads(resp.text) - if data.get("code") != 0: - raise RuntimeError(f"读取Sheet失败: {data}") - return data["data"]["valueRange"]["values"] - except (json.JSONDecodeError, requests.exceptions.ConnectionError) as e: - log(f" 读取重试 
{attempt+1}/3: {e}") - if attempt == 2: - log(f" resp.text[:100]: {resp.text[:100] if 'resp' in dir() else 'N/A'}") - time.sleep(2 ** attempt) - raise RuntimeError("读取Sheet失败: 3次重试均失败") - -def put_values(token, sheet_id, range_str, values): - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" - body = {"valueRange": {"range": f"{sheet_id}!{range_str}", "values": values}} - resp = requests.put(url, headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - }, json=body, timeout=30) - r = resp.json() - if r.get("code") != 0: - log(f" 写入失败 {range_str}: {r}") - return r - -def append_rows(token, sheet_id, values): - """追加行到 sheet 末尾""" - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}:append" - body = {"valueRange": {"range": f"{sheet_id}!A:AA", "values": values}} - resp = requests.post(url, headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - }, json=body, timeout=30) - r = resp.json() - if r.get("code") != 0: - log(f" 追加失败: {r}") - return r - -def encrypt_phone_local(phone): - phone = str(phone).strip() - if "." 
in phone: - parts = phone.split(".") - if parts[1] in ("0", "00"): - phone = parts[0] - if re.match(r"^1\d{10}$", phone): - return encrypt_phone(phone) - return None - -def mask_phone_display(phone): - """手机号脱敏用于展示: 130****1234""" - phone = str(phone).strip() - if re.match(r"^1\d{10}$", phone): - return f"{phone[:3]}****{phone[-4:]}" - return phone - -def batch_in(cur, sql_tpl, params, chunk=500): - results = [] - for i in range(0, len(params), chunk): - batch = params[i:i+chunk] - ph = ",".join(["%s"] * len(batch)) - cur.execute(sql_tpl % ph, batch) - results.extend(cur.fetchall()) - return results - - -# ── Phase 1: 手机号→ID 匹配 ── -def phase1_phone_to_id(token, conn): - """读取全量表,对有手机无UID的行做匹配,回填 X/Y/Z 列""" - log("Phase 1: 手机号→ID 匹配") - rows = read_sheet(token, SHEET_ALL) - pending = [] - for idx, row in enumerate(rows[3:], start=4): - if len(row) < 5: - continue - # D列=手机号(索引3), I列=用户ID(索引8), X列=匹配uid(索引23) - phone = str(row[3]).strip() if len(row) > 3 and row[3] else "" - uid_i = str(row[8]).strip() if len(row) > 8 and row[8] else "" - uid_x = str(row[23]).strip() if len(row) > 23 and row[23] else "" - if not phone or uid_i or uid_x: - continue - if phone.startswith("="): - continue - pending.append({"row_idx": idx, "phone": phone}) - - log(f" 待匹配: {len(pending)}") - if not pending: - log(" 无需匹配, 跳过") - return 0, 0 - - phones_raw = list(set(r["phone"] for r in pending)) - valid_phones = [(p, encrypt_phone_local(p)) for p in phones_raw if encrypt_phone_local(p)] - enc_list = list(set(m[1] for m in valid_phones)) - - cur = conn.cursor() - enc_to_aid = {} - for i in range(0, len(enc_list), 500): - batch = enc_list[i:i+500] - ph = ",".join(["%s"] * len(batch)) - cur.execute( - f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({ph}) AND status=1 AND deleted_at IS NULL", - batch - ) - for aid, tel_enc in cur.fetchall(): - if tel_enc not in enc_to_aid: - enc_to_aid[tel_enc] = aid - - phone_to_aid = {} - for phone, enc in valid_phones: - if enc in 
enc_to_aid: - phone_to_aid[phone] = enc_to_aid[enc] - - matched = len(phone_to_aid) - log(f" 匹配成功: {matched}, 未匹配: {len(valid_phones)-matched}") - - for r in pending: - phone = r["phone"] - row = r["row_idx"] - if phone in phone_to_aid: - put_values(token, SHEET_ALL, f"X{row}:X{row}", [[str(phone_to_aid[phone])]]) - - log(f" 回填完成: {len(pending)} 行") - cur.close() - return len(pending), matched - - -# ── Phase 2: 行课记录查询 & 写入小溪明细 ── -def phase2_course_records(token, conn): - """读取全量表所有UID → 查行课数据 → 写入小溪明细""" - log("Phase 2: 行课记录查询") - rows = read_sheet(token, SHEET_ALL) - - uid_rows = {} - for idx, row in enumerate(rows[3:], start=4): - uid = "" - if len(row) > 8 and row[8]: - try: - uid = str(int(float(str(row[8]).strip()))) - except (ValueError, TypeError): - pass - if not uid and len(row) > 23 and row[23]: - try: - uid = str(int(float(str(row[23]).strip()))) - except (ValueError, TypeError): - pass - if not uid: - continue - sales = str(row[0]).strip() if len(row) > 0 and row[0] else "" - jinxian = str(row[2]).strip() if len(row) > 2 and row[2] else "" - phone = str(row[3]).strip() if len(row) > 3 and row[3] else "" - aid = int(uid) - if aid <= 0: - continue - if aid not in uid_rows: - uid_rows[aid] = (idx, sales, jinxian, phone) - - uid_set = list(uid_rows.keys()) - log(f" 有效UID: {len(uid_set)}") - if not uid_set: - log(" 无UID, 跳过") - return 0 - - cur = conn.cursor() - - # 账户信息 - log(" 查询账户信息...") - aid_info = {} - for i in range(0, len(uid_set), 500): - batch = uid_set[i:i+500] - ph = ",".join(["%s"] * len(batch)) - cur.execute( - f"SELECT id, tel, created_at FROM bi_vala_app_account WHERE id IN ({ph}) AND status=1 AND deleted_at IS NULL", - batch - ) - for aid, tel, created_at in cur.fetchall(): - aid_info[aid] = {"tel": tel or "", "created_at": str(created_at) if created_at else ""} - - # 角色信息 - log(" 查询角色信息...") - account_chars = defaultdict(list) - char_to_account = {} - rc = batch_in(cur, - "SELECT account_id, id, nickname FROM bi_vala_app_character WHERE 
account_id IN (%s) AND nickname IS NOT NULL AND nickname != '' AND deleted_at IS NULL", - uid_set - ) - for aid, cid, nick in rc: - account_chars[aid].append(cid) - char_to_account[cid] = aid - char_ids = list(char_to_account.keys()) - log(f" 角色数: {len(char_ids)}") - - # 课程映射 - cur.execute("SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson") - chapter_map = {} - for ch_id, cl, cs, cu, cl2 in cur.fetchall(): - chapter_map[ch_id] = (cl or "", cs or "", cu or "", cl2 or "") - - # 课时完成记录 - log(" 查询课时完成记录...") - char_chapter_times = defaultdict(dict) - char_latest = {} - for tbl_idx in range(8): - table = f"bi_user_chapter_play_record_{tbl_idx}" - try: - cur.execute( - f"SELECT user_id, chapter_id, created_at FROM {table} WHERE play_status=1 AND deleted_at IS NULL AND user_id = ANY(%s)", - (char_ids,) - ) - for uid, ch_id, created_at in cur.fetchall(): - ch_data = chapter_map.get(ch_id) - if not ch_data: - continue - if ch_id not in char_chapter_times[uid] or created_at < char_chapter_times[uid][ch_id]: - char_chapter_times[uid][ch_id] = created_at - prev = char_latest.get(uid) - if prev is None or created_at > prev[0]: - char_latest[uid] = (created_at, ch_id, ch_data) - except Exception as e: - log(f" 警告 {table}: {e}") - - # 学习耗时 - log(" 查询学习耗时...") - char_total_ms = defaultdict(int) - for tbl_idx in range(8): - table = f"bi_user_component_play_record_{tbl_idx}" - try: - cur.execute( - f"SELECT user_id, SUM(COALESCE(interval_time,0)) FROM {table} WHERE user_id = ANY(%s) AND deleted_at IS NULL GROUP BY user_id", - (char_ids,) - ) - for uid, total_ms in cur.fetchall(): - char_total_ms[uid] += (total_ms or 0) - except Exception as e: - log(f" 警告 {table}: {e}") - - # 激活状态 - log(" 查询激活状态...") - activation = {} - for i in range(0, len(uid_set), 500): - batch = uid_set[i:i+500] - ph = ",".join(["%s"] * len(batch)) - cur.execute( - f"SELECT account_id, season_package_level FROM bi_vala_seasonal_ticket WHERE account_id IN ({ph}) AND 
status=1 AND deleted_at IS NULL AND season_package_level IN ('A1','A2')", - batch - ) - for aid, lvl in cur.fetchall(): - if aid not in activation: - activation[aid] = lvl - - # 付费信息 - log(" 查询付费信息...") - paid_info = {} - for i in range(0, len(uid_set), 500): - batch = uid_set[i:i+500] - ph = ",".join(["%s"] * len(batch)) - cur.execute( - f"""SELECT account_id, - MIN(pay_success_date) as first_pay, - SUM(pay_amount_int)/100.0 as total_gmv, - SUM(CASE WHEN order_status=3 THEN pay_amount_int ELSE 0 END)/100.0 as total_gsv, - STRING_AGG(DISTINCT key_from, ', ') as channels - FROM bi_vala_order - WHERE account_id IN ({ph}) - AND pay_success_date IS NOT NULL - AND order_status IN (3,4) - AND deleted_at IS NULL - GROUP BY account_id""", - batch - ) - for aid, first_pay, gmv, gsv, channels in cur.fetchall(): - paid_info[aid] = (str(first_pay) if first_pay else "", gmv or 0, gsv or 0, channels or "") - - # 最近登录 - log(" 查询最近登录...") - last_login = {} - for i in range(0, len(uid_set), 500): - batch = uid_set[i:i+500] - ph = ",".join(["%s"] * len(batch)) - cur.execute( - f"SELECT account_id, MAX(login_date) FROM bi_vala_app_account WHERE id IN ({ph}) AND status=1 AND deleted_at IS NULL GROUP BY account_id", - batch - ) - for aid, dt in cur.fetchall(): - if dt: - last_login[aid] = str(dt) - - cur.close() - - # 组装小溪明细 - log(" 组装小溪明细...") - detail_rows = [] - for aid in uid_set: - row_idx, sales, jinxian, phone_raw = uid_rows[aid] - info = aid_info.get(aid, {}) - tel = info.get("tel", "") - created_at = info.get("created_at", "") - masked_tel = mask_phone_display(tel) or mask_phone_display(phone_raw) or "" - masked_phone = mask_phone_display(phone_raw) or "" - - chars = account_chars.get(aid, []) - - exp_lessons = {} - first_lesson_time = None - first_lesson_ch = None - best_latest_time = None - best_latest_ch = None - total_min = 0.0 - - for cid in chars: - ctimes = char_chapter_times.get(cid, {}) - for ch_id, ct in ctimes.items(): - ch_info = chapter_map.get(ch_id) - if not 
ch_info: - continue - cl, cs, cu, cl2 = ch_info - if cs == "S0" and cu == "U00": - if cl2 not in exp_lessons or ct < exp_lessons[cl2]: - exp_lessons[cl2] = ct - if first_lesson_time is None or ct < first_lesson_time: - first_lesson_time = ct - first_lesson_ch = ch_info - - lt = char_latest.get(cid) - if lt and (best_latest_time is None or lt[0] > best_latest_time): - best_latest_time = lt[0] - best_latest_ch = lt[1] - - total_min += char_total_ms.get(cid, 0) / 60000.0 - - total_min = round(total_min, 1) - if total_min == int(total_min): - total_min = int(total_min) - - exp_count = len(exp_lessons) - first_time_str = first_lesson_time.strftime("%Y-%m-%d %H:%M:%S") if first_lesson_time else "" - first_ch_str = f"{first_lesson_ch[0]}-{first_lesson_ch[1]}-{first_lesson_ch[2]}-{first_lesson_ch[3]}" if first_lesson_ch else "" - latest_time_str = best_latest_time.strftime("%Y-%m-%d %H:%M:%S") if best_latest_time else "" - latest_ch_str = f"{chapter_map[best_latest_ch][0]}-{chapter_map[best_latest_ch][1]}-{chapter_map[best_latest_ch][2]}-{chapter_map[best_latest_ch][3]}" if best_latest_ch and best_latest_ch in chapter_map else "" - - act_level = activation.get(aid, "") - if act_level: - course_level = act_level - course_type = "正式课" if aid in paid_info else "体验课" - elif first_lesson_ch: - course_level = first_lesson_ch[0] - course_type = "体验课" - else: - course_level = "" - course_type = "" - - nicknames = [] - for cid in chars: - for orig_aid, orig_cid, orig_nick in rc: - if orig_cid == cid: - nicknames.append(orig_nick) - nickname_str = " / ".join(nicknames[:3]) - - pi = paid_info.get(aid) - paid_status = "已付费" if pi else "未付费" - first_pay_time = pi[0] if pi else "" - total_gmv = pi[1] if pi else 0 - total_gsv = pi[2] if pi else 0 - channels = pi[3] if pi else "" - - row_data = [ - str(aid), masked_tel, masked_phone, sales, jinxian, created_at, "", - nickname_str, course_level, course_type, - first_time_str, first_ch_str, - exp_lessons.get("L01").strftime("%Y-%m-%d 
%H:%M:%S") if exp_lessons.get("L01") else "", - exp_lessons.get("L02").strftime("%Y-%m-%d %H:%M:%S") if exp_lessons.get("L02") else "", - exp_lessons.get("L03").strftime("%Y-%m-%d %H:%M:%S") if exp_lessons.get("L03") else "", - exp_lessons.get("L04").strftime("%Y-%m-%d %H:%M:%S") if exp_lessons.get("L04") else "", - exp_lessons.get("L05").strftime("%Y-%m-%d %H:%M:%S") if exp_lessons.get("L05") else "", - str(exp_count), - latest_time_str, latest_ch_str, str(total_min), - paid_status, first_pay_time, - str(total_gmv), str(total_gsv), channels, - last_login.get(aid, ""), - ] - detail_rows.append((aid, row_data)) - - # 写入小溪明细 - log(" 写入小溪明细...") - existing = read_sheet(token, SHEET_DETAIL) - existing_map = {} - for i, row in enumerate(existing[1:], start=2): - if row and row[0]: - try: - existing_map[int(float(str(row[0]).strip()))] = i - except (ValueError, TypeError): - pass - - update_count = 0 - new_rows_list = [] - for aid, row_data in detail_rows: - if aid in existing_map: - ri = existing_map[aid] - put_values(token, SHEET_DETAIL, f"A{ri}:AA{ri}", [row_data]) - update_count += 1 - else: - new_rows_list.append(row_data) - - log(f" 更新: {update_count}, 新增: {len(new_rows_list)}") - - # 追加新行 - for i in range(0, len(new_rows_list), 500): - batch = new_rows_list[i:i+500] - append_rows(token, SHEET_DETAIL, batch) - - log(f" 行课明细完成: 共 {len(detail_rows)} 条") - return len(detail_rows) - - -# ── Main ── -def main(): - log("=" * 50) - log("行课查询 V2 启动 (销转客户主表)") - - try: - token = get_fs_token() - conn = psycopg2.connect( - host=PG_HOST, port=PG_PORT, user=PG_USER, - password=get_pg_password(), dbname=PG_DB, connect_timeout=30 - ) - - p1_total, p1_matched = phase1_phone_to_id(token, conn) - p2_total = phase2_course_records(token, conn) - - conn.close() - - summary = f"Phase1(ID匹配): {p1_total} 行(匹配{p1_matched}) | Phase2(行课): {p2_total} 条" - log(f"完成: {summary}") - return 0 - except Exception as e: - log(f"ERROR: {e}") - import traceback - traceback.print_exc() - return 1 - - 
-if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/batch_course_records.py b/scripts/batch_course_records.py deleted file mode 100644 index 42d7fe8..0000000 --- a/scripts/batch_course_records.py +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env python3 -""" -批量查询行课记录 -从 Sheet1 (55b0eb) 读取"待查询"的用户,查询行课数据并写回 -""" -import json -import re -import os -import psycopg2 -import requests -from datetime import datetime -from collections import defaultdict - -DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" -DB_PORT = 28591 -DB_USER = "ai_member" -DB_NAME = "vala_bi" -SPREADSHEET_TOKEN = "RFIJsXT8FhGHhctY4RwczcOfnac" -SHEET_ID = "55b0eb" -CRED_DIR = "/root/.openclaw/credentials/xiaoxi" - -def get_password(): - pw = os.environ.get("PG_ONLINE_PASSWORD", "") - if pw: - return pw - secrets_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "secrets.env") - if os.path.exists(secrets_path): - with open(secrets_path) as f: - for line in f: - if line.startswith("PG_ONLINE_PASSWORD="): - return line.strip().split("=", 1)[1].strip("'\"") - raise RuntimeError("PG_ONLINE_PASSWORD not found") - -def get_conn(): - return psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER, password=get_password(), dbname=DB_NAME, connect_timeout=30) - -def get_token(): - with open(os.path.join(CRED_DIR, "config.json")) as f: - cfg = json.load(f) - app_id = cfg['apps'][0]['appId'] - app_secret = cfg['apps'][0]['appSecret'] - resp = requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", - json={"app_id": app_id, "app_secret": app_secret}) - return resp.json()['tenant_access_token'] - -# Experience course chapters (U00) -EXPERIENCE_CHAPTERS = { - # L1 U00 - 343: "L1-U00-L01", 344: "L1-U00-L02", 345: "L1-U00-L03", - 346: "L1-U00-L04", 348: "L1-U00-L05", - # L2 U00 - 55: "L2-U00-L01", 56: "L2-U00-L02", 57: "L2-U00-L03", - 58: "L2-U00-L04", 59: "L2-U00-L05", -} - -def main(): - print("=== Step 1: 读取待查询数据 ===") - with 
open('/tmp/sheet_course_data.json') as f: - data = json.load(f) - rows = data['data']['valueRange']['values'] - - pending = [] - for idx, row in enumerate(rows[1:], start=1): - if len(row) > 2 and row[2] == '待查询': - user_id = row[1].strip() if len(row) > 1 and row[1] else '' - sales = row[0].strip() if len(row) > 0 and row[0] else '' - pending.append({ - 'row_idx': idx + 1, - 'user_id': user_id, - 'sales': sales, - }) - - print(f"待查询记录: {len(pending)}") - - # Filter valid user IDs - valid_ids = [] - for r in pending: - try: - uid = int(r['user_id']) - if uid > 0: - valid_ids.append(uid) - except (ValueError, TypeError): - pass - - uid_set = list(set(valid_ids)) - print(f"有效用户ID: {len(uid_set)}") - - if not uid_set: - print("无有效用户ID,退出") - return - - conn = get_conn() - - print("\n=== Step 2: 获取角色信息 ===") - # Get characters for these account IDs - placeholders = ",".join(["%s"] * len(uid_set)) - cur = conn.cursor() - cur.execute(f""" - SELECT account_id, id AS character_id, nickname - FROM bi_vala_app_character - WHERE account_id IN ({placeholders}) - AND (nickname IS NOT NULL AND nickname != '') - AND deleted_at IS NULL - """, uid_set) - char_rows = cur.fetchall() - cur.close() - - account_chars = defaultdict(list) - char_to_account = {} - for aid, cid, nick in char_rows: - account_chars[aid].append(cid) - char_to_account[cid] = aid - - char_ids = list(char_to_account.keys()) - print(f"角色数: {len(char_ids)}, 覆盖账号: {len(account_chars)}") - - print("\n=== Step 3: 查询课时完成记录 ===") - # All chapters from bi_level_unit_lesson - cur = conn.cursor() - cur.execute(""" - SELECT id AS chapter_id, course_level, course_season, course_unit, course_lesson - FROM bi_level_unit_lesson - - """) - chapter_map = {} # chapter_id -> (course_level, course_season, course_unit, course_lesson) - for ch_id, cl, cs, cu, cl2 in cur.fetchall(): - chapter_map[ch_id] = (cl or '', cs or '', cu or '', cl2 or '') - cur.close() - print(f"课程映射: {len(chapter_map)} chapters") - - # Query completed chapters 
from 8 shards - char_plays = defaultdict(lambda: {'chapters': [], 'total_time_ms': 0, 'latest_time': None, 'latest_chapter': None}) - - for tbl_idx in range(8): - table = f"bi_user_chapter_play_record_{tbl_idx}" - try: - cur = conn.cursor() - cur.execute(f""" - SELECT user_id, chapter_id, created_at - FROM {table} - WHERE play_status = 1 - AND deleted_at IS NULL - AND user_id = ANY(%s) - """, (char_ids,)) - for user_id, ch_id, created_at in cur.fetchall(): - ch_data = chapter_map.get(ch_id) - if ch_data: - rec = char_plays[user_id] - rec['chapters'].append((ch_id, ch_data, created_at)) - if rec['latest_time'] is None or created_at > rec['latest_time']: - rec['latest_time'] = created_at - rec['latest_chapter'] = (ch_id, ch_data) - cur.close() - except Exception as e: - print(f" 警告: {table} 查询失败: {e}") - - users_with_records = len([k for k, v in char_plays.items() if v['chapters']]) - print(f"有课时记录的角色: {users_with_records}") - - print("\n=== Step 4: 查询学习总时长 ===") - # Sum interval_time from component play records - for tbl_idx in range(8): - table = f"bi_user_component_play_record_{tbl_idx}" - try: - cur = conn.cursor() - cur.execute(f""" - SELECT user_id, SUM(COALESCE(interval_time, 0)) AS total_ms - FROM {table} - WHERE user_id = ANY(%s) - AND deleted_at IS NULL - GROUP BY user_id - """, (char_ids,)) - for user_id, total_ms in cur.fetchall(): - if user_id in char_plays: - char_plays[user_id]['total_time_ms'] += (total_ms or 0) - cur.close() - except Exception as e: - print(f" 警告: {table} 查询失败: {e}") - - print("\n=== Step 5: 查询付费状态 ===") - cur = conn.cursor() - cur.execute(f""" - SELECT account_id, COUNT(*) AS order_count - FROM bi_vala_order - WHERE account_id IN ({placeholders}) - AND pay_success_date IS NOT NULL - AND order_status = 3 - AND deleted_at IS NULL - GROUP BY account_id - """, uid_set) - paid_accounts = {row[0]: row[1] for row in cur.fetchall()} - cur.close() - print(f"已付费账号: {len(paid_accounts)}") - - print("\n=== Step 6: 查询激活状态 (A1/A2) ===") - cur = 
conn.cursor() - cur.execute(f""" - SELECT t.account_id, t.season_package_level - FROM bi_vala_seasonal_ticket t - INNER JOIN bi_vala_app_account a ON t.account_id = a.id AND a.status = 1 - WHERE t.account_id IN ({placeholders}) - AND t.status = 1 - AND t.deleted_at IS NULL - AND t.season_package_level IN ('A1', 'A2') - """, uid_set) - account_activation = {} - for aid, lvl in cur.fetchall(): - if aid not in account_activation: - account_activation[aid] = lvl - cur.close() - print(f"有激活记录: {len(account_activation)}") - - conn.close() - - print("\n=== Step 7: 组装结果 ===") - now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - results = [] - - for r in pending: - try: - uid = int(r['user_id']) - except (ValueError, TypeError): - results.append({ - 'row_idx': r['row_idx'], - 'record': '无有效用户ID', - 'update_time': now_str, - }) - continue - - # Find characters for this account - char_list = account_chars.get(uid, []) - - # Find latest chapter across all characters - best_char = None - best_latest_time = None - best_latest_chapter = None - best_chapters = [] - total_time_ms = 0 - - for cid in char_list: - play = char_plays.get(cid) - if play and play['latest_time']: - if best_latest_time is None or play['latest_time'] > best_latest_time: - best_latest_time = play['latest_time'] - best_latest_chapter = play['latest_chapter'] - best_char = cid - best_chapters.extend(play['chapters']) - total_time_ms += play['total_time_ms'] - - if not best_char or not best_latest_chapter: - # No study records - paid = '已付费' if uid in paid_accounts else '未付费' - results.append({ - 'row_idx': r['row_idx'], - 'record': f"销售:{r['sales']} | 用户:{uid} | 当前:无行课记录 | 最近行课:无 | 学习0min | {paid}", - 'update_time': now_str, - }) - continue - - # Format latest chapter - ch_id, (course_level, course_season, course_unit, course_lesson) = best_latest_chapter - activation = account_activation.get(uid, '') - - # Build course label - # Format: L1-S0-U00-L01 - full_label = 
f"{course_level}-{course_season}-{course_unit}-{course_lesson}" - - if activation: - prefix = f"{activation}体验课" - else: - prefix = f"{course_level}体验课" - - current = f"{prefix}-{full_label}" - - # Recent date - recent_date = best_latest_time.strftime('%Y-%m-%d') if best_latest_time else '无' - - # Total time in minutes - total_min = round(total_time_ms / 60000, 1) - if total_min == int(total_min): - total_min = int(total_min) - - paid = '已付费' if uid in paid_accounts else '未付费' - - record = f"销售:{r['sales']} | 用户:{uid} | 当前:{current} | 最近行课:{recent_date} | 学习{total_min}min | {paid}" - - results.append({ - 'row_idx': r['row_idx'], - 'record': record, - 'update_time': now_str, - }) - - print(f"生成结果: {len(results)} 条") - - # Sample output - for r in results[:5]: - print(f" Row {r['row_idx']}: {r['record'][:120]}...") - - # Save results - with open('/tmp/sheet_course_results.json', 'w') as f: - json.dump({'results': results, 'total': len(results)}, f, ensure_ascii=False, indent=2) - print(f"结果已保存到 /tmp/sheet_course_results.json") - -if __name__ == "__main__": - main() diff --git a/scripts/batch_update_sheet.py b/scripts/batch_update_sheet.py deleted file mode 100644 index 619600c..0000000 --- a/scripts/batch_update_sheet.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -""" -批量更新飞书电子表格:回填用户ID和匹配状态 -用于 2DOxEI 表 R915-R992 行 -""" -import json -import subprocess -import sys - -# 78个手机号(按表格行顺序 915-992) -PHONES = [ - '18898596908', '13104122113', '18616818587', '18600641856', '18527822530', - '15301808320', '15921183656', '18969141986', '18853077186', '19879837192', - '15013730773', '18240307314', '15133168361', '18607715299', '18640248566', - '15229999262', '18615767595', '15880070471', '15210946014', '13416197660', - '18030731125', '13372561305', '13438029626', '13426271919', '13380241801', - '13331090268', '13941957202', '13668236095', '18666339866', '18193473383', - '18719069856', '15092617699', '15602091300', '18906300189', '18823116345', - '16675181845', 
'15805920790', '13631576638', '13825629898', '18689550023', - '13858852527', '17701557793', '18800105821', '18243588666', '15070812805', - '15906585627', '13818184885', '18609909747', '18501055123', '18781333078', - '13510511993', '13763607518', '19131773001', '13429170125', '13548532992', - '18273336778', '15004066188', '15386183750', '15002087823', '15622866383', - '15236831122', '13980065537', '17351768736', '13752368975', '18988791586', - '13465563287', '18268989827', '18358369704', '13370181982', '15062199752', - '13009161168', '17701717015', '13969392995', '13560010506', '18042928605', - '13603503266', '15776824932', '18131135363', -] - -# 数据库查询结果: 脱敏手机号 -> [account_id列表] -# rn=1 作为首选(最大account_id) -MATCHES = {} -raw_matches = [ - ('188****6908', 26655), ('131****2113', 27442), ('186****8587', 27410), - ('186****1856', 27213), ('185****2530', 27227), ('153****8320', 27417), - ('159****3656', 27316), ('189****1986', 27591), ('188****7186', 27248), - ('198****7192', 27686), ('150****0773', 27464), ('182****7314', 27429), - ('151****8361', 27432), ('186****5299', 27017), ('186****8566', 27630), - ('152****9262', 27483), ('186****7595', 27467), ('158****0471', 27693), - ('152****6014', 27490), ('134****7660', 27619), ('180****1125', 27618), - ('133****1305', 26920), ('134****9626', 27583), ('134****1919', 27755), - ('133****1801', 27633), ('133****0268', 27983), ('139****7202', 27499), - ('136****6095', 27598), ('186****9866', 27745), ('181****3383', 27576), - ('187****9856', 27585), ('150****7699', 28052), ('156****1300', 27672), - ('189****0189', 27292), ('188****6345', 27836), ('166****1845', 27955), - ('158****0790', 27951), ('136****6638', 5149), ('138****9898', 3612), - ('186****0023', 27674), ('138****2527', 4882), ('177****7793', 27890), - ('188****5821', 27761), ('182****8666', 27812), ('150****2805', 27813), - ('159****5627', 27882), ('138****4885', 27911), ('186****9747', 27809), - ('185****5123', 27776), ('187****3078', 27738), ('135****1993', 27806), 
- ('137****7518', 27770), ('191****3001', 27638), ('134****0125', 27794), - ('135****2992', 27750), ('182****6778', 27728), ('150****6188', 27803), - ('153****3750', 27790), ('150****7823', 11807), ('156****6383', 28018), - ('152****1122', 27869), ('139****5537', 27947), ('173****8736', 28070), - ('137****8975', 27916), ('189****1586', 27941), ('134****3287', 28079), - ('182****9827', 27860), ('183****9704', 27849), ('133****1982', 4923), - ('150****9752', 28071), ('130****1168', 27936), ('177****7015', 27946), - ('139****2995', 27889), ('135****0506', 28394), ('180****8605', 28011), - ('136****3266', 27965), ('157****4932', 28088), ('181****5363', 27979), -] - -for tel_masked, acc_id in raw_matches: - MATCHES[tel_masked] = acc_id # rn=1 优先,后出现的会覆盖 - -def mask_phone(phone): - """脱敏手机号: 前3位 + **** + 后4位""" - return f"{phone[:3]}****{phone[-4:]}" - -def get_tat(): - """获取 Tenant Access Token""" - import json - config = json.load(open('/root/.openclaw/credentials/xiaoxi/config.json')) - app = config['apps'][0] - import urllib.request - data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode() - req = urllib.request.Request( - 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', - data=data, - headers={'Content-Type': 'application/json; charset=utf-8'} - ) - resp = json.loads(urllib.request.urlopen(req).read()) - return resp['tenant_access_token'] - -def update_sheet_range(tat, token, sheet_id, range_str, values): - """批量更新电子表格范围""" - import urllib.request - url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values' - body = json.dumps({ - "valueRange": { - "range": f"{sheet_id}!{range_str}", - "values": values - } - }).encode() - req = urllib.request.Request(url, data=body, method='PUT') - req.add_header('Authorization', f'Bearer {tat}') - req.add_header('Content-Type', 'application/json; charset=utf-8') - resp = json.loads(urllib.request.urlopen(req).read()) - return resp - -if __name__ == 
'__main__': - # 构建回填数据 - f_values = [] # F列: account_id - g_values = [] # G列: 回填状态 - - matched_count = 0 - multi_count = 0 - - for phone in PHONES: - masked = mask_phone(phone) - acc_id = MATCHES.get(masked) - if acc_id: - f_values.append([str(acc_id)]) - g_values.append(["已匹配"]) - matched_count += 1 - else: - f_values.append(["未匹配"]) - g_values.append(["未匹配"]) - - print(f"Total: {len(PHONES)}, Matched: {matched_count}, Unmatched: {len(PHONES) - matched_count}") - - # 获取 token - tat = get_tat() - token = 'RFIJsXT8FhGHhctY4RwczcOfnac' - sheet_id = '2DOxEI' - - # 更新 F 列 (F915:F992) - 回填用户ID - resp_f = update_sheet_range(tat, token, sheet_id, 'F915:F992', f_values) - print(f"Update F column: code={resp_f.get('code')}, msg={resp_f.get('msg')}") - - # 更新 G 列 (G915:G992) - 回填状态 - resp_g = update_sheet_range(tat, token, sheet_id, 'G915:G992', g_values) - print(f"Update G column: code={resp_g.get('code')}, msg={resp_g.get('msg')}") - - # 更新 E 列 (E915:E992) - 请求状态改为"已返回" - e_values = [["已返回"] for _ in range(len(PHONES))] - resp_e = update_sheet_range(tat, token, sheet_id, 'E915:E992', e_values) - print(f"Update E column: code={resp_e.get('code')}, msg={resp_e.get('msg')}") - - if resp_f.get('code') == 0 and resp_g.get('code') == 0: - print("\n✅ 所有78条记录已成功回填!") - else: - print("\n⚠️ 部分更新可能失败,请检查。") diff --git a/scripts/export_103_users.py b/scripts/export_103_users.py new file mode 100644 index 0000000..f156aa8 --- /dev/null +++ b/scripts/export_103_users.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +"""导出2026年付费用户中5月完成>30课时的103人明细""" + +import os +import psycopg2 +import pandas as pd +from openpyxl import Workbook +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side +from openpyxl.utils import get_column_letter + +PG_PASSWORD = "LdfjdjL83h3h3^$&**YGG*" + +SQL = """ +WITH user_orders AS ( + SELECT DISTINCT o.account_id, o.goods_id, o.trade_no, o.key_from, o.pay_success_date + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND 
a.status = 1 + WHERE o.pay_success_date >= '2026-01-01' + AND o.pay_success_date IS NOT NULL + AND o.order_status IN (3, 4) + AND o.goods_id NOT IN (32) +), +refunded_trades AS ( + SELECT DISTINCT r.trade_no + FROM bi_refund_order r + JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4 + WHERE r.status = 3 +), +user_all_refunded AS ( + SELECT uo.account_id + FROM user_orders uo + LEFT JOIN refunded_trades rt ON uo.trade_no = rt.trade_no + GROUP BY uo.account_id + HAVING COUNT(*) = COUNT(rt.trade_no) +), +non_refund_users AS ( + SELECT DISTINCT uo.account_id + FROM user_orders uo + WHERE uo.account_id NOT IN (SELECT account_id FROM user_all_refunded) +), +user_level AS ( + SELECT + uo.account_id, + CASE + WHEN bool_or(uo.goods_id = 61) THEN 'L1+L2' + WHEN bool_or(uo.goods_id IN (57, 60, 63)) AND bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L1+L2' + WHEN bool_or(uo.goods_id IN (57, 60, 63)) THEN 'L1' + WHEN bool_or(uo.goods_id IN (31, 33, 54)) THEN 'L2' + ELSE '其他' + END AS level + FROM user_orders uo + GROUP BY uo.account_id +), +purchase_info AS ( + SELECT account_id, + STRING_AGG(DISTINCT key_from, ', ' ORDER BY key_from) AS channels, + MIN(pay_success_date) AS first_purchase_date + FROM user_orders + GROUP BY account_id +), +target_chars AS ( + SELECT c.id AS char_id, c.account_id, c.gender, c.birthday + FROM bi_vala_app_character c + JOIN non_refund_users n ON c.account_id = n.account_id +), +chapter_done AS ( + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_0 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 0 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_1 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 1 + WHERE cp.play_status = 1 + AND 
cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_2 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 2 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_3 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 3 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_4 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 4 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_5 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 5 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_6 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 6 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id + UNION ALL + SELECT cp.user_id AS char_id, COUNT(DISTINCT cp.chapter_id) AS done_count + FROM bi_user_chapter_play_record_7 cp + JOIN target_chars tc ON cp.user_id = tc.char_id AND tc.char_id % 8 = 7 + WHERE cp.play_status = 1 + AND cp.created_at >= '2026-05-01' AND cp.created_at < '2026-06-01' + GROUP BY cp.user_id 
+), +user_done AS ( + SELECT tc.account_id, SUM(cd.done_count) AS total_done + FROM target_chars tc + JOIN chapter_done cd ON tc.char_id = cd.char_id + GROUP BY tc.account_id + HAVING SUM(cd.done_count) > 30 +), +char_info AS ( + SELECT DISTINCT ON (c.account_id) + c.account_id, c.gender, c.birthday + FROM bi_vala_app_character c + JOIN user_done ud ON c.account_id = ud.account_id + WHERE c.deleted_at IS NULL + ORDER BY c.account_id, c.birthday DESC NULLS LAST +), +addr_info AS ( + SELECT DISTINCT ON (account_id) account_id, login_address + FROM account_detail_info + WHERE login_address IS NOT NULL AND login_address != '' + ORDER BY account_id, created_time DESC +) +SELECT + ud.account_id, + ul.level, + CASE + WHEN ci.gender = 1 THEN '男' + WHEN ci.gender = 2 THEN '女' + ELSE '未知' + END AS gender, + CASE + WHEN ci.birthday IS NOT NULL AND ci.birthday != '' + THEN EXTRACT(YEAR FROM AGE(TO_DATE(ci.birthday, 'YYYY-MM-DD'))) + ELSE NULL + END AS age, + SPLIT_PART(ai.login_address, '-', 1) AS province, + SPLIT_PART(ai.login_address, '-', 2) AS city, + pi.channels AS purchase_channel, + TO_CHAR(pi.first_purchase_date, 'YYYY-MM-DD') AS first_purchase_date, + ud.total_done AS may_done_count +FROM user_done ud +JOIN user_level ul ON ud.account_id = ul.account_id +LEFT JOIN char_info ci ON ud.account_id = ci.account_id +LEFT JOIN addr_info ai ON ud.account_id = ai.account_id +LEFT JOIN purchase_info pi ON ud.account_id = pi.account_id +ORDER BY ul.level, ud.account_id; +""" + +def main(): + conn = psycopg2.connect( + host="bj-postgres-16pob4sg.sql.tencentcdb.com", + port=28591, + user="ai_member", + password=PG_PASSWORD, + database="vala_bi" + ) + + df = pd.read_sql(SQL, conn) + conn.close() + + # Rename columns + df.columns = ['账号ID', '课程级别', '性别', '年龄', '省份', '城市', '购课渠道', '首次购课日期', '5月完成课时数'] + + # Output path + output_path = "/root/.openclaw/workspace/output/2026付费用户_5月完成30课时以上_103人.xlsx" + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + # Write with openpyxl 
for styling + wb = Workbook() + ws = wb.active + ws.title = "5月完成>30课时用户" + + # Header style + header_font = Font(name='微软雅黑', bold=True, size=11, color='FFFFFF') + header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid') + header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True) + thin_border = Border( + left=Side(style='thin'), + right=Side(style='thin'), + top=Side(style='thin'), + bottom=Side(style='thin') + ) + + # Write headers + for col_idx, col_name in enumerate(df.columns, 1): + cell = ws.cell(row=1, column=col_idx, value=col_name) + cell.font = header_font + cell.fill = header_fill + cell.alignment = header_alignment + cell.border = thin_border + + # Write data + data_font = Font(name='微软雅黑', size=10) + data_alignment = Alignment(vertical='center') + + for row_idx, row in df.iterrows(): + for col_idx, value in enumerate(row, 1): + cell = ws.cell(row=row_idx + 2, column=col_idx, value=value if pd.notna(value) else '') + cell.font = data_font + cell.alignment = data_alignment + cell.border = thin_border + + # Column widths + col_widths = [12, 10, 8, 8, 16, 12, 60, 16, 16] + for i, w in enumerate(col_widths, 1): + ws.column_dimensions[get_column_letter(i)].width = w + + # Freeze header + ws.freeze_panes = 'A2' + + # Auto filter + ws.auto_filter.ref = f"A1:{get_column_letter(len(df.columns))}{len(df)+1}" + + # Summary sheet + ws2 = wb.create_sheet("汇总") + summary_data = [ + ["指标", "数值"], + ["总人数", len(df)], + ["仅L1", len(df[df['课程级别'] == 'L1'])], + ["仅L2", len(df[df['课程级别'] == 'L2'])], + ["L1+L2", len(df[df['课程级别'] == 'L1+L2'])], + ["平均年龄", round(df['年龄'].mean(), 1)], + ["男性", len(df[df['性别'] == '男'])], + ["女性", len(df[df['性别'] == '女'])], + ["性别未知", len(df[df['性别'] == '未知'])], + ["平均5月完成课时", round(df['5月完成课时数'].mean(), 1)], + ["最高5月完成课时", int(df['5月完成课时数'].max())], + ] + + for row_idx, row_data in enumerate(summary_data, 1): + for col_idx, value in enumerate(row_data, 1): + cell = ws2.cell(row=row_idx, 
column=col_idx, value=value) + if row_idx == 1: + cell.font = header_font + cell.fill = header_fill + cell.alignment = header_alignment + else: + cell.font = data_font + cell.alignment = data_alignment + cell.border = thin_border + + ws2.column_dimensions['A'].width = 20 + ws2.column_dimensions['B'].width = 15 + + wb.save(output_path) + print(f"✅ 已导出: {output_path}") + print(f" 共 {len(df)} 条记录") + +if __name__ == "__main__": + main() diff --git a/scripts/export_b_channel_yifang.py b/scripts/export_b_channel_yifang.py new file mode 100644 index 0000000..495d685 --- /dev/null +++ b/scripts/export_b_channel_yifang.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 +""" +B档一方回传 · 非小红书渠道 + 小红书达人密集成单 +时间窗: 2025-09-01 ~ 2026-06-03 +输出: plaintext CSV → phone_encrypt → MD5上传CSV +""" +import os, sys, csv, psycopg2 +from datetime import date, datetime +from collections import defaultdict + +SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SCRIPTS_DIR) +from phone_encrypt import phone_md5, decrypt_phone + +OUTPUT_DIR = os.path.join(os.path.dirname(SCRIPTS_DIR), "output", "yifang_export") +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# ── 渠道定义 ── +CHANNELS = { + "douyin_daren": { + "name": "抖音达人", + "prefix": "B1_douyin_daren", + "key_from_like": "newmedia-daren-douyin%", + "account_key_from_like": "newmedia-daren-douyin%", + }, + "wxxd_daren": { + "name": "视频号达人", + "prefix": "B2_wxxd_daren", + "key_from_like": "newmedia-daren-wxxd%", + "account_key_from_like": "newmedia-daren-wxxd%", + }, + "wwxx_shop": { + "name": "万物店", + "prefix": "B3_wwxx_shop", + "key_from_like": "newmedia-dianpu-wwxx-0-0", + "account_key_from_like": "newmedia-dianpu-wwxx-0-0", + }, + "app_internal": { + "name": "端内", + "prefix": "B4_app_internal", + "key_from_like": None, # IN 列表 + "key_from_in": ["app-active-h5-0-0", "app-sales-bj-qhm-0"], + "account_key_from_like": None, + "account_key_from_in": ["app-active-h5-0-0", "app-sales-bj-qhm-0"], + }, + "sales_adp": { + "name": "销售渠道", + 
"prefix": "B5_sales_adp", + "key_from_like": "sales-adp%", + "account_key_from_like": "sales-adp%", + }, +} + +# 小红书达人密集成单 +XHS_DAREN_PREFIX = "B6_xhs_daren_paid" +XHS_DAREN_KEY_LIKE = ["newmedia-daren-xhs%", "newmedia-dianpu-xhs-0-0"] + +TIME_START = "2025-09-01" +TIME_END = "2026-06-03" + +PLAINTEXT_HEADER = ["手机号", "行为时间", "行为类型", "样本渠道", "实付金额", "额外信息"] +UPLOAD_HEADER = ["用户ID(必填)", "行为时间(选填)", "行为类型(选填)", "样本渠道(选填)", "实付金额(选填)", "额外信息(选填)"] + + +def get_pg_conn(): + secrets_path = os.path.join(SCRIPTS_DIR, "..", "secrets.env") + with open(secrets_path) as f: + pg_pass = None + for line in f: + if line.startswith("PG_ONLINE_PASSWORD="): + pg_pass = line.strip().split("=", 1)[1].strip("'\"") + return psycopg2.connect( + host="bj-postgres-16pob4sg.sql.tencentcdb.com", + port=28591, + user="ai_member", + password=pg_pass, + dbname="vala_bi" + ) + + +def clean_phone(val): + if not val: + return None + val = str(val).strip().replace(" ", "").replace("-", "").replace("'", "") + if "e" in val.lower() or "E" in val: + try: + val = str(int(float(val))) + except: + return None + digits = "".join(c for c in val if c.isdigit()) + if len(digits) == 11 and digits.startswith("1"): + return digits + return None + + +def write_csv(filename, rows, is_upload=False): + """rows: list of dicts""" + path = os.path.join(OUTPUT_DIR, filename) + header = UPLOAD_HEADER if is_upload else PLAINTEXT_HEADER + with open(path, "w", newline="", encoding="utf-8-sig") as f: + writer = csv.writer(f) + writer.writerow(header) + for r in rows: + phone = r["phone"] + user_id = phone_md5(phone) if is_upload else phone + writer.writerow([ + user_id, + r["event_time"], + r["behavior"], + r["channel"], + r["amount"], + r["extra"], + ]) + return path + + +def build_where_clause(ch_cfg, table_alias="o"): + """构建 WHERE 条件""" + if ch_cfg.get("key_from_in"): + placeholders = ",".join([f"'{v}'" for v in ch_cfg["key_from_in"]]) + return f"{table_alias}.key_from IN ({placeholders})" + elif 
ch_cfg.get("key_from_like"): + return f"{table_alias}.key_from LIKE '{ch_cfg['key_from_like']}'" + return "1=0" + + +def build_account_where(ch_cfg, table_alias="a"): + """构建 account 表 WHERE 条件""" + if ch_cfg.get("account_key_from_in"): + placeholders = ",".join([f"'{v}'" for v in ch_cfg["account_key_from_in"]]) + return f"{table_alias}.key_from IN ({placeholders})" + elif ch_cfg.get("account_key_from_like"): + return f"{table_alias}.key_from LIKE '{ch_cfg['account_key_from_like']}'" + return "1=0" + + +def main(): + conn = get_pg_conn() + cur = conn.cursor() + + all_b_phones = set() # 用于 overlap 统计 + + for ch_key, ch_cfg in CHANNELS.items(): + print(f"\n{'='*60}") + print(f"处理渠道: {ch_cfg['name']} ({ch_key})") + + order_where = build_where_clause(ch_cfg) + account_where = build_account_where(ch_cfg) + + # ── 留资包:从 bi_vala_app_account 查注册用户 ── + print(" 查询留资用户...") + cur.execute(f""" + SELECT a.id::text, a.tel_encrypt, a.created_at::date + FROM bi_vala_app_account a + WHERE a.status = 1 AND a.deleted_at IS NULL + AND a.created_at >= '{TIME_START}' + AND a.created_at < '{TIME_END}'::date + 1 + AND ({account_where}) + ORDER BY a.created_at + """) + lead_rows_raw = cur.fetchall() + print(f" 留资用户(原始): {len(lead_rows_raw)}") + + # 去重:同一手机号取最早注册 + lead_map = {} + for aid, tel_enc, created_date in lead_rows_raw: + tel = decrypt_phone(tel_enc) if tel_enc else None + phone = clean_phone(tel) + if not phone: + continue + if phone not in lead_map or (created_date and created_date < lead_map[phone]["date"]): + lead_map[phone] = {"date": created_date, "aid": aid} + + lead_rows = [] + for phone, info in lead_map.items(): + lead_rows.append({ + "phone": phone, + "event_time": str(info["date"]) if info["date"] else "", + "behavior": "留资", + "channel": ch_cfg["name"], + "amount": "", + "extra": f"uid={info['aid']}", + }) + all_b_phones.add(phone) + + plaintext_path = write_csv(f"plaintext_{ch_cfg['prefix']}_lead.csv", lead_rows) + upload_path = 
write_csv(f"{ch_cfg['prefix']}_lead.csv", lead_rows, is_upload=True) + print(f" 留资: {len(lead_rows)} 行 → {upload_path}") + + # ── 成单包:从 bi_vala_order 查 ── + print(" 查询成单用户...") + cur.execute(f""" + SELECT DISTINCT ON (o.account_id) + o.account_id::text, a.tel_encrypt, o.pay_success_date::date, + o.pay_amount_int, o.key_from + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL + WHERE o.pay_success_date >= '{TIME_START}' + AND o.pay_success_date < '{TIME_END}'::date + 1 + AND o.order_status IN (3, 4) + AND ({order_where}) + ORDER BY o.account_id, o.pay_success_date DESC + """) + paid_rows_raw = cur.fetchall() + print(f" 成单用户(原始): {len(paid_rows_raw)}") + + paid_rows = [] + for aid, tel_enc, pay_date, amount_int, key_from in paid_rows_raw: + tel = decrypt_phone(tel_enc) if tel_enc else None + phone = clean_phone(tel) + if not phone: + continue + amount_yuan = f"{amount_int / 100:.2f}" if amount_int else "" + paid_rows.append({ + "phone": phone, + "event_time": str(pay_date) if pay_date else "", + "behavior": "购买", + "channel": ch_cfg["name"], + "amount": amount_yuan, + "extra": f"uid={aid} | key_from={key_from}", + }) + all_b_phones.add(phone) + + plaintext_path = write_csv(f"plaintext_{ch_cfg['prefix']}_paid.csv", paid_rows) + upload_path = write_csv(f"{ch_cfg['prefix']}_paid.csv", paid_rows, is_upload=True) + print(f" 成单: {len(paid_rows)} 行 → {upload_path}") + + # ── B6: 小红书达人密集成单 ── + print(f"\n{'='*60}") + print("处理: 小红书达人密集成单 (B6)") + + xhs_where = " OR ".join([f"o.key_from LIKE '{lk}'" for lk in XHS_DAREN_KEY_LIKE]) + cur.execute(f""" + SELECT o.account_id::text, a.tel_encrypt, o.pay_success_date::date, + o.pay_amount_int, o.key_from + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL + WHERE o.pay_success_date >= '{TIME_START}' + AND o.pay_success_date < '{TIME_END}'::date + 1 + AND o.order_status IN (3, 4) + AND ({xhs_where}) + 
ORDER BY o.pay_success_date DESC + """) + xhs_rows_raw = cur.fetchall() + print(f" 成单(原始): {len(xhs_rows_raw)}") + + # 去重:同一account_id取最近一单 + xhs_paid_map = {} + for aid, tel_enc, pay_date, amount_int, key_from in xhs_rows_raw: + tel = decrypt_phone(tel_enc) if tel_enc else None + phone = clean_phone(tel) + if not phone: + continue + if aid not in xhs_paid_map or (pay_date and pay_date > xhs_paid_map[aid]["date"]): + amount_yuan = f"{amount_int / 100:.2f}" if amount_int else "" + xhs_paid_map[aid] = { + "phone": phone, + "date": pay_date, + "amount": amount_yuan, + "key_from": key_from, + } + + xhs_paid_rows = [] + for aid, info in xhs_paid_map.items(): + xhs_paid_rows.append({ + "phone": info["phone"], + "event_time": str(info["date"]) if info["date"] else "", + "behavior": "购买", + "channel": "小红书达人", + "amount": info["amount"], + "extra": f"uid={aid} | key_from={info['key_from']}", + }) + + plaintext_path = write_csv(f"plaintext_{XHS_DAREN_PREFIX}.csv", xhs_paid_rows) + upload_path = write_csv(f"{XHS_DAREN_PREFIX}.csv", xhs_paid_rows, is_upload=True) + print(f" 成单: {len(xhs_paid_rows)} 行 → {upload_path}") + + # ── QA: B档手机号与A1重合 ── + print(f"\n{'='*60}") + print("QA: B档与A1重合分析") + + # 读取A1明文 + a1_path = os.path.join(OUTPUT_DIR, "plaintext_A1_wala_lead_xhs_202509-20260603.csv") + a1_phones = set() + if os.path.exists(a1_path): + with open(a1_path, encoding="utf-8-sig") as f: + reader = csv.reader(f) + next(reader) # skip header + for row in reader: + if row: + a1_phones.add(row[0]) + print(f" A1手机号: {len(a1_phones)}") + else: + print(" A1文件不存在,跳过") + + overlap = all_b_phones & a1_phones + print(f" B档总手机号(去重): {len(all_b_phones)}") + print(f" B档∩A1重合: {len(overlap)}") + print(f" 重合率: {len(overlap)/len(a1_phones)*100:.1f}% (相对A1)") + + # 输出overlap清单 + overlap_path = os.path.join(OUTPUT_DIR, "B_vs_A1_overlap.csv") + with open(overlap_path, "w", newline="", encoding="utf-8-sig") as f: + writer = csv.writer(f) + writer.writerow(["手机号_MD5", "在A1", "在B档"]) + for phone in 
sorted(overlap): + writer.writerow([phone_md5(phone), "Y", "Y"]) + print(f" Overlap清单: {overlap_path} ({len(overlap)} 行)") + + cur.close() + conn.close() + + # ── 汇总 ── + print(f"\n{'='*60}") + print("=== B档导出完成 ===") + for ch_key, ch_cfg in CHANNELS.items(): + lead_path = os.path.join(OUTPUT_DIR, f"{ch_cfg['prefix']}_lead.csv") + paid_path = os.path.join(OUTPUT_DIR, f"{ch_cfg['prefix']}_paid.csv") + lead_cnt = sum(1 for _ in open(lead_path)) - 1 if os.path.exists(lead_path) else 0 + paid_cnt = sum(1 for _ in open(paid_path)) - 1 if os.path.exists(paid_path) else 0 + print(f" {ch_cfg['name']}: 留资{lead_cnt}行 | 成单{paid_cnt}行") + xhs_path = os.path.join(OUTPUT_DIR, f"{XHS_DAREN_PREFIX}.csv") + xhs_cnt = sum(1 for _ in open(xhs_path)) - 1 if os.path.exists(xhs_path) else 0 + print(f" 小红书达人密集成单: {xhs_cnt}行") + print(f" B档∩A1重合: {len(overlap)}人") + print(f"输出目录: {OUTPUT_DIR}") + + +if __name__ == "__main__": + main() diff --git a/scripts/export_phone_md5.py b/scripts/export_phone_md5.py new file mode 100644 index 0000000..aea8e87 --- /dev/null +++ b/scripts/export_phone_md5.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +""" +导出 bi_vala_app_account 手机号 MD5 映射表 + +用途: + 1. 全量导出:python3 export_phone_md5.py → output/phone_md5_export.csv + 2. 按 account_id 导出:python3 export_phone_md5.py --ids 123,456,789 + 3. 
被其他 skill 调用:from scripts.export_phone_md5 import query_phone_md5 + +输出 CSV 字段:account_id, tel_encrypt, phone_plain, phone_md5 +""" +import csv +import os +import sys +import argparse +from datetime import datetime + +import psycopg2 +import psycopg2.extras + +SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +WORKSPACE_DIR = os.path.dirname(SCRIPTS_DIR) +sys.path.insert(0, SCRIPTS_DIR) +from phone_encrypt import decrypt_phone, phone_md5, tel_encrypt_to_md5 + +# ── 数据库配置 ── +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = 28591 +PG_USER = "ai_member" +PG_DB = "vala_bi" + +# 从 secrets.env 读取密码 +SECRETS_FILE = os.path.join(WORKSPACE_DIR, "secrets.env") + + +def _get_pg_password() -> str: + """从 secrets.env 读取 PostgreSQL 密码""" + with open(SECRETS_FILE) as f: + for line in f: + line = line.strip() + if line.startswith("PG_ONLINE_PASSWORD="): + return line.split("=", 1)[1].strip().strip('"').strip("'") + raise RuntimeError("未找到 PG_ONLINE_PASSWORD 在 secrets.env 中") + + +def _get_conn(): + return psycopg2.connect( + host=PG_HOST, port=PG_PORT, user=PG_USER, + password=_get_pg_password(), dbname=PG_DB + ) + + +def query_phone_md5(account_ids: list[int] = None) -> list[dict]: + """ + 查询手机号 MD5 映射 + + Args: + account_ids: 指定 account_id 列表,None 表示全量 + + Returns: + [{account_id, tel_encrypt, phone_plain, phone_md5}, ...] 
+ """ + conn = _get_conn() + try: + cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + if account_ids: + sql = """ + SELECT id AS account_id, tel_encrypt + FROM bi_vala_app_account + WHERE id = ANY(%s) AND tel_encrypt IS NOT NULL AND tel_encrypt != '' + """ + cur.execute(sql, (account_ids,)) + else: + sql = """ + SELECT id AS account_id, tel_encrypt + FROM bi_vala_app_account + WHERE tel_encrypt IS NOT NULL AND tel_encrypt != '' + """ + cur.execute(sql) + + rows = cur.fetchall() + results = [] + for row in rows: + tel_enc = row["tel_encrypt"] + try: + phone = decrypt_phone(tel_enc) + md5 = phone_md5(phone) + except Exception: + phone = "" + md5 = "" + results.append({ + "account_id": row["account_id"], + "tel_encrypt": tel_enc, + "phone_plain": phone, + "phone_md5": md5, + }) + return results + finally: + conn.close() + + +def export_csv(account_ids: list[int] = None, output_path: str = None) -> str: + """ + 导出为 CSV 文件 + + Returns: + 输出文件路径 + """ + if output_path is None: + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + output_path = os.path.join(WORKSPACE_DIR, "output", f"phone_md5_export_{ts}.csv") + + os.makedirs(os.path.dirname(output_path), exist_ok=True) + results = query_phone_md5(account_ids) + + with open(output_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=["account_id", "tel_encrypt", "phone_plain", "phone_md5"]) + writer.writeheader() + writer.writerows(results) + + print(f"导出完成: {len(results)} 条记录 → {output_path}") + return output_path + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="导出手机号 MD5 映射表") + parser.add_argument("--ids", help="指定 account_id,逗号分隔,不传则全量导出") + parser.add_argument("--output", help="输出文件路径,默认 output/phone_md5_export_<时间戳>.csv") + args = parser.parse_args() + + ids = None + if args.ids: + ids = [int(x.strip()) for x in args.ids.split(",") if x.strip()] + + export_csv(ids, args.output) diff --git a/scripts/export_xhs_yifang.py 
b/scripts/export_xhs_yifang.py new file mode 100644 index 0000000..499fe42 --- /dev/null +++ b/scripts/export_xhs_yifang.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +""" +小红书一方回传 · A档三包导出 +数据源: Bot销转表三页(小龙qJF4I + 吴迪f975f0 + 成都qJF4J) +输出: plaintext CSV → phone_encrypt.py → 上传CSV +""" +import json, requests, os, sys, csv +from datetime import datetime, date, timedelta +from collections import defaultdict + +SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SCRIPTS_DIR) +from phone_encrypt import phone_md5 + +# ── 配置 ── +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_IDS = {"小龙": "qJF4I", "吴迪": "f975f0", "成都": "qJF4J"} +CRED_DIR = "/root/.openclaw/credentials/xiaoxi" +OUTPUT_DIR = os.path.join(os.path.dirname(SCRIPTS_DIR), "output", "yifang_export") +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# 列索引 (0-based, A=0) +COL_C = 2 # 进线日期 +COL_E = 4 # 手机号 +COL_H = 7 # 用户ID (account_id) +COL_K = 10 # 是否下单 +COL_L = 11 # 下单日期 +COL_O = 14 # 下单金额/GMV + + +def get_fs_token(): + with open(os.path.join(CRED_DIR, "config.json")) as f: + cfg = json.load(f) + resp = requests.post( + "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", + json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, + timeout=15 + ) + return resp.json()["tenant_access_token"] + + +def read_sheet(token, sheet_id): + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}" + resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30) + data = resp.json() + if data.get("code") != 0: + raise RuntimeError(f"读取Sheet {sheet_id} 失败: {data}") + return data["data"]["valueRange"]["values"] + + +def parse_date(val): + """解析日期字符串,支持多种格式,包括中文 'M月D日'""" + if not val: + return None + val = str(val).strip() + if not val: + return None + # 标准格式 + for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%Y.%m.%d", "%Y%m%d"]: + try: + return datetime.strptime(val, fmt).date() + except ValueError: + 
continue + # 只取前10位 + try: + return datetime.strptime(val[:10], "%Y-%m-%d").date() + except ValueError: + pass + # 中文格式 "M月D日" — 根据月份推断年份 + import re + m = re.match(r"(\d{1,2})月(\d{1,2})日", val) + if m: + month, day = int(m.group(1)), int(m.group(2)) + now = datetime.now() + year = now.year if month <= now.month else now.year - 1 + try: + return date(year, month, day) + except ValueError: + return None + return None + + +def parse_amount(val): + """解析金额,返回float""" + if not val: + return None + val = str(val).strip().replace("¥", "").replace(",", "").replace(" ", "") + if not val: + return None + try: + return float(val) + except ValueError: + return None + + +def clean_phone(val): + """清洗手机号,返回11位数字字符串""" + if not val: + return None + val = str(val).strip().replace(" ", "").replace("-", "").replace("'", "") + # 处理科学计数法 + if "e" in val.lower() or "E" in val: + try: + val = str(int(float(val))) + except: + return None + # 只保留数字 + digits = "".join(c for c in val if c.isdigit()) + if len(digits) == 11 and digits.startswith("1"): + return digits + return None + + +def main(): + token = get_fs_token() + print(f"飞书Token获取成功") + + # ── 第一步:读取三页数据 ── + all_rows = [] # [(sales_name, row_data)] + for name, sheet_id in SHEET_IDS.items(): + print(f"读取 {name}({sheet_id})...") + rows = read_sheet(token, sheet_id) + # 跳过第1行(表头)和第2行(标注行),从第3行开始 + data_rows = rows[2:] if len(rows) > 2 else [] + for row in data_rows: + all_rows.append((name, row)) + print(f" {name}: {len(data_rows)} 行数据") + + print(f"\n三页合计: {len(all_rows)} 行") + + # ── 第二步:解析并去重 ── + # A1: 按 account_id 去重,取最早进线日期 + # 同时记录手机号(优先E列,E空用H列后续反查) + lead_map = {} # account_id -> {phone, lead_date, has_order, order_date, amount, sales_name} + + for sales_name, row in all_rows: + # 读取各列 + lead_date_str = row[COL_C] if len(row) > COL_C else None + phone_raw = row[COL_E] if len(row) > COL_E else None + uid_raw = row[COL_H] if len(row) > COL_H else None + is_order = row[COL_K] if len(row) > COL_K else None + order_date_str = 
row[COL_L] if len(row) > COL_L else None + amount_raw = row[COL_O] if len(row) > COL_O else None + + # 解析 + lead_date = parse_date(lead_date_str) + phone = clean_phone(phone_raw) + uid = str(uid_raw).strip() if uid_raw else None + + # 跳过无效行:没有进线日期且没有UID + if not lead_date and not uid: + continue + + # 过滤:进线日期 >= 2025-09-01 + if lead_date and lead_date < date(2025, 9, 1): + continue + + # 确定 account_id + account_id = uid if uid and uid != "None" and uid != "" else None + if not account_id: + continue # 没有UID无法关联 + + # 判断是否下单 + has_order = False + if is_order and str(is_order).strip() in ("是", "1", "yes", "Yes", "YES", "TRUE", "true"): + has_order = True + + order_date = parse_date(order_date_str) + amount = parse_amount(amount_raw) + + # 去重:同一 account_id 保留最早进线日期 + if account_id not in lead_map: + lead_map[account_id] = { + "phone": phone, + "lead_date": lead_date, + "has_order": has_order, + "order_date": order_date, + "amount": amount, + "sales_name": sales_name, + } + else: + existing = lead_map[account_id] + # 取最早进线日期 + if lead_date and (not existing["lead_date"] or lead_date < existing["lead_date"]): + existing["lead_date"] = lead_date + # 如果之前没手机号,用新的 + if phone and not existing["phone"]: + existing["phone"] = phone + # 如果之前没标记下单,用新的 + if has_order and not existing["has_order"]: + existing["has_order"] = True + existing["order_date"] = order_date + existing["amount"] = amount + # 如果有多个订单,保留最近的 + if has_order and order_date and existing["order_date"]: + if order_date > existing["order_date"]: + existing["order_date"] = order_date + existing["amount"] = amount + + print(f"\n去重后留资用户: {len(lead_map)} 人") + + # ── 第三步:E列为空的,用H列反查数据库手机号 ── + no_phone_uids = [aid for aid, info in lead_map.items() if not info["phone"]] + print(f"E列无手机号需反查: {len(no_phone_uids)} 人") + + if no_phone_uids: + import psycopg2 + secrets_path = os.path.join(SCRIPTS_DIR, "..", "secrets.env") + with open(secrets_path) as f: + pg_pass = None + for line in f: + if 
line.startswith("PG_ONLINE_PASSWORD="): + pg_pass = line.strip().split("=", 1)[1].strip("'\"") + + conn = psycopg2.connect( + host="bj-postgres-16pob4sg.sql.tencentcdb.com", + port=28591, + user="ai_member", + password=pg_pass, + dbname="vala_bi" + ) + cur = conn.cursor() + # 分批查询 + batch_size = 500 + for i in range(0, len(no_phone_uids), batch_size): + batch = no_phone_uids[i:i+batch_size] + placeholders = ",".join(["%s"] * len(batch)) + cur.execute( + f"SELECT id::text, tel FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status=1 AND deleted_at IS NULL", + batch + ) + for row in cur.fetchall(): + aid, tel = row + phone = clean_phone(tel) + if phone and aid in lead_map: + lead_map[aid]["phone"] = phone + cur.close() + conn.close() + + still_no_phone = sum(1 for info in lead_map.values() if not info["phone"]) + print(f"反查后仍无手机号: {still_no_phone} 人") + + # ── 第四步:生成 A1/A2/A3 ── + today = date.today() + cutoff_90d = today - timedelta(days=90) + + # A1: 小红书留资(全部) + a1_rows = [] + a1_no_phone = 0 + for aid, info in lead_map.items(): + if not info["lead_date"]: + continue + phone = info["phone"] + if not phone: + a1_no_phone += 1 + continue + a1_rows.append({ + "phone": phone, + "event_time": info["lead_date"].strftime("%Y-%m-%d"), + "behavior": "留资", + "channel": "小红书", + "amount": "", + "extra": f"uid={aid}", + "account_id": aid, + "lead_date": info["lead_date"], + "has_order": info["has_order"], + "order_date": info["order_date"], + "amount_val": info["amount"], + }) + + # A2: 小红书留资未成交90d(A1子集,K≠是 且 进线≥90天) + a2_rows = [] + a2_no_phone = 0 + for row in a1_rows: + if row["has_order"]: + continue + if row["lead_date"] > cutoff_90d: + continue + a2_rows.append(row) + + # A3: 小红书成单(K=是 或 O=1,取最近一单) + # 先收集所有成单用户 + paid_map = {} # account_id -> info + for aid, info in lead_map.items(): + if info["has_order"] and info["phone"] and info["order_date"]: + if aid not in paid_map or info["order_date"] > paid_map[aid]["order_date"]: + paid_map[aid] = { + "phone": 
info["phone"], + "event_time": info["order_date"].strftime("%Y-%m-%d"), + "behavior": "购买", + "channel": "小红书", + "amount": str(info["amount"]) if info["amount"] else "", + "extra": f"uid={aid}", + "account_id": aid, + "order_date": info["order_date"], + "amount_val": info["amount"], + } + + a3_rows = list(paid_map.values()) + + print(f"\n=== 各包统计 ===") + print(f"A1 留资: {len(a1_rows)} 行 (无手机号跳过: {a1_no_phone})") + print(f"A2 未成交90d: {len(a2_rows)} 行") + print(f"A3 成单: {len(a3_rows)} 行") + + # ── 第五步:写明文CSV ── + plaintext_header = ["手机号", "行为时间", "行为类型", "样本渠道", "实付金额", "额外信息"] + + for name, rows in [("A1_wala_lead_xhs_202509-20260603", a1_rows), + ("A2_wala_lead_xhs_no_order_90d", a2_rows), + ("A3_wala_paid_xhs_202509-20260603", a3_rows)]: + plaintext_path = os.path.join(OUTPUT_DIR, f"plaintext_{name}.csv") + with open(plaintext_path, "w", newline="", encoding="utf-8-sig") as f: + writer = csv.writer(f) + writer.writerow(plaintext_header) + for row in rows: + writer.writerow([ + row["phone"], + row["event_time"], + row["behavior"], + row["channel"], + row["amount"], + row["extra"], + ]) + print(f"明文CSV: {plaintext_path} ({len(rows)} 行)") + + # ── 第六步:跑 phone_encrypt.py 生成上传CSV ── + upload_header = ["用户ID(必填)", "行为时间(选填)", "行为类型(选填)", "样本渠道(选填)", "实付金额(选填)", "额外信息(选填)"] + + for name, rows in [("A1_wala_lead_xhs_202509-20260603", a1_rows), + ("A2_wala_lead_xhs_no_order_90d", a2_rows), + ("A3_wala_paid_xhs_202509-20260603", a3_rows)]: + upload_path = os.path.join(OUTPUT_DIR, f"{name}.csv") + with open(upload_path, "w", newline="", encoding="utf-8-sig") as f: + writer = csv.writer(f) + writer.writerow(upload_header) + for row in rows: + user_id_md5 = phone_md5(row["phone"]) + writer.writerow([ + user_id_md5, + row["event_time"], + row["behavior"], + row["channel"], + row["amount"], + row["extra"], + ]) + print(f"上传CSV: {upload_path} ({len(rows)} 行)") + + # ── 汇总 ── + print(f"\n=== 导出完成 ===") + print(f"A1_wala_lead_xhs_202509-20260603.csv: {len(a1_rows)} 行") + 
print(f"A2_wala_lead_xhs_no_order_90d.csv: {len(a2_rows)} 行") + print(f"A3_wala_paid_xhs_202509-20260603.csv: {len(a3_rows)} 行") + print(f"输出目录: {OUTPUT_DIR}") + + +if __name__ == "__main__": + main() diff --git a/scripts/fill_leads_sheet.py b/scripts/fill_leads_sheet.py new file mode 100644 index 0000000..44d7f1d --- /dev/null +++ b/scripts/fill_leads_sheet.py @@ -0,0 +1,501 @@ +#!/usr/bin/env python3 +""" +端内析出leads数据 — 自动回填 + 统计汇总脚本 + +流程: +1. 读取曲慧萌/吴迪 sheet 中 A 列的手机号 +2. XXTEA 加密 → 匹配 bi_vala_app_account.tel_encrypt → 获取 account_id +3. 查询注册日期、转化、退费、U0体验课完成日期 → 回写 +4. 按析出月份汇总统计 → 写入"统计" sheet + +统计口径: +- 转化率 = 未退费转化leads / 总leads +- 退费率 = 退费leads / 已转化leads +- 完成率 = 完成该课的leads / 总leads + +用法: + python3 fill_leads_sheet.py [--dry-run] +""" + +import sys +import os +import json +import subprocess +import re +from collections import defaultdict + +# ── 配置 ────────────────────────────────────────────── +SPREADSHEET_TOKEN = "FA3xsw3kph4pdatKlUrcyPgInAc" +SHEET_QHM = "7f0e35" # 曲慧萌 +SHEET_WD = "1K3O6s" # 吴迪 +SHEET_STAT = "scyF3H" # 统计 + +# 端内渠道 +INNER_CHANNELS = [ + "app-active-h5-0-0", + "app-sales-bj-qhm-0", + "app-sales-bj-wd-0", +] + +# U0 体验课 chapter_id +U0_CHAPTERS = { + "L1-U0-L01": 343, + "L1-U0-L02": 344, + "L1-U0-L03": 345, + "L1-U0-L04": 346, + "L1-U0-L05": 348, + "L2-U0-L01": 55, + "L2-U0-L02": 56, + "L2-U0-L03": 57, + "L2-U0-L04": 58, + "L2-U0-L05": 59, +} + +# U0 列顺序(与表头一致) +U0_COL_ORDER = [ + "L1-U0-L01", "L1-U0-L02", "L1-U0-L03", "L1-U0-L04", "L1-U0-L05", + "L2-U0-L01", "L2-U0-L02", "L2-U0-L03", "L2-U0-L04", "L2-U0-L05", +] + +# 列映射(0-based) +COL_PHONE = 0 # A: 用户手机号 +COL_USER_ID = 1 # B: 用户ID +COL_EXTRACT_DATE = 2 # C: 析出日期(手动填) +COL_REG_DATE = 3 # D: 注册日期 +COL_CONVERTED = 4 # E: 是否转化 +COL_CONVERT_DATE = 5 # F: 转化日期 +COL_REFUND = 6 # G: 是否退费 +COL_REFUND_DATE = 7 # H: 退费日期 +COL_U0_START = 8 # I-R: L1-U0-L01 ~ L2-U0-L05 + +# ── 数据库 ───────────────────────────────────────────── +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = "28591" 
+PG_USER = "ai_member" +PG_DB = "vala_bi" +PG_PASSWORD = "LdfjdjL83h3h3^$&**YGG*" + +# ── 加密 ─────────────────────────────────────────────── +import xxtea +import base64 + +XXTEA_KEY = "K1pNOZ5O5+ZqTPSHA2kzPdoNOMOGcv6g" + + +def encrypt_phone(phone: str) -> str: + encrypted = xxtea.encrypt(phone.encode(), XXTEA_KEY.encode()) + result = base64.b64encode(encrypted).decode() + result = result.replace("+", "-").replace("/", "_").replace("=", ".") + return result + + +def pg_query(sql: str) -> list[list]: + env = os.environ.copy() + env["PGPASSWORD"] = PG_PASSWORD + cmd = [ + "psql", "-h", PG_HOST, "-p", PG_PORT, "-U", PG_USER, "-d", PG_DB, + "-t", "-A", "-F", "\t", "-c", sql, + ] + result = subprocess.run(cmd, capture_output=True, text=True, env=env, timeout=60) + if result.returncode != 0: + print(f"[ERROR] PG query failed: {result.stderr}", file=sys.stderr) + return [] + lines = result.stdout.strip().split("\n") + rows = [] + for line in lines: + if not line.strip(): + continue + rows.append(line.split("\t")) + return rows + + +def lark_read(sheet_id: str, range_str: str) -> list: + result = subprocess.run( + ["lark-cli", "sheets", "+read", "--as", "bot", + "--spreadsheet-token", SPREADSHEET_TOKEN, + "--sheet-id", sheet_id, "--range", range_str], + capture_output=True, text=True, timeout=30 + ) + if result.returncode != 0: + print(f"[ERROR] lark read failed: {result.stderr}", file=sys.stderr) + return [] + data = json.loads(result.stdout) + if not data.get("ok"): + print(f"[ERROR] lark read error: {data}", file=sys.stderr) + return [] + return data["data"]["valueRange"]["values"] + + +def lark_write(sheet_id: str, range_str: str, values: list) -> bool: + payload = json.dumps(values) + result = subprocess.run( + ["lark-cli", "sheets", "+write", "--as", "bot", + "--spreadsheet-token", SPREADSHEET_TOKEN, + "--sheet-id", sheet_id, "--range", range_str, + "--values", payload], + capture_output=True, text=True, timeout=30 + ) + if result.returncode != 0: + print(f"[ERROR] 
lark write failed: {result.stderr}", file=sys.stderr) + return False + data = json.loads(result.stdout) + return data.get("ok", False) + + +def match_phones(phones: list[str]) -> dict[str, dict]: + if not phones: + return {} + encrypted_map = {encrypt_phone(p): p for p in phones} + enc_list = list(encrypted_map.keys()) + BATCH_SIZE = 50 + results = {} + for i in range(0, len(enc_list), BATCH_SIZE): + batch = enc_list[i:i + BATCH_SIZE] + quoted = ",".join(f"'{e}'" for e in batch) + sql = f""" + SELECT id, name, tel_encrypt, created_at::date::text + FROM bi_vala_app_account + WHERE tel_encrypt IN ({quoted}) + AND status = 1 AND deleted_at IS NULL + """ + for row in pg_query(sql): + if len(row) >= 4: + acc_id, name, tel_enc, created_at = row[0], row[1], row[2], row[3] + plain = encrypted_map.get(tel_enc) + if plain: + results[plain] = {"id": acc_id, "name": name, "created_at": created_at} + return results + + +def query_conversion(account_ids: list[str]) -> dict[str, dict]: + if not account_ids: + return {} + BATCH_SIZE = 100 + results = {} + channels_str = ",".join(f"'{c}'" for c in INNER_CHANNELS) + for i in range(0, len(account_ids), BATCH_SIZE): + batch = account_ids[i:i + BATCH_SIZE] + ids_str = ",".join(batch) + sql = f""" + SELECT o.account_id::text, + MIN(o.pay_success_date::date::text) AS first_pay_date, + BOOL_OR(r.id IS NOT NULL AND r.status = 3 AND o2.order_status = 4) AS has_refund, + MIN(CASE WHEN r.id IS NOT NULL AND r.status = 3 AND o2.order_status = 4 + THEN r.created_at::date::text END) AS first_refund_date + FROM bi_vala_order o + LEFT JOIN bi_refund_order r ON o.trade_no = r.trade_no AND r.status = 3 + LEFT JOIN bi_vala_order o2 ON o.trade_no = o2.trade_no AND o2.order_status = 4 + WHERE o.account_id IN ({ids_str}) + AND o.key_from IN ({channels_str}) + AND o.pay_success_date IS NOT NULL + AND o.order_status IN (3, 4) + GROUP BY o.account_id + """ + for row in pg_query(sql): + if len(row) >= 4: + acc_id, first_pay, has_refund, first_refund = 
row[0], row[1], row[2], row[3] + results[acc_id] = { + "converted": "是" if first_pay else "否", + "convert_date": first_pay or "", + "refunded": "是" if has_refund in ("t", "true") else "否", + "refund_date": first_refund or "", + } + return results + + +def query_learning(account_ids: list[str]) -> dict[str, dict[str, str]]: + if not account_ids: + return {} + BATCH_SIZE = 100 + all_chapter_dates = {} + for i in range(0, len(account_ids), BATCH_SIZE): + batch = account_ids[i:i + BATCH_SIZE] + ids_str = ",".join(batch) + char_sql = f""" + SELECT c.account_id::text, c.id::text + FROM bi_vala_app_character c + WHERE c.account_id IN ({ids_str}) AND c.deleted_at IS NULL + """ + char_rows = pg_query(char_sql) + char_map = {} + for row in char_rows: + acc_id, char_id = row[0], row[1] + char_map.setdefault(acc_id, []).append(char_id) + if not char_map: + continue + all_char_ids = [] + for cids in char_map.values(): + all_char_ids.extend(cids) + chapter_ids = list(U0_CHAPTERS.values()) + chapter_str = ",".join(str(c) for c in chapter_ids) + for table_idx in range(8): + table_name = f"bi_user_chapter_play_record_{table_idx}" + char_batches = [all_char_ids[j:j + 200] for j in range(0, len(all_char_ids), 200)] + for char_batch in char_batches: + chars_str = ",".join(char_batch) + sql = f""" + SELECT user_id::text, chapter_id, MIN(created_at::date::text) + FROM {table_name} + WHERE user_id IN ({chars_str}) + AND chapter_id IN ({chapter_str}) + AND play_status = 1 + GROUP BY user_id, chapter_id + """ + try: + rows = pg_query(sql) + except Exception: + continue + for row in rows: + if len(row) >= 3: + char_id, ch_id, comp_date = row[0], int(row[1]), row[2] + for acc_id, cids in char_map.items(): + if char_id in cids: + all_chapter_dates.setdefault(acc_id, {}) + for name, cid in U0_CHAPTERS.items(): + if cid == ch_id: + all_chapter_dates[acc_id][name] = comp_date + break + break + return all_chapter_dates + + +# ── 处理单个销售 sheet ────────────────────────────────── + +def 
process_sheet(sheet_id: str, sheet_name: str, dry_run: bool = False) -> list[dict]: + """处理单个销售 sheet,回填数据,返回 lead 数据列表供统计使用""" + print(f"\n{'='*60}") + print(f"处理 Sheet: {sheet_name} ({sheet_id})") + print(f"{'='*60}") + + range_str = f"{sheet_id}!A2:R" + try: + rows = lark_read(sheet_id, range_str) + except Exception as e: + print(f"[ERROR] 读取失败: {e}") + return [] + + if not rows: + print("没有数据行") + return [] + + print(f"读取到 {len(rows)} 行数据") + + phone_to_row = {} + for idx, row in enumerate(rows): + if len(row) > COL_PHONE and row[COL_PHONE]: + phone = str(row[COL_PHONE]).strip() + if phone and re.match(r'^1\d{10}$', phone): + phone_to_row.setdefault(phone, []).append(idx) + + if not phone_to_row: + print("没有有效的手机号") + return [] + + phones = list(phone_to_row.keys()) + print(f"有效手机号: {len(phones)} 个") + + print("→ 匹配 account_id...") + acc_info = match_phones(phones) + print(f" 匹配到 {len(acc_info)} 个账号") + + matched_accounts = [info["id"] for info in acc_info.values()] + matched_phones = set(acc_info.keys()) + + print("→ 查询转化信息...") + conv_info = query_conversion(matched_accounts) + + print("→ 查询 U0 学习进度...") + learn_info = query_learning(matched_accounts) + + updates = [] + lead_data = [] + + for phone, row_indices in phone_to_row.items(): + info = acc_info.get(phone) + if not info: + continue + acc_id = info["id"] + conv = conv_info.get(acc_id, {}) + learn = learn_info.get(acc_id, {}) + + for row_idx in row_indices: + extract_date = "" + if len(rows[row_idx]) > COL_EXTRACT_DATE and rows[row_idx][COL_EXTRACT_DATE]: + extract_date = str(rows[row_idx][COL_EXTRACT_DATE]).strip() + + updates.append((row_idx, COL_USER_ID, acc_id)) + updates.append((row_idx, COL_REG_DATE, info.get("created_at", ""))) + updates.append((row_idx, COL_CONVERTED, conv.get("converted", "否"))) + updates.append((row_idx, COL_CONVERT_DATE, conv.get("convert_date", ""))) + updates.append((row_idx, COL_REFUND, conv.get("refunded", "否"))) + updates.append((row_idx, COL_REFUND_DATE, 
conv.get("refund_date", ""))) + for col_offset, lesson_name in enumerate(U0_COL_ORDER): + updates.append((row_idx, COL_U0_START + col_offset, learn.get(lesson_name, ""))) + + lead_data.append({ + "extract_date": extract_date, + "converted": conv.get("converted", "否"), + "refunded": conv.get("refunded", "否"), + "lessons": {k: learn.get(k, "") for k in U0_COL_ORDER}, + }) + + # 回写 + row_updates = {} + for row_idx, col, val in updates: + row_updates.setdefault(row_idx, {})[col] = val + + print(f"\n→ 准备回写 {len(row_updates)} 行数据...") + + for row_idx, col_vals in sorted(row_updates.items()): + actual_row = row_idx + 2 + + # 分开写入:B列单独写,D-R列一起写,跳过C列(析出日期由销售手动维护) + # B: 用户ID + if COL_USER_ID in col_vals: + lark_write(sheet_id, f"{sheet_id}!B{actual_row}:B{actual_row}", + [[str(col_vals[COL_USER_ID])]]) + + # D-R: 注册日期 ~ L2-U0-L5 + d_to_r = [] + for col in range(COL_REG_DATE, COL_U0_START + len(U0_COL_ORDER)): + val = col_vals.get(col, "") + d_to_r.append(str(val) if val else "") + + if dry_run: + print(f" [DRY-RUN] {sheet_id}!B{actual_row} + D{actual_row}:R{actual_row} ← ...") + else: + lark_write(sheet_id, f"{sheet_id}!D{actual_row}:R{actual_row}", [d_to_r]) + print(f" ✓ 行 {actual_row} 回写成功") + + unmatched = set(phones) - matched_phones + if unmatched: + print(f"\n⚠️ 未匹配到账号的手机号 ({len(unmatched)} 个):") + for p in sorted(unmatched): + print(f" {p}") + + return lead_data + + +# ── 统计汇总 ──────────────────────────────────────────── + +def compute_stats(lead_data: list[dict]) -> dict[str, dict]: + """ + 按析出月份汇总统计 + 口径: + - 转化率 = 未退费转化leads / 总leads + - 退费率 = 退费leads / 已转化leads + - 完成率 = 完成该课的leads / 总leads + """ + month_groups = defaultdict(list) + for lead in lead_data: + extract = lead.get("extract_date", "") + if not extract: + continue + m = re.match(r'(\d{4})[-/](\d{1,2})', extract) + if m: + month = f"{m.group(1)}-{m.group(2).zfill(2)}" + else: + continue + month_groups[month].append(lead) + + if not month_groups: + return {} + + result = {} + for month, leads in 
sorted(month_groups.items()): + total = len(leads) + converted_all = sum(1 for l in leads if l["converted"] == "是") + refunded = sum(1 for l in leads if l["refunded"] == "是") + converted_unrefunded = sum(1 for l in leads if l["converted"] == "是" and l["refunded"] != "是") + + conv_rate = converted_unrefunded / total * 100 if total > 0 else 0 + refund_rate = refunded / converted_all * 100 if converted_all > 0 else 0 + + lesson_rates = {} + for lesson_name in U0_COL_ORDER: + completed = sum(1 for l in leads if l["lessons"].get(lesson_name, "")) + lesson_rates[lesson_name] = completed / total * 100 if total > 0 else 0 + + result[month] = { + "total": total, + "converted_all": converted_all, + "converted_unrefunded": converted_unrefunded, + "refunded": refunded, + "conv_rate": conv_rate, + "refund_rate": refund_rate, + "lesson_rates": lesson_rates, + } + + return result + + +def write_all_stats(all_stats: dict[str, dict[str, dict]], dry_run: bool = False): + """ + 将所有销售的统计数据写入统计 sheet + all_stats: {sales_name: {month: {conv_rate, refund_rate, ...}}} + + 按 销售+月份 逐行写入,从第2行开始 + """ + # 构建有序行列表: [(sales_name, month, stats), ...] 
+ rows_data = [] + for sales_name in ["曲慧萌", "吴迪"]: + stats = all_stats.get(sales_name, {}) + for month in sorted(stats.keys()): + rows_data.append((sales_name, month, stats[month])) + + if not rows_data: + print(" 无统计数据") + return + + for i, (sales_name, month, s) in enumerate(rows_data): + row_num = i + 2 # 从第2行开始 + + # A: 销售名 + lark_write(SHEET_STAT, f"{SHEET_STAT}!A{row_num}:A{row_num}", [[sales_name]]) + # B: 月份 + lark_write(SHEET_STAT, f"{SHEET_STAT}!B{row_num}:B{row_num}", [[month]]) + # C: 转化率(小数,配合百分比格式显示) + lark_write(SHEET_STAT, f"{SHEET_STAT}!C{row_num}:C{row_num}", [[round(s["conv_rate"] / 100, 3)]]) + # D: 退费率 + lark_write(SHEET_STAT, f"{SHEET_STAT}!D{row_num}:D{row_num}", [[round(s["refund_rate"] / 100, 3)]]) + # E-N: 完成率 + lesson_vals = [round(s["lesson_rates"][name] / 100, 3) for name in U0_COL_ORDER] + lark_write(SHEET_STAT, f"{SHEET_STAT}!E{row_num}:N{row_num}", [lesson_vals]) + + print(f" ✓ {sales_name} {month}: 转化率={s['conv_rate']:.1f}% " + f"退费率={s['refund_rate']:.1f}% 总leads={s['total']}") + + +# ── 主流程 ────────────────────────────────────────────── + +def main(): + dry_run = "--dry-run" in sys.argv + if dry_run: + print("⚠️ DRY-RUN 模式,不会实际写入\n") + + # 处理两个销售 sheet + qhm_data = process_sheet(SHEET_QHM, "曲慧萌", dry_run) + wd_data = process_sheet(SHEET_WD, "吴迪", dry_run) + + # 汇总统计 + print(f"\n{'='*60}") + print("汇总统计 → 统计 sheet") + print(f"{'='*60}") + + qhm_stats = compute_stats(qhm_data) + wd_stats = compute_stats(wd_data) + + all_stats = {"曲慧萌": qhm_stats, "吴迪": wd_stats} + + if dry_run: + for sales_name, stats in all_stats.items(): + for month, s in stats.items(): + print(f" [DRY-RUN] {sales_name} {month}: 转化率={s['conv_rate']:.1f}% 退费率={s['refund_rate']:.1f}%") + else: + write_all_stats(all_stats, dry_run) + + print("\n✅ 处理完成") + + +if __name__ == "__main__": + main() diff --git a/scripts/may230_refresh.py b/scripts/may230_refresh.py deleted file mode 100644 index e6526d0..0000000 --- a/scripts/may230_refresh.py +++ /dev/null @@ -1,280 +0,0 
@@ -#!/usr/bin/env python3 -"""处理Sheet1 5月230条待查询:查学情+回填D/C列""" -import json, subprocess, os, urllib.request, re, sys -from datetime import datetime - -SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, SCRIPTS_DIR) -from phone_encrypt import encrypt_phone - -# ===== 0. 准备 ===== -secrets = {} -with open('/root/.openclaw/workspace/secrets.env') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - k, v = line.split('=', 1) - secrets[k] = v.strip('"').strip("'") -PG_PASS = secrets['PG_ONLINE_PASSWORD'] - -def pg_query(sql): - r = subprocess.run(['psql', '-h', 'bj-postgres-16pob4sg.sql.tencentcdb.com', '-p', '28591', - '-U', 'ai_member', '-d', 'vala_bi', '-t', '-A', '-F', '\t'], - input=sql, capture_output=True, text=True, env={**os.environ, 'PGPASSWORD': PG_PASS}) - return r.stdout.strip() - -config = json.load(open('/root/.openclaw/credentials/xiaoxi/config.json')) -app = config['apps'][0] -data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode() -req = urllib.request.Request('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', - data=data, headers={'Content-Type': 'application/json; charset=utf-8'}) -TAT = json.loads(urllib.request.urlopen(req).read())['tenant_access_token'] - -TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac' -SHEET = '55b0eb' - -# ===== 1. 读取目标数据 ===== -targets = json.load(open('/tmp/may230_targets.json')) -with_uid = targets['with_uid'] # [{row, uid, sales, phone}] -without_uid = targets['without_uid'] # [{row, uid:'', sales, phone}] -all_rows = with_uid + without_uid -print(f"[1] 目标: {len(all_rows)} 条 (有uid={len(with_uid)}, 无uid={len(without_uid)})") - -# ===== 2. 
手机号加密匹配 account_id(无uid的行)===== -print("[2] 手机号加密匹配 account_id...") -phone_to_acc = {} # phone -> account_id -if without_uid: - phones = sorted(set(r['phone'] for r in without_uid if r['phone'])) - - # 加密手机号 - phone_enc_map = {} - for p in phones: - if len(p) == 11 and p.isdigit(): - phone_enc_map[encrypt_phone(p)] = p - - enc_list = list(phone_enc_map.keys()) - phone_to_acc_raw = {} # tel_encrypt -> acc_id - batch_size = 200 - for i in range(0, len(enc_list), batch_size): - batch = enc_list[i:i+batch_size] - placeholders = ','.join(f"'{e}'" for e in batch) - sql = f""" - SELECT id, tel_encrypt FROM ( - SELECT id, tel_encrypt, ROW_NUMBER() OVER (PARTITION BY tel_encrypt ORDER BY id DESC) AS rn - FROM bi_vala_app_account - WHERE status = 1 AND deleted_at IS NULL - AND tel_encrypt IN ({placeholders}) - ) t WHERE rn = 1 - """ - for line in pg_query(sql).split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 2: - phone_to_acc_raw[parts[1]] = parts[0] - - # 用明文手机号做key映射 - for enc, p in phone_enc_map.items(): - acc = phone_to_acc_raw.get(enc) - if acc: - phone_to_acc[p] = acc - - print(f" 匹配: {len(phone_to_acc)}/{len(phones)}") - -# 构建 row_num -> {acc_id, sales} 映射 -row_info = {} -for r in with_uid: - row_info[r['row']] = {'acc_id': r['uid'], 'sales': r['sales']} -for r in without_uid: - acc = phone_to_acc.get(r['phone'], '') - row_info[r['row']] = {'acc_id': acc, 'sales': r['sales']} - -matched = sum(1 for v in row_info.values() if v['acc_id']) -print(f" 最终有account_id: {matched}/{len(row_info)}") - -# ===== 3. 批量数据库查询学情 ===== -print("[3] 批量查询学情...") - -acc_ids = sorted(set(v['acc_id'] for v in row_info.values() if v['acc_id']), key=int) -uid_csv = ','.join(acc_ids) -print(f" 去重用户: {len(acc_ids)}") - -# 3a. 
课程+角色 -print(" 课程信息...") -course_rows = pg_query(f""" -SELECT a.id, c.id, COALESCE(d.course_level,'?'), - CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END -FROM bi_vala_app_account a -JOIN bi_vala_app_character c ON c.account_id = a.id AND c.deleted_at IS NULL -LEFT JOIN bi_user_course_detail d ON d.user_id = c.id AND d.deleted_at IS NULL -WHERE a.id IN ({uid_csv}) AND a.status = 1 AND a.deleted_at IS NULL -ORDER BY a.id, c.id, d.course_level -""") - -user_courses = {} -all_uids = set() -for line in course_rows.split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 4: - acc_id, uid, level, ctype = parts[0], parts[1], parts[2], parts[3] - all_uids.add(uid) - user_courses.setdefault(acc_id, []).append({'user_id': uid, 'level': level, 'type': ctype}) - -# 3b. 最近行课 -print(" 最近行课...") -play_records = {} -for i in range(8): - subset = [u for u in all_uids if int(u) % 8 == i] - if not subset: continue - for line in pg_query(f""" - SELECT DISTINCT ON (user_id) user_id, created_at::date, chapter_id, chapter_unique_id - FROM bi_user_chapter_play_record_{i} - WHERE user_id IN ({','.join(subset)}) AND play_status = 1 - ORDER BY user_id, created_at DESC - """).split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 4: - play_records[parts[0]] = {'date': parts[1], 'ch_id': parts[2], 'ch_uid': parts[3]} - -# 3c. 课程结构 -print(" 课程结构...") -ch_ids = set(r['ch_id'] for r in play_records.values()) -ch_map = {} -if ch_ids: - for line in pg_query(f"SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson WHERE id IN ({','.join(ch_ids)})").split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 5: - ch_map[parts[0]] = f"{parts[1]}-{parts[2]}-{parts[3]}-{parts[4]}" - -# 3d. 
学习时长 -print(" 学习时长...") -study_map = {} -for i in range(8): - subset = [u for u in all_uids if int(u) % 8 == i] - if not subset: continue - for line in pg_query(f"SELECT user_id, COALESCE(SUM(interval_time),0)/60000.0 FROM bi_user_component_play_record_{i} WHERE user_id IN ({','.join(subset)}) GROUP BY user_id").split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 2: - study_map[parts[0]] = float(parts[1]) - -# 3e. 付费状态 -print(" 付费状态...") -paid_set = set() -for line in pg_query(f"SELECT DISTINCT account_id FROM bi_vala_order WHERE account_id IN ({uid_csv}) AND pay_success_date IS NOT NULL AND order_status = 3").split('\n'): - if line.strip(): - paid_set.add(line.strip()) - -print(f" 课程:{len(user_courses)} 行课:{len(play_records)} 付费:{len(paid_set)}") - -# ===== 4. 组装 D 列文本 ===== -print("[4] 组装学情文本...") - -def get_best_char(acc_id): - chars = user_courses.get(acc_id, []) - if not chars: - return None, None, None - best, best_date = None, None - for c in chars: - pr = play_records.get(c['user_id']) - if pr and (best_date is None or pr['date'] > best_date): - best_date = pr['date'] - best = c - if best is None: - best = chars[0] - uid = best['user_id'] - pr = play_records.get(uid) - level = best['level'].replace('A1','L1').replace('A2','L2') if best['level'] != '?' else '?' - ctype = best['type'] - if pr and pr['ch_id'] in ch_map: - ch_name = ch_map[pr['ch_id']] - # format: "L1体验课-U00-L01" - parts = ch_name.split('-') - if len(parts) >= 5: - current = f"{level}{ctype}-{parts[3]}-{parts[4]}" - else: - current = f"{level}{ctype}-{ch_name}" - elif pr: - current = f"{level}{ctype}-?" 
- else: - current = f"{level}{ctype}-无记录" - recent = pr['date'] if pr else '无记录' - study = int(study_map.get(uid, 0)) - return current, recent, study - -updates = {} -no_match = [] - -for row_num, info in row_info.items(): - acc_id = info['acc_id'] - sales = info['sales'] - - if not acc_id: - no_match.append(row_num) - d_text = f"销售:{sales} | 用户:未匹配 | 当前:无记录 | 最近行课:无记录 | 学习0min | 未付费" - else: - current, recent, study = get_best_char(acc_id) - pay = '已付费' if acc_id in paid_set else '未付费' - if current: - d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent} | 学习{study}min | {pay}" - else: - d_text = f"销售:{sales} | 用户:{acc_id} | 无课程角色 | 最近行课:无记录 | 学习0min | {pay}" - - updates[row_num] = d_text - -print(f" 未匹配手机号: {len(no_match)} 行") - -# ===== 5. 批量回填 ===== -print(f"[5] 批量回填 {len(updates)} 条...") - -def api_put(tat, token, sheet, range_str, values): - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values" - body = {"valueRange": {"range": f"{sheet}!{range_str}", "values": values}} - data = json.dumps(body).encode() - req = urllib.request.Request(url, data=data, method='PUT') - req.add_header('Authorization', f'Bearer {tat}') - req.add_header('Content-Type', 'application/json; charset=utf-8') - try: - resp = json.loads(urllib.request.urlopen(req).read()) - return resp.get('code') == 0 - except Exception as e: - print(f" API error: {e}") - return False - -sorted_items = sorted(updates.items(), key=lambda x: x[0]) - -# 找连续块 -blocks = [] -cur_block = [sorted_items[0]] -for i in range(1, len(sorted_items)): - if sorted_items[i][0] == sorted_items[i-1][0] + 1: - cur_block.append(sorted_items[i]) - else: - blocks.append(cur_block) - cur_block = [sorted_items[i]] -if cur_block: - blocks.append(cur_block) - -print(f" {len(blocks)} 个连续块") - -success_d = 0 -success_c = 0 -for bi, block in enumerate(blocks): - rows = [r for r, _ in block] - d_vals = [[v] for _, v in block] - c_vals = [["已返回"] for _ in block] - start_row, end_row = rows[0], 
rows[-1] - - if api_put(TAT, TOKEN, SHEET, f'D{start_row}:D{end_row}', d_vals): - success_d += len(block) - if api_put(TAT, TOKEN, SHEET, f'C{start_row}:C{end_row}', c_vals): - success_c += len(block) - - if (bi+1) % 20 == 0: - print(f" 块 {bi+1}/{len(blocks)}: D={success_d} C={success_c}") - -print(f"\n✅ 完成: D列={success_d}/230, C列={success_c}/230") -if no_match: - print(f"⚠️ 未匹配手机号 {len(no_match)} 行: {no_match[:20]}") diff --git a/scripts/phone_encrypt.py b/scripts/phone_encrypt.py index 70d6be1..3189c32 100644 --- a/scripts/phone_encrypt.py +++ b/scripts/phone_encrypt.py @@ -14,9 +14,12 @@ Go 原始逻辑: } 匹配方式: 加密明文手机号 → 与 bi_vala_app_account.tel_encrypt 比对 → 获取 account_id + +MD5 加密: tel_encrypt → 解密为明文 → MD5 → 用于跨系统关联 """ import xxtea import base64 +import hashlib KEY = "K1pNOZ5O5+ZqTPSHA2kzPdoNOMOGcv6g" @@ -41,11 +44,23 @@ def decrypt_phone(encrypted: str) -> str: return decrypted.decode() +def phone_md5(phone: str) -> str: + """明文手机号 → MD5(32位小写十六进制)""" + return hashlib.md5(phone.encode()).hexdigest() + + +def tel_encrypt_to_md5(tel_encrypt: str) -> str: + """tel_encrypt 密文 → 解密 → MD5(一步到位)""" + phone = decrypt_phone(tel_encrypt) + return phone_md5(phone) + + if __name__ == "__main__": # 自测 test_phones = ["13800138000", "15912345678", "18888888888"] for p in test_phones: enc = encrypt_phone(p) dec = decrypt_phone(enc) + md5 = phone_md5(p) status = "✓" if dec == p else "✗" - print(f"{p} → {enc} → {dec} {status}") + print(f"{p} → {enc} → {dec} → MD5:{md5} {status}") diff --git a/scripts/refresh_may_course_data.py b/scripts/refresh_may_course_data.py deleted file mode 100644 index 7db06f8..0000000 --- a/scripts/refresh_may_course_data.py +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env python3 -""" -刷新 5 月行课记录:查询学情数据并回填 Sheet1 D 列 -""" -import json -import subprocess -import sys -import os -from datetime import datetime - -# ========== 1. 
读取目标行 ========== -target_rows = json.load(open('/tmp/target_rows.json')) -print(f"[1/4] 读取目标行: {len(target_rows)} 条") - -# 提取 account_id 列表 -uid_set = sorted(set(str(r[1]) for r in target_rows if r[1]), key=int) -print(f" 去重用户数: {len(uid_set)}") -uid_csv = ','.join(uid_set) - -# ========== 2. 数据库查询 ========== -print(f"[2/4] 查询学情数据...") - -# 从 secrets.env 获取密码 -secrets = {} -with open('/root/.openclaw/workspace/secrets.env') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - k, v = line.split('=', 1) - secrets[k] = v.strip('"').strip("'") - -pg_pass = secrets.get('PG_ONLINE_PASSWORD', '') -pg_host = 'bj-postgres-16pob4sg.sql.tencentcdb.com' -pg_port = '28591' - -# 2a. 课程信息 + 角色ID -print(" 查询课程信息...") -course_sql = f""" -SELECT - a.id AS account_id, - c.id AS user_id, - d.course_level, - CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END AS course_type -FROM bi_vala_app_account a -JOIN bi_vala_app_character c ON c.account_id = a.id AND c.deleted_at IS NULL -LEFT JOIN bi_user_course_detail d ON d.user_id = c.id AND d.deleted_at IS NULL -WHERE a.id IN ({uid_csv}) - AND a.status = 1 - AND a.deleted_at IS NULL -ORDER BY a.id, c.id, d.course_level -""" - -result = subprocess.run( - ['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi', - '-t', '-A', '-F', '\t'], - input=course_sql, - capture_output=True, text=True, - env={**os.environ, 'PGPASSWORD': pg_pass} -) - -# 构建 account_id -> {user_id列表, 课程信息} 映射 -user_courses = {} # account_id -> [{'user_id': ..., 'level': ..., 'type': ...}, ...] -for line in result.stdout.strip().split('\n'): - if not line: - continue - parts = line.split('\t') - if len(parts) >= 4: - acc_id, user_id, level, ctype = parts[0], parts[1], parts[2], parts[3] - if acc_id not in user_courses: - user_courses[acc_id] = [] - user_courses[acc_id].append({ - 'user_id': user_id, - 'level': level, - 'type': ctype - }) - -print(f" 课程信息: {len(user_courses)} 个账号有角色") - -# 2b. 
最近行课记录 (分表查询) -print(" 查询最近行课记录...") -# 收集所有 user_id -all_user_ids = set() -for acc_id, chars in user_courses.items(): - for c in chars: - all_user_ids.add(c['user_id']) - -# 分表查询 -play_records = {} # user_id -> {'recent_date': ..., 'chapter_id': ..., 'chapter_unique_id': ...} -tables = [f'bi_user_chapter_play_record_{i}' for i in range(8)] -for table in tables: - uid_subset = [u for u in all_user_ids if int(u) % 8 == int(table[-1])] - if not uid_subset: - continue - uid_csv_sub = ','.join(uid_subset) - sql = f""" - SELECT DISTINCT ON (user_id) - user_id, - created_at::date AS recent_date, - chapter_id, - chapter_unique_id - FROM {table} - WHERE user_id IN ({uid_csv_sub}) - AND play_status = 1 - ORDER BY user_id, created_at DESC - """ - result = subprocess.run( - ['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi', - '-t', '-A', '-F', '\t'], - input=sql, - capture_output=True, text=True, - env={**os.environ, 'PGPASSWORD': pg_pass} - ) - for line in result.stdout.strip().split('\n'): - if not line: - continue - parts = line.split('\t') - if len(parts) >= 4: - play_records[parts[0]] = { - 'recent_date': parts[1], - 'chapter_id': parts[2], - 'chapter_unique_id': parts[3] - } - -print(f" 行课记录: {len(play_records)} 个角色有行课") - -# 2c. 
课程结构映射 (chapter_id -> 课程名称) -print(" 查询课程结构...") -chapter_ids = set(r['chapter_id'] for r in play_records.values()) -chapter_map = {} -if chapter_ids: - ch_csv = ','.join(chapter_ids) - ch_sql = f""" - SELECT id, course_level, course_season, course_unit, course_lesson - FROM bi_level_unit_lesson - WHERE id IN ({ch_csv}) - """ - result = subprocess.run( - ['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi', - '-t', '-A', '-F', '\t'], - input=ch_sql, - capture_output=True, text=True, - env={**os.environ, 'PGPASSWORD': pg_pass} - ) - for line in result.stdout.strip().split('\n'): - if not line: - continue - parts = line.split('\t') - if len(parts) >= 5: - ch_id = parts[0] - chapter_map[ch_id] = f"{parts[1]}-{parts[2]}-{parts[3]}-{parts[4]}" - -# 2d. 学习时长 -print(" 查询学习时长...") -study_times = {} # user_id -> total_minutes -comp_tables = [f'bi_user_component_play_record_{i}' for i in range(8)] -for table in comp_tables: - uid_subset = [u for u in all_user_ids if int(u) % 8 == int(table[-1])] - if not uid_subset: - continue - uid_csv_sub = ','.join(uid_subset) - sql = f""" - SELECT user_id, COALESCE(SUM(interval_time), 0) / 60000.0 AS total_min - FROM {table} - WHERE user_id IN ({uid_csv_sub}) - GROUP BY user_id - """ - result = subprocess.run( - ['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi', - '-t', '-A', '-F', '\t'], - input=sql, - capture_output=True, text=True, - env={**os.environ, 'PGPASSWORD': pg_pass} - ) - for line in result.stdout.strip().split('\n'): - if not line: - continue - parts = line.split('\t') - if len(parts) >= 2: - study_times[parts[0]] = float(parts[1]) - -# 2e. 
付费状态 -print(" 查询付费状态...") -pay_sql = f""" -SELECT account_id, - CASE WHEN COUNT(*) > 0 THEN '已付费' ELSE '未付费' END AS pay_status -FROM bi_vala_order -WHERE account_id IN ({uid_csv}) - AND pay_success_date IS NOT NULL - AND order_status = 3 -GROUP BY account_id -""" -result = subprocess.run( - ['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi', - '-t', '-A', '-F', '\t'], - input=pay_sql, - capture_output=True, text=True, - env={**os.environ, 'PGPASSWORD': pg_pass} -) -paid_users = set() -for line in result.stdout.strip().split('\n'): - if not line: - continue - parts = line.split('\t') - if len(parts) >= 2 and parts[1] == '已付费': - paid_users.add(parts[0]) - -# 查询所有用户(包括未付费的) -all_paid = set() -for acc_id in uid_set: - if acc_id in paid_users: - all_paid.add(acc_id) -print(f" 付费用户: {len(paid_users)}") - -# ========== 3. 组装学情数据 ========== -print(f"[3/4] 组装学情数据...") - -def get_course_info(acc_id): - """获取用户最优先的课程信息""" - chars = user_courses.get(acc_id, []) - if not chars: - return None, None, None - - # 选最近有过行课记录的角色 - best_char = None - best_date = None - for c in chars: - uid = c['user_id'] - if uid in play_records: - d = play_records[uid]['recent_date'] - if best_date is None or d > best_date: - best_date = d - best_char = c - - if best_char is None and chars: - best_char = chars[0] # 回退到第一个角色 - - if best_char is None: - return None, None, None - - uid = best_char['user_id'] - level = best_char['level'] or '?' - ctype = best_char['type'] or '体验课' - - # 获取当前进度 - pr = play_records.get(uid) - if pr and pr['chapter_id'] in chapter_map: - chapter_name = chapter_map[pr['chapter_id']] - current = f"{level}{ctype}-{chapter_name}" - else: - current = f"{level}{ctype}-?" 
- - recent_date = pr['recent_date'] if pr else '无记录' - study_min = study_times.get(uid, 0) - - return current, recent_date, int(study_min) - -# 为每个目标行生成 D 列文本 -now_str = datetime.now().strftime('%Y-%m-%d %H:%M') -updates = [] # [(row_num, d_text)] - -for row_num, acc_id, phone in target_rows: - acc_id = str(acc_id) - current, recent_date, study_min = get_course_info(acc_id) - pay_status = '已付费' if acc_id in paid_users else '未付费' - - # 从原始表取销售名(通过B列用户ID) - # 销售名需要从原始表A列获取,但这里我们直接用target_rows中没有销售名 - # 实际上我们需要重新读取A列。为了效率,暂时用"用户"占位 - # 让我从之前的扫描中保留销售名... - - # 实际上 target_rows 目前只有 (row_num, acc_id, phone),没有 sales_name - # 需要补读销售名列 - # 先临时处理,后续补全 - - if current is None: - d_text = f"用户:{acc_id} | 未匹配到课程信息 | 最近行课:无记录 | 学习0min | {pay_status}" - else: - d_text = f"用户:{acc_id} | 当前:{current} | 最近行课:{recent_date} | 学习{study_min}min | {pay_status}" - - updates.append((row_num, d_text, pay_status, recent_date, current)) - -print(f" 生成 {len(updates)} 条更新") - -# ========== 需要补读A列(销售名)========== -# 从 target_rows 中批量读取A列 -print(" 补读销售名列...") -import urllib.request - -# 获取 TAT -config = json.load(open('/root/.openclaw/credentials/xiaoxi/config.json')) -app = config['apps'][0] -tat_data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode() -tat_req = urllib.request.Request( - 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', - data=tat_data, headers={'Content-Type': 'application/json; charset=utf-8'}) -tat = json.loads(urllib.request.urlopen(tat_req).read())['tenant_access_token'] - -# 读取所有目标行的 A 列 -TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac' -SHEET = '55b0eb' - -# 按行号排序 -target_rows_sorted = sorted(updates, key=lambda x: x[0]) -sales_map = {} # row_num -> sales_name - -# 分批读取A列 -batch_size = 200 -for i in range(0, len(target_rows_sorted), batch_size): - batch = target_rows_sorted[i:i+batch_size] - row_nums = [str(r[0]) for r in batch] - range_str = f"{SHEET}!A{min(row_nums, key=int)}:A{max(row_nums, key=int)}" - url = 
f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{TOKEN}/values/{range_str}" - req = urllib.request.Request(url) - req.add_header('Authorization', f'Bearer {tat}') - - try: - resp = json.loads(urllib.request.urlopen(req).read()) - values = resp.get('data', {}).get('valueRange', {}).get('values', []) - start_row = int(min(row_nums, key=int)) - for j, v in enumerate(values): - row_num = start_row + j - if v: - sales_map[row_num] = v[0] - except Exception as e: - print(f" Error reading A col: {e}") - -print(f" 销售名: {len(sales_map)} 条") - -# 重新组装 D 列文本(加入销售名) -final_updates = [] -for row_num, d_text, pay_status, recent_date, current in target_rows_sorted: - sales = sales_map.get(row_num, '?') - - # 从原始 target_rows 中找到这个 row_num 对应的 acc_id - orig = next((r for r in target_rows if r[0] == row_num), None) - acc_id = str(orig[1]) if orig else '?' - - if current and current != 'None': - d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent_date} | 学习{study_min}min | {pay_status}" - else: - # 需要重新计算 - current2, recent_date2, study_min2 = get_course_info(acc_id) - pay_status2 = '已付费' if acc_id in paid_users else '未付费' - if current2: - d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current2} | 最近行课:{recent_date2} | 学习{study_min2}min | {pay_status2}" - else: - d_text = f"销售:{sales} | 用户:{acc_id} | 未匹配到课程信息 | 最近行课:无记录 | 学习0min | {pay_status2}" - - final_updates.append((row_num, d_text)) - -updates = final_updates - -# ========== 4. 
批量回填 ========== -print(f"[4/4] 批量回填 {len(updates)} 条...") - -# 构建批量更新请求 -# 飞书 API 支持范围更新,但 D 列不连续(有大量跳行) -# 策略:逐行更新,每50行一批 - -def update_cell(tat, token, sheet, row, col, value): - """更新单个单元格""" - range_str = f"{sheet}!{col}{row}:{col}{row}" - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values" - body = json.dumps({ - "valueRange": { - "range": range_str, - "values": [[value]] - } - }).encode() - req = urllib.request.Request(url, data=body, method='PUT') - req.add_header('Authorization', f'Bearer {tat}') - req.add_header('Content-Type', 'application/json; charset=utf-8') - resp = json.loads(urllib.request.urlopen(req).read()) - return resp.get('code') == 0 - -# 分批处理:每50个一批,D列和C列一起更新 -batch_size = 50 -success_d = 0 -success_c = 0 - -for i in range(0, len(updates), batch_size): - batch = updates[i:i+batch_size] - - # 收集 D 列和 C 列的更新 - # 由于行不连续,需要逐行更新 - for row_num, d_text in batch: - # 更新 D 列 - if update_cell(tat, TOKEN, SHEET, row_num, 'D', d_text): - success_d += 1 - - # 更新 C 列为"已返回" - if update_cell(tat, TOKEN, SHEET, row_num, 'C', '已返回'): - success_c += 1 - - print(f" 进度: {min(i+batch_size, len(updates))}/{len(updates)} (D:{success_d}, C:{success_c})") - -print(f"\n✅ 完成!D列更新: {success_d}/{len(updates)},C列更新: {success_c}/{len(updates)}") diff --git a/scripts/refresh_may_v2.py b/scripts/refresh_may_v2.py deleted file mode 100644 index 9661e2c..0000000 --- a/scripts/refresh_may_v2.py +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env python3 -""" -刷新 5 月行课记录 v2:高效批量查询 + 回填 -""" -import json, subprocess, os, urllib.request -from datetime import datetime - -# ===== 0. 
准备 ===== -secrets = {} -with open('/root/.openclaw/workspace/secrets.env') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - k, v = line.split('=', 1) - secrets[k] = v.strip('"').strip("'") -PG_PASS = secrets['PG_ONLINE_PASSWORD'] -PG_HOST = 'bj-postgres-16pob4sg.sql.tencentcdb.com' -PG_PORT = '28591' - -def pg_query(sql): - r = subprocess.run(['psql', '-h', PG_HOST, '-p', PG_PORT, '-U', 'ai_member', - '-d', 'vala_bi', '-t', '-A', '-F', '\t'], input=sql, capture_output=True, text=True, - env={**os.environ, 'PGPASSWORD': PG_PASS}) - return r.stdout.strip() - -def get_tat(): - config = json.load(open('/root/.openclaw/credentials/xiaoxi/config.json')) - app = config['apps'][0] - data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode() - req = urllib.request.Request( - 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', - data=data, headers={'Content-Type': 'application/json; charset=utf-8'}) - return json.loads(urllib.request.urlopen(req).read())['tenant_access_token'] - -TAT = get_tat() -TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac' -SHEET = '55b0eb' - -# ===== 1. 
读取目标行 + 销售名 ===== -print("[1/4] 读取目标行数据...") -target_rows = json.load(open('/tmp/target_rows.json')) -uid_set = sorted(set(str(r[1]) for r in target_rows if r[1]), key=int) -uid_csv = ','.join(uid_set) -print(f" 644 条记录, {len(uid_set)} 去重用户") - -# 批量读取 A 列(销售名)—— 一次 API 调用读整个范围 -min_row = min(r[0] for r in target_rows) -max_row = max(r[0] for r in target_rows) -url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{TOKEN}/values/{SHEET}!A{min_row}:A{max_row}" -req = urllib.request.Request(url) -req.add_header('Authorization', f'Bearer {TAT}') -resp = json.loads(urllib.request.urlopen(req).read()) -a_values = resp.get('data', {}).get('valueRange', {}).get('values', []) -sales_map = {} # row_num -> sales_name -for i, v in enumerate(a_values): - if v: - sales_map[min_row + i] = v[0] - -# 构建 row_num -> (account_id, sales_name) -row_data = {} -for row_num, acc_id, phone in target_rows: - row_data[row_num] = { - 'acc_id': str(acc_id), - 'sales': sales_map.get(row_num, '?') - } - -# ===== 2. 数据库批量查询 ===== -print("[2/4] 批量查询学情...") - -# 2a. 课程 + 角色 -print(" 课程信息...") -course_rows = pg_query(f""" -SELECT a.id AS account_id, c.id AS user_id, d.course_level, - CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END AS course_type -FROM bi_vala_app_account a -JOIN bi_vala_app_character c ON c.account_id = a.id AND c.deleted_at IS NULL -LEFT JOIN bi_user_course_detail d ON d.user_id = c.id AND d.deleted_at IS NULL -WHERE a.id IN ({uid_csv}) AND a.status = 1 AND a.deleted_at IS NULL -ORDER BY a.id, c.id, d.course_level -""") - -user_courses = {} # acc_id -> [{'user_id':..., 'level':..., 'type':...}] -all_user_ids = set() -for line in course_rows.split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 4: - acc_id, uid, level, ctype = parts[0], parts[1], parts[2] or '?', parts[3] or '体验课' - all_user_ids.add(uid) - user_courses.setdefault(acc_id, []).append({'user_id': uid, 'level': level, 'type': ctype}) - -# 2b. 
最近行课 (分表) -print(" 最近行课...") -play_records = {} -for i in range(8): - subset = [u for u in all_user_ids if int(u) % 8 == i] - if not subset: continue - sql = f""" - SELECT DISTINCT ON (user_id) user_id, created_at::date AS rd, chapter_id, chapter_unique_id - FROM bi_user_chapter_play_record_{i} - WHERE user_id IN ({','.join(subset)}) AND play_status = 1 - ORDER BY user_id, created_at DESC - """ - for line in pg_query(sql).split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 4: - play_records[parts[0]] = {'date': parts[1], 'ch_id': parts[2], 'ch_uid': parts[3]} - -# 2c. 课程结构 -print(" 课程结构...") -ch_ids = set(r['ch_id'] for r in play_records.values()) -ch_map = {} -if ch_ids: - for line in pg_query(f"SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson WHERE id IN ({','.join(ch_ids)})").split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 5: - ch_map[parts[0]] = f"{parts[1]}-{parts[2]}-{parts[3]}-{parts[4]}" - -# 2d. 学习时长 -print(" 学习时长...") -study_map = {} -for i in range(8): - subset = [u for u in all_user_ids if int(u) % 8 == i] - if not subset: continue - for line in pg_query(f"SELECT user_id, COALESCE(SUM(interval_time),0)/60000.0 FROM bi_user_component_play_record_{i} WHERE user_id IN ({','.join(subset)}) GROUP BY user_id").split('\n'): - if not line: continue - parts = line.split('\t') - if len(parts) >= 2: - study_map[parts[0]] = float(parts[1]) - -# 2e. 付费状态 -print(" 付费状态...") -paid_set = set() -for line in pg_query(f"SELECT DISTINCT account_id FROM bi_vala_order WHERE account_id IN ({uid_csv}) AND pay_success_date IS NOT NULL AND order_status = 3").split('\n'): - if line.strip(): - paid_set.add(line.strip()) - -print(f" 课程:{len(user_courses)} 行课:{len(play_records)} 付费:{len(paid_set)}") - -# ===== 3. 
组装 D 列文本 ===== -print("[3/4] 组装学情文本...") - -def get_best_char(acc_id): - chars = user_courses.get(acc_id, []) - if not chars: - return None, None, None - best, best_date = None, None - for c in chars: - pr = play_records.get(c['user_id']) - if pr and (best_date is None or pr['date'] > best_date): - best_date = pr['date'] - best = c - if best is None: - best = chars[0] - uid = best['user_id'] - pr = play_records.get(uid) - level = best['level'] if best['level'] != '?' else '?' - ctype = best['type'] - if pr and pr['ch_id'] in ch_map: - current = f"{level}{ctype}-{ch_map[pr['ch_id']]}" - elif pr: - current = f"{level}{ctype}-?" - else: - current = f"{level}{ctype}-无记录" - recent = pr['date'] if pr else '无记录' - study = int(study_map.get(uid, 0)) - return current, recent, study - -now_str = datetime.now().strftime('%Y-%m-%d %H:%M') -updates = {} - -for row_num, info in row_data.items(): - acc_id = info['acc_id'] - sales = info['sales'] - current, recent, study = get_best_char(acc_id) - pay = '已付费' if acc_id in paid_set else '未付费' - - if current: - d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent} | 学习{study}min | {pay}" - else: - d_text = f"销售:{sales} | 用户:{acc_id} | 无课程角色 | 最近行课:无记录 | 学习0min | {pay}" - - updates[row_num] = d_text - -# ===== 4. 
批量回填 ===== -print(f"[4/4] 批量回填 {len(updates)} 条...") - -def api_put(tat, token, sheet, range_str, values): - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values" - body = {"valueRange": {"range": f"{sheet}!{range_str}", "values": values}} - data = json.dumps(body).encode() - req = urllib.request.Request(url, data=data, method='PUT') - req.add_header('Authorization', f'Bearer {tat}') - req.add_header('Content-Type', 'application/json; charset=utf-8') - try: - resp = json.loads(urllib.request.urlopen(req).read()) - return resp.get('code') == 0 - except Exception as e: - print(f" API error: {e}") - return False - -# 按行号排序后分批(每批50行,连续行合并为范围) -sorted_items = sorted(updates.items(), key=lambda x: x[0]) -success_d = 0 -success_c = 0 - -# 找连续行块 -blocks = [] -cur_block = [sorted_items[0]] -for i in range(1, len(sorted_items)): - prev_row = sorted_items[i-1][0] - cur_row = sorted_items[i][0] - if cur_row == prev_row + 1: - cur_block.append(sorted_items[i]) - else: - blocks.append(cur_block) - cur_block = [sorted_items[i]] -if cur_block: - blocks.append(cur_block) - -print(f" 共 {len(blocks)} 个连续块,批量更新中...") - -for bi, block in enumerate(blocks): - rows = [r for r, _ in block] - d_vals = [[v] for _, v in block] - c_vals = [["已返回"] for _ in block] - - start_row, end_row = rows[0], rows[-1] - - # 批量更新 D 列 - if api_put(TAT, TOKEN, SHEET, f'D{start_row}:D{end_row}', d_vals): - success_d += len(block) - - # 批量更新 C 列 - if api_put(TAT, TOKEN, SHEET, f'C{start_row}:C{end_row}', c_vals): - success_c += len(block) - - if (bi + 1) % 10 == 0: - print(f" 块 {bi+1}/{len(blocks)}: D={success_d} C={success_c}") - -print(f"\n✅ 完成!D列: {success_d}/{len(updates)},C列: {success_c}/{len(updates)}") diff --git a/scripts/sync_sales_lesson_status.py b/scripts/sync_sales_lesson_status.py new file mode 100644 index 0000000..c92af04 --- /dev/null +++ b/scripts/sync_sales_lesson_status.py @@ -0,0 +1,360 @@ +#!/usr/bin/env python3 +""" +销售表行课状态同步 — 从销售表读UID → 查DB行课 → 回填体验节数 
+执行频率:每30分钟 cron 巡检 +归属 Agent:小溪 (xiaoxi) + +流程: + 1. 读取小龙(qJF4I)和吴迪(f975f0)销售表,提取有UID的行 + 2. 查DB获取每个用户完成的课时数(唯一chapter_id) + 3. 回填销售表D列(体验节数) + +过程数据 J/N/R/V/Z 由 COUNTIFS 公式自动读取销售表D列,无需脚本写入。 +""" +import json, requests, os, sys, psycopg2 +from datetime import datetime +from collections import defaultdict + +SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SCRIPTS_DIR) +from phone_encrypt import encrypt_phone + +# ── 配置 ── +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = 28591 +PG_USER = "ai_member" +PG_DB = "vala_bi" + +BOT_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +CRED_DIR = "/root/.openclaw/credentials/xiaoxi" + +SALES_SHEETS = { + "小龙": "qJF4I", + "吴迪": "f975f0", +} + +LOG_FILE = "/var/log/xiaoxi_sales_lesson_sync.log" + +def log(msg): + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + line = f"[{ts}] {msg}" + print(line) + with open(LOG_FILE, "a") as f: + f.write(line + "\n") + +def get_pg_password(): + secrets_path = os.path.join(SCRIPTS_DIR, "..", "secrets.env") + with open(secrets_path) as f: + for line in f: + if line.startswith("PG_ONLINE_PASSWORD="): + return line.strip().split("=", 1)[1].strip("'\"") + +def get_fs_token(): + with open(os.path.join(CRED_DIR, "config.json")) as f: + cfg = json.load(f) + resp = requests.post( + "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", + json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, + timeout=15 + ) + return resp.json()["tenant_access_token"] + +def read_sheet(token, sheet_id, range_str): + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{BOT_TOKEN}/values/{sheet_id}!{range_str}" + resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30) + data = resp.json() + if data.get("code") != 0: + raise RuntimeError(f"读取Sheet失败: {data}") + return data["data"]["valueRange"]["values"] + +def put_values(token, sheet_id, range_str, values): + url = 
f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{BOT_TOKEN}/values" + body = {"valueRange": {"range": f"{sheet_id}!{range_str}", "values": values}} + resp = requests.put(url, headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + }, json=body, timeout=30) + return resp.json() + +def parse_date(date_str): + """解析 'X月Y日' → (month, day)""" + date_str = str(date_str).strip() + if '月' in date_str and '日' in date_str: + parts = date_str.replace('月', ' ').replace('日', '').split() + try: + return int(parts[0]), int(parts[1]) + except (ValueError, IndexError): + pass + return None, None + +def batch_in(cur, sql_tpl, params, chunk=500): + results = [] + for i in range(0, len(params), chunk): + batch = params[i:i+chunk] + ph = ",".join(["%s"] * len(batch)) + cur.execute(sql_tpl % ph, batch) + results.extend(cur.fetchall()) + return results + + +def main(): + log("=" * 50) + log("销售表行课状态同步 启动") + + try: + token = get_fs_token() + conn = psycopg2.connect( + host=PG_HOST, port=PG_PORT, user=PG_USER, + password=get_pg_password(), dbname=PG_DB, connect_timeout=30 + ) + cur = conn.cursor() + + # ── Step 1: 读取销售表,提取UID(优先H列,H空则E列手机号匹配) ── + all_users = [] # [{sales, name, uid, month, row_num}] + phone_rows = [] # [{sales, sheet_id, name, phone, month, row_num}] H列为空但有手机号的行 + + for sales_name, sheet_id in SALES_SHEETS.items(): + rows = read_sheet(token, sheet_id, "A1:K2000") + for idx, row in enumerate(rows[2:], start=3): # skip header + legend + if not row or len(row) < 8: + continue + uid_str = str(row[7]).strip() if len(row) > 7 and row[7] else '' + phone_str = str(row[4]).strip() if len(row) > 4 and row[4] else '' + date_str = str(row[2]).strip() if len(row) > 2 and row[2] else '' + name = str(row[1]).strip() if len(row) > 1 and row[1] else '' + + month, day = parse_date(date_str) + if month is None: + continue + + # 优先用H列UID + if uid_str and uid_str not in ('', 'None', '未注册'): + try: + uid = int(float(uid_str)) + if uid > 0: + 
all_users.append({ + "sales": sales_name, + "sheet_id": sheet_id, + "name": name, + "uid": uid, + "month": month, + "row_num": idx, + }) + continue + except (ValueError, TypeError): + pass + + # H列无有效UID,尝试E列手机号匹配 + if phone_str and phone_str not in ('', 'None', '-'): + # 清洗手机号:去空格、去+86前缀 + phone_clean = phone_str.strip().replace(' ', '').replace('\t', '') + if phone_clean.startswith('+86'): + phone_clean = phone_clean[3:] + # 验证是否为11位数字手机号 + if len(phone_clean) == 11 and phone_clean.isdigit() and phone_clean.startswith('1'): + phone_rows.append({ + "sales": sales_name, + "sheet_id": sheet_id, + "name": name, + "phone": phone_clean, + "month": month, + "row_num": idx, + }) + + # 手机号匹配 account_id + phone_to_uid = {} + if phone_rows: + phone_enc_map = {} # {encrypted: phone} + for pr in phone_rows: + enc = encrypt_phone(pr["phone"]) + phone_enc_map[enc] = pr["phone"] + + enc_list = list(phone_enc_map.keys()) + rc = batch_in(cur, + "SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN (%s) AND status=1 AND deleted_at IS NULL", + enc_list + ) + for aid, tel_enc in rc: + phone = phone_enc_map.get(tel_enc) + if phone: + phone_to_uid[phone] = aid + + # 将匹配到的加入 all_users + for pr in phone_rows: + uid = phone_to_uid.get(pr["phone"]) + if uid: + all_users.append({ + "sales": pr["sales"], + "sheet_id": pr["sheet_id"], + "name": pr["name"], + "uid": uid, + "month": pr["month"], + "row_num": pr["row_num"], + }) + + log(f"Step 1: 读取销售表, 有效UID: {len(all_users)} (含手机号匹配: {len(phone_to_uid)})") + + if not all_users: + log("无有效UID, 退出") + cur.close() + conn.close() + return 0 + + # ── Step 2: 查角色映射 ── + uid_set = list(set(u["uid"] for u in all_users)) + account_chars = defaultdict(list) + char_to_account = {} + + rc = batch_in(cur, + "SELECT account_id, id FROM bi_vala_app_character WHERE account_id IN (%s) AND deleted_at IS NULL", + uid_set + ) + for aid, cid in rc: + account_chars[aid].append(cid) + char_to_account[cid] = aid + + char_ids = 
list(char_to_account.keys()) + log(f"Step 2: 角色映射, account={len(account_chars)}, char={len(char_ids)}") + + # ── Step 2.5: 查账户信息(注册日期、下载渠道) ── + uid_info = {} # uid → {created_at, download_channel} + rc = batch_in(cur, + "SELECT id, created_at, download_channel FROM bi_vala_app_account WHERE id IN (%s) AND status=1 AND deleted_at IS NULL", + uid_set + ) + for aid, created_at, download_channel in rc: + uid_info[aid] = { + "created_at": str(created_at)[:10] if created_at else "", + "download_channel": download_channel or "" + } + + # ── Step 3: 查课时完成记录(唯一chapter_id) ── + char_chapters = defaultdict(set) # char_id → set of chapter_ids + + for tbl_idx in range(8): + table = f"bi_user_chapter_play_record_{tbl_idx}" + try: + cur.execute( + f"SELECT user_id, chapter_id FROM {table} WHERE play_status=1 AND deleted_at IS NULL AND user_id = ANY(%s)", + (char_ids,) + ) + for uid, ch_id in cur.fetchall(): + char_chapters[uid].add(ch_id) + except Exception as e: + log(f" 警告 {table}: {e}") + + # 汇总每个 account 的完成课时数 + uid_lesson_count = {} # uid → unique chapter count + for uid in uid_set: + chars = account_chars.get(uid, []) + all_chapters = set() + for cid in chars: + all_chapters.update(char_chapters.get(cid, set())) + uid_lesson_count[uid] = len(all_chapters) + + log(f"Step 3: 课时统计完成, 有记录用户: {sum(1 for v in uid_lesson_count.values() if v > 0)}") + + # ── Step 4: 回填销售表 D/H/I/J 列 ── + # D=体验节数, H=UID(手机号匹配到的回填), I=注册日期, J=下载渠道 + for sales_name, sheet_id in SALES_SHEETS.items(): + # 读 D/H/I/J 四列 + d_existing = read_sheet(token, sheet_id, "D1:D2000") + h_existing = read_sheet(token, sheet_id, "H1:H2000") + i_existing = read_sheet(token, sheet_id, "I1:I2000") + j_existing = read_sheet(token, sheet_id, "J1:J2000") + + new_d, new_h, new_i, new_j = [], [], [], [] + d_changed, h_changed, i_changed, j_changed = 0, 0, 0, 0 + + for idx in range(len(d_existing)): + row_num = idx + 1 + + # 保留前2行 + if idx < 2: + new_d.append(d_existing[idx] if idx < len(d_existing) else []) + 
new_h.append(h_existing[idx] if idx < len(h_existing) else []) + new_i.append(i_existing[idx] if idx < len(i_existing) else []) + new_j.append(j_existing[idx] if idx < len(j_existing) else []) + continue + + uid_for_row = None + for u in all_users: + if u["sheet_id"] == sheet_id and u["row_num"] == row_num: + uid_for_row = u["uid"] + break + + if uid_for_row is not None: + # D列:体验节数(0留空,>5封顶5) + count = uid_lesson_count.get(uid_for_row, 0) + if count == 0: + d_val = '' # 0留空 + elif count > 5: + d_val = 5 # 封顶5 + else: + d_val = count + old_d = str(d_existing[idx][0]).strip() if idx < len(d_existing) and d_existing[idx] else '' + if old_d != str(d_val): + d_changed += 1 + new_d.append([d_val]) + + # H列:UID(手机号匹配到的回填,已有UID不覆盖) + old_h = str(h_existing[idx][0]).strip() if idx < len(h_existing) and h_existing[idx] else '' + if not old_h or old_h in ('', 'None', '未注册'): + new_h.append([uid_for_row]) + h_changed += 1 + else: + new_h.append(h_existing[idx] if idx < len(h_existing) else []) + + # I列:注册日期 + info = uid_info.get(uid_for_row, {}) + new_i_val = info.get("created_at", "") + old_i = str(i_existing[idx][0]).strip() if idx < len(i_existing) and i_existing[idx] else '' + if old_i != new_i_val: + i_changed += 1 + new_i.append([new_i_val]) + + # J列:下载渠道 + new_j_val = info.get("download_channel", "") + old_j = str(j_existing[idx][0]).strip() if idx < len(j_existing) and j_existing[idx] else '' + if old_j != new_j_val: + j_changed += 1 + new_j.append([new_j_val]) + else: + new_d.append(d_existing[idx] if idx < len(d_existing) else []) + new_h.append(h_existing[idx] if idx < len(h_existing) else []) + new_i.append(i_existing[idx] if idx < len(i_existing) else []) + new_j.append(j_existing[idx] if idx < len(j_existing) else []) + + # 写入 + for col_letter, col_name, new_vals, changed in [ + ("D", "体验节数", new_d, d_changed), + ("H", "UID", new_h, h_changed), + ("I", "注册日期", new_i, i_changed), + ("J", "下载渠道", new_j, j_changed), + ]: + if changed > 0: + rng = 
f"{col_letter}1:{col_letter}2000" + r = put_values(token, sheet_id, rng, new_vals) + if r.get("code") != 0: + log(f" 写入{col_name}列失败 {sheet_id}: {r}") + else: + log(f" {sales_name} {col_name}列更新 {changed} 行") + else: + log(f" {sales_name} {col_name}列无变化") + + log(f"Step 4: D/H/I/J列回填完成") + + cur.close() + conn.close() + log("完成") + return 0 + + except Exception as e: + log(f"ERROR: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/sync_weiban_to_sales.py b/scripts/sync_weiban_to_sales.py new file mode 100644 index 0000000..364c39d --- /dev/null +++ b/scripts/sync_weiban_to_sales.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +"""同步微伴线索到销售表,去重后追加""" +import json, requests, os, sys +from collections import defaultdict + +CRED_DIR = "/root/.openclaw/credentials/xiaoxi" +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +WEIBAN_FILE = "/tmp/weiban_20260603.xlsx" + +# Sheet IDs +SHEET_IDS = {"小龙": "qJF4I", "吴迪": "f975f0"} + +# 微伴客服 → 销售映射 +CS_MAP = {"益达老师": "小龙", "吴迪": "吴迪"} + +def get_token(): + with open(os.path.join(CRED_DIR, "config.json")) as f: + cfg = json.load(f) + resp = requests.post( + "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", + json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, + timeout=15) + return resp.json()["tenant_access_token"] + +def read_sheet(token, sheet_id, range_str): + resp = requests.get( + f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}!{range_str}", + headers={"Authorization": f"Bearer {token}"}, timeout=30) + data = resp.json() + if data.get("code") != 0: + print(f" Read error: {data}") + return [] + return data["data"]["valueRange"]["values"] + +def append_rows(token, sheet_id, rows): + """Append rows to sheet using append API""" + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values_append" + body = 
{"valueRange": {"range": f"{sheet_id}!A:K", "values": rows}} + resp = requests.post(url, headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + }, json=body, timeout=30) + r = resp.json() + if r.get("code") != 0: + print(f" Append error: {r}") + return False + return True + +def main(): + token = get_token() + + # Step 1: Parse 微伴 data + print("=== Step 1: 解析微伴数据 ===") + import openpyxl + wb = openpyxl.load_workbook(WEIBAN_FILE, read_only=True) + ws = wb['Sheet1'] + + weiban_entries = [] + for row in ws.iter_rows(min_row=6, values_only=True): + if row[0] is None: + continue + add_time = str(row[13]) if row[13] else '' + if not add_time.startswith('2026-06'): + continue + + kefu = row[6] # 所属客服 + sales = CS_MAP.get(kefu) + if not sales: + continue + + # Parse date: "2026-06-01 12:34:56" → "6月1日" + from datetime import datetime + try: + dt = datetime.strptime(add_time, "%Y-%m-%d %H:%M:%S") + date_str = f"{dt.month}月{dt.day}日" + except: + date_str = add_time[:10] + + name = str(row[0]).strip() if row[0] else '' + weiban_entries.append({ + "name": name, + "date": date_str, + "sales": sales, + "kefu": kefu, + }) + + print(f" 微伴6月线索: {len(weiban_entries)} 条") + + # Count by sales + by_sales = defaultdict(list) + for e in weiban_entries: + by_sales[e["sales"]].append(e) + for s, entries in by_sales.items(): + print(f" {s}: {len(entries)} 条") + + # Step 2: Read existing sales sheet data + print("\n=== Step 2: 读取销售表现有数据 ===") + existing_names = {"小龙": set(), "吴迪": set()} + + for sales_name, sheet_id in SHEET_IDS.items(): + data = read_sheet(token, sheet_id, f"A1:K2000") + for row in data[1:]: # skip header + if row and len(row) >= 2 and row[1]: + name = str(row[1]).strip() + if name: + existing_names[sales_name].add(name) + print(f" {sales_name}表已有 {len(existing_names[sales_name])} 条记录") + + # Step 3: Dedup and prepare new rows + print("\n=== Step 3: 去重 ===") + new_rows = {"小龙": [], "吴迪": []} + + for sales_name, entries in by_sales.items(): + 
existing = existing_names[sales_name] + for e in entries: + if e["name"] not in existing: + # Format: [销售归属, 微信昵称, 进线日期, 体验节数, 手机号, 用户年级, 课史/跟进, 用户ID, 注册日期, 下载渠道, 是否下单] + new_rows[sales_name].append([sales_name, e["name"], e["date"], "", "", "", "", "", "", "", ""]) + else: + print(f" 跳过重复: [{sales_name}] {e['name']}") + + for s, rows in new_rows.items(): + print(f" {s}新增: {len(rows)} 条") + + # Step 4: Write to sheets + print("\n=== Step 4: 写入销售表 ===") + for sales_name, rows in new_rows.items(): + if not rows: + print(f" {sales_name}: 无新增,跳过") + continue + sheet_id = SHEET_IDS[sales_name] + ok = append_rows(token, sheet_id, rows) + if ok: + print(f" {sales_name}: ✅ 写入 {len(rows)} 条") + else: + print(f" {sales_name}: ❌ 写入失败") + + # Step 5: Summary + print("\n=== 汇总 ===") + print(f"微伴6月总量: {len(weiban_entries)} 条") + print(f"小龙: 微伴{len(by_sales['小龙'])}条 → 新增{len(new_rows['小龙'])}条") + print(f"吴迪: 微伴{len(by_sales['吴迪'])}条 → 新增{len(new_rows['吴迪'])}条") + +if __name__ == "__main__": + main() diff --git a/scripts/update_wudi_sales.py b/scripts/update_wudi_sales.py new file mode 100644 index 0000000..6b35a53 --- /dev/null +++ b/scripts/update_wudi_sales.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +"""吴迪销转表行课更新 — 手机号补ID + 行课回填""" +import json, requests, os, sys, psycopg2 +from datetime import datetime +from collections import defaultdict + +SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SCRIPTS_DIR) +from phone_encrypt import encrypt_phone + +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "f975f0" +CRED_DIR = "/root/.openclaw/credentials/xiaoxi" + +def get_fs_token(): + with open(os.path.join(CRED_DIR, "config.json")) as f: + cfg = json.load(f) + resp = requests.post( + "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", + json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, + timeout=15 + ) + return resp.json()["tenant_access_token"] + +def read_sheet(token, sheet_id): + url = 
f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}" + resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30) + data = resp.json() + if data.get("code") != 0: + raise RuntimeError(f"读取失败: {data}") + return data["data"]["valueRange"]["values"] + +def put_values(token, sheet_id, range_str, values): + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" + body = {"valueRange": {"range": f"{sheet_id}!{range_str}", "values": values}} + resp = requests.put(url, headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + }, json=body, timeout=30) + return resp.json() + +def get_pg(): + secrets_path = os.path.join(SCRIPTS_DIR, "..", "secrets.env") + with open(secrets_path) as f: + pg_pass = None + for line in f: + if line.startswith("PG_ONLINE_PASSWORD="): + pg_pass = line.strip().split("=", 1)[1].strip("'\"") + return psycopg2.connect( + host="bj-postgres-16pob4sg.sql.tencentcdb.com", + port=28591, user="ai_member", password=pg_pass, dbname="vala_bi" + ) + +def clean_phone(val): + if not val: return None + val = str(val).strip().replace(" ", "").replace("-", "").replace("'", "") + if "e" in val.lower(): + try: val = str(int(float(val))) + except: return None + digits = "".join(c for c in val if c.isdigit()) + return digits if len(digits) == 11 and digits.startswith("1") else None + +def main(): + token = get_fs_token() + rows = read_sheet(token, SHEET_ID) + print(f"总行数: {len(rows)}") + + # 分析缺口 + need_phone_to_id = [] # E有手机 H空 + need_course = [] # H有UID D空 + for idx, row in enumerate(rows[2:], start=3): # 第3行起 + e_val = str(row[4]).strip() if len(row) > 4 and row[4] else "" + h_val = str(row[7]).strip() if len(row) > 7 and row[7] else "" + d_val = str(row[3]).strip() if len(row) > 3 and row[3] else "" + + phone = clean_phone(e_val) + uid = h_val if h_val and h_val != "None" and h_val != "" else None + + if phone and not uid: + 
need_phone_to_id.append((idx, phone)) + if uid and not d_val: + need_course.append((idx, uid)) + + print(f"手机号→ID待补: {len(need_phone_to_id)}") + print(f"有UID但D空: {len(need_course)}") + + conn = get_pg() + cur = conn.cursor() + + # ── Step 1: 手机号→ID ── + if need_phone_to_id: + # 批量加密 + enc_map = {} + for idx, phone in need_phone_to_id: + enc = encrypt_phone(phone) + enc_map[enc] = phone + + enc_list = list(enc_map.keys()) + enc_to_aid = {} + for i in range(0, len(enc_list), 500): + batch = enc_list[i:i+500] + ph = ",".join(["%s"] * len(batch)) + cur.execute( + f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({ph}) AND status=1 AND deleted_at IS NULL", + batch + ) + for aid, tel_enc in cur.fetchall(): + enc_to_aid[tel_enc] = aid + + # 回填H列 + updates = [] + for idx, phone in need_phone_to_id: + enc = encrypt_phone(phone) + aid = enc_to_aid.get(enc) + updates.append((idx, str(aid) if aid else "")) + + # 分批写入 + for i in range(0, len(updates), 50): + batch = updates[i:i+50] + vals = [[u[1]] for u in batch] + sr, er = batch[0][0], batch[-1][0] + put_values(token, SHEET_ID, f"H{sr}:H{er}", vals) + + matched = sum(1 for u in updates if u[1]) + print(f"H列回填完成: {len(updates)}行, 匹配{matched}") + + # ── Step 2: 行课回填 ── + if need_course: + uids = list(set(uid for _, uid in need_course)) + + # 查注册日期和下载渠道 + cur.execute(f""" + SELECT id, created_at, download_channel FROM bi_vala_app_account + WHERE id::text IN ({','.join(['%s']*len(uids))}) AND status=1 AND deleted_at IS NULL + """, uids) + acc_info = {str(row[0]): (row[1], row[2]) for row in cur.fetchall()} + + # 查角色 + cur.execute(f""" + SELECT account_id, id FROM bi_vala_app_character + WHERE account_id::text IN ({','.join(['%s']*len(uids))}) + """, uids) + aid_to_chars = defaultdict(list) + for aid, cid in cur.fetchall(): + aid_to_chars[str(aid)].append(cid) + + # 查体验课节数 (expire_time IS NULL) + cur.execute(f""" + SELECT account_id, course_level, COUNT(*) as cnt + FROM bi_user_course_detail + WHERE 
account_id::text IN ({','.join(['%s']*len(uids))}) + AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id, course_level + """, uids) + trial_info = defaultdict(dict) + for aid, level, cnt in cur.fetchall(): + trial_info[str(aid)][level] = cnt + + # 查完课记录 + char_ids = [] + for cids in aid_to_chars.values(): + char_ids.extend(cids) + + char_completed = defaultdict(set) + if char_ids: + for tbl_idx in range(8): + cur.execute(f""" + SELECT user_id, chapter_id FROM bi_user_chapter_play_record_{tbl_idx} + WHERE user_id IN ({','.join(['%s']*len(char_ids))}) + AND play_status = 1 + """, char_ids) + for uid, ch_id in cur.fetchall(): + char_completed[uid].add(ch_id) + + # 组装回填数据 + updates_d = [] + updates_i = [] + updates_j = [] + for idx, uid in need_course: + info = acc_info.get(uid) + reg_date = info[0].strftime("%Y-%m-%d") if info and info[0] else "" + dl_channel = info[1] if info and info[1] else "" + + # 体验节数 + trials = trial_info.get(uid, {}) + trial_parts = [] + for level in sorted(trials.keys()): + trial_parts.append(f"{level}体验{trials[level]}节") + trial_str = ", ".join(trial_parts) if trial_parts else "无体验课" + + # 完课数 + chars = aid_to_chars.get(uid, []) + completed = set() + for cid in chars: + completed |= char_completed.get(cid, set()) + + d_val = f"{trial_str} | 已完课{len(completed)}节" if completed else trial_str + + updates_d.append((idx, d_val)) + updates_i.append((idx, reg_date)) + updates_j.append((idx, dl_channel)) + + # 分批写入 + for col_letter, updates in [("D", updates_d), ("I", updates_i), ("J", updates_j)]: + for i in range(0, len(updates), 50): + batch = updates[i:i+50] + vals = [[u[1]] for u in batch] + sr, er = batch[0][0], batch[-1][0] + put_values(token, SHEET_ID, f"{col_letter}{sr}:{col_letter}{er}", vals) + + print(f"D/I/J列回填完成: {len(updates_d)}行") + + cur.close() + conn.close() + print("完成") + +if __name__ == "__main__": + main() diff --git a/scripts/write_course_results.py b/scripts/write_course_results.py deleted file mode 100644 
index 1b436ba..0000000 --- a/scripts/write_course_results.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 -""" -将行课记录结果分组写入飞书表格 (Sheet1 55b0eb) -""" -import json -import requests -import os - -SPREADSHEET_TOKEN = "RFIJsXT8FhGHhctY4RwczcOfnac" -SHEET_ID = "55b0eb" -CRED_DIR = "/root/.openclaw/credentials/xiaoxi" - -def get_token(): - with open(os.path.join(CRED_DIR, "config.json")) as f: - cfg = json.load(f) - app_id = cfg['apps'][0]['appId'] - app_secret = cfg['apps'][0]['appSecret'] - resp = requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", - json={"app_id": app_id, "app_secret": app_secret}) - return resp.json()['tenant_access_token'] - -def write_range(token, range_str, values): - """Write values to a sheet range""" - url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" - body = { - "valueRange": { - "range": f"{SHEET_ID}!{range_str}", - "values": values - } - } - resp = requests.put(url, headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - }, json=body) - result = resp.json() - return result.get('code') == 0 - -def main(): - with open('/tmp/sheet_course_results.json') as f: - data = json.load(f) - - results = data['results'] - results.sort(key=lambda x: x['row_idx']) - - # Group consecutive rows into batches - batches = [] - current_batch = [] - prev_row = None - - for r in results: - if prev_row is not None and r['row_idx'] != prev_row + 1: - # Gap found, start new batch - if current_batch: - batches.append(current_batch) - current_batch = [r] - else: - current_batch.append(r) - prev_row = r['row_idx'] - - if current_batch: - batches.append(current_batch) - - print(f"Total batches: {len(batches)}, total rows: {len(results)}") - - token = get_token() - print("Token obtained") - - written = 0 - failed = 0 - - for idx, batch in enumerate(batches): - start_row = batch[0]['row_idx'] - end_row = batch[-1]['row_idx'] - - # Build values array: 
[record, update_time] for each row - # For contiguous batches, fill all rows - values = [] - batch_idx = 0 - for row_num in range(start_row, end_row + 1): - if batch_idx < len(batch) and batch[batch_idx]['row_idx'] == row_num: - values.append([batch[batch_idx]['record'], batch[batch_idx]['update_time']]) - batch_idx += 1 - else: - # Empty cell for non-target rows (important for non-contiguous fills in contiguous range) - values.append(['', '']) - - range_str = f"D{start_row}:E{end_row}" - - if write_range(token, range_str, values): - written += len(batch) - if (idx + 1) % 20 == 0: - print(f" ... {idx+1}/{len(batches)} batches done, {written} rows written") - else: - failed += len(batch) - print(f" FAILED batch {idx+1}: rows {start_row}-{end_row} ({len(batch)} rows)") - - print(f"\nDone: {written} wrote, {failed} failed") - - # Show sample result from first batch - if batches: - sample = batches[0][0] - print(f"\nSample: Row {sample['row_idx']}: {sample['record'][:100]}") - -if __name__ == "__main__": - main() diff --git a/scripts/write_id_results.py b/scripts/write_id_results.py deleted file mode 100644 index 4d21e5a..0000000 --- a/scripts/write_id_results.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -""" -将手机号→ID匹配结果写回飞书表格 -""" -import json -import requests -import os -import sys - -SPREADSHEET_TOKEN = "RFIJsXT8FhGHhctY4RwczcOfnac" -SHEET_ID = "2DOxEI" -CRED_DIR = "/root/.openclaw/credentials/xiaoxi" - -def get_token(): - with open(os.path.join(CRED_DIR, "config.json")) as f: - cfg = json.load(f) - app_id = cfg['apps'][0]['appId'] - app_secret = cfg['apps'][0]['appSecret'] - resp = requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", - json={"app_id": app_id, "app_secret": app_secret}) - return resp.json()['tenant_access_token'] - -def write_values(token, range_str, values): - """Write values to sheet using PUT /sheets/v2/spreadsheets/{token}/values""" - url = 
f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" - body = { - "valueRange": { - "range": f"{SHEET_ID}!{range_str}", - "values": values - } - } - resp = requests.put(url, headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - }, json=body) - result = resp.json() - if result.get('code') != 0: - print(f"ERROR writing {range_str}: {result}") - return False - return True - -def main(): - with open('/tmp/sheet_id_results.json') as f: - data = json.load(f) - - results = data['results'] - results.sort(key=lambda x: x['row_idx']) - - print(f"Writing {len(results)} rows to sheet...") - - # Prepare values: each row is [user_id, status, update_time] - values = [[r['user_id'], r['status'], r['update_time']] for r in results] - - token = get_token() - print("Token obtained") - - # Write in batches of 850 at a time (API seems to support large writes) - # Range: F2:H851 (row 2 to row 1+850) - range_str = f"F2:H{1+len(values)}" - print(f"Writing range: {range_str}") - - if write_values(token, range_str, values): - print(f"SUCCESS: Wrote {len(values)} rows of ID results") - print(f"Stats: matched={data['stats']['matched']}, unmatched={data['stats']['unmatched']}") - else: - print("FAILED") - sys.exit(1) - -if __name__ == "__main__": - main()