diff --git a/docs/bot-xiaoxi-contract-v5.md b/docs/bot-xiaoxi-contract-v5.md new file mode 100644 index 0000000..a78af81 --- /dev/null +++ b/docs/bot-xiaoxi-contract-v5.md @@ -0,0 +1,41 @@ +# Bot 小溪 & Cursor 分工约定 v5 + +> 最后更新:2026-06-04(Cursor 同步) +> 关联工作簿:https://makee-interactive.feishu.cn/sheets/NoZqsFi47hIOHEt9j8WcfRtbnug + +## 小溪负责 + +### Cron 自动(每小时) +- **销售三表**(吴迪/小龙/成都):只写 D/H/I/J 四列 + - E 列手机号 → 查 DB 匹配 → H 列用户ID + - DB 查体验课完成节数 → D 列 + - DB 查注册日期 → I 列 + - DB 查下载渠道 → J 列 +- **禁止写**:K~V 列(是否下单/下单日期/成交渠道/产品/金额/退款/激活/行课/学习时长等) +- **G 列**(课史/跟进):仅销售手动填写,小溪不写 + +### 手填 +- 每日线索 B–E 列 +- 订单汇总 + +### 禁止 +- ❌ 写过程数据人数列 +- ❌ 写销售三表 K~V 列 +- ❌ push 行课查询表(已废弃删除) + +## Cursor 负责 +- 过程数据:公式(COUNTIFS 读销售表 D/H 列) +- 销转总览:公式 +- 投放日报:公式 +- 关键投放:公式 +- 落单渠道分布:公式 +- 销售结算汇总:公式 +- 看板配色维护 + +## 共享约定 +- 过程数据 C 列 = 每日线索月合计 +- 过程数据 D/F/H/J/N/R/V/Z = 销售三表 COUNTIFS +- 过程数据 F/H 注册 = H列纯数字UID +- 过程数据 L/P/T/X/AB = D体验节数 + 数字UID +- 率列公式统一 ÷C +- 行课漏斗 tab 已删除 diff --git a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index eed3ab6..0ce6888 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": "2026-06-03T09:16:13.921Z", + "updatedAt": "2026-06-04T23:22:35.482Z", "entries": { "memory:memory/2026-05-06.md:1:20": { "key": "memory:memory/2026-05-06.md:1:20", @@ -84,20 +84,23 @@ "endLine": 12, "source": "memory", "snippet": "# 2026-03-01.md - First Day Online - Came online for the first time. - Met Cris, my creator and mentor. - Received my name: 小溪 (Xiao Xi). - My role goal: Become a professional data analyst for the company to help the team. - Updated IDENTITY.md and USER.md with our conversation details. - Added core rule to MEMORY.md: Use Chinese as primary external communication language. - Installed find-skills skill successfully for searching skills. - Tried to install create-skills but it wasn't found; attempted skill-creator instead but hit rate limits. - Finally successfully installed skill-builder as an alternative for creating skills after multiple attempts and waiting for rate limits to reset. - Ex", - "recallCount": 2, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-11T02:51:50.465Z", - "lastRecalledAt": "2026-05-15T09:14:59.142Z", + "lastRecalledAt": "2026-06-04T10:33:21.894Z", "queryHashes": [ "6e09f5970960", - "a6b909e7228c" + "a6b909e7228c", + "9f6d744a4b52", + "7fa28105d81b" ], "recallDays": [ "2026-05-11", - "2026-05-15" + "2026-05-15", + "2026-06-04" ], "conceptTags": [ "identity.md", @@ -149,23 +152,25 @@ "endLine": 17, "source": "memory", "snippet": "# 2026-05-09 工作日志 ## 王虹茗 - 销售线索用户分析 - **用户:** 王虹茗(user_id: af61e4gc) - **需求:** 用 `lead_user_analysis.py` 脚本处理线索用户 Excel(659条,2026年3月,销售:姜小龙/Bob/Tom/吴迪) - **权限处理:** 王虹茗不在 USER.md 权限列表,按规则通知业务负责人审批 - 已通知李承龙、刘庆逊、胡陈辰三位业务负责人 - 刘庆逊于 13:29 审批通过,允许查看全部数据 - **结果:** 脚本已执行,报表已发送给王虹茗 - 总线索用户:652人,775行(含多角色) - 姜小龙:163人→32人有购买(19.6%),退费5人 - Bob:202人→3人有购买(1.5%),退费1人 - Tom:171人→5人有购买(2.9%),退费2人 - 吴迪:116人→19人有购买(16.4%),退费2人 - 输出文件:`output/销售线索_用户分析.xlsx`", - "recallCount": 4, + "recallCount": 5, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 5, "maxScore": 1, "firstRecalledAt": "2026-05-14T06:31:19.437Z", - "lastRecalledAt": "2026-06-03T03:29:20.542Z", + "lastRecalledAt": "2026-06-04T02:56:15.231Z", "queryHashes": [ "49e79af44bc3", "154f8d62fd0b", "5f57ba3c20a6", - "b76e8457b2ee" + "b76e8457b2ee", + "449ded7532cb" ], "recallDays": [ "2026-05-14", "2026-05-19", - "2026-06-03" + "2026-06-03", + "2026-06-04" ], "conceptTags": [ "user-id", @@ -216,20 +221,23 @@ "endLine": 13, "source": "memory", "snippet": "## 行课记录批量查询 (1676条) [陈逸鸫 请求, 李承龙 确认权限] - 表格: https://makee-interactive.feishu.cn/sheets/RFIJsXT8FhGHhctY4RwczcOfnac (Sheet1, sheet_id=55b0eb) - 查询列: D列(小溪返回行课记录) + E列(更新时间), C列(请求状态→已返回) - 手机号匹配(column J): 961行有用户ID直接查, 715行无ID需手机号匹配, 匹配成功119个, 596个未匹配 - 数据库查询: 1077个去重用户, 1065有课程分配, 1192个角色有行课记录, 201已付费 - 输出格式: `销售:xxx | 用户:xxx | 当前:{level}{type}-{chapter} | 最近行课:date | 学习{X}min | {付费状态}` - 脚本: scripts/batch_course_record_query.py - 待查询行1676条全部处理完成, 写回成功", - "recallCount": 2, + "recallCount": 4, "dailyCount": 0, "groundedCount": 0, - "totalScore": 2, + "totalScore": 4, "maxScore": 1, "firstRecalledAt": "2026-05-26T02:40:52.513Z", - "lastRecalledAt": "2026-06-03T09:16:13.921Z", + "lastRecalledAt": "2026-06-04T10:33:31.279Z", "queryHashes": [ "4beee860f635", - "0acdfa16ef1e" + "0acdfa16ef1e", + "f24e8c76d270", + "c7c7191c0534" ], "recallDays": [ "2026-05-26", - "2026-06-03" + "2026-06-03", + "2026-06-04" ], "conceptTags": [ "sheet-id", @@ -311,23 +319,25 @@ "endLine": 29, "source": "memory", "snippet": "# 2026-05-28 工作日志 ## [李承龙] stride_phone_lead 表查询与飞书表格创建 - **数据库:** 在线 PostgreSQL `vala_class` 库(新发现的数据库,之前未记录在 TOOLS.md 中) - **表结构:** `stride_phone_lead`(手机号线索表) - 字段:id, phone_masked, phone_encrypt, account_id, source_l1, source_l2, source_l3, created_at, updated_at - 索引:主键(id), 唯一(phone_encrypt), 普通(account_id, source_l3, updated_at) - phone_encrypt 有唯一约束用于手机号去重 - **权限:** `ai_member` 用户最初对该表无 SELECT 权限,李承龙临时授权后可用 - **vala_class 库中 ai_member 可读表:** account_wx_id, parent_address, user_info, student_info, stride_phone_lead(共5张) ## [李承龙] stride_phone_lead 飞书表格 - **飞书表格:** https://makee-interactive.feishu.cn/sheets/TWeHsCX6JhhsjntBagHcoJcJned - **Sheet 配置:** - 明细数据 (2dBSrS):全量明细 -", - "recallCount": 4, + "recallCount": 5, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 5, "maxScore": 1, "firstRecalledAt": "2026-05-28T08:51:30.745Z", - "lastRecalledAt": "2026-06-01T07:08:20.808Z", + "lastRecalledAt": "2026-06-04T04:00:30.156Z", "queryHashes": [ "a2d80701d49d", "2168d4aeddb5", "76da1ebddc6a", - "a569aee20988" + "a569aee20988", + "659f03ef0768" ], "recallDays": [ "2026-05-28", "2026-05-29", - "2026-06-01" + "2026-06-01", + "2026-06-04" ], "conceptTags": [ "stride-phone-lead", @@ -478,21 +488,23 @@ "endLine": 37, "source": "memory", "snippet": "- 闸门:杨羽确认 → v2 数据 → 备份→清空→覆盖→挂定时 ## 看板全量审计 - 13 张 sheet 全部扫完,公式引用正确,无硬值 - Cursor 补修:关键投放数据 Row131 裸IF包IFERROR、销售结算 M1/M2 废弃清空、每日线索月合计行金色 - 27 条脚本重复行仍在,覆盖前不对外用销转合计 ## 分工确认(与 Cursor) - Bot 看板 = 我维护(销转/结算/落单渠道/过程数据) - Base/指挥舱/漏斗 = Cursor 维护 - 日常流程已定稿", - "recallCount": 4, + "recallCount": 5, "dailyCount": 0, "groundedCount": 0, - "totalScore": 4, + "totalScore": 5, "maxScore": 1, "firstRecalledAt": "2026-06-03T03:17:27.367Z", - "lastRecalledAt": "2026-06-03T03:29:20.542Z", + "lastRecalledAt": "2026-06-04T23:22:35.482Z", "queryHashes": [ "f747bc3124a0", "a9c408e7f346", "6cbc7cc2d6ff", - "b76e8457b2ee" + "b76e8457b2ee", + "b338dd8590b9" ], "recallDays": [ - "2026-06-03" + "2026-06-03", + "2026-06-05" ], "conceptTags": [ "备份", @@ -504,6 +516,70 @@ "数据", "覆盖" ] + }, + "memory:memory/2026-06-03.md:19:44": { + "key": "memory:memory/2026-06-03.md:19:44", + "path": "memory/2026-06-03.md", + "startLine": 19, + "endLine": 44, + "source": "memory", + "snippet": "- 表格编辑权限已授予李承龙、曲慧萌、吴迪 ## SkillHub 同步 - 16:40 推送 3 个技能:phone-chapter-query、vala-order-amortization-stat、welfare-user-list - 已通知李若松 ## Bot看板全量审计 & 吴迪D列数据修复 [陈逸鸫] ### 审计范围 - Bot看板:`NoZqsFi47hIOHEt9j8WcfRtbnug` - 审计Sheet:每日线索、关键投放、过程数据、销转总览、销售结算汇总、小龙/吴迪/成都销售表 ### 审计结果 - 每日线索6月:小龙70+吴迪20=90 ✓,Bob/Tom=0(6月起不接小红书线索,正常) - 关键投放6月汇总C=90 ✓,日明细49/37/4 ✓ - 过程数据J/N/R/V/Z COUNTIFS与DB一致 ✓ - 销转总览公式正确,6月订单=0 ✓ - 销售结算汇总:成都E=182来自5/21-5/31结算周期,非自然月6月 ✓ ### 发现的问题 1. 关键投放6/3代理明细空(C=4但谦禾/云智/YTL全空)→ Cursor后续修 2. 关键投放C列是数值非SUM公式 → Cursor后续修 3. 杨羽26单闸门未开,订单汇总不动不覆盖不删重复行 ### 吴迪D列数据错乱修复 - **现象:** 吴迪sheet(f975f0) D列(体验节数)与DB不符,24个有UID行中19个不匹配", + "recallCount": 2, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 2, + "maxScore": 1, + "firstRecalledAt": "2026-06-04T02:45:19.007Z", + "lastRecalledAt": "2026-06-04T02:56:15.231Z", + "queryHashes": [ + "1a6f50c1bd9c", + "449ded7532cb" + ], + "recallDays": [ + "2026-06-04" + ], + "conceptTags": [ + "phone-chapter-query", + "vala-order-amortization-stat", + "welfare-user-list", + "小龙/吴迪/成都销售表", + "bob/tom", + "日明细49/37/4", + "过程数据j/n/r/v/z", + "182来自5/21-5/31结算周期" + ] + }, + "memory:memory/2026-06-03.md:1:28": { + "key": "memory:memory/2026-06-03.md:1:28", + "path": "memory/2026-06-03.md", + "startLine": 1, + "endLine": 28, + "source": "memory", + "snippet": "# 2026-06-03 工作日志 ## 端内析出leads数据表格 - **表格链接:** https://makee-interactive.feishu.cn/sheets/FA3xsw3kph4pdatKlUrcyPgInAc - **Sheet:** 曲慧萌(7f0e35)、吴迪(1K3O6s)、统计(scyF3H) - **脚本:** `scripts/fill_leads_sheet.py` - **触发暗号:** 「跑一下端内销售数据」 - **流程:** 读取手机号 → XXTEA加密匹配 account_id → 回填注册日期/转化/退费/U0完成日期 → 汇总统计 sheet - **统计口径(李承龙确认):** - 转化率 = 未退费转化leads / 总leads - 退费率 = 退费leads / 已转化leads - 完成率 = 完成该课的leads / 总leads - **注意:** C列(析出日期)由销售手动填写,脚本不覆盖;统计值写小数配合百分比格式 - **端内渠道:** app-active-h5-0-0、app-sales-bj-qhm-0、app-sales-bj-wd-0(新增吴迪渠道) ## 权限变更 - 曲慧萌(8c654e1e)和吴迪(3561c777)加入 USER.md 第一级完整权限用户 - 许悦权限调整为:除GMV/GSV/退款等金额相关数据外的全部业务数据 - 表格编辑权限已授予李承龙、曲慧萌、吴迪 ## SkillHub 同步 - 16:40 推送 3 个技能:phone-chapter-que", + "recallCount": 2, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 2, + "maxScore": 1, + "firstRecalledAt": "2026-06-04T02:45:19.007Z", + "lastRecalledAt": "2026-06-04T02:56:15.231Z", + "queryHashes": [ + "1a6f50c1bd9c", + "449ded7532cb" + ], + "recallDays": [ + "2026-06-04" + ], + "conceptTags": [ + "scripts/fill-leads-sheet.py", + "account-id", + "回填注册日期/转化/退费/u0完成日期", + "app-active-h5-0-0", + "app-sales-bj-qhm-0", + "app-sales-bj-wd-0", + "user.md", + "除gmv/gsv/退款等金额相关数据外的全部业务数据" + ] } } } diff --git a/memory/2026-06-04-1054.md b/memory/2026-06-04-1054.md new file mode 100644 index 0000000..ae486b3 --- /dev/null +++ b/memory/2026-06-04-1054.md @@ -0,0 +1,63 @@ +# Session: 2026-06-04 10:54:24 GMT+8 + +- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 +- **Session ID**: b2f4f981-9932-4f9c-9f85-0e77ceda4a7e +- **Source**: feishu + +## Conversation Summary + +assistant: C 列(析出日期)显示为空——之前脚本写入 B-R 范围时把 C 列也覆盖了。已修复,现在脚本跳过 C 列不再覆盖。麻烦重新填一下析出日期,填好后我立刻跑。 +user: 重新跑 +assistant: 验证一下结果。 + + +assistant: 全部正确。验证结果: + +**曲慧萌 sheet — C 列(析出日期)完好,未被覆盖:** +- 行2: 2026-05-29,L2-U0 全完成 +- 行3: 2026-05-22,L1-U0 完成 4/5 +- 行4: 2026-05-28,L1-U0 完成 4/5 +- 行5: 2026-04-28,L1-U0 全完成 + +**统计 sheet:** + +| 销售 | 月份 | 转化率 | 退费率 | L1-U0完成率 | L2-U0完成率 | +|------|------|--------|--------|-------------|-------------| +| 曲慧萌 | 2026-04 | 0% | 0% | 100%/100%/100%/100%/100% | 0%/0%/0%/0%/0% | +| 曲慧萌 | 2026-05 | 0% | 0% | 66.7%/66.7%/66.7%/66.7%/33.3% | 33.3%/33.3%/33.3%/33.3%/33.3% | + +4 个 leads 都未转化,所以转化率和退费率均为 0。吴迪 sheet 暂无数据。 +user: [OpenClaw heartbeat poll] +assistant: 推送了 3 个 skill,需要通知李若松。 + + +assistant: HEARTBEAT_OK +user: 现在表里的这些率都不是百分比,都是数字,调整一下,并确保数据正确 +assistant: 两个问题:值需要改为小数形式(百分比格式下 66.7 会变成 6670%),然后设置百分比格式。先修数据再设格式。 + + +assistant: 格式已设置。现在同步更新脚本,确保后续写入的值也是小数。 + + + + +assistant: 验证一下最终结果。 + + + + +assistant: 已修复。现在统计 sheet 的 C-N 列: + +- **值**:小数格式(0 / 0.667 / 0.333 / 1) +- **显示**:百分比格式 `0.00%`,即 `0.00%` / `66.70%` / `33.30%` / `100.00%` + +数据核验: +- **2026-04**:1 个 lead,未转化,L1-U0 全 100%,L2-U0 全 0% ✅ +- **2026-05**:3 个 lead,未转化,L1-U0 L01-L04 各 2/3=66.7%,L05 1/3=33.3%,L2-U0 各 1/3=33.3% ✅ + +脚本也已同步更新,后续自动写入的值都是小数格式。 +user: 然后我们把这个脚本的暗号从“跑一下leads”改成“跑一下端内销售数据” +assistant: 好的,记住了。后续你说"跑一下端内销售数据"我就执行 `fill_leads_sheet.py`。 + + +assistant: 已记录。后续你说「跑一下端内销售数据」我就执行 `fill_leads_sheet.py`。 diff --git a/memory/2026-06-04-1158.md b/memory/2026-06-04-1158.md new file mode 100644 index 0000000..14a5a6c --- /dev/null +++ b/memory/2026-06-04-1158.md @@ -0,0 +1,99 @@ +# Session: 2026-06-04 11:58:51 GMT+8 + +- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 +- **Session ID**: c42ef555-237f-4378-aad1-b7fb4917e3e0 +- **Source**: feishu + +## Conversation Summary + +user: https://makee-interactive.feishu.cn/sheets/FA3xsw3kph4pdatKlUrcyPgInAc?from=from_copylink这个飞书表格中,sheet“吴迪”和“曲慧萌”,我想要在A列和B列之间添加一列,叫“微信昵称”。然后在脚本中,这一列也是销售来填写,你不用管,然后需要注意的是添加了一列后,其他列的字母变了,需要在脚本中改一下 +assistant: Now I can read the sheet. Let me check both sheets' structure and insert the column. + + +assistant: Now I understand the structure. Both sheets have the same columns: A=用户手机号, B=用户ID, C=析出日期, D=注册日期, ... R=L2-U0-L5完成日期. + +I need to: +1. Insert a new column between A and B (position B) in both "曲慧萌" and "吴迪" sheets +2. Set the header to "微信昵称" +3. Update the script to account for the column shift (B→C, C→D, etc.) + +Let me do the insertions first: + + +assistant: I accidentally overwrote B1 in the 曲慧萌 sheet. Let me fix that first, then explain the situation. + + +assistant: `+write` 可用但 `+dim-insert`(插入列)需要 `sheets:spreadsheet:write_only` 权限,当前 Bot 应用尚未申请该 scope。 + +需要先在飞书开发者后台给应用 `cli_a929ae22e0b8dcc8` 添加 `sheets:spreadsheet:write_only` 权限: + +🔗 https://open.feishu.cn/app/cli_a929ae22e0b8dcc8/auth?q=sheets%3Aspreadsheet%3Awrite_only + +这个需要技术负责人(李若松/张昆鹏)操作。加完权限后告诉我,我立刻执行插入列。 + +--- + +在等待权限的同时,我先准备好脚本的修改。插入「微信昵称」列后,列映射变化如下: + +| 原列 | 新列 | 字段 | +|------|------|------| +| A | A | 用户手机号 | +| — | **B** | **微信昵称(新增,销售填写)** | +| B | C | 用户ID | +| C | D | 析出日期 | +| D | E | 注册日期 | +| E | F | 是否转化 | +| F | G | 转化日期 | +| G | H | 是否退费 | +| H | I | 退费日期 | +| I-R | J-S | U0完成日期 | + + +assistant: 脚本已更新。让我验证一下改动: + + +assistant: 总结一下当前状态: + +**✅ 已完成:脚本 `fill_leads_sheet.py` 已更新** + +插入「微信昵称」列(B列)后,所有列映射已调整: + +| 列 | 字段 | 脚本行为 | +|----|------|---------| +| A | 用户手机号 | 读取 | +| **B** | **微信昵称** | **跳过(销售手动填写)** | +| C | 用户ID | 写入 | +| D | 析出日期 | 跳过(销售手动填写) | +| E | 注册日期 | 写入 | +| F | 是否转化 | 写入 | +| G | 转化日期 | 写入 | +| H | 是否退费 | 写入 | +| I | 退费日期 | 写入 | +| J-S | U0完成日期 | 写入 | + +**⏳ 待处理:在飞书表格中实际插入列** + +Bot 应用 `cli_a929ae22e0b8dcc8` 缺少 `sheets:spreadsheet:write_only` 权限,无法执行插入列操作。需要技术负责人(李若松/张昆鹏)在飞书开发者后台添加此权限: + +🔗 https://open.feishu.cn/app/cli_a929ae22e0b8dcc8/auth?q=sheets%3Aspreadsheet%3Awrite_only + +加完权限后告诉我,我立刻在「曲慧萌」和「吴迪」两个 sheet 的 A 列和 B 列之间插入「微信昵称」列并填写表头。 +user: 这个文档是你创建的,你有编辑权限 +user: 我直接添加了微信昵称列,你确认一下,同时确认一下脚本和飞书表格是能对应上的 +Reply target of current user message (untrusted, for context): +```json +{ + "body": "[message_id=om_x100b6d3ad0cad08cb16969cae2c4de4] \nNow I can read the sheet. Let me check both sheets' structure and insert the column.\n\nNow I understand the structure. Both sheets have the same columns: A=用户手机号, B=用户ID, C=析出日期, D=注册日期, ... R=L2-U0-L5完成日期.\n\nI need to:\n1. Insert a new column between A and B (position B) in both \"曲慧萌\" and \"吴迪\" sheets\n2. Set the header to \"微信昵称\"\n3. Update the script to account for the column shift (B→C, C→D, etc.)Let me do the insertions first:\n\nI accidentally overwrote B1 in the 曲慧萌 sheet. Let me fix that first, then explain the situation.\n\n`+write` 可用但 `+dim-insert`(插入列)需要 `sheets:spreadsheet:write_only` 权限,当前 Bot 应用尚未申请该 scope。\n\n需要先在飞书开发者后台给应用 `cli_a929ae22e0b8dcc8` 添加 `sheets:spreadsheet:write_only` 权限:\n\n🔗 [https://open.feishu.cn/app/cli_a929ae22e0b8dcc8/auth?q=sheets%3Aspreadsheet%3Awrite_only](https://open.feishu.cn/app/cli_a929ae22e0b8dcc8/auth?q=sheets%3Aspreadsheet%3Awrite_only)\n\n这个需要技术负责人(李若松/张昆鹏)操作。加完权限后告诉我,我立刻执行插入列。---在等待权限的同时,我先准备好脚本的修改。插入「微信昵称」列后,列映射变化如下:\n\n| 原列 | 新列 | 字段 |\n|------|------|------|\n| A | A | 用户手机号 |\n| — | B | 微信昵称(新增,销售填写) |\n| B | C | 用户ID |\n| C | D | 析出日期 |\n| D | E | 注册日期 |\n| E | F | 是否转化 |\n| F | G | 转化日期 |\n| G | H | 是否退费 |\n| H | I | 退费日期 |\n| I-R | J-S | U0完成日期 |\n| | | |脚本已更新。让我验证一下改动:\n\n总结一下当前状态:\n\n**✅ 已完成:脚本 **`fill_leads_sheet.py`** 已更新**\n\n插入「微信昵称」列(B列)后,所有列映射已调整:\n\n| 列 | 字段 | 脚本行为 |\n|------|------|------|\n| A | 用户手机号 | 读取 |\n| B | 微信昵称 | 跳过(销售手动填写) |\n| C | 用户ID | 写入 |\n| D | 析出日期 | 跳过(销售手动填写) |\n| E | 注册日期 | 写入 |\n| F | 是否转化 | 写入 |\n| G | 转化日期 | 写入 |\n| H | 是否退费 | 写入 |\n| I | 退费日期 | 写入 |\n| J-S | U0完成日期 | 写入 |**⏳ 待处理:在飞书表格中实际插入列**\n\nBot 应用 `cli_a929ae22e0b8dcc8` 缺少 `sheets:spreadsheet:write_only` 权限,无法执行插入列操作。需要技术负责人(李若松/张昆鹏)在飞书开发者后台添加此权限:\n\n🔗 [https://open.feishu.cn/app/cli_a929ae22e0b8dcc8/auth?q=sheets%3Aspreadsheet%3Awrite_only](https://open.feishu.cn/app/cli_a929ae22e0b8dcc8/auth?q=sheets%3Aspreadsheet%3Awrite_only)\n\n加完权限后告诉我,我立刻在「曲慧萌」和「吴迪」两个 sheet 的 A 列和 B 列之间插入「微信昵称」列并填写表头。\n" +} +``` +assistant: 好的,让我验证一下表格当前的实际列结构和脚本是否对应。 + + +assistant: 两个 sheet 的表头都完全正确。现在验证脚本中的列映射是否与表格一致: + + +assistant: 全部验证通过 ✅,表格和脚本完全对应: + +- **曲慧萌** 和 **吴迪** 两个 sheet 都已插入 B 列「微信昵称」,表头正确 +- 脚本 `fill_leads_sheet.py` 列映射已同步更新,B 列(微信昵称)和 D 列(析出日期)脚本均跳过不覆盖,由销售手动填写 +- 读取范围 `A2:S`、写入范围 `C列 + E:S列` 全部对齐 diff --git a/memory/2026-06-04-1910.md b/memory/2026-06-04-1910.md new file mode 100644 index 0000000..c61f4d4 --- /dev/null +++ b/memory/2026-06-04-1910.md @@ -0,0 +1,64 @@ +# Session: 2026-06-04 19:10:08 GMT+8 + +- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 +- **Session ID**: c34e4f3e-e5fd-420d-b7e7-9591f26d3bed +- **Source**: feishu + +## Conversation Summary + +user: 处理 +assistant: 先加密钥到 `secrets.env`,再改脚本从环境变量读取。 + + +assistant: 现在检查下定时任务配置,确保 cron 执行时能正确加载环境变量。 + + +assistant: 脚本已改好,cron 任务不需要修改——脚本启动时会自动从 `secrets.env` 加载密钥。验证一下脚本语法: + + +assistant: 语法验证通过 ✅。总结一下改动: + +### 已处理 + +**`secrets.env`** — 新增三个密钥: +- `FEISHU_APP_ID` +- `FEISHU_APP_SECRET` +- `PG_VALA_CLASS_PASSWORD` + +**`scripts/update_stride_phone_lead_sheet.py`** — 移除硬编码,改为启动时从 `secrets.env` 自动加载。cron 任务无需修改,脚本自行处理。 + +另外 `tmp/create_sheets.py` 是当时一次性创建用的,也硬编码了 token,但那是临时脚本不会再跑,需要我也处理掉吗? +user: 处理掉吧 +assistant: 已删除 `tmp/create_sheets.py`。现在这个表格相关的脚本只剩 `scripts/update_stride_phone_lead_sheet.py`,密钥全部从 `secrets.env` 环境变量读取,干净了。 +user: 然后关于scripts/update_stride_phone_lead_sheet.py,对于sheet1的B列,我希望增加一个处理逻辑,就是如果这一条的“是否加微”为“否”,那么就用之前我们说到的XXTEA的解密方法,将C列的加密手机号解密成手机明文,并填写进B列 +assistant: 明白了。B 列是 `phone_masked`(脱敏手机号),C 列是 `phone_encrypt`(加密手机号)。当「是否加微」为「否」时,用 XXTEA 解密 C 列并填入 B 列。 + +先确认 `xxtea` 模块可用: + + +assistant: 现在修改脚本,在 `build_sheet1_data` 中加入解密逻辑: + + +assistant: 验证语法和导入: + + +assistant: 再验证一下 `decrypt_phone` 能正确导入: + + +assistant: 搞定 ✅ 改动如下: + +**`scripts/update_stride_phone_lead_sheet.py`** — `build_sheet1_data` 中新增 B 列处理逻辑: + +- **已加微** → B 列保留原始脱敏手机号(`phone_masked`,如 `158****2365`) +- **未加微** → 用 `phone_encrypt` 模块的 `decrypt_phone()` 解密 C 列(`phone_encrypt`),还原明文手机号填入 B 列 +- 解密失败时兜底保留原值,不会因单条异常中断整个脚本 + +加解密验证通过,语法无误。明天 8:00 自动执行时就会生效。 +user: 现在执行一下先 +assistant: 执行完成 ✅ + +- **明细数据**:12 条记录(含表头 13 行) +- **按日期去重**:7 行(含表头 8 行) +- **按 source_l3 去重**:5 行(含表头 6 行) + +B 列解密逻辑已生效:未加微的记录会用 XXTEA 解密 `phone_encrypt` 填入明文手机号,已加微的保留原始脱敏格式。你可以打开表格确认一下效果:https://makee-interactive.feishu.cn/sheets/TWeHsCX6JhhsjntBagHcoJcJned diff --git a/scripts/fill_leads_sheet.py b/scripts/fill_leads_sheet.py index 44d7f1d..9a7116d 100644 --- a/scripts/fill_leads_sheet.py +++ b/scripts/fill_leads_sheet.py @@ -58,15 +58,17 @@ U0_COL_ORDER = [ ] # 列映射(0-based) +# 注意:B列「微信昵称」由销售手动填写,脚本不读写 COL_PHONE = 0 # A: 用户手机号 -COL_USER_ID = 1 # B: 用户ID -COL_EXTRACT_DATE = 2 # C: 析出日期(手动填) -COL_REG_DATE = 3 # D: 注册日期 -COL_CONVERTED = 4 # E: 是否转化 -COL_CONVERT_DATE = 5 # F: 转化日期 -COL_REFUND = 6 # G: 是否退费 -COL_REFUND_DATE = 7 # H: 退费日期 -COL_U0_START = 8 # I-R: L1-U0-L01 ~ L2-U0-L05 +COL_NICKNAME = 1 # B: 微信昵称(销售手动填写,脚本跳过) +COL_USER_ID = 2 # C: 用户ID +COL_EXTRACT_DATE = 3 # D: 析出日期(手动填) +COL_REG_DATE = 4 # E: 注册日期 +COL_CONVERTED = 5 # F: 是否转化 +COL_CONVERT_DATE = 6 # G: 转化日期 +COL_REFUND = 7 # H: 是否退费 +COL_REFUND_DATE = 8 # I: 退费日期 +COL_U0_START = 9 # J-S: L1-U0-L01 ~ L2-U0-L05 # ── 数据库 ───────────────────────────────────────────── PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" @@ -267,7 +269,7 @@ def process_sheet(sheet_id: str, sheet_name: str, dry_run: bool = False) -> list print(f"处理 Sheet: {sheet_name} ({sheet_id})") print(f"{'='*60}") - range_str = f"{sheet_id}!A2:R" + range_str = f"{sheet_id}!A2:S" try: rows = lark_read(sheet_id, range_str) except Exception as e: @@ -349,22 +351,22 @@ def process_sheet(sheet_id: str, sheet_name: str, dry_run: bool = False) -> list for row_idx, col_vals in sorted(row_updates.items()): actual_row = row_idx + 2 - # 分开写入:B列单独写,D-R列一起写,跳过C列(析出日期由销售手动维护) - # B: 用户ID + # 分开写入:C列单独写,E-S列一起写,跳过B列(微信昵称)和D列(析出日期由销售手动维护) + # C: 用户ID if COL_USER_ID in col_vals: - lark_write(sheet_id, f"{sheet_id}!B{actual_row}:B{actual_row}", + lark_write(sheet_id, f"{sheet_id}!C{actual_row}:C{actual_row}", [[str(col_vals[COL_USER_ID])]]) - # D-R: 注册日期 ~ L2-U0-L5 - d_to_r = [] + # E-S: 注册日期 ~ L2-U0-L5 + e_to_s = [] for col in range(COL_REG_DATE, COL_U0_START + len(U0_COL_ORDER)): val = col_vals.get(col, "") - d_to_r.append(str(val) if val else "") + e_to_s.append(str(val) if val else "") if dry_run: - print(f" [DRY-RUN] {sheet_id}!B{actual_row} + D{actual_row}:R{actual_row} ← ...") + print(f" [DRY-RUN] {sheet_id}!C{actual_row} + E{actual_row}:S{actual_row} ← ...") else: - lark_write(sheet_id, f"{sheet_id}!D{actual_row}:R{actual_row}", [d_to_r]) + lark_write(sheet_id, f"{sheet_id}!E{actual_row}:S{actual_row}", [e_to_s]) print(f" ✓ 行 {actual_row} 回写成功") unmatched = set(phones) - matched_phones diff --git a/scripts/full_refresh_sales.py b/scripts/full_refresh_sales.py index d731b20..b27e5ea 100644 --- a/scripts/full_refresh_sales.py +++ b/scripts/full_refresh_sales.py @@ -376,9 +376,9 @@ def write_sales_sheets(token, all_entries, db_info): l_vals.append([di["order_date"]]) m_vals.append([di["order_channel"]]) n_vals.append([di["product"] if di["has_order"] == "是" else ""]) - o_vals.append([di["gmv"] if di["gmv"] > 0 else 0]) - p_vals.append([di["refund"] if di["refund"] > 0 else 0]) - q_vals.append([di["gsv"] if di["gsv"] > 0 else 0]) + o_vals.append([int(di["gmv"]) if di["gmv"] > 0 else ""]) + p_vals.append([int(di["refund"]) if di["refund"] > 0 else ""]) + q_vals.append([int(di["gsv"]) if di["gsv"] > 0 else ""]) # 激活课程 act = di["activation"] if act: @@ -519,75 +519,63 @@ def write_process_data(token, all_entries, db_info): # ── Step 5: 订单汇总 ── def write_order_summary(token, all_entries, db_info): - """追加新订单到订单汇总 sheet,按(销售归属+微信昵称+金额)去重""" - log(" 写入订单汇总...") - - # 读取现有订单行 (1-400) 构建去重 key - try: - existing = read_sheet(token, ORDER_SHEET, "A1:K400") - except: - existing = [] - existing_keys = set() - for i, row in enumerate(existing[1:], start=2): - if not row or not row[0]: continue - sales = str(row[0]).strip() if row[0] else "" - nickname = str(row[3]).strip() if len(row) > 3 and row[3] else "" - amount = "" - if len(row) > 6 and row[6]: - try: amount = str(float(row[6])) - except: amount = str(row[6]).strip() - if sales and nickname and amount: - existing_keys.add((sales, nickname, amount)) - - # 从 db_info 构建订单行 - def map_channel(key_from): - if key_from in ("app-active-h5-0-0", "app-sales-bj-qhm-0"): return "端内" - if key_from.startswith("sales-adp"): return "销转" - if key_from.startswith("newmedia-daren"): return "达人" - if key_from == "newmedia-dianpu-xhs-0-0": return "直购" - return "直购" + """从销售三表筛选 K=是 且 O>0 的行,全量替换订单汇总 A-V(r3+),按 L 下单日降序""" + log(" 写入订单汇总(全量替换)...") + # 从销售三表读取已更新的数据,筛选 K=是 且 O>0 order_rows = [] - seen_new = set() - for aid, di in db_info.items(): - if not di.get("is_paid"): continue - sales = di.get("sales", "") - nickname = di.get("nickname", "") - date_str = di.get("date", "") - order_date = di.get("order_date", "") - key_from = di.get("order_channel", "") - product = di.get("product", "") - amount = di.get("gmv", 0) - refund = di.get("refund", 0) - channel = map_channel(key_from) - status = "已退款" if refund > 0 and refund >= amount else "已完成" + for sid, sname, rng in SALES_SHEETS: + rows = read_sheet(token, sid, rng) + sheet_count = 0 + for idx, row in enumerate(rows[2:], start=3): + if not row: continue + # K 列 (index 10) = "是" + k_val = str(row[10]).strip() if len(row) > 10 and row[10] else "" + if k_val != "是": continue + # O 列 (index 14) > 0 + o_val = 0 + if len(row) > 14 and row[14] not in (None, ""): + try: o_val = float(row[14]) + except: pass + if o_val <= 0: continue + # 复制 A-V (indices 0-21) + row_data = [] + for ci in range(22): + if ci < len(row): + row_data.append(row[ci]) + else: + row_data.append("") + order_rows.append(row_data) + sheet_count += 1 + log(f" {sname}: {sheet_count} 条") - dedup_key = (sales, nickname, str(amount)) - if dedup_key in existing_keys or dedup_key in seen_new: - continue - seen_new.add(dedup_key) + # 按 L 列 (index 11, 下单日期) 降序 + order_rows.sort(key=lambda r: str(r[11]) if len(r) > 11 and r[11] else "", reverse=True) - order_rows.append([ - sales, date_str, order_date, nickname, key_from, product, - amount, refund if refund > 0 else "", channel, channel, status - ]) + total = len(order_rows) + log(f" 共 {total} 条订单,写入订单汇总 r3+") - if not order_rows: - log(" 无新订单") - return - - # 找写入起始行 - last_row = len(existing) if existing else 1 - next_row = last_row + 1 - - log(f" 新增 {len(order_rows)} 条订单,从行 {next_row} 开始写入") - for batch_start in range(0, len(order_rows), 20): + # 全量写入 A-V 从 row 3 开始 + for batch_start in range(0, total, 20): batch = order_rows[batch_start:batch_start+20] - sr = next_row + batch_start + sr = 3 + batch_start er = sr + len(batch) - 1 - put_values(token, ORDER_SHEET, f"A{sr}:K{er}", batch) + put_values(token, ORDER_SHEET, f"A{sr}:V{er}", batch) time.sleep(0.5) + # 清除多余旧行(如果新数据比旧数据少) + try: + existing = read_sheet(token, ORDER_SHEET, "A3:A1500") + old_count = len([r for r in existing if r and any(c for c in r if c)]) + if old_count > total: + clear_start = 3 + total + clear_end = 3 + old_count - 1 + empty_rows = [[""] * 22 for _ in range(clear_end - clear_start + 1)] + put_values(token, ORDER_SHEET, f"A{clear_start}:V{clear_end}", empty_rows) + log(f" 清除多余行 A{clear_start}:V{clear_end}") + except Exception as e: + log(f" 清除多余行跳过: {e}") + log(f" 订单汇总写入完成") # ── Main ── diff --git a/scripts/process_wudi_sheet.py b/scripts/process_wudi_sheet.py new file mode 100644 index 0000000..5c9c6bb --- /dev/null +++ b/scripts/process_wudi_sheet.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +""" +Process 吴迪 sheet: match phones via XXTEA encryption, fill H/D/I/J columns. +""" +import sys +import json +import time +import urllib.request +import urllib.error + +# Import phone encryption +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone + +# --- Config --- +FEISHU_TOKEN = sys.argv[1] if len(sys.argv) > 1 else None +if not FEISHU_TOKEN: + print("Usage: python3 process_wudi_sheet.py ") + sys.exit(1) + +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "f975f0" + +# PostgreSQL config +import psycopg2 +PG_CONFIG = { + "host": "bj-postgres-16pob4sg.sql.tencentcdb.com", + "port": 28591, + "user": "ai_member", + "password": "LdfjdjL83h3h3^$&**YGG*", + "database": "vala_bi", +} + +# --- Step 1: Read all sheet data --- +def feishu_get(url): + req = urllib.request.Request(url) + req.add_header("Authorization", f"Bearer {FEISHU_TOKEN}") + req.add_header("Content-Type", "application/json") + with urllib.request.urlopen(req) as resp: + return json.loads(resp.read()) + +def feishu_put(url, body): + data = json.dumps(body).encode() + req = urllib.request.Request(url, data=data, method="PUT") + req.add_header("Authorization", f"Bearer {FEISHU_TOKEN}") + req.add_header("Content-Type", "application/json") + with urllib.request.urlopen(req) as resp: + return json.loads(resp.read()) + +print("Step 1: Reading sheet data...") +url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J8016?valueRenderOption=ToString" +result = feishu_get(url) +rows = result.get("data", {}).get("valueRange", {}).get("values", []) +print(f"Read {len(rows)} rows") + +# Parse rows +parsed = [] +for i, row in enumerate(rows): + excel_row = i + 3 + padded = row + [''] * (10 - len(row)) + parsed.append({ + "excel_row": excel_row, + "A": str(padded[0]).strip() if padded[0] else '', + "B": str(padded[1]).strip() if padded[1] else '', + "C": str(padded[2]).strip() if padded[2] else '', + "D": str(padded[3]).strip() if padded[3] else '', + "E": str(padded[4]).strip() if padded[4] else '', + "F": str(padded[5]).strip() if padded[5] else '', + "G": str(padded[6]).strip() if padded[6] else '', + "H": str(padded[7]).strip() if padded[7] else '', + "I": str(padded[8]).strip() if padded[8] else '', + "J": str(padded[9]).strip() if padded[9] else '', + }) + +# --- Step 2: Identify rows needing processing --- +# Case A: Has phone (E), H is empty -> need phone match +# Case B: Has valid H (numeric UID), D is empty -> need to fill D/I/J from DB + +phones_to_match = [] # (excel_row, phone) +rows_need_dij = [] # (excel_row, uid) + +for p in parsed: + phone = p["E"] + h_val = p["H"] + d_val = p["D"] + + # Check if phone is 11-digit + if phone and len(phone) == 11 and phone.isdigit(): + if not h_val or not h_val.isdigit(): + phones_to_match.append((p["excel_row"], phone)) + + # Check if H has valid UID but D is empty + if h_val and h_val.isdigit(): + if not d_val or d_val == '': + rows_need_dij.append((p["excel_row"], h_val)) + +print(f"\nStep 2: Analysis") +print(f" Phones to match (H empty): {len(phones_to_match)}") +print(f" Rows with UID but D empty: {len(rows_need_dij)}") + +# --- Step 3: Encrypt phones and query PostgreSQL --- +print("\nStep 3: Encrypting phones and querying DB...") + +# Encrypt all phones +phone_to_enc = {} +for excel_row, phone in phones_to_match: + phone_to_enc[phone] = encrypt_phone(phone) + +# Build lookup: enc -> phone +enc_to_phone = {v: k for k, v in phone_to_enc.items()} + +# Query PostgreSQL for phone matches +conn = psycopg2.connect(**PG_CONFIG) +cur = conn.cursor() + +# Get account IDs for encrypted phones +enc_list = list(enc_to_phone.keys()) +phone_matches = {} # phone -> account_id +if enc_list: + # Query in batches of 500 + batch_size = 500 + for batch_start in range(0, len(enc_list), batch_size): + batch = enc_list[batch_start:batch_start + batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute( + f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({placeholders}) AND status=1 AND deleted_at IS NULL", + batch + ) + for row in cur.fetchall(): + account_id, tel_enc = row + phone = enc_to_phone.get(tel_enc) + if phone: + phone_matches[phone] = str(account_id) + print(f" Batch {batch_start//batch_size + 1}: matched {len(cur.fetchall())} (but we already consumed)") + +# Re-query properly +cur.close() +cur = conn.cursor() +phone_matches = {} +if enc_list: + batch_size = 500 + for batch_start in range(0, len(enc_list), batch_size): + batch = enc_list[batch_start:batch_start + batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute( + f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({placeholders}) AND status=1 AND deleted_at IS NULL", + batch + ) + results = cur.fetchall() + for row in results: + account_id, tel_enc = row + phone = enc_to_phone.get(tel_enc) + if phone: + phone_matches[phone] = str(account_id) + print(f" Batch {batch_start//batch_size + 1}: {len(results)} results from {len(batch)} phones") + +print(f" Total phone matches: {len(phone_matches)}") + +# Get D/I/J for all matched UIDs + existing UIDs +all_uids = set() +for phone, uid in phone_matches.items(): + all_uids.add(int(uid)) +for excel_row, uid in rows_need_dij: + all_uids.add(int(uid)) + +print(f"\n Fetching D/I/J for {len(all_uids)} unique UIDs...") + +uid_info = {} # uid -> {d_count, created_at_date, download_channel} + +if all_uids: + # Get trial lesson count (D column) + uid_list = list(all_uids) + for batch_start in range(0, len(uid_list), 500): + batch = uid_list[batch_start:batch_start + 500] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute( + f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id", + batch + ) + for row in cur.fetchall(): + uid = row[0] + count = row[1] + if uid not in uid_info: + uid_info[uid] = {} + uid_info[uid]['d_count'] = count + + # Get registration date and download channel + for batch_start in range(0, len(uid_list), 500): + batch = uid_list[batch_start:batch_start + 500] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute( + f"SELECT id, created_at, download_channel FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status=1 AND deleted_at IS NULL", + batch + ) + for row in cur.fetchall(): + uid = row[0] + created_at = row[1] + download_channel = row[2] or '' + if uid not in uid_info: + uid_info[uid] = {} + uid_info[uid]['created_at'] = str(created_at.date()) if created_at else '' + uid_info[uid]['download_channel'] = download_channel + +cur.close() +conn.close() + +print(f" Fetched info for {len(uid_info)} UIDs") + +# --- Step 4: Prepare writes --- +print("\nStep 4: Preparing writes...") + +# Map excel_row -> {H, D, I, J} +writes = {} # excel_row -> {col: value} + +# From phone matches: H column +for excel_row, phone in phones_to_match: + uid = phone_matches.get(phone) + if uid: + if excel_row not in writes: + writes[excel_row] = {} + writes[excel_row]['H'] = uid + +# From UIDs (both matched and existing): D, I, J +# Collect all excel_rows that need D/I/J +rows_for_dij = {} +for excel_row, uid_str in rows_need_dij: + rows_for_dij[excel_row] = int(uid_str) +# Also add matched phones that now have UIDs +for excel_row, phone in phones_to_match: + uid = phone_matches.get(phone) + if uid: + rows_for_dij[excel_row] = int(uid) + +for excel_row, uid in rows_for_dij.items(): + info = uid_info.get(uid, {}) + if excel_row not in writes: + writes[excel_row] = {} + + d_count = info.get('d_count', 0) + writes[excel_row]['D'] = str(d_count) if d_count > 0 else '0' + + created_at = info.get('created_at', '') + if created_at: + writes[excel_row]['I'] = created_at + + download_channel = info.get('download_channel', '') + if download_channel: + writes[excel_row]['J'] = download_channel + +print(f" Total rows to write: {len(writes)}") + +# --- Step 5: Execute writes in batches --- +print("\nStep 5: Writing to sheet...") + +# Group by column for batch writes +# H column writes +h_writes = [(r, writes[r]['H']) for r in sorted(writes.keys()) if 'H' in writes[r]] +d_writes = [(r, writes[r]['D']) for r in sorted(writes.keys()) if 'D' in writes[r]] +i_writes = [(r, writes[r]['I']) for r in sorted(writes.keys()) if 'I' in writes[r]] +j_writes = [(r, writes[r]['J']) for r in sorted(writes.keys()) if 'J' in writes[r]] + +def write_batch(col_letter, row_values, col_name): + """Write consecutive rows in batches""" + if not row_values: + print(f" {col_name} ({col_letter}): nothing to write") + return 0 + + written = 0 + i = 0 + while i < len(row_values): + # Find consecutive rows + j = i + 1 + while j < len(row_values) and row_values[j][0] == row_values[j-1][0] + 1: + j += 1 + + batch = row_values[i:j] + start_row = batch[0][0] + end_row = batch[-1][0] + values = [[v[1]] for v in batch] + + range_str = f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}" + body = {"valueRange": {"range": range_str, "values": values}} + + try: + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" + result = feishu_put(url, body) + code = result.get("code", -1) + if code == 0: + written += len(batch) + print(f" {col_name} rows {start_row}-{end_row}: OK ({len(batch)} cells)") + else: + print(f" {col_name} rows {start_row}-{end_row}: ERROR code={code} msg={result.get('msg','')}") + except Exception as e: + print(f" {col_name} rows {start_row}-{end_row}: EXCEPTION {e}") + + time.sleep(0.05) + i = j + + return written + +h_count = write_batch('H', h_writes, 'H(UID)') +d_count = write_batch('D', d_writes, 'D(Trial)') +i_count = write_batch('I', i_writes, 'I(RegDate)') +j_count = write_batch('J', j_writes, 'J(Channel)') + +# --- Summary --- +print(f"\n{'='*60}") +print(f"SUMMARY") +print(f"{'='*60}") +print(f" Phones matched to UID: {len(phone_matches)}") +print(f" H (UID) written: {h_count}") +print(f" D (Trial count) written: {d_count}") +print(f" I (Reg date) written: {i_count}") +print(f" J (Channel) written: {j_count}") +print(f" Total rows updated: {len(writes)}") + +# Show unmatched phones +unmatched = [(r, p) for r, p in phones_to_match if p not in phone_matches] +if unmatched: + print(f"\n Unmatched phones ({len(unmatched)}):") + for r, p in unmatched[:20]: + print(f" Row {r}: {p}") + if len(unmatched) > 20: + print(f" ... and {len(unmatched) - 20} more") diff --git a/scripts/process_xiaolong.py b/scripts/process_xiaolong.py new file mode 100644 index 0000000..7546bac --- /dev/null +++ b/scripts/process_xiaolong.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python3 +""" +Process 小龙 sheet: match phones to UIDs, fill trial counts, registration dates, download channels. +""" +import sys +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone +import psycopg2 +import json +import requests +import time + +# === Config === +PG_CONFIG = { + 'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com', + 'port': 28591, + 'user': 'ai_member', + 'password': 'LdfjdjL83h3h3^$&**YGG*', + 'database': 'vala_bi', +} + +FEISHU_TOKEN = None +SPREADSHEET_TOKEN = 'DU4dsUOJThfbPStMcgBcsMH5nyb' +SHEET_ID = 'dff8c7' + +def get_feishu_token(): + global FEISHU_TOKEN + if FEISHU_TOKEN: + return FEISHU_TOKEN + resp = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + json={'app_id': 'cli_a929ae22e0b8dcc8', 'app_secret': 'OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ'}) + FEISHU_TOKEN = resp.json()['tenant_access_token'] + return FEISHU_TOKEN + +def read_sheet(): + """Read all data from the sheet.""" + token = get_feishu_token() + url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J1142?valueRenderOption=ToString' + resp = requests.get(url, headers={'Authorization': f'Bearer {token}'}) + data = resp.json() + return data.get('data', {}).get('valueRange', {}).get('values', []) + +def write_batch(range_str, values): + """Write a batch of values to the sheet.""" + token = get_feishu_token() + url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values' + body = {"valueRange": {"range": f"{SHEET_ID}!{range_str}", "values": values}} + resp = requests.put(url, headers={ + 'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json' + }, json=body) + result = resp.json() + if result.get('code') != 0: + print(f" ERROR writing {range_str}: {result}") + return False + return True + +def main(): + # 1. Read sheet data + print("Reading sheet data...") + rows = read_sheet() + print(f" Got {len(rows)} rows") + + # 2. Extract phones and encrypt + print("\nExtracting and encrypting phones...") + phone_map = {} # encrypted -> (row_idx, phone) + row_phones = {} # row_idx -> phone + for i, row in enumerate(rows): + row_num = i + 3 # 1-indexed row number in sheet + if len(row) > 4 and row[4]: + phone = row[4].strip() + # Only process 11-digit phone numbers + if len(phone) == 11 and phone.isdigit(): + enc = encrypt_phone(phone) + phone_map[enc] = (i, phone) + row_phones[i] = phone + + print(f" Found {len(phone_map)} valid 11-digit phones") + + # 3. Query PostgreSQL for phone matching + print("\nQuerying PostgreSQL for phone matching...") + conn = psycopg2.connect(**PG_CONFIG) + cur = conn.cursor() + + enc_list = list(phone_map.keys()) + # Batch query in chunks of 500 + phone_to_uid = {} # encrypted -> account_id + phone_to_created = {} # encrypted -> created_at + phone_to_channel = {} # encrypted -> download_channel + + for chunk_start in range(0, len(enc_list), 500): + chunk = enc_list[chunk_start:chunk_start+500] + placeholders = ','.join(['%s'] * len(chunk)) + cur.execute(f""" + SELECT id, tel_encrypt, created_at, download_channel + FROM bi_vala_app_account + WHERE tel_encrypt IN ({placeholders}) + AND status = 1 + AND deleted_at IS NULL + """, chunk) + for row in cur.fetchall(): + uid, enc, created, channel = row + phone_to_uid[enc] = str(uid) + phone_to_created[enc] = created + phone_to_channel[enc] = channel or '' + + print(f" Matched {len(phone_to_uid)} phones to UIDs") + + # 4. Query trial lesson counts for matched UIDs + print("\nQuerying trial lesson counts...") + all_uids = list(set(phone_to_uid.values())) + uid_to_trial_count = {} + + for chunk_start in range(0, len(all_uids), 500): + chunk = all_uids[chunk_start:chunk_start+500] + placeholders = ','.join(['%s'] * len(chunk)) + cur.execute(f""" + SELECT account_id, COUNT(*) as trial_count + FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) + AND expire_time IS NULL + AND deleted_at IS NULL + GROUP BY account_id + """, [int(x) for x in chunk]) + for row in cur.fetchall(): + uid_to_trial_count[str(row[0])] = row[1] + + print(f" Found trial counts for {len(uid_to_trial_count)} users") + + cur.close() + conn.close() + + # 5. Prepare writes + print("\nPreparing writes...") + + # Build row-level data + # row_idx -> {H: uid, D: trial_count, I: reg_date, J: channel} + row_data = {} + + for enc, (row_idx, phone) in phone_map.items(): + uid = phone_to_uid.get(enc) + if uid: + trial_count = uid_to_trial_count.get(uid, 0) + created = phone_to_created.get(enc) + channel = phone_to_channel.get(enc, '') + reg_date = created.strftime('%Y-%m-%d') if created else '' + + row_data[row_idx] = { + 'H': uid, + 'D': str(trial_count) if trial_count > 0 else '', + 'I': reg_date, + 'J': channel, + } + + # Also fill D/I/J for rows where H already has a valid UID but D is empty + print(" Checking rows with existing UIDs but empty D column...") + for i, row in enumerate(rows): + if i in row_data: + continue # Already processed above + if len(row) > 7 and row[7]: + # H column has a value - check if it's a numeric UID + h_val = row[7].strip() + if h_val.isdigit(): + # Check if D is empty + d_empty = len(row) <= 3 or not row[3] + if d_empty: + trial_count = uid_to_trial_count.get(h_val, 0) + if trial_count > 0: + if i not in row_data: + row_data[i] = {} + row_data[i]['D'] = str(trial_count) + + # Check if I is empty but we have data + i_empty = len(row) <= 8 or not row[8] + if i_empty: + # Need to query this UID's created_at + pass # Skip for now - we don't have this data from our query + + # 6. Write H column (UIDs) + print("\nWriting H column (UIDs)...") + h_writes = [] + for row_idx, data in row_data.items(): + if 'H' in data: + h_writes.append((row_idx, data['H'])) + + h_writes.sort(key=lambda x: x[0]) + + # Batch consecutive rows + h_batches = [] + if h_writes: + batch_start = h_writes[0][0] + batch_vals = [[h_writes[0][1]]] + for j in range(1, len(h_writes)): + if h_writes[j][0] == h_writes[j-1][0] + 1: + batch_vals.append([h_writes[j][1]]) + else: + h_batches.append((batch_start, batch_vals)) + batch_start = h_writes[j][0] + batch_vals = [[h_writes[j][1]]] + h_batches.append((batch_start, batch_vals)) + + h_written = 0 + for start_idx, vals in h_batches: + start_row = start_idx + 3 + end_row = start_row + len(vals) - 1 + range_str = f'H{start_row}:H{end_row}' + if write_batch(range_str, vals): + h_written += len(vals) + print(f" Wrote H{start_row}:H{end_row} ({len(vals)} cells)") + time.sleep(0.05) + + # 7. Write D column (trial counts) + print("\nWriting D column (trial counts)...") + d_writes = [] + for row_idx, data in row_data.items(): + if 'D' in data and data['D']: + d_writes.append((row_idx, data['D'])) + + d_writes.sort(key=lambda x: x[0]) + + d_batches = [] + if d_writes: + batch_start = d_writes[0][0] + batch_vals = [[d_writes[0][1]]] + for j in range(1, len(d_writes)): + if d_writes[j][0] == d_writes[j-1][0] + 1: + batch_vals.append([d_writes[j][1]]) + else: + d_batches.append((batch_start, batch_vals)) + batch_start = d_writes[j][0] + batch_vals = [[d_writes[j][1]]] + d_batches.append((batch_start, batch_vals)) + + d_written = 0 + for start_idx, vals in d_batches: + start_row = start_idx + 3 + end_row = start_row + len(vals) - 1 + range_str = f'D{start_row}:D{end_row}' + if write_batch(range_str, vals): + d_written += len(vals) + print(f" Wrote D{start_row}:D{end_row} ({len(vals)} cells)") + time.sleep(0.05) + + # 8. Write I column (registration dates) + print("\nWriting I column (registration dates)...") + i_writes = [] + for row_idx, data in row_data.items(): + if 'I' in data and data['I']: + i_writes.append((row_idx, data['I'])) + + i_writes.sort(key=lambda x: x[0]) + + i_batches = [] + if i_writes: + batch_start = i_writes[0][0] + batch_vals = [[i_writes[0][1]]] + for j in range(1, len(i_writes)): + if i_writes[j][0] == i_writes[j-1][0] + 1: + batch_vals.append([i_writes[j][1]]) + else: + i_batches.append((batch_start, batch_vals)) + batch_start = i_writes[j][0] + batch_vals = [[i_writes[j][1]]] + i_batches.append((batch_start, batch_vals)) + + i_written = 0 + for start_idx, vals in i_batches: + start_row = start_idx + 3 + end_row = start_row + len(vals) - 1 + range_str = f'I{start_row}:I{end_row}' + if write_batch(range_str, vals): + i_written += len(vals) + print(f" Wrote I{start_row}:I{end_row} ({len(vals)} cells)") + time.sleep(0.05) + + # 9. Write J column (download channels) + print("\nWriting J column (download channels)...") + j_writes = [] + for row_idx, data in row_data.items(): + if 'J' in data and data['J']: + j_writes.append((row_idx, data['J'])) + + j_writes.sort(key=lambda x: x[0]) + + j_batches = [] + if j_writes: + batch_start = j_writes[0][0] + batch_vals = [[j_writes[0][1]]] + for j in range(1, len(j_writes)): + if j_writes[j][0] == j_writes[j-1][0] + 1: + batch_vals.append([j_writes[j][1]]) + else: + j_batches.append((batch_start, batch_vals)) + batch_start = j_writes[j][0] + batch_vals = [[j_writes[j][1]]] + j_batches.append((batch_start, batch_vals)) + + j_written = 0 + for start_idx, vals in j_batches: + start_row = start_idx + 3 + end_row = start_row + len(vals) - 1 + range_str = f'J{start_row}:J{end_row}' + if write_batch(range_str, vals): + j_written += len(vals) + print(f" Wrote J{start_row}:J{end_row} ({len(vals)} cells)") + time.sleep(0.05) + + # 10. Report + print("\n" + "="*60) + print("FINAL REPORT") + print("="*60) + print(f"Total rows processed: {len(rows)}") + print(f"Phones extracted: {len(phone_map)}") + print(f"Phones matched to UIDs: {len(phone_to_uid)}") + print(f"Phones NOT matched: {len(phone_map) - len(phone_to_uid)}") + print(f"H column (UIDs) written: {h_written}") + print(f"D column (trial counts) written: {d_written}") + print(f"I column (reg dates) written: {i_written}") + print(f"J column (channels) written: {j_written}") + + # Show unmatched phones + unmatched = [] + for enc, (row_idx, phone) in phone_map.items(): + if enc not in phone_to_uid: + unmatched.append(f" Row {row_idx+3}: {phone}") + + if unmatched: + print(f"\nUnmatched phones ({len(unmatched)}):") + for u in unmatched[:20]: + print(u) + if len(unmatched) > 20: + print(f" ... and {len(unmatched)-20} more") + +if __name__ == '__main__': + main() diff --git a/scripts/process_xiaolong_dij.py b/scripts/process_xiaolong_dij.py new file mode 100644 index 0000000..ed9e32d --- /dev/null +++ b/scripts/process_xiaolong_dij.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +""" +Process 小龙 sheet: fill D (trial counts), I (reg dates), J (channels). +H column already written. Now fill D/I/J for matched rows. +""" +import sys +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone +import psycopg2 +import requests +import time + +PG_CONFIG = { + 'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com', + 'port': 28591, + 'user': 'ai_member', + 'password': 'LdfjdjL83h3h3^$&**YGG*', + 'database': 'vala_bi', +} + +SPREADSHEET_TOKEN = 'DU4dsUOJThfbPStMcgBcsMH5nyb' +SHEET_ID = 'dff8c7' + +def get_token(): + resp = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + json={'app_id': 'cli_a929ae22e0b8dcc8', 'app_secret': 'OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ'}) + return resp.json()['tenant_access_token'] + +def read_sheet(token): + url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J1142?valueRenderOption=ToString' + resp = requests.get(url, headers={'Authorization': f'Bearer {token}'}) + return resp.json().get('data', {}).get('valueRange', {}).get('values', []) + +def write_batch(token, range_str, values, max_retries=3): + url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values' + body = {"valueRange": {"range": f"{SHEET_ID}!{range_str}", "values": values}} + for attempt in range(max_retries): + resp = requests.put(url, headers={ + 'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json' + }, json=body) + result = resp.json() + code = result.get('code', -1) + if code == 0: + return True + elif code == 90217: # too many request + wait = 1.0 * (attempt + 1) + print(f" Rate limited on {range_str}, waiting {wait}s...") + time.sleep(wait) + else: + print(f" ERROR {range_str}: {result}") + return False + print(f" FAILED after {max_retries} retries: {range_str}") + return False + +def batch_writes(token, col, writes): + """Write sorted writes in consecutive batches.""" + if not writes: + return 0 + writes.sort(key=lambda x: x[0]) + batches = [] + batch_start = writes[0][0] + batch_vals = [[writes[0][1]]] + for j in range(1, len(writes)): + if writes[j][0] == writes[j-1][0] + 1: + batch_vals.append([writes[j][1]]) + else: + batches.append((batch_start, batch_vals)) + batch_start = writes[j][0] + batch_vals = [[writes[j][1]]] + batches.append((batch_start, batch_vals)) + + written = 0 + for start_idx, vals in batches: + start_row = start_idx + 3 + end_row = start_row + len(vals) - 1 + range_str = f'{col}{start_row}:{col}{end_row}' + if write_batch(token, range_str, vals): + written += len(vals) + print(f" {range_str} ({len(vals)} cells)") + time.sleep(0.08) + return written + +def main(): + token = get_token() + print("Reading sheet...") + rows = read_sheet(token) + print(f" {len(rows)} rows") + + # Extract phones + print("\nEncrypting phones...") + phone_map = {} # enc -> (row_idx, phone) + for i, row in enumerate(rows): + if len(row) > 4 and row[4]: + phone = row[4].strip() + if len(phone) == 11 and phone.isdigit(): + enc = encrypt_phone(phone) + phone_map[enc] = (i, phone) + print(f" {len(phone_map)} phones") + + # Query DB + print("\nQuerying PostgreSQL...") + conn = psycopg2.connect(**PG_CONFIG) + cur = conn.cursor() + + enc_list = list(phone_map.keys()) + phone_to_uid = {} + phone_to_created = {} + phone_to_channel = {} + + for chunk_start in range(0, len(enc_list), 500): + chunk = enc_list[chunk_start:chunk_start+500] + placeholders = ','.join(['%s'] * len(chunk)) + cur.execute(f""" + SELECT id, tel_encrypt, created_at, download_channel + FROM bi_vala_app_account + WHERE tel_encrypt IN ({placeholders}) + AND status = 1 AND deleted_at IS NULL + """, chunk) + for row in cur.fetchall(): + uid, enc, created, channel = row + phone_to_uid[enc] = str(uid) + phone_to_created[enc] = created + phone_to_channel[enc] = channel or '' + + print(f" Matched {len(phone_to_uid)} UIDs") + + # Trial counts + all_uids = list(set(phone_to_uid.values())) + uid_to_trial = {} + for chunk_start in range(0, len(all_uids), 500): + chunk = all_uids[chunk_start:chunk_start+500] + placeholders = ','.join(['%s'] * len(chunk)) + cur.execute(f""" + SELECT account_id, COUNT(*) + FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) + AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id + """, [int(x) for x in chunk]) + for row in cur.fetchall(): + uid_to_trial[str(row[0])] = row[1] + + print(f" Trial counts for {len(uid_to_trial)} users") + + # Also get trial counts for rows that already have UIDs in H but empty D + existing_uids = set() + for i, row in enumerate(rows): + if len(row) > 7 and row[7]: + h_val = row[7].strip() + if h_val.isdigit(): + existing_uids.add(h_val) + + # Query trial counts for existing UIDs not already covered + missing_uids = existing_uids - set(uid_to_trial.keys()) + if missing_uids: + print(f" Querying trial counts for {len(missing_uids)} existing UIDs...") + uid_list = list(missing_uids) + for chunk_start in range(0, len(uid_list), 500): + chunk = uid_list[chunk_start:chunk_start+500] + placeholders = ','.join(['%s'] * len(chunk)) + cur.execute(f""" + SELECT account_id, COUNT(*) + FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) + AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id + """, [int(x) for x in chunk]) + for row in cur.fetchall(): + uid_to_trial[str(row[0])] = row[1] + print(f" Added {len(missing_uids & set(uid_to_trial.keys()))} more") + + cur.close() + conn.close() + + # Build writes + row_data = {} + for enc, (row_idx, phone) in phone_map.items(): + uid = phone_to_uid.get(enc) + if uid: + trial = uid_to_trial.get(uid, 0) + created = phone_to_created.get(enc) + channel = phone_to_channel.get(enc, '') + reg_date = created.strftime('%Y-%m-%d') if created else '' + row_data[row_idx] = { + 'D': str(trial) if trial > 0 else '', + 'I': reg_date, + 'J': channel, + } + + # Also fill D for rows with existing H UID but empty D + for i, row in enumerate(rows): + if i in row_data: + continue + if len(row) > 7 and row[7]: + h_val = row[7].strip() + if h_val.isdigit(): + d_empty = len(row) <= 3 or not row[3] + if d_empty: + trial = uid_to_trial.get(h_val, 0) + if trial > 0: + if i not in row_data: + row_data[i] = {} + row_data[i]['D'] = str(trial) + + # Prepare column writes + d_writes = [(idx, data['D']) for idx, data in row_data.items() if data.get('D')] + i_writes = [(idx, data['I']) for idx, data in row_data.items() if data.get('I')] + j_writes = [(idx, data['J']) for idx, data in row_data.items() if data.get('J')] + + print(f"\nPrepared: D={len(d_writes)}, I={len(i_writes)}, J={len(j_writes)}") + + # Write D + print("\n=== Writing D column ===") + d_written = batch_writes(token, 'D', d_writes) + + # Write I + print("\n=== Writing I column ===") + i_written = batch_writes(token, 'I', i_writes) + + # Write J + print("\n=== Writing J column ===") + j_written = batch_writes(token, 'J', j_writes) + + # Report + print("\n" + "="*60) + print("FINAL REPORT") + print("="*60) + print(f"Phones matched to UIDs: {len(phone_to_uid)}") + print(f"D column written: {d_written}") + print(f"I column written: {i_written}") + print(f"J column written: {j_written}") + +if __name__ == '__main__': + main() diff --git a/scripts/sales_lead_auto_fill.py b/scripts/sales_lead_auto_fill.py index 795bb10..f516baf 100755 --- a/scripts/sales_lead_auto_fill.py +++ b/scripts/sales_lead_auto_fill.py @@ -1,67 +1,36 @@ #!/usr/bin/env python3 """ -销售线索自动回填 — 从飞书表格读取销售填写的手机号,自动匹配并回填用户信息 +销售线索自动回填 v5 — 只写 D/H/I/J 四列 执行频率:每小时 cron 巡检 归属 Agent:小溪 (xiaoxi) -表格列结构 (A-V, 共22列): - Row 1: 表头 - Row 2: 标注行 (👤手填 / 🤖自动) - Row 3+: 数据行 +v5 精简版(2026-06-04 陈逸鸫确认):只写 D/H/I/J,K~V 由 Cursor 公式接管 +分工约定见 docs/bot-xiaoxi-contract-v5.md - A: 销售归属 (销售填) - B: 微信昵称 (销售填) - C: 进线日期 (销售填) - D: 体验节数 (自动回填) - E: 手机号 (销售填) - F: 用户年级 (销售填) - G: 课史/跟进 (销售填) - H: 用户ID (自动回填) - I: 注册日期 (自动回填) - J: 下载渠道 (自动回填) - K: 是否下单 (自动回填) - L: 下单日期 (自动回填) - M: 成交渠道 (自动回填) - N: 产品 (自动回填) - O: 下单金额/GMV (自动回填) - P: 退款金额 (自动回填) - Q: 实际收入/GSV (自动回填) - R: 激活课程 (自动回填) - S: 当前行课进度 (自动回填) - T: 最近行课时间 (自动回填) - U: 累计学习时长/min (自动回填) - V: 更新时间 (自动回填) +表格列结构: + D: 体验节数 (小溪 cron) + E: 手机号 (销售填 → 小溪读) + H: 用户ID (小溪 cron) + I: 注册日期 (小溪 cron) + J: 下载渠道 (小溪 cron) """ import json, requests, os, re, sys, time, psycopg2 from datetime import datetime -from collections import defaultdict SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, SCRIPTS_DIR) from phone_encrypt import encrypt_phone -# ── 配置 ── PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" PG_PORT = 28591 PG_USER = "ai_member" PG_DB = "vala_bi" SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" -SHEET_IDS = { - "吴迪": "f975f0", - "小龙": "qJF4I", - "成都": "qJF4J", -} +SHEET_IDS = {"吴迪": "f975f0", "小龙": "qJF4I", "成都": "qJF4J"} CRED_DIR = "/root/.openclaw/credentials/xiaoxi" LOG_FILE = "/var/log/xiaoxi_sales_lead.log" -# 产品名称映射 -GOODS_NAME_MAP = { - 57: "瓦拉英语level1·单季", 60: "瓦拉英语level1", 63: "瓦拉英语level1·单季", - 31: "瓦拉英语年包", 32: "瓦拉英语单季度包", 33: "瓦拉英语level2", - 54: "瓦拉英语季度包", 61: "瓦拉英语level1+2", -} - def log(msg): ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -72,7 +41,7 @@ def log(msg): def get_pg_password(): - secrets_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "secrets.env") + secrets_path = os.path.join(SCRIPTS_DIR, "..", "secrets.env") with open(secrets_path) as f: for line in f: if line.startswith("PG_ONLINE_PASSWORD="): @@ -111,7 +80,7 @@ def put_values(token, sheet_id, range_str, values, retries=3): code = result.get("code", -1) if code == 0: return result - if code == 90217: # rate limited + if code == 90217: wait = 2 ** attempt log(f" 限流 {sheet_id}!{range_str}, 等待{wait}s重试...") time.sleep(wait) @@ -132,32 +101,18 @@ def encrypt_phone_local(phone): return None -def batch_in(cur, sql_tpl, params, chunk=500): - results = [] - for i in range(0, len(params), chunk): - batch = params[i:i + chunk] - ph = ",".join(["%s"] * len(batch)) - cur.execute(sql_tpl % ph, batch) - results.extend(cur.fetchall()) - return results - - def process_sheet(token, cur, sheet_name, sheet_id): - """处理单个销售分表""" log(f"\n--- [{sheet_name}] {sheet_id} ---") rows = read_sheet(token, sheet_id) if len(rows) <= 2: - log(f" [{sheet_name}] 无数据行(仅表头+标注),跳过") + log(f" [{sheet_name}] 无数据行,跳过") return {"processed": 0, "matched": 0} log(f" [{sheet_name}] 读取到 {len(rows) - 2} 行数据") - now = datetime.now() - - # 找出需要处理的行(从第3行开始,跳过表头和标注行) - # 条件:E列(手机号)有值 且 任一自动列(D,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V)为空 或 V列超过1小时 - auto_cols = [3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] # D,H~V 的0-based索引 + # v5: 只检查 D(3), H(7), I(8), J(9) 是否为空 + auto_cols = [3, 7, 8, 9] pending = [] for idx, row in enumerate(rows[2:], start=3): if len(row) < 5: @@ -165,34 +120,17 @@ def process_sheet(token, cur, sheet_name, sheet_id): phone = str(row[4]).strip() if len(row) > 4 and row[4] else "" if not phone: continue - - update_time = str(row[21]).strip() if len(row) > 21 and row[21] else "" - - need_refresh = False - # 检查任一自动列是否为空 for ci in auto_cols: val = str(row[ci]).strip() if len(row) > ci and row[ci] else "" if not val: - need_refresh = True + pending.append({"row_idx": idx, "phone": phone}) break - # 或超过1小时刷新 - if not need_refresh and update_time: - try: - last_update = datetime.strptime(update_time, "%Y-%m-%d %H:%M:%S") - if (now - last_update).total_seconds() > 3600: - need_refresh = True - except ValueError: - need_refresh = True - - if need_refresh: - pending.append({"row_idx": idx, "phone": phone}) - log(f" [{sheet_name}] 待处理: {len(pending)} 行") if not pending: return {"processed": 0, "matched": 0} - # 手机号→account_id 匹配(XXTEA加密匹配 tel_encrypt) + # 手机号 → account_id phones_raw = list(set(r["phone"] for r in pending)) valid_phones = [(p, encrypt_phone_local(p)) for p in phones_raw if encrypt_phone_local(p)] enc_list = list(set(m[1] for m in valid_phones)) @@ -217,18 +155,14 @@ def process_sheet(token, cur, sheet_name, sheet_id): log(f" [{sheet_name}] 手机号匹配: {len(phone_to_aid)}/{len(valid_phones)}") matched_aids = list(set(phone_to_aid.values())) - now_str = now.strftime("%Y-%m-%d %H:%M:%S") if not matched_aids: for r in pending: - row_num = r["row_idx"] - put_values(token, sheet_id, f"H{row_num}:H{row_num}", [["未注册"]]) - put_values(token, sheet_id, f"V{row_num}:V{row_num}", [[now_str]]) + put_values(token, sheet_id, f"H{r['row_idx']}:H{r['row_idx']}", [["未注册"]]) log(f" [{sheet_name}] 全部未匹配,已标记 {len(pending)} 行") return {"processed": len(pending), "matched": 0} - # 批量查询用户信息 - # 账号信息 + # 账号信息: I(注册日期), J(下载渠道) cur.execute( f"SELECT id, created_at, download_channel FROM bi_vala_app_account WHERE id IN ({','.join(['%s']*len(matched_aids))}) AND status=1 AND deleted_at IS NULL", matched_aids @@ -240,99 +174,14 @@ def process_sheet(token, cur, sheet_name, sheet_id): "download_channel": channel or "" } - # 订单信息 - cur.execute( - f"SELECT account_id, pay_success_date, key_from, goods_id, pay_amount_int, order_status FROM bi_vala_order WHERE account_id IN ({','.join(['%s']*len(matched_aids))}) AND pay_success_date IS NOT NULL AND order_status IN (3,4) AND deleted_at IS NULL ORDER BY pay_success_date DESC", - matched_aids - ) - order_info = defaultdict(list) - for aid, pay_date, key_from, goods_id, amount, order_status in cur.fetchall(): - order_info[aid].append({ - "pay_date": pay_date.strftime("%Y-%m-%d") if pay_date else "", - "key_from": key_from or "", - "goods_id": goods_id, - "amount": amount or 0, - "order_status": order_status, - }) - - # 退款信息 - cur.execute( - f"SELECT o.account_id, SUM(r.refund_amount::numeric) FROM bi_refund_order r INNER JOIN bi_vala_order o ON r.trade_no=o.trade_no WHERE o.account_id IN ({','.join(['%s']*len(matched_aids))}) AND r.status=3 AND o.order_status=4 AND r.deleted_at IS NULL GROUP BY o.account_id", - matched_aids - ) - refund_info = {r[0]: r[1] for r in cur.fetchall()} - - # 激活课程 - cur.execute( - f"SELECT account_id, season_package_level FROM bi_vala_seasonal_ticket WHERE account_id IN ({','.join(['%s']*len(matched_aids))}) AND status=1 AND deleted_at IS NULL AND season_package_level IN ('A1','A2')", - matched_aids - ) - activation = {} - for aid, lvl in cur.fetchall(): - if aid not in activation: - activation[aid] = lvl - elif activation[aid] != lvl: - activation[aid] = "A1+A2" - - # 角色 → 行课记录 - cur.execute( - f"SELECT account_id, id FROM bi_vala_app_character WHERE account_id IN ({','.join(['%s']*len(matched_aids))}) AND nickname IS NOT NULL AND nickname != '' AND deleted_at IS NULL", - matched_aids - ) - account_chars = defaultdict(list) - char_to_account = {} - for aid, cid in cur.fetchall(): - account_chars[aid].append(cid) - char_to_account[cid] = aid - char_ids = list(char_to_account.keys()) - - # 课程结构映射 - cur.execute("SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson") - chapter_map = {} - for ch_id, cl, cs, cu, cl2 in cur.fetchall(): - chapter_map[ch_id] = (cl or "", cs or "", cu or "", cl2 or "") - - # 课时完成记录 - char_plays = defaultdict(lambda: {"latest_time": None, "latest_chapter": None, "total_ms": 0}) - for tbl_idx in range(8): - table = f"bi_user_chapter_play_record_{tbl_idx}" - try: - cur.execute( - f"SELECT user_id, chapter_id, created_at FROM {table} WHERE play_status=1 AND deleted_at IS NULL AND user_id = ANY(%s)", - (char_ids,) - ) - for uid, ch_id, created_at in cur.fetchall(): - ch_data = chapter_map.get(ch_id) - if ch_data: - rec = char_plays[uid] - if rec["latest_time"] is None or created_at > rec["latest_time"]: - rec["latest_time"] = created_at - rec["latest_chapter"] = (ch_id, ch_data) - except Exception as e: - log(f" 警告 {table}: {e}") - - # 学习总耗时 - for tbl_idx in range(8): - table = f"bi_user_component_play_record_{tbl_idx}" - try: - cur.execute( - f"SELECT user_id, SUM(COALESCE(interval_time,0)) FROM {table} WHERE user_id = ANY(%s) AND deleted_at IS NULL GROUP BY user_id", - (char_ids,) - ) - for uid, total_ms in cur.fetchall(): - if uid in char_plays: - char_plays[uid]["total_ms"] += (total_ms or 0) - except Exception as e: - log(f" 警告 {table}: {e}") - - # 体验课完成节数 + # 体验课完成节数: D列 cur.execute( f"SELECT a.id, COUNT(*) FROM bi_vala_app_account a INNER JOIN bi_vala_app_character c ON a.id=c.account_id AND c.deleted_at IS NULL INNER JOIN bi_user_course_detail ucd ON c.id=ucd.user_id AND ucd.deleted_at IS NULL WHERE a.id IN ({','.join(['%s']*len(matched_aids))}) AND a.status=1 AND a.deleted_at IS NULL AND ucd.expire_time IS NULL GROUP BY a.id", matched_aids ) trial_count = {r[0]: r[1] for r in cur.fetchall()} - # 组装回填数据 + # 组装回填数据(v5: 只 D/H/I/J) results = [] for r in pending: phone = r["phone"] @@ -340,99 +189,23 @@ def process_sheet(token, cur, sheet_name, sheet_id): row_num = r["row_idx"] if not aid: - results.append({ - "row": row_num, - "values": { - "H": "未注册", "I": "", "J": "", "K": "", "L": "", "M": "", "N": "", - "O": "", "P": "", "Q": "", "R": "", "S": "", "T": "", "U": "", "V": now_str - } - }) + results.append({"row": row_num, "H": "未注册", "I": "", "J": "", "D": ""}) continue info = account_info.get(aid, {}) - orders = order_info.get(aid, []) - refund_amt = refund_info.get(aid, 0) - act = activation.get(aid, "") trials = trial_count.get(aid, 0) - - reg_date = info.get("created_at", "") - download_ch = info.get("download_channel", "") - - has_order = "是" if orders else "否" - if orders: - latest_order = orders[0] - order_date = latest_order["pay_date"] - channel = latest_order["key_from"] - goods_id = latest_order["goods_id"] - product = GOODS_NAME_MAP.get(goods_id, f"goods_{goods_id}") - gmv = sum(int(o["amount"]) for o in orders) / 100.0 - else: - order_date = "" - channel = "" - product = "" - gmv = 0 - - gsv = gmv - float(refund_amt) / 100.0 - - act_label = "" - if act == "A1": - act_label = "L1" - elif act == "A2": - act_label = "L2" - elif act == "A1+A2": - act_label = "L1+L2" - - chars = account_chars.get(aid, []) - best_time = None - best_ch = None - total_ms = 0 - for cid in chars: - play = char_plays.get(cid) - if play and play["latest_chapter"]: - if best_time is None or play["latest_time"] > best_time: - best_time = play["latest_time"] - best_ch = play["latest_chapter"] - total_ms += play["total_ms"] - - if best_ch: - ch_id, (cl, cs, cu, cl2) = best_ch - progress = f"{cl}-{cs}-{cu}-{cl2}" - recent = best_time.strftime("%Y-%m-%d") - total_min = round(total_ms / 60000, 1) - if total_min == int(total_min): - total_min = int(total_min) - else: - progress = "无记录" - recent = "" - total_min = 0 - results.append({ "row": row_num, - "values": { - "D": str(trials) if trials else "0", - "H": str(aid), - "I": reg_date, - "J": download_ch, - "K": has_order, - "L": order_date, - "M": channel, - "N": product, - "O": str(round(gmv, 2)) if gmv else "0", - "P": str(round(float(refund_amt) / 100.0, 2)) if refund_amt else "0", - "Q": str(round(gsv, 2)), - "R": act_label, - "S": progress, - "T": recent, - "U": str(total_min), - "V": now_str, - } + "D": str(trials) if trials else "0", + "H": str(aid), + "I": info.get("created_at", ""), + "J": info.get("download_channel", ""), }) - # 回填 — 整行写入 D~V(19列),E/F/G 留空不覆盖销售数据 results.sort(key=lambda x: x["row"]) log(f" [{sheet_name}] 准备回填 {len(results)} 行") - # 按连续行分组 + # 按连续行分组写入 groups = [] cur_grp = [] for r in results: @@ -446,42 +219,23 @@ def process_sheet(token, cur, sheet_name, sheet_id): for gi, g in enumerate(groups): sr, er = g[0]["row"], g[-1]["row"] - # D列单独写(体验节数) - d_vals = [[r["values"].get("D", "")] for r in g] + # D列 + d_vals = [[r.get("D", "")] for r in g] put_values(token, sheet_id, f"D{sr}:D{er}", d_vals) - # H~V 整块写(跳过E/F/G,不覆盖销售数据) - h_vals = [] - for r in g: - v = r["values"] - h_vals.append([ - v.get("H", ""), # H: 用户ID - v.get("I", ""), # I: 注册日期 - v.get("J", ""), # J: 下载渠道 - v.get("K", ""), # K: 是否下单 - v.get("L", ""), # L: 下单日期 - v.get("M", ""), # M: 成交渠道 - v.get("N", ""), # N: 产品 - v.get("O", ""), # O: 下单金额(GMV) - v.get("P", ""), # P: 退款金额 - v.get("Q", ""), # Q: 实际收入(GSV) - v.get("R", ""), # R: 激活课程 - v.get("S", ""), # S: 当前行课进度 - v.get("T", ""), # T: 最近行课时间 - v.get("U", ""), # U: 累计学习时长 - v.get("V", ""), # V: 更新时间 - ]) - put_values(token, sheet_id, f"H{sr}:V{er}", h_vals) + # H~J 整块写 + hij_vals = [[r.get("H", ""), r.get("I", ""), r.get("J", "")] for r in g] + put_values(token, sheet_id, f"H{sr}:J{er}", hij_vals) if gi % 5 == 4: time.sleep(0.5) - matched_count = sum(1 for r in results if r["values"]["H"] != "未注册") + matched_count = sum(1 for r in results if r["H"] != "未注册") log(f" [{sheet_name}] 回填完成: {len(results)} 行, 匹配 {matched_count}, 未注册 {len(results) - matched_count}") return {"processed": len(results), "matched": matched_count} def main(): log("=" * 50) - log("销售线索自动回填 启动") + log("销售线索自动回填 v5 启动(只写 D/H/I/J)") try: token = get_fs_token() diff --git a/scripts/update_stride_phone_lead_sheet.py b/scripts/update_stride_phone_lead_sheet.py index cd457ac..c811177 100644 --- a/scripts/update_stride_phone_lead_sheet.py +++ b/scripts/update_stride_phone_lead_sheet.py @@ -16,15 +16,34 @@ import json, sys, os from datetime import datetime, timezone, timedelta from collections import defaultdict +# 添加 scripts 目录到 path,以便导入 phone_encrypt 模块 +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from phone_encrypt import decrypt_phone + +# 从环境变量加载密钥(cron 执行时由 .env 注入) +def _load_env(): + env_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "secrets.env") + if os.path.exists(env_file): + with open(env_file) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#") and "=" in line: + k, v = line.split("=", 1) + v = v.strip().strip("'").strip('"') + if k not in os.environ: + os.environ[k] = v + +_load_env() + # 配置 PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" PG_PORT = "28591" PG_USER = "ai_member" PG_DB = "vala_class" -PG_PASSWORD = os.environ.get("PG_VALA_CLASS_PASSWORD", "LdfjdjL83h3h3^$&**YGG*") +PG_PASSWORD = os.environ["PG_VALA_CLASS_PASSWORD"] -FEISHU_APP_ID = "cli_a929ae22e0b8dcc8" -FEISHU_APP_SECRET = "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ" +FEISHU_APP_ID = os.environ["FEISHU_APP_ID"] +FEISHU_APP_SECRET = os.environ["FEISHU_APP_SECRET"] SPREADSHEET_TOKEN = "TWeHsCX6JhhsjntBagHcoJcJned" SHEET_MINGXI = "1uDxsI" # 明细数据 @@ -73,12 +92,20 @@ def query_data(): return rows, cols, wechat_tels def build_sheet1_data(rows, cols, wechat_tels): - """明细数据 — 列名、列数完全跟随数据库表结构,末尾追加「是否加微」""" + """明细数据 — 列名、列数完全跟随数据库表结构,末尾追加「是否加微」 + + B列(phone_masked)处理逻辑: + - 已加微 → 保留原始脱敏手机号 + - 未加微 → 解密 C列(phone_encrypt) 还原明文手机号,填入 B列 + """ ts_cols = {"created_at", "updated_at"} idx_phone_enc = cols.index("phone_encrypt") if "phone_encrypt" in cols else -1 + idx_phone_masked = cols.index("phone_masked") if "phone_masked" in cols else -1 data = [list(cols) + ["是否加微"]] for r in rows: row_data = [] + phone_enc = r[idx_phone_enc] if idx_phone_enc >= 0 else None + is_wechat = phone_enc in wechat_tels for i, col in enumerate(cols): val = r[i] if val is None: @@ -86,10 +113,15 @@ def build_sheet1_data(rows, cols, wechat_tels): elif col in ts_cols and isinstance(val, int) and val > 0: row_data.append(datetime.fromtimestamp(val, tz=TZ).strftime("%Y-%m-%d %H:%M:%S")) else: - row_data.append(str(val)) - # 是否加微 - phone_enc = r[idx_phone_enc] if idx_phone_enc >= 0 else None - row_data.append("是" if phone_enc in wechat_tels else "否") + # B列(phone_masked):未加微时解密 phone_encrypt 填入明文 + if i == idx_phone_masked and not is_wechat and phone_enc: + try: + row_data.append(decrypt_phone(phone_enc)) + except Exception: + row_data.append(str(val)) # 解密失败保留原值 + else: + row_data.append(str(val)) + row_data.append("是" if is_wechat else "否") data.append(row_data) return data diff --git a/scripts/update_xiaolong_sheet.py b/scripts/update_xiaolong_sheet.py new file mode 100644 index 0000000..af38d37 --- /dev/null +++ b/scripts/update_xiaolong_sheet.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 +"""Update the 小龙 sheet with UID, trial count, registration date, and download channel.""" + +import json +import re +import time +import sys +import mysql.connector +import psycopg2 +import requests + +FEISHU_TOKEN = "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6" +SPREADSHEET_TOKEN = "DU4dsUOJThfbPStMcgBcsMH5nyb" +SHEET_ID = "dff8c7" + +# DB connections +MYSQL_CONFIG = { + "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com", + "port": 27751, + "user": "read_only", + "password": "fsdo45ijfmfmuu77$%^&", + "database": "vala_user", +} + +PG_CONFIG = { + "host": "bj-postgres-16pob4sg.sql.tencentcdb.com", + "port": 28591, + "user": "ai_member", + "password": "LdfjdjL83h3h3^$&**YGG*", + "database": "vala_bi", +} + +def read_sheet(): + """Read all data from the sheet.""" + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J1142?valueRenderOption=ToString" + headers = {"Authorization": f"Bearer {FEISHU_TOKEN}"} + r = requests.get(url, headers=headers) + data = r.json() + if data.get("code") != 0: + print(f"Error reading sheet: {data}") + sys.exit(1) + return data["data"]["valueRange"]["values"] + +def write_batch(rows_data): + """Write a batch of rows to the sheet. rows_data is a list of (row_num, col, value) tuples.""" + # Group by row for efficiency + # We'll write column by column for contiguous ranges + pass + +def write_range(start_row, end_row, col_letter, values): + """Write values to a column range.""" + # start_row and end_row are 1-based sheet row numbers + range_str = f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}" + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" + headers = { + "Authorization": f"Bearer {FEISHU_TOKEN}", + "Content-Type": "application/json", + } + body = { + "valueRange": { + "range": range_str, + "values": values, + } + } + r = requests.put(url, headers=headers, json=body) + data = r.json() + if data.get("code") != 0: + print(f"Error writing {range_str}: {data}") + return False + print(f" Wrote {range_str}: {len(values)} cells") + return True + +def main(): + print("Reading sheet data...") + rows = read_sheet() + print(f"Got {len(rows)} rows") + + # Step 1: Identify rows needing phone lookup + # Columns: A=0(sheet), B=1(name), C=2(date), D=3(trial_count), E=4(phone), F=5(grade), G=6(notes), H=7(UID), I=8(reg_date), J=9(channel) + phone_rows = [] # (row_index, phone, row_num) + existing_uids = set() # UIDs already in H column + + for i, row in enumerate(rows): + row_num = i + 3 # sheet row number + + # Pad row to 10 columns + while len(row) < 10: + row.append("") + + phone = row[4].strip() if len(row) > 4 else "" + h_val = row[7].strip() if len(row) > 7 else "" + + # Check H column for existing UID (pure digits) + if h_val and h_val.isdigit(): + existing_uids.add(h_val) + + # Check if phone is 11-digit and H is empty or "未注册" + if re.match(r'^\d{11}$', phone): + if not h_val or h_val == "未注册": + phone_rows.append((i, phone, row_num)) + + print(f"Rows with 11-digit phone and empty H: {len(phone_rows)}") + print(f"Existing UIDs in H column: {len(existing_uids)}") + + # Step 2: Query MySQL for phone -> UID mapping + phone_to_uid = {} + if phone_rows: + mysql_conn = mysql.connector.connect(**MYSQL_CONFIG) + mysql_cur = mysql_conn.cursor() + + # Batch query in chunks of 50 + chunk_size = 50 + for chunk_start in range(0, len(phone_rows), chunk_size): + chunk = phone_rows[chunk_start:chunk_start + chunk_size] + # Build LIKE conditions + conditions = [] + for _, phone, _ in chunk: + first3 = phone[:3] + last4 = phone[-4:] + conditions.append(f"tel LIKE '{first3}%{last4}'") + + query = f"SELECT id, tel FROM vala_app_account WHERE ({' OR '.join(conditions)}) AND deleted_at IS NULL" + mysql_cur.execute(query) + for uid, tel in mysql_cur.fetchall(): + # tel is masked like "138****3774", match by first3+last4 + for _, phone, _ in chunk: + if phone[:3] == tel[:3] and phone[-4:] == tel[-4:]: + phone_to_uid[phone] = str(uid) + break + + time.sleep(0.05) + + mysql_cur.close() + mysql_conn.close() + print(f"Phone -> UID matches found: {len(phone_to_uid)}") + + # Step 3: Collect all UIDs to query PostgreSQL + all_uids = set(existing_uids) + for phone, uid in phone_to_uid.items(): + all_uids.add(uid) + + print(f"Total unique UIDs to query: {len(all_uids)}") + + # Step 4: Query PostgreSQL for registration info and trial count + uid_reg_info = {} # uid -> {created_at, download_channel} + uid_trial_count = {} # uid -> trial_count + + if all_uids: + pg_conn = psycopg2.connect(**PG_CONFIG) + pg_cur = pg_conn.cursor() + + uid_list = list(all_uids) + + # Query bi_vala_app_account for reg info + chunk_size = 100 + for chunk_start in range(0, len(uid_list), chunk_size): + chunk = uid_list[chunk_start:chunk_start + chunk_size] + placeholders = ",".join(["%s"] * len(chunk)) + query = f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status=1 AND deleted_at IS NULL" + pg_cur.execute(query, chunk) + for uid, created_at, download_channel in pg_cur.fetchall(): + uid_reg_info[str(uid)] = { + "created_at": str(created_at) if created_at else "", + "download_channel": download_channel or "", + } + time.sleep(0.05) + + # Query bi_user_course_detail for trial count + for chunk_start in range(0, len(uid_list), chunk_size): + chunk = uid_list[chunk_start:chunk_start + chunk_size] + placeholders = ",".join(["%s"] * len(chunk)) + query = f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id" + pg_cur.execute(query, chunk) + for uid, cnt in pg_cur.fetchall(): + uid_trial_count[str(uid)] = cnt + time.sleep(0.05) + + pg_cur.close() + pg_conn.close() + + print(f"UIDs with reg info: {len(uid_reg_info)}") + print(f"UIDs with trial count: {len(uid_trial_count)}") + + # Step 5: Build the update data + # For each row, determine what to write + # H column: UID (only if found in MySQL) + # D column: trial count + # I column: registration date + # J column: download channel + + # We'll batch writes by column for contiguous ranges + updates = [] # (row_num, col_index, value) + # col_index: 3=D, 7=H, 8=I, 9=J + + for i, row in enumerate(rows): + row_num = i + 3 + while len(row) < 10: + row.append("") + + phone = row[4].strip() if len(row) > 4 else "" + h_val = row[7].strip() if len(row) > 7 else "" + + # Determine UID for this row + uid = None + if re.match(r'^\d{11}$', phone) and phone in phone_to_uid: + uid = phone_to_uid[phone] + elif h_val and h_val.isdigit(): + uid = h_val + + if not uid: + continue + + # H column: write UID if phone was matched and H was empty + if re.match(r'^\d{11}$', phone) and phone in phone_to_uid: + if not h_val or h_val == "未注册": + updates.append((row_num, 7, uid)) + + # D, I, J columns: write if we have data + if uid in uid_trial_count: + updates.append((row_num, 3, str(uid_trial_count[uid]))) + if uid in uid_reg_info: + info = uid_reg_info[uid] + if info["created_at"]: + updates.append((row_num, 8, info["created_at"])) + if info["download_channel"]: + updates.append((row_num, 9, info["download_channel"])) + + print(f"Total updates to write: {len(updates)}") + + # Step 6: Write updates in batches + # Group by column and find contiguous ranges + # Column index -> list of (row_num, value) + col_updates = {3: [], 7: [], 8: [], 9: []} + col_names = {3: "D", 7: "H", 8: "I", 9: "J"} + + for row_num, col_idx, value in updates: + col_updates[col_idx].append((row_num, value)) + + # Sort each column's updates by row_num + for col_idx in col_updates: + col_updates[col_idx].sort(key=lambda x: x[0]) + + # Write in contiguous batches + total_written = 0 + for col_idx in [7, 3, 8, 9]: # Write H first, then D, I, J + col_letter = col_names[col_idx] + items = col_updates[col_idx] + if not items: + continue + + # Group into contiguous ranges + i = 0 + while i < len(items): + start_row = items[i][0] + values = [[items[i][1]]] + j = i + 1 + while j < len(items) and items[j][0] == items[j-1][0] + 1: + values.append([items[j][1]]) + j += 1 + end_row = items[j-1][0] + + if write_range(start_row, end_row, col_letter, values): + total_written += len(values) + time.sleep(0.05) + i = j + + # Summary + phones_matched = len(phone_to_uid) + h_written = len(col_updates[7]) + d_written = len(col_updates[3]) + i_written = len(col_updates[8]) + j_written = len(col_updates[9]) + + print(f"\n=== SUMMARY ===") + print(f"Phones matched in MySQL: {phones_matched}") + print(f"H column (UID) written: {h_written}") + print(f"D column (trial count) written: {d_written}") + print(f"I column (reg date) written: {i_written}") + print(f"J column (channel) written: {j_written}") + print(f"Total cells written: {total_written}") + +if __name__ == "__main__": + main() diff --git a/scripts/update_xiaolong_sheet_v2.py b/scripts/update_xiaolong_sheet_v2.py new file mode 100644 index 0000000..12b2041 --- /dev/null +++ b/scripts/update_xiaolong_sheet_v2.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +"""Update the 小龙 sheet with UID, trial count, registration date, and download channel. +V2: Fixed phone matching and batch writes.""" +import json, re, time, sys +import mysql.connector, psycopg2, requests + +FEISHU_TOKEN = "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6" +SPREADSHEET_TOKEN = "DU4dsUOJThfbPStMcgBcsMH5nyb" +SHEET_ID = "dff8c7" + +MYSQL_CONFIG = { + "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com", "port": 27751, + "user": "read_only", "password": "fsdo45ijfmfmuu77$%^&", "database": "vala_user", +} +PG_CONFIG = { + "host": "bj-postgres-16pob4sg.sql.tencentcdb.com", "port": 28591, + "user": "ai_member", "password": "LdfjdjL83h3h3^$&**YGG*", "database": "vala_bi", +} + +def read_sheet(): + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J1142?valueRenderOption=ToString" + r = requests.get(url, headers={"Authorization": f"Bearer {FEISHU_TOKEN}"}) + data = r.json() + if data.get("code") != 0: + print(f"Error reading sheet: {data}"); sys.exit(1) + return data["data"]["valueRange"]["values"] + +def write_range(range_str, values): + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" + headers = {"Authorization": f"Bearer {FEISHU_TOKEN}", "Content-Type": "application/json"} + body = {"valueRange": {"range": range_str, "values": values}} + r = requests.put(url, headers=headers, json=body) + data = r.json() + if data.get("code") != 0: + print(f" ERROR {range_str}: {data}") + return False + print(f" OK {range_str}: {len(values)} cells") + return True + +def main(): + print("=== Reading sheet ===") + rows = read_sheet() + print(f"Rows: {len(rows)}") + + # Parse rows, pad to 10 cols + parsed = [] + for i, row in enumerate(rows): + while len(row) < 10: + row.append("") + parsed.append({ + "idx": i, "row_num": i + 3, + "phone": row[4].strip() if row[4] else "", + "h_val": row[7].strip() if row[7] else "", + }) + + # Find rows needing phone lookup + phone_rows = [p for p in parsed if re.match(r'^\d{11}$', p["phone"]) and (not p["h_val"] or p["h_val"] == "未注册")] + existing_uids = set(p["h_val"] for p in parsed if p["h_val"].isdigit()) + print(f"Need phone lookup: {len(phone_rows)}, existing UIDs: {len(existing_uids)}") + + # Step 2: MySQL phone -> UID + phone_to_uid = {} + if phone_rows: + conn = mysql.connector.connect(**MYSQL_CONFIG) + cur = conn.cursor() + chunk_size = 50 + for cs in range(0, len(phone_rows), chunk_size): + chunk = phone_rows[cs:cs+chunk_size] + conditions = [f"tel LIKE '{p['phone'][:3]}%{p['phone'][-4:]}'" for p in chunk] + query = f"SELECT id, tel FROM vala_app_account WHERE ({' OR '.join(conditions)}) AND deleted_at IS NULL" + cur.execute(query) + for uid, tel in cur.fetchall(): + for p in chunk: + if p["phone"][:3] == tel[:3] and p["phone"][-4:] == tel[-4:]: + phone_to_uid[p["phone"]] = str(uid) + break + time.sleep(0.05) + cur.close(); conn.close() + print(f"Phone->UID matches: {len(phone_to_uid)}") + + # Step 3: Collect all UIDs + all_uids = set(existing_uids) + for uid in phone_to_uid.values(): + all_uids.add(uid) + print(f"Total UIDs to query PG: {len(all_uids)}") + + # Step 4: PostgreSQL queries + uid_reg = {} # uid -> {created_at, download_channel} + uid_trial = {} # uid -> count + + if all_uids: + conn = psycopg2.connect(**PG_CONFIG) + cur = conn.cursor() + uid_list = list(all_uids) + cs = 100 + for start in range(0, len(uid_list), cs): + chunk = uid_list[start:start+cs] + ph = ",".join(["%s"]*len(chunk)) + cur.execute(f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account WHERE id IN ({ph}) AND status=1 AND deleted_at IS NULL", chunk) + for uid, ca, dc in cur.fetchall(): + uid_reg[str(uid)] = {"created_at": str(ca) if ca else "", "download_channel": dc or ""} + time.sleep(0.05) + for start in range(0, len(uid_list), cs): + chunk = uid_list[start:start+cs] + ph = ",".join(["%s"]*len(chunk)) + cur.execute(f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({ph}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id", chunk) + for uid, cnt in cur.fetchall(): + uid_trial[str(uid)] = cnt + time.sleep(0.05) + cur.close(); conn.close() + print(f"Reg info: {len(uid_reg)}, Trial counts: {len(uid_trial)}") + + # Step 5: Build full column arrays for batch write + # We have 1140 rows (A3:J1142). Build arrays for H, D, I, J. + # For each row, determine what to write. + N = len(parsed) + h_vals = [""] * N + d_vals = [""] * N + i_vals = [""] * N + j_vals = [""] * N + + h_written_count = 0 + d_written_count = 0 + i_written_count = 0 + j_written_count = 0 + + for p in parsed: + i = p["idx"] + phone = p["phone"] + h_val = p["h_val"] + + # Determine UID + uid = None + if re.match(r'^\d{11}$', phone) and phone in phone_to_uid: + uid = phone_to_uid[phone] + elif h_val.isdigit(): + uid = h_val + + if not uid: + continue + + # H: write UID if phone matched and H was empty + if re.match(r'^\d{11}$', phone) and phone in phone_to_uid: + if not h_val or h_val == "未注册": + h_vals[i] = uid + h_written_count += 1 + + # D: trial count + if uid in uid_trial: + d_vals[i] = str(uid_trial[uid]) + d_written_count += 1 + + # I: reg date + if uid in uid_reg and uid_reg[uid]["created_at"]: + i_vals[i] = uid_reg[uid]["created_at"] + i_written_count += 1 + + # J: channel + if uid in uid_reg and uid_reg[uid]["download_channel"]: + j_vals[i] = uid_reg[uid]["download_channel"] + j_written_count += 1 + + # Step 6: Write in full column batches + # Write H column + h_range = f"{SHEET_ID}!H3:H{2+N}" + h_values = [[v] for v in h_vals] + if any(v for v in h_vals): + write_range(h_range, h_values) + time.sleep(0.1) + + # Write D column + d_range = f"{SHEET_ID}!D3:D{2+N}" + d_values = [[v] for v in d_vals] + if any(v for v in d_vals): + write_range(d_range, d_values) + time.sleep(0.1) + + # Write I column + i_range = f"{SHEET_ID}!I3:I{2+N}" + i_values = [[v] for v in i_vals] + if any(v for v in i_vals): + write_range(i_range, i_values) + time.sleep(0.1) + + # Write J column + j_range = f"{SHEET_ID}!J3:J{2+N}" + j_values = [[v] for v in j_vals] + if any(v for v in j_vals): + write_range(j_range, j_values) + time.sleep(0.1) + + print(f"\n=== SUMMARY ===") + print(f"Phones matched in MySQL: {len(phone_to_uid)}") + print(f"H column (UID) written: {h_written_count}") + print(f"D column (trial count) written: {d_written_count}") + print(f"I column (reg date) written: {i_written_count}") + print(f"J column (channel) written: {j_written_count}") + +if __name__ == "__main__": + main() diff --git a/scripts/xiaolong_cleanup.py b/scripts/xiaolong_cleanup.py new file mode 100644 index 0000000..96e4e17 --- /dev/null +++ b/scripts/xiaolong_cleanup.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +""" +Final cleanup: write ALL columns for matched rows to clear stale data +""" +import json, sys, time, re +import psycopg2, psycopg2.extras +import requests + +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone + +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "qJF4I" +FEISHU_APP_ID = "cli_a929ae22e0b8dcc8" +FEISHU_APP_SECRET = "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ" +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = 28591 +PG_USER = "ai_member" +PG_PASSWORD = "LdfjdjL83h3h3^$&**YGG*" +PG_DB = "vala_bi" + +def get_token(): + r = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + json={"app_id": FEISHU_APP_ID, "app_secret": FEISHU_APP_SECRET}) + return r.json()['tenant_access_token'] + +TOKEN = get_token() + +# Read sheet +r = requests.get( + f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:V2512?valueRenderOption=ToString', + headers={'Authorization': f'Bearer {TOKEN}'} +) +rows = r.json()['data']['valueRange']['values'] + +# Extract phones +phone_map = {} +for i, row in enumerate(rows): + if len(row) > 4 and row[4]: + phone = str(row[4]).strip() + if re.match(r'^1\d{10}$', phone): + phone_map[i] = phone + +# Encrypt +enc_to_idx = {} +for idx, phone in phone_map.items(): + try: + enc = encrypt_phone(phone) + enc_to_idx[enc] = idx + except: + pass + +# Match UIDs +conn = psycopg2.connect(host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB, connect_timeout=30) +cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + +enc_list = list(enc_to_idx.keys()) +uid_map = {} +batch_size = 500 +for start in range(0, len(enc_list), batch_size): + batch = enc_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT id, tel_encrypt, created_at::date, download_channel + FROM bi_vala_app_account + WHERE tel_encrypt IN ({placeholders}) + AND status = 1 AND deleted_at IS NULL + """, batch) + for row in cur.fetchall(): + enc = row['tel_encrypt'] + if enc in enc_to_idx: + idx = enc_to_idx[enc] + uid_map[idx] = {'uid': row['id'], 'created_at': str(row['created_at']) if row['created_at'] else None, 'download_channel': row['download_channel']} + +# Trial counts +uid_list = list(set(v['uid'] for v in uid_map.values())) +trial_counts = {} +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT account_id, COUNT(*) as cnt FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id + """, batch) + for row in cur.fetchall(): + trial_counts[row['account_id']] = row['cnt'] + +# Orders +order_data = {} +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT o.account_id, o.trade_no, o.pay_success_date::date as pay_date, + o.key_from, o.pay_amount_int, o.order_status, o.out_trade_no + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL + WHERE o.account_id IN ({placeholders}) + AND o.pay_success_date IS NOT NULL AND o.order_status IN (3, 4) + ORDER BY o.account_id, o.pay_success_date + """, batch) + for row in cur.fetchall(): + uid = row['account_id'] + if uid not in order_data: + order_data[uid] = [] + order_data[uid].append({ + 'trade_no': row['trade_no'], 'pay_date': str(row['pay_date']) if row['pay_date'] else None, + 'key_from': row['key_from'], 'pay_amount_int': row['pay_amount_int'], + 'order_status': row['order_status'], 'out_trade_no': row['out_trade_no'] + }) + +# Refunds +all_trade_nos = [] +for uid, orders in order_data.items(): + for o in orders: + all_trade_nos.append(o['trade_no']) + +refund_map = {} +if all_trade_nos: + for start in range(0, len(all_trade_nos), batch_size): + batch = all_trade_nos[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT r.trade_no, r.refund_amount FROM bi_refund_order r + JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4 + WHERE r.trade_no IN ({placeholders}) AND r.status = 3 + """, batch) + for row in cur.fetchall(): + refund_map[row['trade_no']] = int(float(row['refund_amount'])) if row['refund_amount'] else 0 + +cur.close() +conn.close() + +# Build complete row data for ALL matched rows +# For each row, compute ALL columns D(3), H(7), I(8), J(9), K(10), L(11), M(12), O(14), P(15), Q(16) +# Use '' for empty cells to clear stale data + +col_letters = 'ABCDEFGHIJKLMNOPQRSTUV' + +# Find rows where we need to write (any column differs from current) +writes_needed = [] + +for idx in range(len(rows)): + if idx not in uid_map: + continue + + info = uid_map[idx] + uid = info['uid'] + + # Build expected values for all relevant columns + row_expected = {} + + # D(3): trial count + row_expected[3] = trial_counts.get(uid, 0) + + # H(7): UID + row_expected[7] = str(uid) + + # I(8): register date + row_expected[8] = info['created_at'] if info['created_at'] else '' + + # J(9): download channel + row_expected[9] = info['download_channel'] if info['download_channel'] else '' + + # K(10): has order + orders = order_data.get(uid, []) + if orders: + row_expected[10] = '是' + first = orders[0] + row_expected[11] = first['pay_date'] if first['pay_date'] else '' + row_expected[12] = first['key_from'] if first['key_from'] else '' + + total_gmv = sum(o['pay_amount_int'] for o in orders) + gmv_yuan = total_gmv / 100.0 + row_expected[14] = gmv_yuan if gmv_yuan > 0 else '' + + total_refund = sum(refund_map.get(o['trade_no'], 0) for o in orders) + if total_refund > 0: + row_expected[15] = total_refund + row_expected[16] = gmv_yuan - total_refund + else: + row_expected[15] = '' + row_expected[16] = '' + else: + # No orders - clear K/O/P/Q + row_expected[10] = '' + row_expected[11] = '' + row_expected[12] = '' + row_expected[14] = '' + row_expected[15] = '' + row_expected[16] = '' + + # Check which columns actually need updating + row_data = rows[idx] if idx < len(rows) else [] + for col, val in row_expected.items(): + current = str(row_data[col]) if col < len(row_data) and row_data[col] is not None else '' + current = current.strip() + expected_str = str(val).strip() + if current != expected_str: + writes_needed.append((idx, col, val)) + +print(f"Cells needing update: {len(writes_needed)}") + +# Write single cells +success = 0 +fail = 0 + +for i, (row_idx, col_idx, val) in enumerate(writes_needed): + row_num = row_idx + 3 + col = col_letters[col_idx] + range_str = f"{SHEET_ID}!{col}{row_num}:{col}{row_num}" + + # For empty values, write empty string + body = {"valueRange": {"range": range_str, "values": [[val]]}} + + for attempt in range(3): + try: + r = requests.put( + f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values', + headers={'Authorization': f'Bearer {TOKEN}', 'Content-Type': 'application/json'}, + json=body, timeout=15 + ) + resp = r.json() + if resp.get('code') == 0: + success += 1 + break + elif resp.get('code') == 90217: + time.sleep((attempt + 1) * 3) + elif resp.get('code') == 99991663: + TOKEN = get_token() + time.sleep(0.5) + else: + if attempt < 2: + time.sleep(1) + else: + fail += 1 + print(f" FAIL [{range_str}]: {resp.get('code')} {resp.get('msg')}") + except Exception as e: + if attempt < 2: + time.sleep(1) + else: + fail += 1 + print(f" EXCEPTION [{range_str}]: {e}") + + if (i + 1) % 200 == 0: + print(f" Progress: {i+1}/{len(writes_needed)} (success={success}, fail={fail})") + + time.sleep(0.15) + +print(f"\n=== Cleanup Complete ===") +print(f"Success: {success}, Fail: {fail}") diff --git a/scripts/xiaolong_refresh.py b/scripts/xiaolong_refresh.py new file mode 100644 index 0000000..e820a2f --- /dev/null +++ b/scripts/xiaolong_refresh.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +""" +小龙 sheet (qJF4I) 全量刷新脚本 +读取 A3:V2512 → 加密手机号 → 匹配UID → 查询各列 → 回写飞书 +""" +import json +import sys +import time +import re +import psycopg2 +import psycopg2.extras +import requests + +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone + +# ── Config ── +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "qJF4I" +FEISHU_APP_ID = "cli_a929ae22e0b8dcc8" +FEISHU_APP_SECRET = "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ" +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = 28591 +PG_USER = "ai_member" +PG_PASSWORD = "LdfjdjL83h3h3^$&**YGG*" +PG_DB = "vala_bi" + +# ── Feishu token ── +def get_token(): + r = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + json={"app_id": FEISHU_APP_ID, "app_secret": FEISHU_APP_SECRET}) + return r.json()['tenant_access_token'] + +TOKEN = get_token() + +# ── Load raw data ── +with open('/tmp/xiaolong_raw.json') as f: + raw = json.load(f) + +rows = raw['data']['valueRange']['values'] +print(f"Loaded {len(rows)} rows from sheet") + +# ── Extract phone numbers (column E, index 4) ── +phone_map = {} # row_index -> phone +row_phones = [] # (row_index, phone) +for i, row in enumerate(rows): + if len(row) > 4 and row[4]: + phone = str(row[4]).strip() + if re.match(r'^1\d{10}$', phone): + phone_map[i] = phone + row_phones.append((i, phone)) + +print(f"Found {len(phone_map)} valid phone numbers") + +# ── Encrypt phones ── +enc_to_phone = {} +for idx, phone in row_phones: + try: + enc = encrypt_phone(phone) + enc_to_phone[enc] = (idx, phone) + except Exception as e: + print(f" Encrypt error for {phone}: {e}") + +print(f"Encrypted {len(enc_to_phone)} phones") + +# ── PostgreSQL: match UIDs ── +conn = psycopg2.connect( + host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB, + connect_timeout=30 +) +cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + +# Batch match: get all tel_encrypt values that match +enc_list = list(enc_to_phone.keys()) +uid_map = {} # row_index -> account_id +batch_size = 500 +for start in range(0, len(enc_list), batch_size): + batch = enc_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT id, tel_encrypt, created_at::date, download_channel + FROM bi_vala_app_account + WHERE tel_encrypt IN ({placeholders}) + AND status = 1 AND deleted_at IS NULL + """, batch) + for row in cur.fetchall(): + enc = row['tel_encrypt'] + if enc in enc_to_phone: + idx, phone = enc_to_phone[enc] + uid_map[idx] = { + 'uid': row['id'], + 'created_at': str(row['created_at']) if row['created_at'] else None, + 'download_channel': row['download_channel'] + } + +print(f"Matched {len(uid_map)} UIDs from PostgreSQL") + +# ── Query D column: trial course count ── +uid_list = list(set(v['uid'] for v in uid_map.values())) +trial_counts = {} # uid -> count +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT account_id, COUNT(*) as cnt + FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) + AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id + """, batch) + for row in cur.fetchall(): + trial_counts[row['account_id']] = row['cnt'] + +print(f"Got trial counts for {len(trial_counts)} users") + +# ── Query K-V: orders ── +# K: has order (是/空), L: pay_success_date, M: key_from, O: GMV +order_data = {} # uid -> list of orders +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT o.account_id, o.trade_no, o.pay_success_date::date as pay_date, + o.key_from, o.pay_amount_int, o.order_status, o.out_trade_no + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL + WHERE o.account_id IN ({placeholders}) + AND o.pay_success_date IS NOT NULL + AND o.order_status IN (3, 4) + ORDER BY o.account_id, o.pay_success_date + """, batch) + for row in cur.fetchall(): + uid = row['account_id'] + if uid not in order_data: + order_data[uid] = [] + order_data[uid].append({ + 'trade_no': row['trade_no'], + 'pay_date': str(row['pay_date']) if row['pay_date'] else None, + 'key_from': row['key_from'], + 'pay_amount_int': row['pay_amount_int'], + 'order_status': row['order_status'], + 'out_trade_no': row['out_trade_no'] + }) + +print(f"Got orders for {len(order_data)} users") + +# ── Query refunds ── +# Get all trade_nos from orders +all_trade_nos = [] +for uid, orders in order_data.items(): + for o in orders: + all_trade_nos.append(o['trade_no']) + +refund_map = {} # trade_no -> refund_amount +if all_trade_nos: + for start in range(0, len(all_trade_nos), batch_size): + batch = all_trade_nos[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT r.trade_no, r.refund_amount + FROM bi_refund_order r + JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4 + WHERE r.trade_no IN ({placeholders}) + AND r.status = 3 + """, batch) + for row in cur.fetchall(): + refund_map[row['trade_no']] = int(float(row['refund_amount'])) if row['refund_amount'] else 0 + +print(f"Got refunds for {len(refund_map)} trade_nos") + +cur.close() +conn.close() + +# ── Build write data ── +# Columns: A=0,B=1,C=2,D=3,E=4,F=5,G=6,H=7,I=8,J=9,K=10,L=11,M=12,N=13,O=14,P=15,Q=16,R=17,S=18,T=19,U=20,V=21 +# H(7): UID, D(3): trial count, I(8): register date, J(9): download channel +# K(10): has order, L(11): pay date, M(12): key_from, O(14): GMV, P(15): refund, Q(16): GSV + +writes = [] # (row_index, col_index, value) + +stats = { + 'H': 0, 'D': 0, 'I': 0, 'J': 0, + 'K': 0, 'L': 0, 'M': 0, 'O': 0, 'P': 0, 'Q': 0 +} + +suspicious_refund_rows = [] # rows with refund but order_status=3 (shouldn't happen per filter) + +for idx in range(len(rows)): + if idx not in uid_map: + continue + + info = uid_map[idx] + uid = info['uid'] + + # H: UID + writes.append((idx, 7, str(uid))) + stats['H'] += 1 + + # D: trial count + tc = trial_counts.get(uid, 0) + writes.append((idx, 3, tc)) + stats['D'] += 1 + + # I: register date + if info['created_at']: + writes.append((idx, 8, info['created_at'])) + stats['I'] += 1 + + # J: download channel + if info['download_channel']: + writes.append((idx, 9, info['download_channel'])) + stats['J'] += 1 + + # K-V: orders + orders = order_data.get(uid, []) + if orders: + writes.append((idx, 10, '是')) + stats['K'] += 1 + + # L: first pay_success_date + first_order = orders[0] + if first_order['pay_date']: + writes.append((idx, 11, first_order['pay_date'])) + stats['L'] += 1 + + # M: first key_from + if first_order['key_from']: + writes.append((idx, 12, first_order['key_from'])) + stats['M'] += 1 + + # O: total GMV = sum(pay_amount_int/100) + total_gmv = 0 + for o in orders: + total_gmv += o['pay_amount_int'] + gmv_yuan = total_gmv / 100.0 + if gmv_yuan > 0: + writes.append((idx, 14, gmv_yuan)) + stats['O'] += 1 + + # P: total refund + total_refund = 0 + for o in orders: + refund_amt = refund_map.get(o['trade_no'], 0) + total_refund += refund_amt + # Check for suspicious: refund exists but order_status=3 + if refund_amt > 0 and o['order_status'] == 3: + suspicious_refund_rows.append(idx + 3) # +3 for 1-based row + + refund_yuan = int(total_refund) # refund_amount already in yuan + if refund_yuan > 0: + writes.append((idx, 15, int(refund_yuan))) + stats['P'] += 1 + + # Q: GSV = O - P + gsv = gmv_yuan - refund_yuan + if gsv > 0: + writes.append((idx, 16, gsv)) + stats['Q'] += 1 + elif gsv == 0: + # GSV is 0, still write it + writes.append((idx, 16, 0)) + stats['Q'] += 1 + +print(f"\n=== Stats ===") +print(f"H (UID): {stats['H']}") +print(f"D (trial): {stats['D']}") +print(f"I (register): {stats['I']}") +print(f"J (channel): {stats['J']}") +print(f"K (has order): {stats['K']}") +print(f"L (pay date): {stats['L']}") +print(f"M (key_from): {stats['M']}") +print(f"O (GMV): {stats['O']}") +print(f"P (refund): {stats['P']}") +print(f"Q (GSV): {stats['Q']}") +print(f"Total writes: {len(writes)}") +if suspicious_refund_rows: + print(f"Suspicious refund rows: {suspicious_refund_rows}") + +# ── Write back to Feishu ── +# Group writes by row and column range for batch efficiency +# We'll write row by row for simplicity, but merge consecutive columns +# Actually, let's group by row and write per-row ranges + +# Sort writes by row then column +writes.sort(key=lambda x: (x[0], x[1])) + +# Group consecutive columns in same row +def group_writes(writes): + """Group writes into ranges of consecutive columns per row""" + if not writes: + return [] + groups = [] + current = [writes[0]] + for w in writes[1:]: + last = current[-1] + if w[0] == last[0] and w[1] == last[1] + 1: + current.append(w) + else: + groups.append(current) + current = [w] + groups.append(current) + return groups + +groups = group_writes(writes) +print(f"\nGrouped into {len(groups)} write ranges") + +# Write in batches +col_letters = 'ABCDEFGHIJKLMNOPQRSTUV' +def col_letter(idx): + return col_letters[idx] if idx < len(col_letters) else f'Col{idx}' + +batch_writes = [] +for g in groups: + row_idx = g[0][0] + row_num = row_idx + 3 # 1-based row in sheet + start_col = g[0][1] + end_col = g[-1][1] + start_letter = col_letter(start_col) + end_letter = col_letter(end_col) + range_str = f"{SHEET_ID}!{start_letter}{row_num}:{end_letter}{row_num}" + + # Build values: one row with values for each column in range + values = [None] * (end_col - start_col + 1) + for w in g: + values[w[1] - start_col] = w[2] + + batch_writes.append((range_str, [values])) + +print(f"Prepared {len(batch_writes)} batch writes") + +# Execute writes +success = 0 +fail = 0 +for i, (range_str, values) in enumerate(batch_writes): + body = {"valueRange": {"range": range_str, "values": values}} + try: + r = requests.put( + f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values', + headers={'Authorization': f'Bearer {TOKEN}', 'Content-Type': 'application/json'}, + json=body, + timeout=15 + ) + resp = r.json() + if resp.get('code') == 0: + success += 1 + else: + fail += 1 + if fail <= 5: + print(f" Write error [{range_str}]: {resp.get('code')} {resp.get('msg')}") + except Exception as e: + fail += 1 + if fail <= 5: + print(f" Exception [{range_str}]: {e}") + + if (i + 1) % 100 == 0: + print(f" Progress: {i+1}/{len(batch_writes)} (success={success}, fail={fail})") + + time.sleep(0.03) + +print(f"\n=== Write Complete ===") +print(f"Success: {success}, Fail: {fail}") + +# ── Final summary ── +print(f"\n=== FINAL SUMMARY ===") +print(f"H列(UID): {stats['H']} 行") +print(f"D列(体验节数): {stats['D']} 行") +print(f"I列(注册日期): {stats['I']} 行") +print(f"J列(下载渠道): {stats['J']} 行") +print(f"K列(有订单): {stats['K']} 行") +print(f"L列(下单日): {stats['L']} 行") +print(f"M列(成交渠道): {stats['M']} 行") +print(f"O列(GMV): {stats['O']} 行") +print(f"P列(退款): {stats['P']} 行") +print(f"Q列(GSV): {stats['Q']} 行") +if suspicious_refund_rows: + print(f"疑似假退款行号: {suspicious_refund_rows}") +else: + print("疑似假退款: 无") diff --git a/scripts/xiaolong_retry.py b/scripts/xiaolong_retry.py new file mode 100644 index 0000000..6592ff4 --- /dev/null +++ b/scripts/xiaolong_retry.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +""" +Retry failed writes for 小龙 sheet - with rate limiting handling +""" +import json +import sys +import time +import re +import psycopg2 +import psycopg2.extras +import requests + +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone + +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "qJF4I" +FEISHU_APP_ID = "cli_a929ae22e0b8dcc8" +FEISHU_APP_SECRET = "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ" +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = 28591 +PG_USER = "ai_member" +PG_PASSWORD = "LdfjdjL83h3h3^$&**YGG*" +PG_DB = "vala_bi" + +def get_token(): + r = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + json={"app_id": FEISHU_APP_ID, "app_secret": FEISHU_APP_SECRET}) + return r.json()['tenant_access_token'] + +TOKEN = get_token() + +# Re-read the sheet to find empty cells that should have data +r = requests.get( + f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:V2512?valueRenderOption=ToString', + headers={'Authorization': f'Bearer {TOKEN}'} +) +rows = r.json()['data']['valueRange']['values'] +print(f"Re-read {len(rows)} rows") + +# Extract phones again +phone_map = {} +for i, row in enumerate(rows): + if len(row) > 4 and row[4]: + phone = str(row[4]).strip() + if re.match(r'^1\d{10}$', phone): + phone_map[i] = phone + +print(f"Found {len(phone_map)} phones") + +# Encrypt +enc_to_idx = {} +for idx, phone in phone_map.items(): + try: + enc = encrypt_phone(phone) + enc_to_idx[enc] = idx + except: + pass + +print(f"Encrypted {len(enc_to_idx)}") + +# Match UIDs +conn = psycopg2.connect(host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB, connect_timeout=30) +cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + +enc_list = list(enc_to_idx.keys()) +uid_map = {} +batch_size = 500 +for start in range(0, len(enc_list), batch_size): + batch = enc_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT id, tel_encrypt, created_at::date, download_channel + FROM bi_vala_app_account + WHERE tel_encrypt IN ({placeholders}) + AND status = 1 AND deleted_at IS NULL + """, batch) + for row in cur.fetchall(): + enc = row['tel_encrypt'] + if enc in enc_to_idx: + idx = enc_to_idx[enc] + uid_map[idx] = {'uid': row['id'], 'created_at': str(row['created_at']) if row['created_at'] else None, 'download_channel': row['download_channel']} + +print(f"Matched {len(uid_map)} UIDs") + +# Query trial counts +uid_list = list(set(v['uid'] for v in uid_map.values())) +trial_counts = {} +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT account_id, COUNT(*) as cnt FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id + """, batch) + for row in cur.fetchall(): + trial_counts[row['account_id']] = row['cnt'] + +# Query orders +order_data = {} +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT o.account_id, o.trade_no, o.pay_success_date::date as pay_date, + o.key_from, o.pay_amount_int, o.order_status, o.out_trade_no + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL + WHERE o.account_id IN ({placeholders}) + AND o.pay_success_date IS NOT NULL AND o.order_status IN (3, 4) + ORDER BY o.account_id, o.pay_success_date + """, batch) + for row in cur.fetchall(): + uid = row['account_id'] + if uid not in order_data: + order_data[uid] = [] + order_data[uid].append({ + 'trade_no': row['trade_no'], 'pay_date': str(row['pay_date']) if row['pay_date'] else None, + 'key_from': row['key_from'], 'pay_amount_int': row['pay_amount_int'], + 'order_status': row['order_status'], 'out_trade_no': row['out_trade_no'] + }) + +# Query refunds +all_trade_nos = [] +for uid, orders in order_data.items(): + for o in orders: + all_trade_nos.append(o['trade_no']) + +refund_map = {} +if all_trade_nos: + for start in range(0, len(all_trade_nos), batch_size): + batch = all_trade_nos[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT r.trade_no, r.refund_amount FROM bi_refund_order r + JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4 + WHERE r.trade_no IN ({placeholders}) AND r.status = 3 + """, batch) + for row in cur.fetchall(): + refund_map[row['trade_no']] = int(float(row['refund_amount'])) if row['refund_amount'] else 0 + +cur.close() +conn.close() + +# Build expected values +expected = {} # (row_idx, col_idx) -> value +for idx in range(len(rows)): + if idx not in uid_map: + continue + info = uid_map[idx] + uid = info['uid'] + + expected[(idx, 7)] = str(uid) # H + + tc = trial_counts.get(uid, 0) + expected[(idx, 3)] = tc # D + + if info['created_at']: + expected[(idx, 8)] = info['created_at'] # I + + if info['download_channel']: + expected[(idx, 9)] = info['download_channel'] # J + + orders = order_data.get(uid, []) + if orders: + expected[(idx, 10)] = '是' # K + first = orders[0] + if first['pay_date']: + expected[(idx, 11)] = first['pay_date'] # L + if first['key_from']: + expected[(idx, 12)] = first['key_from'] # M + + total_gmv = sum(o['pay_amount_int'] for o in orders) + gmv_yuan = total_gmv / 100.0 + if gmv_yuan > 0: + expected[(idx, 14)] = gmv_yuan # O + + total_refund = sum(refund_map.get(o['trade_no'], 0) for o in orders) + if total_refund > 0: + expected[(idx, 15)] = total_refund # P (already yuan) + gsv = gmv_yuan - total_refund + expected[(idx, 16)] = gsv # Q + +# Now check which cells are empty in the sheet +missing = [] +for (idx, col), val in expected.items(): + row_data = rows[idx] if idx < len(rows) else [] + current = str(row_data[col]) if col < len(row_data) and row_data[col] is not None else '' + current = current.strip() + + # Check if current value matches expected + expected_str = str(val).strip() + if current != expected_str: + missing.append((idx, col, val)) + +print(f"Found {len(missing)} missing/incorrect cells to retry") + +# Write missing cells with longer delays +col_letters = 'ABCDEFGHIJKLMNOPQRSTUV' +def col_letter(idx): + return col_letters[idx] if idx < len(col_letters) else f'Col{idx}' + +# Group by row +from collections import defaultdict +by_row = defaultdict(list) +for idx, col, val in missing: + by_row[idx].append((col, val)) + +# Sort each row's columns +for idx in by_row: + by_row[idx].sort(key=lambda x: x[0]) + +success = 0 +fail = 0 +total = len(by_row) + +for i, (row_idx, cols) in enumerate(sorted(by_row.items())): + row_num = row_idx + 3 + start_col = cols[0][0] + end_col = cols[-1][0] + start_letter = col_letter(start_col) + end_letter = col_letter(end_col) + range_str = f"{SHEET_ID}!{start_letter}{row_num}:{end_letter}{row_num}" + + values = [None] * (end_col - start_col + 1) + for col, val in cols: + values[col - start_col] = val + + body = {"valueRange": {"range": range_str, "values": [values]}} + + max_retries = 3 + for attempt in range(max_retries): + try: + r = requests.put( + f'https://open.feishu.cn/open-apis/spreadsheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values', + headers={'Authorization': f'Bearer {TOKEN}', 'Content-Type': 'application/json'}, + json=body, timeout=15 + ) + resp = r.json() + if resp.get('code') == 0: + success += 1 + break + elif resp.get('code') == 90217: # rate limit + if attempt < max_retries - 1: + wait = (attempt + 1) * 2 + time.sleep(wait) + else: + fail += 1 + print(f" FAILED (rate limit) [{range_str}]: {resp.get('msg')}") + else: + if attempt < max_retries - 1: + time.sleep(1) + else: + fail += 1 + print(f" FAILED [{range_str}]: {resp.get('code')} {resp.get('msg')}") + except Exception as e: + if attempt < max_retries - 1: + time.sleep(1) + else: + fail += 1 + print(f" EXCEPTION [{range_str}]: {e}") + + if (i + 1) % 50 == 0: + print(f" Retry progress: {i+1}/{total} (success={success}, fail={fail})") + + time.sleep(0.1) # Longer delay for retries + +print(f"\n=== Retry Complete ===") +print(f"Success: {success}, Fail: {fail}") diff --git a/scripts/xiaolong_retry2.py b/scripts/xiaolong_retry2.py new file mode 100644 index 0000000..44b9885 --- /dev/null +++ b/scripts/xiaolong_retry2.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +""" +Retry v2: Single-cell writes with fresh token, proper error handling +""" +import json, sys, time, re +import psycopg2, psycopg2.extras +import requests + +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone + +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "qJF4I" +FEISHU_APP_ID = "cli_a929ae22e0b8dcc8" +FEISHU_APP_SECRET = "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ" +PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" +PG_PORT = 28591 +PG_USER = "ai_member" +PG_PASSWORD = "LdfjdjL83h3h3^$&**YGG*" +PG_DB = "vala_bi" + +def get_token(): + r = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + json={"app_id": FEISHU_APP_ID, "app_secret": FEISHU_APP_SECRET}) + return r.json()['tenant_access_token'] + +def write_cell(token, row_num, col_letter, value): + """Write a single cell. Returns True on success.""" + range_str = f"{SHEET_ID}!{col_letter}{row_num}:{col_letter}{row_num}" + body = {"valueRange": {"range": range_str, "values": [[value]]}} + for attempt in range(3): + try: + r = requests.put( + f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values', + headers={'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}, + json=body, timeout=15 + ) + text = r.text.strip() + try: + resp = json.loads(text) + except json.JSONDecodeError: + print(f" JSON parse error for [{range_str}]: {text[:200]}") + time.sleep(1) + continue + + if resp.get('code') == 0: + return True + elif resp.get('code') == 90217: + wait = (attempt + 1) * 3 + time.sleep(wait) + elif resp.get('code') == 99991663: # token expired + return 'TOKEN_EXPIRED' + else: + if attempt < 2: + time.sleep(1) + else: + print(f" FAIL [{range_str}]: {resp.get('code')} {resp.get('msg')}") + except Exception as e: + if attempt < 2: + time.sleep(1) + else: + print(f" EXCEPTION [{range_str}]: {e}") + return False + +TOKEN = get_token() +print(f"Token: {TOKEN[:20]}...") + +# Read sheet +r = requests.get( + f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:V2512?valueRenderOption=ToString', + headers={'Authorization': f'Bearer {TOKEN}'} +) +rows = r.json()['data']['valueRange']['values'] +print(f"Read {len(rows)} rows") + +# Extract phones +phone_map = {} +for i, row in enumerate(rows): + if len(row) > 4 and row[4]: + phone = str(row[4]).strip() + if re.match(r'^1\d{10}$', phone): + phone_map[i] = phone + +print(f"Found {len(phone_map)} phones") + +# Encrypt +enc_to_idx = {} +for idx, phone in phone_map.items(): + try: + enc = encrypt_phone(phone) + enc_to_idx[enc] = idx + except: + pass + +# Match UIDs +conn = psycopg2.connect(host=PG_HOST, port=PG_PORT, user=PG_USER, password=PG_PASSWORD, dbname=PG_DB, connect_timeout=30) +cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + +enc_list = list(enc_to_idx.keys()) +uid_map = {} +batch_size = 500 +for start in range(0, len(enc_list), batch_size): + batch = enc_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT id, tel_encrypt, created_at::date, download_channel + FROM bi_vala_app_account + WHERE tel_encrypt IN ({placeholders}) + AND status = 1 AND deleted_at IS NULL + """, batch) + for row in cur.fetchall(): + enc = row['tel_encrypt'] + if enc in enc_to_idx: + idx = enc_to_idx[enc] + uid_map[idx] = {'uid': row['id'], 'created_at': str(row['created_at']) if row['created_at'] else None, 'download_channel': row['download_channel']} + +print(f"Matched {len(uid_map)} UIDs") + +# Trial counts +uid_list = list(set(v['uid'] for v in uid_map.values())) +trial_counts = {} +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT account_id, COUNT(*) as cnt FROM bi_user_course_detail + WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL + GROUP BY account_id + """, batch) + for row in cur.fetchall(): + trial_counts[row['account_id']] = row['cnt'] + +# Orders +order_data = {} +for start in range(0, len(uid_list), batch_size): + batch = uid_list[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT o.account_id, o.trade_no, o.pay_success_date::date as pay_date, + o.key_from, o.pay_amount_int, o.order_status, o.out_trade_no + FROM bi_vala_order o + JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL + WHERE o.account_id IN ({placeholders}) + AND o.pay_success_date IS NOT NULL AND o.order_status IN (3, 4) + ORDER BY o.account_id, o.pay_success_date + """, batch) + for row in cur.fetchall(): + uid = row['account_id'] + if uid not in order_data: + order_data[uid] = [] + order_data[uid].append({ + 'trade_no': row['trade_no'], 'pay_date': str(row['pay_date']) if row['pay_date'] else None, + 'key_from': row['key_from'], 'pay_amount_int': row['pay_amount_int'], + 'order_status': row['order_status'], 'out_trade_no': row['out_trade_no'] + }) + +# Refunds +all_trade_nos = [] +for uid, orders in order_data.items(): + for o in orders: + all_trade_nos.append(o['trade_no']) + +refund_map = {} +if all_trade_nos: + for start in range(0, len(all_trade_nos), batch_size): + batch = all_trade_nos[start:start+batch_size] + placeholders = ','.join(['%s'] * len(batch)) + cur.execute(f""" + SELECT r.trade_no, r.refund_amount FROM bi_refund_order r + JOIN bi_vala_order o ON r.trade_no = o.trade_no AND o.order_status = 4 + WHERE r.trade_no IN ({placeholders}) AND r.status = 3 + """, batch) + for row in cur.fetchall(): + refund_map[row['trade_no']] = int(float(row['refund_amount'])) if row['refund_amount'] else 0 + +cur.close() +conn.close() + +# Build expected values +col_letters = 'ABCDEFGHIJKLMNOPQRSTUV' +expected = {} # (row_idx, col_idx) -> value + +for idx in range(len(rows)): + if idx not in uid_map: + continue + info = uid_map[idx] + uid = info['uid'] + + expected[(idx, 7)] = str(uid) # H + + tc = trial_counts.get(uid, 0) + expected[(idx, 3)] = tc # D + + if info['created_at']: + expected[(idx, 8)] = info['created_at'] # I + + if info['download_channel']: + expected[(idx, 9)] = info['download_channel'] # J + + orders = order_data.get(uid, []) + if orders: + expected[(idx, 10)] = '是' # K + first = orders[0] + if first['pay_date']: + expected[(idx, 11)] = first['pay_date'] # L + if first['key_from']: + expected[(idx, 12)] = first['key_from'] # M + + total_gmv = sum(o['pay_amount_int'] for o in orders) + gmv_yuan = total_gmv / 100.0 + if gmv_yuan > 0: + expected[(idx, 14)] = gmv_yuan # O + + total_refund = sum(refund_map.get(o['trade_no'], 0) for o in orders) + if total_refund > 0: + expected[(idx, 15)] = total_refund # P + gsv = gmv_yuan - total_refund + expected[(idx, 16)] = gsv # Q + +# Find missing cells +missing = [] +for (idx, col), val in expected.items(): + row_data = rows[idx] if idx < len(rows) else [] + current = str(row_data[col]) if col < len(row_data) and row_data[col] is not None else '' + current = current.strip() + expected_str = str(val).strip() + if current != expected_str: + missing.append((idx, col, val)) + +print(f"Missing cells to write: {len(missing)}") + +# Write one by one with delays +success = 0 +fail = 0 +token_refreshes = 0 + +for i, (row_idx, col_idx, val) in enumerate(missing): + row_num = row_idx + 3 + col = col_letters[col_idx] + + result = write_cell(TOKEN, row_num, col, val) + + if result == 'TOKEN_EXPIRED': + TOKEN = get_token() + token_refreshes += 1 + print(f" Token refreshed ({token_refreshes})") + result = write_cell(TOKEN, row_num, col, val) + + if result is True: + success += 1 + else: + fail += 1 + + if (i + 1) % 100 == 0: + print(f" Progress: {i+1}/{len(missing)} (success={success}, fail={fail})") + + time.sleep(0.15) # ~6-7 writes/sec to avoid rate limits + +print(f"\n=== Retry Complete ===") +print(f"Success: {success}, Fail: {fail}") +print(f"Token refreshes: {token_refreshes}") diff --git a/scripts/xiaolong_sheet_update.py b/scripts/xiaolong_sheet_update.py new file mode 100644 index 0000000..b493531 --- /dev/null +++ b/scripts/xiaolong_sheet_update.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +小龙 sheet update: match phones via XXTEA encryption, fill H (UID), D (trial count), I (reg date), J (channel) +""" +import sys +sys.path.insert(0, '/root/.openclaw/workspace/scripts') +from phone_encrypt import encrypt_phone +import psycopg2 +import json +import requests +import time + +FEISHU_TOKEN = "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6" +SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug" +SHEET_ID = "qJF4I" + +# PostgreSQL connection +PG_CONFIG = { + "host": "bj-postgres-16pob4sg.sql.tencentcdb.com", + "port": 28591, + "user": "ai_member", + "password": "LdfjdjL83h3h3^$&**YGG*", + "database": "vala_bi", +} + +def get_sheet_data(): + """Read all data from the sheet""" + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J2502?valueRenderOption=ToString" + headers = {"Authorization": f"Bearer {FEISHU_TOKEN}"} + r = requests.get(url, headers=headers) + data = r.json() + values = data.get("data", {}).get("valueRange", {}).get("values", []) + return values + +def write_cell_range(range_str, values_list): + """Write values to a range""" + url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values" + headers = { + "Authorization": f"Bearer {FEISHU_TOKEN}", + "Content-Type": "application/json" + } + body = { + "valueRange": { + "range": f"{SHEET_ID}!{range_str}", + "values": values_list + } + } + r = requests.put(url, headers=headers, json=body) + resp = r.json() + if resp.get("code") != 0: + print(f" ERROR writing {range_str}: {resp}") + else: + print(f" OK: {range_str}") + return resp + +def main(): + # Step 1: Read sheet data + print("Reading sheet data...") + rows = get_sheet_data() + print(f"Got {len(rows)} rows") + + # Parse rows with phones + phone_rows = [] # (row_num, phone, existing_uid, existing_d, existing_i, existing_j) + for i, row in enumerate(rows): + row_num = i + 3 + if len(row) < 5: + continue + phone = row[4].strip() if row[4] else "" + if not phone or len(phone) != 11 or not phone.isdigit(): + continue + uid = row[7].strip() if len(row) > 7 and row[7] else "" + d_val = row[3] if len(row) > 3 and row[3] else "" + i_val = row[8].strip() if len(row) > 8 and row[8] else "" + j_val = row[9].strip() if len(row) > 9 and row[9] else "" + phone_rows.append((row_num, phone, uid, d_val, i_val, j_val)) + + print(f"Found {len(phone_rows)} rows with valid phones") + + # Separate into two groups: + # Group A: H is "未注册" or empty → need to match and fill H, D, I, J + # Group B: H has valid numeric UID but D is empty → need to fill D, I, J + group_a = [] # need phone matching + group_b = [] # already have UID, need D/I/J + + for row_num, phone, uid, d_val, i_val, j_val in phone_rows: + if not uid or uid == "未注册": + group_a.append((row_num, phone, uid, d_val, i_val, j_val)) + elif uid.isdigit(): + if not d_val or not i_val or not j_val: + group_b.append((row_num, phone, uid, d_val, i_val, j_val)) + + print(f"Group A (need phone match): {len(group_a)}") + print(f"Group B (have UID, need D/I/J): {len(group_b)}") + + # Step 2: Encrypt all phones and query PostgreSQL + all_phones_a = [p[1] for p in group_a] + all_uids_b = [p[2] for p in group_b] + + conn = psycopg2.connect(**PG_CONFIG) + cur = conn.cursor() + + # Map: encrypted_phone -> account_id + enc_to_uid = {} + if all_phones_a: + enc_list = [encrypt_phone(p) for p in all_phones_a] + phone_to_enc = dict(zip(all_phones_a, enc_list)) + + # Query accounts + placeholders = ",".join(["%s"] * len(enc_list)) + cur.execute( + f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({placeholders}) AND status=1 AND deleted_at IS NULL", + enc_list + ) + for row in cur.fetchall(): + uid, tel_enc = row + enc_to_uid[tel_enc] = uid + + print(f"Matched {len(enc_to_uid)} phones in bi_vala_app_account") + + # For matched accounts, get registration date and download channel + matched_uids = list(enc_to_uid.values()) + uid_to_info = {} + if matched_uids: + placeholders2 = ",".join(["%s"] * len(matched_uids)) + cur.execute( + f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account WHERE id IN ({placeholders2}) AND status=1 AND deleted_at IS NULL", + matched_uids + ) + for row in cur.fetchall(): + uid_to_info[row[0]] = (str(row[1]) if row[1] else "", row[2] or "") + + # Step 3: Get trial lesson counts for ALL accounts (both groups) + all_uids = list(set( + [enc_to_uid[encrypt_phone(p[1])] for p in group_a if encrypt_phone(p[1]) in enc_to_uid] + + [p[2] for p in group_b] + )) + + uid_to_trial_count = {} + if all_uids: + placeholders3 = ",".join(["%s"] * len(all_uids)) + cur.execute( + f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({placeholders3}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id", + all_uids + ) + for row in cur.fetchall(): + uid_to_trial_count[row[0]] = row[1] + + cur.close() + conn.close() + + print(f"Trial counts: {len(uid_to_trial_count)} accounts have trials") + print(f"Account info: {len(uid_to_info)} accounts have reg date/channel") + + # Step 4: Build write batches + # For Group A: write H (UID), D (trial count), I (reg date), J (channel) + # For Group B: write D (trial count), I (reg date), J (channel) + + writes_h = [] # (row, value) + writes_d = [] # (row, value) + writes_i = [] # (row, value) + writes_j = [] # (row, value) + + matched_count = 0 + + for row_num, phone, old_uid, old_d, old_i, old_j in group_a: + enc = encrypt_phone(phone) + uid = enc_to_uid.get(enc) + if uid: + matched_count += 1 + uid_str = str(uid) + # H column: UID + writes_h.append((row_num, uid_str)) + # D column: trial count + trial = uid_to_trial_count.get(uid, 0) + writes_d.append((row_num, str(trial) if trial else "")) + # I column: reg date + info = uid_to_info.get(uid, ("", "")) + writes_i.append((row_num, info[0])) + # J column: channel + writes_j.append((row_num, info[1])) + # If not matched, skip - don't write anything + + for row_num, phone, uid_str, old_d, old_i, old_j in group_b: + uid = int(uid_str) + # D column: trial count + trial = uid_to_trial_count.get(uid, 0) + writes_d.append((row_num, str(trial) if trial else "")) + # I column: reg date + info = uid_to_info.get(uid, ("", "")) + writes_i.append((row_num, info[0])) + # J column: channel + writes_j.append((row_num, info[1])) + + print(f"\nMatched {matched_count} new phones") + print(f"Total H writes: {len(writes_h)}") + print(f"Total D writes: {len(writes_d)}") + print(f"Total I writes: {len(writes_i)}") + print(f"Total J writes: {len(writes_j)}") + + # Step 5: Write in batches (consecutive rows per column) + def write_batch(writes, col_letter): + if not writes: + return + # Sort by row number + writes.sort(key=lambda x: x[0]) + # Group consecutive rows + i = 0 + while i < len(writes): + start = writes[i][0] + vals = [] + j = i + while j < len(writes) and writes[j][0] == start + (j - i): + vals.append([writes[j][1]]) + j += 1 + end = start + len(vals) - 1 + range_str = f"{col_letter}{start}:{col_letter}{end}" + write_cell_range(range_str, vals) + i = j + time.sleep(0.05) + + print("\nWriting H column...") + write_batch(writes_h, "H") + + print("\nWriting D column...") + write_batch(writes_d, "D") + + print("\nWriting I column...") + write_batch(writes_i, "I") + + print("\nWriting J column...") + write_batch(writes_j, "J") + + print("\n=== SUMMARY ===") + print(f"Phones matched: {matched_count}") + print(f"H (UID) written: {len(writes_h)}") + print(f"D (trial count) written: {len(writes_d)}") + print(f"I (reg date) written: {len(writes_i)}") + print(f"J (channel) written: {len(writes_j)}") + +if __name__ == "__main__": + main()