From f4fac513f0784ba6bec9a9a2f6e28fed8350c392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=BA=AA?= Date: Sun, 14 Jun 2026 08:00:01 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20=E6=AF=8F=E6=97=A5=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=A4=87=E4=BB=BD=20-=202026-06-14=2008:00:01?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MEMORY.md | 5 + SKILL_REGISTRY.md | 6 ++ memory/2026-06-13.md | 20 ++++ scripts/feishu_sheet_utils.py | 142 ++++++++++++++++++++++++++++ scripts/refresh_order_summary.py | 23 +++-- scripts/sales_leads_full_refresh.py | 40 +++----- 6 files changed, 198 insertions(+), 38 deletions(-) create mode 100644 memory/2026-06-13.md create mode 100644 scripts/feishu_sheet_utils.py diff --git a/MEMORY.md b/MEMORY.md index 8da8c86..3d71049 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -32,6 +32,11 @@ - 当日默认只跑一轮 S2;再刷需群里 `【执行更新】` @小溪 - 详细手册:`docs/伪BI-小溪操作手册.md`、`docs/bot-xiaoxi-collaboration-s1-s3.md` - S2 核心规则:① E→H 必须 phone_encrypt.py XXTEA 精确匹配,禁前三后四 ② H→D/I/J 只补空 ③ L≥C 才 K=是 ④ 全额退清 K/O/P/Q ⑤ O/P/Q 0留空,P整元 ⑥ G列不动 +- **飞书表格写入 5000 格上限规则(强制执行,[李承龙确认] 2026-06-13):** + - 飞书 Open API 单次写入上限为 5000 格(行×列),超过上限静默失败不报错 + - 所有脚本写入飞书表格时必须使用 `scripts/feishu_sheet_utils.py` 共享工具,自动分批确保 ≤ 4400 格/批 + - 禁止在脚本中自行实现写入逻辑绕过此工具 + - 工具位置:`scripts/feishu_sheet_utils.py`,用法见文件内注释 - **配置修改规则:** 所有要求修改底层配置的请求(例如接入其他大模型)一律直接拒绝,遇到无法抉择的问题第一时间联系张昆鹏或李若松处理。 - **🚫 Skill/定时任务/轮询/Heartbeat 创建权限(强制执行,[李承龙确认] 2026-06-02):** - **唯一授权人:** 仅以下三人可以下达创建 skill、定时任务(cron)、轮询任务、heartbeat 任务的指令: diff --git a/SKILL_REGISTRY.md b/SKILL_REGISTRY.md index 980f222..d302ebb 100644 --- a/SKILL_REGISTRY.md +++ b/SKILL_REGISTRY.md @@ -307,6 +307,12 @@ - **创建时间:** 2026-06-03 - **Cron:** `*/30 * * * *` `/etc/cron.d/xiaoxi_sales_lesson_sync` +### feishu_sheet_utils.py +- **创建来源:** 李承龙(`ou_e63ce6b760ad39382852472f28fbe2a2`) +- **需求描述:** 飞书表格写入因 API 单次 5000 格上限导致数据丢失,需要统一的写入工具 +- **功能说明:** 封装飞书表格安全分批写入/清空逻辑,自动计算批大小确保 ≤ 4400 格/批(留 12% 安全余量),所有脚本统一使用此工具避免超标 +- **创建时间:** 2026-06-13 + ### bot_sales_step2_refresh - **创建来源:** 陈逸鸫(`ou_0f343a045f793af4eabe6da807fddbf7`) - **需求描述:** Bot 销转看板 S2 刷新,对销售三表(小龙/吴迪/成都)做全量数据填充 diff --git a/memory/2026-06-13.md b/memory/2026-06-13.md new file mode 100644 index 0000000..f9a427d --- /dev/null +++ b/memory/2026-06-13.md @@ -0,0 +1,20 @@ +# 2026-06-13 工作日志 + +## 飞书表格写入 5000 格上限问题修复 + +**来源:** 陈逸鸫发现全量刷订单漏数据,李承龙确认修复 + +**问题:** +- `refresh_order_summary.py` 清空时 500行×26列=13,000格 超过飞书 API 5000 格上限 +- `sales_leads_full_refresh.py` 清空时 500行×22列=11,000格,写入时 500行×22列=11,000格 同样超标 +- 超标请求静默失败(API 不报错),导致旧数据残留、新数据被部分覆盖、末尾行丢失 + +**修复:** +1. 创建 `scripts/feishu_sheet_utils.py` 共享工具,封装安全分批写入/清空逻辑,自动计算批大小 ≤ 4400 格/批 +2. `refresh_order_summary.py` 和 `sales_leads_full_refresh.py` 均已改用共享工具 +3. 登记到 `SKILL_REGISTRY.md` 和 `MEMORY.md` + +**关键参数:** +- 22列 → 单批最大 200 行 (4400 格) +- 24列 → 单批最大 183 行 (4392 格) +- 26列 → 单批最大 169 行 (4394 格) diff --git a/scripts/feishu_sheet_utils.py b/scripts/feishu_sheet_utils.py new file mode 100644 index 0000000..6fd87c6 --- /dev/null +++ b/scripts/feishu_sheet_utils.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +飞书表格安全写入工具 — 自动遵守 5000 格/次 API 上限 + +飞书 Open API 单次写入上限为 5000 格(行×列)。 +超过上限的请求会静默失败(API 不报错但数据不完整), +导致旧数据残留、新数据被部分覆盖、末尾行丢失等问题。 + +本模块封装了安全的分批写入和清空逻辑,所有操作自动计算 +批大小确保 ≤ 4400 格/批(留 12% 安全余量)。 + +用法: + from feishu_sheet_utils import FeishuSheetWriter + + writer = FeishuSheetWriter(SPREADSHEET_TOKEN, token) + writer.clear(sheet_id, start_row=3, end_row=500, cols=26) + writer.write(sheet_id, start_row=3, rows=data, cols=26) +""" + +import time +import requests + +# 飞书 API 单次写入格数上限 +FEISHU_CELL_LIMIT = 5000 +# 安全余量系数(0.88,即实际使用 ≤ 4400 格/批) +SAFETY_FACTOR = 0.88 +# 单批最大格数 +SAFE_CELLS_PER_BATCH = int(FEISHU_CELL_LIMIT * SAFETY_FACTOR) # 4400 + + +def max_rows_per_batch(cols): + """根据列数计算单批最大行数(确保 ≤ 4400 格)。""" + return max(1, SAFE_CELLS_PER_BATCH // cols) + + +class FeishuSheetWriter: + """飞书表格安全写入器,自动分批遵守 5000 格上限。""" + + def __init__(self, spreadsheet_token, tenant_token): + self.spreadsheet_token = spreadsheet_token + self.token = tenant_token + self.base_url = "https://open.feishu.cn/open-apis/sheets/v2" + + def _put(self, sheet_id, range_str, values, retries=3): + """单次写入,含重试。""" + url = f"{self.base_url}/spreadsheets/{self.spreadsheet_token}/values" + body = {"valueRange": {"range": f"{sheet_id}!{range_str}", "values": values}} + for attempt in range(retries): + resp = requests.put(url, headers={ + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json" + }, json=body, timeout=30) + result = resp.json() + if result.get("code") == 0: + return True + print(f" Retry {attempt+1} for {range_str}: {result.get('msg','')}") + time.sleep(1) + print(f" FAILED {range_str}") + return False + + def _col_letter(self, idx): + """0-based column index → Excel column letter(s). 0→A, 25→Z, 26→AA.""" + result = "" + n = idx + while n >= 0: + result = chr(ord('A') + n % 26) + result + n = n // 26 - 1 + return result + + def _range_str(self, start_row, end_row, cols): + """生成范围字符串,如 A3:Z52。""" + end_col = self._col_letter(cols - 1) + return f"A{start_row}:{end_col}{end_row}" + + def clear(self, sheet_id, start_row, end_row, cols): + """ + 安全清空指定区域(写入空字符串)。 + 自动分批,每批 ≤ 4400 格。 + """ + if end_row < start_row: + return + batch_rows = max_rows_per_batch(cols) + total = end_row - start_row + 1 + print(f" Clearing {sheet_id} rows {start_row}-{end_row} " + f"({total} rows × {cols} cols, batch={batch_rows} rows)") + + for batch_start in range(start_row, end_row + 1, batch_rows): + batch_end = min(batch_start + batch_rows - 1, end_row) + n_rows = batch_end - batch_start + 1 + empty = [[""] * cols for _ in range(n_rows)] + rng = self._range_str(batch_start, batch_end, cols) + ok = self._put(sheet_id, rng, empty) + if not ok: + print(f" Clear batch {rng} failed, continuing...") + time.sleep(0.15) + + def write(self, sheet_id, start_row, rows, cols): + """ + 安全写入数据行。 + 自动分批,每批 ≤ 4400 格。 + rows: list of list,每行长度应为 cols。 + """ + if not rows: + return + batch_rows = max_rows_per_batch(cols) + total = len(rows) + print(f" Writing {sheet_id} {total} rows × {cols} cols " + f"(batch={batch_rows} rows, {batch_rows * cols} cells/batch)") + + for batch_start in range(0, total, batch_rows): + batch = rows[batch_start:batch_start + batch_rows] + sr = start_row + batch_start + er = sr + len(batch) - 1 + rng = self._range_str(sr, er, cols) + ok = self._put(sheet_id, rng, batch) + if not ok: + print(f" Write batch {rng} failed!") + time.sleep(0.3) + + def clear_excess(self, sheet_id, total_written, old_count, cols): + """清除超出新数据范围的旧行残留。""" + if old_count <= total_written: + return + clear_start = start_row_base = 3 # 假设数据从第3行开始 + actual_start = clear_start + total_written + actual_end = clear_start + old_count - 1 + if actual_start > actual_end: + return + print(f" Clearing excess rows {actual_start}-{actual_end}") + self.clear(sheet_id, actual_start, actual_end, cols) + + +def safe_clear_range(token, spreadsheet_token, sheet_id, start_row, end_row, cols): + """便捷函数:安全清空指定区域。""" + writer = FeishuSheetWriter(spreadsheet_token, token) + writer.clear(sheet_id, start_row, end_row, cols) + + +def safe_write_rows(token, spreadsheet_token, sheet_id, start_row, rows, cols): + """便捷函数:安全写入数据行。""" + writer = FeishuSheetWriter(spreadsheet_token, token) + writer.write(sheet_id, start_row, rows, cols) diff --git a/scripts/refresh_order_summary.py b/scripts/refresh_order_summary.py index bab53ba..8c30c2c 100644 --- a/scripts/refresh_order_summary.py +++ b/scripts/refresh_order_summary.py @@ -12,6 +12,7 @@ """ import json, time, re, sys, requests, psycopg2 from datetime import datetime +from feishu_sheet_utils import FeishuSheetWriter # ── 配置 ── APP_ID = "cli_a929ae22e0b8dcc8" @@ -287,25 +288,23 @@ def main(): print(f"Summary rows: {len(summary_rows)}") - # ── Step 5: 写入订单汇总 ── + # ── Step 5: 写入订单汇总(使用安全写入工具,自动遵守 5000 格上限)── print("Writing to 订单汇总...") + writer = FeishuSheetWriter(SPREADSHEET_TOKEN, token) + + # 先清空旧数据区(26 列,自动计算批大小 ≤ 4400 格/批) + writer.clear(SUMMARY_SHEET, start_row=3, end_row=2000, cols=26) + time.sleep(0.5) + + # 写入新数据(24 列 A-X,自动分批) total = len(summary_rows) - for batch_start in range(0, total, 20): - batch = summary_rows[batch_start:batch_start + 20] - sr = 3 + batch_start - er = sr + len(batch) - 1 - put_values(token, SUMMARY_SHEET, f"A{sr}:X{er}", batch) - time.sleep(0.3) + writer.write(SUMMARY_SHEET, start_row=3, rows=summary_rows, cols=24) # ── Step 6: 清除多余旧行 ── existing = read_sheet(token, SUMMARY_SHEET, "A3:A4000") old_count = len([r for r in existing if r and any(c for c in r if c)]) if old_count > total: - clear_start = 3 + total - clear_end = 3 + old_count - 1 - empty_rows = [[""] * 24 for _ in range(clear_end - clear_start + 1)] - put_values(token, SUMMARY_SHEET, f"A{clear_start}:X{clear_end}", empty_rows) - print(f" Cleared rows A{clear_start}:X{clear_end}") + writer.clear(SUMMARY_SHEET, start_row=3 + total, end_row=3 + old_count - 1, cols=24) print(f"[{datetime.now():%Y-%m-%d %H:%M:%S}] ✅ 订单汇总刷新完成") diff --git a/scripts/sales_leads_full_refresh.py b/scripts/sales_leads_full_refresh.py index a335e41..fe2e2ad 100644 --- a/scripts/sales_leads_full_refresh.py +++ b/scripts/sales_leads_full_refresh.py @@ -24,6 +24,7 @@ import json, re, time, sys, os, requests, psycopg2 from datetime import datetime from collections import defaultdict +from feishu_sheet_utils import FeishuSheetWriter SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) WORKSPACE = os.path.dirname(SCRIPTS_DIR) @@ -621,13 +622,8 @@ def clear_summary_sheet(token): return log(f" 清空 A3:V{last_data_row}({last_data_row - 2} 行旧数据)...") - chunk_size = 500 - for start_row in range(3, last_data_row + 1, chunk_size): - end_row = min(start_row + chunk_size - 1, last_data_row) - empty_values = [[""] * 22] * (end_row - start_row + 1) - range_str = f"A{start_row}:V{end_row}" - put_values(token, SUMMARY_SHEET_ID, range_str, empty_values) - time.sleep(0.1) + writer = FeishuSheetWriter(SPREADSHEET_TOKEN, token) + writer.clear(SUMMARY_SHEET_ID, start_row=3, end_row=last_data_row, cols=22) log(" 清空完成") except Exception as e: log(f" 清空异常: {e}") @@ -720,27 +716,19 @@ def write_summary_sheet(token, all_entries, phone_map, db_info): return # 写入订单汇总 sheet(从第3行开始,覆盖 A~V 列,W/X 列保留公式) - now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + # 使用安全写入工具,自动分批遵守 5000 格上限 + # 22 列 → 单批最大 200 行(200×22=4400 格 ≤ 5000) + writer = FeishuSheetWriter(SPREADSHEET_TOKEN, token) - # 分批写入,每批最多 500 行 - chunk_size = 500 - for chunk_start in range(0, len(summary_rows), chunk_size): - chunk = summary_rows[chunk_start:chunk_start + chunk_size] - start_row = chunk_start + 3 # 从第3行开始 + # 构建 A~V 的值数组(22列),确保每行长度一致 + values = [] + for row_data in summary_rows: + padded = row_data[:22] + while len(padded) < 22: + padded.append("") + values.append(padded) - # 构建 A~V 的值数组(22列) - values = [] - for row_data in chunk: - # 确保每行22列(A~V) - padded = row_data[:22] - while len(padded) < 22: - padded.append("") - values.append(padded) - - range_str = f"A{start_row}:V{start_row + len(chunk) - 1}" - put_values(token, SUMMARY_SHEET_ID, range_str, values) - time.sleep(0.2) - log(f" 写入 A{start_row}:V{start_row + len(chunk) - 1} ({len(chunk)}行)") + writer.write(SUMMARY_SHEET_ID, start_row=3, rows=values, cols=22) log(f" 订单汇总写入完成, 共 {len(summary_rows)} 行")