#!/usr/bin/env python3 """ 飞书文档读取客户端 封装 wiki 文档读取、内嵌 sheet 读取等飞书 API 调用 使用 Bot 身份,凭证来自 /root/.openclaw/credentials/xiaoyan/config.json """ import os import re import json import logging import requests import subprocess logger = logging.getLogger("feishu_client") if not logger.handlers: handler = logging.StreamHandler() handler.setFormatter(logging.Formatter( "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s" )) logger.addHandler(handler) logger.setLevel(logging.INFO) CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json" LARK_CLI_CONFIG_DIR = "/root/.openclaw/credentials/xiaoyan" def _load_credentials(): """从配置文件读取 app_id 和 app_secret""" with open(CRED_FILE, "r") as f: config = json.load(f) app = config["apps"][0] return app["appId"], app["appSecret"] _token_cache = {"token": None, "expires_at": 0} def get_tenant_token(): """获取 Bot 租户访问令牌(带缓存,有效期 2 小时)""" import time now = time.time() if _token_cache["token"] and now < _token_cache["expires_at"] - 60: return _token_cache["token"] app_id, app_secret = _load_credentials() resp = requests.post( "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", headers={"Content-Type": "application/json"}, json={"app_id": app_id, "app_secret": app_secret}, timeout=10, ) resp.raise_for_status() data = resp.json() if data.get("code") != 0: raise Exception(f"获取 tenant_access_token 失败: {data}") token = data["tenant_access_token"] _token_cache["token"] = token _token_cache["expires_at"] = now + data.get("expire", 7200) logger.info("已获取 tenant_access_token") return token def get_wiki_node(wiki_token): """ 获取知识库节点信息 Returns: dict: {"obj_token", "obj_type", "title", "space_id", "has_child", ...} """ env = os.environ.copy() env["LARKSUITE_CLI_CONFIG_DIR"] = LARK_CLI_CONFIG_DIR result = subprocess.run( ["lark-cli", "wiki", "spaces", "get_node", "--params", json.dumps({"token": wiki_token}), "--as", "bot"], capture_output=True, text=True, env=env, timeout=30, ) if result.returncode != 0: raise Exception(f"lark-cli get_node 失败: {result.stderr}") data = json.loads(result.stdout) if data.get("code") != 0: raise Exception(f"get_node API error: {data}") node = data["data"]["node"] logger.info(f"wiki节点: title={node.get('title')}, obj_type={node.get('obj_type')}, obj_token={node.get('obj_token')}") return node def fetch_doc_markdown(obj_token): """ 获取文档 markdown 内容 Returns: str: 文档 markdown 文本 """ env = os.environ.copy() env["LARKSUITE_CLI_CONFIG_DIR"] = LARK_CLI_CONFIG_DIR result = subprocess.run( ["lark-cli", "docs", "+fetch", "--doc", obj_token, "--as", "bot"], capture_output=True, text=True, env=env, timeout=60, ) if result.returncode != 0: raise Exception(f"lark-cli docs +fetch 失败: {result.stderr}") data = json.loads(result.stdout) markdown = data.get("data", {}).get("markdown", "") if not markdown: raise Exception(f"文档内容为空, obj_token={obj_token}") logger.info(f"获取文档内容: {len(markdown)} 字符") return markdown def extract_sheet_token(markdown): """ 从文档 markdown 中提取内嵌 sheet token(返回第一个) Returns: tuple: (spreadsheet_token, sheet_id) 或 None """ tokens = extract_sheet_tokens(markdown) return tokens[0] if tokens else None def extract_sheet_tokens(markdown): """ 从文档 markdown 中提取所有内嵌 sheet token Returns: list[tuple]: [(spreadsheet_token, sheet_id), ...] """ matches = re.findall(r'', markdown) result = [] for full_token in matches: if "_" in full_token: parts = full_token.split("_", 1) result.append((parts[0], parts[1])) else: result.append((full_token, None)) return result def get_sheet_metadata(spreadsheet_token): """获取 sheet 元数据(子表列表、行列数等)""" token = get_tenant_token() resp = requests.get( f"https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/{spreadsheet_token}/sheets/query", headers={"Authorization": f"Bearer {token}"}, timeout=15, ) resp.raise_for_status() data = resp.json() if data.get("code") != 0: raise Exception(f"get_sheet_metadata 失败: {data}") return data["data"]["sheets"] def read_sheet_data(spreadsheet_token, sheet_id, cell_range=None): """ 读取内嵌 sheet 数据 Args: spreadsheet_token: 电子表格 token sheet_id: 子表 ID cell_range: 单元格范围(如 "A1:H200"),为 None 时自动读取全表 Returns: list[list]: 二维数组,每行一个 list """ token = get_tenant_token() if cell_range is None: # 先获取元数据确定范围 sheets = get_sheet_metadata(spreadsheet_token) target = None for s in sheets: if s["sheet_id"] == sheet_id: target = s break if target is None: raise Exception(f"未找到 sheet_id={sheet_id} in spreadsheet={spreadsheet_token}") grid = target.get("grid_properties", {}) row_count = grid.get("row_count", 200) col_count = grid.get("column_count", 10) # 列号转字母 end_col = _col_to_letter(col_count) cell_range = f"A1:{end_col}{row_count}" range_str = f"{sheet_id}!{cell_range}" resp = requests.get( f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{spreadsheet_token}/values/{range_str}", params={"valueRenderOption": "ToString"}, headers={"Authorization": f"Bearer {token}"}, timeout=30, ) resp.raise_for_status() data = resp.json() if data.get("code") != 0: raise Exception(f"read_sheet_data 失败: {data}") rows = data.get("data", {}).get("valueRange", {}).get("values", []) logger.info(f"读取 sheet 数据: {len(rows)} 行, range={range_str}") return rows def read_wiki_doc_with_sheet(wiki_url_or_token): """ 一站式读取: wiki URL/token → 文档markdown + 内嵌sheet数据 Args: wiki_url_or_token: 飞书 wiki URL 或 wiki_token Returns: dict: { "wiki_token": str, "obj_token": str, "title": str, "markdown": str, "sheet_token": (spreadsheet_token, sheet_id) or None, "sheet_rows": list[list] or None, } """ # 解析 wiki_token wiki_token = wiki_url_or_token match = re.search(r'/wiki/([A-Za-z0-9]+)', wiki_url_or_token) if match: wiki_token = match.group(1) # 获取节点信息 node = get_wiki_node(wiki_token) obj_token = node["obj_token"] title = node.get("title", "") # 获取文档内容 markdown = fetch_doc_markdown(obj_token) # 提取并读取所有内嵌 sheet sheet_infos = extract_sheet_tokens(markdown) all_sheet_rows = [] sheet_tokens = [] for sheet_info in sheet_infos: spreadsheet_token, sheet_id = sheet_info if sheet_id: try: sheet_rows = read_sheet_data(spreadsheet_token, sheet_id) logger.info(f"成功读取内嵌sheet ({sheet_id}): {len(sheet_rows)} 行") all_sheet_rows.append(sheet_rows) sheet_tokens.append(sheet_info) except Exception as e: logger.warning(f"读取内嵌sheet ({sheet_id}) 失败: {e}") else: logger.warning(f"sheet_token 中未包含 sheet_id: {sheet_info}") # 兼容旧接口:sheet_rows 取第一个(向后兼容),新增 all_sheets sheet_rows = all_sheet_rows[0] if all_sheet_rows else None sheet_token = sheet_tokens[0] if sheet_tokens else None return { "wiki_token": wiki_token, "obj_token": obj_token, "title": title, "markdown": markdown, "sheet_token": sheet_token, "sheet_rows": sheet_rows, "all_sheets": all_sheet_rows, "all_sheet_tokens": sheet_tokens, } def _col_to_letter(col_num): """列号(1-based) → 字母(A, B, ..., Z, AA, AB, ...)""" result = "" while col_num > 0: col_num -= 1 result = chr(65 + col_num % 26) + result col_num //= 26 return result def read_bitable_records(app_token, table_id, page_size=50): """ 读取多维表格记录 Args: app_token: 多维表格 app_token (bitable_token) table_id: 数据表 ID page_size: 每页记录数 Returns: list[dict]: 记录列表,每条为 {"record_id": str, "fields": dict} """ token = get_tenant_token() resp = requests.get( f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables/{table_id}/records", params={"page_size": page_size}, headers={"Authorization": f"Bearer {token}"}, timeout=30, ) resp.raise_for_status() data = resp.json() if data.get("code") != 0: raise Exception(f"read_bitable_records 失败: {data}") items = data.get("data", {}).get("items", []) records = [] for item in items: records.append({ "record_id": item.get("record_id"), "fields": item.get("fields", {}), }) logger.info(f"读取 bitable 记录: {len(records)} 条, app={app_token}, table={table_id}") return records def list_bitable_tables(app_token): """列出多维表格的所有数据表""" token = get_tenant_token() resp = requests.get( f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables", headers={"Authorization": f"Bearer {token}"}, timeout=15, ) resp.raise_for_status() data = resp.json() if data.get("code") != 0: raise Exception(f"list_bitable_tables 失败: {data}") tables = data.get("data", {}).get("items", []) return [{"table_id": t["table_id"], "name": t.get("name", "")} for t in tables] # ============ CLI 测试 ============ if __name__ == "__main__": import sys if len(sys.argv) < 2: print("用法: python3 feishu_client.py ") print(" 测试读取飞书 wiki 文档及其内嵌 sheet") sys.exit(1) result = read_wiki_doc_with_sheet(sys.argv[1]) print(f"标题: {result['title']}") print(f"obj_token: {result['obj_token']}") print(f"markdown长度: {len(result['markdown'])} 字符") print(f"sheet_token: {result['sheet_token']}") if result["sheet_rows"]: print(f"sheet行数: {len(result['sheet_rows'])}") print(f"表头: {result['sheet_rows'][0]}")