#!/usr/bin/env python3
"""
飞书文档读取客户端
封装 wiki 文档读取、内嵌 sheet 读取等飞书 API 调用
使用 Bot 身份,凭证来自 /root/.openclaw/credentials/xiaoyan/config.json
"""
import os
import re
import json
import logging
import requests
import subprocess
# Module-wide logger. The handler is attached only when the logger has none
# yet, so running this setup against an already-configured logger does not
# stack a second stream handler on it.
logger = logging.getLogger("feishu_client")
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
    )
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
# Directory exported to lark-cli through the LARKSUITE_CLI_CONFIG_DIR
# environment variable when the CLI is spawned.
LARK_CLI_CONFIG_DIR = "/root/.openclaw/credentials/xiaoyan"
# JSON file from which the bot's appId / appSecret are read.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
def _load_credentials():
    """Read the bot application's credentials from ``CRED_FILE``.

    The file is parsed as JSON and the first entry of its ``"apps"`` list is
    used.

    Returns:
        tuple: ``(app_id, app_secret)`` taken from that entry's ``"appId"``
        and ``"appSecret"`` keys.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError, IndexError: If ``"apps"`` is missing or empty, or the entry
            lacks ``"appId"`` / ``"appSecret"``.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default text encoding.
    with open(CRED_FILE, "r", encoding="utf-8") as f:
        config = json.load(f)
    app = config["apps"][0]
    return app["appId"], app["appSecret"]
# Process-wide cache: the last token issued and the time.time() timestamp at
# which it expires.
_token_cache = {"token": None, "expires_at": 0}


def get_tenant_token():
    """Return a tenant_access_token for the bot, served from cache when fresh.

    A cached token is reused until 60 seconds before its recorded expiry.
    Otherwise the credentials are loaded with ``_load_credentials`` and a new
    token is requested; its lifetime is taken from the response's ``expire``
    field, falling back to 7200 seconds when that field is absent.

    Returns:
        str: The tenant access token.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        Exception: If the response body's ``code`` is not 0.
    """
    import time

    now = time.time()
    cached = _token_cache["token"]
    if cached and now < _token_cache["expires_at"] - 60:
        return cached

    app_id, app_secret = _load_credentials()
    endpoint = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
    credentials = {"app_id": app_id, "app_secret": app_secret}
    resp = requests.post(
        endpoint,
        headers={"Content-Type": "application/json"},
        json=credentials,
        timeout=10,
    )
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") != 0:
        raise Exception(f"获取 tenant_access_token 失败: {data}")

    token = data["tenant_access_token"]
    # Expiry is measured from the moment just before the request was sent.
    _token_cache.update(token=token, expires_at=now + data.get("expire", 7200))
    logger.info("已获取 tenant_access_token")
    return token
def get_wiki_node(wiki_token):
    """Look up a wiki node by running ``lark-cli wiki spaces get_node`` as the bot.

    The CLI is started with ``LARKSUITE_CLI_CONFIG_DIR`` pointing at
    ``LARK_CLI_CONFIG_DIR`` and its stdout is parsed as JSON.

    Args:
        wiki_token: Token of the wiki node, passed to the CLI as ``token``.

    Returns:
        dict: The ``data.node`` object of the CLI response. This function
        reads its ``title``, ``obj_type`` and ``obj_token`` entries for
        logging.

    Raises:
        Exception: If the CLI exits non-zero or the response ``code`` is not 0.
        subprocess.TimeoutExpired: If the CLI runs longer than 30 seconds.
    """
    cli_env = {**os.environ, "LARKSUITE_CLI_CONFIG_DIR": LARK_CLI_CONFIG_DIR}
    command = [
        "lark-cli", "wiki", "spaces", "get_node",
        "--params", json.dumps({"token": wiki_token}),
        "--as", "bot",
    ]
    result = subprocess.run(
        command, capture_output=True, text=True, env=cli_env, timeout=30
    )
    if result.returncode != 0:
        raise Exception(f"lark-cli get_node 失败: {result.stderr}")

    data = json.loads(result.stdout)
    if data.get("code") != 0:
        raise Exception(f"get_node API error: {data}")

    node = data["data"]["node"]
    logger.info(f"wiki节点: title={node.get('title')}, obj_type={node.get('obj_type')}, obj_token={node.get('obj_token')}")
    return node
def fetch_doc_markdown(obj_token):
    """Fetch a document's markdown by running ``lark-cli docs +fetch`` as the bot.

    The CLI is started with ``LARKSUITE_CLI_CONFIG_DIR`` pointing at
    ``LARK_CLI_CONFIG_DIR``; the markdown is read from ``data.markdown`` of
    its JSON output.

    Args:
        obj_token: Document token passed to the CLI via ``--doc``.

    Returns:
        str: The document's markdown text (never empty).

    Raises:
        Exception: If the CLI exits non-zero, or the markdown is missing or
            empty.
        subprocess.TimeoutExpired: If the CLI runs longer than 60 seconds.
    """
    cli_env = {**os.environ, "LARKSUITE_CLI_CONFIG_DIR": LARK_CLI_CONFIG_DIR}
    command = ["lark-cli", "docs", "+fetch", "--doc", obj_token, "--as", "bot"]
    result = subprocess.run(
        command, capture_output=True, text=True, env=cli_env, timeout=60
    )
    if result.returncode != 0:
        raise Exception(f"lark-cli docs +fetch 失败: {result.stderr}")

    payload = json.loads(result.stdout).get("data", {})
    markdown = payload.get("markdown", "")
    if not markdown:
        raise Exception(f"文档内容为空, obj_token={obj_token}")

    logger.info(f"获取文档内容: {len(markdown)} 字符")
    return markdown
def extract_sheet_token(markdown):
    """Return the first embedded-sheet reference found in the markdown.

    This is a convenience wrapper over ``extract_sheet_tokens``.

    Args:
        markdown: Document markdown text.

    Returns:
        tuple | None: The first ``(spreadsheet_token, sheet_id)`` pair, or
        ``None`` when the document has no embedded sheet.
    """
    found = extract_sheet_tokens(markdown)
    if not found:
        return None
    return found[0]
# Matches the token attribute of an embedded-sheet tag, e.g.
# <sheet token="SPREADSHEETTOKEN_SHEETID"/>.
# NOTE(review): the tag syntax is assumed from the "<spreadsheet>_<sheet>"
# token handling below — confirm against real `lark-cli docs +fetch` output.
_SHEET_TAG_RE = re.compile(r'<sheet\b[^>]*?\btoken="([^"]+)"')


def extract_sheet_tokens(markdown):
    """Extract every embedded-sheet reference from document markdown.

    Each captured token is split at its first underscore into the spreadsheet
    token and the sheet id. A token without an underscore is returned with a
    sheet id of ``None``.

    Args:
        markdown: Document markdown text.

    Returns:
        list[tuple]: ``[(spreadsheet_token, sheet_id), ...]`` in document
        order; an empty list when no sheet tag is present.
    """
    result = []
    for full_token in _SHEET_TAG_RE.findall(markdown):
        spreadsheet_token, separator, sheet_id = full_token.partition("_")
        # An empty separator means there was no underscore, hence no sheet id.
        result.append((spreadsheet_token, sheet_id if separator else None))
    return result
def get_sheet_metadata(spreadsheet_token):
    """Query the sub-sheets of a spreadsheet (sheets v3 ``sheets/query``).

    Args:
        spreadsheet_token: Token of the spreadsheet.

    Returns:
        The ``data.sheets`` value of the response. ``read_sheet_data`` reads
        ``sheet_id`` and ``grid_properties`` from each entry.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        Exception: If the response body's ``code`` is not 0.
    """
    auth_headers = {"Authorization": f"Bearer {get_tenant_token()}"}
    endpoint = f"https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/{spreadsheet_token}/sheets/query"
    resp = requests.get(endpoint, headers=auth_headers, timeout=15)
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") != 0:
        raise Exception(f"get_sheet_metadata 失败: {data}")
    return data["data"]["sheets"]
def read_sheet_data(spreadsheet_token, sheet_id, cell_range=None):
    """Read cell values from one sub-sheet of a spreadsheet.

    Args:
        spreadsheet_token: Token of the spreadsheet.
        sheet_id: ID of the sub-sheet to read.
        cell_range: Range such as ``"A1:H200"``. When ``None``, the range is
            built from the sheet's ``grid_properties`` (``row_count`` and
            ``column_count``, defaulting to 200 rows and 10 columns when a
            field is absent), starting at ``A1``.

    Returns:
        list[list]: ``data.valueRange.values`` of the response, one inner
        list per row; an empty list when that path is absent.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        Exception: If ``sheet_id`` is not among the spreadsheet's sheets, or
            the response body's ``code`` is not 0.
    """
    token = get_tenant_token()

    if cell_range is None:
        # No explicit range: size it from the sheet's own grid metadata.
        sheets = get_sheet_metadata(spreadsheet_token)
        target = next((s for s in sheets if s["sheet_id"] == sheet_id), None)
        if target is None:
            raise Exception(f"未找到 sheet_id={sheet_id} in spreadsheet={spreadsheet_token}")
        grid = target.get("grid_properties", {})
        last_row = grid.get("row_count", 200)
        last_col = _col_to_letter(grid.get("column_count", 10))
        cell_range = f"A1:{last_col}{last_row}"

    range_str = f"{sheet_id}!{cell_range}"
    endpoint = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{spreadsheet_token}/values/{range_str}"
    resp = requests.get(
        endpoint,
        params={"valueRenderOption": "ToString"},
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") != 0:
        raise Exception(f"read_sheet_data 失败: {data}")

    value_range = data.get("data", {}).get("valueRange", {})
    rows = value_range.get("values", [])
    logger.info(f"读取 sheet 数据: {len(rows)} 行, range={range_str}")
    return rows
def read_wiki_doc_with_sheet(wiki_url_or_token):
    """Read a wiki document and the data of every sheet embedded in it.

    Steps: resolve the wiki token, look up the node, fetch the document
    markdown, then read each embedded sheet. A sheet that fails to load, or
    whose reference carries no sheet id, is logged as a warning and skipped.

    Args:
        wiki_url_or_token: A wiki URL containing ``/wiki/<token>``, or the
            bare wiki token.

    Returns:
        dict: {
            "wiki_token": str,
            "obj_token": str,
            "title": str,
            "markdown": str,
            "sheet_token": first successfully read
                (spreadsheet_token, sheet_id), or None,
            "sheet_rows": rows of that first sheet (list[list]), or None,
            "all_sheets": rows of every successfully read sheet,
            "all_sheet_tokens": the matching (spreadsheet_token, sheet_id)
                pairs, in the same order as "all_sheets",
        }
    """
    # Accept either a full URL or a bare token.
    url_match = re.search(r'/wiki/([A-Za-z0-9]+)', wiki_url_or_token)
    wiki_token = url_match.group(1) if url_match else wiki_url_or_token

    node = get_wiki_node(wiki_token)
    obj_token = node["obj_token"]
    title = node.get("title", "")

    markdown = fetch_doc_markdown(obj_token)

    all_sheet_rows = []
    sheet_tokens = []
    for sheet_info in extract_sheet_tokens(markdown):
        spreadsheet_token, sheet_id = sheet_info
        if not sheet_id:
            logger.warning(f"sheet_token 中未包含 sheet_id: {sheet_info}")
            continue
        try:
            sheet_rows = read_sheet_data(spreadsheet_token, sheet_id)
            logger.info(f"成功读取内嵌sheet ({sheet_id}): {len(sheet_rows)} 行")
            all_sheet_rows.append(sheet_rows)
            sheet_tokens.append(sheet_info)
        except Exception as e:
            # Best effort: one unreadable sheet must not fail the whole read.
            logger.warning(f"读取内嵌sheet ({sheet_id}) 失败: {e}")

    # "sheet_token" / "sheet_rows" keep the older single-sheet interface.
    first_rows = all_sheet_rows[0] if all_sheet_rows else None
    first_token = sheet_tokens[0] if sheet_tokens else None

    return {
        "wiki_token": wiki_token,
        "obj_token": obj_token,
        "title": title,
        "markdown": markdown,
        "sheet_token": first_token,
        "sheet_rows": first_rows,
        "all_sheets": all_sheet_rows,
        "all_sheet_tokens": sheet_tokens,
    }
def _col_to_letter(col_num):
    """Convert a 1-based column number to spreadsheet letters.

    1 -> "A", 26 -> "Z", 27 -> "AA", and so on. Values of 0 or below yield an
    empty string.
    """
    letters = []
    while col_num > 0:
        # Shift to 0-based before dividing: the letter system has no zero digit.
        col_num, remainder = divmod(col_num - 1, 26)
        letters.append(chr(65 + remainder))
    # Letters were produced least-significant first.
    return "".join(reversed(letters))
def read_bitable_records(app_token, table_id, page_size=50, *, fetch_all=False):
    """Read records from a Bitable (multi-dimensional table) data table.

    By default only the first page is returned, so a table holding more than
    ``page_size`` records is truncated. Pass ``fetch_all=True`` to keep
    requesting pages while the response reports ``has_more`` together with a
    ``page_token``.

    Args:
        app_token: Bitable app token.
        table_id: ID of the data table.
        page_size: Number of records requested per page.
        fetch_all: Keyword-only. When True, follow pagination and return the
            records of every page.

    Returns:
        list[dict]: One ``{"record_id": str, "fields": dict}`` per record.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        Exception: If a response body's ``code`` is not 0.
    """
    token = get_tenant_token()
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}

    records = []
    page_token = None
    while True:
        params = {"page_size": page_size}
        if page_token:
            params["page_token"] = page_token
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        if data.get("code") != 0:
            raise Exception(f"read_bitable_records 失败: {data}")

        # Tolerate a missing or null "data" / "items" instead of failing on
        # iteration.
        payload = data.get("data") or {}
        for item in payload.get("items") or []:
            records.append({
                "record_id": item.get("record_id"),
                "fields": item.get("fields", {}),
            })

        page_token = payload.get("page_token")
        # Stop after the first page unless asked for everything, and stop
        # once there is no further page to request.
        if not (fetch_all and payload.get("has_more") and page_token):
            break

    logger.info(f"读取 bitable 记录: {len(records)} 条, app={app_token}, table={table_id}")
    return records
def list_bitable_tables(app_token):
    """List the data tables of a Bitable app.

    A single request is made; no page parameters are sent.

    Args:
        app_token: Bitable app token.

    Returns:
        list[dict]: One ``{"table_id": str, "name": str}`` per table, with
        ``name`` defaulting to an empty string when absent.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        Exception: If the response body's ``code`` is not 0.
    """
    auth_headers = {"Authorization": f"Bearer {get_tenant_token()}"}
    endpoint = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables"
    resp = requests.get(endpoint, headers=auth_headers, timeout=15)
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") != 0:
        raise Exception(f"list_bitable_tables 失败: {data}")

    summaries = []
    for table in data.get("data", {}).get("items", []):
        summaries.append({"table_id": table["table_id"], "name": table.get("name", "")})
    return summaries
# ============ CLI smoke test ============
def _main(argv):
    """Command-line entry: read one wiki document and print a short summary.

    Args:
        argv: Argument vector in ``sys.argv`` form; ``argv[1]`` is the wiki
            URL or token.

    Returns:
        int: Process exit status — 1 when the argument is missing (usage is
        printed), 0 after a successful read.
    """
    if len(argv) < 2:
        # Name the required argument in the usage line.
        print("用法: python3 feishu_client.py <wiki_url_or_token>")
        print(" 测试读取飞书 wiki 文档及其内嵌 sheet")
        return 1

    result = read_wiki_doc_with_sheet(argv[1])
    print(f"标题: {result['title']}")
    print(f"obj_token: {result['obj_token']}")
    print(f"markdown长度: {len(result['markdown'])} 字符")
    print(f"sheet_token: {result['sheet_token']}")
    if result["sheet_rows"]:
        print(f"sheet行数: {len(result['sheet_rows'])}")
        print(f"表头: {result['sheet_rows'][0]}")
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(_main(sys.argv))