352 lines
11 KiB
Python
352 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
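Feishu (Lark) document reader client.

Wraps the Feishu API calls used to read wiki documents and the sheets embedded in them.
Runs with the bot identity; credentials come from /root/.openclaw/credentials/xiaoyan/config.json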
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import json
|
||
import logging
|
||
import requests
|
||
import subprocess
|
||
|
||
# Module-wide logger. A stream handler is attached only when the logger has
# none yet, so a logger that was already configured is not given a second
# handler (which would duplicate every log line).
logger = logging.getLogger("feishu_client")
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
    ))
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
|
||
|
||
# JSON file holding the bot app credentials (read by _load_credentials).
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
# Directory exported as LARKSUITE_CLI_CONFIG_DIR when lark-cli is invoked.
LARK_CLI_CONFIG_DIR = "/root/.openclaw/credentials/xiaoyan"
|
||
|
||
|
||
def _load_credentials():
    """Read the bot's app id and app secret from ``CRED_FILE``.

    The file is parsed as JSON and the first entry of its top-level
    ``"apps"`` list is used.

    Returns:
        tuple: ``(app_id, app_secret)``, taken from that entry's ``"appId"``
        and ``"appSecret"`` keys.

    Raises:
        OSError: If the credentials file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError, IndexError: If ``apps[0]`` or one of its expected keys is
            missing.
    """
    # JSON is UTF-8 by specification; do not depend on the process locale
    # for decoding the file.
    with open(CRED_FILE, "r", encoding="utf-8") as f:
        config = json.load(f)
    app = config["apps"][0]
    return app["appId"], app["appSecret"]
|
||
|
||
|
||
# Process-wide cache: the last tenant access token and the epoch second at
# which it expires.
_token_cache = dict(token=None, expires_at=0)


def get_tenant_token():
    """Return the bot's tenant access token, reusing the cached one if valid.

    A cached token is reused until 60 seconds before its recorded expiry.
    Otherwise the credentials are loaded, a new token is requested from the
    ``tenant_access_token/internal`` endpoint and the cache is refreshed.
    When the response carries no ``"expire"`` field, a lifetime of 7200
    seconds (2 hours) is assumed.

    Returns:
        str: The tenant access token.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        Exception: If the response body's ``"code"`` is not 0.
    """
    import time

    started = time.time()
    cached = _token_cache["token"]
    # Refresh one minute early so a token never expires mid-request.
    if cached and started < _token_cache["expires_at"] - 60:
        return cached

    app_id, app_secret = _load_credentials()
    response = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": app_id, "app_secret": app_secret},
        headers={"Content-Type": "application/json"},
        timeout=10,
    )
    response.raise_for_status()
    payload = response.json()
    if payload.get("code") != 0:
        raise Exception(f"获取 tenant_access_token 失败: {payload}")

    fresh = payload["tenant_access_token"]
    _token_cache["token"] = fresh
    # Expiry is measured from the moment the request was started.
    _token_cache["expires_at"] = started + payload.get("expire", 7200)
    logger.info("已获取 tenant_access_token")
    return fresh
|
||
|
||
|
||
def get_wiki_node(wiki_token):
    """Look up a wiki node through ``lark-cli wiki spaces get_node``.

    The CLI is run as the bot, with ``LARKSUITE_CLI_CONFIG_DIR`` pointing at
    ``LARK_CLI_CONFIG_DIR``, and its stdout is parsed as JSON.

    Args:
        wiki_token: Token of the wiki node to look up.

    Returns:
        dict: The ``data.node`` object of the CLI response. This module reads
        its ``obj_token``, ``obj_type`` and ``title`` entries.

    Raises:
        Exception: If the CLI exits with a non-zero status or the response
            ``"code"`` is not 0.
        subprocess.TimeoutExpired: If the CLI does not finish in 30 seconds.
    """
    cli_env = dict(os.environ, LARKSUITE_CLI_CONFIG_DIR=LARK_CLI_CONFIG_DIR)
    command = [
        "lark-cli", "wiki", "spaces", "get_node",
        "--params", json.dumps({"token": wiki_token}),
        "--as", "bot",
    ]
    proc = subprocess.run(
        command, env=cli_env, capture_output=True, text=True, timeout=30,
    )
    if proc.returncode != 0:
        raise Exception(f"lark-cli get_node 失败: {proc.stderr}")

    payload = json.loads(proc.stdout)
    if payload.get("code") != 0:
        raise Exception(f"get_node API error: {payload}")

    node = payload["data"]["node"]
    logger.info(f"wiki节点: title={node.get('title')}, obj_type={node.get('obj_type')}, obj_token={node.get('obj_token')}")
    return node
|
||
|
||
|
||
def fetch_doc_markdown(obj_token):
    """Fetch a document's markdown through ``lark-cli docs +fetch``.

    The CLI is run as the bot, with ``LARKSUITE_CLI_CONFIG_DIR`` pointing at
    ``LARK_CLI_CONFIG_DIR``; the markdown is read from ``data.markdown`` of
    the JSON it prints.

    Args:
        obj_token: Token of the document to fetch.

    Returns:
        str: The document's markdown text (never empty).

    Raises:
        Exception: If the CLI exits with a non-zero status, or the response
            contains no markdown.
        subprocess.TimeoutExpired: If the CLI does not finish in 60 seconds.
    """
    cli_env = dict(os.environ, LARKSUITE_CLI_CONFIG_DIR=LARK_CLI_CONFIG_DIR)
    command = ["lark-cli", "docs", "+fetch", "--doc", obj_token, "--as", "bot"]
    proc = subprocess.run(
        command, env=cli_env, capture_output=True, text=True, timeout=60,
    )
    if proc.returncode != 0:
        raise Exception(f"lark-cli docs +fetch 失败: {proc.stderr}")

    payload = json.loads(proc.stdout)
    markdown = payload.get("data", {}).get("markdown", "")
    # An empty document is reported as an error rather than returned.
    if not markdown:
        raise Exception(f"文档内容为空, obj_token={obj_token}")

    logger.info(f"获取文档内容: {len(markdown)} 字符")
    return markdown
|
||
|
||
|
||
def extract_sheet_token(markdown):
    """Return the first embedded sheet reference found in the markdown.

    Thin wrapper over ``extract_sheet_tokens`` for callers that only need
    one sheet.

    Args:
        markdown: Document markdown text.

    Returns:
        tuple | None: ``(spreadsheet_token, sheet_id)`` of the first match,
        or ``None`` when the markdown embeds no sheet.
    """
    found = extract_sheet_tokens(markdown)
    if not found:
        return None
    return found[0]
|
||
|
||
|
||
def extract_sheet_tokens(markdown):
    """Collect every embedded sheet reference from the document markdown.

    Matches self-closing ``<sheet token="..."/>`` tags. Each token value is
    split at its first underscore into a spreadsheet token and a sheet id.

    Args:
        markdown: Document markdown text.

    Returns:
        list[tuple]: ``(spreadsheet_token, sheet_id)`` pairs in document
        order; ``sheet_id`` is ``None`` when the token has no underscore.
    """
    pairs = []
    for raw in re.findall(r'<sheet\s+token="([^"]+)"\s*/>', markdown):
        head, sep, tail = raw.partition("_")
        # No separator means the tag carries only a spreadsheet token.
        pairs.append((head, tail) if sep else (raw, None))
    return pairs
|
||
|
||
|
||
def get_sheet_metadata(spreadsheet_token):
    """Fetch the sub-sheet list of a spreadsheet (v3 ``sheets/query``).

    Args:
        spreadsheet_token: Token of the spreadsheet to inspect.

    Returns:
        The ``data.sheets`` value of the API response. ``read_sheet_data``
        reads the ``sheet_id`` and ``grid_properties`` entries of its items.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        Exception: If the response body's ``"code"`` is not 0.
    """
    bearer = get_tenant_token()
    endpoint = f"https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/{spreadsheet_token}/sheets/query"
    response = requests.get(
        endpoint,
        timeout=15,
        headers={"Authorization": f"Bearer {bearer}"},
    )
    response.raise_for_status()
    payload = response.json()
    if payload.get("code") != 0:
        raise Exception(f"get_sheet_metadata 失败: {payload}")
    return payload["data"]["sheets"]
|
||
|
||
|
||
def read_sheet_data(spreadsheet_token, sheet_id, cell_range=None):
    """Read the cell values of one sub-sheet of a spreadsheet.

    Args:
        spreadsheet_token: Token of the spreadsheet.
        sheet_id: Id of the sub-sheet to read.
        cell_range: Cell range such as ``"A1:H200"``. When ``None`` the range
            is derived from the sub-sheet's ``grid_properties`` metadata
            (falling back to 200 rows / 10 columns when a count is missing)
            so that the whole sheet is read.

    Returns:
        list[list]: The ``data.valueRange.values`` rows of the response, one
        inner list per row; an empty list when that field is absent.

    Raises:
        requests.HTTPError: If an endpoint answers with an HTTP error status.
        Exception: If ``sheet_id`` is not found in the spreadsheet metadata,
            or the response body's ``"code"`` is not 0.
    """
    bearer = get_tenant_token()

    if cell_range is None:
        # No explicit range: size it from the sub-sheet's grid metadata.
        target = next(
            (
                entry
                for entry in get_sheet_metadata(spreadsheet_token)
                if entry["sheet_id"] == sheet_id
            ),
            None,
        )
        if target is None:
            raise Exception(f"未找到 sheet_id={sheet_id} in spreadsheet={spreadsheet_token}")

        grid = target.get("grid_properties", {})
        last_row = grid.get("row_count", 200)
        # The range needs the last column as a letter, not a number.
        last_col = _col_to_letter(grid.get("column_count", 10))
        cell_range = f"A1:{last_col}{last_row}"

    range_str = f"{sheet_id}!{cell_range}"
    response = requests.get(
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{spreadsheet_token}/values/{range_str}",
        headers={"Authorization": f"Bearer {bearer}"},
        params={"valueRenderOption": "ToString"},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()
    if payload.get("code") != 0:
        raise Exception(f"read_sheet_data 失败: {payload}")

    value_range = payload.get("data", {}).get("valueRange", {})
    rows = value_range.get("values", [])
    logger.info(f"读取 sheet 数据: {len(rows)} 行, range={range_str}")
    return rows
|
||
|
||
|
||
def read_wiki_doc_with_sheet(wiki_url_or_token):
    """Read a wiki document and every sheet embedded in it, in one call.

    Resolves the wiki node, fetches the document markdown, then reads each
    embedded sheet referenced by the markdown. A sheet that fails to load,
    or whose reference carries no sheet id, is logged as a warning and
    skipped; it does not abort the call.

    Args:
        wiki_url_or_token: A Feishu wiki URL (the token is taken from its
            ``/wiki/<token>`` segment) or a bare wiki token.

    Returns:
        dict: {
            "wiki_token": str,
            "obj_token": str,
            "title": str,
            "markdown": str,
            "sheet_token": first successfully read
                (spreadsheet_token, sheet_id), or None,
            "sheet_rows": rows of that first sheet (list[list]), or None,
            "all_sheets": rows of every successfully read sheet,
            "all_sheet_tokens": the matching (spreadsheet_token, sheet_id)
                pairs, in the same order as "all_sheets",
        }

    Raises:
        Exception: Propagated from the node lookup or the markdown fetch.
    """
    # Accept either a full URL or an already-extracted token.
    url_match = re.search(r'/wiki/([A-Za-z0-9]+)', wiki_url_or_token)
    wiki_token = url_match.group(1) if url_match else wiki_url_or_token

    node = get_wiki_node(wiki_token)
    obj_token = node["obj_token"]
    title = node.get("title", "")

    markdown = fetch_doc_markdown(obj_token)

    all_sheet_rows = []
    sheet_tokens = []
    for sheet_info in extract_sheet_tokens(markdown):
        spreadsheet_token, sheet_id = sheet_info
        if not sheet_id:
            logger.warning(f"sheet_token 中未包含 sheet_id: {sheet_info}")
            continue
        # Best effort: one unreadable sheet must not lose the whole document.
        try:
            rows = read_sheet_data(spreadsheet_token, sheet_id)
            logger.info(f"成功读取内嵌sheet ({sheet_id}): {len(rows)} 行")
            all_sheet_rows.append(rows)
            sheet_tokens.append(sheet_info)
        except Exception as e:
            logger.warning(f"读取内嵌sheet ({sheet_id}) 失败: {e}")

    # Backward compatibility: the single-sheet keys mirror the first sheet
    # that was read successfully.
    first_rows = all_sheet_rows[0] if all_sheet_rows else None
    first_token = sheet_tokens[0] if sheet_tokens else None

    return {
        "wiki_token": wiki_token,
        "obj_token": obj_token,
        "title": title,
        "markdown": markdown,
        "sheet_token": first_token,
        "sheet_rows": first_rows,
        "all_sheets": all_sheet_rows,
        "all_sheet_tokens": sheet_tokens,
    }
|
||
|
||
|
||
def _col_to_letter(col_num):
    """Convert a 1-based column number to spreadsheet letters.

    1 -> "A", 26 -> "Z", 27 -> "AA", 28 -> "AB", ...

    Args:
        col_num: 1-based column index.

    Returns:
        str: The column letters; an empty string when ``col_num`` is 0 or
        negative.
    """
    letters = []
    remaining = col_num
    while remaining > 0:
        # Shift to 0-based first: the letter system has no zero digit.
        remaining, digit = divmod(remaining - 1, 26)
        letters.append(chr(65 + digit))
    # Digits were produced least-significant first.
    return "".join(reversed(letters))
|
||
|
||
|
||
def read_bitable_records(app_token, table_id, page_size=50):
    """Read all records of a bitable (multi-dimensional table) data table.

    The records endpoint is paginated. This function follows the
    ``has_more`` / ``page_token`` fields of each response until the last
    page, so tables larger than ``page_size`` are returned in full instead
    of being silently cut off after the first page.

    Args:
        app_token: Bitable app token (bitable_token).
        table_id: Id of the data table.
        page_size: Number of records requested per page.

    Returns:
        list[dict]: One ``{"record_id": str, "fields": dict}`` entry per
        record, in the order the API returned them.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        Exception: If a response body's ``"code"`` is not 0.
    """
    token = get_tenant_token()
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}

    records = []
    page_token = None
    while True:
        params = {"page_size": page_size}
        if page_token:
            params["page_token"] = page_token
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        if data.get("code") != 0:
            raise Exception(f"read_bitable_records 失败: {data}")

        # Tolerate a missing or null "data" / "items" (e.g. an empty table).
        page = data.get("data") or {}
        records.extend(
            {"record_id": item.get("record_id"), "fields": item.get("fields", {})}
            for item in page.get("items") or []
        )

        next_token = page.get("page_token")
        # Stop on the last page. Also stop if the continuation token is
        # missing or unchanged, so a misbehaving response cannot loop forever.
        if not page.get("has_more") or not next_token or next_token == page_token:
            break
        page_token = next_token

    logger.info(f"读取 bitable 记录: {len(records)} 条, app={app_token}, table={table_id}")
    return records
|
||
|
||
|
||
def list_bitable_tables(app_token):
    """List all data tables of a bitable (multi-dimensional table) app.

    The tables endpoint is paginated. This function follows the
    ``has_more`` / ``page_token`` fields of each response until the last
    page, so apps with more tables than one page holds are listed in full.

    Args:
        app_token: Bitable app token.

    Returns:
        list[dict]: One ``{"table_id": str, "name": str}`` entry per table;
        ``name`` is ``""`` when the API omits it.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        Exception: If a response body's ``"code"`` is not 0.
        KeyError: If a table entry has no ``"table_id"``.
    """
    token = get_tenant_token()
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables"
    headers = {"Authorization": f"Bearer {token}"}

    tables = []
    page_token = None
    while True:
        # The first request carries no query parameters, as before.
        params = {"page_token": page_token} if page_token else None
        resp = requests.get(url, params=params, headers=headers, timeout=15)
        resp.raise_for_status()
        data = resp.json()
        if data.get("code") != 0:
            raise Exception(f"list_bitable_tables 失败: {data}")

        # Tolerate a missing or null "data" / "items" (e.g. no tables yet).
        page = data.get("data") or {}
        tables.extend(
            {"table_id": t["table_id"], "name": t.get("name", "")}
            for t in page.get("items") or []
        )

        next_token = page.get("page_token")
        # Stop on the last page. Also stop if the continuation token is
        # missing or unchanged, so a misbehaving response cannot loop forever.
        if not page.get("has_more") or not next_token or next_token == page_token:
            break
        page_token = next_token

    return tables
|
||
|
||
|
||
# ============ CLI test ============
if __name__ == "__main__":
    import sys

    # Manual smoke test: read one wiki document and print a short summary.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python3 feishu_client.py <wiki_url_or_token>")
        print(" 测试读取飞书 wiki 文档及其内嵌 sheet")
        sys.exit(1)

    result = read_wiki_doc_with_sheet(cli_args[0])
    print(f"标题: {result['title']}")
    print(f"obj_token: {result['obj_token']}")
    print(f"markdown长度: {len(result['markdown'])} 字符")
    print(f"sheet_token: {result['sheet_token']}")

    first_sheet = result["sheet_rows"]
    # Row count and header are shown only when a non-empty sheet was read.
    if first_sheet:
        print(f"sheet行数: {len(first_sheet)}")
        print(f"表头: {first_sheet[0]}")
|