ai_member_xiaoyan/skills/interactive-component-json/scripts/feishu_client.py

352 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
飞书文档读取客户端
封装 wiki 文档读取、内嵌 sheet 读取等飞书 API 调用
使用 Bot 身份,凭证来自 /root/.openclaw/credentials/xiaoyan/config.json
"""
import os
import re
import json
import logging
import requests
import subprocess
logger = logging.getLogger("feishu_client")
# Only install a handler when this logger has none yet.
# NOTE(review): indentation was lost in the reviewed copy; addHandler and
# setLevel are assumed to belong inside this guard - confirm.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
    )
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
# JSON credential file; _load_credentials() reads appId/appSecret from the
# first entry of its "apps" list.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
# Directory exported as LARKSUITE_CLI_CONFIG_DIR when lark-cli is invoked.
LARK_CLI_CONFIG_DIR = "/root/.openclaw/credentials/xiaoyan"
def _load_credentials():
"""从配置文件读取 app_id 和 app_secret"""
with open(CRED_FILE, "r") as f:
config = json.load(f)
app = config["apps"][0]
return app["appId"], app["appSecret"]
# Process-wide cache of the most recently issued tenant access token.
_token_cache = {"token": None, "expires_at": 0}


def get_tenant_token():
    """Return the bot's tenant access token, reusing a cached one when valid.

    A cached token is reused as long as it is still at least 60 seconds away
    from its recorded expiry. Otherwise the credentials are loaded and a new
    token is requested from the Feishu auth endpoint, then cached.

    Returns:
        str: The tenant access token.

    Raises:
        requests.HTTPError: The HTTP request returned an error status.
        Exception: The response body carries a non-zero ``code``.
    """
    import time

    now = time.time()
    cached = _token_cache["token"]
    # Keep a 60 second safety margin before the recorded expiry.
    if cached and now < _token_cache["expires_at"] - 60:
        return cached

    app_id, app_secret = _load_credentials()
    credentials = {"app_id": app_id, "app_secret": app_secret}
    resp = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        headers={"Content-Type": "application/json"},
        json=credentials,
        timeout=10,
    )
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") != 0:
        raise Exception(f"获取 tenant_access_token 失败: {data}")

    fresh = data["tenant_access_token"]
    _token_cache["token"] = fresh
    # Lifetime is taken from the response; 7200 seconds is the fallback.
    _token_cache["expires_at"] = now + data.get("expire", 7200)
    logger.info("已获取 tenant_access_token")
    return fresh
def get_wiki_node(wiki_token):
    """Look up a wiki (knowledge base) node by running ``lark-cli`` as the bot.

    Args:
        wiki_token: Token of the wiki node to query.

    Returns:
        dict: The ``data.node`` object from the CLI's JSON output. This
        function itself reads its ``title``, ``obj_type`` and ``obj_token``
        keys for logging.

    Raises:
        Exception: ``lark-cli`` exits with a non-zero status, or its JSON
            output carries a non-zero ``code``.
    """
    # Point lark-cli at this bot's configuration directory.
    env = dict(os.environ, LARKSUITE_CLI_CONFIG_DIR=LARK_CLI_CONFIG_DIR)
    command = [
        "lark-cli", "wiki", "spaces", "get_node",
        "--params", json.dumps({"token": wiki_token}),
        "--as", "bot",
    ]
    result = subprocess.run(
        command, capture_output=True, text=True, env=env, timeout=30,
    )
    if result.returncode != 0:
        raise Exception(f"lark-cli get_node 失败: {result.stderr}")

    data = json.loads(result.stdout)
    if data.get("code") != 0:
        raise Exception(f"get_node API error: {data}")

    node = data["data"]["node"]
    logger.info(f"wiki节点: title={node.get('title')}, obj_type={node.get('obj_type')}, obj_token={node.get('obj_token')}")
    return node
def fetch_doc_markdown(obj_token):
    """Fetch a document's markdown by running ``lark-cli docs +fetch`` as the bot.

    Args:
        obj_token: Token of the document to fetch.

    Returns:
        str: The ``data.markdown`` field of the CLI's JSON output.

    Raises:
        Exception: ``lark-cli`` exits with a non-zero status, or the
            markdown field is missing or empty.
    """
    # Point lark-cli at this bot's configuration directory.
    env = dict(os.environ, LARKSUITE_CLI_CONFIG_DIR=LARK_CLI_CONFIG_DIR)
    command = ["lark-cli", "docs", "+fetch", "--doc", obj_token, "--as", "bot"]
    result = subprocess.run(
        command, capture_output=True, text=True, env=env, timeout=60,
    )
    if result.returncode != 0:
        raise Exception(f"lark-cli docs +fetch 失败: {result.stderr}")

    data = json.loads(result.stdout)
    markdown = data.get("data", {}).get("markdown", "")
    if markdown:
        logger.info(f"获取文档内容: {len(markdown)} 字符")
        return markdown
    raise Exception(f"文档内容为空, obj_token={obj_token}")
def extract_sheet_token(markdown):
    """Return the first embedded sheet reference found in document markdown.

    Args:
        markdown: Document markdown text.

    Returns:
        The first item produced by ``extract_sheet_tokens`` (a
        ``(spreadsheet_token, sheet_id)`` tuple), or ``None`` when the
        markdown contains no embedded sheet.
    """
    found = extract_sheet_tokens(markdown)
    if not found:
        return None
    return found[0]
def extract_sheet_tokens(markdown):
    """Collect every embedded sheet reference from document markdown.

    Embedded sheets are recognised as ``<sheet token="..."/>`` tags. Each
    token is split at its first underscore into a spreadsheet token and a
    sheet ID; a token without an underscore is returned whole with ``None``
    as the sheet ID.

    Args:
        markdown: Document markdown text.

    Returns:
        list[tuple]: ``[(spreadsheet_token, sheet_id), ...]`` in document
        order; empty when no tag matches.
    """
    pairs = []
    for full_token in re.findall(r'<sheet\s+token="([^"]+)"\s*/>', markdown):
        spreadsheet, separator, sheet_id = full_token.partition("_")
        if separator:
            pairs.append((spreadsheet, sheet_id))
        else:
            pairs.append((full_token, None))
    return pairs
def get_sheet_metadata(spreadsheet_token):
    """Query the sub-sheet list of a spreadsheet via the sheets v3 API.

    Args:
        spreadsheet_token: Token of the spreadsheet.

    Returns:
        The ``data.sheets`` value of the API response.

    Raises:
        requests.HTTPError: The HTTP request returned an error status.
        Exception: The response body carries a non-zero ``code``.
    """
    token = get_tenant_token()
    url = f"https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/{spreadsheet_token}/sheets/query"
    auth = {"Authorization": f"Bearer {token}"}

    resp = requests.get(url, headers=auth, timeout=15)
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") == 0:
        return data["data"]["sheets"]
    raise Exception(f"get_sheet_metadata 失败: {data}")
def read_sheet_data(spreadsheet_token, sheet_id, cell_range=None):
    """Read cell values of one sub-sheet through the sheets v2 values API.

    Args:
        spreadsheet_token: Token of the spreadsheet.
        sheet_id: ID of the sub-sheet to read.
        cell_range: Range such as ``"A1:H200"``. When ``None`` the range is
            derived from the sheet's ``grid_properties`` metadata, starting
            at ``A1`` (falling back to 200 rows / 10 columns when the
            metadata omits a count).

    Returns:
        The ``data.valueRange.values`` value of the response (a list of
        rows), or an empty list when that path is absent.

    Raises:
        requests.HTTPError: An HTTP request returned an error status.
        Exception: The sheet ID is not found in the spreadsheet metadata,
            or the response body carries a non-zero ``code``.
    """
    token = get_tenant_token()

    if cell_range is None:
        # No explicit range: size it from the sheet's grid metadata.
        sheets = get_sheet_metadata(spreadsheet_token)
        target = next((s for s in sheets if s["sheet_id"] == sheet_id), None)
        if target is None:
            raise Exception(f"未找到 sheet_id={sheet_id} in spreadsheet={spreadsheet_token}")
        grid = target.get("grid_properties", {})
        row_count = grid.get("row_count", 200)
        # Convert the 1-based column count into its letter form.
        end_col = _col_to_letter(grid.get("column_count", 10))
        cell_range = f"A1:{end_col}{row_count}"

    range_str = f"{sheet_id}!{cell_range}"
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{spreadsheet_token}/values/{range_str}"
    resp = requests.get(
        url,
        params={"valueRenderOption": "ToString"},
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    resp.raise_for_status()

    data = resp.json()
    if data.get("code") != 0:
        raise Exception(f"read_sheet_data 失败: {data}")

    value_range = data.get("data", {}).get("valueRange", {})
    rows = value_range.get("values", [])
    logger.info(f"读取 sheet 数据: {len(rows)} 行, range={range_str}")
    return rows
def read_wiki_doc_with_sheet(wiki_url_or_token):
    """Read a wiki document's markdown together with its embedded sheets.

    The wiki token is extracted from a ``/wiki/<token>`` URL segment when
    present; otherwise the argument is used as the token directly. Each
    embedded sheet that carries a sheet ID is read; a sheet that fails to
    read is logged as a warning and skipped.

    Args:
        wiki_url_or_token: A Feishu wiki URL or a bare wiki token.

    Returns:
        dict: {
            "wiki_token": str,
            "obj_token": str,
            "title": str,
            "markdown": str,
            "sheet_token": first successfully read
                (spreadsheet_token, sheet_id), or None,
            "sheet_rows": rows of that first sheet, or None,
            "all_sheets": list of row lists, one per sheet read,
            "all_sheet_tokens": list of (spreadsheet_token, sheet_id),
                parallel to "all_sheets",
        }
    """
    # Resolve the wiki token from a URL, or take the argument verbatim.
    match = re.search(r'/wiki/([A-Za-z0-9]+)', wiki_url_or_token)
    wiki_token = match.group(1) if match else wiki_url_or_token

    # Resolve the node, then fetch the underlying document.
    node = get_wiki_node(wiki_token)
    obj_token = node["obj_token"]
    title = node.get("title", "")
    markdown = fetch_doc_markdown(obj_token)

    # Read every embedded sheet that has a usable sheet ID.
    all_sheet_rows = []
    sheet_tokens = []
    for sheet_info in extract_sheet_tokens(markdown):
        spreadsheet_token, sheet_id = sheet_info
        if not sheet_id:
            logger.warning(f"sheet_token 中未包含 sheet_id: {sheet_info}")
            continue
        try:
            sheet_rows = read_sheet_data(spreadsheet_token, sheet_id)
            logger.info(f"成功读取内嵌sheet ({sheet_id}): {len(sheet_rows)}")
            all_sheet_rows.append(sheet_rows)
            sheet_tokens.append(sheet_info)
        except Exception as e:
            # Best effort: one unreadable sheet must not fail the whole read.
            logger.warning(f"读取内嵌sheet ({sheet_id}) 失败: {e}")

    # Legacy keys expose only the first sheet; the all_* keys carry everything.
    first_rows = all_sheet_rows[0] if all_sheet_rows else None
    first_token = sheet_tokens[0] if sheet_tokens else None
    return {
        "wiki_token": wiki_token,
        "obj_token": obj_token,
        "title": title,
        "markdown": markdown,
        "sheet_token": first_token,
        "sheet_rows": first_rows,
        "all_sheets": all_sheet_rows,
        "all_sheet_tokens": sheet_tokens,
    }
def _col_to_letter(col_num):
"""列号(1-based) → 字母A, B, ..., Z, AA, AB, ..."""
result = ""
while col_num > 0:
col_num -= 1
result = chr(65 + col_num % 26) + result
col_num //= 26
return result
def read_bitable_records(app_token, table_id, page_size=50):
    """Read all records of a Bitable (multi-dimensional table) data table.

    Pages are fetched one after another by following the ``has_more`` /
    ``page_token`` fields of each response, so tables larger than one page
    are returned completely instead of being cut off after the first page.

    Args:
        app_token: Bitable app token.
        table_id: ID of the data table.
        page_size: Number of records requested per page.

    Returns:
        list[dict]: One ``{"record_id": str, "fields": dict}`` per record,
        in the order the API returned them.

    Raises:
        requests.HTTPError: An HTTP request returned an error status.
        Exception: A response body carries a non-zero ``code``.
    """
    token = get_tenant_token()
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}

    records = []
    page_token = None
    while True:
        params = {"page_size": page_size}
        if page_token:
            params["page_token"] = page_token
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        if data.get("code") != 0:
            raise Exception(f"read_bitable_records 失败: {data}")

        # "data" or "items" may be present but null; treat both as empty.
        payload = data.get("data") or {}
        for item in payload.get("items") or []:
            records.append({
                "record_id": item.get("record_id"),
                "fields": item.get("fields", {}),
            })

        next_token = payload.get("page_token")
        # Stop on the last page, and also when the cursor is missing or does
        # not advance, so a malformed response cannot loop forever.
        if not payload.get("has_more") or not next_token or next_token == page_token:
            break
        page_token = next_token

    logger.info(f"读取 bitable 记录: {len(records)} 条, app={app_token}, table={table_id}")
    return records
def list_bitable_tables(app_token):
    """List all data tables of a Bitable (multi-dimensional table) app.

    Pages are fetched one after another by following the ``has_more`` /
    ``page_token`` fields of each response, so apps with more tables than
    fit on one page are listed completely.

    Args:
        app_token: Bitable app token.

    Returns:
        list[dict]: One ``{"table_id": str, "name": str}`` per table;
        ``name`` falls back to an empty string when absent.

    Raises:
        requests.HTTPError: An HTTP request returned an error status.
        Exception: A response body carries a non-zero ``code``.
        KeyError: A table entry has no ``table_id``.
    """
    token = get_tenant_token()
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables"
    headers = {"Authorization": f"Bearer {token}"}

    tables = []
    page_token = None
    while True:
        # Only send a cursor once the API has handed one out.
        params = {"page_token": page_token} if page_token else None
        resp = requests.get(url, params=params, headers=headers, timeout=15)
        resp.raise_for_status()
        data = resp.json()
        if data.get("code") != 0:
            raise Exception(f"list_bitable_tables 失败: {data}")

        # "data" or "items" may be present but null; treat both as empty.
        payload = data.get("data") or {}
        tables.extend(
            {"table_id": t["table_id"], "name": t.get("name", "")}
            for t in payload.get("items") or []
        )

        next_token = payload.get("page_token")
        # Stop on the last page, and also when the cursor is missing or does
        # not advance, so a malformed response cannot loop forever.
        if not payload.get("has_more") or not next_token or next_token == page_token:
            break
        page_token = next_token

    return tables
# ============ CLI smoke test ============
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        # No wiki URL/token supplied: show usage and exit with an error code.
        print("用法: python3 feishu_client.py <wiki_url_or_token>")
        print(" 测试读取飞书 wiki 文档及其内嵌 sheet")
        sys.exit(1)

    result = read_wiki_doc_with_sheet(cli_args[0])
    print(f"标题: {result['title']}")
    print(f"obj_token: {result['obj_token']}")
    print(f"markdown长度: {len(result['markdown'])} 字符")
    print(f"sheet_token: {result['sheet_token']}")
    # Sheet details are printed only when a sheet was read and is non-empty.
    if result["sheet_rows"]:
        print(f"sheet行数: {len(result['sheet_rows'])}")
        print(f"表头: {result['sheet_rows'][0]}")