ai_member_xiaoxi/business_knowledge/fetch_wiki_docs.py
小溪 8ab4e07490 更新:新增数据库连接信息和业务知识库
- 新增 TOOLS.md 包含所有数据库连接信息
- 新增 business_knowledge/ 目录,包含:
  * 13个 SQL 查询文档
  * 业务术语表
  * 数据表说明
  * 16个数据抽取脚本
  * 知识总结文档
2026-03-02 18:29:55 +08:00

84 lines
3.9 KiB
Python

#!/usr/bin/env python3
"""
Initialise the local knowledge base for a set of Feishu Wiki documents.

For every page listed in ``wiki_pages`` this script writes a placeholder
Markdown file (title, timestamp, document token and the ``feishu_doc read``
command to run) plus a ``README.md`` index into the ``sql_queries`` directory.
It does not download any document content itself.
"""
import json
import os
from datetime import datetime
# Wiki sub-pages to process. Each entry is a dict with three string keys:
#   - "title":      used to derive the output file name and the index entry.
#   - "obj_token":  written into the placeholder file and into the
#                   `feishu_doc read` command shown there.
#   - "node_token": not read anywhere in the visible code; presumably the wiki
#                   node identifier of the page -- confirm before relying on it.
wiki_pages = [
{"node_token": "O7QvwdY8piO8aUkhxYecA1qZnBe", "title": "全字段大表", "obj_token": "VVyWd5491o6tuqxceCVci6dVnFd"},
{"node_token": "Y6Iywqf75iepbUkvJzLcfiUYnkg", "title": "平均通关时长", "obj_token": "EpP7d6h2SoaTyJx1lZRcXXdLnVe"},
{"node_token": "KQihwMjO9i1zjFkqTgBcq67Snzc", "title": "新增注册用户数by渠道", "obj_token": "AzRPddp97o7To8x8VkxcFGr8nBh"},
{"node_token": "Zt7RwfGLWiacslkO2glcheWjnwf", "title": "课程进入完成率", "obj_token": "PwIydfZcHo5eZgxi8XLcOtjOnSb"},
{"node_token": "LTaiw3OmUi2pcckDWuNcyBIVnAd", "title": "账号角色年龄地址", "obj_token": "CUa2du2sSoNFSRxl3vFc8ucInEm"},
{"node_token": "ZAPJwIODRiNYE5kTuNtcpSlvnIX", "title": "退费率", "obj_token": "DC1Qdhpitowt9lxxo1acEzOwnFc"},
{"node_token": "Cb3KwPWLriG7GgkN73pcM0Idnch", "title": "销转学习进度", "obj_token": "G1p9dhK63oLWMzxyGQ8csZGMnDh"},
{"node_token": "EBEiwQsw2iOtgekDldHcQxgwnOh", "title": "班主任关注数据", "obj_token": "NcVqdRKtrowglNxs9CocDekunje"},
{"node_token": "BZPkwARxiixUZRk4BW9cij50nDe", "title": "端内GMV", "obj_token": "FkVCd1AruoD9xWxxVpzc16hinVh"},
{"node_token": "AQpnwpsfOixYGtk4jf0c6t9XncG", "title": "端内用户课程进入完成率", "obj_token": "Ueu7dtgSHoNYfsxCDHmcY6E4nid"},
{"node_token": "PyqEwXXqsiQybPkpGbscUjUFnOg", "title": "端内购课用户学习行为", "obj_token": "ZTxod4IUWo5yMexf8AHcBbpFnMg"},
{"node_token": "OyXlwY2vyisvV1kc3HhcMyMVnTd", "title": "转化率", "obj_token": "ATJ0dfajQo5CSexQd8hc9i3pnWe"},
{"node_token": "MWpZwV01fitaKjkCRSxckMUunRb", "title": "课程ID映射", "obj_token": "GenUdsXCloUdYhxMvxqcWBMdnhb"}
]
def safe_filename(title):
    """Return *title* reduced to characters that are safe in a file name.

    Characters are kept when ``str.isalnum`` accepts them (this includes CJK
    characters) or when they are a space, hyphen or underscore; everything
    else is dropped. Trailing whitespace is then stripped and every remaining
    space is replaced by an underscore. Leading spaces are not stripped, so
    they become underscores. The result may be an empty string.
    """
    allowed_punctuation = {" ", "-", "_"}
    kept = [ch for ch in title if ch.isalnum() or ch in allowed_punctuation]
    trimmed = "".join(kept).rstrip()
    return trimmed.replace(" ", "_")
def main():
    """Create placeholder Markdown files and an index for every wiki page.

    For each entry in the module-level ``wiki_pages`` list, a file named after
    the sanitised page title is written into the ``sql_queries`` directory
    (created if missing, relative to the current working directory). Each
    file records the title, the time of writing, the document token and the
    ``feishu_doc read`` command for that token. A ``README.md`` linking to
    every generated file is written afterwards. Existing files with the same
    names are overwritten. Progress is reported on standard output.
    """
    banner = "=" * 60
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    print(banner)
    print("飞书 Wiki 文档批量获取")
    print(banner)

    output_dir = "sql_queries"
    os.makedirs(output_dir, exist_ok=True)

    total = len(wiki_pages)
    print(f"\n{total} 个文档需要获取")
    print(f"输出目录: {output_dir}")

    # Index fragments are collected here and joined once before writing.
    index_parts = [
        "# SQL 查询文档索引\n\n",
        f"创建时间: {datetime.now().strftime(timestamp_format)}\n\n",
        "## 文档列表\n\n",
    ]

    for position, page in enumerate(wiki_pages, start=1):
        title = page['title']
        filename = safe_filename(title) + ".md"
        filepath = os.path.join(output_dir, filename)

        print(f"\n[{position}/{total}] 处理: {title}")
        print(f" 文件: {filepath}")

        # Write the placeholder document; the timestamp is taken at write time.
        with open(filepath, 'w', encoding='utf-8') as doc_file:
            written_at = datetime.now().strftime(timestamp_format)
            token = page['obj_token']
            doc_file.writelines([
                f"# {title}\n\n",
                f"**获取时间:** {written_at}\n\n",
                f"**飞书文档 Token:** {token}\n\n",
                "**注意:** 此文档需要通过 feishu_doc 工具读取完整内容\n\n",
                "---\n\n",
                "## 使用说明\n\n",
                "使用以下命令读取完整文档内容:\n\n",
                "```bash\n",
                f"feishu_doc read {token}\n",
                "```\n",
            ])

        # The index links to the file by its bare name because README.md
        # is written into the same directory.
        index_parts.append(f"- [{title}]({filename})\n")
        print(" ✅ 已创建占位文件")

    index_path = os.path.join(output_dir, "README.md")
    with open(index_path, 'w', encoding='utf-8') as index_file:
        index_file.write("".join(index_parts))

    print("\n" + banner)
    print("✅ 初始化完成")
    print(banner)
    print("\n下一步: 使用 feishu_doc 工具逐个读取文档内容")
    print("或者让我继续为你读取这些文档的完整内容")
# Generate the placeholder files only when run as a script, not on import.
if __name__ == "__main__":
    main()