- 新增 TOOLS.md 包含所有数据库连接信息 - 新增 business_knowledge/ 目录,包含: * 13个 SQL 查询文档 * 业务术语表 * 数据表说明 * 16个数据抽取脚本 * 知识总结文档
84 lines
3.9 KiB
Python
84 lines
3.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
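Create local placeholder Markdown files and a README index for a fixed list of Feishu Wiki pages.
The page content itself is not downloaded here; each placeholder records the document token and the feishu_doc command needed to read it.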
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
|
|
# Wiki sub-pages to process. Each row is (node_token, title, obj_token) and is
# expanded into a dict with exactly those keys, in that order. The title is
# used for the local file name and the obj_token is written into the
# placeholder file by main().
wiki_pages = [
    {"node_token": node_token, "title": title, "obj_token": obj_token}
    for node_token, title, obj_token in (
        ("O7QvwdY8piO8aUkhxYecA1qZnBe", "全字段大表", "VVyWd5491o6tuqxceCVci6dVnFd"),
        ("Y6Iywqf75iepbUkvJzLcfiUYnkg", "平均通关时长", "EpP7d6h2SoaTyJx1lZRcXXdLnVe"),
        ("KQihwMjO9i1zjFkqTgBcq67Snzc", "新增注册用户数by渠道", "AzRPddp97o7To8x8VkxcFGr8nBh"),
        ("Zt7RwfGLWiacslkO2glcheWjnwf", "课程进入完成率", "PwIydfZcHo5eZgxi8XLcOtjOnSb"),
        ("LTaiw3OmUi2pcckDWuNcyBIVnAd", "账号角色年龄地址", "CUa2du2sSoNFSRxl3vFc8ucInEm"),
        ("ZAPJwIODRiNYE5kTuNtcpSlvnIX", "退费率", "DC1Qdhpitowt9lxxo1acEzOwnFc"),
        ("Cb3KwPWLriG7GgkN73pcM0Idnch", "销转学习进度", "G1p9dhK63oLWMzxyGQ8csZGMnDh"),
        ("EBEiwQsw2iOtgekDldHcQxgwnOh", "班主任关注数据", "NcVqdRKtrowglNxs9CocDekunje"),
        ("BZPkwARxiixUZRk4BW9cij50nDe", "端内GMV", "FkVCd1AruoD9xWxxVpzc16hinVh"),
        ("AQpnwpsfOixYGtk4jf0c6t9XncG", "端内用户课程进入完成率", "Ueu7dtgSHoNYfsxCDHmcY6E4nid"),
        ("PyqEwXXqsiQybPkpGbscUjUFnOg", "端内购课用户学习行为", "ZTxod4IUWo5yMexf8AHcBbpFnMg"),
        ("OyXlwY2vyisvV1kc3HhcMyMVnTd", "转化率", "ATJ0dfajQo5CSexQd8hc9i3pnWe"),
        ("MWpZwV01fitaKjkCRSxckMUunRb", "课程ID映射", "GenUdsXCloUdYhxMvxqcWBMdnhb"),
    )
]
|
|
|
|
def safe_filename(title, fallback="untitled"):
    """Return a file-name-friendly string derived from *title*.

    Characters for which ``str.isalnum()`` is true are kept (this includes
    non-ASCII letters such as CJK characters), as are spaces, hyphens and
    underscores; every other character is dropped. Trailing spaces are then
    removed and the remaining spaces are replaced by underscores.

    Args:
        title: Human-readable title to sanitise.
        fallback: Name returned when nothing of *title* survives sanitising
            (for example ``""``, ``"   "`` or ``"???"``), so that callers
            appending an extension never produce a bare ``".md"``.

    Returns:
        The sanitised name without any file extension, or *fallback* if the
        sanitised name would be empty.
    """
    kept = "".join(ch for ch in title if ch.isalnum() or ch in " -_")
    cleaned = kept.rstrip().replace(" ", "_")
    # An empty result would turn into a hidden, extension-only file name.
    return cleaned or fallback
|
|
|
|
def main():
    """Create one placeholder Markdown file per wiki page plus a README index.

    Everything is written under ``sql_queries`` (relative to the current
    working directory), which is created if it does not exist. Progress is
    reported on stdout. No document content is fetched: each placeholder only
    records the title, a timestamp, the document token and the
    ``feishu_doc read`` command for that token.
    """
    banner = "=" * 60
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    print(banner)
    print("飞书 Wiki 文档批量获取")
    print(banner)

    output_dir = "sql_queries"
    os.makedirs(output_dir, exist_ok=True)

    total = len(wiki_pages)
    print(f"\n共 {total} 个文档需要获取")
    print(f"输出目录: {output_dir}")

    # Index pieces are collected here and joined once when README.md is written.
    index_parts = [
        "# SQL 查询文档索引\n\n",
        f"创建时间: {datetime.now().strftime(timestamp_format)}\n\n",
        "## 文档列表\n\n",
    ]

    for position, page in enumerate(wiki_pages, 1):
        title = page['title']
        filename = safe_filename(title) + ".md"
        filepath = os.path.join(output_dir, filename)

        print(f"\n[{position}/{total}] 处理: {title}")
        print(f" 文件: {filepath}")

        # Placeholder file: header, metadata, then usage instructions.
        with open(filepath, 'w', encoding='utf-8') as stub:
            doc_token = page['obj_token']
            stub.writelines([
                f"# {title}\n\n",
                f"**获取时间:** {datetime.now().strftime(timestamp_format)}\n\n",
                f"**飞书文档 Token:** {doc_token}\n\n",
                "**注意:** 此文档需要通过 feishu_doc 工具读取完整内容\n\n",
                "---\n\n",
                "## 使用说明\n\n",
                "使用以下命令读取完整文档内容:\n\n",
                "```bash\n",
                f"feishu_doc read {doc_token}\n",
                "```\n",
            ])

        # The index links to the file by its name relative to the README.
        index_parts.append(f"- [{title}]({filename})\n")

        print(" ✅ 已创建占位文件")

    readme_path = os.path.join(output_dir, "README.md")
    with open(readme_path, 'w', encoding='utf-8') as readme:
        readme.write("".join(index_parts))

    print("\n" + banner)
    print("✅ 初始化完成")
    print(banner)
    print("\n下一步: 使用 feishu_doc 工具逐个读取文档内容")
    print("或者让我继续为你读取这些文档的完整内容")
|
|
|
|
# Script entry point: generate the placeholder files only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|