ai_member_xiaoyan/skills/interactive-component-json/scripts/db_manager.py

307 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
互动组件配置 SQLite 数据库管理器
负责建表、CRUD操作、查询、导出
"""
import sqlite3
import json
import os
import time
from datetime import datetime
# Location of the SQLite file: a "db" directory that sits next to the
# directory containing this script, i.e. <parent-of-script-dir>/db/components.db.
DB_PATH = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "db",
    "components.db",
)
def get_connection():
    """Open and configure a SQLite connection to ``DB_PATH``.

    The parent directory of ``DB_PATH`` is created when missing. The
    returned connection yields ``sqlite3.Row`` rows (so callers can index by
    column name) and has WAL journaling and foreign-key enforcement enabled.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: if opening or configuring the database fails. The
            half-configured connection is closed before the error propagates.
    """
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    conn = sqlite3.connect(DB_PATH, timeout=30)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        # NOTE(review): connect(timeout=30) and this PRAGMA both configure the
        # busy timeout; the PRAGMA runs last, so presumably 5000 ms is the
        # value that takes effect -- confirm which one is intended.
        conn.execute("PRAGMA busy_timeout=5000")
    except Exception:
        # Do not leak the handle when configuration fails part-way.
        conn.close()
        raise
    return conn
def init_db():
    """Create the database schema if missing and migrate older databases.

    Steps, in order:

    1. Create the ``components`` table (one row per interactive component).
    2. Create the ``generation_logs`` table (per-step processing log,
       deleted together with its component via ``ON DELETE CASCADE``).
    3. Add any of the later-introduced ``*_json`` columns that an already
       existing ``components`` table does not have yet.
    4. Create the lookup indexes.

    Every statement is guarded by ``IF NOT EXISTS`` or by a column-presence
    check, so the function can be called repeatedly. On success the changes
    are committed and a confirmation line is printed. The connection is
    closed whether or not a statement fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Main components table.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS components (
                component_id INTEGER PRIMARY KEY AUTOINCREMENT,
                script_id TEXT NOT NULL, -- 剧本标识飞书文档obj_token
                script_title TEXT, -- 剧本标题
                component_index INTEGER DEFAULT 0, -- 组件在剧本中的顺序号
                category TEXT NOT NULL DEFAULT 'mid'
                    CHECK(category IN ('mid','core')), -- 中互动 or 核心互动
                cType TEXT NOT NULL, -- 英文类型标识(如 mid_dialog_repeat
                type_name TEXT NOT NULL, -- 中文类型名(如 对话朗读)
                has_image INTEGER DEFAULT 0, -- 是否为配图变体
                level TEXT DEFAULT 'L1', -- 级别L1/L2
                unit_id TEXT, -- 单元标识
                knowledge_points_raw TEXT, -- 原始知识点文本(来自剧本)
                knowledge_points TEXT, -- JSON: 匹配后的知识点结构化数据
                raw_config TEXT, -- 原始教研配置文本来自内嵌sheet的"组件"列)
                parsed_data TEXT, -- JSON: 解析后的结构化中间数据
                ai_derived_fields TEXT, -- JSON: AI衍生字段
                final_config_json TEXT, -- JSON: 最终jsonData (中互动用)
                kp_info_json TEXT, -- JSON: 最终kpInfo
                task_info_json TEXT, -- JSON: 核心互动 taskInfo
                material_info_json TEXT, -- JSON: 合作阅读/听力 materialInfo
                flow_info_json TEXT, -- JSON: 合作阅读/听力 flowInfo
                study_info_json TEXT, -- JSON: 核心互动 studyInfo
                intermediate_json TEXT, -- JSON: 核心互动中间结果
                question_group_json TEXT, -- JSON: 看图选词/听力选择 questionGroup
                dialog_setting_json TEXT, -- JSON: 口语快答/妙问 dialogSetting
                dialog_config_json TEXT, -- JSON: 口语/看图说话 dialogConfig
                image_info_json TEXT, -- JSON: 看图说话 imageInfo
                option_list_json TEXT, -- JSON: 看图说话 optionList
                question_list_json TEXT, -- JSON: 看图说话/听力拖拽 questionList
                pre_dialog_json TEXT, -- JSON: 听力拖拽 preDialog
                dialog_list_json TEXT, -- JSON: 听力拖拽 dialogList
                text_info_json TEXT, -- JSON: 写作类 textInfo
                eval_info_json TEXT, -- JSON: 写作类 evalInfo
                status TEXT DEFAULT 'draft'
                    CHECK(status IN ('draft','parsed','matched','generated','validated','exported')),
                bitable_token TEXT, -- 对应飞书多维表格token
                db_table TEXT, -- 目标数据库表名
                created_at TEXT DEFAULT (datetime('now','localtime')),
                updated_at TEXT DEFAULT (datetime('now','localtime'))
            )
        """)

        # Generation log table.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS generation_logs (
                log_id INTEGER PRIMARY KEY AUTOINCREMENT,
                component_id INTEGER NOT NULL,
                step TEXT NOT NULL, -- parse/match_type/match_knowledge/ai_derive/generate_json/validate
                input_summary TEXT, -- 输入摘要
                output_summary TEXT, -- 输出摘要
                model_used TEXT, -- 使用的AI模型如有
                success INTEGER DEFAULT 1, -- 1=成功 0=失败
                error_message TEXT, -- 失败时的错误信息
                duration_ms INTEGER, -- 耗时(毫秒)
                created_at TEXT DEFAULT (datetime('now','localtime')),
                FOREIGN KEY (component_id) REFERENCES components(component_id) ON DELETE CASCADE
            )
        """)

        # Migration: a database created by an earlier version of the schema
        # may lack these columns, because CREATE TABLE IF NOT EXISTS leaves an
        # existing table untouched. PRAGMA table_info returns the column name
        # at index 1 of each row.
        existing_cols = {
            row[1]
            for row in cursor.execute("PRAGMA table_info(components)").fetchall()
        }
        migration_columns = [
            ("task_info_json", "TEXT"),
            ("material_info_json", "TEXT"),
            ("flow_info_json", "TEXT"),
            ("study_info_json", "TEXT"),
            ("intermediate_json", "TEXT"),
            ("question_group_json", "TEXT"),
            ("dialog_setting_json", "TEXT"),
            ("dialog_config_json", "TEXT"),
            ("image_info_json", "TEXT"),
            ("option_list_json", "TEXT"),
            ("question_list_json", "TEXT"),
            ("pre_dialog_json", "TEXT"),
            ("dialog_list_json", "TEXT"),
            ("text_info_json", "TEXT"),
            ("eval_info_json", "TEXT"),
        ]
        for col, col_type in migration_columns:
            if col not in existing_cols:
                # Identifiers cannot be bound as parameters; the names come
                # only from the hard-coded list above, never from user input.
                cursor.execute(f"ALTER TABLE components ADD COLUMN {col} {col_type}")

        # Indexes.
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_script ON components(script_id)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_type ON components(cType)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_status ON components(status)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_logs_component ON generation_logs(component_id)")

        conn.commit()
    finally:
        # Release the handle even when a statement above raised.
        conn.close()
    print(f"✅ 数据库初始化完成: {DB_PATH}")
# ============ CRUD 操作 ============
def insert_component(script_id, cType, type_name, category="mid",
                     has_image=False, level="L1", unit_id=None,
                     knowledge_points_raw=None, raw_config=None,
                     component_index=0, script_title=None,
                     bitable_token=None, db_table=None):
    """Insert a new row into ``components`` and return its id.

    Args:
        script_id: Value for the NOT NULL ``script_id`` column.
        cType: Value for the NOT NULL ``cType`` column.
        type_name: Value for the NOT NULL ``type_name`` column.
        category: ``'mid'`` or ``'core'`` (enforced by a CHECK constraint).
        has_image: Truthy values are stored as 1, falsy values as 0.
        level, unit_id, knowledge_points_raw, raw_config, component_index,
        script_title, bitable_token, db_table: Stored unchanged in the
            columns of the same name.

    Returns:
        The ``component_id`` (``lastrowid``) of the inserted row.

    Raises:
        sqlite3.Error: if the insert fails, e.g. on a constraint violation.
            The connection is closed before the error propagates.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO components (script_id, script_title, component_index, category, cType, type_name,
                                    has_image, level, unit_id, knowledge_points_raw, raw_config,
                                    bitable_token, db_table)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (script_id, script_title, component_index, category, cType, type_name,
              1 if has_image else 0, level, unit_id, knowledge_points_raw, raw_config,
              bitable_token, db_table))
        component_id = cursor.lastrowid
        conn.commit()
        return component_id
    finally:
        # Close on success and on failure so the handle never leaks.
        conn.close()
def update_component_field(component_id, field, value):
    """Set one whitelisted column of a component and refresh ``updated_at``.

    Args:
        component_id: Primary key of the row to update.
        field: Column name; must be one of the names in ``allowed_fields``
            below (the data/JSON columns and ``status``).
        value: New value, bound as a SQL parameter.

    Raises:
        ValueError: if ``field`` is not whitelisted. Raised before any
            database access.
        sqlite3.Error: if the UPDATE fails (e.g. a ``status`` value rejected
            by the CHECK constraint). The connection is closed first.
    """
    allowed_fields = {'parsed_data', 'knowledge_points', 'ai_derived_fields',
                      'final_config_json', 'kp_info_json', 'status', 'raw_config',
                      'knowledge_points_raw',
                      'task_info_json', 'material_info_json', 'flow_info_json',
                      'study_info_json', 'intermediate_json',
                      'question_group_json',
                      'dialog_setting_json', 'dialog_config_json',
                      'image_info_json', 'option_list_json', 'question_list_json',
                      'pre_dialog_json', 'dialog_list_json',
                      'text_info_json', 'eval_info_json'}
    if field not in allowed_fields:
        raise ValueError(f"不允许更新的字段: {field}")

    conn = get_connection()
    try:
        # The column name is interpolated because identifiers cannot be bound
        # as parameters; the whitelist check above is what makes this safe.
        conn.execute(f"""
            UPDATE components SET {field} = ?, updated_at = datetime('now','localtime')
            WHERE component_id = ?
        """, (value, component_id))
        conn.commit()
    finally:
        conn.close()
def get_component(component_id):
    """Fetch a single component by primary key.

    Args:
        component_id: Value of ``components.component_id`` to look up.

    Returns:
        The row as a plain ``dict`` keyed by column name, or ``None`` when no
        row has that id.
    """
    conn = get_connection()
    try:
        row = conn.execute("SELECT * FROM components WHERE component_id = ?",
                           (component_id,)).fetchone()
    finally:
        # Close even when the query raises.
        conn.close()
    return dict(row) if row else None
def list_components(script_id=None, component_type=None, status=None, limit=100):
    """Return components matching the given optional filters.

    Args:
        script_id: If truthy, keep only rows with this ``script_id``.
        component_type: If truthy, keep only rows with this ``cType``.
        status: If truthy, keep only rows with this ``status``.
        limit: Maximum number of rows. ``None`` means no upper bound
            (translated to SQLite's ``LIMIT -1``).

    Returns:
        A list of row dicts ordered by ``script_id`` then
        ``component_index``.
    """
    query = "SELECT * FROM components WHERE 1=1"
    params = []
    if script_id:
        query += " AND script_id = ?"
        params.append(script_id)
    if component_type:
        query += " AND cType = ?"
        params.append(component_type)
    if status:
        query += " AND status = ?"
        params.append(status)
    query += " ORDER BY script_id, component_index LIMIT ?"
    # SQLite rejects a NULL LIMIT but treats a negative one as "unbounded".
    params.append(-1 if limit is None else limit)

    conn = get_connection()
    try:
        rows = conn.execute(query, params).fetchall()
    finally:
        # Close even when the query raises.
        conn.close()
    return [dict(r) for r in rows]
def insert_log(component_id, step, input_summary=None, output_summary=None,
               model_used=None, success=True, error_message=None, duration_ms=None):
    """Append one row to ``generation_logs``.

    Args:
        component_id: Component the log entry belongs to (foreign key to
            ``components.component_id``).
        step: Name of the processing step being logged.
        input_summary, output_summary, model_used, error_message,
        duration_ms: Stored unchanged in the columns of the same name.
        success: Truthy values are stored as 1, falsy values as 0.

    Raises:
        sqlite3.Error: if the insert fails, e.g. when ``component_id`` does
            not reference an existing component (foreign keys are enforced
            on connections from ``get_connection``). The connection is
            closed before the error propagates.
    """
    conn = get_connection()
    try:
        conn.execute("""
            INSERT INTO generation_logs (component_id, step, input_summary, output_summary,
                                         model_used, success, error_message, duration_ms)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (component_id, step, input_summary, output_summary,
              model_used, 1 if success else 0, error_message, duration_ms))
        conn.commit()
    finally:
        conn.close()
def get_logs(component_id):
    """Return all generation log rows of one component, oldest first.

    Args:
        component_id: Component whose logs are requested.

    Returns:
        A list of row dicts ordered by ``created_at``. ``created_at`` is
        filled by ``datetime('now','localtime')``, which has one-second
        resolution, so ``log_id`` is used as a tie-breaker to keep entries
        written within the same second in insertion order.
    """
    conn = get_connection()
    try:
        rows = conn.execute("""
            SELECT * FROM generation_logs WHERE component_id = ?
            ORDER BY created_at, log_id
        """, (component_id,)).fetchall()
    finally:
        # Close even when the query raises.
        conn.close()
    return [dict(r) for r in rows]
def get_stats():
    """Return row counts for the ``components`` table.

    Returns:
        A dict with, in this order:

        * ``'total'``: number of rows in ``components``;
        * one key per known status (``'draft'``, ``'parsed'``, ``'matched'``,
          ``'generated'``, ``'validated'``, ``'exported'``) holding the number
          of rows in that status, 0 when there are none;
        * ``'by_type'``: a dict mapping each ``cType`` to its row count.
    """
    known_statuses = ('draft', 'parsed', 'matched', 'generated', 'validated', 'exported')

    conn = get_connection()
    try:
        total = conn.execute("SELECT COUNT(*) FROM components").fetchone()[0]
        # One grouped query instead of one COUNT per status.
        status_counts = {
            row[0]: row[1]
            for row in conn.execute(
                "SELECT status, COUNT(*) FROM components GROUP BY status"
            ).fetchall()
        }
        type_rows = conn.execute(
            "SELECT cType, COUNT(*) as cnt FROM components GROUP BY cType"
        ).fetchall()
    finally:
        # Close even when a query raises.
        conn.close()

    stats = {'total': total}
    for status in known_statuses:
        # Statuses with no rows are absent from the grouped result.
        stats[status] = status_counts.get(status, 0)
    stats['by_type'] = {r['cType']: r['cnt'] for r in type_rows}
    return stats
def export_final_json(script_id=None, status='validated'):
    """Build the export payload from stored component configurations.

    Reads up to 10000 components matching ``script_id`` / ``status`` through
    ``list_components`` and decodes their stored JSON text.

    Args:
        script_id: Optional script filter forwarded to ``list_components``.
        status: Status filter forwarded to ``list_components``.

    Returns:
        A list of dicts, one per component that has a non-empty
        ``final_config_json``. Each dict carries the identifying columns plus
        ``'config'`` (the decoded ``final_config_json``). For components whose
        ``category`` is ``'core'``, every non-empty extra JSON column listed
        below is decoded and added under its camelCase key.

    Raises:
        json.JSONDecodeError: if a stored value is not valid JSON.
    """
    # (column name, key in the exported entry) for core-category components.
    core_sections = (
        ('task_info_json', 'taskInfo'),
        ('material_info_json', 'materialInfo'),
        ('flow_info_json', 'flowInfo'),
        ('study_info_json', 'studyInfo'),
        ('question_group_json', 'questionGroup'),
        ('dialog_setting_json', 'dialogSetting'),
        ('dialog_config_json', 'dialogConfig'),
        ('image_info_json', 'imageInfo'),
        ('option_list_json', 'optionList'),
        ('question_list_json', 'questionList'),
        ('pre_dialog_json', 'preDialog'),
        ('dialog_list_json', 'dialogList'),
        ('text_info_json', 'textInfo'),
        ('eval_info_json', 'evalInfo'),
    )

    exported = []
    for comp in list_components(script_id=script_id, status=status, limit=10000):
        # NOTE(review): components without final_config_json are skipped, even
        # core ones that may only carry the extra sections -- confirm intended.
        if not comp['final_config_json']:
            continue

        item = {
            'component_id': comp['component_id'],
            'script_id': comp['script_id'],
            'component_index': comp['component_index'],
            'cType': comp['cType'],
            'type_name': comp['type_name'],
            'category': comp['category'],
            'config': json.loads(comp['final_config_json']),
        }

        # Core components additionally export their separate JSON sections.
        if comp['category'] == 'core':
            for column, out_key in core_sections:
                if comp.get(column):
                    item[out_key] = json.loads(comp[column])

        exported.append(item)
    return exported
# ============ CLI入口 ============
if __name__ == "__main__":
    # Minimal command-line front end: init | stats | list.
    import sys

    if len(sys.argv) < 2:
        print("用法: python3 db_manager.py <command>")
        print(" init - 初始化数据库")
        print(" stats - 查看统计信息")
        print(" list - 列出所有组件")
        sys.exit(1)

    cmd = sys.argv[1]
    if cmd == "init":
        init_db()
    elif cmd == "stats":
        init_db()  # make sure the tables exist
        stats = get_stats()
        print(json.dumps(stats, indent=2, ensure_ascii=False))
    elif cmd == "list":
        init_db()  # make sure the tables exist
        components = list_components()
        for c in components:
            print(f"[{c['component_id']}] {c['script_id']} | {c['cType']} ({c['category']}) | {c['status']}")
    else:
        print(f"未知命令: {cmd}")
        # Signal failure to the calling shell, like the missing-command case.
        sys.exit(1)