307 lines
13 KiB
Python
307 lines
13 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
互动组件配置 SQLite 数据库管理器
|
||
负责建表、CRUD操作、查询、导出
|
||
"""
|
||
|
||
import sqlite3
|
||
import json
|
||
import os
|
||
import time
|
||
from datetime import datetime
|
||
|
||
# Location of the SQLite file: <parent of this file's directory>/db/components.db.
# abspath() guards against a relative __file__ (possible when the script is run
# directly on interpreters older than 3.9); without it both dirname() calls can
# collapse to "" and the database would be created relative to the current
# working directory instead of next to the project.
DB_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "db",
    "components.db",
)
|
||
|
||
|
||
def get_connection():
    """Open and configure a connection to the component database.

    The parent directory of ``DB_PATH`` is created when missing. The returned
    connection yields ``sqlite3.Row`` rows (index and name access) and has
    WAL journaling, foreign-key enforcement and a busy timeout applied.

    Returns:
        sqlite3.Connection: An open connection. The caller owns it and is
        responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or configured. A
            connection that was opened but could not be configured is closed
            before the error propagates.
    """
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    conn = sqlite3.connect(DB_PATH, timeout=30)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        # Foreign keys are off by default in SQLite and must be enabled on
        # every connection for ON DELETE CASCADE to take effect.
        conn.execute("PRAGMA foreign_keys=ON")
        # NOTE(review): this PRAGMA appears to replace the 30 s busy timeout
        # requested via connect(timeout=30) with 5 s — confirm which value is
        # intended and drop the other.
        conn.execute("PRAGMA busy_timeout=5000")
    except Exception:
        # Do not leak the handle when configuration fails half-way.
        conn.close()
        raise
    return conn
|
||
|
||
|
||
def init_db():
    """Create the schema if needed and upgrade older databases in place.

    Steps, in order:

    1. Create the ``components`` table (one row per interaction component).
    2. Create the ``generation_logs`` table; its rows reference
       ``components`` and are removed with their component
       (``ON DELETE CASCADE``).
    3. Add any of the later-introduced ``*_json`` columns that are missing
       from an already existing ``components`` table.
    4. Create the lookup indexes.

    Every statement is guarded (``IF NOT EXISTS`` or a column check), so the
    function can be called repeatedly. On success a confirmation line with
    the database path is printed.

    Raises:
        sqlite3.Error: If any statement fails. The connection is closed in
            that case as well.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Main component table.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS components (
                component_id INTEGER PRIMARY KEY AUTOINCREMENT,
                script_id TEXT NOT NULL, -- 剧本标识(飞书文档obj_token)
                script_title TEXT, -- 剧本标题
                component_index INTEGER DEFAULT 0, -- 组件在剧本中的顺序号
                category TEXT NOT NULL DEFAULT 'mid'
                CHECK(category IN ('mid','core')), -- 中互动 or 核心互动
                cType TEXT NOT NULL, -- 英文类型标识(如 mid_dialog_repeat)
                type_name TEXT NOT NULL, -- 中文类型名(如 对话朗读)
                has_image INTEGER DEFAULT 0, -- 是否为配图变体
                level TEXT DEFAULT 'L1', -- 级别(L1/L2)
                unit_id TEXT, -- 单元标识
                knowledge_points_raw TEXT, -- 原始知识点文本(来自剧本)
                knowledge_points TEXT, -- JSON: 匹配后的知识点结构化数据
                raw_config TEXT, -- 原始教研配置文本(来自内嵌sheet的"组件"列)
                parsed_data TEXT, -- JSON: 解析后的结构化中间数据
                ai_derived_fields TEXT, -- JSON: AI衍生字段
                final_config_json TEXT, -- JSON: 最终jsonData (中互动用)
                kp_info_json TEXT, -- JSON: 最终kpInfo
                task_info_json TEXT, -- JSON: 核心互动 taskInfo
                material_info_json TEXT, -- JSON: 合作阅读/听力 materialInfo
                flow_info_json TEXT, -- JSON: 合作阅读/听力 flowInfo
                study_info_json TEXT, -- JSON: 核心互动 studyInfo
                intermediate_json TEXT, -- JSON: 核心互动中间结果
                question_group_json TEXT, -- JSON: 看图选词/听力选择 questionGroup
                dialog_setting_json TEXT, -- JSON: 口语快答/妙问 dialogSetting
                dialog_config_json TEXT, -- JSON: 口语/看图说话 dialogConfig
                image_info_json TEXT, -- JSON: 看图说话 imageInfo
                option_list_json TEXT, -- JSON: 看图说话 optionList
                question_list_json TEXT, -- JSON: 看图说话/听力拖拽 questionList
                pre_dialog_json TEXT, -- JSON: 听力拖拽 preDialog
                dialog_list_json TEXT, -- JSON: 听力拖拽 dialogList
                text_info_json TEXT, -- JSON: 写作类 textInfo
                eval_info_json TEXT, -- JSON: 写作类 evalInfo
                status TEXT DEFAULT 'draft'
                CHECK(status IN ('draft','parsed','matched','generated','validated','exported')),
                bitable_token TEXT, -- 对应飞书多维表格token
                db_table TEXT, -- 目标数据库表名
                created_at TEXT DEFAULT (datetime('now','localtime')),
                updated_at TEXT DEFAULT (datetime('now','localtime'))
            )
        """)

        # Generation log table: one row per pipeline step run for a component.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS generation_logs (
                log_id INTEGER PRIMARY KEY AUTOINCREMENT,
                component_id INTEGER NOT NULL,
                step TEXT NOT NULL, -- parse/match_type/match_knowledge/ai_derive/generate_json/validate
                input_summary TEXT, -- 输入摘要
                output_summary TEXT, -- 输出摘要
                model_used TEXT, -- 使用的AI模型(如有)
                success INTEGER DEFAULT 1, -- 1=成功 0=失败
                error_message TEXT, -- 失败时的错误信息
                duration_ms INTEGER, -- 耗时(毫秒)
                created_at TEXT DEFAULT (datetime('now','localtime')),
                FOREIGN KEY (component_id) REFERENCES components(component_id) ON DELETE CASCADE
            )
        """)

        # Migration: CREATE TABLE IF NOT EXISTS leaves an existing table
        # untouched, so columns introduced after the first release have to be
        # added explicitly to databases created before them.
        existing_cols = {
            row[1]  # column 1 of PRAGMA table_info is the column name
            for row in cursor.execute("PRAGMA table_info(components)").fetchall()
        }
        migration_columns = (
            ("task_info_json", "TEXT"),
            ("material_info_json", "TEXT"),
            ("flow_info_json", "TEXT"),
            ("study_info_json", "TEXT"),
            ("intermediate_json", "TEXT"),
            ("question_group_json", "TEXT"),
            ("dialog_setting_json", "TEXT"),
            ("dialog_config_json", "TEXT"),
            ("image_info_json", "TEXT"),
            ("option_list_json", "TEXT"),
            ("question_list_json", "TEXT"),
            ("pre_dialog_json", "TEXT"),
            ("dialog_list_json", "TEXT"),
            ("text_info_json", "TEXT"),
            ("eval_info_json", "TEXT"),
        )
        for col, col_type in migration_columns:
            if col not in existing_cols:
                # Identifiers cannot be bound as parameters; both values come
                # from the hard-coded tuple above, never from user input.
                cursor.execute(f"ALTER TABLE components ADD COLUMN {col} {col_type}")

        # Indexes for the common lookups.
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_script ON components(script_id)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_type ON components(cType)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_status ON components(status)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_logs_component ON generation_logs(component_id)")

        conn.commit()
    finally:
        # Release the connection even when one of the statements above fails.
        conn.close()
    print(f"✅ 数据库初始化完成: {DB_PATH}")
|
||
|
||
|
||
# ============ CRUD 操作 ============
|
||
|
||
def insert_component(script_id, cType, type_name, category="mid",
                     has_image=False, level="L1", unit_id=None,
                     knowledge_points_raw=None, raw_config=None,
                     component_index=0, script_title=None,
                     bitable_token=None, db_table=None):
    """Insert a new component row and return its ``component_id``.

    Columns not listed here (the JSON result columns, ``status`` and the
    timestamps) are left to their table defaults.

    Args:
        script_id: Identifier of the script the component belongs to.
        cType: Type identifier stored in the ``cType`` column.
        type_name: Display name of the type.
        category: ``'mid'`` or ``'core'``; the table's CHECK constraint
            rejects anything else.
        has_image: Truthy values are stored as 1, falsy values as 0.
        level: Level label stored in the ``level`` column.
        unit_id: Optional unit identifier.
        knowledge_points_raw: Optional raw knowledge-point text.
        raw_config: Optional raw configuration text.
        component_index: Position of the component within its script.
        script_title: Optional script title.
        bitable_token: Optional token stored in ``bitable_token``.
        db_table: Optional target table name stored in ``db_table``.

    Returns:
        int: The auto-generated ``component_id`` of the inserted row.

    Raises:
        sqlite3.Error: If the insert fails (for example a constraint
            violation). The connection is closed in that case as well.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO components (script_id, script_title, component_index, category, cType, type_name,
            has_image, level, unit_id, knowledge_points_raw, raw_config,
            bitable_token, db_table)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (script_id, script_title, component_index, category, cType, type_name,
              1 if has_image else 0, level, unit_id, knowledge_points_raw, raw_config,
              bitable_token, db_table))
        component_id = cursor.lastrowid
        conn.commit()
        return component_id
    finally:
        # Closing without a commit discards the pending insert, so a failure
        # leaves neither a half-written row nor an open handle behind.
        conn.close()
|
||
|
||
|
||
def update_component_field(component_id, field, value):
    """Set one whitelisted column of a component and refresh ``updated_at``.

    Updatable columns are ``status``, ``raw_config``, ``knowledge_points_raw``,
    ``parsed_data``, ``knowledge_points``, ``ai_derived_fields``,
    ``final_config_json``, ``kp_info_json`` and the per-type ``*_json``
    result columns listed in ``allowed_fields`` below.

    Args:
        component_id: Primary key of the row to update. A non-existent id
            updates nothing and raises no error.
        field: Name of the column to set; must be in the whitelist.
        value: New value, stored as given.

    Raises:
        ValueError: If ``field`` is not whitelisted. This is checked before
            any connection is opened.
        sqlite3.Error: If the update fails. The connection is closed in that
            case as well.
    """
    allowed_fields = {'parsed_data', 'knowledge_points', 'ai_derived_fields',
                      'final_config_json', 'kp_info_json', 'status', 'raw_config',
                      'knowledge_points_raw',
                      'task_info_json', 'material_info_json', 'flow_info_json',
                      'study_info_json', 'intermediate_json',
                      'question_group_json',
                      'dialog_setting_json', 'dialog_config_json',
                      'image_info_json', 'option_list_json', 'question_list_json',
                      'pre_dialog_json', 'dialog_list_json',
                      'text_info_json', 'eval_info_json'}
    if field not in allowed_fields:
        raise ValueError(f"不允许更新的字段: {field}")

    conn = get_connection()
    try:
        # Column names cannot be bound as parameters; interpolating ``field``
        # is safe only because it passed the whitelist check above.
        conn.execute(f"""
            UPDATE components SET {field} = ?, updated_at = datetime('now','localtime')
            WHERE component_id = ?
        """, (value, component_id))
        conn.commit()
    finally:
        conn.close()
|
||
|
||
|
||
def get_component(component_id):
    """Fetch a single component by primary key.

    Args:
        component_id: Value of ``components.component_id`` to look up.

    Returns:
        dict | None: The row as a plain ``dict`` keyed by column name, or
        ``None`` when no row has that id.

    Raises:
        sqlite3.Error: If the query fails. The connection is closed in that
            case as well.
    """
    conn = get_connection()
    try:
        row = conn.execute(
            "SELECT * FROM components WHERE component_id = ?",
            (component_id,),
        ).fetchone()
    finally:
        conn.close()
    # Convert to a dict so the result stays usable after the connection is closed
    # and supports .get().
    return dict(row) if row else None
|
||
|
||
|
||
def list_components(script_id=None, component_type=None, status=None, limit=100):
    """Return components matching the given filters.

    Args:
        script_id: Keep only rows with this ``script_id``. Falsy values
            (``None``, ``""``) disable the filter.
        component_type: Keep only rows with this ``cType``. Falsy values
            disable the filter.
        status: Keep only rows with this ``status``. Falsy values disable
            the filter.
        limit: Maximum number of rows returned.

    Returns:
        list[dict]: Matching rows as plain dicts, ordered by ``script_id``
        and then ``component_index``.

    Raises:
        sqlite3.Error: If the query fails. The connection is closed in that
            case as well.
    """
    # "WHERE 1=1" lets every optional filter be appended uniformly as " AND ...".
    query = "SELECT * FROM components WHERE 1=1"
    params = []
    for column, wanted in (("script_id", script_id),
                           ("cType", component_type),
                           ("status", status)):
        if wanted:
            # Column names come from the literal tuple above; only the
            # values are caller-supplied, and those are bound parameters.
            query += f" AND {column} = ?"
            params.append(wanted)
    query += " ORDER BY script_id, component_index LIMIT ?"
    params.append(limit)

    conn = get_connection()
    try:
        rows = conn.execute(query, params).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
|
||
|
||
|
||
def insert_log(component_id, step, input_summary=None, output_summary=None,
               model_used=None, success=True, error_message=None, duration_ms=None):
    """Append one row to ``generation_logs``.

    Args:
        component_id: Id of the component the log entry belongs to; the
            table declares it as a foreign key into ``components``.
        step: Name of the pipeline step being logged.
        input_summary: Optional summary of the step input.
        output_summary: Optional summary of the step output.
        model_used: Optional name of the AI model used.
        success: Truthy values are stored as 1, falsy values as 0.
        error_message: Optional error text for failed steps.
        duration_ms: Optional duration in milliseconds.

    Raises:
        sqlite3.Error: If the insert fails (for example an unknown
            ``component_id`` while foreign keys are enforced). The connection
            is closed in that case as well.
    """
    conn = get_connection()
    try:
        conn.execute("""
            INSERT INTO generation_logs (component_id, step, input_summary, output_summary,
            model_used, success, error_message, duration_ms)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (component_id, step, input_summary, output_summary,
              model_used, 1 if success else 0, error_message, duration_ms))
        conn.commit()
    finally:
        conn.close()
|
||
|
||
|
||
def get_logs(component_id):
    """Return all generation logs of one component, oldest first.

    Args:
        component_id: Id of the component whose logs are requested.

    Returns:
        list[dict]: Log rows as plain dicts; empty when the component has
        no logs.

    Raises:
        sqlite3.Error: If the query fails. The connection is closed in that
            case as well.
    """
    conn = get_connection()
    try:
        # created_at only has one-second resolution, so several steps logged
        # within the same second would otherwise come back in an unspecified
        # order; the auto-increment log_id restores insertion order.
        rows = conn.execute("""
            SELECT * FROM generation_logs WHERE component_id = ?
            ORDER BY created_at, log_id
        """, (component_id,)).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
|
||
|
||
|
||
def get_stats():
    """Collect row counts for the ``components`` table.

    Returns:
        dict: ``'total'`` (all rows), one key per known status (``'draft'``,
        ``'parsed'``, ``'matched'``, ``'generated'``, ``'validated'``,
        ``'exported'``; 0 when no row has that status) and ``'by_type'``, a
        dict mapping each ``cType`` present in the table to its row count.

    Raises:
        sqlite3.Error: If a query fails. The connection is closed in that
            case as well.
    """
    conn = get_connection()
    try:
        total = conn.execute("SELECT COUNT(*) FROM components").fetchone()[0]
        # One grouped query instead of one COUNT per status.
        status_counts = {
            row[0]: row[1]
            for row in conn.execute(
                "SELECT status, COUNT(*) FROM components GROUP BY status"
            ).fetchall()
        }
        type_rows = conn.execute(
            "SELECT cType, COUNT(*) as cnt FROM components GROUP BY cType"
        ).fetchall()
        by_type = {r['cType']: r['cnt'] for r in type_rows}
    finally:
        conn.close()

    stats = {'total': total}
    # Always report every known status, including those with no rows.
    for status in ('draft', 'parsed', 'matched', 'generated', 'validated', 'exported'):
        stats[status] = status_counts.get(status, 0)
    stats['by_type'] = by_type
    return stats
|
||
|
||
|
||
def export_final_json(script_id=None, status='validated'):
    """Build the export payload for components in a given status.

    Up to 10000 components matching ``script_id`` / ``status`` are read via
    ``list_components``. Components whose ``final_config_json`` is empty are
    left out. Each exported entry carries the identifying columns plus the
    decoded ``final_config_json`` under ``'config'``; entries of category
    ``'core'`` additionally get every non-empty per-type JSON column,
    decoded, under its camelCase key.

    Args:
        script_id: Optional script filter, passed through to
            ``list_components``.
        status: Status filter, passed through to ``list_components``.

    Returns:
        list[dict]: One entry per exported component, in the order returned
        by ``list_components``.

    Raises:
        json.JSONDecodeError: If a stored JSON column is not valid JSON.
    """
    # (column name, key in the exported entry) for the extra core outputs.
    core_outputs = (
        ('task_info_json', 'taskInfo'),
        ('material_info_json', 'materialInfo'),
        ('flow_info_json', 'flowInfo'),
        ('study_info_json', 'studyInfo'),
        ('question_group_json', 'questionGroup'),
        ('dialog_setting_json', 'dialogSetting'),
        ('dialog_config_json', 'dialogConfig'),
        ('image_info_json', 'imageInfo'),
        ('option_list_json', 'optionList'),
        ('question_list_json', 'questionList'),
        ('pre_dialog_json', 'preDialog'),
        ('dialog_list_json', 'dialogList'),
        ('text_info_json', 'textInfo'),
        ('eval_info_json', 'evalInfo'),
    )

    exported = []
    for comp in list_components(script_id=script_id, status=status, limit=10000):
        # NOTE(review): this also skips 'core' components that only have the
        # per-type JSON columns filled in — confirm that is intended.
        if not comp['final_config_json']:
            continue

        item = {
            'component_id': comp['component_id'],
            'script_id': comp['script_id'],
            'component_index': comp['component_index'],
            'cType': comp['cType'],
            'type_name': comp['type_name'],
            'category': comp['category'],
            'config': json.loads(comp['final_config_json']),
        }

        # Core interactions: attach the additional JSON outputs.
        if comp['category'] == 'core':
            for column, out_key in core_outputs:
                payload = comp.get(column)
                if payload:
                    item[out_key] = json.loads(payload)

        exported.append(item)
    return exported
|
||
|
||
|
||
# ============ CLI入口 ============
|
||
|
||
# Command-line entry point: ``python3 db_manager.py <init|stats|list>``.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("用法: python3 db_manager.py <command>")
        print(" init - 初始化数据库")
        print(" stats - 查看统计信息")
        print(" list - 列出所有组件")
        sys.exit(1)

    cmd = sys.argv[1]
    if cmd == "init":
        init_db()
    elif cmd == "stats":
        init_db()  # make sure the tables exist before querying
        stats = get_stats()
        print(json.dumps(stats, indent=2, ensure_ascii=False))
    elif cmd == "list":
        init_db()  # make sure the tables exist before querying
        components = list_components()
        for c in components:
            print(f"[{c['component_id']}] {c['script_id']} | {c['cType']} ({c['category']}) | {c['status']}")
    else:
        print(f"未知命令: {cmd}")
        # Signal failure to the calling shell, consistent with the
        # missing-argument path above; previously this exited with status 0.
        sys.exit(1)
|