#!/usr/bin/env python3
"""
SQLite database manager for interactive-component configuration.

Handles table creation, CRUD operations, queries, and export.
"""

import json
import os
import sqlite3
import sys
import time  # not referenced in this module; retained in case importers rely on it
from contextlib import closing
from datetime import datetime  # not referenced in this module; retained (see above)

# abspath() keeps the location anchored to this file even when __file__ is a
# bare relative name (in which case dirname(dirname(...)) would collapse to ""
# and the database would be created relative to the current directory).
DB_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "db",
    "components.db",
)

# Values accepted by the CHECK constraint on components.status.
_STATUSES = ('draft', 'parsed', 'matched', 'generated', 'validated', 'exported')

# Columns added by the in-place migration in init_db() for databases whose
# components table was created before these columns existed.
_MIGRATED_COLUMNS = (
    ("task_info_json", "TEXT"),
    ("material_info_json", "TEXT"),
    ("flow_info_json", "TEXT"),
    ("study_info_json", "TEXT"),
    ("intermediate_json", "TEXT"),
    ("question_group_json", "TEXT"),
    ("dialog_setting_json", "TEXT"),
    ("dialog_config_json", "TEXT"),
    ("image_info_json", "TEXT"),
    ("option_list_json", "TEXT"),
    ("question_list_json", "TEXT"),
    ("pre_dialog_json", "TEXT"),
    ("dialog_list_json", "TEXT"),
    ("text_info_json", "TEXT"),
    ("eval_info_json", "TEXT"),
)

# Whitelist of columns update_component_field() may write. The column name is
# interpolated into SQL, so it must never come from outside this set.
_UPDATABLE_FIELDS = frozenset({
    'parsed_data', 'knowledge_points', 'ai_derived_fields',
    'final_config_json', 'kp_info_json', 'status',
    'raw_config', 'knowledge_points_raw',
    'task_info_json', 'material_info_json', 'flow_info_json',
    'study_info_json', 'intermediate_json',
    'question_group_json', 'dialog_setting_json', 'dialog_config_json',
    'image_info_json', 'option_list_json', 'question_list_json',
    'pre_dialog_json', 'dialog_list_json',
    'text_info_json', 'eval_info_json',
})

# (column, exported key) pairs attached to 'core' components on export.
_CORE_EXPORT_KEYS = (
    ('task_info_json', 'taskInfo'),
    ('material_info_json', 'materialInfo'),
    ('flow_info_json', 'flowInfo'),
    ('study_info_json', 'studyInfo'),
    ('question_group_json', 'questionGroup'),
    ('dialog_setting_json', 'dialogSetting'),
    ('dialog_config_json', 'dialogConfig'),
    ('image_info_json', 'imageInfo'),
    ('option_list_json', 'optionList'),
    ('question_list_json', 'questionList'),
    ('pre_dialog_json', 'preDialog'),
    ('dialog_list_json', 'dialogList'),
    ('text_info_json', 'textInfo'),
    ('eval_info_json', 'evalInfo'),
)


def get_connection():
    """Open a connection to the database at DB_PATH.

    Creates the parent directory if needed, sets ``row_factory`` to
    ``sqlite3.Row`` and applies the WAL, foreign-key and busy-timeout
    pragmas. The caller is responsible for closing the connection.

    Returns:
        An open ``sqlite3.Connection``.
    """
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:  # makedirs("") raises, so skip when DB_PATH has no directory part
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(DB_PATH, timeout=30)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        # NOTE(review): connect() was given timeout=30 (seconds) but this
        # pragma then sets 5000 ms; the later setting presumably wins.
        # Confirm which wait is intended.
        conn.execute("PRAGMA busy_timeout=5000")
    except Exception:
        # Do not leak the handle if any pragma fails.
        conn.close()
        raise
    return conn


def init_db():
    """Create the tables and indexes if missing and migrate older schemas.

    Safe to call repeatedly: every statement uses IF NOT EXISTS, and columns
    are only added when absent from the existing components table.
    """
    with closing(get_connection()) as conn:
        cursor = conn.cursor()

        # Main components table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS components (
                component_id INTEGER PRIMARY KEY AUTOINCREMENT,
                script_id TEXT NOT NULL,                -- 剧本标识(飞书文档obj_token)
                script_title TEXT,                      -- 剧本标题
                component_index INTEGER DEFAULT 0,      -- 组件在剧本中的顺序号
                category TEXT NOT NULL DEFAULT 'mid' CHECK(category IN ('mid','core')),  -- 中互动 or 核心互动
                cType TEXT NOT NULL,                    -- 英文类型标识(如 mid_dialog_repeat)
                type_name TEXT NOT NULL,                -- 中文类型名(如 对话朗读)
                has_image INTEGER DEFAULT 0,            -- 是否为配图变体
                level TEXT DEFAULT 'L1',                -- 级别(L1/L2)
                unit_id TEXT,                           -- 单元标识
                knowledge_points_raw TEXT,              -- 原始知识点文本(来自剧本)
                knowledge_points TEXT,                  -- JSON: 匹配后的知识点结构化数据
                raw_config TEXT,                        -- 原始教研配置文本(来自内嵌sheet的"组件"列)
                parsed_data TEXT,                       -- JSON: 解析后的结构化中间数据
                ai_derived_fields TEXT,                 -- JSON: AI衍生字段
                final_config_json TEXT,                 -- JSON: 最终jsonData (中互动用)
                kp_info_json TEXT,                      -- JSON: 最终kpInfo
                task_info_json TEXT,                    -- JSON: 核心互动 taskInfo
                material_info_json TEXT,                -- JSON: 合作阅读/听力 materialInfo
                flow_info_json TEXT,                    -- JSON: 合作阅读/听力 flowInfo
                study_info_json TEXT,                   -- JSON: 核心互动 studyInfo
                intermediate_json TEXT,                 -- JSON: 核心互动中间结果
                question_group_json TEXT,               -- JSON: 看图选词/听力选择 questionGroup
                dialog_setting_json TEXT,               -- JSON: 口语快答/妙问 dialogSetting
                dialog_config_json TEXT,                -- JSON: 口语/看图说话 dialogConfig
                image_info_json TEXT,                   -- JSON: 看图说话 imageInfo
                option_list_json TEXT,                  -- JSON: 看图说话 optionList
                question_list_json TEXT,                -- JSON: 看图说话/听力拖拽 questionList
                pre_dialog_json TEXT,                   -- JSON: 听力拖拽 preDialog
                dialog_list_json TEXT,                  -- JSON: 听力拖拽 dialogList
                text_info_json TEXT,                    -- JSON: 写作类 textInfo
                eval_info_json TEXT,                    -- JSON: 写作类 evalInfo
                status TEXT DEFAULT 'draft' CHECK(status IN ('draft','parsed','matched','generated','validated','exported')),
                bitable_token TEXT,                     -- 对应飞书多维表格token
                db_table TEXT,                          -- 目标数据库表名
                created_at TEXT DEFAULT (datetime('now','localtime')),
                updated_at TEXT DEFAULT (datetime('now','localtime'))
            )
        """)

        # Generation log table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS generation_logs (
                log_id INTEGER PRIMARY KEY AUTOINCREMENT,
                component_id INTEGER NOT NULL,
                step TEXT NOT NULL,                     -- parse/match_type/match_knowledge/ai_derive/generate_json/validate
                input_summary TEXT,                     -- 输入摘要
                output_summary TEXT,                    -- 输出摘要
                model_used TEXT,                        -- 使用的AI模型(如有)
                success INTEGER DEFAULT 1,              -- 1=成功 0=失败
                error_message TEXT,                     -- 失败时的错误信息
                duration_ms INTEGER,                    -- 耗时(毫秒)
                created_at TEXT DEFAULT (datetime('now','localtime')),
                FOREIGN KEY (component_id) REFERENCES components(component_id) ON DELETE CASCADE
            )
        """)

        # Migration: add the later-introduced JSON columns to a pre-existing
        # table. PRAGMA table_info returns the column name at index 1.
        existing_cols = {
            row[1]
            for row in cursor.execute("PRAGMA table_info(components)").fetchall()
        }
        for col, col_type in _MIGRATED_COLUMNS:
            if col not in existing_cols:
                # Identifiers cannot be bound as parameters; both values come
                # from the module-level constant above, never from callers.
                cursor.execute(f"ALTER TABLE components ADD COLUMN {col} {col_type}")

        # Indexes
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_script ON components(script_id)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_type ON components(cType)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_components_status ON components(status)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_logs_component ON generation_logs(component_id)")

        conn.commit()

    print(f"✅ 数据库初始化完成: {DB_PATH}")


# ============ CRUD operations ============

def insert_component(script_id, cType, type_name, category="mid", has_image=False,
                     level="L1", unit_id=None, knowledge_points_raw=None,
                     raw_config=None, component_index=0, script_title=None,
                     bitable_token=None, db_table=None):
    """Insert a new component row and return its ``component_id``.

    ``has_image`` is stored as 1/0 according to its truthiness. Columns not
    listed in the signature take their schema defaults.

    Returns:
        The integer primary key of the inserted row.
    """
    with closing(get_connection()) as conn:
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO components
                (script_id, script_title, component_index, category, cType, type_name,
                 has_image, level, unit_id, knowledge_points_raw, raw_config,
                 bitable_token, db_table)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (script_id, script_title, component_index, category, cType, type_name,
              1 if has_image else 0, level, unit_id, knowledge_points_raw, raw_config,
              bitable_token, db_table))
        component_id = cursor.lastrowid
        conn.commit()
    return component_id


def update_component_field(component_id, field, value):
    """Set one whitelisted column of a component and refresh ``updated_at``.

    Args:
        component_id: Primary key of the row to update.
        field: Column name; must be one of the whitelisted updatable fields
            (e.g. parsed_data, knowledge_points, ai_derived_fields,
            final_config_json, status, and the other *_json columns).
        value: New value, bound as a SQL parameter.

    Raises:
        ValueError: If ``field`` is not in the whitelist.
    """
    if field not in _UPDATABLE_FIELDS:
        raise ValueError(f"不允许更新的字段: {field}")

    with closing(get_connection()) as conn:
        # ``field`` is interpolated only after passing the whitelist check.
        conn.execute(f"""
            UPDATE components
            SET {field} = ?, updated_at = datetime('now','localtime')
            WHERE component_id = ?
        """, (value, component_id))
        conn.commit()


def get_component(component_id):
    """Return the component with the given id as a dict, or None if absent."""
    with closing(get_connection()) as conn:
        row = conn.execute(
            "SELECT * FROM components WHERE component_id = ?", (component_id,)
        ).fetchone()
    return dict(row) if row else None


def list_components(script_id=None, component_type=None, status=None, limit=100):
    """Return components as dicts, ordered by script_id then component_index.

    Each of ``script_id``, ``component_type`` (matched against cType) and
    ``status`` is applied as an equality filter only when truthy. At most
    ``limit`` rows are returned.
    """
    query = "SELECT * FROM components WHERE 1=1"
    params = []
    for clause, wanted in (
        (" AND script_id = ?", script_id),
        (" AND cType = ?", component_type),
        (" AND status = ?", status),
    ):
        if wanted:
            query += clause
            params.append(wanted)
    query += " ORDER BY script_id, component_index LIMIT ?"
    params.append(limit)

    with closing(get_connection()) as conn:
        rows = conn.execute(query, params).fetchall()
    return [dict(r) for r in rows]


def insert_log(component_id, step, input_summary=None, output_summary=None,
               model_used=None, success=True, error_message=None, duration_ms=None):
    """Insert one generation-log row for a component.

    ``success`` is stored as 1/0 according to its truthiness. Because
    connections enable foreign keys, a ``component_id`` that does not exist
    in components makes the INSERT raise ``sqlite3.IntegrityError``; the
    connection is closed in that case too.
    """
    with closing(get_connection()) as conn:
        conn.execute("""
            INSERT INTO generation_logs
                (component_id, step, input_summary, output_summary, model_used,
                 success, error_message, duration_ms)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (component_id, step, input_summary, output_summary, model_used,
              1 if success else 0, error_message, duration_ms))
        conn.commit()


def get_logs(component_id):
    """Return all log rows of a component as dicts, oldest first.

    ``created_at`` is filled by ``datetime('now','localtime')``, which has
    one-second resolution, so ``log_id`` is used as a tie-breaker to keep
    rows written within the same second in insertion order.
    """
    with closing(get_connection()) as conn:
        rows = conn.execute("""
            SELECT * FROM generation_logs
            WHERE component_id = ?
            ORDER BY created_at, log_id
        """, (component_id,)).fetchall()
    return [dict(r) for r in rows]


def get_stats():
    """Return component counts.

    Returns:
        A dict with ``'total'``, one integer entry per status value
        (0 when no component has that status), and ``'by_type'`` mapping
        each cType to its row count.
    """
    with closing(get_connection()) as conn:
        total = conn.execute("SELECT COUNT(*) FROM components").fetchone()[0]
        # One grouped query instead of one COUNT per status.
        per_status = {
            row[0]: row[1]
            for row in conn.execute(
                "SELECT status, COUNT(*) FROM components GROUP BY status"
            ).fetchall()
        }
        type_rows = conn.execute(
            "SELECT cType, COUNT(*) as cnt FROM components GROUP BY cType"
        ).fetchall()

    stats = {'total': total}
    for status in _STATUSES:
        stats[status] = per_status.get(status, 0)
    stats['by_type'] = {r['cType']: r['cnt'] for r in type_rows}
    return stats


def export_final_json(script_id=None, status='validated'):
    """Export final configuration entries for components.

    Components are selected via ``list_components`` with the given filters;
    those whose ``final_config_json`` is empty are skipped. For components
    of category ``'core'``, each non-empty auxiliary JSON column is decoded
    and attached under its camelCase key.

    Returns:
        A list of dicts, one per exported component.

    Raises:
        json.JSONDecodeError: If a stored JSON column is not valid JSON.
    """
    # NOTE(review): the fixed limit means at most 10000 rows are exported;
    # anything beyond that is dropped without notice. Confirm this is intended.
    components = list_components(script_id=script_id, status=status, limit=10000)

    result = []
    for c in components:
        if not c['final_config_json']:
            continue
        entry = {
            'component_id': c['component_id'],
            'script_id': c['script_id'],
            'component_index': c['component_index'],
            'cType': c['cType'],
            'type_name': c['type_name'],
            'category': c['category'],
            'config': json.loads(c['final_config_json']),
        }
        # Core interactions: attach the additional JSON outputs.
        if c['category'] == 'core':
            for field, key in _CORE_EXPORT_KEYS:
                if c.get(field):
                    entry[key] = json.loads(c[field])
        result.append(entry)
    return result


# ============ CLI entry point ============

def main(argv=None):
    """Run the command-line interface and return a process exit code.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success; 1 when no command is given or the command is unknown.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("用法: python3 db_manager.py ")
        print("  init   - 初始化数据库")
        print("  stats  - 查看统计信息")
        print("  list   - 列出所有组件")
        return 1

    cmd = argv[1]
    if cmd == "init":
        init_db()
    elif cmd == "stats":
        init_db()  # make sure the tables exist
        print(json.dumps(get_stats(), indent=2, ensure_ascii=False))
    elif cmd == "list":
        init_db()
        for c in list_components():
            print(f"[{c['component_id']}] {c['script_id']} | {c['cType']} ({c['category']}) | {c['status']}")
    else:
        print(f"未知命令: {cmd}")
        return 1  # report failure to the calling shell
    return 0


if __name__ == "__main__":
    sys.exit(main())