#!/usr/bin/env python3 """ LLM 驱动的 jsonData / kpInfo 生成器 从教研配置文本 + 组件类型 + per-type prompt模板 → 调用 LLM 生成结构化 JSON 核心设计 v2: - 每种组件类型使用 PRD 定义的精确 prompt 模板(prompts/prompt_registry.py) - 教研配置文本拆解为结构化字段作为 prompt 输入变量 - 角色配置通过 section-character 映射解析 - 对无模板的类型回退到 few-shot 样例方式 - 规则负责后处理:校验必需字段、修正 cType/cId """ import os import sys import json import logging import re CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) SKILL_ROOT = os.path.dirname(CURRENT_PATH) if CURRENT_PATH not in sys.path: sys.path.insert(0, CURRENT_PATH) # Ensure prompts package is importable if SKILL_ROOT not in sys.path: sys.path.insert(0, SKILL_ROOT) logger = logging.getLogger("generate_json") if not logger.handlers: handler = logging.StreamHandler() handler.setFormatter(logging.Formatter( "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s" )) logger.addHandler(handler) logger.setLevel(logging.INFO) # ============ 参考样例加载 ============ _examples_cache = None def _load_examples(): """加载所有组件类型的参考样例""" global _examples_cache if _examples_cache is not None: return _examples_cache examples_path = os.path.join(SKILL_ROOT, "references", "examples", "all_types_examples.json") if not os.path.exists(examples_path): logger.warning(f"参考样例文件不存在: {examples_path}") _examples_cache = {} return _examples_cache with open(examples_path, "r") as f: _examples_cache = json.load(f) logger.info(f"加载参考样例: {len(_examples_cache)} 种类型") return _examples_cache def _get_examples_for_type(cType): """获取指定组件类型的参考样例""" examples = _load_examples() type_data = examples.get(cType) if not type_data or not type_data.get("examples"): return [] return type_data["examples"] # ============ Prompt 构建(v2 per-type 模板) ============ from prompts.prompt_registry import ( parse_teaching_config_fields, build_full_prompt, get_supported_types, CDESC_TYPES, build_cdesc_prompt, ) from prompts.core_reading_prompts import ( CORE_READING_STEPS, build_step_prompt, ) from prompts.core_imagedrag_prompts import ( CORE_IMAGEDRAG_STEPS, build_imagedrag_prompt, ) from prompts.core_speaking_prompts import ( CORE_SPEAKING_STEPS, build_speaking_prompt, ) from prompts.core_speaking_inquiry_prompts import ( CORE_SPEAKING_INQUIRY_STEPS, build_speaking_inquiry_prompt, ) from prompts.core_speaking_image_prompts import ( CORE_SPEAKING_IMAGE_STEPS, build_speaking_image_prompt, ) from prompts.core_listening_order_prompts import ( CORE_LISTENING_ORDER_STEPS, build_listening_order_prompt, ) from prompts.core_listening_drag_prompts import ( CORE_LISTENING_DRAG_STEPS, build_listening_drag_prompt, ) from prompts.core_listening_choose_prompts import ( CORE_LISTENING_CHOOSE_STEPS, build_listening_choose_prompt, ) from prompts.core_writing_img_prompts import ( CORE_WRITING_IMG_MAKE_SENTENCE_STEPS, CORE_WRITING_IMG_WRITE_STEPS, build_writing_img_prompt, ) from prompts.core_writing_question_prompts import ( CORE_WRITING_Q_MAKE_SENTENCE_STEPS, CORE_WRITING_Q_WRITE_STEPS, build_writing_question_prompt, ) # Fallback: 旧版 few-shot 方式(对无 prompt 模板的类型) FALLBACK_SYSTEM_PROMPT = """你是一个互动组件配置JSON生成专家。根据教研配置文本生成对应的 jsonData JSON。 ## 组件类型: {cType} ## 输出要求 1. 输出必须是单行合法JSON 2. cType 必须为 "{cType}" 3. cId 必须为提供的组件ID 4. 严格按照参考样例的JSON结构输出 5. "无"的字段使用空数组[]或空对象{{}} ## 参考样例 {examples_text} 请直接输出单行JSON,不要包含任何其他符号。""" def _build_fallback_examples_text(examples, max_examples=2): """构建 few-shot 样例文本(回退用)""" parts = [] for i, ex in enumerate(examples[:max_examples], 1): teaching_config = ex.get("teaching_config", "") json_data = ex.get("jsonData", "") try: jd_obj = json.loads(json_data) if isinstance(json_data, str) else json_data json_data_formatted = json.dumps(jd_obj, ensure_ascii=False) except (json.JSONDecodeError, TypeError): json_data_formatted = str(json_data) parts.append(f"### 样例 {i}\n教研配置:\n{teaching_config}\n\n输出:\n{json_data_formatted}") return "\n\n".join(parts) def _build_fallback_user_prompt(cId, teaching_config, character_map, has_image): """构建 user prompt(回退用)""" parts = [f"组件ID: {cId}"] if has_image: parts.append(f'配图: 是(img="{cId}.png")') if character_map: parts.append(f"角色配置: {json.dumps(character_map, ensure_ascii=False)}") parts.append(f"\n教研配置:\n{teaching_config}") parts.append("\n请直接输出单行JSON") return "\n".join(parts) # ============ 语音识别热词自动生成 ============ ASR_HOTWORD_PROMPT = """#任务 参考当前的互动配置,如果互动对话为[表达]类,则根据互动场景,给出一些可能高频出现的 或者 可能有一定识别难度的词汇,用","连接。如果互动对话为[朗读]类,则返回"无"。 语音识别词汇的考虑范围: - 交互中可能提及的专有名词(例如人名、地名、特定物品名称等) - 互动目标的一些场景表达,例如 题目中要谈论"目标",则热词中可以提供 goal/target 等。 #输入 情境引入: $pre_dialog$ 互动对话: $interaction$ 后置对话: $post_dialog$ #输出 现在,根据任务要求直接输出符合要求的热词字段("无",或者逗号连接的多个单词)。""" # 需要自动生成热词的组件类型 ASR_TYPES = {"mid_dialog_express"} def generate_asr_hotwords(cType, parsed_fields, llm_client): """ 用 LLM 自动生成语音识别热词。 表达类互动自动生成,朗读类返回"无"。 Args: cType: 组件类型 parsed_fields: 解析后的教研配置字段 llm_client: LLMClient 实例 Returns: str: 热词字符串(逗号分隔)或 "无" """ if cType not in ASR_TYPES: return "无" pre_dialog = parsed_fields.get("情境引入", "无") interaction = parsed_fields.get("互动内容", "无") post_dialog = parsed_fields.get("后置对话", "无") prompt = ASR_HOTWORD_PROMPT.replace( "$pre_dialog$", pre_dialog ).replace( "$interaction$", interaction ).replace( "$post_dialog$", post_dialog ) try: content, usage = llm_client.call("", prompt, max_tokens=256, temperature=0.3) hotwords = content.strip().strip('"').strip() logger.info(f"[热词生成] cType={cType}, hotwords={hotwords[:80]}, tokens={usage}") return hotwords if hotwords else "无" except Exception as e: logger.warning(f"[热词生成] 失败: {e}") return "无" # ============ kpInfo 生成 ============ KP_SYSTEM_PROMPT = """你是一个知识点信息JSON生成专家。根据提供的组件类型、组件ID和知识点文本,生成 kpInfo JSON。 ## 输出要求 1. 输出必须是合法JSON 2. 结构固定为: ```json { "pushType": "relationKp", "cType": "<组件cType>", "cId": "<组件ID>", "kpInfo": [ { "kpId": null, "kpType": "vocab 或 sentence", "kpTitle": "<知识点标题>", "kpSkill": "vocab_meaning 或 sentence_meaning", "kpSkillName": "词义 或 语义" } ] } ``` 3. 判断知识点类型的规则: - 如果知识点是单个英文单词(如 school, play),则 kpType="vocab", kpSkill="vocab_meaning", kpSkillName="词义" - 如果知识点包含 "..." 或是句子片段(如 "You need to..."),则 kpType="sentence", kpSkill="sentence_meaning", kpSkillName="语义" 4. 知识点文本中可能包含数字后缀(如 "school 1"),数字后缀表示出现次数,不要包含在 kpTitle 中 5. 多个知识点用换行分隔,每个生成一条 kpInfo 条目 6. 去除 标签及其属性 请直接输出JSON,不要包含任何其他文字。""" def _build_kp_user_prompt(cType, cId, knowledge_text): """构建知识点 user prompt""" return f"cType: {cType}\ncId: {cId}\n知识点文本:\n{knowledge_text}" # ============ 核心生成函数 ============ def generate_json_data(cType, cId, teaching_config, character_map=None, has_image=False, knowledge_text="", llm_client=None): """ 使用 LLM 生成组件的 jsonData。 优先使用 per-type prompt 模板,对无模板的类型回退到 few-shot。 Args: cType: 英文类型标识(如 "mid_dialog_repeat") cId: 组件ID(如 "1214101") teaching_config: 教研配置文本 character_map: 角色映射 {"Eva": 663, "Peter": 653} has_image: 是否配图 knowledge_text: 知识点文本 llm_client: LLMClient 实例 Returns: dict: 生成的 jsonData 对象 """ if llm_client is None: from llm_client import get_client llm_client = get_client() # 优先使用 per-type prompt 模板 if cType in get_supported_types(): json_data = _generate_with_template(cType, cId, teaching_config, character_map, llm_client) else: # 回退到 few-shot 方式 logger.info(f"组件类型 {cType} 无 prompt 模板,使用 few-shot 回退") json_data = _generate_with_fallback(cType, cId, teaching_config, character_map, has_image, llm_client) # 后处理:确保必需字段正确 json_data = _post_process_json_data(json_data, cType, cId, has_image) logger.info(f"生成 jsonData: cType={cType}, cId={cId}") return json_data def _generate_with_template(cType, cId, teaching_config, character_map, llm_client): """使用 per-type prompt 模板生成 jsonData""" # 解析教研配置为结构化字段 parsed_fields = parse_teaching_config_fields(teaching_config) # 对需要热词的类型,自动生成语音识别热词 if cType in ASR_TYPES: existing_hotwords = parsed_fields.get("语音识别热词", "").strip() if not existing_hotwords or existing_hotwords == "无": hotwords = generate_asr_hotwords(cType, parsed_fields, llm_client) parsed_fields["语音识别热词"] = hotwords # 对需要互动描述的类型,LLM 预生成 cDesc 文本 if cType in CDESC_TYPES: cdesc_prompt = build_cdesc_prompt(cType, parsed_fields) 互动描述, usage = llm_client.call("", cdesc_prompt, max_tokens=1024, temperature=0.3) parsed_fields["互动描述"] = 互动描述.strip() logger.info(f"[cDesc] cType={cType}, cId={cId}, tokens={usage}") # 构建角色配置字符串(如 "663-EVA, 653-peter, 658-Vicky") resource_mapping_str = "" if character_map: parts = [f"{v}-{k}" for k, v in character_map.items()] resource_mapping_str = ", ".join(parts) # 构建完整 prompt full_prompt = build_full_prompt(cType, cId, parsed_fields, resource_mapping_str) # 调用 LLM(单一 user prompt,无 system prompt) json_data, usage = llm_client.call_for_json("", full_prompt, max_tokens=4096, temperature=0.1) logger.info(f"[template] cType={cType}, cId={cId}, tokens={usage}") return json_data def _generate_with_fallback(cType, cId, teaching_config, character_map, has_image, llm_client): """使用 few-shot 样例回退生成 jsonData""" examples = _get_examples_for_type(cType) examples_text = _build_fallback_examples_text(examples) if examples else "(无参考样例)" system_prompt = FALLBACK_SYSTEM_PROMPT.format(cType=cType, examples_text=examples_text) user_prompt = _build_fallback_user_prompt(cId, teaching_config, character_map or {}, has_image) json_data, usage = llm_client.call_for_json(system_prompt, user_prompt, max_tokens=4096, temperature=0.1) logger.info(f"[fallback] cType={cType}, cId={cId}, tokens={usage}") return json_data def generate_kp_info(cType, cId, knowledge_text, llm_client=None, level=None): """ 生成组件的 kpInfo(通过 MySQL 知识点库匹配) Args: cType: 英文类型标识 cId: 组件ID knowledge_text: 知识点文本(来自sheet H列) llm_client: LLMClient 实例 level: 剧本级别(如 "L1", "L2") Returns: dict: 生成的 kpInfo 对象,如果无知识点则返回 None """ if not knowledge_text or not knowledge_text.strip(): return None from kp_matcher import match_knowledge_points return match_knowledge_points(knowledge_text, cType, cId, level=level, llm_client=llm_client) CORE_TYPES = {"core_reading_order", "core_reading_imageDrag", "core_speaking_reply", "core_speaking_inquiry", "core_speaking_image", "core_listening_order", "core_listening_drag", "core_listening_choose", "core_writing_imgMakeSentence", "core_writing_imgWrite", "core_writing_questionMakeSentence", "core_writing_questionWrite"} def parse_core_reading_fields(teaching_config): """ 从组件配置文本中提取 core_reading_order 所需的输入字段。 组件配置 = 教研配置-任务 + 教研配置-材料 拼接,包含 【xxx】 sections。 额外可能包含 【流程编排】【角色配置】【专有词汇】【学习材料】 等。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) # 任务配置 = 任务标题 + 任务背景 task_parts = [] if fields.get("任务标题"): task_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): task_parts.append(f"【任务背景】\n{fields['任务背景']}") 任务配置 = "\n\n".join(task_parts) if task_parts else "" # 通关知识 通关知识 = fields.get("通关知识", "") # 材料配置 = 阅读材料 section (用于 Step1/Step2) 材料配置 = fields.get("阅读材料", "") # 学习材料 / 学习过程 (用于 Step2, bitable中有时叫【学习过程】) 学习材料 = fields.get("学习材料", "") or fields.get("学习过程", "") # 互动题目 / 题目部分 互动题目 = fields.get("题目部分", "") or fields.get("互动题目", "") # 流程编排 流程编排 = fields.get("流程编排", "") # 角色配置 (从组件配置的 【角色配置】section) 角色配置 = fields.get("角色配置", "") # 专有词汇 / 专有名词 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") return { "任务配置": 任务配置, "通关知识": 通关知识, "材料配置": 材料配置, "学习材料": 学习材料, "互动题目": 互动题目, "流程编排": 流程编排, "角色配置": 角色配置, "专有词汇": 专有词汇, } def generate_core_reading_order(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 合作阅读(core_reading_order)7步 LLM 生成流水线。 Step 1: 阅读材料格式化 → 材料正文 (text) Step 2: 学习材料格式化 → 学习内容 (text) Step 3: 学习过程配置 → 学习配置 (text) Step 4: taskData → JSON Step 5: textData → JSON Step 6: sequenceData → JSON Step 7: learningData → JSON Args: cId: 组件ID teaching_config: 教研配置文本(组件配置列) character_map: 角色映射 dict(可选,从section-character映射解析而来) llm_client: LLMClient 实例 Returns: dict: { "taskData": dict, "textData": dict, "sequenceData": dict, "learningData": dict, "kpInfo": dict or None, "intermediate": {"阅读材料格式化": str, "学习材料格式化": str, "学习过程配置": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_reading_order" # 解析输入字段 fields = parse_core_reading_fields(teaching_config) # 角色配置:优先使用组件配置中的,回退到 character_map 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) # 学习材料输入:优先使用【学习材料】section,回退到【阅读材料】 step2_input = fields["学习材料"] if fields["学习材料"].strip() else fields["材料配置"] logger.info(f"[core_reading] 开始7步生成: cId={cId}") # ── Step 1: 阅读材料格式化 ── logger.info(f"[core_reading] Step 1/7: 阅读材料格式化") prompt1 = build_step_prompt("step1_format_reading", {"材料配置": fields["材料配置"]}) 材料正文, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.1) 材料正文 = 材料正文.strip() logger.info(f" Step 1 完成: {len(材料正文)} 字符, tokens={usage1}") # ── Step 2: 学习材料格式化 ── logger.info(f"[core_reading] Step 2/7: 学习材料格式化") prompt2 = build_step_prompt("step2_format_learning", {"材料配置": step2_input}) 学习内容, usage2 = llm_client.call("", prompt2, max_tokens=8192, temperature=0.1) 学习内容 = 学习内容.strip() logger.info(f" Step 2 完成: {len(学习内容)} 字符, tokens={usage2}") # ── Step 3: 学习过程配置 ── logger.info(f"[core_reading] Step 3/7: 学习过程配置") prompt3 = build_step_prompt("step3_learning_process", { "文章材料": 材料正文, "学习内容": 学习内容, "专有词汇": fields["专有词汇"], }) 学习配置, usage3 = llm_client.call("", prompt3, max_tokens=8192, temperature=0.3) 学习配置 = 学习配置.strip() logger.info(f" Step 3 完成: {len(学习配置)} 字符, tokens={usage3}") # ── Step 4: taskData ── logger.info(f"[core_reading] Step 4/7: taskData") prompt4 = build_step_prompt("step4_task_data", { "ID": str(cId), "任务配置": fields["任务配置"], "通关知识": fields["通关知识"], "角色配置": 角色配置, }) taskData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1) logger.info(f" Step 4 完成: taskData keys={list(taskData.keys())}, tokens={usage4}") # ── Step 5: textData ── logger.info(f"[core_reading] Step 5/7: textData") prompt5 = build_step_prompt("step5_text_data", { "ID": str(cId), "材料正文": 材料正文, "互动题目": fields["互动题目"], "角色配置": 角色配置, }) textData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=8192, temperature=0.1) logger.info(f" Step 5 完成: textData keys={list(textData.keys())}, tokens={usage5}") # ── Step 6: sequenceData ── logger.info(f"[core_reading] Step 6/7: sequenceData") prompt6 = build_step_prompt("step6_sequence_data", { "ID": str(cId), "流程编排": fields["流程编排"], "角色配置": 角色配置, }) sequenceData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=4096, temperature=0.1) logger.info(f" Step 6 完成: sequenceData keys={list(sequenceData.keys())}, tokens={usage6}") # ── Step 7: learningData ── logger.info(f"[core_reading] Step 7/7: learningData") prompt7 = build_step_prompt("step7_learning_data", {"学习配置": 学习配置}) learningData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=8192, temperature=0.1) logger.info(f" Step 7 完成: learningData keys={list(learningData.keys())}, tokens={usage7}") # ── kpInfo (MySQL 匹配) ── kp_info = generate_kp_info(cType, cId, fields["通关知识"], llm_client=llm_client, level=level) logger.info(f"[core_reading] 7步生成全部完成: cId={cId}") return { "taskInfo": taskData, "materialInfo": textData, "flowInfo": sequenceData, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "阅读材料格式化": 材料正文, "学习材料格式化": 学习内容, "学习过程配置": 学习配置, }, } def parse_core_imagedrag_fields(teaching_config): """ 从组件配置文本中提取 core_reading_imageDrag 所需的输入字段。 组件配置 = 教研-导览配置 + 教研-互动配置 拼接,包含 【xxx】 sections。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) # 用户任务描述 = 任务标题 + 任务背景 + 任务描述 desc_parts = [] if fields.get("任务标题"): desc_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): desc_parts.append(f"【任务背景】\n{fields['任务背景']}") if fields.get("任务描述"): desc_parts.append(f"【任务描述】\n{fields['任务描述']}") 用户任务描述 = "\n\n".join(desc_parts) if desc_parts else "" # 用户任务知识 / 通关知识 用户任务知识 = fields.get("用户任务知识", "") or fields.get("通关知识", "") # 互动配置 (包含 阅读材料 + 互动问题) 互动配置 = fields.get("互动配置", "") or fields.get("阅读材料", "") # 如果没有单独的互动配置 section, 尝试拼接阅读材料+互动问题 if not 互动配置: parts = [] for key in ["阅读材料1", "阅读材料2", "阅读材料", "材料部分", "互动问题"]: if fields.get(key): parts.append(fields[key]) 互动配置 = "\n\n".join(parts) # 角色配置 角色配置 = fields.get("角色配置", "") # 专有词汇 / 专有名词 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") return { "用户任务描述": 用户任务描述, "用户任务知识": 用户任务知识, "互动配置": 互动配置, "角色配置": 角色配置, "专有词汇": 专有词汇, } def generate_core_reading_imagedrag(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 看图选词(core_reading_imageDrag)5步 LLM 生成流水线。 Step 1: 学习流程配置 → 中间文本 Step 2: 互动描述 → 中间文本 Step 3: taskInfo → JSON Step 4: questionGroup → JSON Step 5: studyInfo → JSON Returns: dict: { "taskData": dict, "questionGroupData": list/dict, "studyData": dict, "kpInfo": dict or None, "intermediate": {"学习流程配置": str, "互动描述": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_reading_imageDrag" # 解析输入字段 fields = parse_core_imagedrag_fields(teaching_config) # 角色配置:优先使用组件配置中的,回退到 character_map 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_imagedrag] 开始5步生成: cId={cId}") # ── Step 1: 学习流程配置 ── logger.info(f"[core_imagedrag] Step 1/5: 学习流程配置") prompt1 = build_imagedrag_prompt("step1_learning_flow", { "互动配置": fields["互动配置"], "用户任务知识": fields["用户任务知识"], "专有词汇": fields["专有词汇"], }) 学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习流程配置 = 学习流程配置.strip() logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}") # ── Step 2: 互动描述 ── logger.info(f"[core_imagedrag] Step 2/5: 互动描述") prompt2 = build_imagedrag_prompt("step2_interaction_desc", { "用户任务描述": fields["用户任务描述"], "互动配置": fields["互动配置"], }) 互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3) 互动描述 = 互动描述.strip() logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}") # ── Step 3: taskInfo ── logger.info(f"[core_imagedrag] Step 3/5: taskInfo") prompt3 = build_imagedrag_prompt("step3_task_info", { "ID": str(cId), "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色配置": 角色配置, "互动描述": 互动描述, }) taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1) logger.info(f" Step 3 完成: taskData keys={list(taskData.keys())}, tokens={usage3}") # ── Step 4: questionGroup ── logger.info(f"[core_imagedrag] Step 4/5: questionGroup") prompt4 = build_imagedrag_prompt("step4_question_group", { "配置材料": fields["互动配置"], }) questionGroupData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=8192, temperature=0.1) logger.info(f" Step 4 完成: questionGroupData type={type(questionGroupData).__name__}, tokens={usage4}") # ── Step 5: studyInfo ── logger.info(f"[core_imagedrag] Step 5/5: studyInfo") prompt5 = build_imagedrag_prompt("step5_study_info", { "学习流程配置": 学习流程配置, }) studyData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=8192, temperature=0.1) logger.info(f" Step 5 完成: studyData keys={list(studyData.keys()) if isinstance(studyData, dict) else 'list'}, tokens={usage5}") # ── kpInfo (MySQL 匹配) ── kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level) logger.info(f"[core_imagedrag] 5步生成全部完成: cId={cId}") return { "taskInfo": taskData, "questionGroup": questionGroupData, "studyInfo": studyData, "kpInfo": kp_info, "intermediate": { "学习流程配置": 学习流程配置, "互动描述": 互动描述, }, } def parse_core_speaking_fields(teaching_config): """ 从组件配置文本中提取 core_speaking_reply 所需的输入字段。 组件配置 = 教研-用户视角任务信息 + 教研-任务规则与NPC设定 拼接。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) # 用户任务描述 = 任务标题 + 任务背景 + 任务描述 desc_parts = [] if fields.get("任务标题"): desc_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): desc_parts.append(f"【任务背景】\n{fields['任务背景']}") if fields.get("任务描述"): desc_parts.append(f"【任务描述】\n{fields['任务描述']}") 用户任务描述 = "\n\n".join(desc_parts) if desc_parts else "" # 用户任务知识 / 通关知识 用户任务知识 = fields.get("用户任务知识", "") or fields.get("通关知识", "") # 示例对话 示例对话 = fields.get("示例对话", "") # 角色配置 角色配置 = fields.get("角色配置", "") # 专有词汇 / 专有名词 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") # 对话配置 = 对话角色 + 回合设定 dialog_parts = [] if fields.get("对话角色"): dialog_parts.append(f"【对话角色】\n{fields['对话角色']}") if fields.get("回合设定"): dialog_parts.append(f"【回合设定】\n{fields['回合设定']}") 对话配置 = "\n\n".join(dialog_parts) if dialog_parts else "" # NPC任务设定 / 任务设定 npc任务设定 = fields.get("任务设定", "") or fields.get("npc任务设定", "") # NPC视角任务背景 / 任务背景(NPC视角) # 如果有单独的NPC背景section用它,否则复用任务背景 npc任务背景 = fields.get("npc视角任务背景", "") or fields.get("任务背景", "") # 通过规则 通过规则 = fields.get("通过规则", "") return { "用户任务描述": 用户任务描述, "用户任务知识": 用户任务知识, "示例对话": 示例对话, "角色配置": 角色配置, "专有词汇": 专有词汇, "对话配置": 对话配置, "npc任务设定": npc任务设定, "npc视角任务背景": npc任务背景, "通过规则": 通过规则, } def generate_core_speaking_reply(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 口语快答(core_speaking_reply)7步 LLM 生成流水线。 Step 1: 学习流程配置 → 中间文本 Step 2: promptInfo配置 → 中间文本 Step 3: 语音识别热词 → 中间文本 Step 4: taskData → JSON Step 5: settingData → JSON Step 6: configData → JSON Step 7: learningData → JSON Returns: dict: { "taskData": dict, "settingData": dict, "configData": dict, "learningData": dict, "kpInfo": dict or None, "intermediate": {"学习流程配置": str, "promptInfo配置": str, "语音识别热词": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_speaking_reply" # 解析输入字段 fields = parse_core_speaking_fields(teaching_config) # 角色配置:优先使用组件配置中的,回退到 character_map 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_speaking] 开始7步生成: cId={cId}") # ── Step 1: 学习流程配置 ── logger.info(f"[core_speaking] Step 1/7: 学习流程配置") prompt1 = build_speaking_prompt("step1_learning_flow", { "用户任务描述": fields["用户任务描述"], "示例对话": fields["示例对话"], "专有词汇": fields["专有词汇"], }) 学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习流程配置 = 学习流程配置.strip() logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}") # ── Step 2: promptInfo配置 ── logger.info(f"[core_speaking] Step 2/7: promptInfo配置") prompt2 = build_speaking_prompt("step2_prompt_info", { "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "示例对话": fields["示例对话"], "npc视角任务背景": fields["npc视角任务背景"], "npc任务设定": fields["npc任务设定"], }) promptInfo配置, usage2 = llm_client.call("", prompt2, max_tokens=8192, temperature=0.3) promptInfo配置 = promptInfo配置.strip() logger.info(f" Step 2 完成: {len(promptInfo配置)} 字符, tokens={usage2}") # ── Step 3: 语音识别热词 ── logger.info(f"[core_speaking] Step 3/7: 语音识别热词") prompt3 = build_speaking_prompt("step3_asr_hotwords", { "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "示例对话": fields["示例对话"], }) 语音识别热词, usage3 = llm_client.call("", prompt3, max_tokens=1024, temperature=0.1) 语音识别热词 = 语音识别热词.strip() logger.info(f" Step 3 完成: {len(语音识别热词)} 字符, tokens={usage3}") # ── Step 4: taskData ── logger.info(f"[core_speaking] Step 4/7: taskData") prompt4 = build_speaking_prompt("step4_task_data", { "ID": str(cId), "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色配置": 角色配置, }) taskData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1) logger.info(f" Step 4 完成: taskData keys={list(taskData.keys())}, tokens={usage4}") # ── Step 5: settingData ── logger.info(f"[core_speaking] Step 5/7: settingData") prompt5 = build_speaking_prompt("step5_setting_data", { "ID": str(cId), "对话配置": fields["对话配置"], }) settingData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=2048, temperature=0.1) logger.info(f" Step 5 完成: settingData keys={list(settingData.keys())}, tokens={usage5}") # ── Step 6: configData ── logger.info(f"[core_speaking] Step 6/7: configData") prompt6 = build_speaking_prompt("step6_config_data", { "语音识别热词": 语音识别热词, "任务背景": fields["npc视角任务背景"], "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色驱动配置": promptInfo配置, "示例对话": fields["示例对话"], "通过规则": fields["通过规则"], }) configData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=8192, temperature=0.1) logger.info(f" Step 6 完成: configData keys={list(configData.keys())}, tokens={usage6}") # ── Step 7: learningData ── logger.info(f"[core_speaking] Step 7/7: learningData") prompt7 = build_speaking_prompt("step7_learning_data", { "学习配置": 学习流程配置, }) learningData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=8192, temperature=0.1) logger.info(f" Step 7 完成: learningData keys={list(learningData.keys())}, tokens={usage7}") # ── kpInfo (MySQL 匹配) ── kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level) logger.info(f"[core_speaking] 7步生成全部完成: cId={cId}") return { "taskInfo": taskData, "dialogSetting": settingData, "dialogConfig": configData, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "学习流程配置": 学习流程配置, "promptInfo配置": promptInfo配置, "语音识别热词": 语音识别热词, }, } def parse_core_speaking_inquiry_fields(teaching_config): """ 从组件配置文本中提取 core_speaking_inquiry 所需的输入字段。 组件配置 = 教研-用户视角任务信息 + 教研-任务规则与NPC设定 拼接。 与口语快答相同的输入源,额外提取【知识库】section。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) desc_parts = [] if fields.get("任务标题"): desc_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): desc_parts.append(f"【任务背景】\n{fields['任务背景']}") if fields.get("任务描述"): desc_parts.append(f"【任务描述】\n{fields['任务描述']}") 用户任务描述 = "\n\n".join(desc_parts) if desc_parts else "" 用户任务知识 = fields.get("用户任务知识", "") or fields.get("通关知识", "") 示例对话 = fields.get("示例对话", "") 角色配置 = fields.get("角色配置", "") 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") dialog_parts = [] if fields.get("对话角色"): dialog_parts.append(f"【对话角色】\n{fields['对话角色']}") if fields.get("回合设定"): dialog_parts.append(f"【回合设定】\n{fields['回合设定']}") 对话配置 = "\n\n".join(dialog_parts) if dialog_parts else "" npc任务设定 = fields.get("任务设定", "") or fields.get("npc任务设定", "") npc任务背景 = fields.get("npc视角任务背景", "") or fields.get("任务背景", "") 通过规则 = fields.get("通过规则", "") 知识库 = fields.get("知识库", "") return { "用户任务描述": 用户任务描述, "用户任务知识": 用户任务知识, "示例对话": 示例对话, "角色配置": 角色配置, "专有词汇": 专有词汇, "对话配置": 对话配置, "npc任务设定": npc任务设定, "npc视角任务背景": npc任务背景, "通过规则": 通过规则, "知识库": 知识库, } def generate_core_speaking_inquiry(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 口语妙问(core_speaking_inquiry)7步 LLM 生成流水线。 Step 1: 学习流程配置 → 中间文本 Step 2: promptInfo配置 → 中间文本 Step 3: 语音识别热词 → 中间文本 Step 4: taskData → JSON Step 5: settingData → JSON Step 6: configData → JSON Step 7: learningData → JSON Returns: dict: { "taskData": dict, "settingData": dict, "configData": dict, "learningData": dict, "kpInfo": dict or None, "intermediate": {"学习流程配置": str, "promptInfo配置": str, "语音识别热词": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_speaking_inquiry" fields = parse_core_speaking_inquiry_fields(teaching_config) 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_speaking_inquiry] 开始7步生成: cId={cId}") # ── Step 1: 学习流程配置 ── logger.info(f"[core_speaking_inquiry] Step 1/7: 学习流程配置") prompt1 = build_speaking_inquiry_prompt("step1_learning_flow", { "用户任务描述": fields["用户任务描述"], "示例对话": fields["示例对话"], "专有词汇": fields["专有词汇"], }) 学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习流程配置 = 学习流程配置.strip() logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}") # ── Step 2: promptInfo配置 ── logger.info(f"[core_speaking_inquiry] Step 2/7: promptInfo配置") prompt2 = build_speaking_inquiry_prompt("step2_prompt_info", { "用户任务描述": fields["用户任务描述"], "示例对话": fields["示例对话"], "npc视角任务背景": fields["npc视角任务背景"], "npc知识库": fields["知识库"], }) promptInfo配置, usage2 = llm_client.call("", prompt2, max_tokens=8192, temperature=0.3) promptInfo配置 = promptInfo配置.strip() logger.info(f" Step 2 完成: {len(promptInfo配置)} 字符, tokens={usage2}") # ── Step 3: 语音识别热词 ── logger.info(f"[core_speaking_inquiry] Step 3/7: 语音识别热词") prompt3 = build_speaking_inquiry_prompt("step3_asr_hotwords", { "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "示例对话": fields["示例对话"], }) 语音识别热词, usage3 = llm_client.call("", prompt3, max_tokens=1024, temperature=0.1) 语音识别热词 = 语音识别热词.strip() logger.info(f" Step 3 完成: {len(语音识别热词)} 字符, tokens={usage3}") # ── Step 4: taskData ── logger.info(f"[core_speaking_inquiry] Step 4/7: taskData") prompt4 = build_speaking_inquiry_prompt("step4_task_data", { "ID": str(cId), "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色配置": 角色配置, }) taskData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1) logger.info(f" Step 4 完成: taskData keys={list(taskData.keys())}, tokens={usage4}") # ── Step 5: settingData ── logger.info(f"[core_speaking_inquiry] Step 5/7: settingData") prompt5 = build_speaking_inquiry_prompt("step5_setting_data", { "ID": str(cId), "对话配置": fields["对话配置"], }) settingData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=2048, temperature=0.1) logger.info(f" Step 5 完成: settingData keys={list(settingData.keys())}, tokens={usage5}") # ── Step 6: configData ── logger.info(f"[core_speaking_inquiry] Step 6/7: configData") prompt6 = build_speaking_inquiry_prompt("step6_config_data", { "语音识别热词": 语音识别热词, "任务背景": fields["npc视角任务背景"], "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色驱动配置": promptInfo配置, "示例对话": fields["示例对话"], "通过规则": fields["通过规则"], }) configData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=8192, temperature=0.1) logger.info(f" Step 6 完成: configData keys={list(configData.keys())}, tokens={usage6}") # ── Step 7: learningData ── logger.info(f"[core_speaking_inquiry] Step 7/7: learningData") prompt7 = build_speaking_inquiry_prompt("step7_learning_data", { "学习配置": 学习流程配置, }) learningData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=8192, temperature=0.1) logger.info(f" Step 7 完成: learningData keys={list(learningData.keys())}, tokens={usage7}") # ── kpInfo (MySQL 匹配) ── kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level) logger.info(f"[core_speaking_inquiry] 7步生成全部完成: cId={cId}") return { "taskInfo": taskData, "dialogSetting": settingData, "dialogConfig": configData, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "学习流程配置": 学习流程配置, "promptInfo配置": promptInfo配置, "语音识别热词": 语音识别热词, }, } def parse_core_speaking_image_fields(teaching_config): """ 从组件配置文本中提取 core_speaking_image 所需的输入字段。 组件配置 = 教研-导览配置 + 教研-对话配置 拼接。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) desc_parts = [] if fields.get("任务标题"): desc_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): desc_parts.append(f"【任务背景】\n{fields['任务背景']}") if fields.get("任务描述"): desc_parts.append(f"【任务描述】\n{fields['任务描述']}") 用户任务描述 = "\n\n".join(desc_parts) if desc_parts else "" 用户任务知识 = fields.get("知识", "") or fields.get("用户任务知识", "") or fields.get("通关知识", "") # 对话信息 = 对话角色 + 角色背景 dialog_info_parts = [] if fields.get("对话角色"): dialog_info_parts.append(fields["对话角色"]) if fields.get("角色背景"): dialog_info_parts.append(fields["角色背景"]) 对话信息 = "\n".join(dialog_info_parts) if dialog_info_parts else "" 互动问题 = fields.get("互动问题", "") 通过规则 = fields.get("通过规则", "") 示例对话 = fields.get("示例对话", "") 角色配置 = fields.get("角色配置", "") 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") return { "用户任务描述": 用户任务描述, "用户任务知识": 用户任务知识, "对话信息": 对话信息, "互动问题": 互动问题, "通过规则": 通过规则, "示例对话": 示例对话, "角色配置": 角色配置, "专有词汇": 专有词汇, } def generate_core_speaking_image(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 看图说话(core_speaking_image)8步 LLM 生成流水线。 Step 1: 学习流程配置 → 中间文本 Step 2: 语音识别热词 → 中间文本 Step 3: taskData → JSON Step 4: imageInfo → JSON Step 5: optionList → JSON Step 6: questionList → JSON Step 7: configData → JSON (depends on questionList) Step 8: learningData → JSON (depends on 学习流程配置) Returns: dict: { "taskData": dict, "configData": dict, "imageInfo": dict, "optionList": list, "questionList": list, "learningData": dict, "kpInfo": dict or None, "intermediate": {"学习流程配置": str, "语音识别热词": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_speaking_image" fields = parse_core_speaking_image_fields(teaching_config) 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_speaking_image] 开始8步生成: cId={cId}") # ── Step 1: 学习流程配置 ── logger.info(f"[core_speaking_image] Step 1/8: 学习流程配置") prompt1 = build_speaking_image_prompt("step1_learning_flow", { "用户任务描述": fields["用户任务描述"], "示例对话": fields["示例对话"], "专有词汇": fields["专有词汇"], "对话信息": fields["对话信息"], }) 学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习流程配置 = 学习流程配置.strip() logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}") # ── Step 2: 语音识别热词 ── logger.info(f"[core_speaking_image] Step 2/8: 语音识别热词") prompt2 = build_speaking_image_prompt("step2_asr_hotwords", { "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "示例对话": fields["示例对话"], }) 语音识别热词, usage2 = llm_client.call("", prompt2, max_tokens=1024, temperature=0.1) 语音识别热词 = 语音识别热词.strip() logger.info(f" Step 2 完成: {len(语音识别热词)} 字符, tokens={usage2}") # ── Step 3: taskData ── # 互动描述 = 示例对话内容的英文部分 互动描述 = fields["示例对话"] logger.info(f"[core_speaking_image] Step 3/8: taskData") prompt3 = build_speaking_image_prompt("step3_task_data", { "ID": str(cId), "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色配置": 角色配置, "互动描述": 互动描述, }) taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1) logger.info(f" Step 3 完成: taskData keys={list(taskData.keys())}, tokens={usage3}") # ── Step 4: imageInfo ── logger.info(f"[core_speaking_image] Step 4/8: imageInfo") prompt4 = build_speaking_image_prompt("step4_image_info", { "ID": str(cId), }) imageInfo, usage4 = llm_client.call_for_json("", prompt4, max_tokens=512, temperature=0.0) logger.info(f" Step 4 完成: imageInfo keys={list(imageInfo.keys()) if isinstance(imageInfo, dict) else 'N/A'}, tokens={usage4}") # ── Step 5: optionList ── logger.info(f"[core_speaking_image] Step 5/8: optionList") prompt5 = build_speaking_image_prompt("step5_option_list", { "ID": str(cId), "互动问题": fields["互动问题"], }) optionList, usage5 = llm_client.call_for_json("", prompt5, max_tokens=2048, temperature=0.1) logger.info(f" Step 5 完成: optionList items={len(optionList) if isinstance(optionList, list) else 'N/A'}, tokens={usage5}") # ── Step 6: questionList ── logger.info(f"[core_speaking_image] Step 6/8: questionList") prompt6 = build_speaking_image_prompt("step6_question_list", { "ID": str(cId), "互动问题": fields["互动问题"], "对话信息": fields["对话信息"], }) questionList, usage6 = llm_client.call_for_json("", prompt6, max_tokens=4096, temperature=0.1) logger.info(f" Step 6 完成: questionList items={len(questionList) if isinstance(questionList, list) else 'N/A'}, tokens={usage6}") # ── Step 7: configData (depends on questionList) ── logger.info(f"[core_speaking_image] Step 7/8: configData") questionList_str = json.dumps(questionList, ensure_ascii=False) if questionList else "[]" prompt7 = build_speaking_image_prompt("step7_config_data", { "语音识别热词": 语音识别热词, "用户任务描述": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "示例对话": fields["示例对话"], "通过规则": fields["通过规则"], "questionList": questionList_str, }) configData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=8192, temperature=0.1) logger.info(f" Step 7 完成: configData keys={list(configData.keys())}, tokens={usage7}") # ── Step 8: learningData (depends on 学习流程配置) ── logger.info(f"[core_speaking_image] Step 8/8: learningData") prompt8 = build_speaking_image_prompt("step8_learning_data", { "学习配置": 学习流程配置, "互动问题": fields["互动问题"], }) learningData, usage8 = llm_client.call_for_json("", prompt8, max_tokens=8192, temperature=0.1) logger.info(f" Step 8 完成: learningData keys={list(learningData.keys()) if isinstance(learningData, dict) else 'list'}, tokens={usage8}") # ── kpInfo (MySQL 匹配) ── kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level) logger.info(f"[core_speaking_image] 8步生成全部完成: cId={cId}") return { "taskInfo": taskData, "dialogConfig": configData, "imageInfo": imageInfo, "optionList": optionList, "questionList": questionList, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "学习流程配置": 学习流程配置, "语音识别热词": 语音识别热词, }, } # ============ 合作听力 (core_listening_order) ============ def parse_core_listening_order_fields(teaching_config): """ 从组件配置文本中提取 core_listening_order 所需的输入字段。 组件配置 = 教研配置-任务 + 教研配置-材料 拼接,包含 【xxx】 sections。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) # 任务配置 = 任务标题 + 任务背景 task_parts = [] if fields.get("任务标题"): task_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): task_parts.append(f"【任务背景】\n{fields['任务背景']}") 任务配置 = "\n\n".join(task_parts) if task_parts else "" # 任务名称 (for 互动描述 prompt) 任务名称 = fields.get("任务标题", "") # 通关知识 通关知识 = fields.get("通关知识", "") # 听力材料 听力材料 = fields.get("听力材料", "") or fields.get("阅读材料", "") or fields.get("材料配置", "") # 学习流程 学习流程 = fields.get("学习流程", "") or fields.get("学习过程", "") or fields.get("学习材料", "") # 互动题目 / 题目部分 互动题目 = fields.get("互动题目", "") or fields.get("题目部分", "") or fields.get("题目配置", "") # 流程编排 流程编排 = fields.get("流程编排", "") # 角色配置 角色配置 = fields.get("角色配置", "") # 专有词汇 (暂不实现,保留占位) 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") return { "任务配置": 任务配置, "任务名称": 任务名称, "通关知识": 通关知识, "听力材料": 听力材料, "学习流程": 学习流程, "互动题目": 互动题目, "流程编排": 流程编排, "角色配置": 角色配置, "专有词汇": 专有词汇, } def generate_core_listening_order(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 合作听力(core_listening_order)6步 LLM 生成流水线。 Step 1: 学习过程配置 → 中间文本 Step 2: 互动描述 → 中间文本 Step 3: taskData → JSON Step 4: textData → JSON Step 5: sequenceData → JSON Step 6: learningData → JSON Returns: dict: { "taskData": dict, "textData": dict, "sequenceData": dict, "learningData": dict, "kpInfo": dict or None, "intermediate": {"学习过程配置": str, "互动描述": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_listening_order" # 解析输入字段 fields = parse_core_listening_order_fields(teaching_config) # 角色配置:优先使用组件配置中的,回退到 character_map 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_listening_order] 开始6步生成: cId={cId}") # ── Step 1: 学习过程配置 ── logger.info(f"[core_listening_order] Step 1/6: 学习过程配置") prompt1 = build_listening_order_prompt("step1_learning_process", { "听力材料": fields["听力材料"], "学习流程": fields["学习流程"], "专有词汇": fields["专有词汇"], }) 学习过程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习过程配置 = 学习过程配置.strip() logger.info(f" Step 1 完成: {len(学习过程配置)} 字符, tokens={usage1}") # ── Step 2: 互动描述 ── logger.info(f"[core_listening_order] Step 2/6: 互动描述") prompt2 = build_listening_order_prompt("step2_interaction_desc", { "任务名称": fields["任务名称"], "听力材料": fields["听力材料"], "互动题目": fields["互动题目"], }) 互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3) 互动描述 = 互动描述.strip() logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}") # ── Step 3: taskData ── logger.info(f"[core_listening_order] Step 3/6: taskData") prompt3 = build_listening_order_prompt("step3_task_data", { "ID": str(cId), "用户任务配置": fields["任务配置"], "用户任务知识": fields["通关知识"], "角色配置": 角色配置, }) taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1) logger.info(f" Step 3 完成: taskData keys={list(taskData.keys())}, tokens={usage3}") # ── Step 4: textData ── logger.info(f"[core_listening_order] Step 4/6: textData") prompt4 = build_listening_order_prompt("step4_text_data", { "ID": str(cId), "听力材料": fields["听力材料"], "题目配置": fields["互动题目"], "角色配置": 角色配置, }) textData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=8192, temperature=0.1) logger.info(f" Step 4 完成: textData keys={list(textData.keys())}, tokens={usage4}") # ── Step 5: sequenceData ── logger.info(f"[core_listening_order] Step 5/6: sequenceData") prompt5 = build_listening_order_prompt("step5_sequence_data", { "ID": str(cId), "流程编排": fields["流程编排"], "角色配置": 角色配置, }) sequenceData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=8192, temperature=0.1) logger.info(f" Step 5 完成: sequenceData keys={list(sequenceData.keys())}, tokens={usage5}") # ── Step 6: learningData ── logger.info(f"[core_listening_order] Step 6/6: learningData") prompt6 = build_listening_order_prompt("step6_learning_data", { "学习配置": 学习过程配置, }) learningData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=16384, temperature=0.1) logger.info(f" Step 6 完成: learningData keys={list(learningData.keys())}, tokens={usage6}") # kpInfo kp_info = None if fields["通关知识"]: try: kp_info = generate_kp_info(cType, cId, fields["通关知识"], llm_client, level=level) except Exception as e: logger.warning(f"kpInfo 生成失败: {e}") logger.info(f"[core_listening_order] 6步生成完成: cId={cId}") return { "taskInfo": taskData, "materialInfo": textData, "flowInfo": sequenceData, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "学习过程配置": 学习过程配置, "互动描述": 互动描述, }, } # ============ 听力拖拽 (core_listening_drag) ============ def parse_core_listening_drag_fields(teaching_config): """ 从组件配置文本中提取 core_listening_drag 所需的输入字段。 组件配置 = 教研-导览配置 + 教研-互动配置 拼接,包含 【xxx】 sections。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) # 用户任务描述 = 任务标题 + 任务背景 desc_parts = [] if fields.get("任务标题"): desc_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): desc_parts.append(f"【任务背景】\n{fields['任务背景']}") 用户任务描述 = "\n\n".join(desc_parts) if desc_parts else "" # 任务名称 任务名称 = fields.get("任务标题", "") # 用户任务知识 用户任务知识 = fields.get("用户任务知识", "") or fields.get("通关知识", "") # 听力材料(配置中可能写作【听力文本】或【听力材料】) 听力材料 = fields.get("听力材料", "") or fields.get("听力文本", "") or fields.get("互动配置", "") or fields.get("阅读材料", "") # 学习流程(配置中可能写作【学习过程】或【学习流程】) 学习流程 = fields.get("学习流程", "") or fields.get("学习过程", "") # 关联知识 关联知识 = fields.get("关联知识", "") # 题目(配置中可能写作【题目信息】或【题目】) 题目 = fields.get("题目", "") or fields.get("题目信息", "") or fields.get("互动题目", "") or fields.get("题目部分", "") # 题目材料 (for 互动描述) 题目材料 = fields.get("题目材料", "") or 题目 # 开场语 开场语 = fields.get("开场语", "") # 角色配置 角色配置 = fields.get("角色配置", "") # 专有词汇 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") return { "用户任务描述": 用户任务描述, "任务名称": 任务名称, "用户任务知识": 用户任务知识, "听力材料": 听力材料, "学习流程": 学习流程, "关联知识": 关联知识, "题目": 题目, "题目材料": 题目材料, "开场语": 开场语, "角色配置": 角色配置, "专有词汇": 专有词汇, } def generate_core_listening_drag(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 听力拖拽(core_listening_drag)7步 LLM 生成流水线。 Step 1: 学习过程配置 → 中间文本 Step 2: 互动描述 → 中间文本 Step 3: taskData → JSON Step 4: preDialog → JSON Step 5: dialogList → JSON Step 6: questionList → JSON Step 7: learningData → JSON Returns: dict: { "taskData": dict, "preDialogData": list, "dialogListData": list, "questionListData": list, "learningData": dict, "kpInfo": dict or None, "intermediate": {"学习过程配置": str, "互动描述": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_listening_drag" # 解析输入字段 fields = parse_core_listening_drag_fields(teaching_config) # 角色配置:优先使用组件配置中的,回退到 character_map 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_listening_drag] 开始7步生成: cId={cId}") # ── Step 1: 学习过程配置 ── logger.info(f"[core_listening_drag] Step 1/7: 学习过程配置") prompt1 = build_listening_drag_prompt("step1_learning_process", { "听力材料": fields["听力材料"], "学习流程": fields["学习流程"], "专有词汇": fields["专有词汇"], "关联知识": fields["关联知识"], }) 学习过程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习过程配置 = 学习过程配置.strip() logger.info(f" Step 1 完成: {len(学习过程配置)} 字符, tokens={usage1}") # ── Step 2: 互动描述 ── logger.info(f"[core_listening_drag] Step 2/7: 互动描述") prompt2 = build_listening_drag_prompt("step2_interaction_desc", { "任务名称": fields["任务名称"], "题目材料": fields["题目材料"], }) 互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3) 互动描述 = 互动描述.strip() logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}") # ── Step 3: taskData ── logger.info(f"[core_listening_drag] Step 3/7: taskData") prompt3 = build_listening_drag_prompt("step3_task_data", { "ID": str(cId), "用户任务配置": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色配置": 角色配置, "互动描述": 互动描述, }) taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1) logger.info(f" Step 3 完成: taskData keys={list(taskData.keys())}, tokens={usage3}") # ── Step 4: preDialog ── logger.info(f"[core_listening_drag] Step 4/7: preDialog") prompt4 = build_listening_drag_prompt("step4_pre_dialog", { "开场语": fields["开场语"], }) preDialogData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1) logger.info(f" Step 4 完成: preDialogData type={type(preDialogData).__name__}, tokens={usage4}") # ── Step 5: dialogList ── logger.info(f"[core_listening_drag] Step 5/7: dialogList") prompt5 = build_listening_drag_prompt("step5_dialog_list", { "听力材料": fields["听力材料"], }) dialogListData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=8192, temperature=0.1) logger.info(f" Step 5 完成: dialogListData type={type(dialogListData).__name__}, tokens={usage5}") # ── Step 6: questionList ── logger.info(f"[core_listening_drag] Step 6/7: questionList") prompt6 = build_listening_drag_prompt("step6_question_list", { "ID": str(cId), "题目": fields["题目"], }) questionListData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=8192, temperature=0.1) logger.info(f" Step 6 完成: questionListData type={type(questionListData).__name__}, tokens={usage6}") # ── Step 7: learningData ── logger.info(f"[core_listening_drag] Step 7/7: learningData") prompt7 = build_listening_drag_prompt("step7_learning_data", { "学习配置": 学习过程配置, }) learningData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=16384, temperature=0.1) logger.info(f" Step 7 完成: learningData keys={list(learningData.keys()) if isinstance(learningData, dict) else 'list'}, tokens={usage7}") # kpInfo kp_info = None if fields["用户任务知识"]: try: kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client, level=level) except Exception as e: logger.warning(f"kpInfo 生成失败: {e}") logger.info(f"[core_listening_drag] 7步生成完成: cId={cId}") return { "taskInfo": taskData, "preDialog": preDialogData, "dialogList": dialogListData, "questionList": questionListData, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "学习过程配置": 学习过程配置, "互动描述": 互动描述, }, } # ============ 听力选择 (core_listening_choose) ============ def parse_core_listening_choose_fields(teaching_config): """ 从组件配置文本中提取 core_listening_choose 所需的输入字段。 组件配置 = 教研-导览配置 + 教研-互动配置 拼接,包含 【xxx】 sections。 注意:听力选择的题目可能以【题目组1】【题目组2】...形式出现。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) # 用户任务描述 = 任务标题 + 任务背景 desc_parts = [] if fields.get("任务标题"): desc_parts.append(f"【任务标题】\n{fields['任务标题']}") if fields.get("任务背景"): desc_parts.append(f"【任务背景】\n{fields['任务背景']}") 用户任务描述 = "\n\n".join(desc_parts) if desc_parts else "" # 任务名称 任务名称 = fields.get("任务标题", "") # 用户任务知识 用户任务知识 = fields.get("用户任务知识", "") or fields.get("通关知识", "") # 听力材料(配置中可能写作【听力文本】或【听力材料】) 听力材料 = fields.get("听力材料", "") or fields.get("听力文本", "") or fields.get("互动配置", "") or fields.get("阅读材料", "") # 学习流程(配置中可能写作【学习过程】或【学习流程】) 学习流程 = fields.get("学习流程", "") or fields.get("学习过程", "") # 题目:支持【题目组1】【题目组2】...格式,合并所有题目组 题目 = fields.get("题目", "") or fields.get("互动题目", "") or fields.get("题目部分", "") if not 题目: # 查找所有 题目组N 字段并按序合并 题目组_parts = [] for key in sorted(fields.keys()): if key.startswith("题目组"): 题目组_parts.append(f"【{key}】\n{fields[key]}") if 题目组_parts: 题目 = "\n".join(题目组_parts) # 听力材料为空时,fallback 到题目组内容(听力选择的听力文本嵌在题目组中) if not 听力材料 and 题目: 听力材料 = 题目 # 题目材料 (for 互动描述) 题目材料 = fields.get("题目材料", "") or 题目 # 角色配置 角色配置 = fields.get("角色配置", "") # 专有词汇 专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "") return { "用户任务描述": 用户任务描述, "任务名称": 任务名称, "用户任务知识": 用户任务知识, "听力材料": 听力材料, "学习流程": 学习流程, "题目": 题目, "题目材料": 题目材料, "角色配置": 角色配置, "专有词汇": 专有词汇, } def generate_core_listening_choose(cId, teaching_config, character_map=None, llm_client=None, level=None): """ 听力选择(core_listening_choose)5步 LLM 生成流水线。 Step 1: 学习过程配置 → 中间文本 Step 2: 互动描述 → 中间文本 Step 3: taskData → JSON Step 4: questionGroup → JSON Step 5: learningData → JSON Returns: dict: { "taskData": dict, "questionGroupData": list/dict, "learningData": dict, "kpInfo": dict or None, "intermediate": {"学习过程配置": str, "互动描述": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() cType = "core_listening_choose" # 解析输入字段 fields = parse_core_listening_choose_fields(teaching_config) # 角色配置:优先使用组件配置中的,回退到 character_map 角色配置 = fields["角色配置"] if not 角色配置 and character_map: 角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items()) logger.info(f"[core_listening_choose] 开始5步生成: cId={cId}") # ── Step 1: 学习过程配置 ── logger.info(f"[core_listening_choose] Step 1/5: 学习过程配置") prompt1 = build_listening_choose_prompt("step1_learning_process", { "听力材料": fields["听力材料"], "学习流程": fields["学习流程"], "专有词汇": fields["专有词汇"], }) 学习过程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习过程配置 = 学习过程配置.strip() logger.info(f" Step 1 完成: {len(学习过程配置)} 字符, tokens={usage1}") # ── Step 2: 互动描述 ── logger.info(f"[core_listening_choose] Step 2/5: 互动描述") prompt2 = build_listening_choose_prompt("step2_interaction_desc", { "任务名称": fields["任务名称"], "题目材料": fields["题目材料"], }) 互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3) 互动描述 = 互动描述.strip() logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}") # ── Step 3: taskData ── logger.info(f"[core_listening_choose] Step 3/5: taskData") prompt3 = build_listening_choose_prompt("step3_task_data", { "ID": str(cId), "用户任务配置": fields["用户任务描述"], "用户任务知识": fields["用户任务知识"], "角色配置": 角色配置, "互动描述": 互动描述, }) taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1) logger.info(f" Step 3 完成: taskData keys={list(taskData.keys())}, tokens={usage3}") # ── Step 4: questionGroup ── logger.info(f"[core_listening_choose] Step 4/5: questionGroup") prompt4 = build_listening_choose_prompt("step4_question_group", { "ID": str(cId), "题目": fields["题目"], }) questionGroupData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=8192, temperature=0.1) logger.info(f" Step 4 完成: questionGroupData type={type(questionGroupData).__name__}, tokens={usage4}") # ── Step 5: learningData ── logger.info(f"[core_listening_choose] Step 5/5: learningData") prompt5 = build_listening_choose_prompt("step5_learning_data", { "学习配置": 学习过程配置, }) learningData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=16384, temperature=0.1) logger.info(f" Step 5 完成: learningData keys={list(learningData.keys()) if isinstance(learningData, dict) else 'list'}, tokens={usage5}") # kpInfo kp_info = None if fields["用户任务知识"]: try: kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client, level=level) except Exception as e: logger.warning(f"kpInfo 生成失败: {e}") logger.info(f"[core_listening_choose] 5步生成完成: cId={cId}") return { "taskInfo": taskData, "questionGroup": questionGroupData, "studyInfo": learningData, "kpInfo": kp_info, "intermediate": { "学习过程配置": 学习过程配置, "互动描述": 互动描述, }, } # ============ 写作类核心互动(4种) ============ def parse_core_writing_fields(teaching_config): """ 从组件配置文本中提取写作类核心互动所需的输入字段。 组件配置 = 教研配置-任务信息 + 教研配置-全文信息 拼接,包含 【xxx】 sections。 Returns: dict: 字段名 → 值 """ fields = parse_teaching_config_fields(teaching_config) 任务名称 = fields.get("任务名称", "") 任务背景 = fields.get("任务背景", "") 任务描述 = fields.get("任务描述", "") 剧情提要 = fields.get("剧情提要", "") 知识 = fields.get("知识", "") # 学习内容:可能写作【学习内容】【学习流程】【学习过程】,也可能用【知识】代替 学习内容 = fields.get("学习内容", "") or fields.get("学习流程", "") or fields.get("学习过程", "") or 知识 写作材料 = fields.get("写作材料", "") 通过规则 = fields.get("通过规则", "") 题目类型 = fields.get("题目类型", "") or fields.get("题干类型", "") # 题干描述 may be in teaching_config as 【题干描述】 or inside 题目类型 题干描述 = fields.get("题干描述", "") # 写作类型 写作类型 = fields.get("写作类型", "") # 构建任务信息 (用于 taskInfo prompt) 任务信息_parts = [] if 任务名称: 任务信息_parts.append(f"【任务名称】\n{任务名称}") if 任务背景: 任务信息_parts.append(f"【任务背景】\n{任务背景}") if 任务描述: 任务信息_parts.append(f"【任务描述】\n{任务描述}") if 剧情提要: 任务信息_parts.append(f"【剧情提要】\n{剧情提要}") if 知识: 任务信息_parts.append(f"【知识】\n{知识}") 任务信息 = "\n\n".join(任务信息_parts) # 重建题目类型文本 题目类型_text = "" if 题目类型 or 题干描述 or 写作类型: parts = [] if 题目类型: parts.append(f"【题干类型】\n{题目类型}") if 题干描述: parts.append(f"【题干描述】\n{题干描述}") if 写作类型: parts.append(f"【写作类型】\n{写作类型}") 题目类型_text = "\n".join(parts) return { "任务名称": 任务名称, "任务背景": 任务背景, "任务描述": 任务描述, "剧情提要": 剧情提要, "知识": 知识, "学习内容": 学习内容, "写作材料": 写作材料, "通过规则": 通过规则, "题目类型": 题目类型_text, "写作类型": 写作类型, "任务信息": 任务信息, } def _generate_core_writing(cId, cType, teaching_config, character_map=None, llm_client=None, level=None): """ 写作类核心互动通用6步 LLM 生成流水线。 Step 1: 学习流程配置 → 中间文本 Step 2: 互动描述 → 中间文本 Step 3: taskInfo → JSON Step 4: textInfo → JSON Step 5: studyInfo → JSON Step 6: evalInfo → JSON Returns: dict: { "taskInfo": dict, "textInfo": dict, "studyInfo": dict, "evalInfo": dict, "kpInfo": dict or None, "intermediate": {"学习流程配置": str, "互动描述": str} } """ if llm_client is None: from llm_client import get_client llm_client = get_client() fields = parse_core_writing_fields(teaching_config) # 选择 prompt builder is_img_type = cType in ("core_writing_imgMakeSentence", "core_writing_imgWrite") if is_img_type: build_prompt = lambda step, repl: build_writing_img_prompt(cType, step, repl) else: build_prompt = lambda step, repl: build_writing_question_prompt(cType, step, repl) log_prefix = f"[{cType}]" logger.info(f"{log_prefix} 开始6步生成: cId={cId}") # ── Step 1: 学习流程配置 ── logger.info(f"{log_prefix} Step 1/6: 学习流程配置") prompt1 = build_prompt("step1_learning_flow", { "任务背景": fields["任务背景"], "任务描述": fields["任务描述"], "写作材料": fields["写作材料"], "学习内容": fields["学习内容"], }) 学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3) 学习流程配置 = 学习流程配置.strip() logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}") # ── Step 2: 互动描述 ── logger.info(f"{log_prefix} Step 2/6: 互动描述") prompt2 = build_prompt("step2_interaction_desc", { "任务背景": fields["任务背景"], "写作材料": fields["写作材料"], }) 互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3) 互动描述 = 互动描述.strip() logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}") # ── Step 3: taskInfo ── logger.info(f"{log_prefix} Step 3/6: taskInfo") prompt3 = build_prompt("step3_task_info", { "ID": str(cId), "题目类型": fields["题目类型"], "任务信息": fields["任务信息"], "知识": fields["知识"], "写作材料": fields["写作材料"], "互动描述": 互动描述, }) taskInfo, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1) logger.info(f" Step 3 完成: taskInfo keys={list(taskInfo.keys())}, tokens={usage3}") # ── Step 4: textInfo ── logger.info(f"{log_prefix} Step 4/6: textInfo") prompt4 = build_prompt("step4_text_info", { "通过规则": fields["通过规则"], "写作材料": fields["写作材料"], }) textInfo, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1) logger.info(f" Step 4 完成: textInfo keys={list(textInfo.keys())}, tokens={usage4}") # ── Step 5: studyInfo ── logger.info(f"{log_prefix} Step 5/6: studyInfo") taskInfo_str = json.dumps(taskInfo, ensure_ascii=False) prompt5 = build_prompt("step5_study_info", { "taskInfo": taskInfo_str, "学习流程配置": 学习流程配置, "学习内容": fields["学习内容"], }) studyInfo, usage5 = llm_client.call_for_json("", prompt5, max_tokens=16384, temperature=0.1) logger.info(f" Step 5 完成: studyInfo keys={list(studyInfo.keys()) if isinstance(studyInfo, dict) else 'list'}, tokens={usage5}") # ── Step 6: evalInfo ── logger.info(f"{log_prefix} Step 6/6: evalInfo") textInfo_str = json.dumps(textInfo, ensure_ascii=False) prompt6 = build_prompt("step6_eval_info", { "taskInfo": taskInfo_str, "任务信息": fields["任务信息"], "textInfo": textInfo_str, }) evalInfo, usage6 = llm_client.call_for_json("", prompt6, max_tokens=4096, temperature=0.1) logger.info(f" Step 6 完成: evalInfo keys={list(evalInfo.keys())}, tokens={usage6}") # kpInfo kp_info = None if fields["知识"]: try: kp_info = generate_kp_info(cType, cId, fields["知识"], llm_client, level=level) except Exception as e: logger.warning(f"kpInfo 生成失败: {e}") logger.info(f"{log_prefix} 6步生成完成: cId={cId}") return { "taskInfo": taskInfo, "textInfo": textInfo, "studyInfo": studyInfo, "evalInfo": evalInfo, "kpInfo": kp_info, "intermediate": { "学习流程配置": 学习流程配置, "互动描述": 互动描述, }, } def generate_core_writing_img_make_sentence(cId, teaching_config, character_map=None, llm_client=None, level=None): """看图组句(core_writing_imgMakeSentence)6步 LLM 生成流水线。""" return _generate_core_writing(cId, "core_writing_imgMakeSentence", teaching_config, character_map, llm_client, level=level) def generate_core_writing_img_write(cId, teaching_config, character_map=None, llm_client=None, level=None): """看图撰写(core_writing_imgWrite)6步 LLM 生成流水线。""" return _generate_core_writing(cId, "core_writing_imgWrite", teaching_config, character_map, llm_client, level=level) def generate_core_writing_question_make_sentence(cId, teaching_config, character_map=None, llm_client=None, level=None): """邮件组句(core_writing_questionMakeSentence)6步 LLM 生成流水线。""" return _generate_core_writing(cId, "core_writing_questionMakeSentence", teaching_config, character_map, llm_client, level=level) def generate_core_writing_question_write(cId, teaching_config, character_map=None, llm_client=None, level=None): """邮件撰写(core_writing_questionWrite)6步 LLM 生成流水线。""" return _generate_core_writing(cId, "core_writing_questionWrite", teaching_config, character_map, llm_client, level=level) def generate_component(component, character_map=None, section_char_map=None, llm_client=None, level=None): """ 一站式生成单个组件的 jsonData + kpInfo Args: component: parse_sheet_rows 返回的组件 dict character_map: 全局角色映射(回退用) section_char_map: 角色-section对应表(优先用) llm_client: LLMClient 实例 level: 剧本级别(如 "L1", "L2"),用于知识点匹配 Returns: dict: {"jsonData": {...}, "kpInfo": {...} or None, "cType": str} 对核心互动类型返回额外字段: taskData, textData, sequenceData, learningData """ from match_component import match_component_type from parse_script import resolve_resource_mapping # 类型匹配 type_info = match_component_type(component["type_name"]) cType = type_info["cType"] cId = component["cId"] has_image = component.get("has_image", False) or type_info.get("has_image", False) # 解析角色配置:优先通过 section-character 映射 resolved_char_map = character_map or {} config_info = component.get("config_info", "") if section_char_map and config_info: resolved_char_map = resolve_resource_mapping( section_char_map, config_info, character_map ) # ── 核心互动类型:多步 LLM 生成 ── if cType in CORE_TYPES: _core_call_args = dict( cId=cId, teaching_config=component["teaching_config"], character_map=resolved_char_map, llm_client=llm_client, level=level, ) _core_base = { "cType": cType, "type_name": component["type_name"], "cId": cId, "has_image": has_image, "category": "core", "bitable": type_info.get("bitable") or type_info.get("bitable_wiki"), "db_table": type_info.get("db_table"), } if cType == "core_reading_order": result = generate_core_reading_order(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "materialInfo": result["materialInfo"], "flowInfo": result["flowInfo"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_reading_imageDrag": result = generate_core_reading_imagedrag(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "questionGroup": result["questionGroup"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_speaking_reply": result = generate_core_speaking_reply(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "dialogSetting": result["dialogSetting"], "dialogConfig": result["dialogConfig"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_speaking_inquiry": result = generate_core_speaking_inquiry(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "dialogSetting": result["dialogSetting"], "dialogConfig": result["dialogConfig"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_speaking_image": result = generate_core_speaking_image(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "dialogConfig": result["dialogConfig"], "imageInfo": result["imageInfo"], "optionList": result["optionList"], "questionList": result["questionList"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_listening_order": result = generate_core_listening_order(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "materialInfo": result["materialInfo"], "flowInfo": result["flowInfo"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_listening_drag": result = generate_core_listening_drag(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "preDialog": result["preDialog"], "dialogList": result["dialogList"], "questionList": result["questionList"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType == "core_listening_choose": result = generate_core_listening_choose(**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "questionGroup": result["questionGroup"], "studyInfo": result["studyInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } elif cType in ("core_writing_imgMakeSentence", "core_writing_imgWrite", "core_writing_questionMakeSentence", "core_writing_questionWrite"): _writing_funcs = { "core_writing_imgMakeSentence": generate_core_writing_img_make_sentence, "core_writing_imgWrite": generate_core_writing_img_write, "core_writing_questionMakeSentence": generate_core_writing_question_make_sentence, "core_writing_questionWrite": generate_core_writing_question_write, } result = _writing_funcs[cType](**_core_call_args) return { **_core_base, "jsonData": result["taskInfo"], "taskInfo": result["taskInfo"], "textInfo": result["textInfo"], "studyInfo": result["studyInfo"], "evalInfo": result["evalInfo"], "kpInfo": result["kpInfo"], "intermediate": result["intermediate"], } else: raise ValueError(f"核心互动类型 {cType} 尚未实现生成器") # ── 中互动类型:单步 LLM 生成 ── # 生成 jsonData json_data = generate_json_data( cType=cType, cId=cId, teaching_config=component["teaching_config"], character_map=resolved_char_map, has_image=has_image, knowledge_text=component.get("knowledge_text", ""), llm_client=llm_client, ) # 生成 kpInfo kp_info = generate_kp_info( cType=cType, cId=cId, knowledge_text=component.get("knowledge_text", ""), llm_client=llm_client, level=level, ) return { "jsonData": json_data, "kpInfo": kp_info, "cType": cType, "type_name": component["type_name"], "cId": cId, "has_image": has_image, "category": "mid", "bitable": type_info.get("bitable"), "db_table": type_info.get("db_table"), } # ============ 辅助函数 ============ def _get_cn_name(cType): """从 cType 获取中文类型名""" from match_component import MID_INTERACTION_TYPES for cn_name, info in MID_INTERACTION_TYPES.items(): if info["cType"] == cType: return cn_name return cType def _post_process_json_data(json_data, cType, cId, has_image): """后处理 jsonData:确保必需字段正确""" if not isinstance(json_data, dict): logger.warning(f"jsonData 不是 dict: {type(json_data)}") return json_data # 强制覆盖 cType 和 cId json_data["cType"] = cType json_data["cId"] = str(cId) # 确保配图相关字段(仅在 has_image 且 LLM 遗漏时补充) if has_image and "img" not in json_data: json_data["img"] = f"{cId}.png" return json_data def _try_rule_based_kp(cType, cId, knowledge_text): """ 尝试规则化生成 kpInfo(知识点结构固定,多数情况可规则处理) Returns: dict or None: 成功返回 kpInfo,无法处理返回 None """ # 清理 标签 clean_text = re.sub(r']*>', '', knowledge_text) clean_text = re.sub(r'', '', clean_text) clean_text = clean_text.strip() if not clean_text: return None lines = [l.strip() for l in clean_text.split("\n") if l.strip()] kp_list = [] for line in lines: # 去除数字后缀(如 "school 1" → "school", "You need to... 2" → "You need to...") stripped = re.sub(r'\s+\d+$', '', line).strip() if not stripped: continue # 判断 vocab vs sentence if "..." in stripped or len(stripped.split()) > 3: kp_list.append({ "kpId": None, "kpType": "sentence", "kpTitle": stripped, "kpSkill": "sentence_meaning", "kpSkillName": "语义", }) else: kp_list.append({ "kpId": None, "kpType": "vocab", "kpTitle": stripped, "kpSkill": "vocab_meaning", "kpSkillName": "词义", }) if not kp_list: return None return { "pushType": "relationKp", "cType": cType, "cId": str(cId), "kpInfo": kp_list, } # ============ CLI 测试 ============ if __name__ == "__main__": sys.path.insert(0, CURRENT_PATH) from llm_client import get_client client = get_client() # 测试用教研配置 test_config = """【任务标题】 表示大家一起上学 【资源配置】 图片时机: 互动内容 【情境引入】 User: Great! User: Oh, there's no time left. 【互动内容】 User: Let's go to school now. (朗读) 【后置对话】 无""" test_kp = "school" test_char_map = {"Eva": 663, "Peter": 653, "Vicky": 658} print("=== 测试 jsonData 生成 (v2 template) ===") jd = generate_json_data( cType="mid_dialog_repeat", cId="1214101", teaching_config=test_config, character_map=test_char_map, has_image=True, knowledge_text=test_kp, llm_client=client, ) print(json.dumps(jd, ensure_ascii=False, indent=2)) print("\n=== 测试 kpInfo 生成 ===") kp = generate_kp_info("mid_dialog_repeat", "1214101", test_kp, client) print(json.dumps(kp, ensure_ascii=False, indent=2))