2383 lines
93 KiB
Python
2383 lines
93 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
LLM 驱动的 jsonData / kpInfo 生成器
|
||
从教研配置文本 + 组件类型 + per-type prompt模板 → 调用 LLM 生成结构化 JSON
|
||
|
||
核心设计 v2:
|
||
- 每种组件类型使用 PRD 定义的精确 prompt 模板(prompts/prompt_registry.py)
|
||
- 教研配置文本拆解为结构化字段作为 prompt 输入变量
|
||
- 角色配置通过 section-character 映射解析
|
||
- 对无模板的类型回退到 few-shot 样例方式
|
||
- 规则负责后处理:校验必需字段、修正 cType/cId
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import logging
|
||
import re
|
||
|
||
# Directory containing this script, and its parent directory (the skill root).
CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
SKILL_ROOT = os.path.dirname(CURRENT_PATH)

# Prepend both directories to sys.path so that sibling modules and the
# ``prompts`` package resolve. SKILL_ROOT is inserted last and therefore ends
# up first on the path.
for _search_dir in (CURRENT_PATH, SKILL_ROOT):
    if _search_dir not in sys.path:
        sys.path.insert(0, _search_dir)
del _search_dir

logger = logging.getLogger("generate_json")
# Configure the logger only when it has no handler yet, so repeated imports do
# not stack duplicate stream handlers.
# NOTE(review): indentation was lost in the reviewed copy; setLevel is assumed
# to belong inside this guard - confirm against the original file.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
    )
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
|
||
|
||
|
||
# ============ 参考样例加载 ============
|
||
|
||
_examples_cache = None
|
||
|
||
def _load_examples():
    """Load and cache the reference examples for all component types.

    The examples are read once from
    ``<SKILL_ROOT>/references/examples/all_types_examples.json`` and memoized
    in the module-level ``_examples_cache``. A missing file is logged as a
    warning and cached as an empty dict, so the check is not repeated.

    Returns:
        The parsed JSON content, or ``{}`` when the file does not exist.
    """
    global _examples_cache
    if _examples_cache is not None:
        return _examples_cache

    examples_path = os.path.join(SKILL_ROOT, "references", "examples", "all_types_examples.json")
    if not os.path.exists(examples_path):
        logger.warning(f"参考样例文件不存在: {examples_path}")
        _examples_cache = {}
        return _examples_cache

    # JSON text is UTF-8; pin the encoding instead of relying on the locale
    # default, which breaks on non-ASCII content on some platforms.
    with open(examples_path, "r", encoding="utf-8") as f:
        _examples_cache = json.load(f)
    logger.info(f"加载参考样例: {len(_examples_cache)} 种类型")
    return _examples_cache
|
||
|
||
|
||
def _get_examples_for_type(cType):
    """Return the reference examples registered for one component type.

    Args:
        cType: Component type identifier used as the key in the examples file.

    Returns:
        The value stored under the type's ``"examples"`` key, or an empty list
        when the type is unknown or has no examples.
    """
    entry = _load_examples().get(cType)
    if entry:
        samples = entry.get("examples")
        if samples:
            return samples
    return []
|
||
|
||
|
||
# ============ Prompt 构建(v2 per-type 模板) ============
|
||
|
||
from prompts.prompt_registry import (
|
||
parse_teaching_config_fields,
|
||
build_full_prompt,
|
||
get_supported_types,
|
||
CDESC_TYPES,
|
||
build_cdesc_prompt,
|
||
)
|
||
|
||
from prompts.core_reading_prompts import (
|
||
CORE_READING_STEPS,
|
||
build_step_prompt,
|
||
)
|
||
|
||
from prompts.core_imagedrag_prompts import (
|
||
CORE_IMAGEDRAG_STEPS,
|
||
build_imagedrag_prompt,
|
||
)
|
||
|
||
from prompts.core_speaking_prompts import (
|
||
CORE_SPEAKING_STEPS,
|
||
build_speaking_prompt,
|
||
)
|
||
|
||
from prompts.core_speaking_inquiry_prompts import (
|
||
CORE_SPEAKING_INQUIRY_STEPS,
|
||
build_speaking_inquiry_prompt,
|
||
)
|
||
|
||
from prompts.core_speaking_image_prompts import (
|
||
CORE_SPEAKING_IMAGE_STEPS,
|
||
build_speaking_image_prompt,
|
||
)
|
||
|
||
from prompts.core_listening_order_prompts import (
|
||
CORE_LISTENING_ORDER_STEPS,
|
||
build_listening_order_prompt,
|
||
)
|
||
|
||
from prompts.core_listening_drag_prompts import (
|
||
CORE_LISTENING_DRAG_STEPS,
|
||
build_listening_drag_prompt,
|
||
)
|
||
|
||
from prompts.core_listening_choose_prompts import (
|
||
CORE_LISTENING_CHOOSE_STEPS,
|
||
build_listening_choose_prompt,
|
||
)
|
||
|
||
from prompts.core_writing_img_prompts import (
|
||
CORE_WRITING_IMG_MAKE_SENTENCE_STEPS,
|
||
CORE_WRITING_IMG_WRITE_STEPS,
|
||
build_writing_img_prompt,
|
||
)
|
||
|
||
from prompts.core_writing_question_prompts import (
|
||
CORE_WRITING_Q_MAKE_SENTENCE_STEPS,
|
||
CORE_WRITING_Q_WRITE_STEPS,
|
||
build_writing_question_prompt,
|
||
)
|
||
|
||
# Fallback: 旧版 few-shot 方式(对无 prompt 模板的类型)
|
||
FALLBACK_SYSTEM_PROMPT = """你是一个互动组件配置JSON生成专家。根据教研配置文本生成对应的 jsonData JSON。
|
||
|
||
## 组件类型: {cType}
|
||
|
||
## 输出要求
|
||
1. 输出必须是单行合法JSON
|
||
2. cType 必须为 "{cType}"
|
||
3. cId 必须为提供的组件ID
|
||
4. 严格按照参考样例的JSON结构输出
|
||
5. "无"的字段使用空数组[]或空对象{{}}
|
||
|
||
## 参考样例
|
||
{examples_text}
|
||
|
||
请直接输出单行JSON,不要包含任何其他符号。"""
|
||
|
||
|
||
def _build_fallback_examples_text(examples, max_examples=2):
|
||
"""构建 few-shot 样例文本(回退用)"""
|
||
parts = []
|
||
for i, ex in enumerate(examples[:max_examples], 1):
|
||
teaching_config = ex.get("teaching_config", "")
|
||
json_data = ex.get("jsonData", "")
|
||
try:
|
||
jd_obj = json.loads(json_data) if isinstance(json_data, str) else json_data
|
||
json_data_formatted = json.dumps(jd_obj, ensure_ascii=False)
|
||
except (json.JSONDecodeError, TypeError):
|
||
json_data_formatted = str(json_data)
|
||
parts.append(f"### 样例 {i}\n教研配置:\n{teaching_config}\n\n输出:\n{json_data_formatted}")
|
||
return "\n\n".join(parts)
|
||
|
||
|
||
def _build_fallback_user_prompt(cId, teaching_config, character_map, has_image):
|
||
"""构建 user prompt(回退用)"""
|
||
parts = [f"组件ID: {cId}"]
|
||
if has_image:
|
||
parts.append(f'配图: 是(img="{cId}.png")')
|
||
if character_map:
|
||
parts.append(f"角色配置: {json.dumps(character_map, ensure_ascii=False)}")
|
||
parts.append(f"\n教研配置:\n{teaching_config}")
|
||
parts.append("\n请直接输出单行JSON")
|
||
return "\n".join(parts)
|
||
|
||
|
||
# ============ 语音识别热词自动生成 ============
|
||
|
||
ASR_HOTWORD_PROMPT = """#任务
|
||
参考当前的互动配置,如果互动对话为[表达]类,则根据互动场景,给出一些可能高频出现的 或者 可能有一定识别难度的词汇,用","连接。如果互动对话为[朗读]类,则返回"无"。
|
||
|
||
语音识别词汇的考虑范围:
|
||
- 交互中可能提及的专有名词(例如人名、地名、特定物品名称等)
|
||
- 互动目标的一些场景表达,例如 题目中要谈论"目标",则热词中可以提供 goal/target 等。
|
||
|
||
#输入
|
||
情境引入:
|
||
$pre_dialog$
|
||
|
||
互动对话:
|
||
$interaction$
|
||
|
||
后置对话:
|
||
$post_dialog$
|
||
|
||
#输出
|
||
现在,根据任务要求直接输出符合要求的热词字段("无",或者逗号连接的多个单词)。"""
|
||
|
||
# Component types whose speech-recognition hotwords are generated automatically.
ASR_TYPES = {"mid_dialog_express"}


def generate_asr_hotwords(cType, parsed_fields, llm_client):
    """Ask the LLM for speech-recognition hotwords.

    Only types listed in ``ASR_TYPES`` trigger an LLM call; every other type
    gets "无". Any failure during the call is logged and also yields "无".

    Args:
        cType: Component type.
        parsed_fields: Parsed teaching-config fields; the "情境引入",
            "互动内容" and "后置对话" entries fill the prompt (default "无").
        llm_client: Client exposing ``call(system, user, max_tokens=...,
            temperature=...)`` returning ``(content, usage)``.

    Returns:
        str: The hotword string returned by the LLM (surrounding whitespace
        and double quotes removed), or "无".
    """
    if cType not in ASR_TYPES:
        return "无"

    substitutions = (
        ("$pre_dialog$", parsed_fields.get("情境引入", "无")),
        ("$interaction$", parsed_fields.get("互动内容", "无")),
        ("$post_dialog$", parsed_fields.get("后置对话", "无")),
    )
    prompt = ASR_HOTWORD_PROMPT
    for placeholder, value in substitutions:
        prompt = prompt.replace(placeholder, value)

    try:
        content, usage = llm_client.call("", prompt, max_tokens=256, temperature=0.3)
        # Drop surrounding whitespace and any quotes wrapped around the answer.
        hotwords = content.strip().strip('"').strip()
        logger.info(f"[热词生成] cType={cType}, hotwords={hotwords[:80]}, tokens={usage}")
        return hotwords or "无"
    except Exception as e:
        # Best effort: hotwords are optional, so fall back instead of failing.
        logger.warning(f"[热词生成] 失败: {e}")
        return "无"
|
||
|
||
|
||
# ============ kpInfo 生成 ============
|
||
|
||
KP_SYSTEM_PROMPT = """你是一个知识点信息JSON生成专家。根据提供的组件类型、组件ID和知识点文本,生成 kpInfo JSON。
|
||
|
||
## 输出要求
|
||
1. 输出必须是合法JSON
|
||
2. 结构固定为:
|
||
```json
|
||
{
|
||
"pushType": "relationKp",
|
||
"cType": "<组件cType>",
|
||
"cId": "<组件ID>",
|
||
"kpInfo": [
|
||
{
|
||
"kpId": null,
|
||
"kpType": "vocab 或 sentence",
|
||
"kpTitle": "<知识点标题>",
|
||
"kpSkill": "vocab_meaning 或 sentence_meaning",
|
||
"kpSkillName": "词义 或 语义"
|
||
}
|
||
]
|
||
}
|
||
```
|
||
3. 判断知识点类型的规则:
|
||
- 如果知识点是单个英文单词(如 school, play),则 kpType="vocab", kpSkill="vocab_meaning", kpSkillName="词义"
|
||
- 如果知识点包含 "..." 或是句子片段(如 "You need to..."),则 kpType="sentence", kpSkill="sentence_meaning", kpSkillName="语义"
|
||
4. 知识点文本中可能包含数字后缀(如 "school 1"),数字后缀表示出现次数,不要包含在 kpTitle 中
|
||
5. 多个知识点用换行分隔,每个生成一条 kpInfo 条目
|
||
6. 去除 <text> 标签及其属性
|
||
|
||
请直接输出JSON,不要包含任何其他文字。"""
|
||
|
||
|
||
def _build_kp_user_prompt(cType, cId, knowledge_text):
|
||
"""构建知识点 user prompt"""
|
||
return f"cType: {cType}\ncId: {cId}\n知识点文本:\n{knowledge_text}"
|
||
|
||
|
||
# ============ 核心生成函数 ============
|
||
|
||
def generate_json_data(cType, cId, teaching_config, character_map=None,
                       has_image=False, knowledge_text="", llm_client=None):
    """Generate a component's jsonData with the LLM.

    Types reported by ``get_supported_types()`` use their per-type prompt
    template; every other type falls back to few-shot prompting. The result
    is then passed through ``_post_process_json_data``.

    Args:
        cType: Component type identifier (e.g. "mid_dialog_repeat").
        cId: Component ID (e.g. "1214101").
        teaching_config: Teaching-config text.
        character_map: Character mapping, e.g. {"Eva": 663, "Peter": 653}.
        has_image: Whether the component has an image.
        knowledge_text: Knowledge-point text (not used by this function).
        llm_client: LLM client; created via ``llm_client.get_client()`` when
            omitted.

    Returns:
        dict: The generated, post-processed jsonData object.
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    if cType not in get_supported_types():
        # No dedicated template: fall back to few-shot examples.
        logger.info(f"组件类型 {cType} 无 prompt 模板,使用 few-shot 回退")
        raw_data = _generate_with_fallback(
            cType, cId, teaching_config, character_map, has_image, llm_client
        )
    else:
        raw_data = _generate_with_template(
            cType, cId, teaching_config, character_map, llm_client
        )

    # Post-processing ensures the required fields are correct.
    result = _post_process_json_data(raw_data, cType, cId, has_image)
    logger.info(f"生成 jsonData: cType={cType}, cId={cId}")
    return result
|
||
|
||
|
||
def _generate_with_template(cType, cId, teaching_config, character_map, llm_client):
    """Generate jsonData using the per-type prompt template.

    Args:
        cType: Component type identifier.
        cId: Component ID.
        teaching_config: Teaching-config text, parsed into structured fields.
        character_map: Optional name -> id mapping, rendered as "id-name"
            pairs for the prompt.
        llm_client: Client exposing ``call`` and ``call_for_json``.

    Returns:
        The JSON object returned by ``llm_client.call_for_json``.
    """
    parsed_fields = parse_teaching_config_fields(teaching_config)

    # Fill in ASR hotwords when the config leaves them empty or set to "无".
    if cType in ASR_TYPES:
        configured_hotwords = parsed_fields.get("语音识别热词", "").strip()
        if configured_hotwords in ("", "无"):
            parsed_fields["语音识别热词"] = generate_asr_hotwords(cType, parsed_fields, llm_client)

    # Types that need an interaction description get it pre-generated.
    if cType in CDESC_TYPES:
        cdesc_prompt = build_cdesc_prompt(cType, parsed_fields)
        cdesc_text, cdesc_usage = llm_client.call("", cdesc_prompt, max_tokens=1024, temperature=0.3)
        parsed_fields["互动描述"] = cdesc_text.strip()
        logger.info(f"[cDesc] cType={cType}, cId={cId}, tokens={cdesc_usage}")

    # Character config string such as "663-EVA, 653-peter, 658-Vicky".
    if character_map:
        resource_mapping_str = ", ".join(
            f"{char_id}-{name}" for name, char_id in character_map.items()
        )
    else:
        resource_mapping_str = ""

    full_prompt = build_full_prompt(cType, cId, parsed_fields, resource_mapping_str)

    # Single user prompt, no system prompt.
    json_data, usage = llm_client.call_for_json("", full_prompt, max_tokens=4096, temperature=0.1)
    logger.info(f"[template] cType={cType}, cId={cId}, tokens={usage}")
    return json_data
|
||
|
||
|
||
def _generate_with_fallback(cType, cId, teaching_config, character_map, has_image, llm_client):
    """Generate jsonData via few-shot examples (no per-type template).

    Args:
        cType: Component type identifier.
        cId: Component ID.
        teaching_config: Teaching-config text.
        character_map: Optional character mapping; ``None`` is treated as {}.
        has_image: Whether the component has an image.
        llm_client: Client exposing ``call_for_json``.

    Returns:
        The JSON object returned by ``llm_client.call_for_json``.
    """
    examples = _get_examples_for_type(cType)
    if examples:
        examples_text = _build_fallback_examples_text(examples)
    else:
        examples_text = "(无参考样例)"

    system_prompt = FALLBACK_SYSTEM_PROMPT.format(cType=cType, examples_text=examples_text)
    user_prompt = _build_fallback_user_prompt(cId, teaching_config, character_map or {}, has_image)

    result, token_usage = llm_client.call_for_json(
        system_prompt, user_prompt, max_tokens=4096, temperature=0.1
    )
    logger.info(f"[fallback] cType={cType}, cId={cId}, tokens={token_usage}")
    return result
|
||
|
||
|
||
def generate_kp_info(cType, cId, knowledge_text, llm_client=None, level=None):
    """Generate a component's kpInfo by matching knowledge points.

    Delegates to ``kp_matcher.match_knowledge_points`` (described by the
    original author as matching against a MySQL knowledge-point library).

    Args:
        cType: Component type identifier.
        cId: Component ID.
        knowledge_text: Knowledge-point text (from sheet column H).
        llm_client: LLM client, forwarded to the matcher.
        level: Script level (e.g. "L1", "L2"), forwarded to the matcher.

    Returns:
        dict: The kpInfo object, or ``None`` when ``knowledge_text`` is empty
        or whitespace only.
    """
    has_text = bool(knowledge_text) and bool(knowledge_text.strip())
    if not has_text:
        return None

    # Imported lazily so the matcher is only loaded when actually needed.
    from kp_matcher import match_knowledge_points

    return match_knowledge_points(
        knowledge_text, cType, cId, level=level, llm_client=llm_client
    )
|
||
|
||
|
||
# Component types in the "core_*" family, grouped by skill.
CORE_TYPES = {
    # reading
    "core_reading_order",
    "core_reading_imageDrag",
    # speaking
    "core_speaking_reply",
    "core_speaking_inquiry",
    "core_speaking_image",
    # listening
    "core_listening_order",
    "core_listening_drag",
    "core_listening_choose",
    # writing
    "core_writing_imgMakeSentence",
    "core_writing_imgWrite",
    "core_writing_questionMakeSentence",
    "core_writing_questionWrite",
}
|
||
|
||
|
||
def parse_core_reading_fields(teaching_config):
    """Extract the inputs needed by the core_reading_order pipeline.

    The component config is the concatenation of the task and material
    teaching configs, made of 【xxx】 sections; it may also contain
    【流程编排】【角色配置】【专有词汇】【学习材料】 sections.

    Args:
        teaching_config: Component config text.

    Returns:
        dict: Field name -> value for the keys "任务配置", "通关知识",
        "材料配置", "学习材料", "互动题目", "流程编排", "角色配置", "专有词汇".
    """
    fields = parse_teaching_config_fields(teaching_config)

    def first_of(primary, alternate):
        # Sections appear under either of two headings in the source data.
        return fields.get(primary, "") or fields.get(alternate, "")

    # Task config = task title + task background, each under its own heading.
    task_config = "\n\n".join(
        f"【{title}】\n{fields[title]}"
        for title in ("任务标题", "任务背景")
        if fields.get(title)
    )

    return {
        "任务配置": task_config,
        "通关知识": fields.get("通关知识", ""),
        # Reading material section (input of steps 1 and 2).
        "材料配置": fields.get("阅读材料", ""),
        # Learning material, sometimes named 【学习过程】.
        "学习材料": first_of("学习材料", "学习过程"),
        "互动题目": first_of("题目部分", "互动题目"),
        "流程编排": fields.get("流程编排", ""),
        "角色配置": fields.get("角色配置", ""),
        "专有词汇": first_of("专有词汇", "专有名词"),
    }
|
||
|
||
|
||
def generate_core_reading_order(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Run the 7-step LLM pipeline for core_reading_order.

    Step 1: format reading material  -> text
    Step 2: format learning material -> text
    Step 3: learning-process config  -> text
    Step 4: taskData     -> JSON
    Step 5: textData     -> JSON
    Step 6: sequenceData -> JSON
    Step 7: learningData -> JSON

    Args:
        cId: Component ID.
        teaching_config: Component config text.
        character_map: Optional character mapping, used only when the config
            has no 【角色配置】 section.
        llm_client: LLM client; created via ``llm_client.get_client()`` when
            omitted.
        level: Script level, forwarded to ``generate_kp_info``.

    Returns:
        dict with keys "taskInfo" (step 4), "materialInfo" (step 5),
        "flowInfo" (step 6), "studyInfo" (step 7), "kpInfo" (dict or None)
        and "intermediate" (the three text results of steps 1-3).
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    cType = "core_reading_order"
    fields = parse_core_reading_fields(teaching_config)

    # Character config: prefer the component config, fall back to character_map.
    role_config = fields["角色配置"]
    if not role_config and character_map:
        role_config = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    # Step 2 input: prefer 【学习材料】, fall back to 【阅读材料】.
    learning_source = fields["学习材料"]
    if not learning_source.strip():
        learning_source = fields["材料配置"]

    logger.info(f"[core_reading] 开始7步生成: cId={cId}")

    def run_text_step(number, title, template, variables, temperature):
        # One free-text LLM call: announce, build prompt, call, strip, report.
        logger.info(f"[core_reading] Step {number}/7: {title}")
        prompt = build_step_prompt(template, variables)
        text, usage = llm_client.call("", prompt, max_tokens=8192, temperature=temperature)
        text = text.strip()
        logger.info(f" Step {number} 完成: {len(text)} 字符, tokens={usage}")
        return text

    def run_json_step(number, name, template, variables, max_tokens):
        # One JSON LLM call: announce, build prompt, call, report the keys.
        logger.info(f"[core_reading] Step {number}/7: {name}")
        prompt = build_step_prompt(template, variables)
        data, usage = llm_client.call_for_json("", prompt, max_tokens=max_tokens, temperature=0.1)
        logger.info(f" Step {number} 完成: {name} keys={list(data.keys())}, tokens={usage}")
        return data

    # Steps 1-3: intermediate texts.
    reading_text = run_text_step(
        1, "阅读材料格式化", "step1_format_reading",
        {"材料配置": fields["材料配置"]}, 0.1,
    )
    learning_text = run_text_step(
        2, "学习材料格式化", "step2_format_learning",
        {"材料配置": learning_source}, 0.1,
    )
    learning_plan = run_text_step(
        3, "学习过程配置", "step3_learning_process",
        {
            "文章材料": reading_text,
            "学习内容": learning_text,
            "专有词汇": fields["专有词汇"],
        },
        0.3,
    )

    # Steps 4-7: JSON payloads.
    task_data = run_json_step(
        4, "taskData", "step4_task_data",
        {
            "ID": str(cId),
            "任务配置": fields["任务配置"],
            "通关知识": fields["通关知识"],
            "角色配置": role_config,
        },
        4096,
    )
    text_data = run_json_step(
        5, "textData", "step5_text_data",
        {
            "ID": str(cId),
            "材料正文": reading_text,
            "互动题目": fields["互动题目"],
            "角色配置": role_config,
        },
        8192,
    )
    sequence_data = run_json_step(
        6, "sequenceData", "step6_sequence_data",
        {
            "ID": str(cId),
            "流程编排": fields["流程编排"],
            "角色配置": role_config,
        },
        4096,
    )
    learning_data = run_json_step(
        7, "learningData", "step7_learning_data",
        {"学习配置": learning_plan},
        8192,
    )

    # kpInfo comes from knowledge-point matching, not from the LLM steps above.
    kp_info = generate_kp_info(cType, cId, fields["通关知识"], llm_client=llm_client, level=level)

    logger.info(f"[core_reading] 7步生成全部完成: cId={cId}")

    return {
        "taskInfo": task_data,
        "materialInfo": text_data,
        "flowInfo": sequence_data,
        "studyInfo": learning_data,
        "kpInfo": kp_info,
        "intermediate": {
            "阅读材料格式化": reading_text,
            "学习材料格式化": learning_text,
            "学习过程配置": learning_plan,
        },
    }
|
||
|
||
|
||
def parse_core_imagedrag_fields(teaching_config):
    """Extract the inputs needed by the core_reading_imageDrag pipeline.

    The component config is the concatenation of the guide config and the
    interaction config, made of 【xxx】 sections.

    Args:
        teaching_config: Component config text.

    Returns:
        dict: Field name -> value for the keys "用户任务描述", "用户任务知识",
        "互动配置", "角色配置", "专有词汇".
    """
    fields = parse_teaching_config_fields(teaching_config)

    def first_of(primary, alternate):
        # Sections appear under either of two headings in the source data.
        return fields.get(primary, "") or fields.get(alternate, "")

    # User task description = title + background + description.
    task_description = "\n\n".join(
        f"【{title}】\n{fields[title]}"
        for title in ("任务标题", "任务背景", "任务描述")
        if fields.get(title)
    )

    # Interaction config is meant to hold reading material plus questions.
    # Without a dedicated 【互动配置】 section it is assembled from the
    # material and question sections. The previous code short-circuited on
    # 【阅读材料】 alone, which silently dropped 【互动问题】 (and the numbered
    # material sections) and left "阅读材料" in the list below unreachable.
    interaction_config = fields.get("互动配置", "")
    if not interaction_config:
        interaction_config = "\n\n".join(
            fields[key]
            for key in ("阅读材料1", "阅读材料2", "阅读材料", "材料部分", "互动问题")
            if fields.get(key)
        )

    return {
        "用户任务描述": task_description,
        "用户任务知识": first_of("用户任务知识", "通关知识"),
        "互动配置": interaction_config,
        "角色配置": fields.get("角色配置", ""),
        "专有词汇": first_of("专有词汇", "专有名词"),
    }
|
||
|
||
|
||
def generate_core_reading_imagedrag(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Run the 5-step LLM pipeline for core_reading_imageDrag.

    Step 1: learning-flow config    -> text
    Step 2: interaction description -> text
    Step 3: taskInfo      -> JSON
    Step 4: questionGroup -> JSON
    Step 5: studyInfo     -> JSON

    Args:
        cId: Component ID.
        teaching_config: Component config text.
        character_map: Optional character mapping, used only when the config
            has no 【角色配置】 section.
        llm_client: LLM client; created via ``llm_client.get_client()`` when
            omitted.
        level: Script level, forwarded to ``generate_kp_info``.

    Returns:
        dict with keys "taskInfo" (step 3), "questionGroup" (step 4),
        "studyInfo" (step 5), "kpInfo" (dict or None) and "intermediate"
        (the two text results of steps 1-2).
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    cType = "core_reading_imageDrag"
    fields = parse_core_imagedrag_fields(teaching_config)

    # Character config: prefer the component config, fall back to character_map.
    role_config = fields["角色配置"]
    if not role_config and character_map:
        role_config = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_imagedrag] 开始5步生成: cId={cId}")

    def run_text_step(number, title, template, variables, max_tokens):
        # One free-text LLM call: announce, build prompt, call, strip, report.
        logger.info(f"[core_imagedrag] Step {number}/5: {title}")
        prompt = build_imagedrag_prompt(template, variables)
        text, usage = llm_client.call("", prompt, max_tokens=max_tokens, temperature=0.3)
        text = text.strip()
        logger.info(f" Step {number} 完成: {len(text)} 字符, tokens={usage}")
        return text

    def run_json_step(number, title, template, variables, max_tokens):
        # One JSON LLM call; the caller logs completion because each step
        # reports its result differently.
        logger.info(f"[core_imagedrag] Step {number}/5: {title}")
        prompt = build_imagedrag_prompt(template, variables)
        return llm_client.call_for_json("", prompt, max_tokens=max_tokens, temperature=0.1)

    # Steps 1-2: intermediate texts.
    learning_flow = run_text_step(
        1, "学习流程配置", "step1_learning_flow",
        {
            "互动配置": fields["互动配置"],
            "用户任务知识": fields["用户任务知识"],
            "专有词汇": fields["专有词汇"],
        },
        8192,
    )
    interaction_desc = run_text_step(
        2, "互动描述", "step2_interaction_desc",
        {
            "用户任务描述": fields["用户任务描述"],
            "互动配置": fields["互动配置"],
        },
        4096,
    )

    # Step 3: taskInfo.
    task_data, task_usage = run_json_step(
        3, "taskInfo", "step3_task_info",
        {
            "ID": str(cId),
            "用户任务描述": fields["用户任务描述"],
            "用户任务知识": fields["用户任务知识"],
            "角色配置": role_config,
            "互动描述": interaction_desc,
        },
        4096,
    )
    logger.info(f" Step 3 完成: taskData keys={list(task_data.keys())}, tokens={task_usage}")

    # Step 4: questionGroup (only the result's type name is logged).
    question_group, question_usage = run_json_step(
        4, "questionGroup", "step4_question_group",
        {"配置材料": fields["互动配置"]},
        8192,
    )
    logger.info(f" Step 4 完成: questionGroupData type={type(question_group).__name__}, tokens={question_usage}")

    # Step 5: studyInfo (keys are logged only when the result is a dict).
    study_data, study_usage = run_json_step(
        5, "studyInfo", "step5_study_info",
        {"学习流程配置": learning_flow},
        8192,
    )
    study_keys = list(study_data.keys()) if isinstance(study_data, dict) else 'list'
    logger.info(f" Step 5 完成: studyData keys={study_keys}, tokens={study_usage}")

    # kpInfo comes from knowledge-point matching, not from the LLM steps above.
    kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level)

    logger.info(f"[core_imagedrag] 5步生成全部完成: cId={cId}")

    return {
        "taskInfo": task_data,
        "questionGroup": question_group,
        "studyInfo": study_data,
        "kpInfo": kp_info,
        "intermediate": {
            "学习流程配置": learning_flow,
            "互动描述": interaction_desc,
        },
    }
|
||
|
||
|
||
def parse_core_speaking_fields(teaching_config):
    """Extract the inputs needed by the core_speaking_reply pipeline.

    The component config is the concatenation of the user-facing task info
    and the task rules / NPC settings.

    Args:
        teaching_config: Component config text.

    Returns:
        dict: Field name -> value for the keys "用户任务描述", "用户任务知识",
        "示例对话", "角色配置", "专有词汇", "对话配置", "npc任务设定",
        "npc视角任务背景", "通过规则".
    """
    fields = parse_teaching_config_fields(teaching_config)

    def titled_sections(*titles):
        # Re-wrap the non-empty sections under their 【title】 headings.
        return "\n\n".join(
            f"【{title}】\n{fields[title]}" for title in titles if fields.get(title)
        )

    def first_of(primary, alternate):
        # Sections appear under either of two headings in the source data.
        return fields.get(primary, "") or fields.get(alternate, "")

    return {
        # User task description = title + background + description.
        "用户任务描述": titled_sections("任务标题", "任务背景", "任务描述"),
        "用户任务知识": first_of("用户任务知识", "通关知识"),
        "示例对话": fields.get("示例对话", ""),
        "角色配置": fields.get("角色配置", ""),
        "专有词汇": first_of("专有词汇", "专有名词"),
        # Dialog config = dialog roles + round settings.
        "对话配置": titled_sections("对话角色", "回合设定"),
        "npc任务设定": first_of("任务设定", "npc任务设定"),
        # Use the dedicated NPC background section when present, otherwise
        # reuse the general task background.
        "npc视角任务背景": first_of("npc视角任务背景", "任务背景"),
        "通过规则": fields.get("通过规则", ""),
    }
|
||
|
||
|
||
def generate_core_speaking_reply(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Run the 7-step LLM pipeline for core_speaking_reply.

    Step 1: learning-flow config -> text
    Step 2: promptInfo config    -> text
    Step 3: ASR hotwords         -> text
    Step 4: taskData     -> JSON
    Step 5: settingData  -> JSON
    Step 6: configData   -> JSON
    Step 7: learningData -> JSON

    Args:
        cId: Component ID.
        teaching_config: Component config text.
        character_map: Optional character mapping, used only when the config
            has no 【角色配置】 section.
        llm_client: LLM client; created via ``llm_client.get_client()`` when
            omitted.
        level: Script level, forwarded to ``generate_kp_info``.

    Returns:
        dict with keys "taskInfo" (step 4), "dialogSetting" (step 5),
        "dialogConfig" (step 6), "studyInfo" (step 7), "kpInfo" (dict or
        None) and "intermediate" (the three text results of steps 1-3).
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    cType = "core_speaking_reply"
    fields = parse_core_speaking_fields(teaching_config)

    # Character config: prefer the component config, fall back to character_map.
    role_config = fields["角色配置"]
    if not role_config and character_map:
        role_config = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_speaking] 开始7步生成: cId={cId}")

    def run_text_step(number, title, template, variables, max_tokens, temperature):
        # One free-text LLM call: announce, build prompt, call, strip, report.
        logger.info(f"[core_speaking] Step {number}/7: {title}")
        prompt = build_speaking_prompt(template, variables)
        text, usage = llm_client.call("", prompt, max_tokens=max_tokens, temperature=temperature)
        text = text.strip()
        logger.info(f" Step {number} 完成: {len(text)} 字符, tokens={usage}")
        return text

    def run_json_step(number, name, template, variables, max_tokens):
        # One JSON LLM call: announce, build prompt, call, report the keys.
        logger.info(f"[core_speaking] Step {number}/7: {name}")
        prompt = build_speaking_prompt(template, variables)
        data, usage = llm_client.call_for_json("", prompt, max_tokens=max_tokens, temperature=0.1)
        logger.info(f" Step {number} 完成: {name} keys={list(data.keys())}, tokens={usage}")
        return data

    # Steps 1-3: intermediate texts.
    learning_flow = run_text_step(
        1, "学习流程配置", "step1_learning_flow",
        {
            "用户任务描述": fields["用户任务描述"],
            "示例对话": fields["示例对话"],
            "专有词汇": fields["专有词汇"],
        },
        8192, 0.3,
    )
    prompt_info = run_text_step(
        2, "promptInfo配置", "step2_prompt_info",
        {
            "用户任务描述": fields["用户任务描述"],
            "用户任务知识": fields["用户任务知识"],
            "示例对话": fields["示例对话"],
            "npc视角任务背景": fields["npc视角任务背景"],
            "npc任务设定": fields["npc任务设定"],
        },
        8192, 0.3,
    )
    asr_hotwords = run_text_step(
        3, "语音识别热词", "step3_asr_hotwords",
        {
            "用户任务描述": fields["用户任务描述"],
            "用户任务知识": fields["用户任务知识"],
            "示例对话": fields["示例对话"],
        },
        1024, 0.1,
    )

    # Steps 4-7: JSON payloads.
    task_data = run_json_step(
        4, "taskData", "step4_task_data",
        {
            "ID": str(cId),
            "用户任务描述": fields["用户任务描述"],
            "用户任务知识": fields["用户任务知识"],
            "角色配置": role_config,
        },
        4096,
    )
    setting_data = run_json_step(
        5, "settingData", "step5_setting_data",
        {
            "ID": str(cId),
            "对话配置": fields["对话配置"],
        },
        2048,
    )
    config_data = run_json_step(
        6, "configData", "step6_config_data",
        {
            "语音识别热词": asr_hotwords,
            "任务背景": fields["npc视角任务背景"],
            "用户任务描述": fields["用户任务描述"],
            "用户任务知识": fields["用户任务知识"],
            "角色驱动配置": prompt_info,
            "示例对话": fields["示例对话"],
            "通过规则": fields["通过规则"],
        },
        8192,
    )
    learning_data = run_json_step(
        7, "learningData", "step7_learning_data",
        {"学习配置": learning_flow},
        8192,
    )

    # kpInfo comes from knowledge-point matching, not from the LLM steps above.
    kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level)

    logger.info(f"[core_speaking] 7步生成全部完成: cId={cId}")

    return {
        "taskInfo": task_data,
        "dialogSetting": setting_data,
        "dialogConfig": config_data,
        "studyInfo": learning_data,
        "kpInfo": kp_info,
        "intermediate": {
            "学习流程配置": learning_flow,
            "promptInfo配置": prompt_info,
            "语音识别热词": asr_hotwords,
        },
    }
|
||
|
||
|
||
def parse_core_speaking_inquiry_fields(teaching_config):
    """Extract the inputs needed by the core_speaking_inquiry pipeline.

    The component config is the concatenation of the user-facing task info
    and the task rules / NPC settings - the same input source as
    core_speaking_reply - with the 【知识库】 section extracted in addition.

    Args:
        teaching_config: Component config text.

    Returns:
        dict: Field name -> value for the keys "用户任务描述", "用户任务知识",
        "示例对话", "角色配置", "专有词汇", "对话配置", "npc任务设定",
        "npc视角任务背景", "通过规则", "知识库".
    """
    fields = parse_teaching_config_fields(teaching_config)

    def titled_sections(*titles):
        # Re-wrap the non-empty sections under their 【title】 headings.
        return "\n\n".join(
            f"【{title}】\n{fields[title]}" for title in titles if fields.get(title)
        )

    def first_of(primary, alternate):
        # Sections appear under either of two headings in the source data.
        return fields.get(primary, "") or fields.get(alternate, "")

    return {
        "用户任务描述": titled_sections("任务标题", "任务背景", "任务描述"),
        "用户任务知识": first_of("用户任务知识", "通关知识"),
        "示例对话": fields.get("示例对话", ""),
        "角色配置": fields.get("角色配置", ""),
        "专有词汇": first_of("专有词汇", "专有名词"),
        "对话配置": titled_sections("对话角色", "回合设定"),
        "npc任务设定": first_of("任务设定", "npc任务设定"),
        "npc视角任务背景": first_of("npc视角任务背景", "任务背景"),
        "通过规则": fields.get("通过规则", ""),
        "知识库": fields.get("知识库", ""),
    }
|
||
|
||
|
||
def generate_core_speaking_inquiry(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """
    Run the 7-step LLM generation pipeline for the core_speaking_inquiry component.

    Step 1: learning-flow config -> intermediate text
    Step 2: promptInfo config    -> intermediate text
    Step 3: ASR hot words        -> intermediate text
    Step 4: taskData             -> JSON
    Step 5: settingData          -> JSON
    Step 6: configData           -> JSON (uses the Step 2 and Step 3 text)
    Step 7: learningData         -> JSON (uses the Step 1 text)

    Args:
        cId: Component id; rendered into prompts as ``str(cId)`` and passed to
            generate_kp_info.
        teaching_config: Component configuration text, parsed with
            parse_core_speaking_inquiry_fields.
        character_map: Optional mapping; when the config has no 角色配置 section,
            its items are rendered as "key:value" lines and used instead.
        llm_client: Client exposing ``call`` and ``call_for_json``; when None,
            one is obtained from ``llm_client.get_client()``.
        level: Passed through unchanged to generate_kp_info.

    Returns:
        dict: {
            "taskInfo": Step 4 result, "dialogSetting": Step 5 result,
            "dialogConfig": Step 6 result, "studyInfo": Step 7 result,
            "kpInfo": generate_kp_info result, or None if that call raised,
            "intermediate": {"学习流程配置": str, "promptInfo配置": str, "语音识别热词": str}
        }
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    def _keys_of(value):
        # Guard in case the LLM returns a non-dict JSON value: a progress log
        # line must never be the thing that aborts the pipeline.
        return list(value.keys()) if isinstance(value, dict) else type(value).__name__

    cType = "core_speaking_inquiry"

    fields = parse_core_speaking_inquiry_fields(teaching_config)

    # Role config: prefer the one in the component config, fall back to character_map.
    角色配置 = fields["角色配置"]
    if not 角色配置 and character_map:
        角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_speaking_inquiry] 开始7步生成: cId={cId}")

    # ── Step 1: learning-flow config ──
    logger.info("[core_speaking_inquiry] Step 1/7: 学习流程配置")
    prompt1 = build_speaking_inquiry_prompt("step1_learning_flow", {
        "用户任务描述": fields["用户任务描述"],
        "示例对话": fields["示例对话"],
        "专有词汇": fields["专有词汇"],
    })
    学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3)
    学习流程配置 = 学习流程配置.strip()
    logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}")

    # ── Step 2: promptInfo config ──
    logger.info("[core_speaking_inquiry] Step 2/7: promptInfo配置")
    prompt2 = build_speaking_inquiry_prompt("step2_prompt_info", {
        "用户任务描述": fields["用户任务描述"],
        "示例对话": fields["示例对话"],
        "npc视角任务背景": fields["npc视角任务背景"],
        "npc知识库": fields["知识库"],
    })
    promptInfo配置, usage2 = llm_client.call("", prompt2, max_tokens=8192, temperature=0.3)
    promptInfo配置 = promptInfo配置.strip()
    logger.info(f" Step 2 完成: {len(promptInfo配置)} 字符, tokens={usage2}")

    # ── Step 3: ASR hot words ──
    logger.info("[core_speaking_inquiry] Step 3/7: 语音识别热词")
    prompt3 = build_speaking_inquiry_prompt("step3_asr_hotwords", {
        "用户任务描述": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "示例对话": fields["示例对话"],
    })
    语音识别热词, usage3 = llm_client.call("", prompt3, max_tokens=1024, temperature=0.1)
    语音识别热词 = 语音识别热词.strip()
    logger.info(f" Step 3 完成: {len(语音识别热词)} 字符, tokens={usage3}")

    # ── Step 4: taskData ──
    logger.info("[core_speaking_inquiry] Step 4/7: taskData")
    prompt4 = build_speaking_inquiry_prompt("step4_task_data", {
        "ID": str(cId),
        "用户任务描述": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "角色配置": 角色配置,
    })
    taskData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 4 完成: taskData keys={_keys_of(taskData)}, tokens={usage4}")

    # ── Step 5: settingData ──
    logger.info("[core_speaking_inquiry] Step 5/7: settingData")
    prompt5 = build_speaking_inquiry_prompt("step5_setting_data", {
        "ID": str(cId),
        "对话配置": fields["对话配置"],
    })
    settingData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=2048, temperature=0.1)
    logger.info(f" Step 5 完成: settingData keys={_keys_of(settingData)}, tokens={usage5}")

    # ── Step 6: configData (consumes the Step 2 and Step 3 intermediate text) ──
    logger.info("[core_speaking_inquiry] Step 6/7: configData")
    prompt6 = build_speaking_inquiry_prompt("step6_config_data", {
        "语音识别热词": 语音识别热词,
        "任务背景": fields["npc视角任务背景"],
        "用户任务描述": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "角色驱动配置": promptInfo配置,
        "示例对话": fields["示例对话"],
        "通过规则": fields["通过规则"],
    })
    configData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 6 完成: configData keys={_keys_of(configData)}, tokens={usage6}")

    # ── Step 7: learningData (consumes the Step 1 intermediate text) ──
    logger.info("[core_speaking_inquiry] Step 7/7: learningData")
    prompt7 = build_speaking_inquiry_prompt("step7_learning_data", {
        "学习配置": 学习流程配置,
    })
    learningData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 7 完成: learningData keys={_keys_of(learningData)}, tokens={usage7}")

    # ── kpInfo ──
    # Best effort: a failure here must not throw away the seven LLM results
    # already produced, so the error is logged and kpInfo is left as None.
    kp_info = None
    try:
        kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level)
    except Exception as e:
        logger.warning(f"kpInfo 生成失败: {e}")

    logger.info(f"[core_speaking_inquiry] 7步生成全部完成: cId={cId}")

    return {
        "taskInfo": taskData,
        "dialogSetting": settingData,
        "dialogConfig": configData,
        "studyInfo": learningData,
        "kpInfo": kp_info,
        "intermediate": {
            "学习流程配置": 学习流程配置,
            "promptInfo配置": promptInfo配置,
            "语音识别热词": 语音识别热词,
        },
    }
|
||
|
||
|
||
def parse_core_speaking_image_fields(teaching_config):
    """
    Extract the input fields core_speaking_image needs from the component config text.

    The component config is the guide config and the dialogue config concatenated.

    Returns:
        dict: field name -> value
    """
    parsed = parse_teaching_config_fields(teaching_config)

    def first_filled(*names):
        # First truthy value among ``names``; otherwise whatever the last lookup returned.
        found = ""
        for name in names:
            found = parsed.get(name, "")
            if found:
                break
        return found

    # User task description: each present section re-wrapped under its 【…】 header.
    task_sections = [
        f"【{label}】\n{parsed[label]}"
        for label in ("任务标题", "任务背景", "任务描述")
        if parsed.get(label)
    ]

    # Dialogue info = dialogue role followed by role background.
    dialog_lines = [
        parsed[label]
        for label in ("对话角色", "角色背景")
        if parsed.get(label)
    ]

    return {
        "用户任务描述": "\n\n".join(task_sections),
        "用户任务知识": first_filled("知识", "用户任务知识", "通关知识"),
        "对话信息": "\n".join(dialog_lines),
        "互动问题": parsed.get("互动问题", ""),
        "通过规则": parsed.get("通过规则", ""),
        "示例对话": parsed.get("示例对话", ""),
        "角色配置": parsed.get("角色配置", ""),
        "专有词汇": first_filled("专有词汇", "专有名词"),
    }
|
||
|
||
|
||
def generate_core_speaking_image(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """
    Run the 8-step LLM generation pipeline for the core_speaking_image component.

    Step 1: learning-flow config -> intermediate text
    Step 2: ASR hot words        -> intermediate text
    Step 3: taskData             -> JSON
    Step 4: imageInfo            -> JSON
    Step 5: optionList           -> JSON
    Step 6: questionList         -> JSON
    Step 7: configData           -> JSON (depends on questionList and the Step 2 text)
    Step 8: learningData         -> JSON (depends on the Step 1 text)

    Args:
        cId: Component id; rendered into prompts as ``str(cId)`` and passed to
            generate_kp_info.
        teaching_config: Component configuration text, parsed with
            parse_core_speaking_image_fields.
        character_map: Optional mapping; when the config has no 角色配置 section,
            its items are rendered as "key:value" lines and used instead.
        llm_client: Client exposing ``call`` and ``call_for_json``; when None,
            one is obtained from ``llm_client.get_client()``.
        level: Passed through unchanged to generate_kp_info.

    Returns:
        dict: {
            "taskInfo": Step 3 result, "dialogConfig": Step 7 result,
            "imageInfo": Step 4 result, "optionList": Step 5 result,
            "questionList": Step 6 result, "studyInfo": Step 8 result,
            "kpInfo": generate_kp_info result, or None if that call raised,
            "intermediate": {"学习流程配置": str, "语音识别热词": str}
        }
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    def _keys_of(value):
        # Guard in case the LLM returns a non-dict JSON value: a progress log
        # line must never be the thing that aborts the pipeline.
        return list(value.keys()) if isinstance(value, dict) else type(value).__name__

    cType = "core_speaking_image"

    fields = parse_core_speaking_image_fields(teaching_config)

    # Role config: prefer the one in the component config, fall back to character_map.
    角色配置 = fields["角色配置"]
    if not 角色配置 and character_map:
        角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_speaking_image] 开始8步生成: cId={cId}")

    # ── Step 1: learning-flow config ──
    logger.info("[core_speaking_image] Step 1/8: 学习流程配置")
    prompt1 = build_speaking_image_prompt("step1_learning_flow", {
        "用户任务描述": fields["用户任务描述"],
        "示例对话": fields["示例对话"],
        "专有词汇": fields["专有词汇"],
        "对话信息": fields["对话信息"],
    })
    学习流程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3)
    学习流程配置 = 学习流程配置.strip()
    logger.info(f" Step 1 完成: {len(学习流程配置)} 字符, tokens={usage1}")

    # ── Step 2: ASR hot words ──
    logger.info("[core_speaking_image] Step 2/8: 语音识别热词")
    prompt2 = build_speaking_image_prompt("step2_asr_hotwords", {
        "用户任务描述": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "示例对话": fields["示例对话"],
    })
    语音识别热词, usage2 = llm_client.call("", prompt2, max_tokens=1024, temperature=0.1)
    语音识别热词 = 语音识别热词.strip()
    logger.info(f" Step 2 完成: {len(语音识别热词)} 字符, tokens={usage2}")

    # ── Step 3: taskData ──
    # The interaction description is meant to be the English part of the sample
    # dialogue; the full 示例对话 text is passed as-is and no filtering is done here.
    互动描述 = fields["示例对话"]
    logger.info("[core_speaking_image] Step 3/8: taskData")
    prompt3 = build_speaking_image_prompt("step3_task_data", {
        "ID": str(cId),
        "用户任务描述": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "角色配置": 角色配置,
        "互动描述": 互动描述,
    })
    taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 3 完成: taskData keys={_keys_of(taskData)}, tokens={usage3}")

    # ── Step 4: imageInfo ──
    logger.info("[core_speaking_image] Step 4/8: imageInfo")
    prompt4 = build_speaking_image_prompt("step4_image_info", {
        "ID": str(cId),
    })
    imageInfo, usage4 = llm_client.call_for_json("", prompt4, max_tokens=512, temperature=0.0)
    logger.info(f" Step 4 完成: imageInfo keys={list(imageInfo.keys()) if isinstance(imageInfo, dict) else 'N/A'}, tokens={usage4}")

    # ── Step 5: optionList ──
    logger.info("[core_speaking_image] Step 5/8: optionList")
    prompt5 = build_speaking_image_prompt("step5_option_list", {
        "ID": str(cId),
        "互动问题": fields["互动问题"],
    })
    optionList, usage5 = llm_client.call_for_json("", prompt5, max_tokens=2048, temperature=0.1)
    logger.info(f" Step 5 完成: optionList items={len(optionList) if isinstance(optionList, list) else 'N/A'}, tokens={usage5}")

    # ── Step 6: questionList ──
    logger.info("[core_speaking_image] Step 6/8: questionList")
    prompt6 = build_speaking_image_prompt("step6_question_list", {
        "ID": str(cId),
        "互动问题": fields["互动问题"],
        "对话信息": fields["对话信息"],
    })
    questionList, usage6 = llm_client.call_for_json("", prompt6, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 6 完成: questionList items={len(questionList) if isinstance(questionList, list) else 'N/A'}, tokens={usage6}")

    # ── Step 7: configData (depends on questionList) ──
    logger.info("[core_speaking_image] Step 7/8: configData")
    # An empty/falsy questionList is rendered as an empty JSON array in the prompt.
    questionList_str = json.dumps(questionList, ensure_ascii=False) if questionList else "[]"
    prompt7 = build_speaking_image_prompt("step7_config_data", {
        "语音识别热词": 语音识别热词,
        "用户任务描述": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "示例对话": fields["示例对话"],
        "通过规则": fields["通过规则"],
        "questionList": questionList_str,
    })
    configData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 7 完成: configData keys={_keys_of(configData)}, tokens={usage7}")

    # ── Step 8: learningData (depends on the Step 1 text) ──
    logger.info("[core_speaking_image] Step 8/8: learningData")
    prompt8 = build_speaking_image_prompt("step8_learning_data", {
        "学习配置": 学习流程配置,
        "互动问题": fields["互动问题"],
    })
    learningData, usage8 = llm_client.call_for_json("", prompt8, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 8 完成: learningData keys={list(learningData.keys()) if isinstance(learningData, dict) else 'list'}, tokens={usage8}")

    # ── kpInfo ──
    # Best effort: a failure here must not throw away the eight LLM results
    # already produced, so the error is logged and kpInfo is left as None.
    kp_info = None
    try:
        kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client=llm_client, level=level)
    except Exception as e:
        logger.warning(f"kpInfo 生成失败: {e}")

    logger.info(f"[core_speaking_image] 8步生成全部完成: cId={cId}")

    return {
        "taskInfo": taskData,
        "dialogConfig": configData,
        "imageInfo": imageInfo,
        "optionList": optionList,
        "questionList": questionList,
        "studyInfo": learningData,
        "kpInfo": kp_info,
        "intermediate": {
            "学习流程配置": 学习流程配置,
            "语音识别热词": 语音识别热词,
        },
    }
|
||
|
||
|
||
# ============ 合作听力 (core_listening_order) ============
|
||
|
||
def parse_core_listening_order_fields(teaching_config):
    """
    Extract the input fields core_listening_order needs from the component config text.

    The component config is the task config and the material config concatenated,
    made up of 【xxx】 sections.

    Returns:
        dict: field name -> value
    """
    parsed = parse_teaching_config_fields(teaching_config)

    def first_filled(*names):
        # First truthy value among ``names``; otherwise whatever the last lookup returned.
        found = ""
        for name in names:
            found = parsed.get(name, "")
            if found:
                break
        return found

    # Task config = task title + task background, each under its own 【…】 header.
    task_sections = [
        f"【{label}】\n{parsed[label]}"
        for label in ("任务标题", "任务背景")
        if parsed.get(label)
    ]

    return {
        "任务配置": "\n\n".join(task_sections),
        # Task name, used by the interaction-description prompt.
        "任务名称": parsed.get("任务标题", ""),
        "通关知识": parsed.get("通关知识", ""),
        # Listening material, with alternative section names as fallbacks.
        "听力材料": first_filled("听力材料", "阅读材料", "材料配置"),
        "学习流程": first_filled("学习流程", "学习过程", "学习材料"),
        "互动题目": first_filled("互动题目", "题目部分", "题目配置"),
        "流程编排": parsed.get("流程编排", ""),
        "角色配置": parsed.get("角色配置", ""),
        # Proper-noun vocabulary (placeholder; not implemented further yet).
        "专有词汇": first_filled("专有词汇", "专有名词"),
    }
|
||
|
||
|
||
def generate_core_listening_order(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """
    Run the 6-step LLM generation pipeline for the core_listening_order component.

    Step 1: learning-process config -> intermediate text
    Step 2: interaction description -> intermediate text
    Step 3: taskData                -> JSON
    Step 4: textData                -> JSON
    Step 5: sequenceData            -> JSON
    Step 6: learningData            -> JSON (depends on the Step 1 text)

    Args:
        cId: Component id; rendered into prompts as ``str(cId)`` and passed to
            generate_kp_info.
        teaching_config: Component configuration text, parsed with
            parse_core_listening_order_fields.
        character_map: Optional mapping; when the config has no 角色配置 section,
            its items are rendered as "key:value" lines and used instead.
        llm_client: Client exposing ``call`` and ``call_for_json``; when None,
            one is obtained from ``llm_client.get_client()``.
        level: Passed through unchanged to generate_kp_info.

    Returns:
        dict: {
            "taskInfo": Step 3 result, "materialInfo": Step 4 result,
            "flowInfo": Step 5 result, "studyInfo": Step 6 result,
            "kpInfo": generate_kp_info result; None when 通关知识 is empty
                or the call raised,
            "intermediate": {"学习过程配置": str, "互动描述": str}
        }
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    def _keys_of(value):
        # Guard in case the LLM returns a non-dict JSON value: a progress log
        # line must never be the thing that aborts the pipeline.
        return list(value.keys()) if isinstance(value, dict) else type(value).__name__

    cType = "core_listening_order"

    # Parse the input fields.
    fields = parse_core_listening_order_fields(teaching_config)

    # Role config: prefer the one in the component config, fall back to character_map.
    角色配置 = fields["角色配置"]
    if not 角色配置 and character_map:
        角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_listening_order] 开始6步生成: cId={cId}")

    # ── Step 1: learning-process config ──
    logger.info("[core_listening_order] Step 1/6: 学习过程配置")
    prompt1 = build_listening_order_prompt("step1_learning_process", {
        "听力材料": fields["听力材料"],
        "学习流程": fields["学习流程"],
        "专有词汇": fields["专有词汇"],
    })
    学习过程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3)
    学习过程配置 = 学习过程配置.strip()
    logger.info(f" Step 1 完成: {len(学习过程配置)} 字符, tokens={usage1}")

    # ── Step 2: interaction description ──
    # NOTE(review): this text is only returned under "intermediate"; it is not
    # fed into the Step 3 prompt here — confirm that is intended.
    logger.info("[core_listening_order] Step 2/6: 互动描述")
    prompt2 = build_listening_order_prompt("step2_interaction_desc", {
        "任务名称": fields["任务名称"],
        "听力材料": fields["听力材料"],
        "互动题目": fields["互动题目"],
    })
    互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3)
    互动描述 = 互动描述.strip()
    logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}")

    # ── Step 3: taskData ──
    logger.info("[core_listening_order] Step 3/6: taskData")
    prompt3 = build_listening_order_prompt("step3_task_data", {
        "ID": str(cId),
        "用户任务配置": fields["任务配置"],
        "用户任务知识": fields["通关知识"],
        "角色配置": 角色配置,
    })
    taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 3 完成: taskData keys={_keys_of(taskData)}, tokens={usage3}")

    # ── Step 4: textData ──
    logger.info("[core_listening_order] Step 4/6: textData")
    prompt4 = build_listening_order_prompt("step4_text_data", {
        "ID": str(cId),
        "听力材料": fields["听力材料"],
        "题目配置": fields["互动题目"],
        "角色配置": 角色配置,
    })
    textData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 4 完成: textData keys={_keys_of(textData)}, tokens={usage4}")

    # ── Step 5: sequenceData ──
    logger.info("[core_listening_order] Step 5/6: sequenceData")
    prompt5 = build_listening_order_prompt("step5_sequence_data", {
        "ID": str(cId),
        "流程编排": fields["流程编排"],
        "角色配置": 角色配置,
    })
    sequenceData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 5 完成: sequenceData keys={_keys_of(sequenceData)}, tokens={usage5}")

    # ── Step 6: learningData ──
    logger.info("[core_listening_order] Step 6/6: learningData")
    prompt6 = build_listening_order_prompt("step6_learning_data", {
        "学习配置": 学习过程配置,
    })
    learningData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=16384, temperature=0.1)
    logger.info(f" Step 6 完成: learningData keys={_keys_of(learningData)}, tokens={usage6}")

    # kpInfo: best effort — skipped when there is no knowledge text, and a
    # failure is logged rather than discarding the results generated above.
    kp_info = None
    if fields["通关知识"]:
        try:
            kp_info = generate_kp_info(cType, cId, fields["通关知识"], llm_client, level=level)
        except Exception as e:
            logger.warning(f"kpInfo 生成失败: {e}")

    logger.info(f"[core_listening_order] 6步生成完成: cId={cId}")
    return {
        "taskInfo": taskData,
        "materialInfo": textData,
        "flowInfo": sequenceData,
        "studyInfo": learningData,
        "kpInfo": kp_info,
        "intermediate": {
            "学习过程配置": 学习过程配置,
            "互动描述": 互动描述,
        },
    }
|
||
|
||
|
||
# ============ 听力拖拽 (core_listening_drag) ============
|
||
|
||
def parse_core_listening_drag_fields(teaching_config):
    """
    Extract the input fields core_listening_drag needs from the component config text.

    The component config is the guide config and the interaction config
    concatenated, made up of 【xxx】 sections.

    Returns:
        dict: field name -> value
    """
    parsed = parse_teaching_config_fields(teaching_config)

    def first_filled(*names):
        # First truthy value among ``names``; otherwise whatever the last lookup returned.
        found = ""
        for name in names:
            found = parsed.get(name, "")
            if found:
                break
        return found

    # User task description = task title + task background.
    task_sections = [
        f"【{label}】\n{parsed[label]}"
        for label in ("任务标题", "任务背景")
        if parsed.get(label)
    ]

    # Questions: the config may label this section 【题目信息】 or 【题目】 (among others).
    questions = first_filled("题目", "题目信息", "互动题目", "题目部分")

    return {
        "用户任务描述": "\n\n".join(task_sections),
        "任务名称": parsed.get("任务标题", ""),
        "用户任务知识": first_filled("用户任务知识", "通关知识"),
        # Listening material: the config may label it 【听力文本】 or 【听力材料】.
        "听力材料": first_filled("听力材料", "听力文本", "互动配置", "阅读材料"),
        # Learning flow: the config may label it 【学习过程】 or 【学习流程】.
        "学习流程": first_filled("学习流程", "学习过程"),
        "关联知识": parsed.get("关联知识", ""),
        "题目": questions,
        # Question material (for the interaction description) defaults to the questions.
        "题目材料": parsed.get("题目材料", "") or questions,
        "开场语": parsed.get("开场语", ""),
        "角色配置": parsed.get("角色配置", ""),
        "专有词汇": first_filled("专有词汇", "专有名词"),
    }
|
||
|
||
|
||
def generate_core_listening_drag(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """
    Run the 7-step LLM generation pipeline for the core_listening_drag component.

    Step 1: learning-process config -> intermediate text
    Step 2: interaction description -> intermediate text
    Step 3: taskData                -> JSON (uses the Step 2 text)
    Step 4: preDialog               -> JSON
    Step 5: dialogList              -> JSON
    Step 6: questionList            -> JSON
    Step 7: learningData            -> JSON (uses the Step 1 text)

    Args:
        cId: Component id; rendered into prompts as ``str(cId)`` and passed to
            generate_kp_info.
        teaching_config: Component configuration text, parsed with
            parse_core_listening_drag_fields.
        character_map: Optional mapping; when the config has no 角色配置 section,
            its items are rendered as "key:value" lines and used instead.
        llm_client: Client exposing ``call`` and ``call_for_json``; when None,
            one is obtained from ``llm_client.get_client()``.
        level: Passed through unchanged to generate_kp_info.

    Returns:
        dict: {
            "taskInfo": Step 3 result, "preDialog": Step 4 result,
            "dialogList": Step 5 result, "questionList": Step 6 result,
            "studyInfo": Step 7 result,
            "kpInfo": generate_kp_info result; None when 用户任务知识 is empty
                or the call raised,
            "intermediate": {"学习过程配置": str, "互动描述": str}
        }
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    def _keys_of(value):
        # Guard in case the LLM returns a non-dict JSON value: a progress log
        # line must never be the thing that aborts the pipeline.
        return list(value.keys()) if isinstance(value, dict) else type(value).__name__

    cType = "core_listening_drag"

    # Parse the input fields.
    fields = parse_core_listening_drag_fields(teaching_config)

    # Role config: prefer the one in the component config, fall back to character_map.
    角色配置 = fields["角色配置"]
    if not 角色配置 and character_map:
        角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_listening_drag] 开始7步生成: cId={cId}")

    # ── Step 1: learning-process config ──
    logger.info("[core_listening_drag] Step 1/7: 学习过程配置")
    prompt1 = build_listening_drag_prompt("step1_learning_process", {
        "听力材料": fields["听力材料"],
        "学习流程": fields["学习流程"],
        "专有词汇": fields["专有词汇"],
        "关联知识": fields["关联知识"],
    })
    学习过程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3)
    学习过程配置 = 学习过程配置.strip()
    logger.info(f" Step 1 完成: {len(学习过程配置)} 字符, tokens={usage1}")

    # ── Step 2: interaction description ──
    logger.info("[core_listening_drag] Step 2/7: 互动描述")
    prompt2 = build_listening_drag_prompt("step2_interaction_desc", {
        "任务名称": fields["任务名称"],
        "题目材料": fields["题目材料"],
    })
    互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3)
    互动描述 = 互动描述.strip()
    logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}")

    # ── Step 3: taskData ──
    logger.info("[core_listening_drag] Step 3/7: taskData")
    prompt3 = build_listening_drag_prompt("step3_task_data", {
        "ID": str(cId),
        "用户任务配置": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "角色配置": 角色配置,
        "互动描述": 互动描述,
    })
    taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 3 完成: taskData keys={_keys_of(taskData)}, tokens={usage3}")

    # ── Step 4: preDialog ──
    logger.info("[core_listening_drag] Step 4/7: preDialog")
    prompt4 = build_listening_drag_prompt("step4_pre_dialog", {
        "开场语": fields["开场语"],
    })
    preDialogData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 4 完成: preDialogData type={type(preDialogData).__name__}, tokens={usage4}")

    # ── Step 5: dialogList ──
    logger.info("[core_listening_drag] Step 5/7: dialogList")
    prompt5 = build_listening_drag_prompt("step5_dialog_list", {
        "听力材料": fields["听力材料"],
    })
    dialogListData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 5 完成: dialogListData type={type(dialogListData).__name__}, tokens={usage5}")

    # ── Step 6: questionList ──
    logger.info("[core_listening_drag] Step 6/7: questionList")
    prompt6 = build_listening_drag_prompt("step6_question_list", {
        "ID": str(cId),
        "题目": fields["题目"],
    })
    questionListData, usage6 = llm_client.call_for_json("", prompt6, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 6 完成: questionListData type={type(questionListData).__name__}, tokens={usage6}")

    # ── Step 7: learningData ──
    logger.info("[core_listening_drag] Step 7/7: learningData")
    prompt7 = build_listening_drag_prompt("step7_learning_data", {
        "学习配置": 学习过程配置,
    })
    learningData, usage7 = llm_client.call_for_json("", prompt7, max_tokens=16384, temperature=0.1)
    logger.info(f" Step 7 完成: learningData keys={list(learningData.keys()) if isinstance(learningData, dict) else 'list'}, tokens={usage7}")

    # kpInfo: best effort — skipped when there is no knowledge text, and a
    # failure is logged rather than discarding the results generated above.
    kp_info = None
    if fields["用户任务知识"]:
        try:
            kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client, level=level)
        except Exception as e:
            logger.warning(f"kpInfo 生成失败: {e}")

    logger.info(f"[core_listening_drag] 7步生成完成: cId={cId}")
    return {
        "taskInfo": taskData,
        "preDialog": preDialogData,
        "dialogList": dialogListData,
        "questionList": questionListData,
        "studyInfo": learningData,
        "kpInfo": kp_info,
        "intermediate": {
            "学习过程配置": 学习过程配置,
            "互动描述": 互动描述,
        },
    }
|
||
|
||
|
||
# ============ 听力选择 (core_listening_choose) ============
|
||
|
||
def parse_core_listening_choose_fields(teaching_config):
    """
    Extract the input fields core_listening_choose needs from the component config text.

    The component config is the guide config and the interaction config
    concatenated, made up of 【xxx】 sections. Questions may appear as numbered
    groups: 【题目组1】【题目组2】...

    Args:
        teaching_config: Component configuration text, parsed with
            parse_teaching_config_fields.

    Returns:
        dict: field name -> value
    """
    fields = parse_teaching_config_fields(teaching_config)

    # User task description = task title + task background.
    desc_parts = []
    if fields.get("任务标题"):
        desc_parts.append(f"【任务标题】\n{fields['任务标题']}")
    if fields.get("任务背景"):
        desc_parts.append(f"【任务背景】\n{fields['任务背景']}")
    用户任务描述 = "\n\n".join(desc_parts) if desc_parts else ""

    # Task name
    任务名称 = fields.get("任务标题", "")

    # User task knowledge
    用户任务知识 = fields.get("用户任务知识", "") or fields.get("通关知识", "")

    # Listening material: the config may label it 【听力文本】 or 【听力材料】.
    听力材料 = fields.get("听力材料", "") or fields.get("听力文本", "") or fields.get("互动配置", "") or fields.get("阅读材料", "")

    # Learning flow: the config may label it 【学习过程】 or 【学习流程】.
    学习流程 = fields.get("学习流程", "") or fields.get("学习过程", "")

    # Questions: fall back to merging every 【题目组N】 section in order.
    题目 = fields.get("题目", "") or fields.get("互动题目", "") or fields.get("题目部分", "")
    if not 题目:
        group_prefix = "题目组"

        def _group_order(key):
            # Order by the numeric suffix so 题目组10 follows 题目组9; a plain
            # string sort would place it between 题目组1 and 题目组2. Keys
            # without a number keep lexicographic order after the numbered ones.
            number = re.search(r"\d+", key[len(group_prefix):])
            return (0, int(number.group()), key) if number else (1, 0, key)

        group_keys = sorted(
            (key for key in fields if key.startswith(group_prefix)),
            key=_group_order,
        )
        题目组_parts = [f"【{key}】\n{fields[key]}" for key in group_keys]
        if 题目组_parts:
            题目 = "\n".join(题目组_parts)

    # With no listening material, fall back to the questions: for this
    # component the listening text is embedded inside the question groups.
    if not 听力材料 and 题目:
        听力材料 = 题目

    # Question material (for the interaction description) defaults to the questions.
    题目材料 = fields.get("题目材料", "") or 题目

    # Role config
    角色配置 = fields.get("角色配置", "")

    # Proper-noun vocabulary
    专有词汇 = fields.get("专有词汇", "") or fields.get("专有名词", "")

    return {
        "用户任务描述": 用户任务描述,
        "任务名称": 任务名称,
        "用户任务知识": 用户任务知识,
        "听力材料": 听力材料,
        "学习流程": 学习流程,
        "题目": 题目,
        "题目材料": 题目材料,
        "角色配置": 角色配置,
        "专有词汇": 专有词汇,
    }
|
||
|
||
|
||
def generate_core_listening_choose(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """
    Run the 5-step LLM generation pipeline for the core_listening_choose component.

    Step 1: learning-process config -> intermediate text
    Step 2: interaction description -> intermediate text
    Step 3: taskData                -> JSON (uses the Step 2 text)
    Step 4: questionGroup           -> JSON
    Step 5: learningData            -> JSON (uses the Step 1 text)

    Args:
        cId: Component id; rendered into prompts as ``str(cId)`` and passed to
            generate_kp_info.
        teaching_config: Component configuration text, parsed with
            parse_core_listening_choose_fields.
        character_map: Optional mapping; when the config has no 角色配置 section,
            its items are rendered as "key:value" lines and used instead.
        llm_client: Client exposing ``call`` and ``call_for_json``; when None,
            one is obtained from ``llm_client.get_client()``.
        level: Passed through unchanged to generate_kp_info.

    Returns:
        dict: {
            "taskInfo": Step 3 result, "questionGroup": Step 4 result,
            "studyInfo": Step 5 result,
            "kpInfo": generate_kp_info result; None when 用户任务知识 is empty
                or the call raised,
            "intermediate": {"学习过程配置": str, "互动描述": str}
        }
    """
    if llm_client is None:
        from llm_client import get_client
        llm_client = get_client()

    def _keys_of(value):
        # Guard in case the LLM returns a non-dict JSON value: a progress log
        # line must never be the thing that aborts the pipeline.
        return list(value.keys()) if isinstance(value, dict) else type(value).__name__

    cType = "core_listening_choose"

    # Parse the input fields.
    fields = parse_core_listening_choose_fields(teaching_config)

    # Role config: prefer the one in the component config, fall back to character_map.
    角色配置 = fields["角色配置"]
    if not 角色配置 and character_map:
        角色配置 = "\n".join(f"{k}:{v}" for k, v in character_map.items())

    logger.info(f"[core_listening_choose] 开始5步生成: cId={cId}")

    # ── Step 1: learning-process config ──
    logger.info("[core_listening_choose] Step 1/5: 学习过程配置")
    prompt1 = build_listening_choose_prompt("step1_learning_process", {
        "听力材料": fields["听力材料"],
        "学习流程": fields["学习流程"],
        "专有词汇": fields["专有词汇"],
    })
    学习过程配置, usage1 = llm_client.call("", prompt1, max_tokens=8192, temperature=0.3)
    学习过程配置 = 学习过程配置.strip()
    logger.info(f" Step 1 完成: {len(学习过程配置)} 字符, tokens={usage1}")

    # ── Step 2: interaction description ──
    logger.info("[core_listening_choose] Step 2/5: 互动描述")
    prompt2 = build_listening_choose_prompt("step2_interaction_desc", {
        "任务名称": fields["任务名称"],
        "题目材料": fields["题目材料"],
    })
    互动描述, usage2 = llm_client.call("", prompt2, max_tokens=4096, temperature=0.3)
    互动描述 = 互动描述.strip()
    logger.info(f" Step 2 完成: {len(互动描述)} 字符, tokens={usage2}")

    # ── Step 3: taskData ──
    logger.info("[core_listening_choose] Step 3/5: taskData")
    prompt3 = build_listening_choose_prompt("step3_task_data", {
        "ID": str(cId),
        "用户任务配置": fields["用户任务描述"],
        "用户任务知识": fields["用户任务知识"],
        "角色配置": 角色配置,
        "互动描述": 互动描述,
    })
    taskData, usage3 = llm_client.call_for_json("", prompt3, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 3 完成: taskData keys={_keys_of(taskData)}, tokens={usage3}")

    # ── Step 4: questionGroup ──
    logger.info("[core_listening_choose] Step 4/5: questionGroup")
    prompt4 = build_listening_choose_prompt("step4_question_group", {
        "ID": str(cId),
        "题目": fields["题目"],
    })
    questionGroupData, usage4 = llm_client.call_for_json("", prompt4, max_tokens=8192, temperature=0.1)
    logger.info(f" Step 4 完成: questionGroupData type={type(questionGroupData).__name__}, tokens={usage4}")

    # ── Step 5: learningData ──
    logger.info("[core_listening_choose] Step 5/5: learningData")
    prompt5 = build_listening_choose_prompt("step5_learning_data", {
        "学习配置": 学习过程配置,
    })
    learningData, usage5 = llm_client.call_for_json("", prompt5, max_tokens=16384, temperature=0.1)
    logger.info(f" Step 5 完成: learningData keys={list(learningData.keys()) if isinstance(learningData, dict) else 'list'}, tokens={usage5}")

    # kpInfo: best effort — skipped when there is no knowledge text, and a
    # failure is logged rather than discarding the results generated above.
    kp_info = None
    if fields["用户任务知识"]:
        try:
            kp_info = generate_kp_info(cType, cId, fields["用户任务知识"], llm_client, level=level)
        except Exception as e:
            logger.warning(f"kpInfo 生成失败: {e}")

    logger.info(f"[core_listening_choose] 5步生成完成: cId={cId}")
    return {
        "taskInfo": taskData,
        "questionGroup": questionGroupData,
        "studyInfo": learningData,
        "kpInfo": kp_info,
        "intermediate": {
            "学习过程配置": 学习过程配置,
            "互动描述": 互动描述,
        },
    }
|
||
|
||
|
||
# ============ 写作类核心互动(4种) ============
|
||
|
||
def parse_core_writing_fields(teaching_config):
    """
    Extract the input fields the writing-type core interactions need from the config text.

    The component config is the task-info config and the full-text config
    concatenated, made up of 【xxx】 sections.

    Returns:
        dict: field name -> value
    """
    parsed = parse_teaching_config_fields(teaching_config)

    def grab(name):
        # Section text, or "" when the section is absent.
        return parsed.get(name, "")

    def render(pairs, separator):
        # Join every non-empty (label, text) pair as a 【label】 section.
        return separator.join(f"【{label}】\n{text}" for label, text in pairs if text)

    task_name = grab("任务名称")
    background = grab("任务背景")
    description = grab("任务描述")
    synopsis = grab("剧情提要")
    knowledge = grab("知识")

    # Learning content may be labelled 【学习内容】/【学习流程】/【学习过程】;
    # 【知识】 stands in when none of those is present.
    learning_content = grab("学习内容") or grab("学习流程") or grab("学习过程") or knowledge

    question_type = grab("题目类型") or grab("题干类型")
    # The stem description is read from its own 【题干描述】 section.
    stem_description = grab("题干描述")
    writing_type = grab("写作类型")

    # Task info block, used by the taskInfo prompt.
    task_info = render(
        [
            ("任务名称", task_name),
            ("任务背景", background),
            ("任务描述", description),
            ("剧情提要", synopsis),
            ("知识", knowledge),
        ],
        "\n\n",
    )

    # Rebuilt question-type text; the type itself is emitted under 【题干类型】.
    question_type_text = render(
        [
            ("题干类型", question_type),
            ("题干描述", stem_description),
            ("写作类型", writing_type),
        ],
        "\n",
    )

    return {
        "任务名称": task_name,
        "任务背景": background,
        "任务描述": description,
        "剧情提要": synopsis,
        "知识": knowledge,
        "学习内容": learning_content,
        "写作材料": grab("写作材料"),
        "通过规则": grab("通过规则"),
        "题目类型": question_type_text,
        "写作类型": writing_type,
        "任务信息": task_info,
    }
|
||
|
||
|
||
def _generate_core_writing(cId, cType, teaching_config, character_map=None, llm_client=None, level=None):
    """
    Run the shared six-step LLM pipeline for writing-type core interactions.

    The steps run strictly in order; later steps consume earlier outputs:

        1. learning-flow configuration  -> intermediate text
        2. interaction description      -> intermediate text
        3. taskInfo                     -> JSON
        4. textInfo                     -> JSON
        5. studyInfo                    -> JSON (uses steps 1 and 3)
        6. evalInfo                     -> JSON (uses steps 3 and 4)

    Args:
        cId: Component id; stringified into the step-3 prompt and forwarded
            to ``generate_kp_info``.
        cType: Writing component type. The two image types are rendered with
            ``build_writing_img_prompt``; any other value is rendered with
            ``build_writing_question_prompt``.
        teaching_config: Raw teaching-config text, split into named fields
            by ``parse_core_writing_fields``.
        character_map: Accepted so the signature matches what
            ``generate_component`` passes to core generators; not read here.
        llm_client: Object exposing ``call`` and ``call_for_json``. When
            ``None`` a client is obtained from ``llm_client.get_client()``.
        level: Script level forwarded to ``generate_kp_info``.

    Returns:
        dict with keys ``taskInfo``, ``textInfo``, ``studyInfo``,
        ``evalInfo``, ``kpInfo`` (``None`` when the config carries no
        knowledge field or kpInfo generation raised) and ``intermediate``
        (the step-1 and step-2 texts).
    """
    if llm_client is None:
        from llm_client import get_client

        llm_client = get_client()

    fields = parse_core_writing_fields(teaching_config)

    # Image-based and question-based writing types use different prompt sets.
    uses_image_prompts = cType in ("core_writing_imgMakeSentence", "core_writing_imgWrite")

    def make_prompt(step, replacements):
        if uses_image_prompts:
            return build_writing_img_prompt(cType, step, replacements)
        return build_writing_question_prompt(cType, step, replacements)

    tag = f"[{cType}]"
    logger.info(f"{tag} 开始6步生成: cId={cId}")

    # -- Step 1: learning-flow configuration (free text) --
    logger.info(f"{tag} Step 1/6: 学习流程配置")
    flow_prompt = make_prompt("step1_learning_flow", {
        "任务背景": fields["任务背景"],
        "任务描述": fields["任务描述"],
        "写作材料": fields["写作材料"],
        "学习内容": fields["学习内容"],
    })
    flow_raw, flow_usage = llm_client.call("", flow_prompt, max_tokens=8192, temperature=0.3)
    learning_flow = flow_raw.strip()
    logger.info(f" Step 1 完成: {len(learning_flow)} 字符, tokens={flow_usage}")

    # -- Step 2: interaction description (free text) --
    logger.info(f"{tag} Step 2/6: 互动描述")
    desc_prompt = make_prompt("step2_interaction_desc", {
        "任务背景": fields["任务背景"],
        "写作材料": fields["写作材料"],
    })
    desc_raw, desc_usage = llm_client.call("", desc_prompt, max_tokens=4096, temperature=0.3)
    interaction_desc = desc_raw.strip()
    logger.info(f" Step 2 完成: {len(interaction_desc)} 字符, tokens={desc_usage}")

    # -- Step 3: taskInfo (JSON) --
    logger.info(f"{tag} Step 3/6: taskInfo")
    task_prompt = make_prompt("step3_task_info", {
        "ID": str(cId),
        "题目类型": fields["题目类型"],
        "任务信息": fields["任务信息"],
        "知识": fields["知识"],
        "写作材料": fields["写作材料"],
        "互动描述": interaction_desc,
    })
    task_info, task_usage = llm_client.call_for_json("", task_prompt, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 3 完成: taskInfo keys={list(task_info.keys())}, tokens={task_usage}")

    # -- Step 4: textInfo (JSON) --
    logger.info(f"{tag} Step 4/6: textInfo")
    text_prompt = make_prompt("step4_text_info", {
        "通过规则": fields["通过规则"],
        "写作材料": fields["写作材料"],
    })
    text_info, text_usage = llm_client.call_for_json("", text_prompt, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 4 完成: textInfo keys={list(text_info.keys())}, tokens={text_usage}")

    # -- Step 5: studyInfo (JSON), fed with the serialized taskInfo --
    logger.info(f"{tag} Step 5/6: studyInfo")
    task_info_json = json.dumps(task_info, ensure_ascii=False)
    study_prompt = make_prompt("step5_study_info", {
        "taskInfo": task_info_json,
        "学习流程配置": learning_flow,
        "学习内容": fields["学习内容"],
    })
    study_info, study_usage = llm_client.call_for_json("", study_prompt, max_tokens=16384, temperature=0.1)
    # The result is only inspected for keys when it is a dict.
    study_keys = list(study_info.keys()) if isinstance(study_info, dict) else 'list'
    logger.info(f" Step 5 完成: studyInfo keys={study_keys}, tokens={study_usage}")

    # -- Step 6: evalInfo (JSON), fed with serialized taskInfo and textInfo --
    logger.info(f"{tag} Step 6/6: evalInfo")
    text_info_json = json.dumps(text_info, ensure_ascii=False)
    eval_prompt = make_prompt("step6_eval_info", {
        "taskInfo": task_info_json,
        "任务信息": fields["任务信息"],
        "textInfo": text_info_json,
    })
    eval_info, eval_usage = llm_client.call_for_json("", eval_prompt, max_tokens=4096, temperature=0.1)
    logger.info(f" Step 6 完成: evalInfo keys={list(eval_info.keys())}, tokens={eval_usage}")

    # kpInfo is best-effort: a failure is logged and leaves kpInfo as None.
    kp_info = None
    if fields["知识"]:
        try:
            kp_info = generate_kp_info(cType, cId, fields["知识"], llm_client, level=level)
        except Exception as exc:
            logger.warning(f"kpInfo 生成失败: {exc}")

    logger.info(f"{tag} 6步生成完成: cId={cId}")
    return {
        "taskInfo": task_info,
        "textInfo": text_info,
        "studyInfo": study_info,
        "evalInfo": eval_info,
        "kpInfo": kp_info,
        "intermediate": {
            "学习流程配置": learning_flow,
            "互动描述": interaction_desc,
        },
    }
|
||
|
||
|
||
def generate_core_writing_img_make_sentence(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Six-step LLM generation pipeline for picture-based sentence building (core_writing_imgMakeSentence)."""
    return _generate_core_writing(
        cId=cId,
        cType="core_writing_imgMakeSentence",
        teaching_config=teaching_config,
        character_map=character_map,
        llm_client=llm_client,
        level=level,
    )
|
||
|
||
|
||
def generate_core_writing_img_write(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Six-step LLM generation pipeline for picture-based writing (core_writing_imgWrite)."""
    return _generate_core_writing(
        cId=cId,
        cType="core_writing_imgWrite",
        teaching_config=teaching_config,
        character_map=character_map,
        llm_client=llm_client,
        level=level,
    )
|
||
|
||
|
||
def generate_core_writing_question_make_sentence(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Six-step LLM generation pipeline for email sentence building (core_writing_questionMakeSentence)."""
    return _generate_core_writing(
        cId=cId,
        cType="core_writing_questionMakeSentence",
        teaching_config=teaching_config,
        character_map=character_map,
        llm_client=llm_client,
        level=level,
    )
|
||
|
||
|
||
def generate_core_writing_question_write(cId, teaching_config, character_map=None, llm_client=None, level=None):
    """Six-step LLM generation pipeline for email writing (core_writing_questionWrite)."""
    return _generate_core_writing(
        cId=cId,
        cType="core_writing_questionWrite",
        teaching_config=teaching_config,
        character_map=character_map,
        llm_client=llm_client,
        level=level,
    )
|
||
|
||
|
||
def generate_component(component, character_map=None, section_char_map=None, llm_client=None, level=None):
    """
    Generate jsonData and kpInfo for a single component in one call.

    Args:
        component: Component dict as returned by ``parse_sheet_rows``. The
            keys ``type_name``, ``cId`` and ``teaching_config`` are required;
            ``has_image``, ``config_info`` and ``knowledge_text`` are read
            when present.
        character_map: Global character mapping, used as the fallback.
        section_char_map: Section-to-character table. When it and the
            component's ``config_info`` are both non-empty, the character
            mapping is resolved through ``resolve_resource_mapping`` instead
            of using ``character_map`` directly.
        llm_client: LLM client instance forwarded to the generators.
        level: Script level (e.g. "L1", "L2"), used for knowledge-point
            matching.

    Returns:
        dict: Always contains ``jsonData``, ``kpInfo``, ``cType``,
        ``type_name``, ``cId``, ``has_image``, ``category``, ``bitable`` and
        ``db_table``.

        For core interaction types (``category == "core"``) ``jsonData`` is
        the generator's ``taskInfo`` and the dict additionally carries
        ``taskInfo``, ``studyInfo``, ``intermediate`` and the type-specific
        sections copied from the generator result (for example
        ``materialInfo``/``flowInfo``, ``questionGroup``,
        ``dialogSetting``/``dialogConfig``, or ``textInfo``/``evalInfo``).

    Raises:
        ValueError: ``cType`` is listed in ``CORE_TYPES`` but has no
            generator registered here.
    """
    from match_component import match_component_type
    from parse_script import resolve_resource_mapping

    # Resolve the component type from its display name.
    type_info = match_component_type(component["type_name"])
    cType = type_info["cType"]
    cId = component["cId"]
    has_image = component.get("has_image", False) or type_info.get("has_image", False)

    # Character mapping: prefer the section-character table when usable.
    resolved_char_map = character_map or {}
    config_info = component.get("config_info", "")
    if section_char_map and config_info:
        resolved_char_map = resolve_resource_mapping(
            section_char_map, config_info, character_map
        )

    # -- Core interaction types: multi-step LLM generation --
    if cType in CORE_TYPES:
        _core_call_args = dict(
            cId=cId,
            teaching_config=component["teaching_config"],
            character_map=resolved_char_map,
            llm_client=llm_client,
            level=level,
        )
        _core_base = {
            "cType": cType,
            "type_name": component["type_name"],
            "cId": cId,
            "has_image": has_image,
            "category": "core",
            "bitable": type_info.get("bitable") or type_info.get("bitable_wiki"),
            "db_table": type_info.get("db_table"),
        }

        # Result keys copied verbatim from the generator output, in the
        # order they appear in the returned dict (after "jsonData").
        order_keys = ("taskInfo", "materialInfo", "flowInfo", "studyInfo", "kpInfo", "intermediate")
        question_group_keys = ("taskInfo", "questionGroup", "studyInfo", "kpInfo", "intermediate")
        dialog_keys = ("taskInfo", "dialogSetting", "dialogConfig", "studyInfo", "kpInfo", "intermediate")
        writing_keys = ("taskInfo", "textInfo", "studyInfo", "evalInfo", "kpInfo", "intermediate")

        # cType -> (generator, keys to copy from its result)
        core_specs = {
            "core_reading_order": (generate_core_reading_order, order_keys),
            "core_reading_imageDrag": (generate_core_reading_imagedrag, question_group_keys),
            "core_speaking_reply": (generate_core_speaking_reply, dialog_keys),
            "core_speaking_inquiry": (generate_core_speaking_inquiry, dialog_keys),
            "core_speaking_image": (
                generate_core_speaking_image,
                ("taskInfo", "dialogConfig", "imageInfo", "optionList",
                 "questionList", "studyInfo", "kpInfo", "intermediate"),
            ),
            "core_listening_order": (generate_core_listening_order, order_keys),
            "core_listening_drag": (
                generate_core_listening_drag,
                ("taskInfo", "preDialog", "dialogList", "questionList",
                 "studyInfo", "kpInfo", "intermediate"),
            ),
            "core_listening_choose": (generate_core_listening_choose, question_group_keys),
            "core_writing_imgMakeSentence": (generate_core_writing_img_make_sentence, writing_keys),
            "core_writing_imgWrite": (generate_core_writing_img_write, writing_keys),
            "core_writing_questionMakeSentence": (generate_core_writing_question_make_sentence, writing_keys),
            "core_writing_questionWrite": (generate_core_writing_question_write, writing_keys),
        }

        spec = core_specs.get(cType)
        if spec is None:
            raise ValueError(f"核心互动类型 {cType} 尚未实现生成器")

        generator, result_keys = spec
        result = generator(**_core_call_args)

        # For core types jsonData is the generator's taskInfo.
        payload = dict(_core_base)
        payload["jsonData"] = result["taskInfo"]
        for key in result_keys:
            payload[key] = result[key]
        return payload

    # -- Mid interaction types: single-step LLM generation --
    json_data = generate_json_data(
        cType=cType,
        cId=cId,
        teaching_config=component["teaching_config"],
        character_map=resolved_char_map,
        has_image=has_image,
        knowledge_text=component.get("knowledge_text", ""),
        llm_client=llm_client,
    )

    kp_info = generate_kp_info(
        cType=cType,
        cId=cId,
        knowledge_text=component.get("knowledge_text", ""),
        llm_client=llm_client,
        level=level,
    )

    return {
        "jsonData": json_data,
        "kpInfo": kp_info,
        "cType": cType,
        "type_name": component["type_name"],
        "cId": cId,
        "has_image": has_image,
        "category": "mid",
        "bitable": type_info.get("bitable"),
        "db_table": type_info.get("db_table"),
    }
|
||
|
||
|
||
# ============ 辅助函数 ============
|
||
|
||
def _get_cn_name(cType):
    """Return the Chinese type name whose MID_INTERACTION_TYPES entry has this cType; fall back to cType itself."""
    from match_component import MID_INTERACTION_TYPES

    matching_names = (
        name
        for name, info in MID_INTERACTION_TYPES.items()
        if info["cType"] == cType
    )
    # First match wins; an unknown cType is returned unchanged.
    return next(matching_names, cType)
|
||
|
||
|
||
def _post_process_json_data(json_data, cType, cId, has_image):
|
||
"""后处理 jsonData:确保必需字段正确"""
|
||
if not isinstance(json_data, dict):
|
||
logger.warning(f"jsonData 不是 dict: {type(json_data)}")
|
||
return json_data
|
||
|
||
# 强制覆盖 cType 和 cId
|
||
json_data["cType"] = cType
|
||
json_data["cId"] = str(cId)
|
||
|
||
# 确保配图相关字段(仅在 has_image 且 LLM 遗漏时补充)
|
||
if has_image and "img" not in json_data:
|
||
json_data["img"] = f"{cId}.png"
|
||
|
||
return json_data
|
||
|
||
|
||
def _try_rule_based_kp(cType, cId, knowledge_text):
|
||
"""
|
||
尝试规则化生成 kpInfo(知识点结构固定,多数情况可规则处理)
|
||
|
||
Returns:
|
||
dict or None: 成功返回 kpInfo,无法处理返回 None
|
||
"""
|
||
# 清理 <text> 标签
|
||
clean_text = re.sub(r'<text[^>]*>', '', knowledge_text)
|
||
clean_text = re.sub(r'</text>', '', clean_text)
|
||
clean_text = clean_text.strip()
|
||
|
||
if not clean_text:
|
||
return None
|
||
|
||
lines = [l.strip() for l in clean_text.split("\n") if l.strip()]
|
||
|
||
kp_list = []
|
||
for line in lines:
|
||
# 去除数字后缀(如 "school 1" → "school", "You need to... 2" → "You need to...")
|
||
stripped = re.sub(r'\s+\d+$', '', line).strip()
|
||
if not stripped:
|
||
continue
|
||
|
||
# 判断 vocab vs sentence
|
||
if "..." in stripped or len(stripped.split()) > 3:
|
||
kp_list.append({
|
||
"kpId": None,
|
||
"kpType": "sentence",
|
||
"kpTitle": stripped,
|
||
"kpSkill": "sentence_meaning",
|
||
"kpSkillName": "语义",
|
||
})
|
||
else:
|
||
kp_list.append({
|
||
"kpId": None,
|
||
"kpType": "vocab",
|
||
"kpTitle": stripped,
|
||
"kpSkill": "vocab_meaning",
|
||
"kpSkillName": "词义",
|
||
})
|
||
|
||
if not kp_list:
|
||
return None
|
||
|
||
return {
|
||
"pushType": "relationKp",
|
||
"cType": cType,
|
||
"cId": str(cId),
|
||
"kpInfo": kp_list,
|
||
}
|
||
|
||
|
||
# ============ CLI 测试 ============
|
||
if __name__ == "__main__":
    # Ad-hoc CLI check: builds a client via get_client(), then prints the
    # generated jsonData and kpInfo for one sample "mid_dialog_repeat"
    # component.
    sys.path.insert(0, CURRENT_PATH)

    from llm_client import get_client

    client = get_client()

    # Sample teaching-config text used as the test input
    test_config = """【任务标题】
表示大家一起上学

【资源配置】
图片时机:
互动内容

【情境引入】
User: Great!
User: Oh, there's no time left.

【互动内容】
User: Let's go to school now. (朗读)

【后置对话】
无"""

    test_kp = "school"
    test_char_map = {"Eva": 663, "Peter": 653, "Vicky": 658}

    # jsonData generation for the sample component
    print("=== 测试 jsonData 生成 (v2 template) ===")
    jd = generate_json_data(
        cType="mid_dialog_repeat",
        cId="1214101",
        teaching_config=test_config,
        character_map=test_char_map,
        has_image=True,
        knowledge_text=test_kp,
        llm_client=client,
    )
    print(json.dumps(jd, ensure_ascii=False, indent=2))

    # kpInfo generation for the same component and knowledge text
    print("\n=== 测试 kpInfo 生成 ===")
    kp = generate_kp_info("mid_dialog_repeat", "1214101", test_kp, client)
    print(json.dumps(kp, ensure_ascii=False, indent=2))
|