auto backup: 2026-06-02 08:10:01

This commit is contained in:
ai_member_only 2026-06-02 08:10:01 +08:00
parent 3da7fc67ac
commit 9365f60be9
11 changed files with 15317 additions and 34 deletions

View File

@ -209,10 +209,87 @@ def read_sheet_data(spreadsheet_token, sheet_id, cell_range=None):
return rows return rows
def _parse_lark_table(table_html):
"""
解析飞书 lark-table HTML 为二维数组
parse_script.parse_lark_table 功能相同在此独立实现以避免循环导入
注意保留单元格内的换行<br> 转为 \n因为组件类型列依赖换行分隔类型名和 cId
"""
rows = []
tr_pattern = re.compile(r'<lark-tr>(.*?)</lark-tr>', re.DOTALL)
td_pattern = re.compile(r'<lark-td(?:\s+[^>]*)?>(.*?)</lark-td>', re.DOTALL)
for tr_match in tr_pattern.finditer(table_html):
tr_content = tr_match.group(1)
cells = []
for td_match in td_pattern.finditer(tr_content):
cell_content = td_match.group(1).strip()
# <br> 标签转为换行符(保留多行结构)
cell_text = re.sub(r'<br\s*/?>', '\n', cell_content)
# 清理其他 HTML 标签
cell_text = re.sub(r'<[^>]+>', '', cell_text).strip()
# 清理多余空白,但保留换行符
cell_text = re.sub(r'[ \t]+', ' ', cell_text).strip()
cell_text = re.sub(r'\n{3,}', '\n\n', cell_text)
cells.append(cell_text)
rows.append(cells)
return rows
def _extract_lark_tables_from_markdown(markdown):
    """
    Collect every script-like ``lark-table`` found in a document's markdown.

    Serves as the fallback data source when the document has no embedded
    ``<sheet>`` data. Each ``<lark-table>...</lark-table>`` span is parsed with
    ``_parse_lark_table`` and kept only if it passes all of these filters:

    - it has at least two rows (a header plus at least one data row);
    - its header does not contain both "角色" and "section"
      (case-insensitive), i.e. it is not a role/section mapping table;
    - its header contains at least one script column keyword
      (组件配置 / 台词 / 剧情描述 / 角色名 / 配置信息).

    The first row of each table is treated as its header.

    Returns:
        list[list[list]]: One 2-D array per accepted table, in document order.
    """
    script_keywords = ('组件配置', '台词', '剧情描述', '角色名', '配置信息')
    accepted = []

    for found in re.finditer(r'<lark-table[^>]*>(.*?)</lark-table>', markdown, flags=re.DOTALL):
        # The whole match (including the outer tag) is handed to the parser.
        parsed = _parse_lark_table(found.group(0))

        # Nothing parsed, or a header row with no data beneath it.
        if not parsed or len(parsed) == 1:
            continue

        # Join the non-empty header cells into one searchable string.
        header_text = ' '.join(str(cell) for cell in parsed[0] if cell)

        # Role/section mapping tables are not script tables.
        is_role_section_table = '角色' in header_text and 'section' in header_text.lower()
        if is_role_section_table:
            logger.debug(f"跳过角色-section对应表: {header_text[:60]}")
            continue

        # Summary tables lack all of the script-specific columns.
        if not any(keyword in header_text for keyword in script_keywords):
            logger.debug(f"跳过非剧本表格(无组件配置/台词列): {header_text[:60]}")
            continue

        accepted.append(parsed)
        logger.info(f"从 markdown lark-table 提取表格: {len(parsed)} 行, 表头: {header_text[:80]}")

    return accepted
def read_wiki_doc_with_sheet(wiki_url_or_token): def read_wiki_doc_with_sheet(wiki_url_or_token):
""" """
一站式读取: wiki URL/token 文档markdown + 内嵌sheet数据 一站式读取: wiki URL/token 文档markdown + 内嵌sheet数据
数据源优先级
1. 内嵌 <sheet> 标签飞书电子表格
2. markdown 中的 <lark-table>内联表格fallback
Args: Args:
wiki_url_or_token: 飞书 wiki URL wiki_token wiki_url_or_token: 飞书 wiki URL wiki_token
@ -224,6 +301,9 @@ def read_wiki_doc_with_sheet(wiki_url_or_token):
"markdown": str, "markdown": str,
"sheet_token": (spreadsheet_token, sheet_id) or None, "sheet_token": (spreadsheet_token, sheet_id) or None,
"sheet_rows": list[list] or None, "sheet_rows": list[list] or None,
"all_sheets": list[list[list]],
"all_sheet_tokens": list,
"data_source": "sheet" | "lark_table" | None,
} }
""" """
# 解析 wiki_token # 解析 wiki_token
@ -244,6 +324,8 @@ def read_wiki_doc_with_sheet(wiki_url_or_token):
sheet_infos = extract_sheet_tokens(markdown) sheet_infos = extract_sheet_tokens(markdown)
all_sheet_rows = [] all_sheet_rows = []
sheet_tokens = [] sheet_tokens = []
data_source = None
for sheet_info in sheet_infos: for sheet_info in sheet_infos:
spreadsheet_token, sheet_id = sheet_info spreadsheet_token, sheet_id = sheet_info
if sheet_id: if sheet_id:
@ -257,6 +339,19 @@ def read_wiki_doc_with_sheet(wiki_url_or_token):
else: else:
logger.warning(f"sheet_token 中未包含 sheet_id: {sheet_info}") logger.warning(f"sheet_token 中未包含 sheet_id: {sheet_info}")
if all_sheet_rows:
data_source = "sheet"
else:
# Fallback: 尝试从 markdown lark-table 中提取剧本表格
logger.info("未找到内嵌 sheet尝试从 markdown lark-table 解析...")
lark_tables = _extract_lark_tables_from_markdown(markdown)
if lark_tables:
all_sheet_rows = lark_tables
data_source = "lark_table"
logger.info(f"从 markdown lark-table 成功提取 {len(lark_tables)} 个表格作为 fallback 数据源")
else:
logger.warning("markdown 中也未找到有效的剧本 lark-table")
# 兼容旧接口sheet_rows 取第一个(向后兼容),新增 all_sheets # 兼容旧接口sheet_rows 取第一个(向后兼容),新增 all_sheets
sheet_rows = all_sheet_rows[0] if all_sheet_rows else None sheet_rows = all_sheet_rows[0] if all_sheet_rows else None
sheet_token = sheet_tokens[0] if sheet_tokens else None sheet_token = sheet_tokens[0] if sheet_tokens else None
@ -270,6 +365,7 @@ def read_wiki_doc_with_sheet(wiki_url_or_token):
"sheet_rows": sheet_rows, "sheet_rows": sheet_rows,
"all_sheets": all_sheet_rows, "all_sheets": all_sheet_rows,
"all_sheet_tokens": sheet_tokens, "all_sheet_tokens": sheet_tokens,
"data_source": data_source,
} }

View File

@ -219,6 +219,7 @@ def process_script(wiki_url_or_token, db_path=None, dry_run=False, target_cids=N
"skipped": 0, "skipped": 0,
"results": [], "results": [],
"errors": [], "errors": [],
"data_source": None,
} }
# 设置文件日志(早期设置,后续用标题重命名) # 设置文件日志(早期设置,后续用标题重命名)
@ -229,15 +230,20 @@ def process_script(wiki_url_or_token, db_path=None, dry_run=False, target_cids=N
try: try:
doc_data = read_wiki_doc_with_sheet(wiki_url_or_token) doc_data = read_wiki_doc_with_sheet(wiki_url_or_token)
report["title"] = doc_data["title"] report["title"] = doc_data["title"]
logger.info(f"文档: {doc_data['title']}, obj_token={doc_data['obj_token']}") report["data_source"] = doc_data.get("data_source")
logger.info(f"文档: {doc_data['title']}, obj_token={doc_data['obj_token']}, data_source={doc_data.get('data_source')}")
except Exception as e: except Exception as e:
report["errors"].append(f"读取文档失败: {e}") report["errors"].append(f"读取文档失败: {e}")
logger.error(f"读取文档失败: {e}") logger.error(f"读取文档失败: {e}")
return report return report
data_source = doc_data.get("data_source")
if data_source == "lark_table":
logger.info("数据源: markdown lark-table (fallback)")
if not doc_data["sheet_rows"] and not doc_data.get("all_sheets"): if not doc_data["sheet_rows"] and not doc_data.get("all_sheets"):
report["errors"].append("文档中未找到内嵌sheet数据") report["errors"].append("文档中未找到内嵌sheet数据也未找到有效的lark-table剧本表格")
logger.error("文档中未找到内嵌sheet数据") logger.error("文档中未找到任何有效数据源sheet + lark-table 均为空)")
return report return report
# Step 2: 解析剧本尝试所有sheet取组件数最多的结果 # Step 2: 解析剧本尝试所有sheet取组件数最多的结果

View File

@ -79,3 +79,6 @@
{"type":"memory.recall.recorded","timestamp":"2026-05-29T00:23:17.391Z","query":"输出文本和JSON 题型 单元挑战 questionSet 看图组词 单词释义","resultCount":1,"results":[{"path":"memory/2026-05-28.md","startLine":46,"endLine":62,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-29T00:23:17.391Z","query":"输出文本和JSON 题型 单元挑战 questionSet 看图组词 单词释义","resultCount":1,"results":[{"path":"memory/2026-05-28.md","startLine":46,"endLine":62,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-29T00:23:17.392Z","query":"reading_pic_makeWord reading_word_definition listening_listenWrite reading_select_cloze 修正","resultCount":5,"results":[{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1},{"path":"memory/2026-05-11.md","startLine":1,"endLine":25,"score":1},{"path":"memory/2026-05-17.md","startLine":19,"endLine":41,"score":1},{"path":"memory/2026-05-12.md","startLine":170,"endLine":193,"score":1},{"path":"memory/2026-05-21.md","startLine":52,"endLine":67,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-29T00:23:17.392Z","query":"reading_pic_makeWord reading_word_definition listening_listenWrite reading_select_cloze 修正","resultCount":5,"results":[{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1},{"path":"memory/2026-05-11.md","startLine":1,"endLine":25,"score":1},{"path":"memory/2026-05-17.md","startLine":19,"endLine":41,"score":1},{"path":"memory/2026-05-12.md","startLine":170,"endLine":193,"score":1},{"path":"memory/2026-05-21.md","startLine":52,"endLine":67,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-05-29T02:33:30.526Z","query":"梁晨 l1_pedagogy_rules pedagogy rules JS文件","resultCount":6,"results":[{"path":"memory/2026-05-26.md","startLine":130,"endLine":166,"score":1},{"path":"memory/2026-05-28.md","startLine":46,"endLine":62,"score":1},{"path":"memory/2026-05-25.md","startLine":283,"endLine":302,"score":1},{"path":"memory/2026-05-25.md","startLine":118,"endLine":137,"score":1},{"path":"memory/2026-05-28.md","startLine":60,"endLine":66,"score":1},{"path":"memory/2026-05-20.md","startLine":1,"endLine":31,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-29T02:33:30.526Z","query":"梁晨 l1_pedagogy_rules pedagogy rules JS文件","resultCount":6,"results":[{"path":"memory/2026-05-26.md","startLine":130,"endLine":166,"score":1},{"path":"memory/2026-05-28.md","startLine":46,"endLine":62,"score":1},{"path":"memory/2026-05-25.md","startLine":283,"endLine":302,"score":1},{"path":"memory/2026-05-25.md","startLine":118,"endLine":137,"score":1},{"path":"memory/2026-05-28.md","startLine":60,"endLine":66,"score":1},{"path":"memory/2026-05-20.md","startLine":1,"endLine":31,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-06-01T03:18:26.408Z","query":"句型库 L1 L2 pattern get home talk to","resultCount":2,"results":[{"path":"memory/2026-05-28.md","startLine":60,"endLine":66,"score":1},{"path":"memory/2026-05-28.md","startLine":46,"endLine":62,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-06-01T10:44:19.887Z","query":"互动组件生产 中互动 核心互动 剧本表格 对话类","resultCount":3,"results":[{"path":"memory/2026-05-07.md","startLine":354,"endLine":368,"score":1},{"path":"memory/2026-05-18.md","startLine":793,"endLine":810,"score":1},{"path":"memory/2026-04-22.md","startLine":1,"endLine":8,"score":1}]}
{"type":"memory.recall.recorded","timestamp":"2026-06-01T10:44:47.489Z","query":"王璐辰 反馈 组件配置 中互动 核心互动 问题","resultCount":2,"results":[{"path":"memory/2026-04-30.md","startLine":50,"endLine":63,"score":1},{"path":"memory/2026-05-07.md","startLine":354,"endLine":368,"score":1}]}

View File

@ -1,6 +1,6 @@
{ {
"version": 1, "version": 1,
"updatedAt": "2026-05-29T02:33:30.526Z", "updatedAt": "2026-06-01T10:44:47.489Z",
"entries": { "entries": {
"memory:memory/2026-05-07.md:57:74": { "memory:memory/2026-05-07.md:57:74": {
"key": "memory:memory/2026-05-07.md:57:74", "key": "memory:memory/2026-05-07.md:57:74",
@ -439,20 +439,22 @@
"endLine": 63, "endLine": 63,
"source": "memory", "source": "memory",
"snippet": "格式: 【任务标题】【情境引入】【互动内容】【互动反馈】【后置对话】。目标词用$...$包裹,选项为图片区域编号(00/01/02)。 #### 5. 对话选择 (1条) — 规则: 参考 teaching_config 示例格式 + mid_dialog_choose prompt | ID | 任务标题 | 选项 | 知识点 | |---|---------|------|--------| | 1115514 | 判断蕃茄的好坏 | It is a good one. / It is a bad one. | one | 格式: 【任务标题】【资源配置】【情境引入】【互动内容】【后置对话】。互动内容含要求+选项+反馈。 ### 关键经验 - **组件配置列位置变更**: 本轮操作中发现sheet新增了「配置信息」列(C列)导致组件配置从F列移至G列。后续操作需先确认列结构。 - **Bot身份操作**: 所有飞书 sheet 写入均使用 Bot 身份App ID: cli_a931175d41799cc7Feishu API Token: t-g1044ubUXXMPFXBW75RILL7YZKRGAKQGWWDV2R3D - **对话选择组件**: 无独立 SKILL.md需参照 interactive-component-json 的 prompt_registry.py 中 mid_dialog_choose 模板 + bitable 中已有的 teaching_config 示例格式", "snippet": "格式: 【任务标题】【情境引入】【互动内容】【互动反馈】【后置对话】。目标词用$...$包裹,选项为图片区域编号(00/01/02)。 #### 5. 对话选择 (1条) — 规则: 参考 teaching_config 示例格式 + mid_dialog_choose prompt | ID | 任务标题 | 选项 | 知识点 | |---|---------|------|--------| | 1115514 | 判断蕃茄的好坏 | It is a good one. / It is a bad one. | one | 格式: 【任务标题】【资源配置】【情境引入】【互动内容】【后置对话】。互动内容含要求+选项+反馈。 ### 关键经验 - **组件配置列位置变更**: 本轮操作中发现sheet新增了「配置信息」列(C列)导致组件配置从F列移至G列。后续操作需先确认列结构。 - **Bot身份操作**: 所有飞书 sheet 写入均使用 Bot 身份App ID: cli_a931175d41799cc7Feishu API Token: t-g1044ubUXXMPFXBW75RILL7YZKRGAKQGWWDV2R3D - **对话选择组件**: 无独立 SKILL.md需参照 interactive-component-json 的 prompt_registry.py 中 mid_dialog_choose 模板 + bitable 中已有的 teaching_config 示例格式",
"recallCount": 2, "recallCount": 3,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 2, "totalScore": 3,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-05-13T03:09:54.362Z", "firstRecalledAt": "2026-05-13T03:09:54.362Z",
"lastRecalledAt": "2026-05-28T10:04:34.024Z", "lastRecalledAt": "2026-06-01T10:44:47.489Z",
"queryHashes": [ "queryHashes": [
"f151bc633ad1", "f151bc633ad1",
"71e44ea68b09" "71e44ea68b09",
"f53c9769e692"
], ],
"recallDays": [ "recallDays": [
"2026-05-13", "2026-05-13",
"2026-05-28" "2026-05-28",
"2026-06-01"
], ],
"conceptTags": [ "conceptTags": [
"00/01/02", "00/01/02",
@ -872,13 +874,13 @@
"endLine": 368, "endLine": 368,
"source": "memory", "source": "memory",
"snippet": "- **需求:** 将 020102I am...)和 020103I am ready / Thank you两套题合并为一个 `{first:..., second:...}` JSON统一 questionSetID=0000001 - **状态:** ✅ 已完成 - **核心考点分析(用户强调):** 需分析每个句型的核心考点(孩子最容易犯错的地方),挖空对准核心考点 - I am/from 组am系动词第一人称、from介词选择、studenta+名词结构) - Thank you for 组for介词选择非 you、helpingfor+动名词,非 help/to help - **输出文件:** `output/writing_pic_qa_combined.json` ### 刘彦江 — 组件配置-json 请求L1-S2-U13-L4 沙漠之花) - **时间:** 16:45 ~ 17:51 - **文档:** `https://makee-interactive.feishu.cn/wiki/K5E1wzwk7it9t7kXvcbc6Xugnhc` - **状态:** ⚠️ 未完成 — pipeline 识别到 0 组件 - **根因:** 剧本文档的13个组件数据存储在 markdown 内联表格中lark-table5列×36行而非内嵌 Sheet。当前 pipeline 的 parse_script 只从内嵌 Sheet 读取组件数据,不支持 markdown 表格组件解析 - **已识别组件ma", "snippet": "- **需求:** 将 020102I am...)和 020103I am ready / Thank you两套题合并为一个 `{first:..., second:...}` JSON统一 questionSetID=0000001 - **状态:** ✅ 已完成 - **核心考点分析(用户强调):** 需分析每个句型的核心考点(孩子最容易犯错的地方),挖空对准核心考点 - I am/from 组am系动词第一人称、from介词选择、studenta+名词结构) - Thank you for 组for介词选择非 you、helpingfor+动名词,非 help/to help - **输出文件:** `output/writing_pic_qa_combined.json` ### 刘彦江 — 组件配置-json 请求L1-S2-U13-L4 沙漠之花) - **时间:** 16:45 ~ 17:51 - **文档:** `https://makee-interactive.feishu.cn/wiki/K5E1wzwk7it9t7kXvcbc6Xugnhc` - **状态:** ⚠️ 未完成 — pipeline 识别到 0 组件 - **根因:** 剧本文档的13个组件数据存储在 markdown 内联表格中lark-table5列×36行而非内嵌 Sheet。当前 pipeline 的 parse_script 只从内嵌 Sheet 读取组件数据,不支持 markdown 表格组件解析 - **已识别组件ma",
"recallCount": 10, "recallCount": 12,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 10, "totalScore": 12,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-05-15T07:13:08.147Z", "firstRecalledAt": "2026-05-15T07:13:08.147Z",
"lastRecalledAt": "2026-05-28T02:24:09.414Z", "lastRecalledAt": "2026-06-01T10:44:47.489Z",
"queryHashes": [ "queryHashes": [
"08364c8746ab", "08364c8746ab",
"4f08741ab4fd", "4f08741ab4fd",
@ -889,7 +891,9 @@
"612fa3b04b06", "612fa3b04b06",
"d6a04b711fd9", "d6a04b711fd9",
"8742c0bf4e2b", "8742c0bf4e2b",
"eb0902db0156" "eb0902db0156",
"229e5d3943bb",
"f53c9769e692"
], ],
"recallDays": [ "recallDays": [
"2026-05-15", "2026-05-15",
@ -897,7 +901,8 @@
"2026-05-18", "2026-05-18",
"2026-05-21", "2026-05-21",
"2026-05-25", "2026-05-25",
"2026-05-28" "2026-05-28",
"2026-06-01"
], ],
"conceptTags": [ "conceptTags": [
"am/from", "am/from",
@ -917,13 +922,13 @@
"endLine": 8, "endLine": 8,
"source": "memory", "source": "memory",
"snippet": "[李应瑛 2026-04-22 提出要求] 所有需要包含对话的内容(如剧本、互动组件等)必须要有【后置对话】字段,无后置对话时填写“无”。 [李应瑛 2026-04-22 确认规则] 剧本内嵌表格组件填写位置规则仅当表格第一列A列明确标注为对话类类型对话朗读/对话挖空/对话选读/对话组句等才在同一行的H列【组件】列填写对应的组件内容其他类型行TL/场景/角色/图片/非对话类等)无需填写。 [李应瑛 2026-04-22 确认格式规则] 对话类组件字段换行规则:每个结构单独占一行,格式为: 【任务标题】xxx 【情境引入】xxx 【互动内容】xxx 【后置对话】xxx 单元格内使用\\n作为换行符实现后续所有组件均遵循此格式。", "snippet": "[李应瑛 2026-04-22 提出要求] 所有需要包含对话的内容(如剧本、互动组件等)必须要有【后置对话】字段,无后置对话时填写“无”。 [李应瑛 2026-04-22 确认规则] 剧本内嵌表格组件填写位置规则仅当表格第一列A列明确标注为对话类类型对话朗读/对话挖空/对话选读/对话组句等才在同一行的H列【组件】列填写对应的组件内容其他类型行TL/场景/角色/图片/非对话类等)无需填写。 [李应瑛 2026-04-22 确认格式规则] 对话类组件字段换行规则:每个结构单独占一行,格式为: 【任务标题】xxx 【情境引入】xxx 【互动内容】xxx 【后置对话】xxx 单元格内使用\\n作为换行符实现后续所有组件均遵循此格式。",
"recallCount": 9, "recallCount": 10,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 9, "totalScore": 10,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-05-15T07:13:08.147Z", "firstRecalledAt": "2026-05-15T07:13:08.147Z",
"lastRecalledAt": "2026-05-28T02:24:09.414Z", "lastRecalledAt": "2026-06-01T10:44:19.887Z",
"queryHashes": [ "queryHashes": [
"08364c8746ab", "08364c8746ab",
"4f08741ab4fd", "4f08741ab4fd",
@ -933,7 +938,8 @@
"612fa3b04b06", "612fa3b04b06",
"d6a04b711fd9", "d6a04b711fd9",
"8742c0bf4e2b", "8742c0bf4e2b",
"eb0902db0156" "eb0902db0156",
"229e5d3943bb"
], ],
"recallDays": [ "recallDays": [
"2026-05-15", "2026-05-15",
@ -941,7 +947,8 @@
"2026-05-18", "2026-05-18",
"2026-05-21", "2026-05-21",
"2026-05-25", "2026-05-25",
"2026-05-28" "2026-05-28",
"2026-06-01"
], ],
"conceptTags": [ "conceptTags": [
"对话朗读/对话挖空/对话选读/对话组句等", "对话朗读/对话挖空/对话选读/对话组句等",
@ -1484,25 +1491,27 @@
"endLine": 810, "endLine": 810,
"source": "memory", "source": "memory",
"snippet": "- Script sheet: `wMQVyV`186行×9列 - Knowledge points sheet: `DCcKsLbrmhfXgrtB7N2c9GA4ntf_NtIcXt` - 列结构A=类型, D=剧情描述, E=角色名, F=编剧台词English已填好, **G=组件配置** - 知识点point, talk, understand, a lot of + `talk to...` / `I can/can't understand...` ### 交互模式差异 - L5 文档 B列/C列 为空,无详细组件类型标签(仅 A=互动/核心互动-口语) - 编剧台词F列已全部填写英文 - 互动行的 User 台词含红色标注知识点词 ### 完成事项 - 24个互动行全部生成G列配置组件类型推断听力挖空、朗读台词、口语表达 - 写入方式:同上 Sheets v2 API - 24/24 全部回读验证通过 ### 脚本 `scripts/write_L5_G_configs.py`", "snippet": "- Script sheet: `wMQVyV`186行×9列 - Knowledge points sheet: `DCcKsLbrmhfXgrtB7N2c9GA4ntf_NtIcXt` - 列结构A=类型, D=剧情描述, E=角色名, F=编剧台词English已填好, **G=组件配置** - 知识点point, talk, understand, a lot of + `talk to...` / `I can/can't understand...` ### 交互模式差异 - L5 文档 B列/C列 为空,无详细组件类型标签(仅 A=互动/核心互动-口语) - 编剧台词F列已全部填写英文 - 互动行的 User 台词含红色标注知识点词 ### 完成事项 - 24个互动行全部生成G列配置组件类型推断听力挖空、朗读台词、口语表达 - 写入方式:同上 Sheets v2 API - 24/24 全部回读验证通过 ### 脚本 `scripts/write_L5_G_configs.py`",
"recallCount": 5, "recallCount": 6,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 5, "totalScore": 6,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-05-20T06:31:39.981Z", "firstRecalledAt": "2026-05-20T06:31:39.981Z",
"lastRecalledAt": "2026-05-26T02:59:03.427Z", "lastRecalledAt": "2026-06-01T10:44:19.887Z",
"queryHashes": [ "queryHashes": [
"688d2dceca9d", "688d2dceca9d",
"d05a0257d44b", "d05a0257d44b",
"e637236fe74b", "e637236fe74b",
"14d903a64d04", "14d903a64d04",
"240a6a5dca41" "240a6a5dca41",
"229e5d3943bb"
], ],
"recallDays": [ "recallDays": [
"2026-05-20", "2026-05-20",
"2026-05-21", "2026-05-21",
"2026-05-25", "2026-05-25",
"2026-05-26" "2026-05-26",
"2026-06-01"
], ],
"conceptTags": [ "conceptTags": [
"can/can", "can/can",
@ -2655,19 +2664,21 @@
"endLine": 62, "endLine": 62,
"source": "memory", "source": "memory",
"snippet": "- B级规则听力句子 5-8 词(均 7 词),每组 3 张同类物品不同属性图片 - 能力标签:显性事实理解|关键词识别 ×2 + 基础语境理解|场景/物品/动作识别 ## 梁辰user_id: ou_28f02dcada1193913cfbb6310f8daf07— HTML 诊断页面教研规则 JS 文件 - 用户有一个 L1 关卡诊断工作台 HTML 页面(部署在腾讯云空间),当前只做数量级统计,缺少教研规则校验 - 需求:提供可嵌入 HTML 的前端 JS 校验规则文件,使诊断有据可依 - 输出:`output/l1_pedagogy_rules.js`42KB包含 8 个校验维度: 1. 词汇超纲检测(基于 L1/L2 词库) 2. 英式拼写检测color→colour 等 115 组映射) 3. Markdown 标记检测(`**`/`#`/`>` 等) 4. 标点规范检测(全角混入、``、`!!!` 5. 题型-阶段匹配校验20 种题型对应的 L1/L2 阶段映射) 6. 台词质量分析(句子长度、负面评价) 7. 知识点曝光度 8. 句型合规检测 - 接入方式:`<script src=\"l1_pedagogy_rules.js\"></script>` + 调用 `PedagogyRules.validate(summary, level)` - 数据源L1 词库 147 词(过滤 enabled=true、L2 词库 52 词、L1 句型 8 个、L2 句型(从 437MB bitable 导出中提取唯一结", "snippet": "- B级规则听力句子 5-8 词(均 7 词),每组 3 张同类物品不同属性图片 - 能力标签:显性事实理解|关键词识别 ×2 + 基础语境理解|场景/物品/动作识别 ## 梁辰user_id: ou_28f02dcada1193913cfbb6310f8daf07— HTML 诊断页面教研规则 JS 文件 - 用户有一个 L1 关卡诊断工作台 HTML 页面(部署在腾讯云空间),当前只做数量级统计,缺少教研规则校验 - 需求:提供可嵌入 HTML 的前端 JS 校验规则文件,使诊断有据可依 - 输出:`output/l1_pedagogy_rules.js`42KB包含 8 个校验维度: 1. 词汇超纲检测(基于 L1/L2 词库) 2. 英式拼写检测color→colour 等 115 组映射) 3. Markdown 标记检测(`**`/`#`/`>` 等) 4. 标点规范检测(全角混入、``、`!!!` 5. 题型-阶段匹配校验20 种题型对应的 L1/L2 阶段映射) 6. 台词质量分析(句子长度、负面评价) 7. 知识点曝光度 8. 句型合规检测 - 接入方式:`<script src=\"l1_pedagogy_rules.js\"></script>` + 调用 `PedagogyRules.validate(summary, level)` - 数据源L1 词库 147 词(过滤 enabled=true、L2 词库 52 词、L1 句型 8 个、L2 句型(从 437MB bitable 导出中提取唯一结",
"recallCount": 2, "recallCount": 3,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 2, "totalScore": 3,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-05-29T00:23:17.391Z", "firstRecalledAt": "2026-05-29T00:23:17.391Z",
"lastRecalledAt": "2026-05-29T02:33:30.526Z", "lastRecalledAt": "2026-06-01T03:18:26.408Z",
"queryHashes": [ "queryHashes": [
"c1d4076205e1", "c1d4076205e1",
"4aa0ef719160" "4aa0ef719160",
"e30c130b9d1d"
], ],
"recallDays": [ "recallDays": [
"2026-05-29" "2026-05-29",
"2026-06-01"
], ],
"conceptTags": [ "conceptTags": [
"5-8", "5-8",
@ -2842,18 +2853,20 @@
"endLine": 66, "endLine": 66,
"source": "memory", "source": "memory",
"snippet": "8. 句型合规检测 - 接入方式:`<script src=\"l1_pedagogy_rules.js\"></script>` + 调用 `PedagogyRules.validate(summary, level)` - 数据源L1 词库 147 词(过滤 enabled=true、L2 词库 52 词、L1 句型 8 个、L2 句型(从 437MB bitable 导出中提取唯一结构) - 技术注意L2_pattern_list.json 体积 437MB1,082,450 条记录),直接嵌入前端不可行,已提取唯一句型结构后嵌入 - 所有 6 个测试用例通过 - 用户后续想尝试其他对接方式API 模式 / 飞书 Bot 联动 / CI 集成)", "snippet": "8. 句型合规检测 - 接入方式:`<script src=\"l1_pedagogy_rules.js\"></script>` + 调用 `PedagogyRules.validate(summary, level)` - 数据源L1 词库 147 词(过滤 enabled=true、L2 词库 52 词、L1 句型 8 个、L2 句型(从 437MB bitable 导出中提取唯一结构) - 技术注意L2_pattern_list.json 体积 437MB1,082,450 条记录),直接嵌入前端不可行,已提取唯一句型结构后嵌入 - 所有 6 个测试用例通过 - 用户后续想尝试其他对接方式API 模式 / 飞书 Bot 联动 / CI 集成)",
"recallCount": 1, "recallCount": 2,
"dailyCount": 0, "dailyCount": 0,
"groundedCount": 0, "groundedCount": 0,
"totalScore": 1, "totalScore": 2,
"maxScore": 1, "maxScore": 1,
"firstRecalledAt": "2026-05-29T02:33:30.526Z", "firstRecalledAt": "2026-05-29T02:33:30.526Z",
"lastRecalledAt": "2026-05-29T02:33:30.526Z", "lastRecalledAt": "2026-06-01T03:18:26.408Z",
"queryHashes": [ "queryHashes": [
"4aa0ef719160" "4aa0ef719160",
"e30c130b9d1d"
], ],
"recallDays": [ "recallDays": [
"2026-05-29" "2026-05-29",
"2026-06-01"
], ],
"conceptTags": [ "conceptTags": [
"l1-pedagogy-rules.js", "l1-pedagogy-rules.js",