From f013d6f6efaff59bb87f342c201e308f71eae507 Mon Sep 17 00:00:00 2001 From: ai_member_only Date: Wed, 13 May 2026 08:10:01 +0800 Subject: [PATCH] auto backup: 2026-05-13 08:10:01 --- .../reading/reading_info_match/SKILL.md | 65 +++ memory/.dreams/events.jsonl | 2 + memory/.dreams/short-term-recall.json | 140 +++++- memory/2026-05-12.md | 280 ++++++++++++ .../audit_l1_config.cpython-312.pyc | Bin 0 -> 14843 bytes scripts/audit_core_reading_S0.py | 276 ++++++++++++ scripts/audit_l1_config.py | 278 ++++++++++++ scripts/fix_matchInfo_0213_0218_desc.py | 343 +++++++++++++++ skills/audit_l1_config/SKILL.md | 124 ++++++ skills/bitable-reader/SKILL.md | 164 +++++++ skills/core-content-json-standard/SKILL.md | 393 +++++++++++++++++ .../references/component_registry.md | 38 +- .../references/json_structure_guide.md | 412 ++++++++++++++++++ 13 files changed, 2490 insertions(+), 25 deletions(-) create mode 100644 memory/2026-05-12.md create mode 100644 scripts/__pycache__/audit_l1_config.cpython-312.pyc create mode 100644 scripts/audit_core_reading_S0.py create mode 100644 scripts/audit_l1_config.py create mode 100644 scripts/fix_matchInfo_0213_0218_desc.py create mode 100644 skills/audit_l1_config/SKILL.md create mode 100644 skills/bitable-reader/SKILL.md create mode 100644 skills/core-content-json-standard/SKILL.md create mode 100644 skills/interactive-component-json/references/json_structure_guide.md diff --git a/business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md b/business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md index 7287a47..e8c167c 100644 --- a/business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md +++ b/business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md @@ -61,6 +61,71 @@ C. 
[匹配项C内容/图片描述] 【题目2】 ...(多套题重复上述结构) ``` +## 图片描述规范(强制执行) +阅读信息匹配题的每张图片必须对应一条**图片材料文本**,每条描述包含**标识/通知类型标签**和**图片上实际出现的文本内容**。 + +### 格式要求 +``` +图片材料文本: +【Notice/Sign Type】 +Actual text content on the image. +``` + +### 核心规则 +1. **类型标签必填**:每张图片必须标注 `【Type】`,描述该图片是什么类型的标识/通知(英文,不超过6词) +2. **文本内容**:记录图片上实际出现的文字,保持原文(含大小写、标点),不翻译不改写 +3. **L2 B级及以上**:图片上文字需为1句完整陈述(3-5词+),不能只列单个词汇 +4. **图片必须是真实标识/通知**(如告示、海报、门牌、标签、通知等),不能是标签式词汇拼图 + +### 参考示例 +``` +图片材料文本: +【Science Class Notice】 +Science lesson today! We live on EARTH. It goes round the sun. + +图片材料文本: +【School Notice】 +School HOLIDAY next Monday. Enjoy your time off with your family. + +图片材料文本: +【School Newsletter】 +New TERM starts on Monday. Please bring your pencil case and books. +``` + +### 常用标识/通知类型标签参考 +| 类型标签 | 适用场景 | +|----------|----------| +| School Notice | 校园通知、公告 | +| Library Notice / Sign | 图书馆通知/标识 | +| Exam Notice | 考试相关通知 | +| Door Sign | 门上标识 | +| Warning Sign | 警示牌 | +| Park Sign | 公园标识 | +| Bus Sign | 公交标识 | +| Shop Sign | 商店/维修店招牌 | +| Market Sign | 市场标牌 | +| Mall Sign | 商场标识 | +| Hotel Notice | 酒店通知 | +| Show Poster / Event Poster | 演出/活动海报 | +| Party Banner / Invitation | 派对横幅/邀请函 | +| Lost & Found | 失物招领 | +| Award Certificate | 奖状 | +| Greeting Card | 问候卡片 | +| Family Note / Personal Note | 便条 | +| Homework Reminder | 作业提醒 | +| Recycling Sign | 回收标识 | +| Room Sign / Lab Sign | 房间/实验室标识 | +| Safety Poster | 安全海报 | +| Information Desk | 咨询台标牌 | +| Laundry Sign | 洗衣房标识 | +| Art Club Notice / Art Room Sign | 美术相关通知/标识 | +| Sports Notice | 体育通知 | +| Science Fair Sign | 科学展标识 | +| Wanted Notice | 通缉令 | +| Toy Label | 玩具标签 | +| Canteen Notice | 食堂通知 | +| Building Sign | 大楼标识 | + ## 校验规则 自动校验不通过直接返回错误: 1. 
词汇校验:所有知识点必须在L1单词库中,禁止超纲 diff --git a/memory/.dreams/events.jsonl b/memory/.dreams/events.jsonl index f058d1e..faa252d 100644 --- a/memory/.dreams/events.jsonl +++ b/memory/.dreams/events.jsonl @@ -5,3 +5,5 @@ {"type":"memory.recall.recorded","timestamp":"2026-05-11T02:37:23.203Z","query":"writing_pic_qa 看图回答题 生产流程 bitable writing","resultCount":5,"results":[{"path":"memory/2026-05-07.md","startLine":215,"endLine":235,"score":1},{"path":"memory/2026-05-08.md","startLine":33,"endLine":55,"score":1},{"path":"memory/2026-05-07.md","startLine":199,"endLine":219,"score":1},{"path":"memory/2026-05-08.md","startLine":1,"endLine":23,"score":1},{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-11T02:37:23.204Z","query":"reading_pic_qa 看图回答题 口语看图回答题 speaking_pic_qa","resultCount":1,"results":[{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1}]} {"type":"memory.recall.recorded","timestamp":"2026-05-11T02:51:47.112Z","query":"reading_pic_qa bitable table_id 单元挑战 阅读","resultCount":1,"results":[{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-12T01:33:44.968Z","query":"reading information matching 图片 阅读信息匹配题","resultCount":3,"results":[{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1},{"path":"memory/2026-05-11.md","startLine":39,"endLine":55,"score":1},{"path":"memory/2026-05-11.md","startLine":20,"endLine":43,"score":1}]} +{"type":"memory.recall.recorded","timestamp":"2026-05-12T01:41:17.155Z","query":"reading_matchInfo skill SKILL.md 阅读信息匹配题","resultCount":4,"results":[{"path":"memory/2026-05-07.md","startLine":1,"endLine":20,"score":1},{"path":"memory/2026-05-11.md","startLine":66,"endLine":83,"score":1},{"path":"memory/2026-05-11.md","startLine":51,"endLine":68,"score":1},{"path":"memory/2026-05-11.md","startLine":39,"endLine":55,"score":1}]} diff --git 
a/memory/.dreams/short-term-recall.json b/memory/.dreams/short-term-recall.json index 3507e99..49b03c8 100644 --- a/memory/.dreams/short-term-recall.json +++ b/memory/.dreams/short-term-recall.json @@ -1,6 +1,6 @@ { "version": 1, - "updatedAt": "2026-05-11T02:51:47.112Z", + "updatedAt": "2026-05-12T01:41:17.155Z", "entries": { "memory:memory/2026-05-07.md:57:74": { "key": "memory:memory/2026-05-07.md:57:74", @@ -106,24 +106,27 @@ "endLine": 20, "source": "memory", "snippet": "# 2026-05-07 工作日志 ## 会话记录 ### 刘彦江 (ou_5af74c1fb96042e33cc0f16b5ca02cf4) — 单元挑战新增3个题型 - **时间:** 11:19 ~ 11:28 - **需求:** 单元挑战新增3个题型:阅读看图回答题(reading_pic_qa)、阅读看图判断题(reading_pic_judge)、写作看图回答题(writing_pic_qa) - **状态:** ✅ 已完成题型规范落地 - **交付内容:** 1. 创建3个题型SKILL.md: - `reading/common/reading_pic_qa/SKILL.md` — 阅读看图回答题(共享大图+多题Yes/No判断) - `reading/common/reading_pic_judge/SKILL.md` — 阅读看图判断题(每题独立配图+Yes/No判断) - `writing/common/writing_pic_qa/SKILL.md` — 写作看图回答题(看图+提示答案开头+填空) 2. 更新 `MEMORY.md` 全题型阶段归属规则 3. 明确题型JSON结构、难度等级(A/B/C/D四级)、能力标签、校验规则 - **关键决策:** 3个题型均为 L1&L2 共用,通过难度参数区分阶段;reading_pic_qa 共享大图,reading_pic_judge 每题独立配图 ### 李应瑛 (ou_1bd7317ae2ccfeb57e1132028847279e) — 单词表对比请求", - "recallCount": 5, + "recallCount": 7, "dailyCount": 0, "groundedCount": 0, - "totalScore": 5, + "totalScore": 7, "maxScore": 1, "firstRecalledAt": "2026-05-08T01:18:18.412Z", - "lastRecalledAt": "2026-05-11T02:51:47.112Z", + "lastRecalledAt": "2026-05-12T01:41:17.155Z", "queryHashes": [ "390d35f8d143", "fd4c9b7de37b", "be437071312f", "0e27779653c1", - "5c08c6f8788a" + "5c08c6f8788a", + "4ab75020b1ab", + "7ca0207f1308" ], "recallDays": [ "2026-05-08", "2026-05-09", - "2026-05-11" + "2026-05-11", + "2026-05-12" ], "conceptTags": [ "reading-pic-qa", @@ -229,6 +232,131 @@ "going", "ready" ] + }, + "memory:memory/2026-05-11.md:39:55": { + "key": "memory:memory/2026-05-11.md:39:55", + "path": "memory/2026-05-11.md", + "startLine": 39, + "endLine": 55, + "source": "memory", + "snippet": "- `基础信息表达|个人信息问答` — 姓名/年龄/家庭/外貌类 - `表达喜好与理由` — 
偏好/观点/意愿类 - `过去经历描述|Past Activities` — 过去事件类 - `互动应答|问答交流` — 社交场景类 - `信息交换|双向问答` — 物品归属/需求类 - 未使用标签(属看图/发音题型专用):图片描述类(3个)、图片对比、故事讲述、发音与语调 - **分类逻辑**:基于关键词匹配(what do you say → 互动应答; what did you/last → 过去经历; favorite/like/want → 喜好; name/age/from/family → 个人信息; whose/need → 信息交换) ### 刘彦江 — reading_matchInfo 阅读信息匹配题(4套: 010901/011001/011101/011201,18:55 ~ 19:15) - **需求:** 4套 reading_matchInfo,L2 A级,能力标签 标识与通知理解|标识 & 通知 - 每套 first=5q + second=5q,每题含 questionImage/question/options/answer/ability/explanation - **表:** 阅读-P1-信息匹配题(tblCgfYDnnqwLfgH) - **自动化问题:** 该表存在异步自动化,创建后会将 jsonData 改写为模板壳(减少题量+覆盖 ability) - **解决方案:** 两阶段延迟写入(先创建空记录→等35s让", + "recallCount": 2, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 2, + "maxScore": 1, + "firstRecalledAt": "2026-05-12T01:33:44.968Z", + "lastRecalledAt": "2026-05-12T01:41:17.155Z", + "queryHashes": [ + "4ab75020b1ab", + "7ca0207f1308" + ], + "recallDays": [ + "2026-05-12" + ], + "conceptTags": [ + "姓名/年龄/家庭/外貌类", + "偏好/观点/意愿类", + "物品归属/需求类", + "属看图/发音题型专用", + "you/last", + "favorite/like/want", + "name/age/from/family", + "whose/need" + ] + }, + "memory:memory/2026-05-11.md:20:43": { + "key": "memory:memory/2026-05-11.md:20:43", + "path": "memory/2026-05-11.md", + "startLine": 20, + "endLine": 43, + "source": "memory", + "snippet": "- **需求:** 2套 reading_pic_qa,L2 A级,能力标签 图文判断|句图一致性 - 每套5+5道 Y/N 判断,3Y2N平衡,4-7词陈述句 - **状态:** ✅ 已完成,2套全部入库 + 图片描述回填验证通过 - **脚本:** `scripts/batch_reading_pic_qa_2.py` | ID | 题组1 | 题组2 | record_id | |----|-------|-------|-----------| | 010701 | notebook/star/folder/diary/contact | friendly/crazy/brilliant/kind/afraid | recvjiQuGtsVp8 | | 010801 | uncle/aunt/grandma/neighbour/pet | expensive/entrance/upstairs/downstairs/toast | recvjiQvc8EsB8 | - 表:阅读-P7-看图回答题(tblweY65jGBiwSdt) - 当前该表累计:11条记录(000001~010801,含reading_pic_judge类型 4 条及其他) ### 刘彦江 — 口语-P1-日常回答 ability 标签填充(18:15 ~ 18:25) - **需求:** 从能力标签名称图片中,选择与已生成题目契合的标签填入 jsonData - 表:口语-P1-日常回答(tblRGv7k4WH58Jgq),26条有效记录,185道题 - **状态:** ✅ 全部完成 - *", 
+ "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-12T01:33:44.968Z", + "lastRecalledAt": "2026-05-12T01:33:44.968Z", + "queryHashes": [ + "4ab75020b1ab" + ], + "recallDays": [ + "2026-05-12" + ], + "conceptTags": [ + "reading-pic-qa", + "y/n", + "4-7词陈述句", + "record-id", + "uncle/aunt/grandma/neighbour/pet", + "阅读-p7-看图回答题", + "含reading-pic-judge类型", + "口语-p1-日常回答" + ] + }, + "memory:memory/2026-05-11.md:66:83": { + "key": "memory:memory/2026-05-11.md:66:83", + "path": "memory/2026-05-11.md", + "startLine": 66, + "endLine": 83, + "source": "memory", + "snippet": "1. 图片应是**真实标识/通知**(非标签式),如零售楼层导览那种 2. 图片上的文字**至少为一句话**,L2 A级难度,不能只有一个词 - **处理:** 4 套 reading_matchInfo 全部重新设计(题目+图片描述),两阶段延迟写入 - **新record_ids:** recvjj2Nuw9MJd / recvjj2NZz75zb / recvjj2Ov8Stks / recvjj2OX0Dmo5 - **改进要点:** - 每张图改为真实通知/标识,如 \"TRAINS to the NORTH. Go to Platform 3.\" - 图片文字至少 1 句完整陈述,含足够信息供阅读提取 - 题目围绕通知/标识内容提问,考察学生从标识中抓取关键信息的能力 ### 刘彦江 — reading_matchInfo 阅读信息匹配题(6套L2 B级: 021301-021801,19:30 ~ 19:45) - **需求:** 6套 reading_matchInfo,L2 B级,能力标签 标识与通知理解|标识 & 通知 - **B级规范:** 文本3-5词短句,图片为同类差异,简单对应;每张图是真实的标识/通知(非标签式) - **状态:** ✅ 全部完成(两阶段延迟写入策略,避免自动化覆盖) - **脚本:** `scripts/batch_reading_matchInfo.py` | ID | 题组1 | 题组2 | record_id | |----|-------|-------|-----------| | 021301 |", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-12T01:41:17.155Z", + "lastRecalledAt": "2026-05-12T01:41:17.155Z", + "queryHashes": [ + "7ca0207f1308" + ], + "recallDays": [ + "2026-05-12" + ], + "conceptTags": [ + "真实标识/通知", + "reading-matchinfo", + "新record-ids", + "每张图改为真实通知/标识", + "题目围绕通知/标识内容提问", + "021301-021801", + "文本3-5词短句", + "每张图是真实的标识/通知" + ] + }, + "memory:memory/2026-05-11.md:51:68": { + "key": "memory:memory/2026-05-11.md:51:68", + "path": "memory/2026-05-11.md", + "startLine": 51, + "endLine": 68, + "source": "memory", + "snippet": "- **自动化问题:** 
该表存在异步自动化,创建后会将 jsonData 改写为模板壳(减少题量+覆盖 ability) - **解决方案:** 两阶段延迟写入(先创建空记录→等35s让自动化完成→PUT jsonData),010901/011201 修复后验证通过 | ID | 题组1 | 题组2 | record_id | |----|-------|-------|-----------| | 010901 | north/east/south/west/exit(方向标识) | businessman/job/letter/call/guard(职场) | recvjiYAHtp05c | | 011001 | beat/dancer/drum/player/work(音乐体育) | aged/good-looking/mistake/weakness/be good at(人物描述) | recvjiXLdt5itq | | 011101 | dangerous/badly/worst/steal/run away(危险) | minute/close/kilometre/second/away(时间距离) | recvjiXLOOWMsq | | 011201 | summer/winter/Earth/holiday/term(季节) | examination/plus/minus/biology/study(学校) | recvjiYBd8Mejs | - **脚本:** `scripts/batch_reading_matchInfo.py` - *", + "recallCount": 1, + "dailyCount": 0, + "groundedCount": 0, + "totalScore": 1, + "maxScore": 1, + "firstRecalledAt": "2026-05-12T01:41:17.155Z", + "lastRecalledAt": "2026-05-12T01:41:17.155Z", + "queryHashes": [ + "7ca0207f1308" + ], + "recallDays": [ + "2026-05-12" + ], + "conceptTags": [ + "010901/011201", + "record-id", + "north/east/south/west/exit", + "beat/dancer/drum/player/work", + "dangerous/badly/worst/steal/run", + "summer/winter/earth/holiday/term", + "自动", + "问题" + ] } } } diff --git a/memory/2026-05-12.md b/memory/2026-05-12.md new file mode 100644 index 0000000..c4676fe --- /dev/null +++ b/memory/2026-05-12.md @@ -0,0 +1,280 @@ +# 2026-05-12 工作日志 + +## 会话记录 + +### 刘彦江 — 021301-021801 图片描述修正 + 技能更新(09:35 ~ 09:45) +- **问题:** 021301-021801 信息匹配题的图片描述缺少 `【Notice Type】` 标签,格式不符合参考规范 +- **处理:** + 1. 查询 bitable 获取6条记录当前图片描述(tblCgfYDnnqwLfgH) + 2. 按每道题的上下文匹配对应的标识/通知类型标签(如 Show Poster、Wanted Notice、School Notice 等) + 3. 批量更新6条记录的图片描述字段,全部10个 `【Type】` 标签验证通过 + 4. 
脚本:`scripts/fix_matchInfo_0213_0218_desc.py` +- **技能更新:** 将图片描述规范(格式要求、核心规则、参考示例、常用类型标签参考表)更新到 `business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md` +- **规范要点:** + - 每张图片 → `图片材料文本:\n【Type】\nActual text` + - 图片必须是真实标识/通知(非标签式) + - L2 B级及以上图片文字需为完整陈述句(3-5词+) + +### 刘彦江 — L1 配置表审校 + 技能沉淀(11:50 ~ 12:10) +- **数据源:** 飞书多维表格「互动知识点 - 句子」→「Level 1 配置表」(`Nq3Zb258aae7SRs2QfXcqsQYnxJ` / `tblTxGpf6GQ5c7DZ`) +- **ID 编码规则:** 7位 = 前2位(Season) + 中2位(Unit) + 后3位(序列号) + - 例: 1214001 = L1S2(12) + U14(14) + 001 +- **审校记录 1214001 (You need to...) 发现:** + 1. ❌ classificationInfo 值互换: cambridgeLevel=A1 vs cefrLevel=YLE(与列字段反了) + 2. ❌ sentenceStructureSort 题2: 单词库有need无needs,三单主语Otis语法错误 + 3. ⚠️ sentenceStructureClozeWordMcq: "put your toys" 省略 away 不够自然 +- **技能沉淀:** + - 脚本: `scripts/audit_l1_config.py` — 7项自动检查 + - SKILL.md: `skills/audit_l1_config/SKILL.md` — 含完整审校流程 + 4项人工审核清单 +# 2026-05-12 工作日志 + +## 会话记录 + +### 刘彦江 — 021301-021801 图片描述修正 + 技能更新(09:35 ~ 09:45) +- **问题:** 021301-021801 信息匹配题的图片描述缺少 `【Notice Type】` 标签,格式不符合参考规范 +- **处理:** + 1. 查询 bitable 获取6条记录当前图片描述(tblCgfYDnnqwLfgH) + 2. 按每道题的上下文匹配对应的标识/通知类型标签(如 Show Poster、Wanted Notice、School Notice 等) + 3. 批量更新6条记录的图片描述字段,全部10个 `【Type】` 标签验证通过 + 4. 脚本:`scripts/fix_matchInfo_0213_0218_desc.py` +- **技能更新:** 将图片描述规范(格式要求、核心规则、参考示例、常用类型标签参考表)更新到 `business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md` +- **规范要点:** + - 每张图片 → `图片材料文本:\n【Type】\nActual text` + - 图片必须是真实标识/通知(非标签式) + - L2 B级及以上图片文字需为完整陈述句(3-5词+) + +### 刘彦江 — L1 配置表审校 + 技能沉淀(11:50 ~ 12:10) +- **数据源:** 飞书多维表格「互动知识点 - 句子」→「Level 1 配置表」(`Nq3Zb258aae7SRs2QfXcqsQYnxJ` / `tblTxGpf6GQ5c7DZ`) +- **ID 编码规则:** 7位 = 前2位(Season) + 中2位(Unit) + 后3位(序列号) + - 例: 1214001 = L1S2(12) + U14(14) + 001 +- **审校记录 1214001 (You need to...) 发现:** + 1. ❌ classificationInfo 值互换: cambridgeLevel=A1 vs cefrLevel=YLE(与列字段反了) + 2. ❌ sentenceStructureSort 题2: 单词库有need无needs,三单主语Otis语法错误 + 3. 
⚠️ sentenceStructureClozeWordMcq: "put your toys" 省略 away 不够自然 +- **技能沉淀:** + - 脚本: `scripts/audit_l1_config.py` — 7项自动检查 + - SKILL.md: `skills/audit_l1_config/SKILL.md` — 含完整审校流程 + 4项人工审核清单 + +### 刘彦江 — 1213001-1216010 批量审校 40 条(12:10 ~ 14:00) +- **范围:** L1S2 Units 13-16,每单元 10 条,共 40 条 +- **方法:** `scripts/audit_l1_config.py` 自动化扫描 → 误报排除 → 分类汇总 → 逐条审校回填 +- **脚本修复(过程中):** + 1. sentenceStructureSort 单词对比大小写误报 → 添加 normalize(首字母大写 + 去标点) + 2. ID 末尾空格敏感 → strip 比较 + 3. 句子朗读/跟读题误报 options/answer 缺失 → 跳过这两种题型 + +#### 审校结果汇总 +- **🔴 系统性问题(40/40,100%):** classificationInfo JSON 内 cambridgeLevel="A1" 与 cefrLevel="YLE" 值互换,列字段正确但 JSON 存储写反,疑似模板/formula 级别 bug +- **🟡 具体错误(17/40):** + - sentenceMeaningMeaning JSON 解析失败 × 10:1213004, 1213006, 1213010, 1214008, 1215005, 1216001, 1216004, 1216007, 1216008, 1216010(后8题含中文引号冲突) + - ID 字段末尾空格 × 3:1213001, 1215001, 1216001 + - 看图选择题有CN无JSON × 3:1213008, 1215010, 1216005 + - sentenceStructureSort 语法/格式错误 × 3:1213004(缺逗号clean Otis→clean, Otis), 1213005(bird?连写应拆为bird+?), 1214001(need→needs三单) + - 缺少 pointId 字段 × 1:1213001(sort 题2) +- **🟢 仅有系统性问题的 23 条:** 其余 23 条仅 classificationInfo 互换问题,其他题型均通过自动化检查 +- **回填:** 全部 40 条审校结果已写入 bitable「审校结果」列,逐条验证通过 +- **脚本回填脚本:** `scripts/audit_batch_1213001_1216010.py` +# 2026-05-12 工作日志 + +## 会话记录 + +### 刘彦江 — 021301-021801 图片描述修正 + 技能更新(09:35 ~ 09:45) +- **问题:** 021301-021801 信息匹配题的图片描述缺少 `【Notice Type】` 标签,格式不符合参考规范 +- **处理:** + 1. 查询 bitable 获取6条记录当前图片描述(tblCgfYDnnqwLfgH) + 2. 按每道题的上下文匹配对应的标识/通知类型标签(如 Show Poster、Wanted Notice、School Notice 等) + 3. 批量更新6条记录的图片描述字段,全部10个 `【Type】` 标签验证通过 + 4. 
脚本:`scripts/fix_matchInfo_0213_0218_desc.py` +- **技能更新:** 将图片描述规范(格式要求、核心规则、参考示例、常用类型标签参考表)更新到 `business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md` +- **规范要点:** + - 每张图片 → `图片材料文本:\n【Type】\nActual text` + - 图片必须是真实标识/通知(非标签式) + - L2 B级及以上图片文字需为完整陈述句(3-5词+) + +### 刘彦江 — L1 配置表审校 + 技能沉淀(11:50 ~ 12:10) +- **数据源:** 飞书多维表格「互动知识点 - 句子」→「Level 1 配置表」(`Nq3Zb258aae7SRs2QfXcqsQYnxJ` / `tblTxGpf6GQ5c7DZ`) +- **ID 编码规则:** 7位 = 前2位(Season) + 中2位(Unit) + 后3位(序列号) + - 例: 1214001 = L1S2(12) + U14(14) + 001 +- **审校记录 1214001 (You need to...) 发现:** + 1. ❌ classificationInfo 值互换: cambridgeLevel=A1 vs cefrLevel=YLE(与列字段反了) + 2. ❌ sentenceStructureSort 题2: 单词库有need无needs,三单主语Otis语法错误 + 3. ⚠️ sentenceStructureClozeWordMcq: "put your toys" 省略 away 不够自然 +- **技能沉淀:** + - 脚本: `scripts/audit_l1_config.py` — 7项自动检查 + - SKILL.md: `skills/audit_l1_config/SKILL.md` — 含完整审校流程 + 4项人工审核清单 + +### 刘彦江 — 1213001-1216010 批量审校 40 条(12:10 ~ 14:00) +- **范围:** L1S2 Units 13-16,每单元 10 条,共 40 条 +- **方法:** `scripts/audit_l1_config.py` 自动化扫描 → 误报排除 → 分类汇总 → 逐条审校回填 +- **脚本修复(过程中):** + 1. sentenceStructureSort 单词对比大小写误报 → 添加 normalize(首字母大写 + 去标点) + 2. ID 末尾空格敏感 → strip 比较 + 3. 
句子朗读/跟读题误报 options/answer 缺失 → 跳过这两种题型 + +#### 审校结果汇总(初版) +- **🔴 系统性问题(40/40,100%):** classificationInfo JSON 内 cambridgeLevel="A1" 与 cefrLevel="YLE" 值互换 +- **🟡 具体错误(17/40):** + - sentenceMeaningMeaning JSON 解析失败 × 10:1213004, 1213006, 1213010, 1214008, 1215005, 1216001, 1216004, 1216007, 1216008, 1216010 + - ID 字段末尾空格 × 3:1213001, 1215001, 1216001 + - 看图选择题有CN无JSON × 3:1213008, 1215010, 1216005 + - sentenceStructureSort 语法/格式错误 × 3:1213004(缺逗号clean Otis→clean, Otis), 1213005(bird?连写应拆为bird+?), 1214001(need→needs三单) + - 缺少 pointId 字段 × 1:1213001(sort 题2) +- **🟢 仅有系统性问题的 23 条** +- **回填:** 全部 40 条审校结果已写入 bitable「审校结果」列,逐条验证通过 +- **脚本回填脚本:** `scripts/audit_batch_1213001_1216010.py` + +### 刘彦江 — 审校结果修正 + 解析一致检查 + JSON 修复(14:00 ~ 15:38) +- **YLE↔A1 误报排除:** 刘彦江确认 YLE 与 A1 等级可互通,classificationInfo 值互换不视为错误 + - 全部 40 条审校结果已更新,移除 classificationInfo 互换相关告警 + - 23 条变为 `✅ 通过`(无其他问题),17 条保留具体错误 + +- **解析与题目/答案对应性检查(人工审核项):** + - 检查题型:句意选择题 ×30、句子补全题、听句作答题、场景选择题、看图选择题 + - 验证方法:提取 explanation 中的英文引句 vs question 中的英文句子、正则提取"空处应填X" vs options[answer] + - 结果:✅ **0 条实质性不对应** + +- **10 条 sentenceMeaningMeaning JSON 修复:** + - **根因:** explanation 文本中的 ASCII `"` (U+0022) 被当作中文双引号使用,JSON 解析器将其视为字符串终止符 + - **修复策略:** 从「句意选择题」中文列提取英文原句和选项 → json.dumps 重建标准 JSON(只修格式,不修内容) + - **1216007 特殊处理:** 原始 JSON 损坏严重,需从中文列完全重建 + - **二次修复(Q2 丢失问题):** 首次不完全修复导致 9 条记录的 Q2 被覆盖丢失,且 Q1 被 JSON 垃圾污染 → 从中文列全面重建 Q1+Q2 + - **最终结果:10/10 条记录 sentenceMeaningMeaning 均可 json.loads 正确解析,每条含 2 题,无残留垃圾数据** + - **受影响记录:** 1213004, 1213006, 1213010, 1214008, 1215005, 1216001, 1216004, 1216007, 1216008, 1216010 + - **审校结果同步更新:** 移除所有 10 条的 `❌ JSON 解析失败` 标记 + - **注意:** Q1 explanation 在重建时词表述可能微调(内容含义一致),如需精确原始 wording 可从中文列恢复后人工微调 + +## 经验教训 + +### bitable 写入需严格流程管控(2026-05-12) +- 批量更新 JSON 字段时必须:写入前完整提取原始数据 → 写入后逐字段 diff 验证 → 发现异常立即回滚 +- 本次 JSON 修复中首次写入不完整导致 Q2 丢失,原因:修复脚本未正确处理多题 JSON 结构 +- **教训:** 任何涉及 JSON 重建/修复的批量操作,先单条试运行验证通过后再批量执行,不可并行甩出多条 + +### sentenceMeaningMeaning 的 explanation 引号规范(2026-05-12) +- explanation 
字段中的中文双引号极易被误用为 ASCII `"` (U+0022) +- 建议生产侧统一使用中文引号 `"` (U+201C) 和 `"` (U+201D),或在 explanation 写入前做引号归一化预处理 +- 此类问题在常规 JSON 校验中不会被发现(JSON 仍有概率合法,取决于引号出现位置),一旦出现在 JSON 字符串边界附近即导致全段解析失败 + +### 刘彦江 — 核心互动全题型 JSON 配置标准沉淀(17:05 ~ 17:50) +- **目标:** 从互动内容库遍历全部核心互动题型,提取 JSON 配置结构,沉淀为统一 Skill +- **方式:** 先写 `bitable-reader` 通用读取 Skill → 遍历 15 个 bitable → 归纳通用字段 → 分类录入各题型 + +#### 架构决策 +- **统一入口,通用在上:** 一个 `core-content-json-standard` Skill 覆盖全部题型,不拆分为题型独立 Skill +- **五大分类:** 📖阅读(2) / 🎧听力(3) / 🗣口语(4) / ✏️写作(5) / 未完成(1) +- **通用字段提取:** ID/dataStatus/kpStatus/kpInfo/taskData 是所有题型共用的基础结构 + +#### 覆盖率 +- 14 种题型已录入 ✅(1 种无数据:口语探讨) +- 4 个一级板块:通用结构 / 题型专区 / 审校规则 / 扩展指南 +- 每个题型:cType + bitable 定位 + JSON 字段表 + 结构特点 + 与同类题型的差异说明 + +#### 发现 +- 写作互动和邮件组句 cType 相同(`core_writing_questionMakeSentence`),通过 textInfo 区分素材 +- 口语快答/妙问 JSON 结构几乎相同,差异在 prompt 配置和对话样例内容 +- 合作阅读和合作听力的核心差异:`textData.text[]` ↔ `textData.audio[]` +- meaning 标签(合作阅读)为开放型自由文本,非受控词表 + +#### 产出文件 +- `skills/bitable-reader/SKILL.md` — 通用 bitable 读取技能(164行) +- `skills/core-content-json-standard/SKILL.md` — 全题型 JSON 标准 v2.0(393行) +- `scripts/audit_core_reading_S0.py` — 合作阅读 S0 审校脚本(含审校发现) +# 2026-05-12 工作日志 + +## 会话记录 + +### 刘彦江 — 021301-021801 图片描述修正 + 技能更新(09:35 ~ 09:45) +- **问题:** 021301-021801 信息匹配题的图片描述缺少 `【Notice Type】` 标签 +- **处理:** 批量更新6条记录的图片描述字段,全部10个标签验证通过 +- **脚本:** `scripts/fix_matchInfo_0213_0218_desc.py` +- **技能更新:** 图片描述规范写入 `business_production/单元挑战/skills/unit_challenge/questions/reading/reading_info_match/SKILL.md` + +### 刘彦江 — L1 配置表审校 + 技能沉淀(11:50 ~ 12:10) +- **数据源:** `Nq3Zb258aae7SRs2QfXcqsQYnxJ` / `tblTxGpf6GQ5c7DZ` +- **ID 编码:** 7位 = S(2位) + U(2位) + 序列号(3位) +- **技能沉淀:** `scripts/audit_l1_config.py` + `skills/audit_l1_config/SKILL.md` + +### 刘彦江 — 1213001-1216010 批量审校 40 条(12:10 ~ 14:00) +- **范围:** L1S2 Units 13-16 +- **脚本修复(过程中):** sort 大小写误报 / ID 末尾空格 / 朗读跟读题误报 +- **结果:** 系统性 classificationInfo 互换(已标记不视为错误) + 17条具体错误 + 23条通过 +- **脚本:** `scripts/audit_l1_config.py` + 
`scripts/audit_batch_1213001_1216010.py` + +### 刘彦江 — 审校结果修正 + JSON 修复(14:00 ~ 15:38) +- **YLE↔A1 误报排除:** 确认等级互通,40条审校结果已更新 +- **解析与题目一致性检查:** ✅ 0条不对应 +- **10 条 sentenceMeaningMeaning JSON 修复:** + - 根因:explanation 中 ASCII `"` 被用作中文引号 + - 修复策略演变:状态机拆分失败 → 正则重建 → 发现Q2丢失 → 从中文列完整重建 + - 最终:10/10 可正确解析,审校结果同步更新 + - 受影响:1213004/1213006/1213010/1214008/1215005/1216001/1216004/1216007/1216008/1216010 + +### 刘彦江 — 核心互动全题型 JSON 配置标准沉淀(17:05 ~ 17:50) +- **产出 Skill 1:** `skills/bitable-reader/SKILL.md` — 通用 bitable 读取(任何 bitable 通用) +- **产出 Skill 2:** `skills/core-content-json-standard/SKILL.md` v2.0 — 全题型 JSON 标准(393行) +- **架构:** 通用字段在前(ID/kpInfo/taskData)+ 5大题型分类(📖阅读2 🎧听力3 🗣口语4 ✏️写作5)+ 审校规则 + 扩展指南 +- **覆盖率:** 14/15 种题型(口语探讨 S0 无数据) +- **产出脚本:** `scripts/audit_core_reading_S0.py` — 合作阅读 S0 审校 + +## 经验教训 + +### bitable 写入需严格流程管控(2026-05-12) +- 批量更新 JSON:写入前完整提取 → 写入后逐字段 diff → 异常立即回滚 +- 首次修复多题 JSON 导致 Q2 丢失 → 先单条试跑验证再批量 + +### sentenceMeaningMeaning 引号规范(2026-05-12) +- explanation 中 ASCII `"` 与中文引号混用导致 JSON 解析失败 +- 建议:生产侧统一用中文引号 `""`(U+201C/D),或写入前做引号归一化 + +### 写作互动 / 邮件组句 cType 相同(2026-05-12) +- 两者 cType 均为 `core_writing_questionMakeSentence`,通过 textInfo.textList[].type 区分素材 + +### 刘彦江 — 中互动全题型录入(18:00 ~ 18:25) + +#### 发现 +- **中互动结构高度统一:** 全部 27 类都用 `jsonData`(主配置)+ `kpInfo`(知识点绑定),没有核心互动那种多字段分散结构 +- **6 大分类:** 🗨对话(7) / 📋信息(6) / 📦词汇(2) / 🖼图片选择(4) / 📝语法(3) / 🔧其他(5) +- **对话类公共字段:** cType, cId, title, resourceMapping, preDialog, imgShowTimingList +- 写作互动和邮件组句 cType 相同(`core_writing_questionMakeSentence`) + +#### 产出文件 +- `skills/interactive-component-json/references/json_structure_guide.md` — 27 种中互动 jsonData 字段结构(8736 bytes) +- `skills/interactive-component-json/references/component_registry.md` — 核心互动部分从🔲更新为完整注册表(含 cType+bitable_token) +- 与 `skills/core-content-json-standard/SKILL.md` 双向关联 + +#### 覆盖率 +- 核心互动:15/15 ✅(口语探讨 S0 无数据已标注) +- 中互动:27/27 ✅ +- 关联文件:3 个(core-content-json-standard + component_registry + json_structure_guide) +## 刘彦江 — 中互动+核心互动 回填任务(18:37 待继续) + +### 上下文 +- 
用户提供剧本文档: `https://makee-interactive.feishu.cn/wiki/YqzzwpjwjihK9YkyRhXccaqLnzu` +- 要求:识别文档中的 3 个中互动 + 1 个核心互动,回填到对应位置 +- 上次已读取文档,找到 1 个核心互动标记(第4幕 听力拖拽),但 3 个中互动标记不明显 +- 用户让我"Continue"继续处理 + +### 当前状态 +- 需要重新拉取文档,仔细识别所有互动标记 +- 文档位于 space_id=7487521512283799556 +- 上次发现的场景:太空故事 / 太空船起飞 / 小行星带 / 发光的星球 + 尾声 +## 中互动+核心互动 回填任务 — 继续(18:37) + +### 本轮 Session — 实际写入 bitable + +上一轮已确认:文档中 4 个场景对应 4 个互动类型(3中+1核),标签已写入文档。但用户说的"回填到对应为止"指的是**写入 bitable 多维表格记录**,而非仅文档标签。 + +### 需要写入的 4 条记录 + +| 场景 | 互动类型 | cType | bitable_token | +|------|---------|-------|---------------| +| 第1幕 太空故事 | 对话朗读互动 | `mid_dialog_repeat` | `MuwubKXoSaJe55sO4k9c63VSnmh` | +| 第2幕 太空船起飞 | 对话互动 | `mid_sentence_dialogue` | `GH0hbBT0ZavkCesaVQ7c40Zfnub` | +| 第3幕 小行星带 | 指令互动 | `mid_vocab_instruction` | `UorxbVCNYaqpx3sOUdTcF6bUnAh` | +| 第4幕 发光的星球 | 听力拖拽 | `core_listening_drag` | `Uey8bhdLBa66TIsGrFBcIQKRnsg` | + +### 下一步 +1. 读取 4 个 bitable 的表结构(字段名) +2. 基于剧本内容和 JSON 结构标准,构造 jsonData/kpInfo 等字段 +3. 调用 Bot API create_record 写入 +4. 写后回读验证 diff --git a/scripts/__pycache__/audit_l1_config.cpython-312.pyc b/scripts/__pycache__/audit_l1_config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7c99df671a92e4e01f9658407728e4b90b1bafa GIT binary patch literal 14843 zcmdUWYg80x-e*;H^}V6FGzjgjgs8L%h{P*+1)?Y#1uqdbou;dWM(oC_ZUv^9x3e*0 zGa*AWF+&@5(wJoIjENy<^1d%I$&9n_>^aWKd?;)@c53SR3H%VAI%i*2reCnQYb-O`{f4XG2~cYhrU+s75`Td!A@Cu=5~HWAh+PXY(O6vStV~ z*!d7T#cPxVA8rQjC+`!W`oqb~ndbo$Lu6gRevcwCbbB{+_>n-urO)^@pE_ z#{-w7MvT82fkx{dqt5N?DlO&QZck~6yWQckx7p5>+Bpa7aCw}zHommOX>)hlT&3~$ zwDNA3MQfqDGLARxavX0eS#i9{<#4dao4z+dVd(MepZyZucu16ays4pOMk-DLsif?+ zp5*NEhpgjG&wHGFs!vebLHFr7zU=kR^`+Vdt($JAToIJE!{JKxMRUBV#da2&?L}RB zINUS-`kT_YSyUqJ>1=n1l#>D(fnGxmXZOsk~Eoz%=ywhIm zI_Va(V9t2ndD3aOd7N$+j#Ej~C2Dbc6!pCknlOIl*2E>WBqm;a|M9hu6c_ve7VpC^ zt~~niVj`~Zy@~UEGt)h~^4j?2-%rP(v5{8&#Fc@`k3N$M!`oo;-`|Faq{3ipOb)&W zt1db2*RMVL?2U?3G5fs>SW*9eeF(#Cfy6?6rC9E$Bn;><{tz 
zFwctL@3gOJfYMQ(4EfuB&h4sq*jO=*OvjaE6K9p)i%so)9iuy3D0FhF=n`HP2=pKiqC))L)>2dztd zwW6G}E*~MM?Iu}3a+bIeFNGvLGR_mNSQgC8%98&An4b@ktOAEL3Vk!Jsk9Wq zS;%JzHK1b8K^YtTwyd!SfQbmcj%%IG3$WqycSt4)s@5 z!XwO@7H$m=z($XSg82e41`yDpJ1@nOB8}it%uZ1-LJrg*k*=z*spiU1xpbQwW>>>G z@N(cqy4}rtEDDi4DUx=PMKSXV_!z1;MtAqlV z!9VYXz)y^6(|Xz0%u@uZ%nzO!Gi3DD_S6P#uhj*}zbPQ@p(dj*ttU;$JrP^BJfa$O z4v~XxL+eMDRop4NbNJ62g+qt`Tq(R@AKAl>%y)dJAhpe;fBOTyp>JK!y4N-cCd-I^ zg+Q%&{WoOt~j zmjPuyrGC=w1r?C^`f6{gl6N;ezmJ)`{?5d&gX8ahJo(9c-YiHw-qSxZ_}--dmC29Z ze01yjc;JReU=3p8gXKebK0T@!;N}CShak$hzHkPTyU3ET&IG@E2Se)h`Mc-_@7K3|W19d-e(o z_TFg{4%kNaH3@s|g1uEZ)giQYP7y>Gxs!t1E^0Tr?V%1+-=Mc6=xrzU0=m6GS<&q! zss-JwloQ>YR4cm8KSg~{5NU1H1G%D_+f734(UIpGg_>i+F(0J)DY*wF?I!DRj=j`( zkYgVe??>R&sM>s~BeEm%llPw=?zp>ZXS@?aRzXx>816{!#kSj7xA**Uv9RrcaG*^% z?G@V23g&a8hK`s%)4%ucQ~9K_GM2GPFjm~vR7iM#erdIe_>)Rmou;rOe3vf`JYc*Z zgUP{oulT>id(bRM8N+2P>!v*k9QDy2R2RPFbUf7zSmL9qi4!YfWRyOok3LKC=|0+r zHItXM@)JuVWai1<9G{{UZQWTHOZiAZE+}Cpa`hI|#GLYkmfO)m@}S4CbZgqIV&U~h zJgQseqf^Sc1j2G#d@8QegOq%H`DjV{$fNONrTbJE#TWi)?=*6W&{M5X1wB>CWs`(Y z5yxn0BotR?W3|q^0-6=aY7Cze=stSu{P-VU9Ur_Qy^i1Pop|$;3IBC&3nc#@nNA2H zF}yNAKwWMteD|6lU><#bYka6bxhO`YYOAr`)kT*$+0EEI!C)r=4O3ga*`mRS#BGNn zxE&CPIw$XR@g5se{%O`}_i$AxS_!tDvt3lc&7$V2anwntqb;tzIl*4$YEUfA+B`N< zamvvtD!|g?(F(*s#V{O(Dw70NNKdpPU9*{UdOEoykZc?Lqm_`1QftPH*%)}1y}jbn zitwRGb<|uIHLeNJ->C>gZjilFJ*r<4GiC;~G2^@^1f{GZ1B#eFJ;?TK4YA?!OHN@? 
z>Cm&oijOzmrhd0I4n?8ra86h)EUy@@8eS`yx8F1Dm{Ov=QyKy=WK++kARF2d)i1uQ zUm9L>U$<&Rw<@YD@mIyv`U@Q|cLeG4KfP4=Zef^>JUiI>PU-!E^&8}1ft zi)qr4))=BgheA8U&qnCT!S{7x-_X|E<+qm#xw}R+wQ;DJM=ICEvT}poV6(8WRLEE@ z7|ZTz%4E=}Vu=4>v{fr-f(F`v7&QI_>YxM~u*qQDW?#ulU^fjA0O_UcEWy#Nf~8w1 zmv-6&N^CZzCt(Z$I5Yq_sC+7)5^|Bf8h9cACdzz-PleRGmsd@jLUNoZVG?DCRP~-o6WEtGv(aVcUFbLMvb>%lH%5T2*8HoGC&o}o=z=4$$fKy zV%)8sW4scoD1eC?z$5`kK0;D7g29%c-H(_+NBrUZ^aH9Z{uEh7^I%=ky2PJTRyU@`%X2o*8( zam&#Y(o?aW`aJ@hxbo*shi4fTv3kpJOChRDTM{QBHghmE1h)qQFOnLJL_jc)KDjpj z`J0Rvt!-xV-(DTRaEpnP6wGArE8~|xVOIb2G1_hxy@Z+B`|F?IcW7T#?e>GU&+oGy zJi5Q80iY7JJKX_mea(SGwaDV#2RXpvbpnJQK#z(xhYN!%w;#U(7+Ulqc}7&8K@1yr z7-A(%)!_)M%XS(t8R(RD?hwjh0DQG#Ty6*SMMEK~r9SW?X~TAA9C^`ALf>o&edW=$ zIz5ilTnl8ahku@%2H#j_)<8~wPVj7qi)I%1cRzThWOPl<$TKy8+F-|sp7~4pmuvmi zW5)a^1g+c_$J}RnDq;o`LSI`?WvDFFaA{p=%LlE)nZxCu=KkrKzbJ0czgMz%wEmEA z_=xbrv5~{aqxB~QTT`^YNe1Sf;hOOJNXc;auu?E@xo6lqrA6g`V;~Bb1Wmiyf1qLM@QB3F4cX|FqAp8@#6*2;w|@!t4E5fqs7nNS#`JgunfiOurhQm z4Cecgef~X~o@Y^8vf*xZ$GTu{S{Pd3h zxqgl`pyT6T6kuk|Y(G#)1Ns zr~%{&x7nv^O^|5Q6|o5uzFW=e&J*3>hVW^8O1U2VmQedJ*5(1`BdIv+0)hY(Qr3X* zl})>vD1iiGg-yRkCbc&qrKXDj$4bgE=+T42MQct_gL0p`AZxdTxf>AX`jo8+Wpuh! zpHA)vR;P7rCdg8<*UV;N`I^sW_vp`(T+SRl0-T1vP;g_Qh3D^(>ko z*Lx7^kE1Kl(@C`RUq62f@OJW}*T-+YG5N|DKnP5{_Bj9^FyRw}0dSi<`rreg{~zAE z_2~K#ass?LB=H6lAHMTw@a8oBih4&!dz;OL33!anPf2D5wS`twIsAW%;QCE+543v# zjDr)f0=mf_P@56n{)!wRfy~s?L?`i~0_aR~-}?j*f>gf-~Ghh1?wYA zLWja-S6>)eu>ODYNXWcm%p$#rGDJGKOgA^kOgN&tX9^8TT1<~9mKNT5P=GXE(y+n^ zG6h8?E4Nw-xJLLkW0+92NPYSCJOpTfQg3>B`nw?ow+fUA%r&Rlva;gC5g%nB5GZ1lcL%ZDE z>8_%GW?oRrPUthg83I2M%g7wi^lL&nZx{pHzcOY74+PKj9~?N|fBfQ$F%!UWe>Gw# z-lzj4J`k=CuN^TJ1<6>pDX@!kLJ0}>D)qEUb0jTxrU08z1=uzMQtR=BpO3!->--l? 
z!BesTt-f6jQ2}l|FsMApqgQlndvW85W47Y16DuvpoA!Y>imVkCN8MhLIszj4V2h)( zONm^7jEHZ%*9=XXdl7k&@Rr7ZM|mm*T& zY*XXS|vYiny0bzsYJ9{!xu(_%4ji26=aEYy(53EV$-3gUs@ z=Xu9z@UyZad0N!44xp2eUDts=&hxZ5!N&^5VY3&at-QCX)nWGlXOEsZyqOS)B2^=P zZIe_E^W3w~{Q`2J+2X4~B*7-c>=dXFeRVx`q5SZUsA0LkCYGMt*VfY(+7PafrWb)g z&N2_I>R%P=jxXr9otHs>21VaNz-U)y?#4R5&Yx~pH6psZ?cJ8vHo z_Ma4*-NMOsp`8=V{HVe6^`f#*cKrM9k9P}O8-znI+&^?;MZ3VB6dWgo7n`Gp zTB6T7M;18+dVVZpInt7{0(4Aoy02e6qF)@Q@9CFDo)OmVzq?|8%$OUpj2H_e>LKq) z5pYMj18Mzf0<&rO+2Livwq{{b%V>r(X2`@=hSlel_ctW);__w|E^}g zWH^3iuFfU?l&h>>s7S3=fJBuHN6G_(FI_)|9biWMnIMisu5JYwk|{k7J~+)Fi$cUD z@%bRakU~D|0F9|4oHe;g4 z4{!tiXiyR`-6WY=N1-K8$s(@??iO$>+Ey-{~17i6hF@>!!3nQ`gpfo)C?EF*m z*i-TZSAa4YMB{-&H4WHWdakynuDU@~B1?qF&?Xs0Wsp$frI6voy$FG*b-28z9UQop zvCSf?939|kWHF3bbeNW!MmG+bG@{au%_aqBJ11&ovxn!}QJFTBKy$__5$-(VEM)AG z?WHUUyyT4F==5-xAzcsr^9um9;+Qp-xBPxy@km~AG_Ujtk)_;0_80=nfH#(I>TB(3 z4dsNXXnJ8l@s%-4wiF~wXGx?cvVN#YFjtHkDgpVXD8Qq!yakAq50_sy25JGsGRz^% zy9Y;&E9AVAm2oDL6H!Ng3U<<{0hle*m}z0i88sC>A(UEZKy_ey;LO;(6_K)`oFVnl z88E{X!(|coFdv;)71$-0RWxcSd|)&Qd8JWfX&ie-7j1ylRnbM;?qzKMMvsb{nvZ%l zwJeT1`II(CE+MHkLg@~{Ts3N_{>or_$|HgBM}1D!QtCgJnyX3W3{3|uCT!&7!7xg` zPEgCqYb5f4!@fH=192eD>P>M0(K+}mR6)i&AL)+Qo!jdWXzCv7m!^_&g@h1 zd2{#=X^bJjmIDIJ*j12^>^%WF$=;Xd%!_iVaS1c>_IAQcT`{-{c>Lz*PlaJ891$x7E*H9l!bM!(spA1;iU$G!}|O@WU;H0G^TNA?-SfL!iPf zmf~~K^CIaPF+$t|=^5>44mq+QFH*_5CDL;qdS+07Narr<*a;57g2X-pXh=U%>_zb?V9jLJBZC(Yhac)_nJ=CY+XaPE2qyV>?5vq@-G2k4d6tF=va|X=)=FoF*@4mD<+%@!Ubisyb z=ElH|Z@{Ea?@wYgs={l9qRQdM;kr9&!MuOea6mL>{xT`S?rOH_xNqYeDqpKTE9(GgH zJIY6T5_?MCFI>5gOufown*$r?49KR0RF_IeAqz4Sc7oD_`h_o$bMak1(~I;ET9E;5 zND#GhS<{{)mu}k3pRS=>F|9MCBAMkL01mjx-VK0{LN9_7Vr~gX8U)8 zi%OFfEWF5%s^^ClVg7p8VAngoyG(gZlXYK{KcdMO7Hk>jhaI=q{iX1Z{mY^|X2E=L zRC8!-u_aRY0YAF9Jg6AZ_G>TdKhGX+_$<#~3q>wieSiMik@;(*^VdItb4#@dr<8$? 
zM46-JlE~R1?{G`hyz`!B*A$#%szrV+rSU$UGeYN#=9UhrqPgow=?y?YPRABSa)y<&5}>;pd{cRikwE1HCEOD(D%3X0VBEhe1eOXcuE?{u^KyQlGm^C6(nB$dae3 zVZySfT~w)3vzR2pE}W!OQ9qt1QG?>2ssW$YgBs{=GS%6^RIN(Xp!}z5z`k{;0pV-H zJ_4r}I7dJ)08E^5_J`uqlTvAX6nv-T8sQXTq68VSl20beaX|V~L6Xa2wcRw-i*zM4 zU-)WGMSbdUYvN9wt4egv9BC|9DJRJqL1)Ttakqh8 z+@1FS@j3xXFuhK?(-ZTaF3o$=_@~SLm&YH*hwNSAGo%{zF3>c(4Zbv%#7}8Se6Bcu-S;c03+*s-(y) z*4o!sH$FyK??tA>)Y}(dXYkSekA6>o)rvNuGk^xcn4^Jt_ zfAvv%wyYEk0lu_LQ;L7|fuAIa%!_H;;ViS8b9Q0RLzj97aG?&4dAtVdgj34ecKEdb zu$Em8QHLM#&qv^cx&;oFJ2*IZ-frV;z&sp;3|Ny8RdFi;X*S%u5Qv%<8*e@BhJt?w zuh^PFWbVY3-%j-WempS5bkUnPZ)Utm&1KNj_^Tf?Ud$(eFZ=PpO<+{ID29;^2r^$^ zd3E|nG15pq_P;gxhYOfv@1iy_T{=c0C13;}Bh3dW_>vMYQZ#kaPgf>-Zh$$6eBy!N zCMIrRCG@o`J&Z-mA-xIw(49ci+PLPkqOzR>Co?_+&8>rPNfsE=Y%E56!dp5iEo$T+ z*oazSkkJWhIP9&&!-q#f(yBJ0Do9qOu_HO*vc~+1Xz;i_wuHWwCcViMr+^0@``smd zOAaNyi#~00y14%WrAOhPH^Op*)7py*-YvhrVQ|Cc?#P+Di&tM%U>8B=FZIH@27m3? zq9yMZUN0Ifx_mlfySu2gm+$N7=?IZO|B0}6r+?SnxrbPP?L%5C7*!H4Gbf;mrDu(0 ztmMTK<(OAKd6^X5hIq za$^}ehyh>QzcyqZ&3*>G#w+nf>ZAFq1G}dd6UOXs3Lx#6Av3ryyfM5|$SN5%po5%a zxdq{M(cBe1dN7U;qrstr4Z)gF{=4Pjv+q=diz8h^`leA$MJyAJW0Begf$gY}9l9d-}Bc)RZQ7U94#A^rG0%?Yf6iyEsH?G&rkk}ImM zR@QB|S|v^eZ}3-O^KhS{JxaNG?pgF;I=HL0zS`PQyKm>Ay6yGW`a^X!4cu=~!Z%QG z0tLrVfb=8Dn8IdN3wlKkn`zq$8$@`QqJ|)%6>4{(Bu*4mK=4#kdD_i-+Z!Z!)#nl;8NoM5G@f{kjM9Rtm*+LhfFH$bLdA z$(2viGs&zc%Q7H%CI^C&MG$Nwb0J(k4}vxG$R$tKY$M4R$*F3JpbdepyNdj;Xw9q7 L!6K#2xKsZdrOHJ) literal 0 HcmV?d00001 diff --git a/scripts/audit_core_reading_S0.py b/scripts/audit_core_reading_S0.py new file mode 100644 index 0000000..f85026a --- /dev/null +++ b/scripts/audit_core_reading_S0.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +"""合作阅读 S0 表审校脚本 — 纯规则检查,不调用大模型""" +import json, sys, requests, re + +TOKEN = None +APP_TOKEN = 'HIuLbJ6E8aNw2lsdT1YcvNOmnog' +TABLE_ID = 'tblAZT1KNUuNY3cN' + +def get_token(): + global TOKEN + if TOKEN: + return TOKEN + r = requests.post('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal', + 
def _as_dict(value):
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _list_at(container, key):
    """Return ``container[key]`` when it is a list, otherwise an empty list."""
    value = _as_dict(container).get(key)
    return value if isinstance(value, list) else []


def _is_valid_index(idx, size):
    """Return True when ``idx`` is a plain int addressing a list of ``size`` items."""
    # bool is a subclass of int, but True/False are never meaningful indexes.
    return isinstance(idx, int) and not isinstance(idx, bool) and 0 <= idx < size


def _has_feedback(option):
    """Return True when ``option['feedback']['content']`` is present and non-empty."""
    return bool(_as_dict(_as_dict(option).get('feedback')).get('content', ''))


def audit(record):
    """Run the rule-based checks for one cooperative-reading record.

    Args:
        record: A bitable record dict with ``record_id`` and ``fields``. The
            JSON columns (taskData, sequenceData, textData, learningData,
            kpInfo) are decoded through ``safe_json``.

    Returns:
        A dict with ``id``, ``record_id``, ``title``, the ``issues`` (errors)
        and ``warnings`` message lists, and their counts as
        ``total_severity`` / ``total_warnings``.

    Malformed nested values (non-dict items, null lists, non-integer indexes)
    are reported or treated as empty instead of raising, so one bad record
    cannot abort a whole-table run.
    """
    fields = record['fields']
    raw_id = fields.get('ID')
    # str() keeps a numeric ID cell from crashing on .strip().
    fid = str(raw_id).strip() if raw_id else ''
    rid = record['record_id']
    issues = []
    warnings = []

    # ===== 1. Basic ID checks =====
    if not fid:
        issues.append('❌ ID字段为空')
    elif fid in ('None', 'null'):
        issues.append('❌ ID字段异常值(None/null)')
    elif not fid.isdigit():
        issues.append(f'❌ ID字段非纯数字: "{fid}"')

    # ===== 2. JSON columns must parse, and must be JSON objects =====
    parsed = {}
    for key in ('taskData', 'sequenceData', 'textData', 'learningData', 'kpInfo'):
        obj, status = safe_json(fields, key)
        if status == 'EMPTY':
            warnings.append(f'⚠️ {key} 为空')
        elif status != 'OK':
            issues.append(f'❌ {key} JSON解析失败: {status}')
        elif not isinstance(obj, dict):
            # Every later check uses .get(); a list/scalar here would crash.
            issues.append(f'❌ {key} 应为JSON对象')
            obj = None
        parsed[key] = obj

    # ===== 3. cId consistency =====
    # Each comparison runs on its own, so a missing kpInfo no longer hides a
    # taskData.cId mismatch (and vice versa).
    td = parsed.get('taskData')
    kp = parsed.get('kpInfo')
    if td and td.get('cId', '') != fid:
        issues.append(f'❌ taskData.cId="{td.get("cId")}" ≠ ID="{fid}"')
    if kp and kp.get('cId', '') != fid:
        issues.append(f'❌ kpInfo.cId="{kp.get("cId")}" ≠ ID="{fid}"')
    if td and kp and td.get('cId', '') != kp.get('cId', ''):
        issues.append(f'❌ taskData.cId≠kpInfo.cId ({td.get("cId")} vs {kp.get("cId")})')

    # ===== 4. textData =====
    td_obj = parsed.get('textData')
    texts = _list_at(td_obj, 'text')
    questions = _list_at(td_obj, 'question')
    if td_obj:
        for qi, raw_q in enumerate(questions):
            q = _as_dict(raw_q)

            # 4a. meaning must be set
            if not q.get('meaning', ''):
                issues.append(f'❌ textData.question[{qi}] meaning 为空')

            # 4b. answer must address an existing option
            answer = q.get('answer', [])
            opts = _list_at(q, 'optionList')
            if isinstance(answer, list) and answer:
                aidx = answer[0]
                if not _is_valid_index(aidx, len(opts)):
                    issues.append(f'❌ textData.question[{qi}] answer={aidx} 超出optionList范围({len(opts)})')
            elif not answer:
                issues.append(f'❌ textData.question[{qi}] answer 为空')

            # 4c. every option needs feedback content
            for oi, opt in enumerate(opts):
                if not _has_feedback(opt):
                    issues.append(f'❌ textData.question[{qi}].option[{oi}] feedback 为空')

            # 4d. npcName must be set
            if not q.get('npcName', ''):
                issues.append(f'❌ textData.question[{qi}] npcName 为空')

        # 4e. text passages need a body; a missing title is only a warning
        for ti, raw_t in enumerate(texts):
            t = _as_dict(raw_t)
            if not t.get('textBody', ''):
                issues.append(f'❌ textData.text[{ti}] textBody 为空')
            if not t.get('textTitle', ''):
                # The message is tagged as a warning, so it belongs in
                # warnings and must not be counted as an error.
                warnings.append(f'⚠️ textData.text[{ti}] textTitle 为空')

    # ===== 5. learningData =====
    ld = parsed.get('learningData')
    if ld:
        for pi, raw_part in enumerate(_list_at(ld, 'learningPart')):
            part = _as_dict(raw_part)
            for li, raw_lq in enumerate(_list_at(part, 'learning')):
                lq = _as_dict(raw_lq)

                # 5a. meaning must be set
                if not lq.get('meaning', ''):
                    issues.append(f'❌ learningData.learningPart[{pi}].learning[{li}] meaning 为空')

                # 5b. answer must address an existing option
                answer = lq.get('answer', [])
                opts = _list_at(lq, 'optionList')
                if isinstance(answer, list) and answer:
                    aidx = answer[0]
                    if not _is_valid_index(aidx, len(opts)):
                        issues.append(f'❌ learningPart[{pi}].learning[{li}] answer={aidx} 超出optionList({len(opts)})')

                # 5c. every option needs feedback content
                for oi, opt in enumerate(opts):
                    if not _has_feedback(opt):
                        issues.append(f'❌ learningPart[{pi}].learning[{li}].option[{oi}] feedback 为空')

            # 5d. a leadIn, when present, needs a desc
            lid = _as_dict(part.get('leadIn'))
            if lid and not lid.get('desc', ''):
                issues.append(f'❌ learningPart[{pi}].leadIn.desc 为空')

    # ===== 6. sequenceData must reference existing texts/questions =====
    sd = parsed.get('sequenceData')
    if sd and td_obj:
        for si, raw_step in enumerate(_list_at(sd, 'sequence')):
            step = _as_dict(raw_step)
            for kind, pool, unit in (('text', texts, '段'), ('question', questions, '题')):
                if kind not in step:
                    continue
                refs = step[kind]
                if not refs:
                    # An empty reference list is tolerated, as before.
                    continue
                if not isinstance(refs, list):
                    issues.append(f'❌ sequence[{si}].{kind} 应为数组')
                    continue
                idx = refs[0]
                # Negative or non-integer indexes are reported as well; a
                # plain ">= len" comparison let -1 through.
                if not _is_valid_index(idx, len(pool)):
                    issues.append(f'❌ sequence[{si}].{kind}[{idx}] 越界 ({kind}共{len(pool)}{unit})')

    # ===== 7. NPC consistency =====
    npc_config = fields.get('角色配置', '')
    npc_names_from_config = set()
    if npc_config and npc_config != 'None':
        # Config looks like "Skylar:308" or "Brainy".
        for token in re.findall(r'([A-Za-z#\d]+)', str(npc_config)):
            npc_names_from_config.add(token)
            # Used names are compared without their "#n" suffix, so register
            # the suffix-free form of each configured name too.
            npc_names_from_config.add(token.split('#')[0])

    # Collect every npcName actually used, without the "#n" suffix.
    used_npc = set()
    npc_sources = list(questions) if td_obj else []
    if ld:
        npc_sources.extend(_list_at(ld, 'learningPart'))
        npc_sources.append(ld.get('closing'))
    for source in npc_sources:
        nm = _as_dict(source).get('npcName', '')
        if isinstance(nm, str) and nm:
            used_npc.add(nm.split('#')[0])

    # Sorted so the warning order is stable between runs.
    if npc_names_from_config:
        for nm in sorted(used_npc):
            if nm not in npc_names_from_config:
                warnings.append(f'⚠️ NPC "{nm}" 出现在数据中但未在角色配置中声明')

    # ===== 8. taskData.key must line up with kpInfo =====
    if td and kp:
        keys = set()
        for group in _list_at(td, 'key'):
            for item in _list_at(group, 'keyList'):
                keys.add(_as_dict(item).get('content', ''))

        kp_words = set()
        for info in _list_at(kp, 'kpInfo'):
            kp_words.add(_as_dict(info).get('kpTitle', ''))

        only_in_key = keys - kp_words
        only_in_kp = kp_words - keys
        if only_in_key:
            warnings.append(f'⚠️ taskData.key 有但 kpInfo 无: {only_in_key}')
        if only_in_kp:
            warnings.append(f'⚠️ kpInfo 有但 taskData.key 无: {only_in_kp}')

    # ===== 9. Comprehension-skill coverage =====
    if td_obj:
        meanings = set()
        for raw_q in questions:
            m = _as_dict(raw_q).get('meaning', '')
            if m:
                meanings.add(m)
        known = {'理解文章中的细节信息', '理解文章的主旨大意', '推断文章的隐含意义', '理解文章中的指代关系'}
        unknown = meanings - known
        if unknown:
            warnings.append(f'⚠️ textData.question 发现未知meaning标签: {unknown}')
        # At least the "detail" and "main idea" skills should be covered.
        if '理解文章中的细节信息' not in meanings:
            warnings.append('⚠️ 缺少「理解文章中的细节信息」维度')
        if '理解文章的主旨大意' not in meanings:
            warnings.append('⚠️ 缺少「理解文章的主旨大意」维度')

    return {
        'id': fid,
        'record_id': rid,
        'title': td.get('title', '?') if td else '?',
        'issues': issues,
        'warnings': warnings,
        'total_severity': len(issues),
        'total_warnings': len(warnings)
    }
"/root/.openclaw/credentials/xiaoyan/config.json" +THIRD_SINGULAR_RULES = [ + (r"\bHe\b.*\bneed\b", "need→needs"), (r"\bShe\b.*\bneed\b", "need→needs"), + (r"\bIt\b.*\bneed\b", "need→needs"), (r"\bOtis\b.*\bneed\b", "need→needs"), + (r"\bTom\b.*\bneed\b", "need→needs"), (r"\bMum\b.*\bneed\b", "need→needs"), + (r"\bDad\b.*\bneed\b", "need→needs"), (r"\bBen\b.*\bneed\b", "need→needs"), + (r"\bhave\b", "三单检查"), +] + +REQUIRED_JSON_FIELDS = { + "basicInfo": ["type", "id", "meaning", "desc", "structure", "valaLevel"], + "classificationInfo": ["type", "id", "cambridgeLevel", "cefrLevel", "ncLevel"], + "config": ["type", "id", "title"], + "usageInfo": ["type", "id", "usage"], +} +QUESTION_FIELDS = { + "sentenceMeaningChooseMcq": "场景选择题", + "sentenceMeaningMatchMcq": "听句作答题", + "sentenceMeaningMeaning": "句意选择题", + "sentenceMeaningPic2SentMcq": "看图选择题", + "sentencePronRead": "句子朗读", + "sentencePronRepeatSentence": "句子跟读题", + "sentenceStructureClozeWordMcq": "句子补全题", + "sentenceStructureSort": "句型结构题", +} +Q_REQUIRED = ["category", "skill", "type", "pointId", "question", "options", "answer"] +# 发音类题型不需要 options/answer +PRONUNCIATION_TYPES = {"sentence_pron_read", "sentence_pron_repeat_sentence"} + +def get_token(): + with open(CRED) as f: + c = json.load(f) + r = requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", + json={"app_id": c["apps"][0]["appId"], "app_secret": c["apps"][0]["appSecret"]}) + return r.json()["tenant_access_token"] + +def check_classification_swap(record): + """检查 classificationInfo 中 cambridgeLevel/cefrLevel 是否与列字段互换""" + issues = [] + try: + ci = json.loads(record.get("classificationInfo", "{}")) + except: + return ["❌ classificationInfo JSON 解析失败"] + col_cambridge = record.get("剑桥考试级别", "") + col_cefr = record.get("欧标等级", "") + if ci.get("cambridgeLevel") and col_cambridge and ci["cambridgeLevel"] != col_cambridge: + if ci.get("cefrLevel") == col_cambridge: + issues.append(f"❌ classificationInfo 值互换: 
def check_json_integrity(record, field, required):
    """Check that the JSON object stored in ``record[field]`` has every required key filled.

    Args:
        record: Flat dict of bitable column values for one row.
        field: Name of the JSON column to validate (a missing column is
            treated as ``"{}"``).
        required: Iterable of keys that must be present and non-blank.

    Returns:
        A list of issue messages; empty when the column passes. A value that
        cannot be decoded, or that decodes to something other than a JSON
        object, yields a single-message list.
    """
    try:
        data = json.loads(record.get(field, "{}"))
    except (TypeError, ValueError):
        # TypeError: the cell is not str/bytes (e.g. None).
        # ValueError: covers json.JSONDecodeError for malformed text.
        return [f"❌ {field} JSON 解析失败"]
    if not isinstance(data, dict):
        # A list or scalar cannot carry the required keys; report it rather
        # than letting it pass as "no issues".
        return [f"❌ {field} 应为 JSON 对象"]

    issues = []
    for key in required:
        value = data.get(key)
        # Missing, null, empty and whitespace-only values all count as unfilled.
        if value is None or (isinstance(value, str) and not value.strip()):
            issues.append(f"❌ {field} 缺少必填字段 {key}")

    # id consistency: compare both sides as stripped strings, so a numeric ID
    # cell or trailing spaces do not break the comparison.
    rid = str(record.get("ID") or "").strip()
    declared = data.get("id")
    if declared and rid and str(declared).strip() != rid:
        issues.append(f"❌ {field}.id={declared} ≠ record ID={rid}")
    return issues
def check_consistency_with_chinese_fields(record):
    """Cross-check each Chinese description column against its JSON column.

    For every (Chinese column, JSON column) pair, a warning is produced when
    exactly one side holds content.

    Args:
        record: Flat dict of bitable column values for one row.

    Returns:
        A list of warning messages, in mapping order.
    """
    mapping = (
        ("句意选择题", "sentenceMeaningMeaning"),
        ("句型结构题", "sentenceStructureSort"),
        ("句子朗读", "sentencePronRead"),
        ("看图选择题", "sentenceMeaningPic2SentMcq"),
        ("听句作答题", "sentenceMeaningMatchMcq"),
        ("场景选择题", "sentenceMeaningChooseMcq"),
        ("句子补全题", "sentenceStructureClozeWordMcq"),
        ("句子跟读题", "sentencePronRepeatSentence"),
    )
    issues = []
    for col, json_field in mapping:
        cn = record.get(col, "")
        raw = record.get(json_field)
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            # A blank or absent cell is "no data". Feeding "" to json.loads
            # would raise and the pair would be skipped, which hid exactly
            # the mismatch this check exists to find.
            jd = []
        else:
            try:
                jd = json.loads(raw)
            except (TypeError, ValueError):
                # Malformed JSON cannot be compared here, so skip the pair.
                continue
        if cn and not jd:
            issues.append(f"⚠️ {col} 有中文内容但 {json_field} 为空")
        if not cn and jd:
            issues.append(f"⚠️ {json_field} 有数据但 {col} 为空")
    return issues
def _iter_records(app_token, table_id, token):
    """Yield every row of the table as its ``fields`` dict, with ``record_id`` added.

    Raises:
        requests.HTTPError: on a non-2xx HTTP response.
        RuntimeError: when the API body carries a non-zero ``code``, so an
            expired token or missing permission is not read as an empty table.
    """
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables/{table_id}/records"
    page_token = ""
    while True:
        r = requests.get(
            url,
            headers={"Authorization": f"Bearer {token}"},
            params={"page_size": 50, "page_token": page_token},
            timeout=30)
        r.raise_for_status()
        body = r.json()
        if body.get("code") not in (0, None):
            raise RuntimeError(f"Feishu API error code={body.get('code')} msg={body.get('msg')}")
        d = body.get("data") or {}
        for item in (d.get("items") or []):
            rec = item["fields"]
            rec["record_id"] = item["record_id"]
            yield rec
        page_token = d.get("page_token", "")
        # Stop on the last page, and also when the API claims more pages but
        # gives no cursor (which would otherwise refetch page one forever).
        if not d.get("has_more") or not page_token:
            break


def main():
    """Audit one record (when a target ID is given) or the whole table.

    Command line: ``audit_l1_config.py [app_token] [table_id] [record_ID]``.
    All three are positional and optional; the defaults point at the Level 1
    configuration table.
    """
    app_token = sys.argv[1] if len(sys.argv) > 1 else "Nq3Zb258aae7SRs2QfXcqsQYnxJ"
    table_id = sys.argv[2] if len(sys.argv) > 2 else "tblTxGpf6GQ5c7DZ"
    target_id = sys.argv[3] if len(sys.argv) > 3 else None

    print(f"🔍 Level 1 配置表审校 | {app_token}/{table_id}")
    if target_id:
        print(f"   目标: {target_id}")
    print()

    token = get_token()

    if target_id:
        # IDs in the table may carry stray whitespace, so compare the
        # stripped forms; otherwise such a record can never be found.
        wanted = target_id.strip()
        record = next(
            (rec for rec in _iter_records(app_token, table_id, token)
             if str(rec.get("ID") or "").strip() == wanted),
            None)

        if not record:
            print(f"❌ 未找到 {target_id}")
            return
        rid, issues = audit_record(record)
        print(f"=== {rid} === 共 {len(issues)} 个问题")
        for i in issues:
            print(f"  {i}")
        if not issues:
            print("  ✅ 自动化审校通过")
    else:
        # Full-table scan: print one summary line per record that has issues.
        total_issues = 0
        total_records = 0
        for rec in _iter_records(app_token, table_id, token):
            rid, issues = audit_record(rec)
            if issues:
                total_issues += len(issues)
                total_records += 1
                print(f"  {rid}: {len(issues)} issues | {issues[0][:80]}...")
        print(f"\n全表扫描完成: {total_records} 条记录有 {total_issues} 个问题")
def get_token():
    """Fetch a Feishu ``tenant_access_token`` using the module-level app credentials.

    Returns:
        The token string from the auth response.

    Raises:
        requests.HTTPError: on a non-2xx HTTP response.
        RuntimeError: when the response carries no token (e.g. wrong app
            secret); the message includes the API's ``code`` and ``msg``
            instead of surfacing as a bare KeyError.
    """
    r = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": aid, "app_secret": asecret},
        # Without a timeout a stalled connection would hang the script.
        timeout=30)
    r.raise_for_status()
    body = r.json()
    token = body.get("tenant_access_token")
    if not token:
        raise RuntimeError(f"Feishu auth failed: code={body.get('code')} msg={body.get('msg')}")
    return token
+ +图片材料文本: +【School Notice】 +PLANT a tree on Friday. + +图片材料文本: +【School Notice】 +SUMMER camp starts next week.""", + + "021501": """1. +图片材料文本: +【School Sign】 +VISITORs sign in at the office. + +图片材料文本: +【Party Banner】 +WELCOME to the birthday party! + +图片材料文本: +【Party Invitation】 +You are INVITEd to Tom's party. + +图片材料文本: +【Event Poster】 +Come and LAUGH with us! + +图片材料文本: +【Hotel Notice】 +GUESTs: breakfast 7 to 9 a.m. + +2. +图片材料文本: +【Homework Reminder】 +Check for MISTAKEs first. + +图片材料文本: +【Exam Notice】 +Read the INSTRUCTION carefully. + +图片材料文本: +【Office Notice】 +Give a REASON if late. + +图片材料文本: +【Room Sign】 +No drinks near TECHNOLOGY room. + +图片材料文本: +【Art Room Sign】 +Wash your paint BRUSH here.""", + + "021601": """1. +图片材料文本: +【Sports Notice】 +New school RECORD in jump! + +图片材料文本: +【School Notice】 +Sports day — a big EVENT. + +图片材料文本: +【Door Sign】 +Do not ENTER without teacher. + +图片材料文本: +【Library Notice】 +RETURN books by this Friday. + +图片材料文本: +【Recycling Sign】 +Please SORT paper and glass. + +2. +图片材料文本: +【Lost & Found】 +Lost: red HANDBAG near playground. + +图片材料文本: +【No Photo Sign】 +No CAMERA in this room. + +图片材料文本: +【Park Sign】 +Park GATE closes at eight. + +图片材料文本: +【Shop Sign】 +Fix your WHEEL in 30 minutes. + +图片材料文本: +【Building Sign】 +Main ENTRANCE on north side.""", + + "021701": """1. +图片材料文本: +【Bus Sign】 +Do not talk to the DRIVER. + +图片材料文本: +【Safety Poster】 +Look both ways on the STREET. + +图片材料文本: +【Market Sign】 +Farmers' MARKET every Saturday. + +图片材料文本: +【Information Desk】 +Free GUIDEBOOK at this desk. + +图片材料文本: +【Mall Sign】 +SHOPPING hours: 10 a.m. to 8 p.m. + +2. +图片材料文本: +【Laundry Sign】 +Washing MACHINE on first floor. + +图片材料文本: +【Art Club Notice】 +DRAW your family for the show. + +图片材料文本: +【Sports Notice】 +BASKETBALL practice at four today. + +图片材料文本: +【Library Sign】 +Put books on the BOOKCASE. + +图片材料文本: +【Science Fair Sign】 +Show your INVENTION on Monday.""", + + "021801": """1. +图片材料文本: +【Library Notice】 +LIBRARY open 8 a.m. 
to 4 p.m. + +图片材料文本: +【Reading Room Sign】 +New MAGAZINEs — read here only. + +图片材料文本: +【TV Room Sign】 +CARTOON at three this afternoon. + +图片材料文本: +【Exam Notice】 +English TEST on Friday morning. + +图片材料文本: +【Lab Sign】 +Read CAREFULLY before you start. + +2. +图片材料文本: +【Toy Label】 +This toy needs a new BATTERY. + +图片材料文本: +【Library Sign】 +Reading CORNER on the left side. + +图片材料文本: +【School Notice】 +Play at the THEATRE this Saturday. + +图片材料文本: +【Park Sign】 +Do not CLIMB the park trees. + +图片材料文本: +【Canteen Notice】 +Lunch is at MIDDAY today.""", +} + +# Step 1: Get current records & record_ids +token = get_token() +r = requests.get( + f"https://open.feishu.cn/open-apis/bitable/v1/apps/{TOKEN_APP}/tables/{TBL}/records", + headers={"Authorization": f"Bearer {token}"}, + params={"page_size": 50}) + +records = {} +for item in r.json().get("data", {}).get("items", []): + fid = item["fields"].get("题目集合 ID", "") + if fid in NEW_DESC: + records[fid] = item["record_id"] + +print(f"Found {len(records)} records to update") + +# Step 2: Update 图片描述 +print("=== Updating 图片描述 ===") +for qid in sorted(NEW_DESC): + if qid not in records: + print(f" ❌ {qid}: record not found") + continue + rid = records[qid] + desc = NEW_DESC[qid] + r = requests.put( + f"https://open.feishu.cn/open-apis/bitable/v1/apps/{TOKEN_APP}/tables/{TBL}/records/{rid}", + headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, + json={"fields": {"图片描述": desc}}) + code = r.json().get("code") + print(f" {qid} ({rid}): code={code} len={len(desc)}chars") + time.sleep(0.5) + +# Step 3: Wait then verify +print("\n=== Step 3: Wait 5s + Verify ===") +time.sleep(5) +token = get_token() + +all_ok = True +for qid in sorted(NEW_DESC): + rid = records.get(qid) + if not rid: + continue + r = requests.get( + f"https://open.feishu.cn/open-apis/bitable/v1/apps/{TOKEN_APP}/tables/{TBL}/records/{rid}", + headers={"Authorization": f"Bearer {token}"}) + fields = r.json()["data"]["record"]["fields"] + desc 
= fields.get("图片描述", "") or "" + + # Check: contains 【 and has expected structure + has_labels = "【" in desc + has_text = "图片材料文本" in desc + ok = has_labels and has_text and len(desc) > 200 + + # Count 【 occurrences - should be 10 (one per image) + bracket_count = desc.count("【") + desc_ok = bracket_count == 10 + + status = "✅" if (ok and desc_ok) else "❌" + if not ok or not desc_ok: + all_ok = False + print(f" {status} {qid}: labels={bracket_count}/10 char={len(desc)}") + + # Show first 2 entries as sample + if has_labels: + lines = desc.strip().split("\n") + for i, line in enumerate(lines): + if "【" in line: + print(f" {line}") + +print(f"\n{'✅ All OK' if all_ok else '❌ Some checks failed'}") diff --git a/skills/audit_l1_config/SKILL.md b/skills/audit_l1_config/SKILL.md new file mode 100644 index 0000000..6d5744f --- /dev/null +++ b/skills/audit_l1_config/SKILL.md @@ -0,0 +1,124 @@ +--- +name: audit_l1_config +description: Level 1 配置表(句子知识点)自动化审校技能。覆盖基础信息完整性、分类字段互换检测、题型JSON结构、单词库-答案一致性、第三人称单数语法等7项自动检查 + 4项人工审核清单。触发关键词:L1配置表审校、句子知识点审校、巩固题审校、audit l1 config +--- + +# L1 配置表审校 — 句子知识点 + +## 目标数据源 + +- **多维表格**: 「互动知识点 - 句子」→「Level 1 配置表」 +- **App Token**: `Nq3Zb258aae7SRs2QfXcqsQYnxJ` +- **Table ID**: `tblTxGpf6GQ5c7DZ` +- **字段结构**: ID(7位数字化) / 基础信息(basicInfo/classificationInfo/config/usageInfo) / 8种题型JSON / 中文字段描述 / 审校结果 + +## ID 编码规则 + +| 位 | 含义 | 示例 | +|----|------|------| +| 1-2 | Level + Season | 10=L1S0, 11=L1S1, 12=L1S2 | +| 3-4 | Unit | 00=U1, 01=U2, ... 14=U14 | +| 5-7 | 序列号 | 001-010 | + +例: 1214001 = L1 Season2, Unit14, 序列001 + +## 审校流程 + +``` +Step 1: 运行自动审校脚本 → python3 scripts/audit_l1_config.py [app_token] [table_id] [record_id] +Step 2: 对自动检查发现的问题,确认真实性(排除误报) +Step 3: 按人工审核清单逐项检查 +Step 4: 汇总结果写入 bitable「审校结果」列 +``` + +## 自动化检查项(7项) + +### 1. basicInfo/config/usageInfo 字段完整性 +- 检查 JSON 是否包含必填字段:type, id, meaning, desc, structure, valaLevel +- 检查 id 与 record ID 是否一致 + +### 2. 
classificationInfo 值互换检测 ⚠️ 高频问题 +- 检查 `cambridgeLevel` 是否与列字段「剑桥考试级别」一致(应为 YLE/PET/KET 等) +- 检查 `cefrLevel` 是否与列字段「欧标等级」一致(应为 A1/A2/B1 等) +- **常见问题**: 两个字段值互换,如 cambridgeLevel="A1" 而 cefrLevel="YLE" + +### 3. 题型 JSON 结构完整性 +- 8种题型 JSON 是否存在,内容是否为空数组 +- 每道题必含: category, skill, type, pointId, question, options, answer +- pointId 与 record ID 一致性 +- answer 索引不超出 options 范围 +- 发音类题型(句子朗读/跟读)豁免 options/answer 检查 + +### 4. sentenceStructureSort 单词库-答案一致性 +- 按 answer 索引从单词库拼出句子 +- 与 explanation 中「正确答案是 "..."」对比(去标点后) +- **常见问题**: 单词库给 "need" 但正确形式应为 "needs"(三单) + +### 5. sentenceStructureSort 第三人称单数语法 +- 主语为 He/She/It/人名 时,检查动词是否有三单变形 +- 特殊检查: 单词库有 "need" 但无 "needs",且主语为三单 → 报错 + +### 6. 题型覆盖率 +- 确认所有8种题型字段非空且 JSON 可解析 +- 缺失题型标记为 ⚠️ + +### 7. 中文字段 × JSON 字段一致性 +- 中文描述列(句意选择题/句型结构题等)与对应 JSON 字段双向校验 +- 一边有数据另一边为空 → 标记 ⚠️ + +## 人工审核清单(4项) + +### A. 填空题选项自然度 +- 如 sentenceStructureClozeWordMcq 中 "put your toys" 省略 "away" 是否可接受 +- 需结合 L1 级别和教学目标判断省略形式的合理性 + +### B. 解释文本质量 +- 解释是否准确、完整、易于理解 +- 语法术语是否符合目标年龄段 + +### C. 选项干扰项合理性 +- 干扰项是否与正确答案有足够的区分度(不能太明显) +- 干扰项是否过于混淆(不能太接近) + +### D. 图片描述与题型匹配 +- sentenceMeaningPic2SentMcq 的图片描述是否符合场景 +- 图片是否清晰传达目标知识点 + +## 使用示例 + +### 单条记录审校 +```bash +python3 scripts/audit_l1_config.py Nq3Zb258aae7SRs2QfXcqsQYnxJ tblTxGpf6GQ5c7DZ 1214001 +``` + +### 全表扫描 +```bash +python3 scripts/audit_l1_config.py +``` + +### 输出示例 +``` +🔍 Level 1 配置表审校 | Nq3Zb258aae7SRs2QfXcqsQYnxJ/tblTxGpf6GQ5c7DZ + 目标: 1214001 + +=== 1214001 === 共 3 个问题 + ❌ classificationInfo 值互换: cambridgeLevel=A1(应为YLE), cefrLevel=YLE(应为A1) + ❌ sentenceStructureSort 单词拼出 "Otis need to pack his schoolbag" ≠ 解释声明 "Otis needs to pack his schoolbag." + ❌ sentenceStructureSort Otis三单主语,单词库有need无needs +``` + +## 审校结果写入规范 + +写入 bitable「审校结果」列时遵循: +1. ❌ 标记真实错误(带具体定位和修正方案) +2. ⚠️ 标记需确认项(带评估建议) +3. ✅ 全量通过时写 `✅` +4. 
自动检查命中的问题需人工二次确认后再写入(排除误报) + +## 常见误报场景(脚本使用者须知) + +| 误报类型 | 原因 | 处理 | +|---------|------|------| +| 发音类题型报缺少 options/answer | sentencePronRead/sentencePronRepeatSentence 无选项 | 脚本已豁免 | +| 单词拼出句子 ≠ 解释声明(仅标点差异) | 末尾句号差异 | 脚本已去标点后比较 | +| 句子朗读 pointId ≠ record ID | 可能是跨知识点复用 | 人工判断是否为合理复用 | diff --git a/skills/bitable-reader/SKILL.md b/skills/bitable-reader/SKILL.md new file mode 100644 index 0000000..2f5f3eb --- /dev/null +++ b/skills/bitable-reader/SKILL.md @@ -0,0 +1,164 @@ +--- +name: bitable-reader +version: 1.0.0 +description: "通用飞书多维表格结构读取技能。读取任意多维表格的表结构、字段定义、记录数据,自动处理分页和JSON字段解析。适用于内容发现、配置提取、数据分析等场景。" +metadata: + requires: + permissions: ["bitable:app:readonly", "wiki:node:retrieve"] + triggers: + - "用户提供多维表格链接(wiki/bitable)要求查看内容" + - "需要从多维表格提取配置数据做分析" + - "需要了解某个多维表格的字3段结构和数据格式" +--- + +# bitable-reader — 多维表格通用读取技能 + +## 身份规则 +- 所有读取操作**永远使用 Bot 身份**(`--as bot` / Bot tenant_access_token) +- 禁止触发用户授权弹窗 +- 只读操作,绝不写入或删除 + +## 执行链路 + +### 步骤 0:获取 Bot 凭据 + +```bash +TOKEN=$(curl -s -X POST "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal" \ + -H "Content-Type: application/json" \ + -d '{"app_id":"cli_a931175d41799cc7","app_secret":"Iw2vEfbjT6GtV0GhbxbZqfQ4nAPtbR14"}' \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['tenant_access_token'])") +``` + +Token 有效期 2 小时,同一会话复用即可。 + +### 步骤 1:定位多维表格 + +#### 1a. 从 wiki 链接进入 +``` +链接:https://xxx.feishu.cn/wiki/XXXXXX +↓ +提取 wiki_token = "XXXXXX" +↓ +lark-cli wiki spaces get_node --params '{"token":"XXXXXX"}' --as bot +``` + +若返回 `has_child: true` 且非 bitable,遍历子节点(步骤 1b)。 + +#### 1b. 遍历 wiki 子目录 +```bash +curl -s -X GET \ + "https://open.feishu.cn/open-apis/wiki/v2/spaces//nodes?parent_node_token=" \ + -H "Authorization: Bearer $TOKEN" +``` +分页用 `page_token` 参数。 + +#### 1c. 确认目标为 bitable +Node 的 `obj_type` 为 `"bitable"` 时,`obj_token` 即为 `app_token`。 + +#### 1d. 
直接提供 app_token 时 +跳过上述步骤,直接进入步骤 2。 + +### 步骤 2:列出所有数据表 + +```bash +curl -s -X GET \ + "https://open.feishu.cn/open-apis/bitable/v1/apps/$APP_TOKEN/tables" \ + -H "Authorization: Bearer $TOKEN" +``` + +返回结构: +```json +{ + "data": { + "has_more": false, + "items": [ + {"name": "S0", "revision": 1445, "table_id": "tblXXXXXX"}, + {"name": "S1", "revision": 3150, "table_id": "tblYYYYYY"} + ], + "page_token": "...", + "total": 7 + } +} +``` + +### 步骤 3:获取表字段结构 + +```bash +curl -s -X GET \ + "https://open.feishu.cn/open-apis/bitable/v1/apps/$APP_TOKEN/tables/$TABLE_ID/fields" \ + -H "Authorization: Bearer $TOKEN" +``` + +返回每个字段的 `field_name` 和 `type`(类型代码对照表见下文「常见字段类型说明」)。 + +### 步骤 4:读取记录数据 + +#### 4a. 全量分页读取 +```bash +curl -s -X GET \ + "https://open.feishu.cn/open-apis/bitable/v1/apps/$APP_TOKEN/tables/$TABLE_ID/records?page_size=100" \ + -H "Authorization: Bearer $TOKEN" +``` + +用返回的 `has_more` + `page_token` 循环翻页。 + +#### 4b. 按 ID 精读单条 +```bash +curl -s -X GET \ + "https://open.feishu.cn/open-apis/bitable/v1/apps/$APP_TOKEN/tables/$TABLE_ID/records/$RECORD_ID" \ + -H "Authorization: Bearer $TOKEN" +``` + +### 步骤 5:解析输出 + +每条记录返回: +```json +{ + "record_id": "recXXXXXX", + "fields": { + "ID": "0000001", + "taskData": "{...JSON string...}", + "textData": "{...JSON string...}", + ... + } +} +``` + +**重要:** 长文本/JSON 字段在多维表格中以 **字符串** 形式存储,需用 `json.loads()` 反序列化后再分析内部结构。 + +## 常见字段类型说明 + +| type | 含义 | 读取形式 | +|------|------|---------| +| 1 | 文本 | 字符串 | +| 2 | 数字 | 数字 | +| 3 | 单选 | 字符串 | +| 4 | 多选 | 字符串数组 | +| 5 | 日期 | Unix 毫秒时间戳 | +| 7 | 复选框 | 布尔 | +| 17 | 文本(长) | 字符串(含 JSON) | +| 20 | URL | 对象 `{link, text}` | +| 21 | 附件 | 对象数组 `[{file_token, name, ...}]` | + +## 典型使用模式 + +### 模式 A:探索未知表结构 +1. 列出所有表 → 展示表名/revision +2. 用户选择目标表 → 读取字段结构 +3. 采样 2–3 条记录 → 分析 JSON 字段结构 +4. 归纳输出字段含义和内容模式 + +### 模式 B:批量导出特定字段 +1. 已知 table_id 和目标字段名 +2. 全量分页读取 +3. 提取目标字段 → 解析 JSON → 汇总 + +### 模式 C:按条件检索 +1. 已知 ID 范围或关键词 +2. 
全量读取后本地过滤(bitable API 筛选较受限) + +## 注意事项 +- 含 JSON 的字段(如 `taskData`, `learningData`)读取后为普通字符串,需显式 `json.loads()` +- 部分表格可能包含内嵌 Sheet(``),需用 `feishu-embedded-sheet` 技能处理 +- 读取大表(revision > 5000)时注意 API 限流,建议逐页 `sleep 0.3s` +- 若返回 `99991663` 错误,重新获取 token 后重试 diff --git a/skills/core-content-json-standard/SKILL.md b/skills/core-content-json-standard/SKILL.md new file mode 100644 index 0000000..dd79139 --- /dev/null +++ b/skills/core-content-json-standard/SKILL.md @@ -0,0 +1,393 @@ +--- +name: core-content-json-standard +version: 2.0.0 +description: "核心互动内容 JSON 配置标准。覆盖全部 14 种核心互动题型的完整 JSON 结构,含通用字段定义和各题型专属规范。用于内容生产、审校、自动化校验。" +metadata: + triggers: + - "生产/审校核心互动内容" + - "需要了解核心互动题型 JSON 配置格式" + - "验证核心互动 JSON 配置完整性" + requires: + knowledge: ["互动内容库 wiki (7487521512283799556)"] + skills: ["bitable-reader"] + coverage: "14 种题型(口语探讨暂无数据)" +--- + +# core-content-json-standard — 核心互动内容 JSON 配置标准 + +## 数据源 + +- Wiki: `https://makee-interactive.feishu.cn/wiki/TbbGwZLIVizXeGkMEKQc2hKRnCc` +- Space ID: `7487521512283799556` +- 通过 `bitable-reader` 技能按 node_token 定位具体 bitable + +--- + +# 第一部分:通用结构(所有核心互动共用) + +## 1. 通用字段 + +| 字段 | 类型 | 说明 | 出现率 | +|------|------|------|:-----:| +| `ID` | string | 记录唯一标识 | 15/15 | +| `dataStatus` | string | 数据状态(`"0"`=正常) | 15/15 | +| `kpStatus` | string | 知识点状态(`"0"`/`"1"`) | 15/15 | +| `kpInfo` | JSON | 知识点绑定信息 | 15/15 | +| `kpListSentence` | list | 关联句型ID列表 | 阅读/听力/口语有 | +| `kpListVocab` | list | 关联单词ID列表 | 阅读/听力/口语有 | +| `互动描述` | text | 中文剧情/内容产出描述 | 15/15 | +| `关联知识点` | text | 知识点文本汇总 | 14/15 | +| `专有名词` | list | 专有名词对照表 | 部分有 | + +## 2. kpInfo — 知识点绑定(通用结构) + +```json +{ + "pushType": "relationKp", + "cType": "<题型标识>", + "cId": "<与ID字段一致>", + "kpInfo": [ + { + "kpId": "0000007", + "kpType": "vocab", // vocab / pattern + "kpTitle": "passport", + "kpSkill": "vocab_meaning", // 知识点技能 + "kpSkillName": "词义" + } + ] +} +``` + +**约束:** +- `cType` 必须与本记录所属题型的 cType 一致 +- `cId` 必须与字段 `ID` 一致 +- `kpInfo[]` 每一项对应一个知识点 + +## 3. 
taskData / taskInfo — 任务元信息(通用结构) + +```json +{ + "cType": "core_xxx_xxx", // 题型唯一标识(见下表) + "cId": "0000001", // 任务ID + "title": "任务中文名", + "taskDesc": "任务描述", // 或以 sceneDesc 命名 + "sceneDesc": "场景背景描述" +} +``` + +> ⚠️ 字段名有两种:`taskData`(阅读/听力/口语)和 `taskInfo`(写作/看图选词)。内容结构相同。 + +## 4. cType 对照表 + +| cType | 题型中文名 | 分类 | +|-------|-----------|:--:| +| `core_reading_order` | 合作阅读 | 📖 阅读 | +| `core_reading_imageDrag` | 看图选词 | 📖 阅读 | +| `core_listening_order` | 合作听力 | 🎧 听力 | +| `core_listening_drag` | 听力拖拽 | 🎧 听力 | +| `core_listening_choose` | 听力选择 | 🎧 听力 | +| `core_speaking_reply` | 口语快答 | 🗣 口语 | +| `core_speaking_monologue` | 口语独白 | 🗣 口语 | +| `core_speaking_inquiry` | 口语妙问 | 🗣 口语 | +| `core_speaking_image` | 看图说话 | 🗣 口语 | +| `core_writing_questionMakeSentence` | 写作互动 / 邮件组句 | ✏️ 写作 | +| `core_writing_imgMakeSentence` | 看图组句 | ✏️ 写作 | +| `core_writing_imgWrite` | 看图撰写 | ✏️ 写作 | +| `core_writing_questionWrite` | 邮件撰写 | ✏️ 写作 | + +> 口语探讨 (`core_speaking_discuss`) — S0 表当前为空,待后续补充 + +--- + +# 第二部分:题型专区 + +--- + +## 📖 阅读类 + +### 合作阅读 `core_reading_order` + +**Bitable:** `HIuLbJ6E8aNw2lsdT1YcvNOmnog` / S0–S3 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, sceneDesc, img, key, cDesc}` | key=核心词汇列表,cDesc=中英产出概括 | +| `sequenceData` | `{sequence: [{text|[i]}/{question|[j]}/{dialog}]}` | 流程推进引擎 | +| `textData` | `{text: [{textType, textTitle, textBody}], question: [{type,npcName,content,optionList,answer,meaning,explain}]}` | 阅读材料+理解题 | +| `learningData` | `{closing, learningPart: [{leadIn, type, npcName, content, learning: [{answer, meaning, optionList, question}]}]}` | 词汇/句型学习流程 | + +**流程编排两种模式:** +- **交替型:** text[i] → question[i] → text[i+1] → ... +- **集中型:** text[0] → dialog → question[0] → dialog → question[1] → ... 
+ +**阅读题 meaning 标签(开放型):** +实际数据中的 meaning 为自由文本,常见分类: +- 细节提取类:提取文章中的关键/具体/细节信息、识别文章中的形状细节 +- 概念理解类:理解文章中的关键概念、行为准则、求助对象、关键角色 +- 描述识别类:理解文章中的形容词/形状描述、动作指令、未知信息 +- 推理推测类:根据细节进行合理推测 + +**生产规范:** +- 每篇至少覆盖「细节信息」+「主旨大意」两个维度 +- optionList 每项必须有 feedback +- NPC 名称需与 角色配置 一致 + +**审校清单:** +- [ ] JSON 可解析 +- [ ] taskData.cId = kpInfo.cId = ID +- [ ] sequenceData 索引不越界 +- [ ] textData.question[].answer 在 optionList 范围内 +- [ ] optionList feedback 不为空 +- [ ] textData.question[].meaning 不为空 +- [ ] learningData.learningPart[].learning[].answer 在 optionList 范围内 +- [ ] 角色配置 NPC 一致性 +- [ ] taskData.key 与 kpInfo.kpInfo 对应 + +--- + +### 看图选词 `core_reading_imageDrag` + +**Bitable:** `Qf0Ob6TyEaShufsORGBc8WTinFf` / L1 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskInfo` | `{cId, cType, title, taskDesc, sceneDesc}` | 任务元信息 | +| `questionGroup` | `[{...}]` list | 题目组(含图片+单词拖拽) | +| `studyInfo` | `{learningPart: [...], closing}` | 学习流程 | +| `学习流程配置` | text | Markdown 格式的学习流程 | + +与 合作阅读 的区别:无 sequenceData/textData,聚焦于图片+单词的视觉匹配。 + +--- + +## 🎧 听力类 + +### 合作听力 `core_listening_order` + +**Bitable:** `B8Z1b2TnYaKRa1sWSljcV4Q9nYf` / S1–S3, LV1 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, sceneDesc, img}` | 无 key/cDesc(词汇在 kpInfo) | +| `sequenceData` | `{sequence: [...]}` | 同 合作阅读 | +| `textData` | `{audio: [...], question: [...options+answer+meaning...]}` | ⚠️ 用 `audio` 替代 `text` | +| `learningData` | `{closing, learningPart: [...]}` | 同 合作阅读 | + +**与 合作阅读 的关键差异:** +- `textData.audio[]` 替代 `textData.text[]`(音频材料 vs 文本材料) +- sequence 中用 `{"audio": [i]}` 替代 `{"text": [i]}` +- taskData 无 key/cDesc 字段 + +--- + +### 听力拖拽 `core_listening_drag` + +**Bitable:** `Uey8bhdLBa66TIsGrFBcIQKRnsg` / S3, LV1 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, sceneDesc, key}` | 有 key 词汇表 | +| `dialogList` | `[{...}]` list | 听力对话列表 | +| `preDialog` | `[{...}]` list | 前置对话 | +| `questionList` | `[{...}]` list | 拖拽题目列表 | +| 
`learningData` | `{learningPart, closing}` | 学习流程 | +| `听力材料` | text | 音频文本材料 | +| `父记录` | list | 关联的父记录 | + +--- + +### 听力选择 `core_listening_choose` + +**Bitable:** `C7e9bzA1GaZ61FsTz87cEM8Ynoe` / S3, LV1 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, sceneDesc, key}` | 任务元信息 | +| `questionGroup` | `[{...}]` list | 选择题组 | +| `learningData` | `{learningPart, closing}` | 学习流程 | + +--- + +## 🗣 口语类 + +### 口语快答 `core_speaking_reply` + +**Bitable:** `D0Tpbs6D6aIRXdsKDLec0V0on3b` / S0–S3 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, taskDesc, sceneDesc}` | 任务元信息 | +| `configData` | `{config: {...}}` | 对话配置(ASR/评分相关) | +| `settingData` | `{setting: {...}}` | 场景设定 | +| `exampleDialog` | `[{...}]` list | 对话样例 | +| `learningData` | `{learningPart: [...]}` | 学习流程(无 closing) | +| `promptInfo配置` | text | prompt 配置 | +| `教研-任务规则与NPC设定` | text | 任务规则 | + +**结构特点:** 侧重对话交互,有 configData/settingData 控制口语识别和评分。 + +--- + +### 口语妙问 `core_speaking_inquiry` + +**Bitable:** `Ct4ebNgxUatyUssuVHycfhbgnXf` / S0–S3 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, taskDesc, sceneDesc}` | 同 口语快答 | +| `configData` | `{config: {...}}` | 对话配置 | +| `settingData` | `{setting: {...}}` | 场景设定 | +| `exampleDialog` | `[{...}]` list | 对话样例 | +| `learningData` | `{learningPart: [...]}` | 学习流程 | +| `任务设定` | text | 任务规则 | + +**与 口语快答 的区别:** 口语妙问侧重主动提问场景,口语快答侧重回答场景。JSON 结构基本相同,差异在内容配置中体现。 + +--- + +### 口语独白 `core_speaking_monologue` + +**Bitable:** `JtsAbFoSbaWWvssLsxXcAgdEnkh` / L2 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cId, cType, title, taskDesc, sceneDesc}` | 任务元信息 | +| `configData` | `{sentenceMin, sentenceMax, monologueExample, passCondition, asrPrompt}` | ⚠️ 独白配置(句子数限制等) | +| `npclistData` | `[{...}]` list | NPC 列表 | +| `learningData` | `{learningPart, closing}` | 学习流程 | +| `npc站位列表` | text | NPC 站位 | + +**与 
口语快答/妙问 的区别:** 独白型(无 exampleDialog),有句子数量约束(sentenceMin/Max)。 + +--- + +### 看图说话 `core_speaking_image` + +**Bitable:** `DOVHbQamCaZPTCsuzJrclG1an6b` / S3, LV1 +**核心 JSON 字段:** + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskData` | `{cType, cId, title, taskDesc, sceneDesc}` | 任务元信息 | +| `configData` | `{config: {...}}` | 对话配置 | +| `imageInfo` | `{questionImg: [...]}` | 图片素材信息 | +| `questionList` | `[{...}]` list | 问题列表 | +| `optionList` | `[{...}]` list | 选项列表 | +| `learningData` | `{learningPart, closing}` | 学习流程 | + +**结构特点:** 有 imageInfo 管理图片素材 + questionList/optionList 管理图文互动题。 + +--- + +## ✏️ 写作类 + +**5 种写作题型共享相同的 JSON 结构框架**,差异体现在 `textInfo` 和素材类型上: + +| 题型 | cType | 素材来源 | +|------|-------|---------| +| 写作互动 | `core_writing_questionMakeSentence` | 文字问题 | +| 邮件组句 | `core_writing_questionMakeSentence` | 邮件文本 | +| 看图组句 | `core_writing_imgMakeSentence` | 图片 | +| 看图撰写 | `core_writing_imgWrite` | 图片 | +| 邮件撰写 | `core_writing_questionWrite` | 邮件文本 | + +### 写作类通用 JSON 结构 + +| 字段 | 结构 | 说明 | +|------|------|------| +| `taskInfo` | `{cId, cType, title, taskDesc, sceneDesc}` | 任务元信息 | +| `textInfo` | `{inputType, passCondition, textList: [...]}` | ⚠️ 写作素材+通过条件 | +| `evalInfo` | `{title, taskDesc, sceneDesc, knowledge, passCondition}` | 评分/评价配置 | +| `studyInfo` | `{learningPart: [...], closing}` | 学习流程 | +| `kpInfo` | 通用结构 | 知识点绑定 | +| `任务信息` | text | 中文任务描述 | +| `写作材料` | text | 写作素材文本 | + +**textInfo 字段详解:** +```json +{ + "inputType": "sentence_building", // 输入类型 + "passCondition": "...", // 通关条件描述 + "textList": [ // 素材列表 + {"type": "...", "content": "..."} + ] +} +``` + +**evalInfo 字段详解:** +```json +{ + "title": "任务中文名", + "taskDesc": "任务描述", + "sceneDesc": "场景背景", + "knowledge": "...", // 所需知识点 + "passCondition": "..." 
// 通过条件 +} +``` + +⚠️ 写作互动/邮件组句 cType 相同(`core_writing_questionMakeSentence`),通过 `textInfo.textList[].type` 区分素材是文本问题还是邮件格式。 + +--- + +# 第三部分:自动化审校规则 + +## 通用检查(所有题型) + +```python +# 规则列表(适用于所有 14 种题型) +通用检查项 = [ + "JSON字段可解析性(json.loads 不抛异常)", + "taskData/taskInfo.cId == kpInfo.cId == ID", + "kpInfo.cType 正确", + "dataStatus 不为空", + "互动描述 不为空", +] +``` + +## 分题型检查 + +```python +题型专属检查 = { + "阅读类": [ + "sequenceData 索引不越界", + "question[].answer 在 optionList 范围内", + "optionList 每项有 feedback", + "question[].meaning 不为空", + ], + "听力类": [ + "textData.audio 不为空", + "sequenceData 索引不越界", + "听力材料 不为空", + ], + "口语类": [ + "configData/settingData 可解析", + "learningData 可解析", + ], + "写作类": [ + "textInfo.textList 不为空", + "evalInfo 可解析", + "studyInfo 可解析", + ], +} +``` + +--- + +# 第四部分:扩展指南 + +新增题型时: +1. 用 `bitable-reader` 获取 bitable → 列出表 → 读取第一个内容表 +2. 采样 1–2 条记录 → 提取 JSON 字段名和 cType +3. 与已有结构对比 → 归类到阅读/听力/口语/写作 +4. 按本文件格式补充新章节 +5. 更新顶部 coverage 计数 + +**待补充:** 口语探讨 (`core_speaking_discuss`) — S0 表当前无数据 diff --git a/skills/interactive-component-json/references/component_registry.md b/skills/interactive-component-json/references/component_registry.md index c02ed9e..a65eb00 100644 --- a/skills/interactive-component-json/references/component_registry.md +++ b/skills/interactive-component-json/references/component_registry.md @@ -63,27 +63,27 @@ 5. 
仍未匹配 → 报错:未知组件类型 ``` -## 二、核心互动组件(待注册) +## 二、核心互动组件 -以下核心互动类型已从多维表格配置目录中发现,待后续注册: +JSON 结构详见 `skills/core-content-json-standard/SKILL.md`。 -| # | 多维表格名称 | wiki_token | 待注册 | -|---|-------------|-----------|--------| -| 1 | 核心互动 - 合作阅读 | `H6DJweNkpigCbak2Y5LcTZ8Vnfb` | 🔲 | -| 2 | 核心互动 - 看图选词 | `MVo7wugWfimJPIkxhJCcqppFnyf` | 🔲 | -| 3 | 核心互动 - 口语快答 | `TSwcw0nFmi21khkpUndchNMsn6f` | 🔲 | -| 4 | 核心互动 - 口语独白 | `G20HwcF1uideNokLsHWcBs3znvg` | 🔲 | -| 5 | 核心互动 - 口语妙问 | `GJUVwNSEkis3EXkrVj0ccbqdn8c` | 🔲 | -| 6 | 核心互动 - 看图说话 | `KBOXwzVHfin6ORkKbA3c3eWEnoh` | 🔲 | -| 7 | 核心互动 - 口语探讨 | `HoidwhEBWiWjaokQnz0cHbhTn6e` | 🔲 | -| 8 | 核心互动 - 合作听力 | `FrxtwNRQDizqiikPkATcBzTCnYe` | 🔲 | -| 9 | 核心互动 - 听力拖拽 | `K3QrwQnWqiPBm1krhnNcWDTqnhe` | 🔲 | -| 10 | 核心互动 - 听力选择 | `Kwrcw6A4jip2sxkdLn4czd1knvf` | 🔲 | -| 11 | 核心互动 - 写作互动 | `DVkcwVr2giswTckcgD0cpHwNnv2` | 🔲 | -| 12 | 核心互动 - 看图组句 | `BkmtwUBwMiHd5Ak7VS6ccE9SnHd` | 🔲 | -| 13 | 核心互动 - 看图撰写 | `KwPHwnaqdiWlvNkDm5fcFreDnQh` | 🔲 | -| 14 | 核心互动 - 邮件组句 | `M5oTwUP6wiImC4kVJU8cRYnfnyf` | 🔲 | -| 15 | 核心互动 - 邮件撰写 | `Brn0wldKYizsLZkBqK6clp1tnKd` | 🔲 | +| # | 剧本类型名 | cType | 多维表格名称 | bitable_token | +|---|-----------|-------|-------------|---------------| +| 1 | 合作阅读 | `core_reading_order` | 核心互动 - 合作阅读 | `HIuLbJ6E8aNw2lsdT1YcvNOmnog` | +| 2 | 看图选词 | `core_reading_imageDrag` | 核心互动 - 看图选词 | `Qf0Ob6TyEaShufsORGBc8WTinFf` | +| 3 | 口语快答 | `core_speaking_reply` | 核心互动 - 口语快答 | `D0Tpbs6D6aIRXdsKDLec0V0on3b` | +| 4 | 口语独白 | `core_speaking_monologue` | 核心互动 - 口语独白 | `JtsAbFoSbaWWvssLsxXcAgdEnkh` | +| 5 | 口语妙问 | `core_speaking_inquiry` | 核心互动 - 口语妙问 | `Ct4ebNgxUatyUssuVHycfhbgnXf` | +| 6 | 看图说话 | `core_speaking_image` | 核心互动 - 看图说话 | `DOVHbQamCaZPTCsuzJrclG1an6b` | +| 7 | 口语探讨 | `core_speaking_discuss` | 核心互动 - 口语探讨 | `WYQobCGomaQ843sjB8Icp2SLnjb` ⚠️ S0无数据 | +| 8 | 合作听力 | `core_listening_order` | 核心互动 - 合作听力 | `B8Z1b2TnYaKRa1sWSljcV4Q9nYf` | +| 9 | 听力拖拽 | `core_listening_drag` | 核心互动 - 听力拖拽 | `Uey8bhdLBa66TIsGrFBcIQKRnsg` | +| 10 | 听力选择 | `core_listening_choose` | 
核心互动 - 听力选择 | `C7e9bzA1GaZ61FsTz87cEM8Ynoe` | +| 11 | 写作互动 | `core_writing_questionMakeSentence` | 核心互动 - 写作互动 | `HOhJb8xtMa02YjsHXMNc7MEpnfe` | +| 12 | 看图组句 | `core_writing_imgMakeSentence` | 核心互动 - 看图组句 | `LNEcbo6meahQCosYN9Jc137rnde` | +| 13 | 看图撰写 | `core_writing_imgWrite` | 核心互动 - 看图撰写 | `Be4rbTgoiaDMOvstB4Ucn8sjndh` | +| 14 | 邮件组句 | `core_writing_questionMakeSentence` | 核心互动 - 邮件组句 | `F0e4bd2Z5aFDzpsIHndcxGhgnKd` | +| 15 | 邮件撰写 | `core_writing_questionWrite` | 核心互动 - 邮件撰写 | `JcuybbTRParccasXKrUc7mD1neh` | ## 三、数据库目标表映射 diff --git a/skills/interactive-component-json/references/json_structure_guide.md b/skills/interactive-component-json/references/json_structure_guide.md new file mode 100644 index 0000000..9dee3fe --- /dev/null +++ b/skills/interactive-component-json/references/json_structure_guide.md @@ -0,0 +1,412 @@ +# 中互动组件 — jsonData 字段结构参考 + +> 本文档是中互动 27 种组件的 **jsonData 字段结构指南**,用于内容生产和审校。 +> cType 映射和 bitable_token 参见 `component_registry.md`。 + +## 通用规则 + +- 所有中互动组件结构统一:`jsonData`(主 JSON)+ `kpInfo`(知识点绑定)+ 辅助文本字段 +- `jsonData` 通过 `json.loads()` 可直接解析为 dict +- 公共字段(所有组件都有):`cType`, `cId`, `title` + +## 分类汇总 + +| 分类 | 数量 | 组件列表 | +|------|:---:|---------| +| 🗨 对话类 | 7 | 对话互动、朗读、表达、选择、选读、挖空、组句 | +| 📋 信息类 | 6 | 信息描写、拼词、组句、补词、填词、填句 | +| 📦 词汇类 | 2 | 物品互动、图片互动 | +| 🖼 图片选择类 | 4 | 图片单选、多选、有序、拖拽 | +| 📝 语法类 | 3 | 材料互动、挖空互动、组句互动 | +| 🔧 其他 | 5 | 指令互动、填词互动、语音互动、造句互动、发音互动 | + +--- + +## 🗨 对话类(7 种) + +**公共字段:** `cType`, `cId`, `title`, `resourceMapping` + +### 对话互动 `mid_sentence_dialogue` + +``` +jsonData: + cType str "mid_sentence_dialogue" + cId str 任务ID + title str 中文标题 + asrPrompt str ASR语音识别提示 + preDialog list 前置对话 [{type,content}] + question dict 问题配置 {mode, hint, desc} + postDialog list 后置对话 [{type,content}] + meaning str 能力维度标签 + resourceMapping list NPC角色映射 [name,...] 
+``` + +### 对话朗读互动 `mid_dialog_repeat` + +``` +jsonData: + cType str "mid_dialog_repeat" + cId str + title str + resourceMapping list NPC映射 + preDialog list 前置对话 [{type,content}] + imgShowTimingList list 图片展示时机 + img dict 配图信息 + question dict 朗读问题 {sentence,...} + postDialog list 后置对话 +``` + +### 对话表达互动 `mid_dialog_express` + +``` +jsonData: + cType str "mid_dialog_express" + cId, title str + asrPrompt str ASR提示(中文场景描述) + resourceMapping list + preDialog list 前置对话 [{type,content}] + imgShowTimingList list 图片展示时机 + img dict 配图 +``` + +### 对话选择互动 `mid_dialog_choose` + +``` +jsonData: + cType str "mid_dialog_choose" + cId, title str + resourceMapping list + preDialog list 前置对话 + imgShowTimingList list + img dict + optionList list 选项 [{option: "选项文本"},...] + answer list 正确答案索引 [0] + corFeedback dict 正确反馈 {type,npcName,content} + errFeedback dict 错误反馈 + postDialog list 后置对话 +``` + +### 对话选读互动 `mid_dialog_select` + +``` +jsonData: + cType str "mid_dialog_select" + cId, title str + resourceMapping list + preDialog list + imgShowTimingList list + optionList list 可选读句列表 + postDialog list +``` + +### 对话挖空互动 `mid_dialog_fillin` + +``` +jsonData: + cType str "mid_dialog_fillin" + cId, title str + resourceMapping list + preDialog list 前置对话(含挖空位置标记) + imgShowTimingList list +``` + +### 对话组句互动 `mid_dialog_sentence` + +``` +jsonData: + cType str "mid_dialog_sentence" + cId, title str + resourceMapping list + preDialog list 前置对话(含组句片段) + imgShowTimingList list +``` + +--- + +## 📋 信息类(6 种) + +**公共字段:** `cType`, `cId`, `title`, `cDesc`, `question` + +### 信息描写 `mid_message_trace` + +``` +jsonData: + cType str "mid_message_trace" + cId, title str + cDesc str 中文产出描述 + question dict 问题 {desc, ...} + trace dict 描红/标注配置 +``` + +### 信息拼词 `mid_message_spell` + +``` +jsonData: + cType str "mid_message_spell" + cId, title str + cDesc str + question dict 拼词问题 {desc, wordList, ...} + tip str 提示文本 +``` + +### 信息组句 `mid_message_combine` + +``` +jsonData: + cType str "mid_message_combine" + cId, 
title str + cDesc str + question dict 组句问题 {desc, wordList, ...} + tip str +``` + +### 信息补词 `mid_message_fillin` + +``` +jsonData: + cType str "mid_message_fillin" + cId, title str + cDesc str + question dict 补词问题 {desc, stem, optionList, ...} + tip str +``` + +### 信息填词 `mid_message_word` + +``` +jsonData: + cType str "mid_message_word" + cId, title str + cDesc str + question dict 填词问题 {desc, inputConfig, ...} + tip str +``` + +### 信息填句 `mid_message_sentence` + +``` +jsonData: + cType str "mid_message_sentence" + cId, title str + cDesc str + question dict 填句问题 {desc, inputConfig, ...} + tip str +``` + +--- + +## 📦 词汇类(2 种) + +### 物品互动 `mid_vocab_item` + +``` +jsonData: + cType str "mid_vocab_item" + cId, title str + question dict 问题 {desc} + optionList list 选项 [{option, note},...] + answer list 正确答案 [0] + cDesc str 产出描述 + meaning str 能力维度 +``` + +### 图片互动 `mid_vocab_image` + +``` +jsonData: + cType str "mid_vocab_image" + cId, title str + preDialog list 前置对话 [{type,content}] + img dict 图片信息 + question dict 问题 {desc} + answer list 答案 + tip str 提示 +``` + +--- + +## 🖼 图片选择类(4 种) + +**公共字段:** `cType`, `cId`, `title`, `meaning`, `resourceMapping`, `question`, `imageInfo`, `optionList` + +### 图片单选 `mid_image_choose` + +``` +jsonData: + cType str "mid_image_choose" + cId, title str + meaning str 能力维度 + resourceMapping dict NPC映射 + question dict 问题 {desc, content} + imageInfo dict 图片信息 {questionImg, answerImg} + optionList list 选项 [{option},...] 
+``` + +### 图片多选 `mid_image_multiple` + +``` +jsonData: + cType str "mid_image_multiple" + ...同图片单选(公共字段) + optionList 选项更多(多选型) +``` + +### 图片有序 `mid_image_sequence` + +``` +jsonData: + cType str "mid_image_sequence" + ...同图片单选(公共字段) + optionList 选项需按序排列 +``` + +### 图片拖拽 `mid_image_drag` + +``` +jsonData: + cType str "mid_image_drag" + ...同图片单选(公共字段) + optionList 拖拽目标区配置 +``` + +--- + +## 📝 语法类(3 种) + +**公共字段:** `cType`, `cId`, `title`, `preDialog`, `question`, `optionList`, `answer` + +### 材料互动 `mid_sentence_material` + +``` +jsonData: + cType str "mid_sentence_material" + cId, title str + preDialog list 前置对话 [{type,content}] + img dict 配图 + question dict 问题 {type, content} + optionList list 选项 [{option, feedback},...] + answer list 正确答案 +``` + +### 挖空互动 `mid_grammar_cloze` + +``` +jsonData: + cType str "mid_grammar_cloze" + cId, title str + preDialog list 前置对话 + question dict 问题 {type, desc, content}(含挖空标记) + optionList list 选项 [{option, feedback},...] + answer list 答案 + postDialog list 后置对话 +``` + +### 组句互动 `mid_grammar_sentence` + +``` +jsonData: + cType str "mid_grammar_sentence" + cId, title str + preDialog list 前置对话 [{type,npcName,content}] + question dict 问题 {desc, content} + optionList list 选项 [{option},...] + answer list 答案 + tip str 提示 +``` + +--- + +## 🔧 其他(5 种) + +### 指令互动 `mid_vocab_instruction` + +``` +jsonData: + cType str "mid_vocab_instruction" + cId, title str + preDialog list 前置对话 [{type,npcName,content}] + question dict 主问题 {type,npcName,content,mode} + questionList list 子问题 [{desc,optionList,answer},...] + corFeedback dict 正确反馈 {type,npcName,content} + errFeedback dict 错误反馈 +``` + +### 填词互动 `mid_vocab_fillBlank` + +``` +jsonData: + cType str "mid_vocab_fillBlank" + cId, title str + itemName str 物品/词汇名称 + question dict 问题 {desc} + inputConfig list 输入配置 [{stem,value},...] 
+ tip str 提示 + inputConfigExt list 扩展输入配置 +``` + +### 语音互动 `mid_sentence_voice` + +``` +jsonData: + cType str "mid_sentence_voice" + cId, title str + audioFile str 音频文件标识 + preDialog list 前置对话 + itemName str 物品名 + question dict 问题 {desc,type,npcName,content} + optionList list 选项 [{option,feedback},...] +``` + +### 造句互动 `mid_sentence_makeSentence` + +``` +jsonData: + cType str "mid_sentence_makeSentence" + cId, title str + preDialog list 前置对话 [{type,npcName,content}] + itemName str 物品名 + question dict 问题 {desc,mode,hint} + tip str 提示 + corFeedback dict 正确反馈 {type,npcName,content} +``` + +### 发音互动 `mid_pron_pron` + +``` +jsonData: + cType str "mid_pron_pron" + cId, title str + itemName str 单词名 + question dict 问题 {desc,mode,type,content} + cDesc str 产出描述 + meaning str 能力维度 +``` + +--- + +## 审校清单(中互动通用) + +``` +- [ ] jsonData 可正确 json.loads 解析 +- [ ] jsonData.cType 正确(与组件注册表一致) +- [ ] jsonData.cId 非空 +- [ ] kpInfo.cType == jsonData.cType +- [ ] kpInfo.cId == jsonData.cId +- [ ] optionList 有 option 时,answer 索引在范围内 +- [ ] preDialog/postDialog 每项的 type/content 非空 +- [ ] 对话类:resourceMapping 不为空 +- [ ] 图片选择类:imageInfo 非空 +- [ ] 语法类:optionList 每项有 feedback(如结构支持) +``` + +--- + +## 与核心互动的对比 + +| 维度 | 核心互动 | 中互动 | +|------|---------|--------| +| JSON 结构 | 多个独立 JSON 字段(taskData, textData, learningData,...) | 统一 `jsonData` + `kpInfo` | +| 记录数 | 少(1-21 条/表) | 多(1-135 条/表) | +| 使用场景 | 独立任务级 | 剧本内嵌组件 | +| 知识库表 | S0-S3, LV1, Level 2 | S0-S3, LV1, L1-Demo, L2 | +| 公共字段 | ID, dataStatus, kpStatus, kpInfo | kpInfo |