ai_member_xiaoyan/scripts/convert_components_to_json.py

995 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
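"""
Convert the text configuration of the interactive components in the script sheet (Q8AyX5) into structured JSON and write it back to the component-configuration column.
"""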
import json, subprocess, sys, re
# --- Bot Token ---
def get_token():
    """Fetch a Feishu tenant access token for the bot application.

    The app secret is read from the local credentials file and exchanged,
    together with the app id, for a ``tenant_access_token`` by invoking
    ``curl`` against the Feishu auth endpoint.

    Returns:
        The ``tenant_access_token`` string from the response.

    Raises:
        RuntimeError: If ``curl`` exits with an error, the response is not
            valid JSON, or the response carries no token.
    """
    import os

    APP_ID = "cli_a931175d41799cc7"
    config_path = os.path.expanduser('/root/.openclaw/credentials/xiaoyan/config.json')
    with open(config_path, encoding='utf-8') as f:
        cfg = json.load(f)
    APP_SECRET = cfg['apps'][0]['appSecret']

    # The request body is fed through stdin ("-d @-") so that the app secret
    # never shows up in the process argument list.
    r = subprocess.run(
        [
            'curl', '-s', '-X', 'POST',
            'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
            '-H', 'Content-Type: application/json',
            '-d', '@-',
        ],
        input=json.dumps({"app_id": APP_ID, "app_secret": APP_SECRET}),
        capture_output=True,
        text=True,
    )
    if r.returncode != 0:
        raise RuntimeError(
            f"token request failed: curl exited with {r.returncode}: {r.stderr.strip()}"
        )
    try:
        body = json.loads(r.stdout)
    except json.JSONDecodeError as err:
        raise RuntimeError(f"token request returned non-JSON output: {r.stdout!r}") from err

    token = body.get('tenant_access_token') if isinstance(body, dict) else None
    if not token:
        raise RuntimeError(f"token request returned no tenant_access_token: {body}")
    return token
# Token fetched once at import time. main() requests a fresh token of its own
# before writing, so this value is not read by any code visible in this file.
TOKEN = get_token()
# Spreadsheet identifier; write_cell() interpolates it into the values API URL.
SPREADSHEET_TOKEN = "VBozs8u24h4KgdtSSiFc9vHEnBd"
# Sheet id; write_cell() uses it as the prefix of every cell range it writes.
SHEET_ID = "Q8AyX5"
# ============================================================
# Component configs (raw text from sheet column I)
# ============================================================
# Maps a component id (string) to {"type": <component type name>,
# "text": <raw multi-section configuration text>}. The "type" value selects
# the converter through CONVERTERS; the "text" value is divided into sections
# by its 【...】 header lines and parsed by the functions below.
CONFIGS = {
"1217201": {
"type": "图片单选",
"text": """【任务标题】
为包裹找到正确的日期牌子
【情境引入】
Jay : Help me find the right place!
【互动内容】
Find the "Days Ago" sign.(音频)
选项:
00
01
02
答案:
00
辅助信息days ago 指"几天前"
【互动反馈】
正确 Lin : Bingo!
错误 Jay : No, look! This package is from 3 days ago!
【后置对话】
""",
},
"1217202": {
"type": "对话朗读",
"text": """【任务标题】
朗读90天前的包裹信息
【资源配置】
图片时机:互动内容
【情境引入】
【互动内容】
User: It's 90 days ago...(朗读)
【后置对话】
""",
},
"1217203": {
"type": "对话朗读",
"text": """【任务标题】
理解天数和月数的换算
【资源配置】
图片时机:互动内容
【情境引入】
User: No! 90 days! It is about...
【互动内容】
User: 3 months!(朗读)
【后置对话】
User: This is from months ago!""",
},
"1217204": {
"type": "对话组句",
"text": """【任务标题】
用单词组句描述包裹信息
【资源配置】
图片时机:互动内容
【情境引入】
【互动内容】
要求:用所给单词或短语组句
It says 24 months ago.(音频)
选项1it says
选项2months ago
选项324
答案It says 24 months ago.
【互动反馈】
正确 无
错误 Jay : Try again! Read what the package says.
【后置对话】
""",
},
"1217205": {
"type": "对话朗读",
"text": """【任务标题】
理解月数和年数的换算
【资源配置】
图片时机:互动内容
【情境引入】
【互动内容】
User: That's 2 years!(朗读)
【后置对话】
""",
},
"1217206": {
"type": "对话选读",
"text": """【任务标题】
选择你想表达的感受
【资源配置】
【情境引入】
【互动内容】
要求:选择一个你想表达的观点
选项:(音频)
选项1That's a long time ago!
- 反馈 Jay: You are right.
选项2That is long, long ago!
- 反馈 Jay: You are right.
【后置对话】
""",
},
"1217207": {
"type": "图片多选",
"text": """【任务标题】
找出写有months ago的包裹
【情境引入】
【互动内容】
Find the "months ago" packages in the picture.(音频)
选项:
00
01
02
答案:
01
02
辅助信息months ago 指"几个月前"
【互动反馈】
正确 User : Those two are months ago!
错误 Jay : Look again! Which ones say "months ago"?
【后置对话】
""",
},
"1217208": {
"type": "图片单选",
"text": """【任务标题】
找出写有一年前的包裹
【情境引入】
【互动内容】
Find the "year ago" package in the picture.(音频)
选项:
00
01
02
答案:
02
辅助信息a year ago 指"一年前"
【互动反馈】
正确 User : This one is a year ago!
错误 Jay : No, that's not right. Look again!
【后置对话】
""",
},
"1217209": {
"type": "对话挖空",
"text": """【任务标题】
补全对Sunny说的句子
【资源配置】
【情境引入】
【互动内容】
You must ___ it!(音频)
选项1be happy with正确
选项2happy with
【互动反馈】
正确 User : You must be happy with it!
错误 Sunny : That doesn't sound quite right...
【后置对话】
""",
},
"1217210": {
"type": "对话选读",
"text": """【任务标题】
选择帮Grace拿包裹的说法
【资源配置】
【情境引入】
【互动内容】
要求:选择一个你想表达的观点
选项:(音频)
选项1Let me get it!
- 反馈 Grace: Thank you, kid.
选项2I will get it!
- 反馈 Grace: Thank you, kid.
【后置对话】
""",
},
"1217211": {
"type": "对话朗读",
"text": """【任务标题】
对Anna的提醒
【资源配置】
图片时机:无
【情境引入】
【互动内容】
User: You will not be happy with it.(朗读)
【后置对话】
""",
},
"1217212": {
"type": "对话组句",
"text": """【任务标题】
用单词组句主动帮忙
【资源配置】
【情境引入】
【互动内容】
要求:用所给单词或短语组句
Can I get it for you?(音频)
选项1for you
选项2can I
选项3get it
答案Can I get it for you?
【互动反馈】
正确 Jack : You are very kind. But I just want to say...
错误 Jack: I beg your pardon?
【后置对话】
""",
},
"1217213": {
"type": "对话挖空",
"text": """【任务标题】
补全对Jack说的话
【资源配置】
图片时机:互动内容互动反馈
【情境引入】
【互动内容】
But this meat is from 2 ___ !(音频)
选项1years ago正确
选项2days ago
【互动反馈】
正确 Jack : Perfect!
错误 Jack : No, look at the sign on it!
【后置对话】
""",
},
"1217214": {
"type": "听力拖拽",
"text": """【任务标题】
告诉 Lin 你们分发包裹的事迹
【任务背景】
包裹大作战!你和 Jay 热火朝天地干了半天,把很多包裹带给了他们的主人。快来回顾一下你们的战果吧!
【通关知识】
get v. 收到
month n. 月
year n. 年
ago adv. 以前
... month(s)/year(s) ago.
【开场语】
Lin: Come on, tell me what you did!
【听力文本】
# 文本 1
Jay: Well, well, well! Listen up!
Jay: Tom gets a pen!
User: It is from 5 days ago!
Jay: And Sunny gets a dress.
Jay: It is from 6 months ago.
Jay: Jack gets some meat from 2 years ago!
【题目信息】
#单空选择
选项图片编号00,01,02
答案图片编号:
01,00,02
【学习过程】
句子 1
It is from 5 days ago!
【ago】
句子 2
It is from 6 months ago. 【month】
句子 3
Jack gets some meat from 2 years ago! 【year】""",
},
"1217215": {
"type": "对话选读",
"text": """【任务标题】
选择表达满意的方式
【资源配置】
【情境引入】
【互动内容】
要求:选择一个你想表达的观点
选项:(音频)
选项1I am happy with it!
- 反馈 Jay: That's good!
选项2I am happy with the result!
- 反馈 Jay: That's good!
【后置对话】
""",
},
"1217216": {
"type": "对话挖空",
"text": """【任务标题】
补全User想要包裹的句子
【资源配置】
【情境引入】
【互动内容】
I want to ___ one for myself!(音频)
选项1get正确
选项2get up
【互动反馈】
正确 无
错误 Jay : Hmm, that's not how we say it. Try again!
【后置对话】
""",
},
"1217217": {
"type": "对话朗读",
"text": """【任务标题】
朗读收到帽子的喜悦
【资源配置】
图片时机:无
【情境引入】
【互动内容】
User: Now I get my own hat!(朗读)
【后置对话】
""",
},
}
# ============================================================
# Parsers for each component type
# ============================================================
def extract_section(text, key):
    """Return the body of the ``【key】`` section of a component config text.

    A section starts at a line beginning with the header ``【key】`` and runs
    until the next line that consists solely of a ``【...】`` header, or to the
    end of the text.

    Args:
        text: Raw multi-section configuration text.
        key: Section name, with or without the surrounding ``【】`` brackets.

    Returns:
        The stripped section body (``""`` for a section that is present but
        empty), or ``None`` when the header does not occur or ``text`` is empty.

    Note:
        Any line made up only of ``【...】`` ends the section, so a knowledge
        point written alone on its own line (as in 【学习过程】) truncates it.
    """
    if not text:
        return None

    name = key.strip()
    # Tolerate callers that pass the header with its brackets attached.
    if name.startswith('【') and name.endswith('】'):
        name = name[1:-1]

    # Anchor on the bracketed header at the start of a line; searching for the
    # bare key would also hit ordinary content such as "图片时机:互动内容".
    header = re.search(rf'^[ \t]*【{re.escape(name)}】', text, re.MULTILINE)
    if header is None:
        return None

    remainder = text[header.end():]
    # The next header may sit on the very next line (empty section) or on the
    # last line of the text without a trailing newline.
    following = re.search(r'\n[ \t\r]*【[^】\n]+】[ \t\r]*(?=\n|\Z)', remainder)
    body = remainder if following is None else remainder[:following.start()]
    return body.strip()
def parse_context(text):
    """Parse a context / dialogue section into a list of entries.

    Each non-blank line of the form ``speaker : words`` becomes
    ``{"character": speaker, "line": words}``; the split happens at the first
    colon, ASCII or full-width (U+FF1A). A line without any colon is kept as a
    plain string.

    Args:
        text: Section body, possibly ``None`` or empty.

    Returns:
        A list of parsed entries, or ``None`` when there is nothing to parse.
    """
    if not text:
        return None

    colon = re.compile(r'[:\uff1a]')
    entries = []
    for raw in text.split('\n'):
        line = raw.strip()
        if not line:
            continue
        sep = colon.search(line)
        if sep is None:
            # Narration or other free text without a speaker.
            entries.append(line)
            continue
        entries.append({
            "character": line[:sep.start()].strip(),
            "line": line[sep.end():].strip(),
        })
    return entries or None
def parse_feedback(text):
    """Parse the feedback section into correct / incorrect responses.

    Lines are expected to look like ``正确 speaker : words`` and
    ``错误 speaker : words``. The speaker and the words are separated by the
    first colon, ASCII or full-width (U+FF1A).

    Args:
        text: Section body, possibly ``None`` or empty.

    Returns:
        ``{"correct": entry, "incorrect": entry}`` where each entry is
        ``{"character": ..., "line": ...}``, or ``None`` when the line is
        absent, has no speaker/colon (e.g. ``正确 无``), or has no words.
    """
    result = {"correct": None, "incorrect": None}
    if not text:
        return result

    colon = re.compile(r'[:\uff1a]')
    labels = (('正确', 'correct'), ('错误', 'incorrect'))
    for raw in text.split('\n'):
        line = raw.strip()
        for label, slot in labels:
            if not line.startswith(label):
                continue
            content = line[len(label):].strip()
            sep = colon.search(content)
            entry = None
            if sep is not None:
                words = content[sep.end():].strip()
                # A speaker without any words carries no feedback.
                if words:
                    entry = {"character": content[:sep.start()].strip(), "line": words}
            result[slot] = entry
            break
    return result
def parse_selective_options(text):
    """Parse the options of a 对话选读 (selective reading) component.

    An option line looks like ``选项N:text``. If the line immediately after it
    looks like ``- 反馈 speaker: words`` it is attached to that option as
    feedback. Both ASCII and full-width (U+FF1A) colons are accepted.

    Args:
        text: Interaction section body, possibly ``None`` or empty.

    Returns:
        A list of ``{"index": int, "text": str, "feedback": dict | None}``
        in source order.
    """
    options = []
    if not text:
        return options

    option_re = re.compile(r'选项(\d+)[:\uff1a]\s*(.+)')
    feedback_re = re.compile(r'[-\u2013\u2014]\s*反馈\s*([^:\uff1a]+)[:\uff1a]\s*(.+)')

    previous = None  # option created by the immediately preceding line, if any
    for raw in text.strip().split('\n'):
        line = raw.strip()
        if previous is not None:
            owner, previous = previous, None
            fm = feedback_re.match(line)
            if fm:
                owner["feedback"] = {
                    "character": fm.group(1).strip(),
                    "line": fm.group(2).strip(),
                }
                continue
        m = option_re.match(line)
        if m:
            previous = {
                "index": int(m.group(1)),
                "text": m.group(2).strip(),
                "feedback": None,
            }
            options.append(previous)
    return options
def parse_image_options(text):
    """Collect the two-digit image ids listed under the ``选项:`` header.

    Scanning starts after the first ``选项:`` line and stops at the first
    following line that is not exactly two digits.
    """
    two_digits = re.compile(r'^\d{2}$')
    collected = []
    header_seen = False
    for entry in (raw.strip() for raw in text.strip().split('\n')):
        if entry == '选项:':
            header_seen = True
        elif header_seen:
            if not two_digits.match(entry):
                break
            collected.append(entry)
    return collected
def parse_fill_options(text):
    """Parse fill-in-the-blank options of the form ``选项N:text(正确)``.

    The ``(正确)`` marker flags the correct option and is removed from the
    option text. The colon and the parentheses may be ASCII or full-width.

    Args:
        text: Interaction section body, possibly ``None`` or empty.

    Returns:
        A list of ``{"index": int, "text": str, "correct": bool}``.
    """
    options = []
    if not text:
        return options

    option_re = re.compile(r'选项(\d+)[:\uff1a]\s*(.+)')
    marker_re = re.compile(r'[(\uff08]正确[)\uff09]')
    for raw in text.strip().split('\n'):
        m = option_re.match(raw.strip())
        if not m:
            continue
        opt_text = m.group(2).strip()
        correct = marker_re.search(opt_text) is not None
        if correct:
            opt_text = marker_re.sub('', opt_text).strip()
        options.append({"index": int(m.group(1)), "text": opt_text, "correct": correct})
    return options
def parse_sentence_options(text):
    """Parse sentence-building options of the form ``选项N:text``.

    The separator may be an ASCII or a full-width (U+FF1A) colon; lines that
    are not option lines are ignored.

    Args:
        text: Interaction section body, possibly ``None`` or empty.

    Returns:
        A list of ``{"index": int, "text": str}`` in source order.
    """
    if not text:
        return []
    option_re = re.compile(r'选项(\d+)[:\uff1a]\s*(.+)')
    matches = (option_re.match(raw.strip()) for raw in text.strip().split('\n'))
    return [
        {"index": int(m.group(1)), "text": m.group(2).strip()}
        for m in matches
        if m
    ]
# ============================================================
# Converters
# ============================================================
def convert_choice_image(cid, text):
    """Convert a 图片单选 / 图片多选 (image choice) config text to a dict.

    The first line of the 【互动内容】 section is the instruction (an ``(音频)``
    marker on it sets ``audio``). The remaining lines are read as a
    ``选项:`` list of two-digit image ids, a ``答案:`` list of two-digit
    answers (or one inline answer), and a ``辅助信息`` hint line. Colons may be
    ASCII or full-width.

    Args:
        cid: Component id; used to look up the component type in ``CONFIGS``.
        text: Raw multi-section configuration text.

    Returns:
        A dict with componentType, taskTitle, contextIntro, interaction,
        feedback and postDialogue entries.
    """
    # A missing section is treated as empty rather than crashing on None.
    raw_interaction = extract_section(text, '互动内容') or ''
    context = extract_section(text, '情境引入')
    feedback = extract_section(text, '互动反馈')
    post = extract_section(text, '后置对话')

    lines = [ln.strip() for ln in raw_interaction.strip().split('\n')]
    instruction = lines[0]
    audio = '(音频)' in instruction
    instruction = instruction.replace('(音频)', '').strip()

    two_digits = re.compile(r'^\d{2}$')
    options_header = re.compile(r'选项\s*[:\uff1a]')
    answer_header = re.compile(r'答案\s*[:\uff1a]')
    hint_label = re.compile(r'辅助信息\s*[:\uff1a]?')

    options = []
    answers = []
    hint = None
    mode = None  # which list subsequent two-digit lines belong to
    for line in lines[1:]:
        if options_header.fullmatch(line):
            mode = 'options'
            continue
        if mode == 'options' and two_digits.match(line):
            options.append(line)
            continue
        found = answer_header.search(line)
        if found:
            mode = 'answers'
            inline = line[found.end():].strip()
            if inline:
                answers.append(inline)
            continue
        if '辅助信息' in line:
            mode = None
            if hint is None:
                # Drop the label together with its colon, whichever kind
                # (or none) the sheet used.
                hint = hint_label.sub('', line, count=1).strip()
            continue
        if mode == 'answers':
            if two_digits.match(line):
                answers.append(line)
            else:
                mode = None

    return {
        "componentType": CONFIGS[cid]['type'],
        "taskTitle": extract_section(text, '任务标题'),
        "contextIntro": parse_context(context),
        "interaction": {
            "instruction": instruction,
            "audio": audio,
            "options": options,
            "answers": answers,
            "hint": hint
        },
        "feedback": parse_feedback(feedback),
        "postDialogue": parse_context(post)
    }
def convert_reading(cid, text):
    """Build the JSON structure for a 对话朗读 (read-aloud) component.

    The sentence is the 【互动内容】 body with the ``(朗读)`` marker removed;
    ``audio`` is always true for this component type.
    """
    spoken = extract_section(text, '互动内容') or ''
    spoken = spoken.replace('(朗读)', '').strip()

    # Keys are inserted in the order they should appear in the emitted JSON.
    payload = {"componentType": "对话朗读"}
    payload["taskTitle"] = extract_section(text, '任务标题')
    payload["resourceConfig"] = extract_section(text, '资源配置') or None
    payload["contextIntro"] = parse_context(extract_section(text, '情境引入'))
    payload["interaction"] = {"sentence": spoken, "audio": True}
    payload["postDialogue"] = parse_context(extract_section(text, '后置对话'))
    return payload
def convert_sentence_building(cid, text):
    """Convert a 对话组句 (sentence building) config text to a dict.

    The 【互动内容】 section is read as: an optional ``要求:`` requirement line,
    the target sentence (the line carrying the ``(音频)`` marker), then
    ``选项N:`` word options, a ``答案`` answer line and an optional ``辅助信息``
    hint line. Colons may be ASCII or full-width; after 答案 / 辅助信息 the
    colon may also be absent.

    Args:
        cid: Component id (not used by this converter).
        text: Raw multi-section configuration text.

    Returns:
        A dict with componentType, taskTitle, resourceConfig, contextIntro,
        interaction, feedback and postDialogue entries.
    """
    # A missing section is treated as empty rather than crashing on None.
    raw_interaction = extract_section(text, '互动内容') or ''
    resource = extract_section(text, '资源配置') or None
    context = extract_section(text, '情境引入')
    feedback = extract_section(text, '互动反馈')
    post = extract_section(text, '后置对话')

    lines = [ln.strip() for ln in raw_interaction.strip().split('\n')]
    option_re = re.compile(r'选项(\d+)[:\uff1a]\s*(.+)')
    answer_re = re.compile(r'答案\s*[:\uff1a]?\s*(.*)')
    hint_re = re.compile(r'辅助信息\s*[:\uff1a]?\s*(.*)')

    requirement = None
    sentence = None
    audio = True
    options = []
    answer = None
    hint = None

    start = 0
    req = re.match(r'要求\s*[:\uff1a]\s*(.*)', lines[0])
    if req:
        requirement = req.group(1).strip()
        start = 1

    # Locate the sentence line; stop early if the options begin first.
    for pos, line in enumerate(lines[start:], start):
        if '(音频)' in line:
            sentence = line.replace('(音频)', '').strip()
            start = pos + 1
            break
        if re.match(r'选项\d+', line):
            start = pos
            break

    for line in lines[start:]:
        m = option_re.match(line)
        if m:
            options.append({"index": int(m.group(1)), "text": m.group(2).strip()})
            continue
        m = answer_re.match(line)
        if m:
            answer = m.group(1).strip()
            continue
        m = hint_re.match(line)
        if m:
            hint = m.group(1).strip()

    return {
        "componentType": "对话组句",
        "taskTitle": extract_section(text, '任务标题'),
        "resourceConfig": resource,
        "contextIntro": parse_context(context),
        "interaction": {
            "requirement": requirement,
            "sentence": sentence,
            "audio": audio,
            "options": options,
            "answer": answer,
            "hint": hint
        },
        "feedback": parse_feedback(feedback),
        "postDialogue": parse_context(post)
    }
def convert_selective_reading(cid, text):
    """Build the JSON structure for a 对话选读 (selective reading) component.

    The first line of 【互动内容】 is the requirement (its ``要求:`` prefix is
    removed when present); the options are parsed from the whole section.
    """
    body = extract_section(text, '互动内容')
    resource_config = extract_section(text, '资源配置') or None
    intro = extract_section(text, '情境引入')
    closing = extract_section(text, '后置对话')

    first_line = body.strip().split('\n')[0].strip()
    if first_line.startswith('要求:'):
        requirement = first_line.replace('要求:', '').strip()
    else:
        requirement = first_line

    interaction = {
        "requirement": requirement,
        "audio": '(音频)' in body,
        "options": parse_selective_options(body),
    }
    return {
        "componentType": "对话选读",
        "taskTitle": extract_section(text, '任务标题'),
        "resourceConfig": resource_config,
        "contextIntro": parse_context(intro),
        "interaction": interaction,
        "postDialogue": parse_context(closing),
    }
def convert_fill_blanks(cid, text):
    """Convert a 对话挖空 (fill in the blank) config text to a dict.

    In the 【互动内容】 section the sentence is the non-option line carrying
    the ``(音频)`` marker, and each ``选项N:text`` line is an option; an
    option tagged ``(正确)`` is the correct one. Colons and the parentheses
    around 正确 may be ASCII or full-width.

    Args:
        cid: Component id (not used by this converter).
        text: Raw multi-section configuration text.

    Returns:
        A dict with componentType, taskTitle, resourceConfig, contextIntro,
        interaction, feedback and postDialogue entries.
    """
    # A missing section is treated as empty rather than crashing on None.
    raw_interaction = extract_section(text, '互动内容') or ''
    resource = extract_section(text, '资源配置') or None
    context = extract_section(text, '情境引入')
    feedback = extract_section(text, '互动反馈')
    post = extract_section(text, '后置对话')

    option_re = re.compile(r'选项(\d+)[:\uff1a]\s*(.+)')
    marker_re = re.compile(r'[(\uff08]正确[)\uff09]')

    sentence = None
    options = []
    for raw in raw_interaction.strip().split('\n'):
        line = raw.strip()
        if '(音频)' in line and not line.startswith('选项'):
            sentence = line.replace('(音频)', '').strip()
        m = option_re.match(line)
        if m:
            opt_text = m.group(2).strip()
            correct = marker_re.search(opt_text) is not None
            opt_text = marker_re.sub('', opt_text).strip()
            options.append({"index": int(m.group(1)), "text": opt_text, "correct": correct})

    return {
        "componentType": "对话挖空",
        "taskTitle": extract_section(text, '任务标题'),
        "resourceConfig": resource,
        "contextIntro": parse_context(context),
        "interaction": {
            "sentence": sentence,
            "audio": True,
            "options": options
        },
        "feedback": parse_feedback(feedback),
        "postDialogue": parse_context(post)
    }
def convert_listening_drag(cid, text):
    """Convert a 听力拖拽 (listening drag) config text to core_listening_drag JSON.

    Sections used: 任务标题, 任务背景, 通关知识, 开场语, 听力文本, 题目信息 and
    学习过程. Colons and commas may be ASCII or full-width.

    Args:
        cid: Component id, copied into ``taskData.cId``.
        text: Raw multi-section configuration text.

    Returns:
        A dict with componentType, cType, taskData, dialogList, preDialog,
        questionList, learningData, audioText and knowledgeSummary entries.
    """
    colon_re = re.compile(r'[:\uff1a]')

    def split_ids(chunk):
        """Split a comma-separated id list into stripped, non-empty ids."""
        return [part.strip() for part in re.split(r'[,\uff0c]', chunk) if part.strip()]

    def value_after(label, line):
        """Return what follows ``label`` and an optional colon on ``line``."""
        tail = line.split(label, 1)[1]
        return re.sub(r'^\s*[:\uff1a]?\s*', '', tail).strip()

    task_title = extract_section(text, '任务标题')
    task_bg = extract_section(text, '任务背景') or ''
    knowledge = extract_section(text, '通关知识') or ''
    opening = extract_section(text, '开场语') or ''
    audio_text = extract_section(text, '听力文本') or ''
    question_info = extract_section(text, '题目信息') or ''

    # Listening text -> dialogue entries; "#" lines are headings and lines
    # without a speaker separator are ignored.
    dialog_list = []
    for raw in audio_text.strip().split('\n'):
        line = raw.strip()
        if not line or line.startswith('#'):
            continue
        sep = colon_re.search(line)
        if sep:
            dialog_list.append({
                "character": line[:sep.start()].strip(),
                "line": line[sep.end():].strip(),
            })

    # Question info: option ids sit on the label line; answer ids sit either
    # on the label line or on the line right after it.
    option_images = []
    answer_images = []
    q_lines = [ln.strip() for ln in question_info.strip().split('\n')]
    for idx, line in enumerate(q_lines):
        if '选项图片编号' in line:
            option_images = split_ids(value_after('选项图片编号', line))
        elif '答案图片编号' in line:
            inline = value_after('答案图片编号', line)
            if not inline and idx + 1 < len(q_lines):
                inline = q_lines[idx + 1]
            answer_images = split_ids(inline)

    # 【学习过程】 is the last section and contains inline 【kp】 markers, so it
    # is taken as everything after its header instead of via extract_section.
    lp_match = re.search(r'【学习过程】\s*\n(.*)', text, re.DOTALL)
    learning = lp_match.group(1).strip() if lp_match else ''

    learning_steps = []
    for block in re.split(r'\n(?=句子\s*\d*)', learning):
        block = block.strip()
        if not block:
            continue
        # Remove the "句子 N" header line.
        block = re.sub(r'^句子\s*\d*\s*\n?', '', block).strip()
        if not block:
            continue
        kp_match = re.search(r'【(.+?)】', block)
        if kp_match:
            kp = kp_match.group(1).strip()
            sentence = block[:kp_match.start()].strip()
        else:
            kp = ""
            sentence = block
        if sentence or kp:
            learning_steps.append({"sentence": sentence, "knowledgePoint": kp})

    scene_desc = f"{task_bg}\n\n{opening}"
    return {
        "componentType": "听力拖拽",
        "cType": "core_listening_drag",
        "taskData": {
            "cType": "core_listening_drag",
            "cId": cid,
            "title": task_title,
            "sceneDesc": scene_desc,
            # NOTE(review): hard-coded keyword list, not derived from the
            # 【通关知识】 section - confirm this is intended.
            "key": "ago, month, year, get"
        },
        "dialogList": dialog_list,
        "preDialog": [],
        "questionList": [
            {
                "type": "drag_match",
                "optionImages": option_images,
                "answerImages": answer_images,
                "itemCount": len(option_images)
            }
        ],
        "learningData": {
            "learningPart": learning_steps,
            "closing": ""
        },
        "audioText": audio_text,
        "knowledgeSummary": knowledge
    }
# ============================================================
# Main conversion
# ============================================================
# Dispatch table: component type name (the "type" value of a CONFIGS entry)
# -> converter function. main() calls each converter as converter(cid, text).
CONVERTERS = {
"图片单选": convert_choice_image,
"图片多选": convert_choice_image,
"对话朗读": convert_reading,
"对话组句": convert_sentence_building,
"对话选读": convert_selective_reading,
"对话挖空": convert_fill_blanks,
"听力拖拽": convert_listening_drag,
}
# Component row mapping (from sheet data: which row has which component ID).
# main() writes each component's JSON to column I of the row listed here.
COMPONENT_ROWS = {
"1217201": 33,
"1217202": 40,
"1217203": 43,
"1217204": 50,
"1217205": 51,
"1217206": 53,
"1217207": 68,
"1217208": 70,
"1217209": 89,
"1217210": 95,
"1217211": 107,
"1217212": 120,
"1217213": 123,
"1217214": 157,
"1217215": 164,
"1217216": 168,
"1217217": 173,
}
def write_cell(row, col_letter, value, token):
    """Write a single cell of the sheet through the Feishu values API.

    Args:
        row: 1-based row number of the target cell.
        col_letter: Column letter of the target cell, e.g. ``'I'``.
        value: Cell content to write.
        token: Tenant access token sent as the bearer credential.

    Returns:
        ``True`` when the API answers with ``code == 0``; ``False`` when curl
        fails, the response is not JSON, or the API reports an error. Failures
        are reported on stderr and never raised, so one bad cell does not
        abort the remaining writes.
    """
    cell_range = f"{SHEET_ID}!{col_letter}{row}:{col_letter}{row}"
    payload = {
        "valueRange": {
            "range": cell_range,
            "values": [[value]]
        }
    }
    r = subprocess.run([
        'curl', '-s', '-S', '-X', 'PUT',
        f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values',
        '-H', f'Authorization: Bearer {token}',
        '-H', 'Content-Type: application/json',
        '-d', json.dumps(payload, ensure_ascii=False)
    ], capture_output=True, text=True)

    if r.returncode != 0:
        print(f" ❌ Write failed: curl exited with {r.returncode}: {r.stderr.strip()}",
              file=sys.stderr)
        return False
    try:
        result = json.loads(r.stdout)
    except json.JSONDecodeError:
        # Empty or non-JSON body (e.g. a proxy error page).
        print(f" ❌ Write failed: unexpected response {r.stdout!r}", file=sys.stderr)
        return False
    if not isinstance(result, dict) or result.get('code') != 0:
        print(f" ❌ Write failed: {result}", file=sys.stderr)
        return False
    return True
def main():
    """Convert every configured component to JSON, save it, and write it back.

    Steps:
      1. Run the converter registered for each entry of ``CONFIGS``; a
         failing component is reported on stderr and skipped.
      2. Print the generated JSON and save all of it to the output file.
      3. Fetch a fresh token and write each JSON string to column I of the
         row listed in ``COMPONENT_ROWS``.
    """
    import os
    import traceback

    results = {}
    for cid, cfg in CONFIGS.items():
        ctype = cfg['type']
        converter = CONVERTERS.get(ctype)
        if not converter:
            print(f"⚠️ Unknown type {ctype} for {cid}", file=sys.stderr)
            continue
        try:
            json_data = converter(cid, cfg['text'])
            results[cid] = json_data
            print(f"{cid} ({ctype}) → JSON OK")
        except Exception as e:
            # Top-level boundary: report the failure and keep converting the
            # remaining components.
            print(f"{cid} ({ctype}) → ERROR: {e}", file=sys.stderr)
            traceback.print_exc()

    # Print all JSONs for review
    print("\n" + "=" * 60)
    print("GENERATED JSONS")
    print("=" * 60)
    for cid, data in results.items():
        print(f"\n--- {cid} ({data['componentType']}) ---")
        print(json.dumps(data, ensure_ascii=False, indent=2))

    # Save to file for inspection; create the directory first so a missing
    # output folder does not abort the run before the sheet is updated.
    output_path = '/root/.openclaw/workspace-xiaoyan/output/component_jsons.json'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n📁 Saved to output/component_jsons.json")

    # Write back to sheet
    print("\n" + "=" * 60)
    print("WRITING BACK TO SHEET")
    print("=" * 60)
    # Re-fetch token (may have expired)
    token = get_token()
    success = 0
    fail = 0
    for cid, data in results.items():
        row = COMPONENT_ROWS.get(cid)
        if not row:
            print(f"⚠️ {cid}: no row mapping, skipped")
            continue
        json_str = json.dumps(data, ensure_ascii=False)
        if write_cell(row, 'I', json_str, token):
            print(f"{cid} → row {row} written")
            success += 1
        else:
            fail += 1
    print(f"\nDone: {success} written, {fail} failed")


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()