ai_member_xiaoyan/scripts/produce_speaking_p4.py

393 lines
14 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
口语-P4-看图识物 内容生产、回填、审校
生产范围3条记录100001, 110101, 110201
"""
import json, subprocess, sys, os
# Identifiers of the target Bitable app/table; both are interpolated into the
# request URLs built by the API helpers in this file.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
TABLE_ID = "tblsD2dxaRpLmkXD"
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"

# Load credentials.
# The encoding is pinned to UTF-8 so that decoding the JSON file does not
# depend on the locale of the host running the script.
with open(CRED_FILE, encoding="utf-8") as f:
    cred = json.load(f)

# Only the first entry of the "apps" list is used.
APP_ID = cred['apps'][0]['appId']
APP_SECRET = cred['apps'][0]['appSecret']
def get_token():
    """Request a tenant access token from the Feishu auth endpoint.

    Posts ``APP_ID``/``APP_SECRET`` as a JSON body via ``curl``. The body is
    fed to curl on stdin (``-d @-``) rather than as a command-line argument so
    the app secret does not show up in the host's process list.

    Returns:
        The value of ``tenant_access_token`` from the decoded reply.

    Raises:
        RuntimeError: if curl exits non-zero, the reply is not valid JSON, or
            the reply carries no ``tenant_access_token``.
    """
    payload = json.dumps({"app_id": APP_ID, "app_secret": APP_SECRET})
    proc = subprocess.run(
        [
            "curl", "-s", "-X", "POST",
            "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
            "-H", "Content-Type: application/json",
            # "@-" tells curl to read the request body from stdin.
            "-d", "@-",
        ],
        input=payload,
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        raise RuntimeError(
            f"curl exited with status {proc.returncode} while requesting "
            f"the tenant access token: {proc.stderr.strip()}"
        )
    try:
        reply = json.loads(proc.stdout)
    except json.JSONDecodeError as err:
        raise RuntimeError(
            "token endpoint returned a non-JSON response"
        ) from err

    token = reply.get("tenant_access_token") if isinstance(reply, dict) else None
    if not token:
        # The reply is echoed for diagnosis; it is the server's answer and
        # does not contain the app secret that was sent.
        raise RuntimeError(f"token endpoint returned no tenant_access_token: {reply}")
    return token
# Tenant access token, requested once when the script starts; list_records,
# create_record and get_record all send it as the Bearer credential.
TOKEN = get_token()
def list_records():
    """Fetch one page of records from the configured Bitable table.

    Runs ``curl`` against the records endpoint for ``APP_TOKEN``/``TABLE_ID``
    with ``page_size=100`` and the module-level ``TOKEN`` as Bearer
    credential. Only this single request is made; no further pages are read.

    Returns:
        The JSON-decoded response body.
    """
    url = (
        "https://open.feishu.cn/open-apis/bitable/v1/apps/"
        f"{APP_TOKEN}/tables/{TABLE_ID}/records?page_size=100"
    )
    command = [
        "curl", "-s", "-X", "GET",
        url,
        "-H", f"Authorization: Bearer {TOKEN}",
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
def create_record(fields):
    """Create one record in the configured Bitable table.

    Args:
        fields: mapping of column name to value; it is wrapped as
            ``{"fields": fields}`` and serialized without ASCII escaping.

    Returns:
        The JSON-decoded response body of the POST request.
    """
    url = (
        "https://open.feishu.cn/open-apis/bitable/v1/apps/"
        f"{APP_TOKEN}/tables/{TABLE_ID}/records"
    )
    payload = json.dumps({"fields": fields}, ensure_ascii=False)
    command = [
        "curl", "-s", "-X", "POST",
        url,
        "-H", f"Authorization: Bearer {TOKEN}",
        "-H", "Content-Type: application/json",
        "-d", payload,
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
def get_record(record_id):
    """Read a single record of the configured Bitable table back by its id.

    Args:
        record_id: identifier interpolated into the request URL.

    Returns:
        The JSON-decoded response body of the GET request.
    """
    url = (
        "https://open.feishu.cn/open-apis/bitable/v1/apps/"
        f"{APP_TOKEN}/tables/{TABLE_ID}/records/{record_id}"
    )
    command = [
        "curl", "-s", "-X", "GET",
        url,
        "-H", f"Authorization: Bearer {TOKEN}",
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
def build_explanation(question_text, image_desc, answer_word, is_color=False):
    """Build the detailed Chinese explanation for one question.

    The result has five newline-separated lines: the question, the image
    description, the tested ability, the scoring criteria, and answering
    guidance that contains a sample English sentence.

    Args:
        question_text: the English question shown to the student.
        image_desc: Chinese description of the picture.
        answer_word: the expected English answer word.
        is_color: when true the sample sentence is "It's <word>." and the
            guidance refers to colour words.

    Returns:
        The explanation as a single string.

    For non-colour questions the sample sentence is chosen as follows:
    a question starting with "What are" (e.g. "What are these?") gets
    "They're <word>."; otherwise "It's a/an <word>." with "an" before a
    vowel letter. This is a spelling-based heuristic: uncountable nouns
    (e.g. "bread") and sound-based exceptions still receive "a"/"an".
    """
    if is_color:
        sample = f"It's {answer_word}."
        target = "根据图片颜色准确说出对应的颜色单词"
    else:
        if question_text.strip().lower().startswith("what are"):
            # Plural question: "It's a mice." would be ungrammatical.
            sample = f"They're {answer_word}."
        else:
            # Tuple membership (not substring) so an empty word yields "a".
            starts_with_vowel = answer_word[:1].lower() in ("a", "e", "i", "o", "u")
            article = "an" if starts_with_vowel else "a"
            sample = f"It's {article} {answer_word}."
        target = "根据图片内容准确说出对应的英文单词"

    return (
        f"回答要点: {question_text}\n"
        f"图片内容: {image_desc}\n"
        f"考察能力: 图文匹配\n"
        f"评估标准: 语音语调准确性、语言流利度、内容完整性与相关性、语法准确性\n"
        f"回答指导: 鼓励学生用完整句子作答,如\"{sample}\",{target}"
    )
def build_short_explanation(image_desc, answer_word, is_color=False):
    """Return the one-line explanation used in the human-readable config.

    Args:
        image_desc: accepted for signature symmetry; not used in the output.
        answer_word: the English answer word; it is inserted twice.
        is_color: selects the colour wording instead of the object wording.
    """
    template = (
        "图片中展示的颜色是{word},对应的颜色单词是 {word}"
        if is_color
        else "图片中是{word},对应的英文单词是 {word}"
    )
    return template.format(word=answer_word)
def build_human_config(qs_id, asr_prompt, questions, text_title="Look and answer."):
    """Render the human-readable question configuration text.

    Args:
        qs_id: accepted but not used in the rendered text.
        asr_prompt: hot-word string placed on its own line.
        questions: iterable of dicts providing ``imageDesc``, ``question``,
            ``ability`` (a list of strings, concatenated) and
            ``shortExplanation``.
        text_title: instruction shown on the first line.

    Returns:
        A newline-joined string: a fixed header followed by one numbered
        six-line section per question (the last line of each is blank).
    """
    header = [f"做题要求:{text_title}", "", "热词:", asr_prompt, "", "问题:"]
    sections = []
    for number, item in enumerate(questions, start=1):
        sections += [
            f"{number}. ",
            f"图片:{item['imageDesc']}",
            f"题目:{item['question']}",
            f"能力:{''.join(item['ability'])}",
            f"解析:{item['shortExplanation']}",
            "",
        ]
    return "\n".join(header + sections)
# =============================================
# DATA DEFINITIONS
# =============================================
# One entry per question-set ID. Each entry carries two word lists ("first"
# and "second"), a flag saying whether the list is asked as a colour question,
# and one Chinese image description per word (same order as the words).
# "second_override_questions" maps a 0-based index in the second list to a
# question text that replaces the default one for that word.
records_data = [
    # --- ID: 100001 ---
    {
        "questionSetID": "100001",
        "textTitle": "Look and answer.",
        "first_words": ["blue", "red", "pink", "green", "orange", "purple"],
        "first_is_color": True,
        "first_image_descs": [
            "白色的背景,中间是一颗蓝色的星星。",
            "白色的背景,中间是一个红色的气球。",
            "白色的背景,中间是一朵粉红色的花。",
            "白色的背景,中间是一片绿色的叶子。",
            "白色的背景,中间是一个橙色的橘子。",
            "白色的背景,中间是一串紫色的葡萄。",
        ],
        "second_words": ["bag", "dress", "jacket", "hat", "T-shirt"],
        "second_is_color": False,
        "second_image_descs": [
            "白色的背景,中间是一个书包。",
            "白色的背景,中间是一条连衣裙。",
            "白色的背景,中间是一件夹克衫。",
            "白色的背景,中间是一顶帽子。",
            # The separating comma was missing here, unlike every other
            # description in this file.
            "白色的背景,中间是一件T恤衫。",
        ],
    },
    # --- ID: 110101 ---
    {
        "questionSetID": "110101",
        "textTitle": "Look and answer.",
        "first_words": ["hair", "eye", "nose", "monster", "foot", "hand"],
        "first_is_color": False,
        "first_image_descs": [
            "白色的背景,中间是一个人的头发。",
            "白色的背景,中间是一只眼睛。",
            "白色的背景,中间是一个鼻子。",
            "白色的背景,中间是一只可爱的小怪兽。",
            "白色的背景,中间是一只脚。",
            "白色的背景,中间是一只手。",
        ],
        "second_words": ["black", "brown", "colour", "colour", "white", "yellow"],
        "second_is_color": True,
        "second_image_descs": [
            "白色的背景,中间是一只黑色的猫。",
            "白色的背景,中间是一只棕色的小狗。",
            "白色的背景,中间是一个彩色的调色盘。",
            "白色的背景,中间是一道彩虹。",
            "白色的背景,中间是一只白色的兔子。",
            "白色的背景,中间是一朵黄色的向日葵。",
        ],
        # For colour (not a typical color answer), use "What's this?"
        # instead of "What colour is it?"
        "second_override_questions": {
            2: "What's this?",  # colour - palette
            3: "What's this?",  # colour - rainbow
        },
    },
    # --- ID: 110201 ---
    {
        "questionSetID": "110201",
        "textTitle": "Look and answer.",
        "first_words": ["bread", "pie", "cake", "candy", "chocolate", "ice cream"],
        "first_is_color": False,
        "first_image_descs": [
            "白色的背景,中间是一片面包。",
            "白色的背景,中间是一块馅饼。",
            "白色的背景,中间是一块蛋糕。",
            "白色的背景,中间是一颗糖果。",
            "白色的背景,中间是一块巧克力。",
            "白色的背景,中间是一个冰淇淋。",
        ],
        "second_words": ["cat", "dog", "mice", "mouse"],
        "second_is_color": False,
        "second_image_descs": [
            "白色的背景,中间是一只猫。",
            "白色的背景,中间是一只狗。",
            "白色的背景,中间是两只老鼠。",
            "白色的背景,中间是一只老鼠。",
        ],
        "second_override_questions": {
            2: "What are these?",  # mice (plural)
        },
    },
]
# =============================================
# GENERATE & WRITE
# =============================================
def _assemble_question_set(qsid, title, words, image_descs, default_is_color,
                           overrides, image_offset):
    """Build one question set (used for both the "first" and "second" half).

    Args:
        qsid: question-set ID, used in image file names and the payload.
        title: value stored as ``textTitle``.
        words: answer words, one question per word.
        image_descs: image descriptions, indexed in step with ``words``.
        default_is_color: whether non-overridden questions are colour questions.
        overrides: mapping of 0-based word index to a replacement question
            text; overridden questions are treated as non-colour questions.
        image_offset: added to the word index when numbering image files.

    Returns:
        ``(questions, asr_prompt, payload)`` where ``questions`` keeps the
        ``shortExplanation`` key and ``payload["questionSet"]`` omits it.
    """
    questions = []
    for idx, word in enumerate(words):
        if idx in overrides:
            prompt, colour_mode = overrides[idx], False
        elif default_is_color:
            prompt, colour_mode = "What colour is it?", True
        else:
            prompt, colour_mode = "What's this?", False
        image_name = f"{qsid}-{image_offset + idx:02d}.png"
        desc = image_descs[idx]
        questions.append({
            "question": prompt,
            "questionImage": image_name,
            "imageDesc": desc,
            "ability": ["图文匹配"],
            "explanation": build_explanation(prompt, desc, word, colour_mode),
            "shortExplanation": build_short_explanation(desc, word, colour_mode),
        })

    asr_prompt = ", ".join(words)
    payload = {
        "category": "speaking",
        "type": "speaking_pic_recognize",
        "asrPrompt": asr_prompt,
        "questionSetID": qsid,
        "textTitle": title,
        # The short explanation is only for the human-readable config.
        "questionSet": [
            {key: val for key, val in q.items() if key != "shortExplanation"}
            for q in questions
        ],
    }
    return questions, asr_prompt, payload


results = []
for rd in records_data:
    qsid = rd["questionSetID"]
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Processing ID: {qsid}")
    print(banner)

    # First question set: no overrides, images numbered from 00.
    first_words = rd["first_words"]
    first_questions, first_asr, first_qs = _assemble_question_set(
        qsid, rd["textTitle"], first_words, rd["first_image_descs"],
        rd["first_is_color"], {}, 0,
    )

    # Second question set: image numbering continues after the first set.
    second_words = rd["second_words"]
    second_questions, second_asr, second_qs = _assemble_question_set(
        qsid, rd["textTitle"], second_words, rd["second_image_descs"],
        rd["second_is_color"], rd.get("second_override_questions", {}),
        len(first_questions),
    )

    # Columns written to the table: machine JSON plus two readable configs.
    fields = {
        "dataStatus": "1",
        "jsonData": json.dumps({"first": first_qs, "second": second_qs}, ensure_ascii=False),
        "题目1 完整配置": build_human_config(qsid, first_asr, first_questions, rd["textTitle"]),
        "题目2 完整配置": build_human_config(qsid, second_asr, second_questions, rd["textTitle"]),
        "题目集合 ID": qsid,
    }

    # Create the record; a non-zero "code" in the reply is treated as failure.
    print(f" Creating record for QSID={qsid}...")
    resp = create_record(fields)
    if resp.get("code") != 0:
        print(f" ❌ CREATE FAILED: {resp}")
        results.append({"qsid": qsid, "status": "FAILED", "error": resp})
        continue
    record_id = resp["data"]["record"]["record_id"]
    print(f" ✅ Created: record_id={record_id}")

    # Read the record back and compare it with what was sent.
    print(" Verifying...")
    verify = get_record(record_id)
    if verify.get("code") != 0:
        print(f" ⚠️ VERIFY FAILED: {verify}")
        results.append({"qsid": qsid, "record_id": record_id, "status": "VERIFY_FAILED", "error": verify})
        continue

    stored = verify["data"]["record"]["fields"]
    stored_json = json.loads(stored.get("jsonData", "{}"))
    n_first = len(stored_json.get("first", {}).get("questionSet", []))
    n_second = len(stored_json.get("second", {}).get("questionSet", []))
    stored_qsid = stored.get("题目集合 ID", "")
    has_cfg1 = "问题:" in stored.get("题目1 完整配置", "")
    has_cfg2 = "问题:" in stored.get("题目2 完整配置", "")

    # Each check is (label, passed, detail shown in the report).
    checks = [
        ("first题数", n_first == len(first_words), f"{n_first}/{len(first_words)}"),
        ("second题数", n_second == len(second_words), f"{n_second}/{len(second_words)}"),
        ("题目集合ID", stored_qsid == qsid, stored_qsid),
        ("题目1配置", has_cfg1, "OK" if has_cfg1 else "MISSING"),
        ("题目2配置", has_cfg2, "OK" if has_cfg2 else "MISSING"),
    ]
    all_ok = all(passed for _label, passed, _detail in checks)
    status_icon = "⚠️" if not all_ok else ""
    report = " | ".join(f"{name}={detail}" for name, _passed, detail in checks)
    print(f" {status_icon} Verification: {report}")
    results.append({
        "qsid": qsid,
        "record_id": record_id,
        "status": "OK" if all_ok else "WARN",
        "checks": checks,
    })

    # Per-record summary.
    print(f" First: {len(first_words)} questions, ASR: {first_asr}")
    print(f" Second: {len(second_words)} questions, ASR: {second_asr}")
# =============================================
# AUDIT SUMMARY
# =============================================
divider = "=" * 60
print(f"\n{divider}")
print("AUDIT SUMMARY")
print(divider)

# Any entry whose status is not "OK" makes the overall run count as failed.
all_passed = True
for entry in results:
    outcome = entry["status"]
    if outcome == "OK":
        print(f"{entry['qsid']} | record_id={entry['record_id']} | ALL CHECKS PASSED")
        continue
    all_passed = False
    if outcome == "WARN":
        print(f" ⚠️ {entry['qsid']} | record_id={entry['record_id']} | WARNINGS:")
        for name, ok, detail in entry["checks"]:
            if not ok:
                print(f" - {name}: {detail}")
    else:
        print(f"{entry['qsid']} | {entry['status']}: {entry.get('error', '')}")

# Vocabulary audit: flag multi-word and non-ASCII answer words.
print("\n--- Vocabulary Audit ---")
audit_notes = []
for rd in records_data:
    qsid = rd["questionSetID"]
    for w in [*rd["first_words"], *rd["second_words"]]:
        if " " in w:
            audit_notes.append(f" {qsid}: '{w}' is multi-word — ensure ASR handles it")
        if not w.isascii():
            audit_notes.append(f" ⚠️ {qsid}: '{w}' contains non-ASCII")

# Fixed notes, always reported regardless of the data above.
audit_notes.append(" 110101: 'colour' appears twice in second set — intentional per spec")
audit_notes.append(" 110201: 'mice' (plural) and 'mouse' (singular) both present — questions use 'What are these?' vs 'What's this?'")
for note in audit_notes:
    print(note)

print(f"\n{divider}")
verdict = (
    "✅ ALL 3 RECORDS PRODUCED, BACKFILLED, AND VERIFIED"
    if all_passed
    else "⚠️ SOME CHECKS FAILED — SEE ABOVE"
)
print(verdict)
print(divider)