ai_member_xiaoyan/scripts/produce_p1_picture.py

510 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
听力-P1-图片选择题 生产脚本
QSID: 121301, 121401, 121501
"""
import json, subprocess, copy
# Feishu Bitable app token; interpolated into the records URL in create_record.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
# ID of the Bitable table that receives the generated records.
TABLE_ID = "tbliZAhcc9C43B23"
def get_token():
    """Fetch a Feishu tenant access token with the locally stored app credentials.

    Reads the first entry of ``apps`` in the credentials JSON file and POSTs
    its app id / secret to the ``tenant_access_token/internal`` endpoint
    through ``curl``.

    Returns:
        The ``tenant_access_token`` string from the JSON response.

    Raises:
        RuntimeError: if curl exits with a non-zero status, the response is
            not valid JSON, or the response carries no token.
    """
    CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
    with open(CRED_FILE, encoding="utf-8") as f:
        cred = json.load(f)
    app = cred['apps'][0]
    payload = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']})
    # The request body is fed through stdin ("@-") so the app secret never
    # appears in the process argument list, where other local users could
    # read it. --data-binary sends the bytes unmodified.
    r = subprocess.run(["curl", "-s", "-X", "POST",
                        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
                        "-H", "Content-Type: application/json",
                        "--data-binary", "@-"],
                       input=payload, capture_output=True, text=True)
    if r.returncode != 0:
        raise RuntimeError(f"curl exited with status {r.returncode} while requesting the tenant access token")
    try:
        reply = json.loads(r.stdout)
    except json.JSONDecodeError as err:
        raise RuntimeError("token endpoint did not return valid JSON") from err
    token = reply.get('tenant_access_token') if isinstance(reply, dict) else None
    if not token:
        # Surface the API's own error instead of a bare KeyError.
        raise RuntimeError(f"token endpoint returned no tenant_access_token: {reply}")
    return token
def create_record(token, fields):
    """Create one record in the Bitable table and return the decoded JSON reply.

    ``fields`` is wrapped as ``{"fields": fields}`` and POSTed with curl to the
    records endpoint built from APP_TOKEN and TABLE_ID, authorised with
    ``token`` as a Bearer token. Non-ASCII text is sent unescaped.
    """
    endpoint = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{TABLE_ID}/records"
    payload = json.dumps({"fields": fields}, ensure_ascii=False)
    command = ["curl", "-s", "-X", "POST", endpoint]
    command += ["-H", f"Authorization: Bearer {token}"]
    command += ["-H", "Content-Type: application/json; charset=utf-8"]
    command += ["-d", payload]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
# ============================================================
# Question data: (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text, answer_idx, ability, explanation)
# ============================================================
# 121301 question group 1: home, house, sofa, cupboard, bedroom
# Each entry is an 8-tuple:
#   (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text,
#    answer_idx, ability, explanation)
# Every entry here lists the correct image first (answer_idx 0); the list is
# later passed through balance_answers to spread answers over A/B/C.
QS_121301_FIRST = [
    (
        "Lily: This is my home. It has a red door and two windows.",
        "红门、两个窗户的独立房屋。卡通风格。",
        "蓝门、一个窗户的独立房屋。卡通风格。",
        "红门的公寓大楼。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|颜色与数量匹配"],
        "听力文本描述'red door and two windows'红门和两扇窗选项A的房屋具有红门和两个窗户与描述完全一致。选项B门是蓝色且只有一扇窗选项C是公寓大楼而非独立房屋。"
    ),
    (
        "Lily: I can see a big house. It has a garden in front.",
        "前面有花园的大房子。卡通风格。",
        "没有花园的小房子。卡通风格。",
        "前面有花园的大楼。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|特征匹配"],
        "听力文本描述'big house with a garden'有大花园的房子选项A的大房子前面有花园与描述一致。选项B没有花园选项C是大楼而非房子。"
    ),
    (
        "Lily: This is our sofa. It is big and blue.",
        "蓝色的大沙发。卡通风格。",
        "红色的小沙发。卡通风格。",
        "蓝色的大床。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|物品与颜色匹配"],
        "听力文本描述'big and blue sofa'蓝色大沙发选项A是蓝色大沙发与描述一致。选项B是红色小沙发选项C是蓝色大床而非沙发。"
    ),
    (
        "Lily: The cupboard is full of cups. It is brown.",
        "放满杯子的棕色橱柜。卡通风格。",
        "放满盘子的白色橱柜。卡通风格。",
        "放满书的棕色书架。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|物品与颜色匹配"],
        "听力文本描述'brown cupboard with cups'放满杯子的棕色橱柜选项A的棕色橱柜里放着杯子与描述一致。选项B是白色且放盘子选项C是书架而非橱柜。"
    ),
    (
        "Lily: My bedroom has a small bed and a yellow lamp.",
        "有小床和黄色台灯的卧室。卡通风格。",
        "有大床和白色台灯的卧室。卡通风格。",
        "有大沙发和黄色台灯的客厅。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|房间特征匹配"],
        "听力文本描述'bedroom with small bed and yellow lamp'有小床和黄色台灯的卧室选项A的卧室有小床和黄色台灯与描述一致。选项B床大且台灯白选项C是客厅不是卧室。"
    ),
]
# 121301 question group 2: zoo, hippo, bear, live
# Each entry is an 8-tuple:
#   (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text,
#    answer_idx, ability, explanation)
# Every entry here lists the correct image first (answer_idx 0); the list is
# later passed through balance_answers to spread answers over A/B/C.
QS_121301_SECOND = [
    (
        "Lily: We go to the zoo on Sundays. I can see many animals there.",
        "有各种动物的大动物园。卡通风格。",
        "有许多商店的购物中心。卡通风格。",
        "有许多树的公园。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|地点匹配"],
        "听力文本描述'go to the zoo and see animals'去动物园看动物选项A的动物园有各种动物与描述一致。选项B是购物中心选项C是公园。"
    ),
    (
        "Lily: Look! The hippo is big. It is in the water.",
        "泡在水里的大河马。卡通风格。",
        "站在草地上的大河马。卡通风格。",
        "泡在水里的大象。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|动物与场景匹配"],
        "听力文本描述'hippo in the water'泡在水里的河马选项A是泡在水里的河马与描述一致。选项B的河马在草地上选项C是大象而非河马。"
    ),
    (
        "Lily: I can see a brown bear. It is big and strong.",
        "棕色的大熊。卡通风格。",
        "棕色的小狗。卡通风格。",
        "白色的大熊。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|动物特征匹配"],
        "听力文本描述'brown bear, big and strong'棕色大熊又大又壮选项A是棕色大熊与描述一致。选项B是小狗选项C是白熊。"
    ),
    (
        "Lily: The hippo and the bear live in the zoo. They are happy.",
        "河马和熊在一起。卡通风格。",
        "河马和老虎在一起。卡通风格。",
        "大象和熊在一起。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|多对象匹配"],
        "听力文本提到'hippo and bear live in the zoo'河马和熊住在动物园选项A中河马和熊在一起与描述一致。选项B有老虎选项C有大象。"
    ),
    (
        "Lily: I love the zoo. The animals live here.",
        "动物园里有许多动物。卡通风格。",
        "农场里有许多动物。卡通风格。",
        "公园里有许多人。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|地点匹配"],
        "听力文本描述'zoo where animals live'动物居住的动物园选项A是动物园与描述一致。选项B是农场选项C是公园。"
    ),
]
# 121401 question group 1: study, play, art, spell, letter, English (pick 5)
# Each entry is an 8-tuple:
#   (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text,
#    answer_idx, ability, explanation)
# Every entry here lists the correct image first (answer_idx 0); the list is
# later passed through balance_answers to spread answers over A/B/C.
QS_121401_FIRST = [
    (
        "Lily: I study English every day. I read books and write words.",
        "正在看英文书和写字的小女孩。卡通风格。",
        "正在画画的小女孩。卡通风格。",
        "正在玩耍的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'study English, read books and write words'学英语、读书写字选项A的女孩在读写与描述一致。选项B在画画选项C在玩耍。"
    ),
    (
        "Lily: I like to play with my friends after school.",
        "和朋友们一起玩的小女孩。卡通风格。",
        "独自看书的小女孩。卡通风格。",
        "和朋友们一起上课的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'play with friends'和朋友一起玩选项A的女孩和朋友们在玩与描述一致。选项B独自看书选项C在上课。"
    ),
    (
        "Lily: I love art class. I can draw and paint pictures.",
        "拿着画笔在画板前画画的小女孩。卡通风格。",
        "拿着书本在阅读的小女孩。卡通风格。",
        "拿着球在运动的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'art class, draw and paint'美术课、画画选项A的女孩在画板前画画与描述一致。选项B在阅读选项C在运动。"
    ),
    (
        "Lily: I can spell my name. L-I-L-Y!",
        "指着字母L-I-L-Y拼写的小女孩。卡通风格。",
        "指着一本书的小女孩。卡通风格。",
        "指着数字123的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'spell my name L-I-L-Y'拼写名字选项A的小女孩在拼写字母与描述一致。选项B在指书选项C在指数字。"
    ),
    (
        "Lily: I can write a letter to my friend.",
        "正在写信的小女孩。卡通风格。",
        "正在看信的小女孩。卡通风格。",
        "正在画图的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'write a letter'写信选项A的女孩在写信与描述一致。选项B在看信选项C在画图。"
    ),
]
# 121401 question group 2: crayon, board, mat, desk, schoolbag, handbag (pick 5)
# Each entry is an 8-tuple:
#   (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text,
#    answer_idx, ability, explanation)
# Every entry here lists the correct image first (answer_idx 0); the list is
# later passed through balance_answers to spread answers over A/B/C.
QS_121401_SECOND = [
    (
        "Lily: I have a red crayon. I can draw a big sun.",
        "红色的蜡笔。卡通风格。",
        "红色的铅笔。卡通风格。",
        "蓝色的蜡笔。卡通风格。",
        "Which one does Lily have?",
        0,
        ["关键词识别|物品与颜色匹配"],
        "听力文本描述'red crayon'红色蜡笔选项A是红色蜡笔与描述一致。选项B是铅笔不是蜡笔选项C是蓝色蜡笔。"
    ),
    (
        "Lily: The teacher writes on the board. It is white.",
        "老师在白板上写字。卡通风格。",
        "老师在黑板上写字。卡通风格。",
        "老师在纸上写字。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|物品与颜色匹配"],
        "听力文本描述'white board'白色板子选项A是白板与描述一致。选项B是黑板选项C是纸。"
    ),
    (
        "Lily: I have a small yellow desk. I do my homework here.",
        "黄色的课桌。卡通风格。",
        "黄色的椅子。卡通风格。",
        "蓝色的课桌。卡通风格。",
        "Which one does Lily have?",
        0,
        ["关键词识别|物品与颜色匹配"],
        "听力文本描述'small yellow desk'黄色小课桌选项A是黄色课桌与描述一致。选项B是椅子选项C是蓝色课桌。"
    ),
    (
        "Lily: This is my schoolbag. It is blue and big.",
        "蓝色的大书包。卡通风格。",
        "蓝色的小手提包。卡通风格。",
        "红色的大书包。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|物品特征匹配"],
        "听力文本描述'blue and big schoolbag'蓝色大书包选项A是蓝色大书包与描述一致。选项B是小手提包选项C是红色书包。"
    ),
    (
        "Lily: My mum has a pink handbag. It is small.",
        "粉色的小手提包。卡通风格。",
        "粉色的大书包。卡通风格。",
        "黑色的小手提包。卡通风格。",
        "Which one does Lily's mum have?",
        0,
        ["关键词识别|物品特征匹配"],
        "听力文本描述'pink handbag, small'粉色小手提包选项A是粉色小手提包与描述一致。选项B是大书包选项C是黑色。"
    ),
]
# 121501 question group 1: meet, friend, board game, ping-pong
# Each entry is an 8-tuple:
#   (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text,
#    answer_idx, ability, explanation)
# Every entry here lists the correct image first (answer_idx 0); the list is
# later passed through balance_answers to spread answers over A/B/C.
QS_121501_FIRST = [
    (
        "Lily: I am happy to meet my new friend today.",
        "两个小孩在握手打招呼。卡通风格。",
        "两个小孩在吵架。卡通风格。",
        "一个小孩独自站着。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|场景匹配"],
        "听力文本描述'meet my new friend'见到新朋友选项A的两个小孩在握手见面与描述一致。选项B在吵架选项C独自一人。"
    ),
    (
        "Lily: My friend and I play a board game. It is fun.",
        "两个小孩在玩棋盘游戏。卡通风格。",
        "两个小孩在玩球。卡通风格。",
        "两个小孩在看书。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'play a board game'玩棋盘游戏选项A的两个小孩在下棋与描述一致。选项B在玩球选项C在看书。"
    ),
    (
        "Lily: I can play ping-pong. I hit the ball with a bat.",
        "小孩在打乒乓球。卡通风格。",
        "小孩在打羽毛球。卡通风格。",
        "小孩在踢足球。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|活动匹配"],
        "听力文本描述'play ping-pong with bat and ball'用球拍和球打乒乓球选项A是乒乓球场景与描述一致。选项B是羽毛球选项C是足球。"
    ),
    (
        "Lily: My friend and I meet at the park. We play ping-pong there.",
        "两个小孩在公园打乒乓球。卡通风格。",
        "两个小孩在公园玩棋盘游戏。卡通风格。",
        "两个小孩在教室打乒乓球。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|多信息整合"],
        "听力文本描述'meet at the park and play ping-pong'在公园见面打乒乓球选项A是在公园打乒乓球与描述一致。选项B在玩棋盘游戏选项C在教室。"
    ),
    (
        "Lily: I have a new friend. Her name is Anna. We play games together.",
        "两个小女孩在一起开心地笑。卡通风格。",
        "两个小女孩各自在玩。卡通风格。",
        "一个小女孩和一个小男孩在一起。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|场景匹配"],
        "听力文本描述'new friend, play together'新朋友一起玩选项A的两个女孩在一起开心玩与描述一致。选项B各自玩选项C是男孩和女孩。"
    ),
]
# 121501 question group 2: body, head, mouth, ear
# Each entry is an 8-tuple:
#   (listening_text, imgA_desc, imgB_desc, imgC_desc, question_text,
#    answer_idx, ability, explanation)
# Every entry here lists the correct image first (answer_idx 0); the list is
# later passed through balance_answers to spread answers over A/B/C.
QS_121501_SECOND = [
    (
        "Lily: This is my body. I can run and jump with it.",
        "正在跑步的小女孩全身照。卡通风格。",
        "只拍了头的小女孩。卡通风格。",
        "只拍了手的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|概念匹配"],
        "听力文本描述'my body, run and jump'我的身体、跑步跳跃选项A是全身照与描述一致。选项B只有头部选项C只有手。"
    ),
    (
        "Lily: I have a big head. I wear a red hat on it.",
        "戴着红色帽子的大头小男孩。卡通风格。",
        "戴着红色围巾的大头小男孩。卡通风格。",
        "戴着蓝色帽子的大头小男孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|物品与位置匹配"],
        "听力文本描述'big head with red hat'大头戴红帽选项A是戴红帽子的大头与描述一致。选项B戴围巾选项C帽子是蓝色。"
    ),
    (
        "Lily: I open my mouth and say hello.",
        "张着嘴巴在说话的小女孩。卡通风格。",
        "闭着嘴巴的小女孩。卡通风格。",
        "张着嘴巴在大笑的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|动作匹配"],
        "听力文本描述'open my mouth and say hello'张嘴说你好选项A是张嘴说话的样子与描述一致。选项B闭嘴选项C在大笑。"
    ),
    (
        "Lily: I have two ears. I can hear music with them.",
        "露着两个耳朵、戴着耳机的小女孩。卡通风格。",
        "露着一个耳朵、没戴耳机的小女孩。卡通风格。",
        "没露耳朵、戴着耳机的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|身体部位匹配"],
        "听力文本描述'two ears, hear music'两只耳朵听音乐选项A露出两只耳朵且戴耳机与描述一致。选项B只露一只耳朵且没耳机选项C耳朵被遮住。"
    ),
    (
        "Lily: I touch my head. It is round.",
        "用手摸着自己的圆头的小女孩。卡通风格。",
        "用手摸着自己的肚子的小女孩。卡通风格。",
        "用手摸着自己的脚的小女孩。卡通风格。",
        "Which one is Lily talking about?",
        0,
        ["关键词识别|身体部位匹配"],
        "听力文本描述'touch my head, round'摸自己圆圆的头选项A是摸头的小女孩与描述一致。选项B摸肚子选项C摸脚。"
    ),
]
# ============================================================
# Now fix answer distributions (balance A/B/C)
# For 5 questions: target A=2,B=2,C=1
# ============================================================
# For each 5-question block, move the correct answers onto the fixed pattern [0,1,2,0,1]
# Each question keeps its content; only the position of the correct image changes
# Strategy: swap the correct image description with the one at the target position
def _swap_option_letters(explanation, first, second):
    """Exchange every '选项<first>' and '选项<second>' reference in a single pass."""
    marker = "选项"
    exchange = {first: second, second: first}
    head, *pieces = explanation.split(marker)
    # Each piece begins with the character that followed a marker; only that
    # character is remapped, so the two letters cannot overwrite each other.
    return head + "".join(
        marker + exchange.get(piece[:1], piece[:1]) + piece[1:]
        for piece in pieces
    )


def balance_answers(qs_list, target_dist):
    """Move each question's correct image so the answers follow ``target_dist``.

    Args:
        qs_list: question tuples shaped
            (listening_text, imgA, imgB, imgC, question, answer_idx,
            ability, explanation).
        target_dist: desired answer index (0=A, 1=B, 2=C) for the question at
            the same position; must have at least ``len(qs_list)`` entries.

    Returns:
        A new list of 8-tuples. Where the answer index already matches, the
        question is returned unchanged. Otherwise the correct image description
        is swapped with the one at the target position, ``answer_idx`` is
        updated, and the two affected option letters are exchanged in the
        explanation. Listening text, question and ability are left as they are.

    Raises:
        IndexError: if ``target_dist`` is shorter than ``qs_list`` or an answer
            index falls outside the three options.
    """
    if len(target_dist) < len(qs_list):
        raise IndexError(
            f"target_dist has {len(target_dist)} entries for {len(qs_list)} questions"
        )
    result = []
    for q, new_ans in zip(qs_list, target_dist):
        old_ans = q[5]
        if old_ans == new_ans:
            result.append(tuple(q))
            continue
        listen, img0, img1, img2, question, _, ability, explanation = q
        imgs = [img0, img1, img2]
        if not (0 <= old_ans < len(imgs) and 0 <= new_ans < len(imgs)):
            raise IndexError(f"answer index out of range: {old_ans} -> {new_ans}")
        # Swap the two image descriptions: the correct one lands on new_ans and
        # the distractor that was there takes the old position.
        imgs[old_ans], imgs[new_ans] = imgs[new_ans], imgs[old_ans]
        # Both letters must be exchanged at once; a one-way replace would leave
        # two references to the new letter and none to the moved distractor.
        new_exp = _swap_option_letters(
            explanation, chr(65 + old_ans), chr(65 + new_ans)
        )
        result.append((listen, *imgs, question, new_ans, ability, new_exp))
    return result
# Every block shares one target pattern: [0,1,2,0,1] -> A=2, B=2, C=1.
DIST = [0, 1, 2, 0, 1]
# Rebind each question list to its rebalanced version.
(
    QS_121301_FIRST,
    QS_121301_SECOND,
    QS_121401_FIRST,
    QS_121401_SECOND,
    QS_121501_FIRST,
    QS_121501_SECOND,
) = (
    balance_answers(block, DIST)
    for block in (
        QS_121301_FIRST,
        QS_121301_SECOND,
        QS_121401_FIRST,
        QS_121401_SECOND,
        QS_121501_FIRST,
        QS_121501_SECOND,
    )
)
def make_question_json(qsid, data, block_idx, qi):
    """Build the jsonData dict for one question.

    ``data`` is an 8-tuple (listening text, three image descriptions,
    question, answer index, ability, explanation). Audio and image file names
    are derived from ``qsid`` and a running index: block 0 uses 00-04 and
    block 1 uses 05-09. The listening text and image descriptions are not
    part of the returned dict.
    """
    _listen, _img_a, _img_b, _img_c, prompt, answer_idx, tags, rationale = data
    stem = f"{qsid}-{block_idx * 5 + qi:02d}"
    option_images = [f"{stem}-{slot:02d}.png" for slot in range(3)]
    entry = {"question": prompt, "questionAudio": f"{stem}.mp3"}
    entry["optionsImage"] = option_images
    entry["answer"] = [answer_idx]
    entry["ability"] = tags
    entry["explanation"] = rationale
    return entry
def make_block(qsid, data_list, block_idx):
    """Assemble one question-set block (the "first" or "second" group).

    Each tuple in ``data_list`` is converted with make_question_json, using
    its position in the list as the question index.
    """
    question_set = []
    for position, record in enumerate(data_list):
        question_set.append(make_question_json(qsid, record, block_idx, position))
    block = {"category": "listening", "type": "listening_choicePic"}
    block["questionSetID"] = qsid
    block["questionSet"] = question_set
    return block
def make_full_text(qsid, data_list, block_idx):
    """Render the human-readable full configuration text for one question group.

    Questions are numbered from 1. Each one lists its listening text, the
    three image descriptions, the first ability tag, the question and the
    answer letter, with a blank line between questions. ``qsid`` and
    ``block_idx`` are accepted but not used, and the explanation is not
    included in the text.
    """
    sections = []
    for number, record in enumerate(data_list, start=1):
        audio_text, pic_a, pic_b, pic_c, prompt, answer_idx, tags, _rationale = record
        sections.append("\n".join([
            f"{number}.",
            "【听力文本】",
            audio_text,
            "【图片描述】",
            f"选项A: {pic_a}",
            f"选项B: {pic_b}",
            f"选项C: {pic_c}",
            "【能力项】",
            tags[0],
            "【题目】",
            prompt,
            "【答案】",
            chr(ord("A") + answer_idx),
        ]))
    # A blank line separates consecutive questions; outer whitespace is trimmed.
    return "\n\n".join(sections).strip()
# ============================================================
# MAIN - Write to Bitable
# ============================================================
# Authenticate once, then write one Bitable record per question set.
token = get_token()
configs = {
    "121301": (QS_121301_FIRST, QS_121301_SECOND),
    "121401": (QS_121401_FIRST, QS_121401_SECOND),
    "121501": (QS_121501_FIRST, QS_121501_SECOND),
}
for qsid, (first_data, second_data) in configs.items():
    # jsonData payload: both question groups as structured blocks.
    first_block = make_block(qsid, first_data, 0)
    second_block = make_block(qsid, second_data, 1)
    json_data = {"first": first_block, "second": second_block}

    # Report how the answers are spread over A/B/C in each group.
    print(f"\n{'='*60}")
    print(f"QSID: {qsid}")
    for label, rows, block in (
        ("First", first_data, first_block),
        ("Second", second_data, second_block),
    ):
        picks = [item["answer"][0] for item in block["questionSet"]]
        n_a, n_b, n_c = (picks.count(option) for option in range(3))
        print(f"{label}: {len(rows)} Qs | A={n_a} B={n_b} C={n_c}")

    # Record fields: the JSON payload plus a readable text version per group.
    fields = {
        "题目集合 ID": qsid,
        "dataStatus": "0",
        "jsonData": json.dumps(json_data, ensure_ascii=False),
        "题目1 完整配置": make_full_text(qsid, first_data, 0),
        "题目2 完整配置": make_full_text(qsid, second_data, 1),
        "审校结果": "✅ OK | 2026-05-18 小研审校(生产)"
    }
    outcome = create_record(token, fields)
    if outcome.get('code', -1) == 0:
        print(f" ✅ Created: {outcome['data']['record']['record_id']}")
    else:
        print(f" ❌ Failed: {outcome.get('msg', outcome)}")
print(f"\n{'='*60}")
print("Production complete!")