ai_member_xiaoyan/scripts/generate_explanations.py

178 lines
6.7 KiB
Python

"""
Batch-generate the 197 missing explanations and write them back to the Feishu Bitable (multi-dimensional table).
"""
import json, urllib.request, sys
# Bitable app token; interpolated into the `/bitable/v1/apps/{APP_TOKEN}/...` request URLs.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
# JSON credentials file read by get_token(); its `apps[0]` entry supplies `appId` and `appSecret`.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
def get_token(timeout=30):
    """Request a Feishu tenant access token for the first app in CRED_FILE.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``tenant_access_token`` string from the auth endpoint's reply.

    Raises:
        OSError: The credentials file cannot be read or the request fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        KeyError: The credentials file lacks the expected keys, or the reply
            carries no ``tenant_access_token`` (the message then includes the
            ``code`` and ``msg`` fields of the reply).
    """
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    payload = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode()
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=payload,
        headers={"Content-Type": "application/json"})
    # Close the response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        reply = json.loads(resp.read())
    if 'tenant_access_token' not in reply:
        # Keep the KeyError type, but surface what the endpoint answered.
        raise KeyError(
            "tenant_access_token missing from auth response "
            f"(code={reply.get('code')!r}, msg={reply.get('msg')!r})"
        )
    return reply['tenant_access_token']
def api_call(url, method='GET', body=None, timeout=30):
    """Send an authenticated JSON request and return the decoded JSON reply.

    A fresh tenant access token is obtained via ``get_token()`` on every call.

    Args:
        url: Full request URL.
        method: HTTP method name.
        body: JSON-serialisable payload, or ``None`` to send no request body.
            Any non-``None`` value is sent, including an empty dict or list.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        OSError: The request fails (``urllib.error.URLError`` / ``HTTPError``
            are ``OSError`` subclasses).
        ValueError: The response body is not valid JSON.
    """
    token = get_token()
    headers = {"Authorization": f"Bearer {token}"}
    data = None
    # Test against None so that a legitimately empty payload is still sent.
    if body is not None:
        data = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, data=data, method=method, headers=headers)
    # Close the response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())
# Maps the table name carried by each missing-explanation entry ("P1", "P2", ...) to the
# Bitable table id that is interpolated into the records API URLs.
TABLE_MAP = {"P1": "tbliZAhcc9C43B23", "P2": "tblzTLNH7f13uWQN", "P4": "tblVmeDtBDKsAEfz", "P7": "tbly9SvPEa44k3yX"}
def _opt_label(idx):
return chr(65 + idx)
# Ordered (keywords, template) rules for P1 picture questions. The first rule
# with any keyword contained in the lower-cased question wins, so the order
# below is significant (e.g. "what is this" must be tried before "what is").
_P1_TEMPLATES = (
    (("what is this", "what's this"),
     "听力内容中描述的物品特征与{img_label}相符,因此选择该图片。"),
    (("where",),
     "听力内容中提到了地点信息,与{img_label}所描绘的场景一致,因此选择该图片。"),
    (("what does", "what is"),
     "听力内容中提到的相关信息与{img_label}相符,因此选择该图片。"),
    (("which",),
     "听力内容中的描述与{img_label}对应,因此选择该图片。"),
    (("how many",),
     "听力内容中提到了数量信息,与{img_label}一致,因此选择该图片。"),
    (("what color", "what colour"),
     "听力内容中描述了相关颜色信息,与{img_label}对应,因此选择该图片。"),
    (("who",),
     "听力内容中提到了人物信息,与{img_label}对应,因此选择该图片。"),
)


def generate_expl(question, answer_idx, options, options_image,
                  text_desc, text_body, text_title, table_name):
    """Generate a Chinese explanation for a listening question.

    Args:
        question: Question text; only inspected for ``table_name == "P1"``.
            ``None`` is treated as an empty string.
        answer_idx: Zero-based index of the correct option.
        options: Option texts; ``options[answer_idx]`` is quoted as the answer
            when that index exists.
        options_image: Accepted for interface compatibility; not used.
        text_desc: Context description. Together with ``text_body`` it decides
            whether a title hint may be shown.
        text_body: Context body text.
        text_title: Preferred source of the title hint (falls back to
            ``text_desc``); truncated to 40 characters.
        table_name: "P1", "P2", "P4" or "P7" selects the template family; any
            other value yields the generic template.

    Returns:
        The explanation sentence.
    """
    has_context = bool(text_desc or text_body)
    title_hint = (text_title or text_desc or '')[:40] if has_context else ''

    # Answer text; fall back to an ordinal ("第N个选项" = "the N-th option")
    # when the option text is unavailable.
    if options and answer_idx < len(options):
        ans_text = options[answer_idx]
    else:
        ans_text = f"第{answer_idx + 1}个选项"

    if table_name == "P1":
        # listening_choicePic: refer to the picture by ordinal position
        # ("第N张图片" = "the N-th picture"), not by count.
        img_label = f"第{answer_idx + 1}张图片"
        question_lower = (question or '').lower()
        for keywords, template in _P1_TEMPLATES:
            if any(keyword in question_lower for keyword in keywords):
                return template.format(img_label=img_label)
        return f"根据听力内容,正确答案对应{img_label}"

    if table_name == "P2":
        # Only mention the title when there is one; context that consists of
        # a body alone would otherwise render as empty quotes.
        if title_hint:
            return f'"{title_hint}"对话中,听力内容提到了相关信息,空白处应填入「{ans_text}」。'
        return f"根据听力对话内容,空白处应填入「{ans_text}」。"

    if table_name == "P4":
        lbl = _opt_label(answer_idx)
        if title_hint:
            return f'"{title_hint}"短对话中,根据听力内容,正确答案为{lbl}选项「{ans_text}」。'
        return f"根据听力短对话内容,正确答案为{lbl}选项「{ans_text}」。"

    if table_name == "P7":
        return f"根据听力内容,匹配信息对应{_opt_label(answer_idx)}选项。"

    return f"根据听力内容,正确答案为「{ans_text}」。"
from collections import defaultdict


def _list_all_records(table_id, page_size=100):
    """Return every record of one Bitable table, following pagination.

    Raises:
        RuntimeError: The API replied with a non-zero ``code``.
        OSError / ValueError: Propagated from ``api_call``.
    """
    from urllib.parse import urlencode

    base = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records"
    records = []
    page_token = None
    while True:
        params = {"page_size": page_size}
        if page_token:
            params["page_token"] = page_token
        resp = api_call(f"{base}?{urlencode(params)}")
        if resp.get('code') != 0:
            raise RuntimeError(resp.get('msg'))
        data = resp.get('data') or {}
        records.extend(data.get('items') or [])
        page_token = data.get('page_token')
        # Stop when the API reports no further page (or gives no token to continue with).
        if not data.get('has_more') or not page_token:
            return records


def _question_set(jd, section):
    """Return the question list of `section` ('root' means the top level of `jd`)."""
    holder = jd if section == 'root' else (jd.get(section) or {})
    return holder.get('questionSet') or []


def _first_answer_index(raw):
    """Reduce an entry's `answer` value (list or scalar) to one integer index, defaulting to 0."""
    if isinstance(raw, list):
        raw = raw[0] if raw else 0
    try:
        return int(raw)
    except (TypeError, ValueError):
        return 0


def _fill_missing(jd, items, table_name):
    """Fill empty `explanation` fields in `jd` in place; return (filled, out_of_range) counts."""
    filled = 0
    out_of_range = 0
    for entry in items:
        qs = _question_set(jd, entry['section'])
        q_idx = entry['q_index']
        if not 0 <= q_idx < len(qs):
            out_of_range += 1
            continue
        q = qs[q_idx]
        # Never overwrite an explanation that is already present.
        if q.get('explanation', '') not in (None, ''):
            continue
        q['explanation'] = generate_expl(
            question=entry.get('question', ''),
            answer_idx=_first_answer_index(entry.get('answer', [0])),
            options=entry.get('options', []),
            options_image=entry.get('optionsImage', []),
            text_desc=entry.get('text_desc', ''),
            text_body=entry.get('text_body', ''),
            text_title=entry.get('text_title', ''),
            table_name=table_name,
        )
        filled += 1
    return filled, out_of_range


# Load missing explanations
with open('/tmp/missing_explanations.json', encoding='utf-8') as f:
    missing = json.load(f)

# One group per (record sid, table): all questions of a record are patched
# together and written back with a single PUT.
groups = defaultdict(list)
for m in missing:
    groups[(m['sid'], m['table'])].append(m)

print(f"Total missing: {len(missing)}")
print(f"Grouped into {len(groups)} record groups")

total_fixed = 0
total_failed = 0
table_records = {}  # table_id -> full record list, fetched once per table

for (sid, table_name), items in groups.items():
    table_id = TABLE_MAP.get(table_name)
    if table_id is None:
        print(f"⚠️ {table_name} {sid}: unknown table")
        total_failed += len(items)
        continue

    if table_id not in table_records:
        try:
            table_records[table_id] = _list_all_records(table_id)
        except (OSError, ValueError, RuntimeError) as exc:
            print(f"⚠️ {table_name} {sid}: failed to list records: {exc}")
            total_failed += len(items)
            continue

    # First record whose set-ID field equals sid.
    record = next(
        (item for item in table_records[table_id]
         if item['fields'].get('题目集合 ID', '') == sid),
        None,
    )
    if record is None:
        print(f"⚠️ {table_name} {sid}: record not found")
        total_failed += len(items)
        continue

    rid = record['record_id']
    try:
        jd = json.loads(record['fields']['jsonData'])
    except (KeyError, TypeError, ValueError):
        jd = None
    if not isinstance(jd, dict):
        print(f"⚠️ {table_name} {sid}: JSON parse error")
        total_failed += len(items)
        continue

    fixed_count, out_of_range = _fill_missing(jd, items, table_name)
    if out_of_range:
        print(f"⚠️ {table_name} {sid}: {out_of_range} question index(es) out of range")
        total_failed += out_of_range

    if not fixed_count:
        print(f"⏭️ {table_name} {sid}: no changes")
        continue

    new_jd = json.dumps(jd, ensure_ascii=False)
    try:
        result = api_call(
            f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records/{rid}",
            'PUT', {"fields": {"jsonData": new_jd}}
        )
    except (OSError, ValueError) as exc:
        # A failed write must not abort the remaining groups.
        total_failed += fixed_count
        print(f"{table_name} {sid}: {exc}")
        continue

    if result.get('code') == 0:
        total_fixed += fixed_count
        print(f"{table_name} {sid}: {fixed_count} explanations")
    else:
        total_failed += fixed_count
        print(f"{table_name} {sid}: {result.get('msg')}")

print(f"\n{'='*50}")
print(f"Total fixed: {total_fixed}, Failed: {total_failed}")