# ai_member_xiaoyan/scripts/generate_explanations.py

"""
批量生成197条缺失的explanation并回填到飞书多维表格
"""
import json, urllib.request, sys

APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"

def get_token():
    """Request a Feishu tenant access token for the first app in CRED_FILE.

    Reads ``apps[0].appId`` / ``apps[0].appSecret`` from the JSON credentials
    file and posts them to the internal tenant-token endpoint.

    Returns:
        The ``tenant_access_token`` value of the JSON response.

    Raises:
        OSError: The credentials file cannot be read.
        KeyError: The credentials file or the response lacks an expected key
            (the latter happens when the response carries no token).
        urllib.error.URLError: The HTTP request failed.
    """
    # Explicit encoding so reading the file does not depend on the locale.
    with open(CRED_FILE, encoding="utf-8") as f:
        app = json.load(f)['apps'][0]

    payload = json.dumps(
        {"app_id": app['appId'], "app_secret": app['appSecret']}
    ).encode()
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=payload,
        headers={"Content-Type": "application/json"})

    # The context manager closes the HTTP response (and its socket) promptly.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())['tenant_access_token']

def api_call(url, method='GET', body=None):
    """Send an authenticated JSON request to the Feishu open API.

    A fresh tenant token is obtained through ``get_token()`` for every call.

    Args:
        url: Full request URL.
        method: HTTP method name, e.g. ``'GET'`` or ``'PUT'``.
        body: JSON-serialisable request body, or ``None`` for no body.

    Returns:
        The decoded JSON response.

    Raises:
        urllib.error.URLError: The HTTP request failed (``HTTPError`` for
            non-2xx statuses).
    """
    headers = {"Authorization": f"Bearer {get_token()}"}

    # Compare against None so an intentionally empty body ({} or []) is still
    # sent instead of being dropped by a truthiness test.
    data = None
    if body is not None:
        data = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"

    req = urllib.request.Request(url, data=data, method=method, headers=headers)
    # The context manager closes the HTTP response (and its socket) promptly.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())

# Table key used in the work list -> Feishu Bitable table id used in API URLs.
TABLE_MAP = dict(
    P1="tbliZAhcc9C43B23",
    P2="tblzTLNH7f13uWQN",
    P4="tblVmeDtBDKsAEfz",
    P7="tbly9SvPEa44k3yX",
)

def _opt_label(idx):
    return chr(65 + idx)

# Ordered (keywords, template) rules for picture-choice ("P1") questions.
# Order matters: the first rule with a keyword contained in the lower-cased
# question wins, so "what is this" must come before the broader "what is".
_P1_TEMPLATES = (
    (("what is this", "what's this"),
     "听力内容中描述的物品特征与{img}相符，因此选择该图片。"),
    (("where",),
     "听力内容中提到了地点信息，与{img}所描绘的场景一致，因此选择该图片。"),
    (("what does", "what is"),
     "听力内容中提到的相关信息与{img}相符，因此选择该图片。"),
    (("which",),
     "听力内容中的描述与{img}对应，因此选择该图片。"),
    (("how many",),
     "听力内容中提到了数量信息，与{img}一致，因此选择该图片。"),
    (("what color", "what colour"),
     "听力内容中描述了相关颜色信息，与{img}对应，因此选择该图片。"),
    (("who",),
     "听力内容中提到了人物信息，与{img}对应，因此选择该图片。"),
)
_P1_FALLBACK = "根据听力内容，正确答案对应{img}。"


def generate_expl(question, answer_idx, options, options_image,
                  text_desc, text_body, text_title, table_name):
    """Build a templated Chinese explanation for one listening question.

    Args:
        question: Question text. Only inspected for table "P1", where English
            keywords select the sentence template. ``None`` is treated as "".
        answer_idx: Zero-based index of the correct option.
        options: Option texts; ``options[answer_idx]`` is quoted in the output
            when the index is within range, otherwise an ordinal placeholder
            is used.
        options_image: Accepted for interface compatibility; not used.
        text_desc: Passage description; with ``text_body`` it decides whether
            context exists, and it is the fallback for the title hint.
        text_body: Passage body; only used to decide whether context exists.
        text_title: Passage title, preferred over ``text_desc`` as the hint.
        table_name: Table key ("P1", "P2", "P4" or "P7"); any other value
            yields a generic sentence.

    Returns:
        The explanation string.
    """
    has_context = bool(text_desc or text_body)
    # At most 40 characters of title (or description) are quoted in the text.
    title_hint = (text_title or text_desc or '')[0:40] if has_context else ''

    # Text of the correct option, or an ordinal placeholder when unavailable.
    if options and answer_idx < len(options):
        ans_text = options[answer_idx]
    else:
        ans_text = f"第{answer_idx + 1}个选项"

    if table_name == "P1":
        # Picture-choice questions: refer to the picture by its ordinal.
        img_label = f"第{answer_idx + 1}张图片"
        question_lower = (question or '').lower()
        for keywords, template in _P1_TEMPLATES:
            if any(keyword in question_lower for keyword in keywords):
                return template.format(img=img_label)
        return _P1_FALLBACK.format(img=img_label)

    # The title is wrapped in Chinese quotation marks written as escapes
    # (\u201c / \u201d): a bare ASCII double quote here would terminate the
    # f-string literal. The contextual sentence is used only when there is a
    # non-empty hint, so the output never contains an empty pair of quotes.
    if table_name == "P2":
        if title_hint:
            return (f"在\u201c{title_hint}\u201d对话中，听力内容提到了相关信息，"
                    f"空白处应填入「{ans_text}」。")
        return f"根据听力对话内容，空白处应填入「{ans_text}」。"

    if table_name == "P4":
        lbl = _opt_label(answer_idx)
        if title_hint:
            return (f"在\u201c{title_hint}\u201d短对话中，根据听力内容，"
                    f"正确答案为{lbl}选项「{ans_text}」。")
        return f"根据听力短对话内容，正确答案为{lbl}选项「{ans_text}」。"

    if table_name == "P7":
        return f"根据听力内容，匹配信息对应{_opt_label(answer_idx)}选项。"

    return f"根据听力内容，正确答案为「{ans_text}」。"

# Load the work list of questions whose explanation is missing.
from collections import defaultdict

# Explicit UTF-8 so decoding the JSON file does not depend on the process locale.
with open('/tmp/missing_explanations.json', encoding='utf-8') as f:
    missing = json.load(f)

# Group entries by (question-set id, table key) so that every Bitable record
# is rewritten at most once, however many of its questions need a fix.
groups = defaultdict(list)
for m in missing:
    groups[(m['sid'], m['table'])].append(m)

print(f"Total missing: {len(missing)}")
print(f"Grouped into {len(groups)} record groups")

# Running totals, counted in questions (not records).
total_fixed = 0
total_failed = 0

import urllib.error
import urllib.parse

# table_id -> list of records already fetched from Feishu. Each (sid, table)
# group is processed once, so a record that has been updated is not read
# from this cache again.
_table_records = {}


def _list_table_records(table_id):
    """Return every record of a table, following pagination; cached per table.

    Raises:
        RuntimeError: The list API answered with a non-zero ``code``.
        urllib.error.URLError: The HTTP request failed.
    """
    if table_id in _table_records:
        return _table_records[table_id]

    records = []
    page_token = ''
    while True:
        url = (f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}"
               f"/tables/{table_id}/records?page_size=100")
        if page_token:
            url += "&page_token=" + urllib.parse.quote(page_token)
        resp = api_call(url)
        if resp.get('code', 0) != 0:
            raise RuntimeError(resp.get('msg'))
        data = resp.get('data') or {}
        # `items` may be missing or null for an empty page.
        records.extend(data.get('items') or [])
        page_token = data.get('page_token') or ''
        # Stop on the last page; the token check also prevents an endless
        # loop should `has_more` be set without a token.
        if not data.get('has_more') or not page_token:
            break

    _table_records[table_id] = records
    return records


for (sid, table_name), items in groups.items():
    table_id = TABLE_MAP.get(table_name)
    if table_id is None:
        print(f"⚠️ {table_name} {sid}: unknown table")
        total_failed += len(items)
        continue

    try:
        records = _list_table_records(table_id)
    except (RuntimeError, urllib.error.URLError) as err:
        print(f"⚠️ {table_name} {sid}: failed to list records: {err}")
        total_failed += len(items)
        continue

    # First record whose question-set id field equals this group's sid.
    record = next(
        (r for r in records if r['fields'].get('题目集合 ID', '') == sid),
        None,
    )
    if record is None:
        print(f"⚠️ {table_name} {sid}: record not found")
        total_failed += len(items)
        continue

    rid = record['record_id']

    try:
        jd = json.loads(record['fields']['jsonData'])
    except (KeyError, TypeError, ValueError):
        # Missing field, non-string value, or malformed JSON.
        print(f"⚠️ {table_name} {sid}: JSON parse error")
        total_failed += len(items)
        continue

    fixed_count = 0

    for entry in items:
        section = entry['section']
        q_idx = entry['q_index']

        # 'root' means the question set sits at the top level of the
        # document; any other value names the sub-object that holds it.
        if section == 'root':
            qs = jd.get('questionSet', [])
        else:
            qs = jd.get(section, {}).get('questionSet', [])

        if q_idx >= len(qs):
            print(f"⚠️ {table_name} {sid}: question index {q_idx} out of range")
            total_failed += 1
            continue
        q = qs[q_idx]

        # Never overwrite an explanation that is already present.
        if q.get('explanation', '') not in (None, ''):
            continue

        # `answer` is normally a list whose first element is the index of the
        # correct option; a bare integer is accepted too (assumed to be the
        # index itself); anything else falls back to the first option.
        answer = entry.get('answer', [0])
        if isinstance(answer, list) and answer:
            answer_idx = answer[0]
        elif isinstance(answer, int) and not isinstance(answer, bool):
            answer_idx = answer
        else:
            answer_idx = 0

        q['explanation'] = generate_expl(
            question=entry.get('question', ''),
            answer_idx=answer_idx,
            options=entry.get('options', []),
            options_image=entry.get('optionsImage', []),
            text_desc=entry.get('text_desc', ''),
            text_body=entry.get('text_body', ''),
            text_title=entry.get('text_title', ''),
            table_name=table_name
        )
        fixed_count += 1

    if not fixed_count:
        print(f"⏭️ {table_name} {sid}: no changes")
        continue

    # Write the whole JSON document back into the record's jsonData field.
    new_jd = json.dumps(jd, ensure_ascii=False)
    try:
        result = api_call(
            f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records/{rid}",
            'PUT', {"fields": {"jsonData": new_jd}}
        )
    except urllib.error.URLError as err:
        # One failed update must not abort the remaining groups.
        total_failed += fixed_count
        print(f"❌ {table_name} {sid}: {err}")
        continue

    if result.get('code') == 0:
        total_fixed += fixed_count
        print(f"✅ {table_name} {sid}: {fixed_count} explanations")
    else:
        total_failed += fixed_count
        print(f"❌ {table_name} {sid}: {result.get('msg')}")

print(f"\n{'='*50}")
print(f"Total fixed: {total_fixed}, Failed: {total_failed}")