# ai_member_xiaoyan/scripts/fix_and_backfill_v3.py

"""
Fix ability tags and backfill explanations, v3 — calls curl with shell=True (verified to work).
"""
import json
import os
import shlex
import subprocess
import tempfile
from urllib.parse import quote

# Bitable app token; interpolated into the /bitable/v1/apps/{APP_TOKEN}/... URLs
# that the main script builds for fetching and updating records.
# NOTE(review): hard-coded identifier — confirm it is acceptable to keep in source.
APP_TOKEN = 'CMHSbUUjka3TrUsaxxEc297ongf'

def get_token():
    """Fetch a tenant access token for the first app in the credentials file.

    Reads ``appId`` / ``appSecret`` of ``apps[0]`` from the local credentials
    JSON, POSTs them to the ``tenant_access_token/internal`` endpoint through
    curl, and returns the ``tenant_access_token`` field of the reply.

    Returns:
        The token string, or ``''`` when the reply carries no token or curl
        produced no parseable JSON object (the caller treats ``''`` as failure).

    Raises:
        OSError, KeyError, IndexError, json.JSONDecodeError: the credentials
            file is missing or does not have the expected layout.
        subprocess.TimeoutExpired: curl did not finish within 10 seconds.
    """
    cred_file = '/root/.openclaw/credentials/xiaoyan/config.json'
    with open(cred_file, encoding='utf-8') as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    body = json.dumps({'app_id': app['appId'], 'app_secret': app['appSecret']})

    # The body is fed to curl on stdin (-d @-) instead of being spliced into
    # the shell string: a quote character in the secret can no longer break
    # the command, and the secret does not appear in the process list.
    cmd = (
        "curl -s -X POST 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal' "
        "-H 'Content-Type: application/json' -d @-"
    )
    r = subprocess.run(
        cmd, shell=True, input=body, capture_output=True,
        text=True, encoding='utf-8', timeout=10,
    )

    # curl prints nothing on network failure; report that as "no token"
    # rather than crashing inside json.loads.
    try:
        resp = json.loads(r.stdout)
    except json.JSONDecodeError:
        return ''
    if not isinstance(resp, dict):
        return ''
    return resp.get('tenant_access_token', '')

def api_get(token, url):
    """Issue an authenticated GET through curl and return the decoded JSON object.

    Args:
        token: Bearer token placed in the ``Authorization`` header.
        url: Full request URL.

    Returns:
        The parsed JSON response as a dict. When curl yields no JSON object
        (empty output, HTML error page, ...), a dict of the form
        ``{'code': -1, 'msg': <diagnostic>}`` is returned instead, so callers
        that test ``code`` see a failure rather than an exception.

    Raises:
        subprocess.TimeoutExpired: curl did not finish within 15 seconds.
    """
    # Both values are shell-quoted: the command still runs through the shell,
    # but quotes or metacharacters in the URL/token can no longer alter it.
    auth_header = shlex.quote('Authorization: Bearer ' + token)
    cmd = f"curl -sS -X GET {shlex.quote(url)} -H {auth_header}"
    r = subprocess.run(
        cmd, shell=True, capture_output=True,
        text=True, encoding='utf-8', timeout=15,
    )

    try:
        resp = json.loads(r.stdout)
    except json.JSONDecodeError:
        resp = None
    if not isinstance(resp, dict):
        detail = (r.stderr or r.stdout or '').strip()[:200]
        return {
            'code': -1,
            'msg': f"curl exit {r.returncode}: no JSON object in response: {detail}",
        }
    return resp

def api_put(token, url, body):
    """Issue an authenticated JSON PUT through curl and return the decoded reply.

    The payload is written to a private temporary file and handed to curl with
    ``-d @file`` so that large bodies do not have to fit on the command line.

    Args:
        token: Bearer token placed in the ``Authorization`` header.
        url: Full request URL.
        body: JSON-serialisable request payload.

    Returns:
        The parsed JSON response as a dict. When curl yields no JSON object,
        a dict of the form ``{'code': -1, 'msg': <diagnostic>}`` is returned
        instead, so callers that test ``code`` see a failure.

    Raises:
        TypeError / ValueError: ``body`` is not JSON-serialisable.
        subprocess.TimeoutExpired: curl did not finish within 15 seconds.
    """
    # A unique temp file replaces the fixed /tmp path (predictable name, shared
    # between concurrent runs, never removed). UTF-8 is set explicitly because
    # the payload is dumped with ensure_ascii=False.
    tmp = tempfile.NamedTemporaryFile(
        'w', encoding='utf-8', suffix='.json', delete=False,
    )
    try:
        with tmp as f:
            json.dump(body, f, ensure_ascii=False)
        auth_header = shlex.quote('Authorization: Bearer ' + token)
        cmd = (
            f"curl -sS -X PUT {shlex.quote(url)} -H {auth_header} "
            f"-H 'Content-Type: application/json' -d {shlex.quote('@' + tmp.name)}"
        )
        r = subprocess.run(
            cmd, shell=True, capture_output=True,
            text=True, encoding='utf-8', timeout=15,
        )
    finally:
        os.unlink(tmp.name)

    try:
        resp = json.loads(r.stdout)
    except json.JSONDecodeError:
        resp = None
    if not isinstance(resp, dict):
        detail = (r.stderr or r.stdout or '').strip()[:200]
        return {
            'code': -1,
            'msg': f"curl exit {r.returncode}: no JSON object in response: {detail}",
        }
    return resp

# Old ability tag -> replacement tag. fix_record() swaps every tag found as a
# key here for its value and leaves any other tag untouched. Several old tags
# deliberately share the same replacement.
TAG_MAP = {
    '信息定位与提取': '扫读定位｜信息匹配',
    '信息定位': '扫读定位｜信息匹配',
    '细节理解': '细节理解｜事实信息提取',
    '因果推断': '推理判断｜原因/结果',
    '主旨归纳': '主旨理解｜段落/文本大意',
    '推理判断': '推理判断｜原因/结果',
}

# Explanation text to backfill, keyed by (sid, set_key, question_index):
#   sid            - the fourth element of each entry in `targets`
#   set_key        - 'first' or 'second', the question-set key inside jsonData
#   question_index - 0-based position within that set's 'questionSet' list
# fix_record() only writes an entry when the question's current 'explanation'
# is empty. Targets whose sid has no entries here get no explanation backfill.
EXPLANATIONS = {
    ('032701', 'first', 0): "空白处需要表示赚钱的名词。get some cash 是地道表达，cash（现金）最符合学生售卖手链的情境。cheque（支票）语义过正式，bracelet（手链）是销售的商品而非获取的目标。考查名词在语境中的最佳语义选择。",
    ('032701', 'first', 1): "修饰可数名词bracelets应用many（许多）。much修饰不可数名词，only（只有）在此处语义不流畅，不符合带了一些手链去给朋友看的语境。考查可数名词数量修饰词的语法规则。",
    ('032701', 'first', 2): "表语位置需形容词。They were very popular（它们很受欢迎）才是完整句子。cash和sell均非形容词，不能直接作表语。考查词性辨别和上下文语义连贯。",
    ('032701', 'first', 3): "students是可数名词复数，需用many修饰表示很多学生。much用于不可数名词，only在so many结构后不合适。考查可数名词数量表达。",
    ('032701', 'first', 4): "bracelets是可数名词复数，需用many表示数量。didn't have many bracelets表示手链不够多，与后文promised to make more呼应。考查上下文逻辑与可数名词修饰。",
    ('032701', 'first', 5): "空白处需动词作谓语。She could sell a few（她能卖出几条）中sell是唯一动词。cheque和popular均为非动词。考查句子主干成分（谓语动词）的语法识别。",
    ('032801', 'first', 0): "根据后文a local band was playing music和it was very noisy，可推断是一场派对。party最能描述这种热闹场景，car和sleep与乐队演奏、噪音等语境不符。考查名词的语境语义选择。",
    ('032801', 'first', 1): "I tried to sleep, but the sound was too loud——噪音太大导致睡不着，sleep是最合理的选择。drive和sing都偏离语境。考查动词的语义逻辑匹配。",
    ('032801', 'first', 2): "描述派对参与者身份，adult people（成年人）与午夜派对的情境相符，也为后文decided to be more understanding提供合理性。考查修饰词的语境判断。",
    ('032801', 'first', 3): "The party went on until midnight——派对持续到午夜是最符合逻辑的时间终点。morning和afternoon时间太早，不符合嘈杂派对的情境。考查时间名词的合理推断。",
    ('032801', 'first', 4): "前后分句存在转折关系：I was annoyed because I couldn't rest, but then I remembered...。but引导转折，because表示原因（前后因果不成立），so表示结果。考查连词的逻辑关系。",
    ('032801', 'first', 5): "此处需引导原因的连词。because the band members cleaned up解释为什么第二天心情好转。although表示让步（语义不通），until表示时间（不够自然）。考查原因连词的选择。",
    ('032901', 'first', 0): "前后是因果关系：去野生动物园是因为想看小象。because引导原因状语从句最合适。but表示转折，so表示结果，均不符合语义。考查原因连词。",
    ('032901', 'first', 1): "had just been born是被动语态，表示刚刚出生。born是bear的过去分词，与a few weeks ago的时间状语呼应。grow up（长大）和top（顶端）语义不通。考查词义辨析与被动语态。",
    ('032901', 'first', 2): "reach the top of a tall tree——大象想够到高树的顶端。top表示顶部位置，与tall tree形成语义对应。bottom与reach矛盾，nose不是位置。考查方位词的选择。",
    ('032901', 'first', 3): "大象的长鼻子是标志性特征。nose是正确选项，banana和bottom明显不符。考查动物特征相关的核心词汇。",
    ('032901', 'first', 4): "could only touch the bottom——虽然用长鼻子去够，但只碰到了底部。bottom与top形成对比，体现够不到的落差感。考查反义词对（top↔bottom）的理解。",
    ('032901', 'first', 5): "小象吃黄色的香蕉是最自然的食物搭配。banana是常见动物投喂食物，elephant和nose均不符合吃的语义。考查动物食物相关词汇。",
    ('032901', 'first', 6): "when I grow up（等我长大）是固定表达。grow up表示成长、长大，born与when从句时态矛盾，bottom不相关。考查固定短语的掌握。",
    ('032901', 'first', 7): "根据上下文描述的美好回忆，bottom指从心底的比喻义。from the bottom of my heart是常见表达。top与情感表达不相配，nose属于干扰项。考查固定搭配和比喻义。",
    ('032901', 'second', 0): "前后分句是因果关系：姐姐喜欢美丽的东西，所以总戴首饰。so引导结果，because引导原因（方向反了），but表示转折（不成立）。考查结果连词。",
    ('032901', 'second', 1): "lost one of her favourite silver earrings——在首饰语境中，银色的earrings（耳环）是最典型的可丢失物品。painting和language与silver修饰和wear搭配都不符。考查语境词义推断。",
    ('032901', 'second', 2): "couldn't find it anywhere else——在否定句中else表示别的地方/其他任何地方，except含义不符，again重复逻辑不通。考查否定句中的词汇用法。",
    ('032901', 'second', 3): "I decided to make..., although I'm not very good——前后存在让步转折关系，虽然不擅长但还是决定做。although正确，because和so均表示因果。考查让步连词。",
    ('032901', 'second', 4): "She showed me how to use small silver pieces——朋友擅长的是首饰制作（jewellery），与silver pieces和earrings主题一致。painting和language偏离主题。考查上下文主题关联。",
    ('032901', 'second', 5): "I made a mistake, so I had to start again——犯错后重新开始，again表示再一次。else用于否定句表另外，except表除了，都不符合重新来过的语义。考查副词选择。",
}

def fix_record(sid, fields, tag_map=None, explanations=None):
    """Rewrite ability tags and backfill explanations for one record.

    For every question in the 'first' and 'second' question sets of the
    record's ``jsonData``:
      * ability tags that appear in the tag map are replaced by their mapped
        value; a question with no tags gets the default cloze tag;
      * an empty explanation is filled from the explanation table, looked up
        by ``(sid, set_key, question_index)``.
    When anything changed, the explanations of each set are also appended to
    the matching text field ('题目1' for 'first', '题目2' for 'second') unless
    that text is empty or already contains a '【解析】' section.

    Args:
        sid: Identifier used as the first element of the explanation key.
        fields: Field dict of the record; reads 'jsonData', '题目1', '题目2'.
            Missing, ``None`` or empty values are treated as empty.
        tag_map: Optional old-tag -> new-tag mapping; defaults to ``TAG_MAP``.
        explanations: Optional explanation table; defaults to ``EXPLANATIONS``.

    Returns:
        ``(new_jsonData, new_题目1, new_题目2)`` when jsonData was modified,
        otherwise ``(None, None, None)``.

    Raises:
        json.JSONDecodeError: ``jsonData`` is present but not valid JSON.
    """
    tag_map = TAG_MAP if tag_map is None else tag_map
    explanations = EXPLANATIONS if explanations is None else explanations

    # `or '{}'` also covers a field that is present but None / empty.
    jd = json.loads(fields.get('jsonData') or '{}')

    def question_set(set_key):
        # A set (or its questionSet) may be JSON null; treat that as "no questions".
        return (jd.get(set_key) or {}).get('questionSet') or []

    changed = False
    for set_key in ('first', 'second'):
        for qi, q in enumerate(question_set(set_key)):
            abilities = q.get('ability') or []
            ab_changed = any(a in tag_map for a in abilities)
            new_abilities = [tag_map.get(a, a) for a in abilities]

            if not new_abilities:
                # Untagged questions get the default tag.
                new_abilities = ['语法结构识别｜完形填空']
                ab_changed = True

            if ab_changed:
                q['ability'] = new_abilities
                changed = True

            expl_key = (sid, set_key, qi)
            if expl_key in explanations:
                # Never overwrite an explanation that already has content.
                if not (q.get('explanation', '') or '').strip():
                    q['explanation'] = explanations[expl_key]
                    changed = True

    if not changed:
        return None, None, None

    new_jd = json.dumps(jd, ensure_ascii=False)

    new_texts = {}
    for t_field, qs_set_key in (('题目1', 'first'), ('题目2', 'second')):
        t_orig = fields.get(t_field, '') or ''
        new_texts[t_field] = t_orig
        qs = question_set(qs_set_key)
        if not qs or not t_orig or '【解析】' in t_orig:
            continue
        # Numbering follows the question's position, so skipped (empty)
        # explanations leave gaps rather than renumbering.
        expl_lines = [
            f"{qi+1}. {q.get('explanation','')}"
            for qi, q in enumerate(qs)
            if q.get('explanation', '')
        ]
        if expl_lines:
            new_texts[t_field] = t_orig + '\n\n【解析】\n' + '\n'.join(expl_lines)

    return new_jd, new_texts['题目1'], new_texts['题目2']

# ===== Main =====
def _find_record_fields(token, tid, rid):
    """Locate record `rid` in table `tid`, following list pagination.

    Returns:
        ``(fields, error)``. ``fields`` is the record's field dict, or None
        when the record was not seen on any page. ``error`` is a printable
        message when a page request returned a non-zero code, else None.
    """
    base_url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records?page_size=500"
    page_token = ''
    while True:
        url = base_url
        if page_token:
            url += '&page_token=' + quote(page_token, safe='')
        data = api_get(token, url)
        if data.get('code') != 0:
            return None, f"fetch error {data.get('code')} {(data.get('msg') or '')[:60]}"

        page = data.get('data') or {}
        # 'items' can be absent/null (e.g. an empty page); treat as no records.
        for item in page.get('items') or []:
            if item.get('record_id') == rid:
                return item.get('fields'), None

        # Keep going only while the API reports more pages; a table with
        # more than one page of records would otherwise hide the target.
        page_token = page.get('page_token') or ''
        if not page.get('has_more') or not page_token:
            return None, None


token = get_token()
if not token:
    print("❌ Failed to get token")
    # SystemExit directly: the bare `exit` builtin is only installed by `site`.
    raise SystemExit(1)

# Each target is (label for log output, table id, record id, sid), where sid
# is the key prefix fix_record() uses to look up explanations.
targets = [
    ('P1','tblCgfYDnnqwLfgH','recvjt0HzBBhYu','032501'),
    ('P3','tbl4q0ZUV3HB54t1','recvjug1hWz2oG','032601'),
    ('P4','tblzKVm1FEukPgnN','recvjueHm15HPu','032701'),
    ('P4','tblzKVm1FEukPgnN','recvjueN5QT1c5','032801'),
    ('P5','tblLmUxzzUDe0QAJ','recvjueULrufNg','032901'),
]

for tname, tid, rid, sid in targets:
    # Fetch
    fields, fetch_error = _find_record_fields(token, tid, rid)
    if fetch_error:
        print(f"❌ {tname} {sid}: {fetch_error}")
        continue
    if not fields:
        print(f"❌ {tname} {sid}: record not found")
        continue

    new_jd, new_t1, new_t2 = fix_record(sid, fields)
    if new_jd is None:
        print(f"⏭️ {tname} {sid}: already fixed, no changes")
        continue

    # Update: jsonData always, the text fields only when they actually changed.
    update_fields = {'jsonData': new_jd}
    if new_t1 != (fields.get('题目1', '') or ''):
        update_fields['题目1'] = new_t1
    if new_t2 != (fields.get('题目2', '') or ''):
        update_fields['题目2'] = new_t2

    url2 = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records/{rid}"
    resp = api_put(token, url2, {"fields": update_fields})
    if resp.get('code') == 0:
        print(f"✅ {tname} {sid}: updated {', '.join(update_fields)}")
    else:
        print(f"❌ {tname} {sid}: code={resp.get('code')} msg={resp.get('msg','?')[:100]}")