ai_member_xiaoyan/scripts/fix_and_backfill_v3.py

176 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
修正能力标签 + 补充解析 v3 — 使用 shell=True curl已验证可行
"""
import json, subprocess, os, shlex
# App token interpolated into the bitable/v1/apps/{APP_TOKEN}/... URLs built in
# the main section of this script.
# NOTE(review): hard-coded in source — confirm this is acceptable for this repo.
APP_TOKEN = 'CMHSbUUjka3TrUsaxxEc297ongf'
def get_token():
    """Request a tenant access token for the first app in the credentials file.

    Reads ``appId``/``appSecret`` of ``apps[0]`` from the local credentials
    JSON and POSTs them to the ``tenant_access_token/internal`` endpoint
    through ``curl``.

    Returns:
        The ``tenant_access_token`` string, or ``''`` when curl produced no
        usable JSON object or the response carries no token.

    Raises:
        OSError, KeyError, IndexError, json.JSONDecodeError: the credentials
            file is missing or does not have the expected layout.
        subprocess.TimeoutExpired: curl did not finish within 10 seconds.
    """
    cred_file = '/root/.openclaw/credentials/xiaoyan/config.json'
    with open(cred_file, encoding='utf-8') as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    body = json.dumps({'app_id': app['appId'], 'app_secret': app['appSecret']})

    # argv list + stdin: the secret never passes through a shell (so quotes in
    # it cannot break or inject into the command) and never shows up in argv.
    cmd = [
        'curl', '-s', '-X', 'POST',
        'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
        '-H', 'Content-Type: application/json',
        '--data-binary', '@-',
    ]
    r = subprocess.run(cmd, input=body, capture_output=True, text=True,
                       encoding='utf-8', timeout=10)

    # An empty or non-JSON reply (e.g. curl could not connect) is reported as
    # "no token" so the caller's falsy check can handle it.
    try:
        resp = json.loads(r.stdout)
    except json.JSONDecodeError:
        return ''
    if not isinstance(resp, dict):
        return ''
    return resp.get('tenant_access_token', '')
def api_get(token, url):
    """GET ``url`` through curl with a bearer token and return the parsed JSON.

    Args:
        token: Access token placed in the ``Authorization: Bearer`` header.
        url: Full request URL.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        json.JSONDecodeError: curl returned an empty or non-JSON body.
        subprocess.TimeoutExpired: curl did not finish within 15 seconds.
    """
    # argv list instead of a shell string: no quoting pitfalls for url/token.
    cmd = ['curl', '-s', '-X', 'GET', url,
           '-H', f'Authorization: Bearer {token}']
    # Decode explicitly as UTF-8 rather than with the locale encoding; the
    # records handled by this script contain non-ASCII text.
    r = subprocess.run(cmd, capture_output=True, text=True,
                       encoding='utf-8', timeout=15)
    return json.loads(r.stdout)
def api_put(token, url, body):
    """PUT ``body`` as JSON to ``url`` through curl and return the parsed JSON.

    The payload is streamed to curl over stdin, which handles large bodies
    without a shared temp file under /tmp.

    Args:
        token: Access token placed in the ``Authorization: Bearer`` header.
        url: Full request URL.
        body: JSON-serialisable request body.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        json.JSONDecodeError: curl returned an empty or non-JSON body.
        subprocess.TimeoutExpired: curl did not finish within 15 seconds.
    """
    # ensure_ascii=False keeps non-ASCII text readable in the payload; it is
    # encoded as UTF-8 explicitly below instead of via the locale encoding.
    payload = json.dumps(body, ensure_ascii=False)
    cmd = [
        'curl', '-s', '-X', 'PUT', url,
        '-H', f'Authorization: Bearer {token}',
        '-H', 'Content-Type: application/json',
        '--data-binary', '@-',  # read the request body from stdin
    ]
    r = subprocess.run(cmd, input=payload, capture_output=True, text=True,
                       encoding='utf-8', timeout=15)
    return json.loads(r.stdout)
# Replacement labels that more than one legacy tag maps onto.
_SCAN_AND_MATCH = '扫读定位|信息匹配'
_INFER_CAUSE_RESULT = '推理判断|原因/结果'

# Legacy ability tag -> replacement label. Tags not listed here are left
# untouched by fix_record().
TAG_MAP = dict([
    ('信息定位与提取', _SCAN_AND_MATCH),
    ('信息定位', _SCAN_AND_MATCH),
    ('细节理解', '细节理解|事实信息提取'),
    ('因果推断', _INFER_CAUSE_RESULT),
    ('主旨归纳', '主旨理解|段落/文本大意'),
    ('推理判断', _INFER_CAUSE_RESULT),
])
# Backfill explanations keyed by (sid, set_key, question_index), where set_key
# is 'first' or 'second' and question_index is the 0-based position inside that
# set's questionSet. fix_record() copies an entry into a question only when the
# question's existing 'explanation' is missing or blank.
EXPLANATIONS = {
    # sid 032701, set 'first'
    ('032701', 'first', 0): "空白处需要表示赚钱的名词。get some cash 是地道表达cash现金最符合学生售卖手链的情境。cheque支票语义过正式bracelet手链是销售的商品而非获取的目标。考查名词在语境中的最佳语义选择。",
    ('032701', 'first', 1): "修饰可数名词bracelets应用many许多。much修饰不可数名词only只有在此处语义不流畅不符合带了一些手链去给朋友看的语境。考查可数名词数量修饰词的语法规则。",
    ('032701', 'first', 2): "表语位置需形容词。They were very popular它们很受欢迎才是完整句子。cash和sell均非形容词不能直接作表语。考查词性辨别和上下文语义连贯。",
    ('032701', 'first', 3): "students是可数名词复数需用many修饰表示很多学生。much用于不可数名词only在so many结构后不合适。考查可数名词数量表达。",
    ('032701', 'first', 4): "bracelets是可数名词复数需用many表示数量。didn't have many bracelets表示手链不够多与后文promised to make more呼应。考查上下文逻辑与可数名词修饰。",
    ('032701', 'first', 5): "空白处需动词作谓语。She could sell a few她能卖出几条中sell是唯一动词。cheque和popular均为非动词。考查句子主干成分谓语动词的语法识别。",
    # sid 032801, set 'first'
    ('032801', 'first', 0): "根据后文a local band was playing music和it was very noisy可推断是一场派对。party最能描述这种热闹场景car和sleep与乐队演奏、噪音等语境不符。考查名词的语境语义选择。",
    ('032801', 'first', 1): "I tried to sleep, but the sound was too loud——噪音太大导致睡不着sleep是最合理的选择。drive和sing都偏离语境。考查动词的语义逻辑匹配。",
    ('032801', 'first', 2): "描述派对参与者身份adult people成年人与午夜派对的情境相符也为后文decided to be more understanding提供合理性。考查修饰词的语境判断。",
    ('032801', 'first', 3): "The party went on until midnight——派对持续到午夜是最符合逻辑的时间终点。morning和afternoon时间太早不符合嘈杂派对的情境。考查时间名词的合理推断。",
    ('032801', 'first', 4): "前后分句存在转折关系I was annoyed because I couldn't rest, but then I remembered...。but引导转折because表示原因前后因果不成立so表示结果。考查连词的逻辑关系。",
    ('032801', 'first', 5): "此处需引导原因的连词。because the band members cleaned up解释为什么第二天心情好转。although表示让步语义不通until表示时间不够自然。考查原因连词的选择。",
    # sid 032901, set 'first'
    ('032901', 'first', 0): "前后是因果关系去野生动物园是因为想看小象。because引导原因状语从句最合适。but表示转折so表示结果均不符合语义。考查原因连词。",
    ('032901', 'first', 1): "had just been born是被动语态表示刚刚出生。born是bear的过去分词与a few weeks ago的时间状语呼应。grow up长大和top顶端语义不通。考查词义辨析与被动语态。",
    ('032901', 'first', 2): "reach the top of a tall tree——大象想够到高树的顶端。top表示顶部位置与tall tree形成语义对应。bottom与reach矛盾nose不是位置。考查方位词的选择。",
    ('032901', 'first', 3): "大象的长鼻子是标志性特征。nose是正确选项banana和bottom明显不符。考查动物特征相关的核心词汇。",
    ('032901', 'first', 4): "could only touch the bottom——虽然用长鼻子去够但只碰到了底部。bottom与top形成对比体现够不到的落差感。考查反义词对top↔bottom的理解。",
    ('032901', 'first', 5): "小象吃黄色的香蕉是最自然的食物搭配。banana是常见动物投喂食物elephant和nose均不符合吃的语义。考查动物食物相关词汇。",
    ('032901', 'first', 6): "when I grow up等我长大是固定表达。grow up表示成长、长大born与when从句时态矛盾bottom不相关。考查固定短语的掌握。",
    ('032901', 'first', 7): "根据上下文描述的美好回忆bottom指从心底的比喻义。from the bottom of my heart是常见表达。top与情感表达不相配nose属于干扰项。考查固定搭配和比喻义。",
    # sid 032901, set 'second'
    ('032901', 'second', 0): "前后分句是因果关系姐姐喜欢美丽的东西所以总戴首饰。so引导结果because引导原因方向反了but表示转折不成立。考查结果连词。",
    ('032901', 'second', 1): "lost one of her favourite silver earrings——在首饰语境中银色的earrings耳环是最典型的可丢失物品。painting和language与silver修饰和wear搭配都不符。考查语境词义推断。",
    ('032901', 'second', 2): "couldn't find it anywhere else——在否定句中else表示别的地方/其他任何地方except含义不符again重复逻辑不通。考查否定句中的词汇用法。",
    ('032901', 'second', 3): "I decided to make..., although I'm not very good——前后存在让步转折关系虽然不擅长但还是决定做。although正确because和so均表示因果。考查让步连词。",
    ('032901', 'second', 4): "She showed me how to use small silver pieces——朋友擅长的是首饰制作jewellery与silver pieces和earrings主题一致。painting和language偏离主题。考查上下文主题关联。",
    ('032901', 'second', 5): "I made a mistake, so I had to start again——犯错后重新开始again表示再一次。else用于否定句表另外except表除了都不符合重新来过的语义。考查副词选择。",
}
def _question_set(jd, set_key):
    """Return the questionSet list stored under ``jd[set_key]``, or ``[]``."""
    return (jd.get(set_key) or {}).get('questionSet') or []


def _remap_abilities(abilities, tag_map):
    """Return ``(new_abilities, changed)`` after applying ``tag_map``."""
    remapped = [tag_map.get(a, a) for a in abilities]
    changed = any(a in tag_map for a in abilities)
    if not remapped:
        # A question without any ability tag gets the default label.
        return ['语法结构识别|完形填空'], True
    return remapped, changed


def _append_explanations(text, questions):
    """Return ``text`` with a 【解析】 section listing the questions' explanations."""
    # Leave the text alone when there is nothing to attach to, or when it
    # already carries an explanation section.
    if not questions or not text or '【解析】' in text:
        return text
    lines = [
        f"{idx + 1}. {q.get('explanation', '')}"
        for idx, q in enumerate(questions)
        if q.get('explanation', '')
    ]
    if not lines:
        return text
    return text + '\n\n【解析】\n' + '\n'.join(lines)


def fix_record(sid, fields, tag_map=None, explanations=None):
    """Fix ability tags and backfill explanations for one record.

    Args:
        sid: Identifier used as the first element of the explanation keys.
        fields: Record fields; reads ``jsonData`` (a JSON string), ``题目1``
            and ``题目2``. The mapping itself is not modified.
        tag_map: Legacy-tag -> replacement-label mapping. Defaults to the
            module-level ``TAG_MAP``.
        explanations: ``(sid, set_key, index) -> text`` mapping. Defaults to
            the module-level ``EXPLANATIONS``.

    Returns:
        ``(new_jsonData, new_题目1, new_题目2)`` when anything in ``jsonData``
        changed, otherwise ``(None, None, None)``. The two text values are the
        originals (``None`` normalised to ``''``) unless a 【解析】 section was
        appended.

    Raises:
        json.JSONDecodeError: ``jsonData`` is a non-empty string that is not
            valid JSON.
    """
    tag_map = TAG_MAP if tag_map is None else tag_map
    explanations = EXPLANATIONS if explanations is None else explanations

    # `or '{}'` mirrors the `or ''` guards on 题目1/题目2 below: a field that is
    # present but null/empty is treated the same as a missing one.
    jd = json.loads(fields.get('jsonData') or '{}')
    if not isinstance(jd, dict):
        return None, None, None

    changed = False
    for set_key in ('first', 'second'):
        for qi, q in enumerate(_question_set(jd, set_key)):
            new_abilities, ab_changed = _remap_abilities(q.get('ability') or [], tag_map)
            if ab_changed:
                q['ability'] = new_abilities
                changed = True

            expl_key = (sid, set_key, qi)
            if expl_key in explanations:
                old_expl = q.get('explanation', '')
                # Only fill blanks; an existing explanation is never overwritten.
                if not old_expl or old_expl.strip() == '':
                    q['explanation'] = explanations[expl_key]
                    changed = True

    if not changed:
        return None, None, None

    new_jd = json.dumps(jd, ensure_ascii=False)
    t1 = fields.get('题目1', '') or ''
    t2 = fields.get('题目2', '') or ''
    new_t1 = _append_explanations(t1, _question_set(jd, 'first'))
    new_t2 = _append_explanations(t2, _question_set(jd, 'second'))
    return new_jd, new_t1, new_t2
# ===== Main =====
# For every target record: locate it in its table, run fix_record() on its
# fields, and PUT back only the fields that actually changed.
from urllib.parse import quote  # page tokens are placed into a query string

token = get_token()
if not token:
    print("❌ Failed to get token")
    # SystemExit instead of exit(): the latter is a helper installed by the
    # `site` module and is not available under `python -S`.
    raise SystemExit(1)

# (label used in log lines, table id, record id, sid passed to fix_record)
targets = [
    ('P1', 'tblCgfYDnnqwLfgH', 'recvjt0HzBBhYu', '032501'),
    ('P3', 'tbl4q0ZUV3HB54t1', 'recvjug1hWz2oG', '032601'),
    ('P4', 'tblzKVm1FEukPgnN', 'recvjueHm15HPu', '032701'),
    ('P4', 'tblzKVm1FEukPgnN', 'recvjueN5QT1c5', '032801'),
    ('P5', 'tblLmUxzzUDe0QAJ', 'recvjueULrufNg', '032901'),
]

for tname, tid, rid, sid in targets:
    # Fetch: walk the record listing page by page until the record shows up,
    # so a record beyond the first 500 rows is still found.
    list_url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records?page_size=500"
    fields = None
    fetch_failed = False
    page_token = ''
    while True:
        url = list_url
        if page_token:
            url += f"&page_token={quote(page_token, safe='')}"
        data = api_get(token, url)
        if data.get('code') != 0:
            print(f"{tname} {sid}: fetch error {data.get('code')} {data.get('msg','')[:60]}")
            fetch_failed = True
            break
        page = data.get('data') or {}
        # `items` may be absent or null; treat that as an empty page.
        for item in page.get('items') or []:
            if item['record_id'] == rid:
                fields = item['fields']
                break
        page_token = page.get('page_token') or ''
        if fields is not None or not page.get('has_more') or not page_token:
            break
    if fetch_failed:
        continue
    if not fields:
        print(f"{tname} {sid}: record not found")
        continue

    new_jd, new_t1, new_t2 = fix_record(sid, fields)
    if new_jd is None:
        print(f"⏭️ {tname} {sid}: already fixed, no changes")
        continue

    # Update: jsonData always; the two text fields only when they differ from
    # what is currently stored.
    update_fields = {'jsonData': new_jd}
    if new_t1 != (fields.get('题目1', '') or ''):
        update_fields['题目1'] = new_t1
    if new_t2 != (fields.get('题目2', '') or ''):
        update_fields['题目2'] = new_t2
    changed_keys = list(update_fields.keys())
    url2 = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records/{rid}"
    resp = api_put(token, url2, {"fields": update_fields})
    if resp.get('code') == 0:
        print(f"{tname} {sid}: updated {', '.join(changed_keys)}")
    else:
        print(f"{tname} {sid}: code={resp.get('code')} msg={resp.get('msg','?')[:100]}")