ai_member_xiaoyan/scripts/fix_and_backfill.py

195 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Fix ability tags and add missing explanations, then backfill jsonData and 题目1/题目2.
"""
import json, subprocess, copy
# App token and helper-script path; both are interpolated into the
# `bash <SKILL> <action> <APP_TOKEN> ...` command lines built further down.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
SKILL = "skills/lark_bitable_operate_as_bot/scripts/operate_bitable.sh"

# ===== Ability tag mappings =====
# Old ability label -> replacement label. Several old labels collapse onto
# the same replacement, so the shared targets are named once here.
_SCAN_AND_MATCH = "扫读定位|信息匹配"
_INFER_CAUSE_RESULT = "推理判断|原因/结果"

TAG_MAP = {
    "信息定位与提取": _SCAN_AND_MATCH,
    "信息定位": _SCAN_AND_MATCH,
    "细节理解": "细节理解|事实信息提取",
    "因果推断": _INFER_CAUSE_RESULT,
    "主旨归纳": "主旨理解|段落/文本大意",
    "推理判断": _INFER_CAUSE_RESULT,
}
# ===== Explanations per question =====
# Keys are (sid, set_key, question_index) tuples, the same shape fix_record
# builds for its lookup: sid is the 4th element of each entry in `targets`,
# set_key is 'first' or 'second', and question_index is the 0-based position
# of the question inside that set's questionSet list.
# Values are the explanation text that fix_record writes into a question
# whose 'explanation' is missing or blank.
EXPLANATIONS = {
# P4 032701 - reading_cloze: Selling Bracelets at School
('032701', 'first', 0): '空白处需要表示"赚钱"的名词。"get some cash"挣点零花钱是地道表达cash现金最符合学生售卖手链的情境。cheque支票语义过正式bracelet手链是销售的商品而非获取的目标。考查名词在语境中的最佳语义选择。',
('032701', 'first', 1): '修饰可数名词bracelets应用many许多。much修饰不可数名词only只有在此处语义不流畅不符合"带了一些手链去给朋友看"的语境。考查可数名词数量修饰词的语法规则。',
('032701', 'first', 2): '表语位置需形容词。"They were very popular"它们很受欢迎才是完整句子。cash和sell均非形容词不能直接作表语。考查词性辨别和上下文语义连贯。',
('032701', 'first', 3): 'students是可数名词复数需用many修饰表示"很多学生"。much用于不可数名词only在"so many"结构后不合适。考查可数名词数量表达。',
('032701', 'first', 4): "bracelets是可数名词复数需用many表示数量。didn't have many bracelets 表示手链不够多,与后文 promised to make more 呼应。考查上下文逻辑与可数名词修饰。",
('032701', 'first', 5): '空白处需动词作谓语。"She could sell a few"她能卖出几条中sell是唯一动词。cheque和popular均为非动词。考查句子主干成分谓语动词的语法识别。',
# P4 032801 - reading_cloze: The Midnight Party in the Car Park
('032801', 'first', 0): '根据后文"a local band was playing music""it was very noisy",可推断是一场"派对"。party最能描述这种热闹场景car和sleep与乐队演奏、噪音等语境不符。考查名词的语境语义选择。',
('032801', 'first', 1): '"I tried to sleep, but the sound was too loud"——噪音太大导致睡不着sleep是最合理的选择。drive开车和sing唱歌都偏离语境。考查动词的语义逻辑匹配。',
('032801', 'first', 2): '描述派对参与者身份,"adult people"(成年人)与午夜派对的情境相符,也为后文"decided to be more understanding"提供合理性。young语义对等但看答案选择考查修饰词的语境判断。',
('032801', 'first', 3): '"The party went on until midnight"——派对持续到午夜是最符合逻辑的时间终点。morning和afternoon太早不符合嘈杂派对的情境。考查时间名词的合理推断。',
('032801', 'first', 4): "前后分句存在转折关系I was annoyed because I couldn't rest, but then I remembered...。but引导转折because表示原因前后因果不成立so表示结果。考查连词的逻辑关系。",
('032801', 'first', 5): '此处需引导原因的连词。"because the band members cleaned up"解释为什么第二天心情好转。although表示让步语义不通until表示时间不够自然。考查原因连词的选择。',
# P5 032901 first set - reading_openCloze: A Visit to the Wildlife Park
('032901', 'first', 0): '前后是因果关系:去野生动物园"是因为"想看小象。because引导原因状语从句最合适。but表示转折so表示结果均不符合语义。考查原因连词。',
('032901', 'first', 1): '"had just been born"是被动语态,表示"刚刚出生"。born是bear出生的过去分词"a few weeks ago"的时间状语呼应。grow up长大和top顶端语义不通。考查词义辨析与被动语态。',
('032901', 'first', 2): '"reach the top of a tall tree"——大象想够到高树的顶端。top表示顶部位置与tall tree高树形成语义对应。bottom底部与reach矛盾nose鼻子不是位置。考查方位词的选择。',
('032901', 'first', 3): '大象的"长鼻子"是标志性特征。nose是正确选项banana和bottom明显不符。考查动物特征相关的核心词汇。',
('032901', 'first', 4): '"could only touch the bottom"——虽然用长鼻子去够,但只碰到了"底部"。bottom与top形成对比体现够不到的落差感。考查反义词对top↔bottom的理解。',
('032901', 'first', 5): '小象吃黄色的"香蕉"是最自然的食物搭配。banana是常见动物投喂食物elephant和nose均不符合""的语义。考查动物食物相关词汇。',
('032901', 'first', 6): '"when I grow up"等我长大是固定表达。grow up表示成长、长大born出生与when从句时态矛盾bottom不相关。考查固定短语的掌握。',
('032901', 'first', 7): '根据上下文描述的美好回忆bottom指"从心底"的比喻义。"from the bottom of my heart"是常见表达。top顶部与情感表达不相配nose属于干扰项。考查固定搭配和比喻义。',
# P5 032901 second set - reading_openCloze: A Special Gift
('032901', 'second', 0): '前后分句是因果关系:姐姐喜欢美丽的东西,"所以"总戴首饰。so引导结果because引导原因方向反了but表示转折不成立。考查结果连词。',
('032901', 'second', 1): '"lost one of her favourite silver earrings"——在首饰语境中银色的earrings耳环是最典型的可丢失物品。painting和language语言与silver修饰和"wear"搭配都不符。考查语境词义推断。',
('032901', 'second', 2): "couldn't find it anywhere else——在否定句中else表示别的地方/其他任何地方except含义不符again重复逻辑不通。考查否定句中的词汇用法。",
('032901', 'second', 3): "I decided to make..., although I'm not very good——前后存在让步转折关系虽然不擅长但还是决定做。although正确because和so均表示因果。考查让步连词。",
('032901', 'second', 4): '"She showed me how to use small silver pieces"——朋友擅长的是首饰制作jewellery与silver pieces和earrings主题一致。painting绘画和language语言偏离主题。考查上下文主题关联。',
('032901', 'second', 5): '"I made a mistake, so I had to start again"——犯错后重新开始again表示"再一次"。else用于否定句表"另外"except表"除了",都不符合重新来过的语义。考查副词选择。',
}
def exec_bash(cmd):
    """Run *cmd* through the shell and return its stdout decoded as JSON.

    Args:
        cmd: A complete shell command line. It is executed with
            ``shell=True``, so it must only be built from trusted values.

    Returns:
        The decoded JSON value, or ``{}`` when stdout is not valid JSON.
        In that case a ``PARSE ERROR`` line with the first 200 characters
        of stdout is printed.
    """
    proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError:
        # Only a decoding failure is treated as "no data"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        print(f"PARSE ERROR: {proc.stdout[:200]}")
        return {}
# Tables whose questions are cloze items; only these receive the default
# cloze ability tag when a question has no ability at all.
_CLOZE_TABLES = ('P4', 'P5')
_DEFAULT_CLOZE_ABILITY = '语法结构识别|完形填空'
# Marker that shows a 题目 text field already carries an explanation section.
_EXPLANATION_MARKER = '【解析】'


def _question_set(jd, set_key):
    """Return the list at jd[set_key]['questionSet'], or [] if absent/null."""
    return (jd.get(set_key) or {}).get('questionSet') or []


def _remap_abilities(abilities, add_cloze_default):
    """Translate ability tags through TAG_MAP; return (new_tags, changed)."""
    changed = any(tag in TAG_MAP for tag in abilities)
    remapped = [TAG_MAP.get(tag, tag) for tag in abilities]
    if changed:
        # Several old tags share one replacement; keep the first occurrence
        # so a question never ends up with the same tag twice.
        remapped = list(dict.fromkeys(remapped))
    if not remapped and add_cloze_default:
        remapped = [_DEFAULT_CLOZE_ABILITY]
        changed = True
    return remapped, changed


def _append_explanations(text, questions):
    """Return *text* with a numbered 【解析】 section appended when applicable."""
    if not questions or not text or _EXPLANATION_MARKER in text:
        return text
    numbered = []
    for number, question in enumerate(questions, start=1):
        expl = question.get('explanation', '')
        if expl:
            numbered.append(f'{number}. {expl}')
    if not numbered:
        return text
    return text + '\n'.join(['\n\n【解析】', *numbered])


def fix_record(tname, tid, rid, sid, record_fields):
    """Fix ability tags and fill in missing explanations for one record.

    Args:
        tname: Table name; the default cloze ability tag is only added to
            questions of the 'P4' and 'P5' tables.
        tid: Table id (unused here; kept for signature compatibility).
        rid: Record id (unused here; kept for signature compatibility).
        sid: Id used as the first element of the EXPLANATIONS lookup key.
        record_fields: The record's field dict. Reads 'jsonData' (a JSON
            string), '题目1' and '题目2'.

    Returns:
        ``(None, None, None)`` when nothing in jsonData needed changing.
        Otherwise ``(new_json_str, new_t1, new_t2)``: the re-serialised
        jsonData and the 题目1/题目2 texts, each with a 【解析】 section
        appended if it was non-empty, had none yet, and its question set
        has at least one explanation.
    """
    # A missing, null or empty jsonData is treated as an empty document.
    jd = json.loads(record_fields.get('jsonData') or '{}')
    is_cloze_table = tname in _CLOZE_TABLES
    modified = False

    for set_key in ('first', 'second'):
        for qi, q in enumerate(_question_set(jd, set_key)):
            new_abilities, ab_changed = _remap_abilities(
                q.get('ability') or [], is_cloze_table
            )
            if ab_changed:
                q['ability'] = new_abilities
                modified = True

            # Only fill in an explanation when the question has none yet.
            expl = EXPLANATIONS.get((sid, set_key, qi))
            if expl is not None and not (q.get('explanation') or '').strip():
                q['explanation'] = expl
                modified = True

    if not modified:
        return None, None, None

    new_jd_str = json.dumps(jd, ensure_ascii=False)
    t1 = record_fields.get('题目1', '') or ''
    t2 = record_fields.get('题目2', '') or ''
    new_t1 = _append_explanations(t1, _question_set(jd, 'first'))
    new_t2 = _append_explanations(t2, _question_set(jd, 'second'))
    return new_jd_str, new_t1, new_t2
# ===== Main =====
# One entry per record to repair: (table name, table id, record id, sid).
targets = [
    ('P1', 'tblCgfYDnnqwLfgH', 'recvjt0HzBBhYu', '032501'),
    ('P3', 'tbl4q0ZUV3HB54t1', 'recvjug1hWz2oG', '032601'),
    ('P4', 'tblzKVm1FEukPgnN', 'recvjueHm15HPu', '032701'),
    ('P4', 'tblzKVm1FEukPgnN', 'recvjueN5QT1c5', '032801'),
    ('P5', 'tblLmUxzzUDe0QAJ', 'recvjueULrufNg', '032901'),
]

for tname, tid, rid, sid in targets:
    # Fetch the table's records (the helper is asked for 500) and pick the
    # target record by id.
    data = exec_bash(f'bash {SKILL} list_records {APP_TOKEN} {tid} 500')
    if data.get('code') != 0:
        print(f"{sid}: fetch failed")
        continue

    # Tolerate a response whose 'data' or 'items' is missing or null.
    items = (data.get('data') or {}).get('items') or []
    fields = next(
        (item.get('fields') for item in items if item.get('record_id') == rid),
        None,
    )
    if not fields:
        print(f"{sid}: record not found")
        continue

    new_jd, new_t1, new_t2 = fix_record(tname, tid, rid, sid, fields)
    if new_jd is None:
        print(f"⏭️ {sid}: no changes needed")
        continue

    # Build update payload: jsonData always, text fields only when changed.
    update_fields = {'jsonData': new_jd}
    if new_t1 != (fields.get('题目1', '') or ''):
        update_fields['题目1'] = new_t1
    if new_t2 != (fields.get('题目2', '') or ''):
        update_fields['题目2'] = new_t2
    payload = json.dumps(update_fields, ensure_ascii=False)

    # Pass the payload as its own argv element instead of splicing it into a
    # single-quoted shell string: the explanations contain apostrophes
    # ("didn't", "couldn't", "I'm") that would terminate the quoting.
    argv = ['bash', SKILL, 'update_record', APP_TOKEN, tid, rid, payload]
    try:
        result = subprocess.run(argv, capture_output=True, text=True, timeout=30)
    except subprocess.TimeoutExpired:
        # One slow update must not abort the remaining targets.
        print(f"{tname} {sid}: update timed out")
        continue

    try:
        resp = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"{tname} {sid}: {e}{result.stdout[:200]}")
        continue

    if isinstance(resp, dict) and resp.get('code') == 0:
        changes = [
            name for name in ('jsonData', '题目1', '题目2') if name in update_fields
        ]
        print(f"{tname} {sid}: updated {', '.join(changes)}")
    else:
        print(f"{tname} {sid}: API error — {resp}")