136 lines
5.1 KiB
Python
136 lines
5.1 KiB
Python
"""
|
||
单元挑战审校脚本:阅读-P1/P3/P4/P5
|
||
跳过 ID=010199 的记录
|
||
"""
|
||
import json, subprocess, sys
|
||
|
||
# App token handed to every `list_records` call made through SKILL_SCRIPT.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
# Absolute path of the shell helper; it is invoked as `bash <SKILL_SCRIPT> ...`.
SKILL_SCRIPT = "/root/.openclaw/workspace-xiaoyan/skills/lark_bitable_operate_as_bot/scripts/operate_bitable.sh"
|
||
|
||
def exec_bash(cmd):
    """Run *cmd* through the shell and return its stdout decoded as JSON.

    Args:
        cmd: A shell command line as a single string (run with ``shell=True``).

    Returns:
        The decoded JSON value, or ``{}`` when the command printed nothing
        (or only whitespace). If the command exceeds the 30 second timeout,
        or its stdout is not valid JSON, a dict of the form
        ``{"code": -1, "msg": ...}`` is returned instead of raising, so a
        caller that checks ``data.get('code')`` can report the failure and
        move on to the next table.
    """
    try:
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=30
        )
    except subprocess.TimeoutExpired:
        return {"code": -1, "msg": f"command timed out after 30s: {cmd}"}

    stdout = (result.stdout or "").strip()
    if not stdout:
        return {}

    try:
        return json.loads(stdout)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Keep a short excerpt
        # of both streams so the caller's error print is actually useful.
        return {
            "code": -1,
            "msg": "stdout is not valid JSON",
            "stdout": stdout[:200],
            "stderr": (result.stderr or "")[:200],
        }
|
||
|
||
# Tables to audit, keyed by display name.
# `filter_ids`: when set, only records whose question-set ID is in the list are
# audited; None means no ID filter is applied to that table.
targets = {
    "阅读-P1": {"table_id": "tblCgfYDnnqwLfgH", "filter_ids": ["032501"]},
    "阅读-P3": {"table_id": "tbl4q0ZUV3HB54t1", "filter_ids": None},
    "阅读-P4": {"table_id": "tblzKVm1FEukPgnN", "filter_ids": None},
    "阅读-P5": {"table_id": "tblLmUxzzUDe0QAJ", "filter_ids": None},
}
|
||
|
||
# Ability tags without a '|' separator that are still accepted as well-formed.
_PLAIN_ABILITY_TAGS = ('信息提取', '细节理解', '信息定位与提取')


def _load_json_data(raw):
    """Decode a record's jsonData value into a dict.

    Anything that cannot be decoded into a JSON object (wrong type, invalid
    JSON, or JSON that is not an object) yields
    ``{"_parse_error": <first 200 characters of the raw value>}``.
    """
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        return {"_parse_error": str(raw)[:200]}
    if not isinstance(parsed, dict):
        return {"_parse_error": str(raw)[:200]}
    return parsed


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_list(value):
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _check_question(set_name, qi, q, first_cat, first_type):
    """Return the list of issue messages found for one question.

    Checks: hearing-related ability tags, ability tags that are neither
    '|'-separated nor in the accepted plain list, and answer indices that
    fall outside the options list.
    """
    if not isinstance(q, dict):
        return [f"{set_name}[{qi}]: 题目数据不是JSON对象"]

    found = []

    abilities = q.get('ability', [])
    if isinstance(abilities, str):
        # A bare string would otherwise be iterated character by character.
        abilities = [abilities]
    abilities = _as_list(abilities)

    # Check for hearing-related abilities in reading
    if '听觉' in str(abilities) or '听力' in str(abilities):
        found.append(f"{set_name}[{qi}]: 能力标签含'听觉/听力'但题型为{first_cat}/{first_type}")

    # Check non-standard ability tags (non-string tags count as non-standard)
    non_std = [
        a for a in abilities
        if not isinstance(a, str)
        or ('|' not in a and a not in _PLAIN_ABILITY_TAGS)
    ]
    if non_std:
        found.append(f"{set_name}[{qi}]: 能力标签格式不规范: {non_std}")

    # Check answer bounds
    answer = q.get('answer', [])
    options = _as_list(q.get('options', []))
    if isinstance(answer, list):
        for ai in answer:
            if isinstance(ai, int) and (ai < 0 or ai >= len(options)):
                found.append(f"{set_name}[{qi}]: answer索引{ai}超出options范围(0-{len(options)-1})")
    elif isinstance(answer, int):
        if answer < 0 or answer >= len(options):
            found.append(f"{set_name}[{qi}]: answer索引{answer}超出options范围(0-{len(options)-1})")

    return found


# Audit results for every table, keyed by "<table name>|<sid>|<record_id>".
all_audit_results = {}

for name, config in targets.items():
    print(f"\n{'='*60}")
    print(f" {name} (table: {config['table_id']})")
    print(f"{'='*60}")

    cmd = f"bash {SKILL_SCRIPT} list_records {APP_TOKEN} {config['table_id']} 500"
    data = exec_bash(cmd)

    # Anything other than a JSON object with code == 0 is reported and skipped.
    if not isinstance(data, dict) or data.get('code') != 0:
        print(f" ERROR: {data}")
        continue

    # Tolerate a missing or null data/items payload.
    items = _as_list(_as_dict(data.get('data')).get('items'))
    audit_records = []

    for item in items:
        fields = item.get('fields', {}) or {}
        sid = fields.get('题目集合 ID', '') or ''

        # Skip 010199
        if '010199' in str(sid):
            print(f" SKIP: {sid} (test data)")
            continue

        # For P1, only filter 032501
        if config['filter_ids'] and sid not in config['filter_ids']:
            continue

        if not sid:
            # Records without an ID are not audited; warn only when they
            # nevertheless carry question data.
            jd = fields.get('jsonData', '')
            if jd and jd != '{}':
                print(f" WARN: record {item['record_id']} has jsonData but no 题目集合 ID")
            continue

        audit_records.append(item)
        jd = _load_json_data(fields.get('jsonData', '{}'))

        # Count question sets
        first = _as_dict(jd.get('first', {}))
        second = _as_dict(jd.get('second', {}))
        qs1 = _as_list(first.get('questionSet', []))
        qs2 = _as_list(second.get('questionSet', []))

        has_single_set = (len(qs1) > 0 and len(qs2) == 0) or (not first)

        print(f"\n Record: {item['record_id']} | ID={sid} | dataStatus={fields.get('dataStatus','?')}")
        print(f" Question sets: first={len(qs1)}题, second={len(qs2)}题")
        if has_single_set and not (len(qs1) == 0 and len(qs2) == 0):
            print(f" ⚠️ 只有一道题组(缺少second题组)")

        # Check ability tags
        first_type = first.get('type', '')
        first_cat = first.get('category', '')

        issues = []
        for set_name, qset in [('first', qs1), ('second', qs2)]:
            for qi, q in enumerate(qset):
                issues.extend(_check_question(set_name, qi, q, first_cat, first_type))

        if issues:
            print(f" 🔴 发现问题:")
            for issue in issues:
                print(f" - {issue}")
        else:
            print(f" ✅ 基础检查通过")

        all_audit_results[f"{name}|{sid}|{item['record_id']}"] = {
            "table_name": name,
            "table_id": config['table_id'],
            "record_id": item['record_id'],
            "sid": sid,
            "dataStatus": fields.get('dataStatus', '?'),
            "has_single_set": has_single_set,
            "issues": issues,
            "first_count": len(qs1),
            "second_count": len(qs2),
            "first_type": f"{first_cat}/{first_type}",
            "existing_audit": (fields.get('审校结果', '') or '')[:80],
        }
|
||
|
||
# Human-readable summary banner.
print(f"\n{'='*60}")
print(f" SUMMARY: {len(all_audit_results)} records to audit")
print(f"{'='*60}")

# Output JSON for next step; the marker line separates it from the log above.
print("\n---JSON_OUTPUT---")
print(json.dumps(all_audit_results, ensure_ascii=False, indent=2))
|