ai_member_xiaoyan/scripts/audit_unit_challenge_reading.py

136 lines
5.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

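"""
Unit-challenge audit script for the Reading tables P1/P3/P4/P5.

Records whose 题目集合 ID contains 010199 are skipped as test data, and the
P1 table is restricted to ID 032501.
"""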
import json, subprocess, sys
# Token passed to the skill script right after the ``list_records``
# sub-command; presumably the Lark bitable app token — TODO confirm.
# NOTE(review): hard-coded in source — confirm whether this value is sensitive
# and should be read from the environment instead.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
# Absolute path of the shell helper that this script runs via ``bash`` to
# list table records.
SKILL_SCRIPT = "/root/.openclaw/workspace-xiaoyan/skills/lark_bitable_operate_as_bot/scripts/operate_bitable.sh"
def exec_bash(cmd):
    """Run *cmd* through the shell and return its stdout decoded as JSON.

    Args:
        cmd: Shell command line; executed with ``shell=True`` and a 30 second
            timeout.

    Returns:
        The decoded JSON value when the command prints valid JSON.
        ``{}`` when the command succeeds but prints nothing.
        A dict with ``"code": -1`` plus diagnostic fields when the command
        times out, exits non-zero without output, or prints something that is
        not valid JSON. Callers that test ``code != 0`` therefore report the
        failure instead of crashing on an exception.
    """
    try:
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=30
        )
    except subprocess.TimeoutExpired:
        return {"code": -1, "msg": "command timed out after 30s", "cmd": cmd}

    stdout = result.stdout.strip() if result.stdout else ""
    if not stdout:
        if result.returncode != 0:
            # Surface stderr so the caller's error report is actionable.
            return {
                "code": -1,
                "msg": "command failed without output",
                "returncode": result.returncode,
                "stderr": (result.stderr or "")[:500],
            }
        return {}

    try:
        return json.loads(stdout)
    except json.JSONDecodeError as exc:
        return {
            "code": -1,
            "msg": f"command output is not valid JSON: {exc}",
            "returncode": result.returncode,
            "stdout": stdout[:500],
            "stderr": (result.stderr or "")[:500],
        }
# Tables to audit, keyed by display name. ``filter_ids`` limits the audit to
# the listed 题目集合 ID values; ``None`` audits every record that has an ID.
targets = {
    "阅读-P1": {"table_id": "tblCgfYDnnqwLfgH", "filter_ids": ["032501"]},
    "阅读-P3": {"table_id": "tbl4q0ZUV3HB54t1", "filter_ids": None},
    "阅读-P4": {"table_id": "tblzKVm1FEukPgnN", "filter_ids": None},
    "阅读-P5": {"table_id": "tblLmUxzzUDe0QAJ", "filter_ids": None},
}

# Ability tags that the non-standard-tag check accepts verbatim.
STANDARD_ABILITY_TAGS = ('信息提取', '细节理解', '信息定位与提取')

# NOTE(review): this marker is the empty string in the file as it stands, and
# the empty string is a substring of every string, so the non-standard-tag
# check below can never report anything. The intended separator character was
# probably lost (possibly a special/ambiguous Unicode character) — restore it
# here to re-enable the check. Behaviour is left unchanged until confirmed.
ABILITY_TAG_MARKER = ''


def _load_json_data(raw):
    """Decode a record's ``jsonData`` field.

    Returns ``(payload, error)``. ``payload`` is always a dict. ``error`` is
    ``None`` on success, otherwise a short description of the failure.
    A missing or empty field is treated as an empty object.
    """
    if not raw:
        return {}, None
    try:
        payload = json.loads(raw)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; TypeError covers non-str input.
        return {}, f"{exc} | {str(raw)[:200]}"
    if not isinstance(payload, dict):
        return {}, f"expected a JSON object, got {type(payload).__name__}"
    return payload, None


def _section(payload, key):
    """Return ``payload[key]`` when it is a dict, otherwise an empty dict."""
    value = payload.get(key)
    return value if isinstance(value, dict) else {}


def _question_set(section):
    """Return the section's ``questionSet`` when it is a list, else ``[]``."""
    qset = section.get('questionSet')
    return qset if isinstance(qset, list) else []


def _check_question(set_name, qi, q, type_label):
    """Return the list of issue strings found for a single question."""
    found = []
    abilities = q.get('ability') or []

    # Hearing-related abilities should not appear on reading questions.
    if '听觉' in str(abilities) or '听力' in str(abilities):
        found.append(f"{set_name}[{qi}]: 能力标签含'听觉/听力'但题型为{type_label}")

    # Non-standard ability tags (currently a no-op, see ABILITY_TAG_MARKER).
    non_std = [
        tag for tag in abilities
        if ABILITY_TAG_MARKER not in str(tag) and tag not in STANDARD_ABILITY_TAGS
    ]
    if non_std:
        found.append(f"{set_name}[{qi}]: 能力标签格式不规范: {non_std}")

    # Every integer answer index must point at an existing option.
    answer = q.get('answer', [])
    options = q.get('options') or []
    indices = answer if isinstance(answer, list) else [answer]
    for idx in indices:
        if isinstance(idx, int) and (idx < 0 or idx >= len(options)):
            found.append(f"{set_name}[{qi}]: answer索引{idx}超出options范围(0-{len(options)-1})")
    return found


def _audit_record(name, config, item, fields, sid):
    """Check one record, print its report, and return its summary dict."""
    jd, parse_error = _load_json_data(fields.get('jsonData'))

    first = _section(jd, 'first')
    second = _section(jd, 'second')
    qs1 = _question_set(first)
    qs2 = _question_set(second)
    has_single_set = (len(qs1) > 0 and len(qs2) == 0) or (not first)

    print(f"\n Record: {item['record_id']} | ID={sid} | dataStatus={fields.get('dataStatus','?')}")
    print(f" Question sets: first={len(qs1)}题, second={len(qs2)}")
    if has_single_set and (qs1 or qs2):
        print(f" ⚠️ 只有一道题组缺少second题组")

    type_label = f"{first.get('category', '')}/{first.get('type', '')}"

    issues = []
    if parse_error is not None:
        # An unreadable payload must not be reported as a passing record.
        issues.append(f"jsonData解析失败: {parse_error}")
    for set_name, qset in (('first', qs1), ('second', qs2)):
        for qi, q in enumerate(qset):
            issues.extend(_check_question(set_name, qi, q, type_label))

    if issues:
        print(f" 🔴 发现问题:")
        for issue in issues:
            print(f" - {issue}")
    else:
        print(f" ✅ 基础检查通过")

    return {
        "table_name": name,
        "table_id": config['table_id'],
        "record_id": item['record_id'],
        "sid": sid,
        "dataStatus": fields.get('dataStatus', '?'),
        "has_single_set": has_single_set,
        "issues": issues,
        "first_count": len(qs1),
        "second_count": len(qs2),
        "first_type": type_label,
        "existing_audit": (fields.get('审校结果', '') or '')[:80],
    }


all_audit_results = {}
for name, config in targets.items():
    print(f"\n{'='*60}")
    print(f" {name} (table: {config['table_id']})")
    print(f"{'='*60}")

    # NOTE(review): 500 looks like a page-size/limit argument of the skill
    # script — confirm; tables with more records may be truncated.
    cmd = f"bash {SKILL_SCRIPT} list_records {APP_TOKEN} {config['table_id']} 500"
    data = exec_bash(cmd)
    if not isinstance(data, dict) or data.get('code') != 0:
        print(f" ERROR: {data}")
        continue

    # ``data`` or ``items`` may be missing or null for an empty table.
    items = (data.get('data') or {}).get('items') or []
    audit_records = []  # records kept for this table; not read again below
    for item in items:
        fields = item.get('fields') or {}
        sid = fields.get('题目集合 ID', '') or ''

        # Skip test data.
        if '010199' in str(sid):
            print(f" SKIP: {sid} (test data)")
            continue

        # Tables with an ID filter only audit the listed IDs.
        if config['filter_ids'] and sid not in config['filter_ids']:
            continue

        if not sid:
            # Flag records that carry data but have no ID, then skip them.
            jd = fields.get('jsonData', '')
            if jd and jd != '{}':
                print(f" WARN: record {item['record_id']} has jsonData but no 题目集合 ID")
            continue

        audit_records.append(item)
        all_audit_results[f"{name}|{sid}|{item['record_id']}"] = _audit_record(
            name, config, item, fields, sid
        )

print(f"\n{'='*60}")
print(f" SUMMARY: {len(all_audit_results)} records to audit")
print(f"{'='*60}")

# Machine-readable output for the next step.
print("\n---JSON_OUTPUT---")
print(json.dumps(all_audit_results, ensure_ascii=False, indent=2))