ai_member_xiaoyan/scripts/final_audit_all.py

145 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
最终审校单元挑战听力全部表格P1-P7除P3
"""
import json, urllib.request
# App token placed in the bitable URL path (.../bitable/v1/apps/<APP_TOKEN>/tables/...).
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"

# JSON credentials file; get_token() reads apps[0].appId / apps[0].appSecret from it.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"

# Whitelist of accepted listening "ability" tags. The audit compares the part
# of each ability string that precedes the first '¥¥' against this set.
STD_HEARING_TAGS = {
    "显性事实理解|关键词识别",
    "显性事实理解|单句信息点抓取",
    "基础语境理解|场景/物品/动作识别",
    "显性细节理解|数字/时间/地点",
    "同义替换识别|词/短语级",
    "目的/偏好识别|显性 to/for/like",
    "干扰抑制|多信息筛选",
    "语用推断|否定与纠错",
    "多句保持|信息整合",
    "情绪/态度理解",
    "长对话理解|主旨+细节",
}
def get_token():
    """Request a Feishu tenant access token with the credentials in CRED_FILE.

    The first entry of the ``apps`` list in the JSON credentials file supplies
    ``appId`` and ``appSecret``, which are POSTed as JSON to the
    ``tenant_access_token/internal`` endpoint.

    Returns:
        The value of ``tenant_access_token`` in the JSON response.

    Raises:
        OSError: the credentials file cannot be opened, or the HTTP request
            fails (``urllib.error.URLError`` is an ``OSError`` subclass).
        KeyError, IndexError: the credentials file or the response lacks an
            expected field (presumably what happens when the service rejects
            the credentials and returns no token — verify against the API).
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    payload = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode()
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=payload,
        headers={"Content-Type": "application/json"})
    # The context manager closes the HTTP response (and its socket) even when
    # decoding the body raises.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())['tenant_access_token']
def api_call(url):
    """GET *url* with a tenant bearer token and return the decoded JSON body.

    Args:
        url: Fully-formed request URL, including any query string.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        OSError: the request fails (``urllib.error.URLError`` and
            ``HTTPError`` are ``OSError`` subclasses).
        ValueError: the response body is not valid JSON.
    """
    # NOTE: a fresh token is requested on every call.
    token = get_token()
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    # Close the response deterministically instead of leaving it to the GC.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())
# Tables to audit, keyed by the short label used in the printed report.
# Each value is (table id used in the bitable records URL, type label).
# There is deliberately no "P3" entry. The type label is unpacked by the
# audit loop but not otherwise used there.
# NOTE(review): P1 and P6 carry the same type label — confirm this is intended.
tables = {
    "P1": ("tbliZAhcc9C43B23", "listening_choicePic"),
    "P2": ("tblzTLNH7f13uWQN", "listening_tableCloze"),
    "P4": ("tblVmeDtBDKsAEfz", "listening_choiceShort"),
    "P5": ("tblDssVmhGzc3UKd", "listening_matchInfo"),
    "P6": ("tbloiMcD0sBtGSTq", "listening_choicePic"),
    "P7": ("tbly9SvPEa44k3yX", "listening_drag"),
}
def _fetch_all_records(first_page_url):
    """Return every record of one table, following pagination.

    Args:
        first_page_url: Records URL for the first page; it must already
            contain a query string, because the page token is appended
            with ``&page_token=``.

    Returns:
        A list with the ``items`` of all pages, in the order received.
    """
    from urllib.parse import quote

    records = []
    page_url = first_page_url
    while True:
        data = api_call(page_url)['data']
        # 'items' may be missing or null when a page has no records.
        records.extend(data.get('items') or [])
        next_token = data.get('page_token')
        # Stop when the service reports no further page (or gives no token
        # to request it with).
        if not data.get('has_more') or not next_token:
            return records
        page_url = f"{first_page_url}&page_token={quote(next_token)}"


def _count_bad_tags(abilities):
    """Count string entries of *abilities* that are not standard tags.

    Only the text before the first '¥¥', stripped of surrounding whitespace,
    is compared against STD_HEARING_TAGS. Non-string entries are ignored and
    ``None`` is treated as an empty list.
    """
    return sum(
        1
        for tag in (abilities or [])
        if isinstance(tag, str) and tag.split('¥¥')[0].strip() not in STD_HEARING_TAGS
    )


def _audit_json_data(jd):
    """Audit one parsed ``jsonData`` object.

    Args:
        jd: The decoded jsonData; must be a dict.

    Returns:
        A ``(missing_expl, bad_tags, single_group)`` tuple: the number of
        empty/missing explanations, the number of non-standard ability tags,
        and whether the record has a populated 'first' group but an empty,
        non-null 'second' group.
    """
    missing_expl = 0
    bad_tags = 0

    for section in ('first', 'second'):
        sect = jd.get(section)
        if not sect:
            continue
        # Section-level explanation (P5 style): only judged when the key exists.
        if 'explanation' in sect and not sect['explanation']:
            missing_expl += 1
        # Section-level ability (P5 style): only judged when it is a list.
        if isinstance(sect.get('ability'), list):
            bad_tags += _count_bad_tags(sect['ability'])
        # Question-level checks.
        for q in sect.get('questionSet') or []:
            if not q.get('explanation'):
                missing_expl += 1
            bad_tags += _count_bad_tags(q.get('ability'))

    # Root-level questionSet (P6 style).
    for q in jd.get('questionSet') or []:
        if not q.get('explanation'):
            missing_expl += 1
        bad_tags += _count_bad_tags(q.get('ability'))

    # Single-group check. An explicit null 'first'/'second' must not crash:
    # a null 'second' is simply not counted as a single group.
    first = jd.get('first') or {}
    second = jd.get('second')
    has_first = first.get('questionSet') or first.get('answerSet')
    has_second = (second or {}).get('questionSet')
    single_group = bool(has_first and not has_second and second is not None)

    return missing_expl, bad_tags, single_group


# Audit every configured table and collect one row of counters per table.
results = {}
for name, (tid, _etype) in tables.items():
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records?page_size=100"
    items = _fetch_all_records(url)
    ok = 0
    missing_expl = 0
    bad_tags = 0
    json_err = 0
    empty = 0
    single_group = 0
    skip_010199 = 0
    for item in items:
        fields = item['fields']
        # Records whose set id contains '010199' are excluded from the audit.
        sid = fields.get('题目集合 ID', '')
        if '010199' in str(sid):
            skip_010199 += 1
            continue
        jd_str = fields.get('jsonData', '')
        if not jd_str or jd_str == 'None':
            empty += 1
            continue
        try:
            jd = json.loads(jd_str)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # field value that is not str/bytes.
            json_err += 1
            continue
        if not isinstance(jd, dict):
            # Valid JSON that is not an object (null, list, ...) cannot be
            # audited; report it with the JSON errors instead of crashing.
            json_err += 1
            continue
        rec_missing, rec_bad, rec_single = _audit_json_data(jd)
        missing_expl += rec_missing
        bad_tags += rec_bad
        if rec_single:
            # Informational only: does not stop a record from counting as ok.
            single_group += 1
        if not (rec_missing or rec_bad):
            ok += 1
    results[name] = {
        'total': len(items), 'skip_010199': skip_010199, 'empty': empty,
        'ok': ok, 'missing_expl': missing_expl, 'bad_tags': bad_tags,
        'json_err': json_err, 'single_group': single_group
    }
# Print the per-table report followed by grand totals.
print(f"{'Table':<8} {'Total':>5} {'Skip':>5} {'Empty':>5} {'Pass':>5} {'!Expl':>5} {'!Tag':>5} {'!JSON':>5} {'1Grp':>5}")
print("-" * 58)
# Iterate the collected results (insertion order) rather than a second
# hard-coded list of table names, so the report always matches what was
# actually audited.
for name, r in results.items():
    print(f"{name:<8} {r['total']:>5} {r['skip_010199']:>5} {r['empty']:>5} {r['ok']:>5} {r['missing_expl']:>5} {r['bad_tags']:>5} {r['json_err']:>5} {r['single_group']:>5}")
# Grand totals over all tables for the counters shown on the summary line.
totals = {
    key: sum(row[key] for row in results.values())
    for key in ('ok', 'missing_expl', 'bad_tags', 'json_err')
}
print(f"\nTotal: OK={totals['ok']}, MissingExpl={totals['missing_expl']}, BadTags={totals['bad_tags']}, JSONerr={totals['json_err']}")