# ai_member_xiaoyan/scripts/final_audit_all.py

"""
最终审校：单元挑战听力全部表格（P1-P7除P3）
"""
import json
import urllib.parse
import urllib.request

# App identifier interpolated into every bitable records URL built by this script.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"

# JSON credentials file; get_token() reads apps[0].appId / apps[0].appSecret from it.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"

# Whitelist of accepted listening ability tags. The audit compares the part of
# each tag that precedes the first '¥¥' (whitespace-stripped) against this set.
STD_HEARING_TAGS = {
    "显性事实理解｜关键词识别",
    "显性事实理解｜单句信息点抓取",
    "基础语境理解｜场景/物品/动作识别",
    "显性细节理解｜数字/时间/地点",
    "同义替换识别｜词/短语级",
    "目的/偏好识别｜显性 to/for/like",
    "干扰抑制｜多信息筛选",
    "语用推断｜否定与纠错",
    "多句保持｜信息整合",
    "情绪/态度理解",
    "长对话理解｜主旨+细节",
}

def get_token(timeout=30.0):
    """Request a Feishu tenant access token for the first app in CRED_FILE.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``tenant_access_token`` string from the response.

    Raises:
        RuntimeError: The response carried no token (e.g. bad credentials);
            the message includes the ``code``/``msg`` fields of the response.
        OSError: The credentials file could not be read, or the request
            failed at the network level (``urllib.error.URLError``).
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg['apps'][0]

    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode(),
        headers={"Content-Type": "application/json"})

    # The context manager closes the connection even if reading/decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = json.loads(resp.read())

    token = body.get('tenant_access_token')
    if not token:
        raise RuntimeError(
            f"Feishu token request failed: code={body.get('code')} msg={body.get('msg')}")
    return token

def api_call(url, timeout=30.0):
    """GET *url* with a Bearer tenant token and return the decoded JSON body.

    A fresh token is requested through get_token() on every call.

    Args:
        url: Full request URL, including any query string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body parsed by ``json.loads``.
    """
    token = get_token()
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    # Close the response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())

# Tables to audit: label -> (bitable table id, exercise type name).
# There is no "P3" entry; the module docstring states it is excluded.
tables = dict(
    P1=("tbliZAhcc9C43B23", "listening_choicePic"),
    P2=("tblzTLNH7f13uWQN", "listening_tableCloze"),
    P4=("tblVmeDtBDKsAEfz", "listening_choiceShort"),
    P5=("tblDssVmhGzc3UKd", "listening_matchInfo"),
    P6=("tbloiMcD0sBtGSTq", "listening_choicePic"),
    P7=("tbly9SvPEa44k3yX", "listening_drag"),
)

def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict (tolerates JSON null)."""
    return value if isinstance(value, dict) else {}


def _count_bad_tags(abilities):
    """Count string tags in *abilities* that are not in STD_HEARING_TAGS.

    Only the text before the first '¥¥' is compared, after stripping
    whitespace. Non-string entries are ignored.
    """
    return sum(
        1
        for tag in abilities
        if isinstance(tag, str) and tag.split('¥¥')[0].strip() not in STD_HEARING_TAGS
    )


def _audit_questions(questions):
    """Return (missing_explanations, bad_tags) for an iterable of question dicts."""
    missing = 0
    bad = 0
    for q in questions:
        if not q.get('explanation'):
            missing += 1
        # `or []` tolerates an "ability" key whose value is null.
        bad += _count_bad_tags(q.get('ability') or [])
    return missing, bad


def _audit_record(jd):
    """Audit one parsed jsonData object.

    Returns:
        (missing_explanations, bad_tags, is_single_group) where
        is_single_group is True when the "first" section has questions or
        answers while a present "second" section has no questions.
    """
    missing = 0
    bad = 0

    for section in ('first', 'second'):
        sect = _as_dict(jd.get(section))
        if not sect:
            continue

        # Section-level explanation (P5 style): only checked when the key exists.
        if 'explanation' in sect and not sect['explanation']:
            missing += 1

        # Section-level ability (P5 style): only checked when it is a list.
        if isinstance(sect.get('ability'), list):
            bad += _count_bad_tags(sect['ability'])

        q_missing, q_bad = _audit_questions(sect.get('questionSet') or [])
        missing += q_missing
        bad += q_bad

    # Root-level questionSet (P6 style).
    root_missing, root_bad = _audit_questions(jd.get('questionSet') or [])
    missing += root_missing
    bad += root_bad

    first = _as_dict(jd.get('first'))
    second = _as_dict(jd.get('second'))
    has_first = first.get('questionSet') or first.get('answerSet')
    has_second = second.get('questionSet')
    is_single_group = bool(
        has_first and not has_second and jd.get('second') is not None)

    return missing, bad, is_single_group


def _fetch_all_records(table_id, page_size=100):
    """Return every record of one table, following the page_token cursor.

    Raises:
        RuntimeError: The API answered with a non-zero ``code`` or without
            a ``data`` object.
    """
    base = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records"
    records = []
    page_token = None
    while True:
        query = {"page_size": page_size}
        if page_token:
            query["page_token"] = page_token
        resp = api_call(f"{base}?{urllib.parse.urlencode(query)}")
        if resp.get('code', 0) != 0 or 'data' not in resp:
            raise RuntimeError(
                f"Feishu API error for table {table_id}: "
                f"code={resp.get('code')} msg={resp.get('msg')}")
        data = resp['data'] or {}
        # `items` may be null/absent on an empty page.
        records.extend(data.get('items') or [])
        page_token = data.get('page_token')
        # Stop when the server reports no further page; a missing cursor also
        # stops the loop so a malformed response cannot spin forever.
        if not data.get('has_more') or not page_token:
            return records


# Per-table audit counters, keyed by the labels used in `tables`.
results = {}

for name, (tid, _etype) in tables.items():
    items = _fetch_all_records(tid)

    ok = 0
    missing_expl = 0
    bad_tags = 0
    json_err = 0
    empty = 0
    single_group = 0
    skip_010199 = 0

    for item in items:
        fields = item['fields']

        # Records whose set ID contains '010199' are excluded from the audit.
        # NOTE(review): the reason for this exclusion is not visible here.
        sid = fields.get('题目集合 ID', '')
        if '010199' in str(sid):
            skip_010199 += 1
            continue

        jd_str = fields.get('jsonData', '')
        if not jd_str or jd_str == 'None':
            empty += 1
            continue

        try:
            jd = json.loads(jd_str)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # jsonData field that is not a str/bytes value.
            json_err += 1
            continue
        if not isinstance(jd, dict):
            # Valid JSON, but not an object: nothing below can inspect it.
            json_err += 1
            continue

        rec_missing, rec_bad, is_single = _audit_record(jd)
        missing_expl += rec_missing
        bad_tags += rec_bad
        if is_single:
            # Counted separately; a single-group record can still pass.
            single_group += 1
        if not (rec_missing or rec_bad):
            ok += 1

    results[name] = {
        'total': len(items), 'skip_010199': skip_010199, 'empty': empty,
        'ok': ok, 'missing_expl': missing_expl, 'bad_tags': bad_tags,
        'json_err': json_err, 'single_group': single_group
    }

# Summary table: one row per audited table, in the order the results were
# collected, so the report always matches the audited tables instead of
# relying on a second hard-coded list of labels.
_headers = ('Total', 'Skip', 'Empty', 'Pass', '!Expl', '!Tag', '!JSON', '1Grp')
_columns = ('total', 'skip_010199', 'empty', 'ok',
            'missing_expl', 'bad_tags', 'json_err', 'single_group')

print(f"{'Table':<8} " + " ".join(f"{h:>5}" for h in _headers))
print("-" * 58)
for name, r in results.items():
    print(f"{name:<8} " + " ".join(f"{r[c]:>5}" for c in _columns))

# Grand totals across all tables for the pass/fail counters.
_totals = {
    key: sum(r[key] for r in results.values())
    for key in ('ok', 'missing_expl', 'bad_tags', 'json_err')
}
print(f"\nTotal: OK={_totals['ok']}, MissingExpl={_totals['missing_expl']}, BadTags={_totals['bad_tags']}, JSONerr={_totals['json_err']}")