ai_member_xiaoyan/scripts/audit_final.py

#!/usr/bin/env python3
"""Final comprehensive audit - checks all explanation locations."""
import json, requests, time, sys

# NOTE(review): the app token, app_id and app_secret below are hard-coded
# credentials. They should be moved to environment variables or a secret
# store (and rotated); flagged here rather than silently changed.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"

# Exchange the app credentials for a tenant access token. The timeout keeps
# the script from hanging indefinitely on a stalled connection.
r = requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
    json={"app_id":"cli_a931175d41799cc7","app_secret":"Iw2vEfbjT6GtV0GhbxbZqfQ4nAPtbR14"},
    timeout=30)
try:
    _auth = r.json()
except ValueError:
    # The body was not JSON at all (e.g. a proxy error page).
    sys.exit(f"tenant_access_token request failed: HTTP {r.status_code}, non-JSON response")
if not isinstance(_auth, dict) or not _auth.get("tenant_access_token"):
    # Fail with a readable message instead of a bare KeyError.
    _code = _auth.get("code") if isinstance(_auth, dict) else None
    _msg = _auth.get("msg") if isinstance(_auth, dict) else None
    sys.exit(f"tenant_access_token request failed: HTTP {r.status_code}, code={_code}, msg={_msg}")
TOKEN = _auth["tenant_access_token"]

# (table_id, table_name) pairs for every table the audit loop walks; the name
# is only used as the heading of that table's section in the printed report.
# Name prefixes: 听力 = listening, 阅读 = reading, 写作 = writing, 口语 = speaking.
# NOTE(review): there is no 听力-P6 entry between P5 and P7 — confirm this
# omission is intentional.
ALL = [
    ("tbliZAhcc9C43B23", "听力-P1"),
    ("tblzTLNH7f13uWQN", "听力-P2"),
    ("tblgxsDn25oSq7WS", "听力-P3"),
    ("tblVmeDtBDKsAEfz", "听力-P4"),
    ("tblDssVmhGzc3UKd", "听力-P5"),
    ("tbly9SvPEa44k3yX", "听力-P7"),
    ("tblCgfYDnnqwLfgH", "阅读-P1"),
    ("tblEp820dnatNYbb", "阅读-P2"),
    ("tbl4q0ZUV3HB54t1", "阅读-P3"),
    ("tblzKVm1FEukPgnN", "阅读-P4"),
    ("tblLmUxzzUDe0QAJ", "阅读-P5"),
    ("tblJc60aO0T163MJ", "阅读-P6"),
    ("tblweY65jGBiwSdt", "阅读-P7"),
    ("tblszuk1TeToofBF", "写作-P1"),
    ("tblSAwlMumKoyjws", "写作-P2"),
    ("tblFc9TVl2PeM2tg", "写作-P3"),
    ("tblRGv7k4WH58Jgq", "口语-P1"),
    ("tblGoWYBmVI0IrvQ", "口语-P2"),
    ("tblOHgNkNer2hGEp", "口语-P3"),
    ("tblsD2dxaRpLmkXD", "口语-P4"),
]

def fetch_all(token, table_id):
    """Fetch the records of one Bitable table, following pagination.

    Args:
        token: Tenant access token sent as the Bearer credential.
        table_id: ID of the table inside the app identified by ``APP_TOKEN``.

    Returns:
        A list of the record items collected. If the API answers a page with
        a non-zero ``code``, a warning is written to stderr and the records
        gathered up to that point are returned (best-effort, as before, but
        no longer silent).

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If a response body is not valid JSON.
    """
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}
    records = []
    page_token = None
    while True:
        params = {"page_size": 100}
        if page_token:
            params["page_token"] = page_token
        # A timeout prevents one stalled request from hanging the whole audit.
        r = requests.get(url, headers=headers, params=params, timeout=30)
        data = r.json()
        if data.get("code") != 0:
            print(
                f"fetch_all: table {table_id} returned code={data.get('code')} "
                f"msg={data.get('msg')}; returning {len(records)} records fetched so far",
                file=sys.stderr,
            )
            break
        # "data" / "items" may be present but null; treat that as empty.
        d = data.get("data") or {}
        records.extend(d.get("items") or [])
        if not d.get("has_more"):
            break
        page_token = d.get("page_token")
        if not page_token:
            # Without a cursor the next request would re-fetch page one forever.
            break
        time.sleep(0.3)
    return records

def has_chinese(s):
    """Return True if *s* contains a character in U+4E00..U+9FFF.

    Falsy input (``None``, empty string) yields False.
    """
    if not s:
        return False
    for ch in s:
        if '\u4e00' <= ch <= '\u9fff':
            return True
    return False

def check_explanations(block, block_name, qsid, qtype):
    """Collect explanation problems found in one block.

    Three places are inspected: the block's own ``explanation``, each entry
    of ``questionSet``, and each entry of ``questionList``. A block-level
    explanation is only reported when it is present and contains no Chinese
    character; per-question explanations are reported when empty/blank or
    when they contain no Chinese character.

    Args:
        block: Dict for one block of the parsed jsonData.
        block_name: Label used as the prefix of every reported issue.
        qsid: Unused; kept for interface compatibility.
        qtype: Unused; kept for interface compatibility.

    Returns:
        A list of issue strings, in the order block / questionSet /
        questionList.
    """
    issues = []

    # 1. Block-level explanation: optional, so only a non-empty one is checked.
    block_expl = block.get("explanation", "")
    if block_expl and not has_chinese(block_expl):
        issues.append(f"{block_name}.explanation 纯英文")

    # 2 + 3. questionSet and questionList share the same rule and differ only
    # in the label prefix. ``or []`` tolerates a key that is present but null.
    sources = (
        ("questionSet", block_name),
        ("questionList", f"{block_name} questionList"),
    )
    for key, prefix in sources:
        for number, q in enumerate(block.get(key) or [], start=1):
            expl = q.get("explanation", "")
            if not expl or not expl.strip():
                issues.append(f"{prefix}[{number}] 解析为空")
            elif not has_chinese(expl):
                issues.append(f"{prefix}[{number}] 解析纯英文")

    return issues

def _index_out_of_range(value, size):
    """Return True if *value* is an int that is not a valid index in 0..size-1."""
    return isinstance(value, int) and not 0 <= value < size


def check_answers(block, block_name, qtype):
    """Collect answer-index problems found in one block.

    Integer answers are treated as indexes and reported when they fall
    outside the valid range (negative values included). Three places are
    inspected: each question's ``answer`` list against that question's
    ``options``; the block-level ``answer`` list, matched positionally
    against ``questionSet``; and the second element of each ``answerSet``
    entry against ``optionSetList``.

    Args:
        block: Dict for one block of the parsed jsonData.
        block_name: Label used as the prefix of every reported issue.
        qtype: Unused; kept for interface compatibility.

    Returns:
        A list of issue strings.
    """
    issues = []

    # ``or []`` tolerates keys that are present but null in the JSON.
    qs = block.get("questionSet") or []

    # Per-question answer lists.
    for number, q in enumerate(qs, start=1):
        ans = q.get("answer", None)
        options = q.get("options") or []
        if not isinstance(ans, list) or not options:
            continue
        for a in ans:
            if _index_out_of_range(a, len(options)):
                issues.append(f"{block_name}[{number}] answer索引{a}超出options范围(0-{len(options)-1})")

    # Block-level answer list; zip stops at the shorter of answers/questions.
    block_ans = block.get("answer", [])
    if block_ans and isinstance(block_ans, list):
        for number, (a, q) in enumerate(zip(block_ans, qs), start=1):
            options = q.get("options") or []
            if options and _index_out_of_range(a, len(options)):
                issues.append(f"{block_name}[{number}] block.answer索引{a}超出options范围(0-{len(options)-1})")

    # answerSet entries: only the second element of each entry is checked.
    ans_set = block.get("answerSet") or []
    opt_set = block.get("optionSetList") or []
    if ans_set and opt_set:
        for j, match in enumerate(ans_set):
            if isinstance(match, list) and len(match) >= 2:
                idx = match[1]
                if _index_out_of_range(idx, len(opt_set)):
                    issues.append(f"{block_name} answerSet[{j}]索引{idx}超出optionSetList范围({len(opt_set)})")

    return issues

# Main audit: for every table in ALL, fetch its records, run the explanation
# and answer checks on the "first"/"second" blocks of each record's jsonData,
# and print a per-table summary followed by the individual findings.
print("="*80)
print("📊 单元挑战 — 全题型解析+答案审核报告")
print("="*80)

for table_id, table_name in ALL:
    records = fetch_all(TOKEN, table_id)

    empty_json = []  # records with no (or unparseable) jsonData
    ok_records = []  # records that pass every check
    problem_records = []  # records with at least one issue

    for rec in records:
        fields = rec.get("fields", {})
        # The ID column name appears with and without a space; accept both.
        qsid = fields.get("题目集合 ID", fields.get("题目集合ID", "N/A"))
        json_str = fields.get("jsonData", "")

        if not json_str or not json_str.strip():
            empty_json.append(qsid)
            continue

        try:
            jd = json.loads(json_str)
        except ValueError:
            # json.JSONDecodeError is a ValueError; a bare except would also
            # swallow KeyboardInterrupt/SystemExit.
            jd = None
        if not isinstance(jd, dict):
            # Invalid JSON, or valid JSON that is not an object: the block
            # lookups below need a dict, so count it as a parse failure.
            empty_json.append(f"{qsid}(解析失败)")
            continue

        record_expl_issues = []
        record_ans_issues = []

        for bn in ["first", "second"]:
            blk = jd.get(bn, {})
            if not blk:
                continue

            qtype = blk.get("type", "unknown")

            record_expl_issues.extend(check_explanations(blk, bn, qsid, qtype))
            record_ans_issues.extend(check_answers(blk, bn, qtype))

        if record_expl_issues or record_ans_issues:
            problem_records.append({
                "qsid": qsid,
                "expl": record_expl_issues,
                "ans": record_ans_issues
            })
        else:
            ok_records.append(qsid)

    # Print table summary
    total_valid = len(ok_records) + len(problem_records)

    # Build the status from its parts so a table with only empty templates
    # does not get a stray leading " + ".
    status_parts = []
    if problem_records:
        status_parts.append(f"⚠️ {len(problem_records)}条有问题")
    if empty_json:
        status_parts.append(f"{len(empty_json)}条空模板")
    status = " + ".join(status_parts) if status_parts else "✅ OK"

    print(f"\n{'─'*60}")
    print(f"📋 {table_name} | {status}")
    print(f"   有效记录: {total_valid} | OK: {len(ok_records)} | 有问题: {len(problem_records)} | 空模板: {len(empty_json)}")

    for pr in problem_records:
        print(f"\n   [{pr['qsid']}]")
        for e in pr["expl"]:
            print(f"      🟡 解析: {e}")
        for a in pr["ans"]:
            print(f"      🔴 答案: {a}")

print(f"\n{'='*80}")
print("审核完成")
PYEOF