# ai_member_xiaoyan/scripts/write_audit_results_v2.py

#!/usr/bin/env python3
"""
直接通过Python requests将审校结果写回单元挑战多维表格
"""
import json, requests, sys, os

# Feishu Bitable app token and app credentials used by the request helpers.
# Each value can be overridden through an environment variable; the literals
# are kept as fallbacks so existing invocations behave exactly as before.
# NOTE(review): the fallback APP_SECRET is a credential committed to source —
# rotate it and remove the literal once the environment variable is in place.
APP_TOKEN = os.environ.get("FEISHU_APP_TOKEN", "CMHSbUUjka3TrUsaxxEc297ongf")
APP_ID = os.environ.get("FEISHU_APP_ID", "cli_a931175d41799cc7")
APP_SECRET = os.environ.get("FEISHU_APP_SECRET", "Iw2vEfbjT6GtV0GhbxbZqfQ4nAPtbR14")
# Root of the Bitable v1 REST API; table/record paths are appended to it.
BASE = "https://open.feishu.cn/open-apis/bitable/v1"

# Display name -> table id for every table the audit run iterates over.
TABLES = {
    "听力-P1-图片选择题": "tbliZAhcc9C43B23",
    "听力-P2-表格填空题": "tblzTLNH7f13uWQN",
    "听力-P4-短对话选择题": "tblVmeDtBDKsAEfz",
    "听力-P5-信息匹配题": "tblDssVmhGzc3UKd",
    "听力-P6-听力选图": "tbloiMcD0sBtGSTq",
    "听力-P7-听力拖拽": "tbly9SvPEa44k3yX",
}

# Set of recognised ability labels.
# NOTE(review): no function visible in this file reads this set — confirm
# whether a label check against it was intended.
KNOWN_ABILITY_LABELS = {
    "显性事实理解｜关键词识别", "显性事实理解｜单句信息点抓取",
    "显性细节理解｜数字/时间/地点", "多特征整合", "语用推断",
    "干扰抑制｜多信息筛选", "多句保持｜信息整合",
    "语用推断｜否定与纠错", "听觉抓取关键信息",
    "问题意图识别", "关键细节听辨", "图像语义对齐",
    "近义改写", "否定与纠错",
}

def get_token():
    """Request a tenant access token for the configured app.

    Posts ``APP_ID`` / ``APP_SECRET`` to the internal tenant-access-token
    endpoint and returns the ``tenant_access_token`` field of the response.

    Returns:
        The access token string.

    Raises:
        requests.RequestException: on network failure or timeout.
        RuntimeError: if the response body carries no token.
    """
    r = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": APP_ID, "app_secret": APP_SECRET},
        # Without a timeout a stalled connection would hang the script forever.
        timeout=30,
    )
    data = r.json()
    token = data.get("tenant_access_token")
    if not token:
        # Report the API's own code/msg instead of an opaque KeyError; the
        # request payload (which contains the secret) is deliberately not echoed.
        raise RuntimeError(
            f"tenant_access_token missing: code={data.get('code')} msg={data.get('msg')}"
        )
    return token

def fetch_records(token, table_id):
    """Fetch every record of one table, following pagination.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        table_id: Id of the table under ``APP_TOKEN`` to list records from.

    Returns:
        A list of the record items collected so far. If a page comes back
        with a non-zero ``code`` the error is printed to stderr and the
        records gathered before it are returned (best effort).

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = f"{BASE}/apps/{APP_TOKEN}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}
    all_items = []
    page_token = None
    while True:
        # Let requests build the query string so the page token is URL-encoded.
        params = {"page_size": 200}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(url, headers=headers, params=params, timeout=30)
        data = r.json()
        if data.get("code") != 0:
            print(f"  Fetch error: {data}", file=sys.stderr)
            break
        payload = data.get("data") or {}
        # Guard against a missing or null "items" so extend() cannot fail.
        all_items.extend(payload.get("items") or [])
        page_token = payload.get("page_token")
        # Stop when there are no more pages, and also when has_more is set
        # but no token was supplied — otherwise the first page would be
        # requested again forever.
        if not payload.get("has_more") or not page_token:
            break
    return all_items

def write_record(token, table_id, record_id, result_text):
    """Write the audit result text into one record's "审校结果" field.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        table_id: Id of the table containing the record.
        record_id: Id of the record to update.
        result_text: Text stored in the "审校结果" field.

    Returns:
        True when the API answers with ``code == 0``; False on any API
        error, transport error, or unparsable response. The reason for a
        failure is printed to stderr.
    """
    try:
        r = requests.put(
            f"{BASE}/apps/{APP_TOKEN}/tables/{table_id}/records/{record_id}",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            json={"fields": {"审校结果": result_text}},
            timeout=30,
        )
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # One failed write should be reported to the caller as False rather
        # than abort the whole batch; the caller already handles False.
        print(f"  Write error for {record_id}: {exc}", file=sys.stderr)
        return False
    if data.get("code") != 0:
        print(
            f"  Write error for {record_id}: code={data.get('code')} msg={data.get('msg')}",
            file=sys.stderr,
        )
        return False
    return True

def _as_dict(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}

def _question_list(block):
    """Return the block's ``questionSet`` when it is a list, otherwise ``[]``."""
    questions = block.get("questionSet")
    return questions if isinstance(questions, list) else []

def _first_nonempty(fields, keys):
    """Return the first truthy ``fields[key]`` among *keys*, or None."""
    for key in keys:
        value = fields.get(key)
        if value:
            return value
    return None

def _audit_question(bname, index, question):
    """Return the issue lines for one question of block *bname*."""
    tag = f"{bname}[{index}]"
    if not isinstance(question, dict):
        return [f"  ❌ {tag}: 题目不是JSON对象"]

    found = []
    expl = question.get("explanation", "")
    # Missing, blank and non-string explanations are all reported as empty.
    if not isinstance(expl, str) or not expl.strip():
        found.append(f"  ❌ {tag}: explanation为空")
    elif len(expl) < 20:
        found.append(f"  🟡 {tag}: explanation过短({len(expl)}字)")

    ability = question.get("ability", [])
    if not ability:
        found.append(f"  ❌ {tag}: ability为空")
    else:
        # A bare string is checked as a single label instead of being
        # iterated character by character (which could never match "¥¥").
        labels = [ability] if isinstance(ability, str) else ability
        if isinstance(labels, (list, tuple)) and any(
            isinstance(label, str) and "¥¥" in label for label in labels
        ):
            # Reported once per question, however many labels are affected.
            found.append(f"  ❌ {tag}: ability用¥¥分隔(应为逗号)")
    return found

def audit_record(rec):
    """Audit one record and build the text written back to the table.

    Checks performed, in order: questionSetID consistency between the
    ``jsonData`` blocks and the "题目集合 ID" field, the placeholder id
    ``000001``, the id format (digits and ``-`` only), per-question
    ``explanation`` / ``ability`` content in the ``first`` and ``second``
    blocks, and presence of the question text fields.

    Args:
        rec: A record dict with a ``fields`` mapping.

    Returns:
        A ``(result_text, has_error)`` tuple. ``(None, False)`` when the
        record has no ``jsonData`` (nothing to audit). Malformed data is
        reported as an error result instead of raising.
    """
    fields = rec.get("fields") or {}
    jd_raw = fields.get("jsonData")
    qs_id = fields.get("题目集合 ID", "")

    if not jd_raw:
        return None, False

    try:
        parsed = json.loads(jd_raw)
    except (TypeError, ValueError):
        # ValueError covers invalid JSON text; TypeError covers a jsonData
        # value that is not str/bytes at all.
        return "❌ jsonData JSON解析失败", True
    if not isinstance(parsed, dict):
        return "❌ jsonData 不是JSON对象", True

    # A null or non-object block (e.g. "second": null) is treated as absent.
    first = _as_dict(parsed.get("first"))
    second = _as_dict(parsed.get("second"))
    qtype = first.get("type", "unknown")
    f_qsid = first.get("questionSetID", "")
    s_qsid = second.get("questionSetID", "")

    issues = []
    if qs_id and f_qsid and f_qsid != qs_id:
        issues.append(f"  ❌ first questionSetID({f_qsid})与字段({qs_id})不一致")
    if qs_id and s_qsid and s_qsid != qs_id:
        issues.append(f"  ❌ second questionSetID({s_qsid})与字段({qs_id})不一致")
    if f_qsid == "000001":
        issues.append("  ❌ questionSetID为000001(占位数据)")
    # Valid ids consist of digits optionally separated by "-".
    if qs_id and not str(qs_id).replace("-", "").isdigit():
        issues.append(f"  ❌ 题目集合 ID异常: '{qs_id}'")

    for bname, block in (("first", first), ("second", second)):
        for i, q in enumerate(_question_list(block)):
            issues.extend(_audit_question(bname, i, q))

    # The question text may live under any of several field names.
    text1 = _first_nonempty(fields, ("题目1 完整配置", "题目1", "题目完整配置"))
    text2 = _first_nonempty(fields, ("题目2 完整配置", "题目2"))

    if not text1:
        issues.append("  ❌ 题目1文本字段为空")
    if second.get("questionSet") and not text2:
        issues.append("  🟡 题目2文本字段为空(但jsonData有second)")

    summary = (
        f"题型：{qtype} | 题组：first={len(_question_list(first))}题 "
        f"second={len(_question_list(second))}题"
    )
    if not issues:
        return f"✅ 审校通过\n{summary}", False
    return f"❌ 审校发现问题（{len(issues)}项）\n{summary}\n" + "\n".join(issues), True

def main():
    """Audit every eligible record in ``TABLES`` and write the results back.

    A record is processed only when its ``dataStatus`` field equals ``"0"``
    and it has a non-empty ``jsonData`` field; all others are counted as
    skipped. Per-record outcomes and a final summary are printed to stdout.
    """
    token = get_token()
    print("Token获取成功")

    # total_fail tracks records whose write-back failed, so the summary
    # accounts for every record instead of silently dropping those.
    total_err = total_ok = total_skip = total_fail = 0

    for tname, tid in TABLES.items():
        print(f"\n--- {tname} ---")
        records = fetch_records(token, tid)

        for rec in records:
            rid = rec["record_id"]
            fields = rec.get("fields", {})
            ds = fields.get("dataStatus")

            if ds != "0" or not fields.get("jsonData"):
                total_skip += 1
                continue

            result_text, has_err = audit_record(rec)
            if result_text is None:
                total_skip += 1
                continue

            if not write_record(token, tid, rid, result_text):
                print(f"  ❌ {rid} 写入失败")
                total_fail += 1
                continue

            tag = "🔴" if has_err else "✅"
            print(f"  {tag} {rid} ✓")
            if has_err:
                total_err += 1
            else:
                total_ok += 1

    print(f"\n{'='*40}")
    print(
        f"✅通过={total_ok}, 🔴问题={total_err}, ⏭️跳过={total_skip}, "
        f"❌写入失败={total_fail}"
    )

# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()