ai_member_xiaoyan/scripts/write_audit_results_v2.py

177 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
直接通过Python requests将审校结果写回单元挑战多维表格
"""
import json, requests, sys, os
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
APP_ID = "cli_a931175d41799cc7"
APP_SECRET = "Iw2vEfbjT6GtV0GhbxbZqfQ4nAPtbR14"
BASE = "https://open.feishu.cn/open-apis/bitable/v1"
TABLES = {
"听力-P1-图片选择题": "tbliZAhcc9C43B23",
"听力-P2-表格填空题": "tblzTLNH7f13uWQN",
"听力-P4-短对话选择题": "tblVmeDtBDKsAEfz",
"听力-P5-信息匹配题": "tblDssVmhGzc3UKd",
"听力-P6-听力选图": "tbloiMcD0sBtGSTq",
"听力-P7-听力拖拽": "tbly9SvPEa44k3yX",
}
KNOWN_ABILITY_LABELS = {
"显性事实理解|关键词识别", "显性事实理解|单句信息点抓取",
"显性细节理解|数字/时间/地点", "多特征整合", "语用推断",
"干扰抑制|多信息筛选", "多句保持|信息整合",
"语用推断|否定与纠错", "听觉抓取关键信息",
"问题意图识别", "关键细节听辨", "图像语义对齐",
"近义改写", "否定与纠错",
}
def get_token():
    """Request a tenant access token for the configured Feishu app.

    Posts ``APP_ID`` / ``APP_SECRET`` to the internal tenant-token endpoint.

    Returns:
        The ``tenant_access_token`` value from the JSON response.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if the response body is not valid JSON.
        RuntimeError: if the response carries no ``tenant_access_token``
            (for example when the credentials are rejected).
    """
    resp = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": APP_ID, "app_secret": APP_SECRET},
        # Without a timeout a stalled connection would hang the script forever.
        timeout=30,
    )
    payload = resp.json()
    token = payload.get("tenant_access_token")
    if not token:
        # Surface the API's own code/msg instead of an opaque KeyError.
        raise RuntimeError(f"Failed to obtain tenant_access_token: {payload}")
    return token
def fetch_records(token, table_id):
    """Fetch all records of one table, following pagination.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        table_id: Id of the table inside the ``APP_TOKEN`` app.

    Returns:
        A list with the ``items`` of every page that was fetched
        successfully. If a page responds with a non-zero ``code`` the error
        is printed to stderr and the records collected so far are returned.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if a response body is not valid JSON.
    """
    url = f"{BASE}/apps/{APP_TOKEN}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}
    all_items = []
    page_token = None
    while True:
        # Let requests build the query string so the page token is escaped.
        params = {"page_size": 200}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(url, headers=headers, params=params, timeout=30)
        data = r.json()
        if data.get("code") != 0:
            print(f" Fetch error: {data}", file=sys.stderr)
            break
        page = data.get("data") or {}
        # "items" may be missing or null; treat that as an empty page.
        all_items.extend(page.get("items") or [])
        page_token = page.get("page_token")
        # Stop when there are no more pages, or when the server claims more
        # but gives no token (continuing would re-fetch page one forever).
        if not page.get("has_more") or not page_token:
            break
    return all_items
def write_record(token, table_id, record_id, result_text):
    """Write the audit verdict into the ``审校结果`` field of one record.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        table_id: Id of the table that owns the record.
        record_id: Id of the record to update.
        result_text: Text stored in the ``审校结果`` field.

    Returns:
        True if the API answered with ``code == 0``; False otherwise,
        including on network errors, timeouts, and non-JSON responses
        (the reason is printed to stderr).
    """
    try:
        r = requests.put(
            f"{BASE}/apps/{APP_TOKEN}/tables/{table_id}/records/{record_id}",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            json={"fields": {"审校结果": result_text}},
            timeout=30,
        )
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # One failed write should not abort the whole run; the caller
        # already reports a False return as a write failure.
        print(f" Write error for {record_id}: {exc}", file=sys.stderr)
        return False
    if data.get("code") != 0:
        print(f" Write error for {record_id}: {data}", file=sys.stderr)
        return False
    return True
def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _question_count(block):
    """Return the length of ``block["questionSet"]``, or 0 if it is not a list."""
    question_set = block.get("questionSet")
    return len(question_set) if isinstance(question_set, list) else 0


def audit_record(rec):
    """Audit one Bitable record and build its verdict text.

    The record's ``jsonData`` field is parsed as JSON and its ``first`` /
    ``second`` blocks are checked for: question-set id consistency with the
    ``题目集合 ID`` field, the ``000001`` placeholder id, empty or short
    ``explanation`` values, empty ``ability`` values or ones containing
    ``¥¥``, and presence of the question text fields.

    Args:
        rec: Record dict; only its ``fields`` mapping is read.

    Returns:
        A ``(result_text, has_error)`` tuple. ``result_text`` is None (with
        ``has_error`` False) when the record has no ``jsonData``; otherwise
        it is a pass message or a failure message listing every issue.
    """
    fields = rec.get("fields", {})
    jd_raw = fields.get("jsonData")
    if not jd_raw:
        return None, False
    try:
        parsed = json.loads(jd_raw)
    except (TypeError, ValueError):
        # TypeError: the field is not str/bytes; ValueError: malformed JSON.
        return "❌ jsonData JSON解析失败", True
    if not isinstance(parsed, dict):
        # Valid JSON, but not an object, so there are no blocks to audit.
        return "❌ jsonData 顶层不是JSON对象", True

    # A block that is null or not an object is treated as absent.
    first = _as_dict(parsed.get("first"))
    second = _as_dict(parsed.get("second"))
    qtype = first.get("type", "unknown")
    qs_id = fields.get("题目集合 ID", "")
    f_qsid = first.get("questionSetID", "")
    s_qsid = second.get("questionSetID", "")

    issues = []
    if qs_id and f_qsid and f_qsid != qs_id:
        issues.append(f" ❌ first questionSetID({f_qsid})与字段({qs_id})不一致")
    if qs_id and s_qsid and s_qsid != qs_id:
        issues.append(f" ❌ second questionSetID({s_qsid})与字段({qs_id})不一致")
    if f_qsid == "000001":
        issues.append(" ❌ questionSetID为000001(占位数据)")
    # A well-formed id is a string of digits, optionally with dashes;
    # anything else (including a non-string value) is reported.
    if qs_id and not (isinstance(qs_id, str) and qs_id.replace("-", "").isdigit()):
        issues.append(f" ❌ 题目集合 ID异常: '{qs_id}'")

    for bname, block in (("first", first), ("second", second)):
        question_set = block.get("questionSet", [])
        if not isinstance(question_set, list):
            continue
        for i, q in enumerate(question_set):
            if not isinstance(q, dict):
                issues.append(f"{bname}[{i}]: 题目不是JSON对象")
                continue

            expl = q.get("explanation", "")
            if not isinstance(expl, str) or not expl.strip():
                issues.append(f"{bname}[{i}]: explanation为空")
            elif len(expl) < 20:
                issues.append(f" 🟡 {bname}[{i}]: explanation过短({len(expl)}字)")

            ability = q.get("ability", [])
            if not ability:
                issues.append(f"{bname}[{i}]: ability为空")
            else:
                # A bare string must be checked as a whole; iterating it
                # would test single characters, which can never contain "¥¥".
                labels = ability if isinstance(ability, (list, tuple)) else [ability]
                if any(isinstance(a, str) and "¥¥" in a for a in labels):
                    issues.append(f"{bname}[{i}]: ability用¥¥分隔(应为逗号)")

    # First non-empty candidate field wins for each question text.
    text1 = next(
        (fields[k] for k in ("题目1 完整配置", "题目1", "题目完整配置") if fields.get(k)),
        None,
    )
    text2 = next(
        (fields[k] for k in ("题目2 完整配置", "题目2") if fields.get(k)),
        None,
    )
    if not text1:
        issues.append(" ❌ 题目1文本字段为空")
    if second and second.get("questionSet") and not text2:
        issues.append(" 🟡 题目2文本字段为空(但jsonData有second)")

    summary = (
        f"题型:{qtype} | 题组first={_question_count(first)}题 "
        f"second={_question_count(second)}"
    )
    if not issues:
        return f"✅ 审校通过\n{summary}", False
    return f"❌ 审校发现问题({len(issues)}项)\n{summary}\n" + "\n".join(issues), True
def main():
    """Audit every table in ``TABLES`` and write each verdict back.

    Records whose ``dataStatus`` is not ``"0"``, that have no ``jsonData``,
    or for which the audit yields no verdict are counted as skipped. For the
    rest, the verdict is written to the record; successful writes are
    counted as passed or flagged, and failed writes are only printed.
    A one-line summary of the three counters is printed at the end.
    """
    access_token = get_token()
    print("Token获取成功")

    passed = 0
    flagged = 0
    skipped = 0

    for table_name, table_id in TABLES.items():
        print(f"\n--- {table_name} ---")
        for record in fetch_records(access_token, table_id):
            record_id = record["record_id"]
            record_fields = record.get("fields", {})

            # Only records in status "0" that carry jsonData are audited.
            eligible = (
                record_fields.get("dataStatus") == "0"
                and bool(record_fields.get("jsonData"))
            )
            if not eligible:
                skipped += 1
                continue

            verdict, has_problem = audit_record(record)
            if verdict is None:
                skipped += 1
                continue

            if not write_record(access_token, table_id, record_id, verdict):
                print(f"{record_id} 写入失败")
                continue

            marker = "🔴" if has_problem else ""
            print(f" {marker} {record_id}")
            if has_problem:
                flagged += 1
            else:
                passed += 1

    print("\n" + "=" * 40)
    print(f"✅通过={passed}, 🔴问题={flagged}, ⏭️跳过={skipped}")
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()