ai_member_xiaoyan/scripts/audit_final.py

198 lines
7.0 KiB
Python

#!/usr/bin/env python3
"""Final comprehensive audit - checks all explanation locations."""
import json
import os
import sys
import time

import requests
# Bitable app token: identifies the Feishu base whose tables are audited.
# Can be overridden with the FEISHU_APP_TOKEN environment variable.
APP_TOKEN = os.environ.get("FEISHU_APP_TOKEN", "CMHSbUUjka3TrUsaxxEc297ongf")

# SECURITY NOTE(review): the app id / app secret literals below are committed in
# plain text. They are kept only as a backward-compatible fallback; prefer
# setting FEISHU_APP_ID / FEISHU_APP_SECRET, then rotate the secret and remove
# the literals from source control.
r = requests.post(
    "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
    json={
        "app_id": os.environ.get("FEISHU_APP_ID", "cli_a931175d41799cc7"),
        "app_secret": os.environ.get("FEISHU_APP_SECRET", "Iw2vEfbjT6GtV0GhbxbZqfQ4nAPtbR14"),
    },
    # Without a timeout a stalled connection would hang the script forever.
    timeout=30,
)
_auth_payload = r.json()
if "tenant_access_token" not in _auth_payload:
    # Report the API's own error fields instead of dying with a bare KeyError.
    sys.exit(
        "Feishu auth failed: "
        f"code={_auth_payload.get('code')} msg={_auth_payload.get('msg')}"
    )
# Tenant access token used as the Bearer credential for every later request.
TOKEN = _auth_payload["tenant_access_token"]
# Every bitable table covered by the audit, as (table_id, display_name) pairs.
# display_name has the form "<skill>-P<n>", where the skill prefix is one of
# 听力 (listening), 阅读 (reading), 写作 (writing) or 口语 (speaking).
# NOTE(review): there is no 听力-P6 entry (P5 is followed by P7) — confirm the
# gap is intentional.
ALL = [
("tbliZAhcc9C43B23", "听力-P1"),
("tblzTLNH7f13uWQN", "听力-P2"),
("tblgxsDn25oSq7WS", "听力-P3"),
("tblVmeDtBDKsAEfz", "听力-P4"),
("tblDssVmhGzc3UKd", "听力-P5"),
("tbly9SvPEa44k3yX", "听力-P7"),
("tblCgfYDnnqwLfgH", "阅读-P1"),
("tblEp820dnatNYbb", "阅读-P2"),
("tbl4q0ZUV3HB54t1", "阅读-P3"),
("tblzKVm1FEukPgnN", "阅读-P4"),
("tblLmUxzzUDe0QAJ", "阅读-P5"),
("tblJc60aO0T163MJ", "阅读-P6"),
("tblweY65jGBiwSdt", "阅读-P7"),
("tblszuk1TeToofBF", "写作-P1"),
("tblSAwlMumKoyjws", "写作-P2"),
("tblFc9TVl2PeM2tg", "写作-P3"),
("tblRGv7k4WH58Jgq", "口语-P1"),
("tblGoWYBmVI0IrvQ", "口语-P2"),
("tblOHgNkNer2hGEp", "口语-P3"),
("tblsD2dxaRpLmkXD", "口语-P4"),
]
def fetch_all(token, table_id):
    """Download every record of one bitable table, following pagination.

    Args:
        token: Tenant access token sent as the ``Bearer`` credential.
        table_id: ID of the table inside the app identified by ``APP_TOKEN``.

    Returns:
        A list holding the ``items`` of every page fetched successfully. When
        the API answers with a non-zero ``code`` the loop stops, a warning is
        written to stderr, and the records collected so far are returned, so
        an API-level error never raises here.

    Raises:
        requests.RequestException: On transport failures, including the
            30-second timeout.
        ValueError: If a response body is not valid JSON.
    """
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records"
    headers = {"Authorization": f"Bearer {token}"}
    records = []
    page_token = None
    while True:
        params = {"page_size": 100}
        if page_token:
            params["page_token"] = page_token
        # A timeout keeps one stalled connection from hanging the whole audit.
        r = requests.get(url, headers=headers, params=params, timeout=30)
        data = r.json()
        if data.get("code") != 0:
            # Make truncated results visible instead of silently reporting
            # on a partial table.
            print(
                f"[fetch_all] table {table_id}: API error "
                f"code={data.get('code')} msg={data.get('msg')}; "
                f"returning {len(records)} record(s) fetched so far",
                file=sys.stderr,
            )
            break
        # `or` guards tolerate explicit JSON nulls for "data" / "items".
        d = data.get("data") or {}
        records.extend(d.get("items") or [])
        if not d.get("has_more"):
            break
        page_token = d.get("page_token")
        if not page_token:
            # has_more without a cursor would re-request the first page forever.
            print(
                f"[fetch_all] table {table_id}: has_more set but no page_token; "
                "stopping pagination",
                file=sys.stderr,
            )
            break
        # Short pause between pages to stay gentle on the API.
        time.sleep(0.3)
    return records
def has_chinese(s):
    """Tell whether *s* contains at least one character in U+4E00..U+9FFF.

    Falsy input (``None`` or an empty string) yields ``False``.
    """
    if not s:
        return False
    for ch in s:
        if '\u4e00' <= ch <= '\u9fff':
            return True
    return False
def check_explanations(block, block_name, qsid, qtype):
    """Collect explanation problems found in one block of a question record.

    Three places are inspected:

    1. the block's own ``explanation`` — optional, flagged only when present
       but containing no Chinese character;
    2. every entry of ``questionSet``;
    3. every entry of ``questionList`` (writing_pic_qa).

    For 2 and 3 an entry is flagged when its ``explanation`` is missing or
    blank, or when it contains no Chinese character.

    Args:
        block: Parsed block dict.
        block_name: Label used as the prefix of each issue message.
        qsid: Not used by the checks; kept for call compatibility.
        qtype: Not used by the checks; kept for call compatibility.

    Returns:
        A list of issue message strings; empty when nothing is wrong.
    """
    issues = []

    # 1. Block-level explanation.
    block_expl = block.get("explanation", "")
    if block_expl and not has_chinese(block_expl):
        issues.append(f"{block_name}.explanation 纯英文")

    # 2. + 3. Both per-question containers follow the same rules and differ
    # only in the label that prefixes the 1-based position.
    containers = (
        ("questionSet", block_name),
        ("questionList", f"{block_name} questionList"),
    )
    for key, label in containers:
        # `or []` also covers an explicit JSON null, which would otherwise
        # make enumerate() raise TypeError.
        for pos, question in enumerate(block.get(key) or [], start=1):
            expl = question.get("explanation", "")
            if not expl or not expl.strip():
                issues.append(f"{label}[{pos}] 解析为空")
            elif not has_chinese(expl):
                issues.append(f"{label}[{pos}] 解析纯英文")
    return issues
def check_answers(block, block_name, qtype):
    """Check that answer indices in one block stay within their option lists.

    Three answer layouts are inspected:

    * per-question ``answer`` lists inside ``questionSet``, checked against
      that question's ``options``;
    * the block-level ``answer`` list, whose i-th entry is checked against the
      ``options`` of the i-th ``questionSet`` entry;
    * ``answerSet`` pairs, whose second element is checked against the length
      of ``optionSetList``.

    Only integer values that are ``>= len(options)`` are reported.
    NOTE(review): negative integers are not flagged — confirm whether values
    such as -1 are legitimate before tightening this.

    Args:
        block: Parsed block dict.
        block_name: Label used as the prefix of each issue message.
        qtype: Not used by the checks; kept for call compatibility.

    Returns:
        A list of issue message strings; empty when nothing is wrong.
    """
    issues = []
    # `or []` also covers an explicit JSON null, which would otherwise make
    # enumerate()/len() raise TypeError.
    questions = block.get("questionSet") or []

    # Per-question answers (message positions are 1-based).
    for pos, question in enumerate(questions, start=1):
        ans = question.get("answer")
        if not isinstance(ans, list):
            continue
        options = question.get("options", [])
        if not options:
            # Nothing to validate against.
            continue
        for a in ans:
            if isinstance(a, int) and a >= len(options):
                issues.append(f"{block_name}[{pos}] answer索引{a}超出options范围(0-{len(options)-1})")

    # Block-level answers: zip() stops at the shorter sequence, so answers
    # without a matching question are ignored.
    block_ans = block.get("answer", [])
    if block_ans and isinstance(block_ans, list):
        for pos, (a, question) in enumerate(zip(block_ans, questions), start=1):
            options = question.get("options", [])
            if options and isinstance(a, int) and a >= len(options):
                issues.append(f"{block_name}[{pos}] block.answer索引{a}超出options范围(0-{len(options)-1})")

    # answerSet pairs: the second element indexes into optionSetList.
    # Unlike the checks above, the position in the message is 0-based.
    ans_set = block.get("answerSet", [])
    opt_set = block.get("optionSetList", [])
    if ans_set and opt_set:
        for j, match in enumerate(ans_set):
            if not (isinstance(match, list) and len(match) >= 2):
                continue
            idx = match[1]
            if isinstance(idx, int) and idx >= len(opt_set):
                issues.append(f"{block_name} answerSet[{j}]索引{idx}超出optionSetList范围({len(opt_set)})")
    return issues
# Main audit: for every table in ALL, fetch its records, run the explanation
# and answer checks on the "first" / "second" blocks of each record's jsonData,
# and print a per-table report.
print("="*80)
print("📊 单元挑战 — 全题型解析+答案审核报告")
print("="*80)

for table_id, table_name in ALL:
    records = fetch_all(TOKEN, table_id)
    empty_json = []        # records with no usable jsonData
    ok_records = []        # records that pass every check
    problem_records = []   # records with at least one issue

    for rec in records:
        fields = rec.get("fields", {})
        # The ID column name appears with and without a space; try both.
        qsid = fields.get("题目集合 ID", fields.get("题目集合ID", "N/A"))
        # NOTE(review): assumes jsonData arrives as a plain string — confirm
        # against the API's field format.
        json_str = fields.get("jsonData", "")
        if not json_str or not json_str.strip():
            empty_json.append(qsid)
            continue
        try:
            jd = json.loads(json_str)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; a bare `except`
            # would also swallow KeyboardInterrupt / SystemExit.
            empty_json.append(f"{qsid}(解析失败)")
            continue
        if not isinstance(jd, dict):
            # Valid JSON that is not an object cannot carry the expected
            # blocks; report it as a parse failure rather than crash on .get().
            empty_json.append(f"{qsid}(解析失败)")
            continue

        record_expl_issues = []
        record_ans_issues = []
        for bn in ("first", "second"):
            blk = jd.get(bn, {})
            if not blk:
                continue
            qtype = blk.get("type", "unknown")
            record_expl_issues.extend(check_explanations(blk, bn, qsid, qtype))
            record_ans_issues.extend(check_answers(blk, bn, qtype))

        if record_expl_issues or record_ans_issues:
            problem_records.append({
                "qsid": qsid,
                "expl": record_expl_issues,
                "ans": record_ans_issues,
            })
        else:
            ok_records.append(qsid)

    # Print table summary
    total_valid = len(ok_records) + len(problem_records)
    # Joining the parts avoids a dangling " + " prefix when a table has empty
    # templates but no problem records.
    status_parts = []
    if problem_records:
        status_parts.append(f"⚠️ {len(problem_records)}条有问题")
    if empty_json:
        status_parts.append(f"{len(empty_json)}条空模板")
    status = " + ".join(status_parts) if status_parts else "✅ OK"

    # The original separator was `''*60` (an empty string), so no rule was
    # printed; a visible rule character is restored here.
    print(f"\n{'─'*60}")
    print(f"📋 {table_name} | {status}")
    print(f" 有效记录: {total_valid} | OK: {len(ok_records)} | 有问题: {len(problem_records)} | 空模板: {len(empty_json)}")
    for pr in problem_records:
        print(f"\n [{pr['qsid']}]")
        for e in pr["expl"]:
            print(f" 🟡 解析: {e}")
        for a in pr["ans"]:
            print(f" 🔴 答案: {a}")

print(f"\n{'='*80}")
print("审核完成")
PYEOF