""" 扫描所有听力表格,列出所有explanation为空的题目 """ import json, urllib.request APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf" CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json" def get_token(): with open(CRED_FILE) as f: cfg = json.load(f) req = urllib.request.Request( "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", data=json.dumps({"app_id": cfg['apps'][0]['appId'], "app_secret": cfg['apps'][0]['appSecret']}).encode(), headers={"Content-Type": "application/json"}) return json.loads(urllib.request.urlopen(req).read())['tenant_access_token'] def api_call(url): token = get_token() req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"}) return json.loads(urllib.request.urlopen(req).read()) tables = { "P1": "tbliZAhcc9C43B23", "P2": "tblzTLNH7f13uWQN", "P4": "tblVmeDtBDKsAEfz", "P5": "tblDssVmhGzc3UKd", "P6": "tbloiMcD0sBtGSTq", "P7": "tbly9SvPEa44k3yX", } missing = [] total_questions = 0 for name, tid in tables.items(): url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records?page_size=100" resp = api_call(url) if resp.get('code') != 0: print(f"❌ {name}: {resp.get('msg')}") continue for item in resp['data']['items']: sid = item['fields'].get('题目集合 ID', '') if '010199' in str(sid): continue jd_str = item['fields'].get('jsonData', '{}') if not jd_str or jd_str == 'None': continue try: jd = json.loads(jd_str) except: print(f" ⚠️ {name} {sid}: still can't parse JSON") continue # Check first/second structure for section in ['first', 'second']: sect = jd.get(section, {}) if not sect or not sect.get('questionSet'): continue text_desc = sect.get('textDesc', '') text_audio = sect.get('textAudio', '') text_title = sect.get('textTitle', '') text_body = sect.get('textBody', '') qs = sect.get('questionSet', []) total_questions += len(qs) for i, q in enumerate(qs): expl = q.get('explanation', '') if expl is None or expl == '': missing.append({ 'table': name, 'sid': sid, 'section': section, 'q_index': i, 'question': q.get('question', q.get('questionDesc', '')), 'answer': q.get('answer', []), 'options': q.get('options', []), 'optionsImage': q.get('optionsImage', []), 'text_desc': text_desc, 'text_audio': text_audio, 'text_title': text_title, 'text_body': text_body, }) # Check root questionSet (P6 style) qs = jd.get('questionSet', []) if qs: text_desc = jd.get('textDesc', '') text_audio = jd.get('textAudio', '') for i, q in enumerate(qs): expl = q.get('explanation', '') if expl is None or expl == '': total_questions += len(qs) missing.append({ 'table': name, 'sid': sid, 'section': 'root', 'q_index': i, 'question': q.get('question', ''), 'answer': q.get('answer', []), 'optionsImages': q.get('optionsImage', []), 'text_desc': text_desc, 'text_audio': text_audio, 'text_title': jd.get('textTitle', ''), 'text_body': '', }) print(f"Total questions scanned: {total_questions}") print(f"Missing explanations: {len(missing)}") print() # Show summary by table from collections import Counter by_table = Counter(m['table'] for m in missing) for t in ["P1","P2","P4","P5","P6","P7"]: print(f" {t}: {by_table.get(t, 0)} missing") # Save for generation with open('/tmp/missing_explanations.json', 'w') as f: json.dump(missing, f, ensure_ascii=False, indent=2) print(f"\nSaved {len(missing)} missing explanations to /tmp/missing_explanations.json") # Show a few samples for m in missing[:3]: print(f"\n [{m['table']}] {m['sid']}/{m['section']}[{m['q_index']}]") print(f" Question: {m['question'][:80]}") print(f" Answer: {m['answer']}")