ai_member_xiaoyan/scripts/scan_missing_explanations.py

133 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
扫描所有听力表格列出所有explanation为空的题目
"""
import json
import urllib.parse
import urllib.request
from collections import Counter
# Bitable app token; interpolated into the records URL as the
# /bitable/v1/apps/<token>/ path segment.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
# JSON credentials file; get_token() reads apps[0].appId and
# apps[0].appSecret from it.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
def get_token(timeout=30):
    """Request a tenant access token from the Feishu auth endpoint.

    The app id and secret are read from the first entry of ``apps`` in the
    JSON file at ``CRED_FILE``.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``tenant_access_token`` value of the JSON response.

    Raises:
        KeyError: If the response carries no ``tenant_access_token``; the
            message includes the response's ``code`` and ``msg`` fields.
        urllib.error.URLError: On network failure or timeout.
    """
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    payload = json.dumps(
        {"app_id": app['appId'], "app_secret": app['appSecret']}
    ).encode()
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=payload,
        headers={"Content-Type": "application/json"})
    # Context manager closes the connection even if reading/parsing fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = json.loads(resp.read())
    try:
        return body['tenant_access_token']
    except KeyError:
        # Same exception type as before, but say why the token is absent.
        raise KeyError(
            "tenant_access_token missing from auth response: "
            f"code={body.get('code')} msg={body.get('msg')}"
        ) from None
def api_call(url, timeout=30):
    """GET *url* with a fresh bearer token and return the decoded JSON body.

    A new token is requested through ``get_token()`` on every call.

    Args:
        url: Full request URL, including any query string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: On network failure, timeout, or an HTTP
            error status.
    """
    token = get_token()
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    # Context manager closes the connection even if reading/parsing fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())
# Bitable table ids to scan, keyed by the short label used in the report.
# NOTE(review): there is no "P3" entry — confirm that is intentional.
tables = {
    "P1": "tbliZAhcc9C43B23",
    "P2": "tblzTLNH7f13uWQN",
    "P4": "tblVmeDtBDKsAEfz",
    "P5": "tblDssVmhGzc3UKd",
    "P6": "tbloiMcD0sBtGSTq",
    "P7": "tbly9SvPEa44k3yX",
}


def _iter_records(name, tid):
    """Yield every record of table *tid*, following page_token pagination.

    A response whose ``code`` is not 0 is reported as ``"<name>: <msg>"`` and
    ends the iteration for this table; records from earlier pages have
    already been yielded by then.
    """
    base = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{tid}/records"
    page_token = None
    while True:
        query = {"page_size": 100}
        if page_token:
            query["page_token"] = page_token
        resp = api_call(f"{base}?{urllib.parse.urlencode(query)}")
        if resp.get('code') != 0:
            print(f"{name}: {resp.get('msg')}")
            return
        data = resp.get('data') or {}
        # ``items`` may be absent or null for an empty page.
        yield from data.get('items') or []
        page_token = data.get('page_token')
        # Stop on the last page; also stop if more pages are announced
        # without a token, which would otherwise loop forever.
        if not data.get('has_more') or not page_token:
            return


def _collect_missing(name, sid, section, questions, context):
    """Return one entry per question in *questions* with an empty explanation.

    An explanation counts as empty when it is absent, ``None`` or ``''``.
    *context* supplies the ``text_*`` fields shared by the whole question set.
    Every entry has the same keys regardless of *section*.
    """
    found = []
    for index, q in enumerate(questions):
        expl = q.get('explanation', '')
        if expl is None or expl == '':
            found.append({
                'table': name,
                'sid': sid,
                'section': section,
                'q_index': index,
                'question': q.get('question', q.get('questionDesc', '')),
                'answer': q.get('answer', []),
                'options': q.get('options', []),
                'optionsImage': q.get('optionsImage', []),
                **context,
            })
    return found


missing = []
total_questions = 0
for name, tid in tables.items():
    for item in _iter_records(name, tid):
        sid = item['fields'].get('题目集合 ID', '')
        # Records whose set id contains '010199' are excluded from the scan.
        # NOTE(review): the reason is not visible here — confirm with the data owner.
        if '010199' in str(sid):
            continue
        jd_str = item['fields'].get('jsonData', '{}')
        if not jd_str or jd_str == 'None':
            continue
        try:
            jd = json.loads(jd_str)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # field value that is not str/bytes.
            print(f" ⚠️ {name} {sid}: still can't parse JSON")
            continue
        if not isinstance(jd, dict):
            # e.g. the literal JSON text "null" or a top-level array.
            print(f" ⚠️ {name} {sid}: jsonData is not a JSON object")
            continue

        # Check first/second structure
        for section in ('first', 'second'):
            sect = jd.get(section, {})
            if not isinstance(sect, dict) or not sect.get('questionSet'):
                continue
            qs = sect['questionSet']
            total_questions += len(qs)
            missing.extend(_collect_missing(name, sid, section, qs, {
                'text_desc': sect.get('textDesc', ''),
                'text_audio': sect.get('textAudio', ''),
                'text_title': sect.get('textTitle', ''),
                'text_body': sect.get('textBody', ''),
            }))

        # Check root questionSet (P6 style)
        qs = jd.get('questionSet', [])
        if qs:
            # Count the set once, whether or not any explanation is missing.
            total_questions += len(qs)
            missing.extend(_collect_missing(name, sid, 'root', qs, {
                'text_desc': jd.get('textDesc', ''),
                'text_audio': jd.get('textAudio', ''),
                'text_title': jd.get('textTitle', ''),
                'text_body': '',
            }))
# Overall totals gathered by the scan above.
print(f"Total questions scanned: {total_questions}")
print(f"Missing explanations: {len(missing)}")
print()

# Show summary by table, in the order the tables were scanned.
by_table = Counter(m['table'] for m in missing)
for t in tables:
    print(f" {t}: {by_table.get(t, 0)} missing")

# Save for generation. The explicit UTF-8 encoding matters because
# ensure_ascii=False writes non-ASCII text verbatim, which would fail
# under a non-UTF-8 locale default.
with open('/tmp/missing_explanations.json', 'w', encoding='utf-8') as f:
    json.dump(missing, f, ensure_ascii=False, indent=2)
print(f"\nSaved {len(missing)} missing explanations to /tmp/missing_explanations.json")

# Show a few samples
for m in missing[:3]:
    print(f"\n [{m['table']}] {m['sid']}/{m['section']}[{m['q_index']}]")
    # The question may be None or a non-string value; coerce before slicing.
    print(f" Question: {str(m['question'] or '')[:80]}")
    print(f" Answer: {m['answer']}")