# ai_member_xiaoyan/scripts/scan_missing_explanations.py

"""
Scan all listening tables and list every question whose explanation is empty.
"""
import json, urllib.request

# Bitable app token; interpolated into the records URL as /apps/{APP_TOKEN}/.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
# JSON credentials file; get_token() reads apps[0].appId and apps[0].appSecret from it.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"

def get_token():
    """Fetch a Feishu tenant access token for the first app in CRED_FILE.

    Reads ``apps[0].appId`` / ``apps[0].appSecret`` from the JSON credentials
    file and POSTs them to the internal ``tenant_access_token`` endpoint.

    Returns:
        The ``tenant_access_token`` value from the JSON response.

    Raises:
        OSError: the credentials file cannot be read, or the HTTP request
            fails or times out (``urllib.error.URLError`` is an ``OSError``).
        KeyError / IndexError: the credentials file or the response does not
            contain the expected fields.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)

    app = cfg['apps'][0]
    payload = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode()
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=payload,
        headers={"Content-Type": "application/json"})

    # The context manager closes the response deterministically; the timeout
    # keeps a stalled connection from hanging the script forever.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())['tenant_access_token']

def api_call(url):
    """GET *url* with a bearer token and return the decoded JSON body.

    A fresh tenant token is requested via ``get_token()`` on every call.

    Args:
        url: Full URL of the Feishu open-API endpoint to query.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        OSError: the request fails or times out (``urllib.error.URLError`` is
            an ``OSError``), or ``get_token()`` cannot read its credentials.
        ValueError: the response body is not valid JSON.
    """
    token = get_token()
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    # Close the response deterministically and bound the wait on a stalled
    # connection.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())

# Display label -> Bitable table ID for every table this script scans.
# NOTE(review): there is no "P3" entry — confirm the gap is intentional.
tables = dict(
    P1="tbliZAhcc9C43B23",
    P2="tblzTLNH7f13uWQN",
    P4="tblVmeDtBDKsAEfz",
    P5="tblDssVmhGzc3UKd",
    P6="tbloiMcD0sBtGSTq",
    P7="tbly9SvPEa44k3yX",
)

def _fetch_records(table_id):
    """Return ``(records, error)`` for one table, following pagination.

    On success ``records`` is the list of all record items across every page
    and ``error`` is ``None``. If any page comes back with a non-zero ``code``,
    ``records`` is ``None`` and ``error`` is that response's ``msg``.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    base_url = (
        f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}"
        f"/tables/{table_id}/records?page_size=100"
    )
    records = []
    page_token = None
    while True:
        url = base_url
        if page_token:
            url = f"{base_url}&page_token={quote(str(page_token), safe='')}"
        resp = api_call(url)
        if resp.get('code') != 0:
            return None, resp.get('msg')

        data = resp.get('data') or {}
        # 'items' may be missing or null (e.g. an empty table).
        records.extend(data.get('items') or [])

        next_token = data.get('page_token')
        # Stop when no further page is reported. The token checks also guard
        # against looping forever on a malformed response.
        if not data.get('has_more') or not next_token or next_token == page_token:
            return records, None
        page_token = next_token


def _collect_missing(table, sid, section, questions, context):
    """Return one report entry per question in *questions* with no explanation.

    *context* holds the passage-level fields (text_desc, text_audio,
    text_title, text_body) copied into every entry. Items that are not dicts
    are skipped; ``q_index`` is always the position in the original list.
    """
    found = []
    for index, q in enumerate(questions):
        if not isinstance(q, dict):
            continue
        expl = q.get('explanation', '')
        if expl is None or expl == '':
            entry = {
                'table': table,
                'sid': sid,
                'section': section,
                'q_index': index,
                'question': q.get('question', q.get('questionDesc', '')),
                'answer': q.get('answer', []),
                'options': q.get('options', []),
                'optionsImage': q.get('optionsImage', []),
            }
            entry.update(context)
            found.append(entry)
    return found


missing = []
total_questions = 0

for name, tid in tables.items():
    records, error = _fetch_records(tid)
    if records is None:
        print(f"❌ {name}: {error}")
        continue

    for item in records:
        fields = item.get('fields') or {}
        sid = fields.get('题目集合 ID', '')
        # Records whose set ID contains 010199 are excluded from the scan.
        # NOTE(review): the reason for this exclusion is not documented here.
        if '010199' in str(sid):
            continue

        jd_str = fields.get('jsonData', '{}')
        if not jd_str or jd_str == 'None':
            continue

        try:
            jd = json.loads(jd_str)
        except (TypeError, ValueError):
            # TypeError: field is not str/bytes; ValueError: malformed JSON.
            print(f"  ⚠️ {name} {sid}: still can't parse JSON")
            continue
        if not isinstance(jd, dict):
            print(f"  ⚠️ {name} {sid}: jsonData is not a JSON object")
            continue

        # Check first/second structure
        for section in ('first', 'second'):
            sect = jd.get(section)
            if not isinstance(sect, dict):
                continue
            qs = sect.get('questionSet')
            if not qs or not isinstance(qs, list):
                continue

            total_questions += len(qs)
            missing.extend(_collect_missing(name, sid, section, qs, {
                'text_desc': sect.get('textDesc', ''),
                'text_audio': sect.get('textAudio', ''),
                'text_title': sect.get('textTitle', ''),
                'text_body': sect.get('textBody', ''),
            }))

        # Check root questionSet (P6 style)
        qs = jd.get('questionSet')
        if qs and isinstance(qs, list):
            # Count each root question set exactly once, whether or not any of
            # its questions lack an explanation.
            total_questions += len(qs)
            # Root entries carry the same keys as first/second entries
            # ('options' and 'optionsImage') so consumers see one schema.
            missing.extend(_collect_missing(name, sid, 'root', qs, {
                'text_desc': jd.get('textDesc', ''),
                'text_audio': jd.get('textAudio', ''),
                'text_title': jd.get('textTitle', ''),
                'text_body': '',
            }))

from collections import Counter

print(f"Total questions scanned: {total_questions}")
print(f"Missing explanations: {len(missing)}")
print()

# Show summary by table. Iterating `tables` keeps the summary in step with
# the set of tables that was actually scanned.
by_table = Counter(m['table'] for m in missing)
for t in tables:
    print(f"  {t}: {by_table.get(t, 0)} missing")

# Save for generation. The dump uses ensure_ascii=False, so the file must be
# opened as UTF-8 explicitly; a non-UTF-8 locale default would otherwise fail
# on non-ASCII question text.
with open('/tmp/missing_explanations.json', 'w', encoding='utf-8') as f:
    json.dump(missing, f, ensure_ascii=False, indent=2)

print(f"\nSaved {len(missing)} missing explanations to /tmp/missing_explanations.json")

# Show a few samples
for m in missing[:3]:
    print(f"\n  [{m['table']}] {m['sid']}/{m['section']}[{m['q_index']}]")
    # The question may be None or a non-string value; coerce before slicing.
    print(f"    Question: {str(m['question'] or '')[:80]}")
    print(f"    Answer: {m['answer']}")