"""Repair five records whose JSON fails to parse, correct P5/P6 ability tags, and write back.

The script talks to a Feishu (Lark) Bitable app over its HTTP API and runs
three steps in order:

1. Repair the ``jsonData`` field of five known-broken records (tables P2/P4),
   remap outdated ability tags in them, write them back and re-read each one
   to confirm the stored text now parses.
2. Remap outdated ability tags in the P5 record with set ID ``032801``.
3. Fill in a heuristic ability tag for every P6 question that has none.

Nothing runs at import time; execute the file as a script to apply the fixes.
"""
import functools
import json
import re
import sys
import urllib.parse
import urllib.request

# NOTE(review): the app token is hard-coded in source; consider moving it next
# to the credentials in CRED_FILE.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"

API_BASE = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}"

# Name of the Bitable column holding the question-set ID.
SET_ID_FIELD = '题目集合 ID'

# Records known to hold unparsable jsonData: set ID -> (table ID, display label).
BROKEN_RECORDS = {
    "021901": ("tblzTLNH7f13uWQN", "P2"),
    "022301": ("tblzTLNH7f13uWQN", "P2"),
    "021301": ("tblVmeDtBDKsAEfz", "P4"),
    "021601": ("tblVmeDtBDKsAEfz", "P4"),
    "021801": ("tblVmeDtBDKsAEfz", "P4"),
}
P5_TABLE_ID = "tblDssVmhGzc3UKd"
P5_SET_ID = '032801'
P6_TABLE_ID = "tbloiMcD0sBtGSTq"

# Outdated ability tag -> replacement tag.
ABILITY_TAG_FIXES = {
    '听觉抓取关键信息': '显性事实理解|关键词识别',
    '多特征整合': '多句保持|信息整合',
}

# Ordered keyword rules for guessing an ability tag from question text; the
# first rule with any keyword contained in the lower-cased question wins.
_TAG_RULES = (
    (('where', 'location', 'place'), '显性细节理解|数字/时间/地点'),
    (('how many', 'how much', 'number'), '显性细节理解|数字/时间/地点'),
    (('what color', 'which one'), '显性事实理解|关键词识别'),
    (('why', 'because'), '情绪/态度理解'),
    (('like', 'love', 'want'), '目的/偏好识别|显性 to/for/like'),
)
_DEFAULT_TAG = '显性事实理解|关键词识别'

# Opening of an "explanation" string value, tolerating whitespace around ':'.
_EXPL_KEY_RE = re.compile(r'"explanation"\s*:\s*"')

# Typographic characters swapped for ASCII ones by the last-resort repair.
_CHAR_FIXES = str.maketrans({
    '\u201c': "'", '\u201d': "'",  # curly double quotes
    '\uff0c': ',', '\uff1a': ':',  # full-width comma and colon
    '\u2019': "'",                 # right single quote
})

# Sentinel for "did not parse"; None cannot be used because JSON null is None.
_INVALID = object()


@functools.lru_cache(maxsize=1)
def get_token():
    """Return a tenant access token for the first app listed in CRED_FILE.

    The token is fetched once and cached for the lifetime of the process so
    that every API call does not re-read the credentials and re-authenticate.

    Raises:
        OSError, KeyError, urllib.error.URLError: if the credentials cannot be
            read or the auth endpoint does not return a token.
    """
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    payload = {"app_id": app['appId'], "app_secret": app['appSecret']}
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"})
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())['tenant_access_token']


def api_call(url, method='GET', body=None):
    """Send an authenticated request and return the decoded JSON response.

    Args:
        url: Full endpoint URL.
        method: HTTP method name.
        body: Optional JSON-serialisable payload; sent as the request body
            with a JSON content type when not None.
    """
    headers = {"Authorization": f"Bearer {get_token()}"}
    data = None
    if body is not None:
        data = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, data=data, method=method, headers=headers)
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())


def update_record(table_id, record_id, fields):
    """Overwrite the given fields of one record and return the API response."""
    url = f"{API_BASE}/tables/{table_id}/records/{record_id}"
    return api_call(url, 'PUT', {"fields": fields})


def list_records(table_id, page_size=50):
    """Return every record of a table, following pagination.

    Raises:
        RuntimeError: if the API answers with a non-zero ``code``.
    """
    base = f"{API_BASE}/tables/{table_id}/records?page_size={page_size}"
    items = []
    page_token = None
    while True:
        url = base
        if page_token:
            url = f"{base}&page_token={urllib.parse.quote(str(page_token))}"
        resp = api_call(url)
        if resp.get('code') != 0:
            raise RuntimeError(
                f"listing table {table_id} failed: {resp.get('msg')}")
        data = resp.get('data') or {}
        items.extend(data.get('items') or [])
        page_token = data.get('page_token')
        if not data.get('has_more') or not page_token:
            return items


def extract_single_record(resp):
    """Return the record dict from a get-record response, or {} if absent.

    The single-record endpoint is documented to return the record under
    ``data.record``; a list-shaped ``data.items`` is accepted as a fallback.
    """
    data = (resp or {}).get('data') or {}
    record = data.get('record')
    if isinstance(record, dict):
        return record
    items = data.get('items') or []
    if items and isinstance(items[0], dict):
        return items[0]
    return {}


def _fields(item):
    """Return the ``fields`` mapping of a record, or {} when missing."""
    fields = item.get('fields') if isinstance(item, dict) else None
    return fields if isinstance(fields, dict) else {}


def _find_by_set_id(items, sid):
    """Return the first record whose set-ID column equals sid, else None."""
    for item in items:
        if _fields(item).get(SET_ID_FIELD, '') == sid:
            return item
    return None


def _try_parse(text):
    """Parse text as JSON, returning the _INVALID sentinel on failure."""
    try:
        return json.loads(text)
    except (json.JSONDecodeError, RecursionError):
        return _INVALID


def _is_escaped(s, i):
    """Return True if s[i] is preceded by an odd number of backslashes."""
    backslashes = 0
    k = i - 1
    while k >= 0 and s[k] == '\\':
        backslashes += 1
        k -= 1
    return backslashes % 2 == 1


def _bracket_candidates(text):
    """Return repair candidates that append missing ']' / '}' closers.

    Bracket balance is a plain character count (brackets inside string values
    are counted too), so the candidates are guesses to be validated by parsing.
    """
    missing_brackets = text.count('[') - text.count(']')
    missing_braces = text.count('{') - text.count('}')
    if missing_brackets <= 0 and missing_braces <= 0:
        return []
    trimmed = text.rstrip()
    core = trimmed.rstrip('}')
    dropped = len(trimmed) - len(core)
    closers = ']' * missing_brackets
    # Historical attempt: drop trailing braces, then append the missing closers.
    candidates = [core + closers + '}' * missing_braces]
    if dropped:
        # The dropped braces were real closers: put them back after the ']'.
        candidates.append(core + closers + '}' * (missing_braces + dropped))
        # Or the trailing braces were already in the right place.
        candidates.append(trimmed + closers + '}' * missing_braces)
    return candidates


def fix_expl_quotes(s):
    """Replace unescaped double quotes inside "explanation" values with '.

    A double quote inside an explanation value is treated as the closing quote
    only when the next non-whitespace character is ',', '}' or ']'; any other
    unescaped double quote is assumed to be a quotation mark in the prose and
    becomes a single quote. Text outside explanation values is left untouched.
    """
    out = []
    pos = 0
    n = len(s)
    while True:
        m = _EXPL_KEY_RE.search(s, pos)
        if m is None:
            out.append(s[pos:])
            break
        out.append(s[pos:m.end()])
        i = seg_start = m.end()
        closed = False
        while i < n:
            if s[i] == '"' and not _is_escaped(s, i):
                j = i + 1
                while j < n and s[j] in ' \t\r\n':
                    j += 1
                out.append(s[seg_start:i])
                if j < n and s[j] in ',}]':
                    out.append('"')
                    i += 1
                    closed = True
                    break
                out.append("'")
                seg_start = i + 1
            i += 1
        if not closed:
            # Value never terminated: keep the remainder as it is.
            out.append(s[seg_start:])
            break
        pos = i
    return ''.join(out)


def fix_json_str(jd_str):
    """Attempt to repair a broken JSON string.

    Strategies are tried in order and the first one that yields parsable JSON
    wins: direct parse, appending missing closing brackets/braces, neutralising
    stray double quotes inside "explanation" values, and finally replacing
    curly quotes and full-width punctuation with ASCII characters.

    Returns:
        A ``(value, status)`` tuple. On success ``value`` is the parsed JSON
        and ``status`` is one of ``"already_valid"``, ``"bracket_balance_fix"``,
        ``"quote_fix"`` or ``"char_replace"``. On failure ``value`` is None and
        ``status`` is ``"unfixable: all strategies exhausted"``.
    """
    try:
        return json.loads(jd_str), "already_valid"
    except json.JSONDecodeError:
        pass

    # Common fix 1: insert missing closing bracket(s).
    for candidate in _bracket_candidates(jd_str):
        parsed = _try_parse(candidate)
        if parsed is not _INVALID:
            return parsed, "bracket_balance_fix"

    # Common fix 2: unescaped quotes in explanation (P4_021301).
    parsed = _try_parse(fix_expl_quotes(jd_str))
    if parsed is not _INVALID:
        return parsed, "quote_fix"

    # Common fix 3: replace problematic characters.
    parsed = _try_parse(jd_str.translate(_CHAR_FIXES))
    if parsed is not _INVALID:
        return parsed, "char_replace"

    return None, "unfixable: all strategies exhausted"


def _remap_ability(holder):
    """Remap outdated tags in holder['ability'] in place; return True if changed."""
    if not isinstance(holder, dict):
        return False
    tags = holder.get('ability')
    if not isinstance(tags, list):
        return False
    remapped = [
        ABILITY_TAG_FIXES.get(tag, tag) if isinstance(tag, str) else tag
        for tag in tags
    ]
    if remapped == tags:
        return False
    holder['ability'] = remapped
    return True


def fix_ability_tags(jd):
    """Fix ability tags in jsonData at every level where they can appear.

    Covers section-level tags (``first.ability`` / ``second.ability``, P5
    style), question-level tags inside each section's ``questionSet`` and the
    root-level ``questionSet`` (P6 style). ``jd`` is modified in place.

    Returns:
        ``(jd, changed)`` where ``changed`` tells whether any tag was replaced.
    """
    if not isinstance(jd, dict):
        return jd, False
    changed = False
    for section in ('first', 'second'):
        sect = jd.get(section)
        if not isinstance(sect, dict) or not sect:
            continue
        changed |= _remap_ability(sect)
        for q in sect.get('questionSet') or []:
            changed |= _remap_ability(q)
    for q in jd.get('questionSet') or []:
        changed |= _remap_ability(q)
    return jd, changed


def infer_ability_tag(question):
    """Guess an ability tag from English keywords contained in the question."""
    text = str(question or '').lower()
    for keywords, tag in _TAG_RULES:
        if any(word in text for word in keywords):
            return tag
    return _DEFAULT_TAG


def fill_missing_abilities(jd):
    """Give every root-level question without an ability a guessed tag.

    ``jd`` is modified in place. Returns True if any question was changed.
    """
    if not isinstance(jd, dict):
        return False
    changed = False
    for q in jd.get('questionSet') or []:
        if not isinstance(q, dict) or q.get('ability'):
            continue
        q['ability'] = [infer_ability_tag(q.get('question', ''))]
        changed = True
    return changed


def fix_broken_records():
    """STEP 1: repair, write back and verify the records in BROKEN_RECORDS."""
    print("=" * 60)
    print("STEP 1: Fixing 5 broken JSON records")
    print("=" * 60)

    tables = {}  # table ID -> listed records, so each table is fetched once
    for sid, (table_id, label) in BROKEN_RECORDS.items():
        if table_id not in tables:
            tables[table_id] = list_records(table_id)
        item = _find_by_set_id(tables[table_id], sid)
        if item is None:
            print(f" ❌ {label} {sid}: record not found")
            continue

        rid = item['record_id']
        jd_str = _fields(item).get('jsonData')
        if not isinstance(jd_str, str):
            print(f" ❌ {label} {sid}: jsonData missing or not text")
            continue

        jd, msg = fix_json_str(jd_str)
        if jd is None:
            print(f" ❌ {label} {sid}: {msg}")
            continue

        # Also fix ability tags while the record is being rewritten anyway.
        jd, ab_changed = fix_ability_tags(jd)
        new_jd = json.dumps(jd, ensure_ascii=False)
        result = update_record(table_id, rid, {"jsonData": new_jd})
        if result.get('code') != 0:
            print(f" ❌ {label} {sid}: update failed - {result.get('msg')}")
            continue

        # Verify: read the record back and make sure the stored text parses.
        v_resp = api_call(f"{API_BASE}/tables/{table_id}/records/{rid}")
        v_jd = _fields(extract_single_record(v_resp)).get('jsonData', '')
        try:
            json.loads(v_jd)
        except (json.JSONDecodeError, TypeError) as e:
            print(f" ⚠️ {label} {sid}: written but re-verify failed: {e}")
        else:
            detail = f"json={msg}, ab={'fixed' if ab_changed else 'ok'}"
            print(f" ✅ {label} {sid}: {detail}")


def fix_p5_record():
    """STEP 2: remap outdated ability tags in the P5 record P5_SET_ID."""
    print(f"\n{'='*60}")
    print("STEP 2: Fix P5 032801")
    print("=" * 60)

    sid = P5_SET_ID
    item = _find_by_set_id(list_records(P5_TABLE_ID), sid)
    if item is None:
        print(f" ❌ P5 {sid}: record not found")
        return

    jd_str = _fields(item).get('jsonData')
    jd = fix_json_str(jd_str)[0] if isinstance(jd_str, str) else None
    if jd is None:
        print(f" ❌ P5 {sid}: JSON parse failed")
        return

    jd, ab_changed = fix_ability_tags(jd)
    if not ab_changed:
        print(f" ⏭️ P5 {sid}: no ability tag changes needed")
        return

    new_jd = json.dumps(jd, ensure_ascii=False)
    result = update_record(P5_TABLE_ID, item['record_id'], {"jsonData": new_jd})
    print(f" {'✅' if result.get('code')==0 else '❌'} P5 {sid}: ability tags updated")


def fill_p6_abilities():
    """STEP 3: add a guessed ability tag to P6 questions that lack one."""
    print(f"\n{'='*60}")
    print("STEP 3: Fix P6 records (add ability tags)")
    print("=" * 60)

    for item in list_records(P6_TABLE_ID):
        fields = _fields(item)
        sid = fields.get(SET_ID_FIELD, '')
        # Set IDs containing 010199 are deliberately left alone.
        if '010199' in str(sid):
            continue
        jd_str = fields.get('jsonData', '{}')
        if not jd_str or jd_str == 'None':
            continue

        jd = fix_json_str(jd_str)[0] if isinstance(jd_str, str) else None
        if jd is None:
            print(f" ❌ P6 {sid}: JSON parse failed")
            continue

        # P6 keeps its questions in a root-level questionSet.
        if not fill_missing_abilities(jd):
            print(f" ⏭️ P6 {sid}: no changes needed")
            continue

        new_jd = json.dumps(jd, ensure_ascii=False)
        result = update_record(P6_TABLE_ID, item['record_id'], {"jsonData": new_jd})
        if result.get('code') == 0:
            tags_assigned = [
                q.get('ability', [])
                for q in jd.get('questionSet', [])
                if isinstance(q, dict)
            ]
            print(f" ✅ P6 {sid}: ability filled ({[t[0] for t in tags_assigned if t]})")
        else:
            print(f" ❌ P6 {sid}: update failed - {result.get('msg')}")


def main():
    """Run the three repair steps in order."""
    fix_broken_records()
    fix_p5_record()
    fill_p6_abilities()

    print(f"\n{'='*60}")
    print("All fixes complete!")
    print("=" * 60)


if __name__ == "__main__":
    main()