333 lines
12 KiB
Python
333 lines
12 KiB
Python
"""
|
|
修复 5 条 JSON 解析失败记录 + P5/P6 能力标签修正 + 回填
|
|
"""
|
|
import json, urllib.request, re, sys
|
|
|
|
# App token interpolated into every Bitable URL as the `/apps/{APP_TOKEN}` path segment.
APP_TOKEN = "CMHSbUUjka3TrUsaxxEc297ongf"
|
|
# JSON config file that get_token() reads the app id and app secret from.
CRED_FILE = "/root/.openclaw/credentials/xiaoyan/config.json"
|
|
|
|
def get_token():
    """Fetch a Feishu tenant access token using the credentials in CRED_FILE.

    Reads ``appId`` and ``appSecret`` from the first entry under ``apps`` in
    the JSON config, posts them to the tenant-access-token endpoint and
    returns the ``tenant_access_token`` field of the JSON reply.

    Returns:
        The value stored under ``tenant_access_token`` in the reply.

    Raises:
        OSError: the credential file cannot be opened or the HTTP request
            fails (``urllib.error.URLError`` is an ``OSError`` subclass).
        KeyError, IndexError: the config or the reply lacks an expected field.
    """
    # JSON files are UTF-8 by specification; do not depend on the locale default.
    with open(CRED_FILE, encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg['apps'][0]
    payload = json.dumps(
        {"app_id": app['appId'], "app_secret": app['appSecret']}
    ).encode()
    req = urllib.request.Request(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        data=payload,
        headers={"Content-Type": "application/json"})
    # The context manager closes the response (and its socket) deterministically.
    with urllib.request.urlopen(req) as resp:
        reply = json.loads(resp.read())
    return reply['tenant_access_token']
|
|
|
|
def api_call(url, method='GET', body=None):
    """Send an authenticated JSON request and return the decoded JSON reply.

    A fresh token is obtained through get_token() on every call and sent as a
    ``Bearer`` Authorization header.

    Args:
        url: Full request URL.
        method: HTTP method name.
        body: JSON-serialisable payload, or None to send no request body.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: the request fails (includes ``HTTPError`` for
            non-2xx statuses).
        json.JSONDecodeError: the response body is not valid JSON.
    """
    headers = {"Authorization": f"Bearer {get_token()}"}
    data = None
    # Test against None rather than truthiness so that an empty but valid
    # payload such as {} or [] is still transmitted.
    if body is not None:
        data = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, data=data, method=method, headers=headers)
    # Close the response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())
|
|
|
|
def update_record(table_id, record_id, fields):
    """PUT *fields* onto one Bitable record and return the decoded API reply.

    Args:
        table_id: Id of the table that holds the record.
        record_id: Id of the record to overwrite.
        fields: Mapping sent under the ``fields`` key of the request body.
    """
    endpoint = (
        "https://open.feishu.cn/open-apis/bitable/v1"
        f"/apps/{APP_TOKEN}/tables/{table_id}/records/{record_id}"
    )
    payload = {"fields": fields}
    return api_call(endpoint, method='PUT', body=payload)
|
|
|
|
# Key prefix that marks the start of an explanation string value.
_EXPLANATION_KEY = '"explanation":"'

# Characters swapped for ASCII equivalents by the last repair strategy.
_CHAR_REPLACEMENTS = str.maketrans({
    '\u201c': "'", '\u201d': "'",   # smart double quotes
    '\uff0c': ',', '\uff1a': ':',   # fullwidth comma / colon
    '\u2019': "'",                  # right single quote
})


def _closing_suffix(text):
    """Return the ``]``/``}`` sequence that closes every container left open in *text*.

    The scan skips over JSON string literals (honouring backslash escapes) so
    brackets inside strings are not counted, and the closers come back in
    proper nesting order (innermost first).
    """
    pending = []          # closers still owed, outermost first
    in_string = False
    escaped = False
    for ch in text:
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            pending.append('}')
        elif ch == '[':
            pending.append(']')
        elif ch in '}]' and pending and pending[-1] == ch:
            pending.pop()
    return ''.join(reversed(pending))


def _fix_explanation_quotes(s):
    """Replace stray double quotes inside ``"explanation":"..."`` values with ``'``.

    A quote inside an explanation is taken as the closing quote only when the
    next character after optional spaces/tabs is ``,``, ``}`` or ``]``; any
    other unescaped quote becomes a single quote. This is a heuristic: a stray
    quote that happens to be followed by a comma still ends the value early.
    """
    out = []
    i = 0
    n = len(s)
    in_explanation = False
    while i < n:
        if not in_explanation:
            if s.startswith(_EXPLANATION_KEY, i):
                out.append(_EXPLANATION_KEY)
                i += len(_EXPLANATION_KEY)
                in_explanation = True
            else:
                out.append(s[i])
                i += 1
            continue

        ch = s[i]
        if ch == '\\' and i + 1 < n:
            # Copy an escape sequence verbatim so `\"` and `\\` stay intact.
            out.append(s[i:i + 2])
            i += 2
            continue
        if ch == '"':
            j = i + 1
            while j < n and s[j] in ' \t':
                j += 1
            if j < n and s[j] in ',}]':
                out.append('"')
                in_explanation = False
            else:
                out.append("'")
        else:
            # Each character is emitted exactly once, so the explanation text
            # is never duplicated in the output.
            out.append(ch)
        i += 1
    return ''.join(out)


def fix_json_str(jd_str):
    """Parse *jd_str* as JSON, trying a series of repairs if it is malformed.

    Strategies, in order (the first one that yields parseable JSON wins):

    1. parse as-is                                   -> ``"already_valid"``
    2. drop trailing ``}`` and append the closers
       needed to balance open brackets/braces        -> ``"bracket_balance_fix"``
    3. turn stray double quotes inside
       ``"explanation"`` values into ``'``           -> ``"quote_fix"``
    4. replace smart quotes / fullwidth punctuation  -> ``"char_replace"``

    Each strategy is applied to the original string on its own; they are not
    combined.

    Args:
        jd_str: The JSON text to parse.

    Returns:
        ``(parsed, strategy_name)`` on success, or
        ``(None, "unfixable: all strategies exhausted")``.

    Raises:
        TypeError: if *jd_str* is not a str/bytes value (raised by json.loads).
    """
    try:
        return json.loads(jd_str), "already_valid"
    except json.JSONDecodeError:
        pass

    # Strategy: rebalance containers. Trailing braces are dropped first because
    # a missing `]` usually sits in front of them; the closers are then
    # re-derived from the actual nesting, which also restores those braces.
    stripped = jd_str.rstrip().rstrip('}')
    candidate = stripped + _closing_suffix(stripped)
    if candidate != jd_str:
        try:
            return json.loads(candidate), "bracket_balance_fix"
        except json.JSONDecodeError:
            pass

    # Strategy: unescaped quotes inside explanation values.
    try:
        return json.loads(_fix_explanation_quotes(jd_str)), "quote_fix"
    except json.JSONDecodeError:
        pass

    # Strategy: replace problematic typographic characters.
    try:
        return json.loads(jd_str.translate(_CHAR_REPLACEMENTS)), "char_replace"
    except json.JSONDecodeError:
        pass

    return None, "unfixable: all strategies exhausted"
|
|
|
|
# Obsolete ability tag -> replacement tag.
_ABILITY_TAG_FIXES = {
    '听觉抓取关键信息': '显性事实理解|关键词识别',
    '多特征整合': '多句保持|信息整合',
}


def _remap_ability(holder):
    """Rewrite ``holder['ability']`` in place; return True if anything changed.

    Anything that is not a dict carrying a list under ``ability`` is left alone.
    """
    if not isinstance(holder, dict):
        return False
    tags = holder.get('ability')
    if not isinstance(tags, list):
        return False
    # Only str entries are looked up, so unhashable entries pass through untouched.
    remapped = [
        _ABILITY_TAG_FIXES.get(tag, tag) if isinstance(tag, str) else tag
        for tag in tags
    ]
    if remapped == tags:
        return False
    holder['ability'] = remapped
    return True


def fix_ability_tags(jd):
    """Replace obsolete ability tags throughout a parsed jsonData dict.

    Three places are visited:
      * ``jd['first']`` / ``jd['second']`` -> their own ``ability`` list,
      * every entry of those sections' ``questionSet``,
      * every entry of the root-level ``questionSet``.

    Missing or ``null`` sections and question sets, and non-dict entries, are
    skipped rather than raising.

    Args:
        jd: Parsed jsonData dict; it is modified in place.

    Returns:
        ``(jd, changed)`` where *jd* is the same object that was passed in and
        *changed* is True if at least one ability list was rewritten.
    """
    changed = False

    for section in ('first', 'second'):
        sect = jd.get(section)
        if not sect or not isinstance(sect, dict):
            continue
        changed = _remap_ability(sect) or changed
        for question in sect.get('questionSet') or []:
            changed = _remap_ability(question) or changed

    for question in jd.get('questionSet') or []:
        changed = _remap_ability(question) or changed

    return jd, changed
|
|
|
|
|
|
# ===== STEP 1: Fix 5 broken JSON records =====
# For each known-broken record: locate it by its question-set id, repair the
# jsonData string, remap ability tags, write it back, then re-read the record
# to confirm the stored value parses.
print("=" * 60)
print("STEP 1: Fixing 5 broken JSON records")
print("=" * 60)

# Question-set id -> (Bitable table id, label used in the log lines).
broken = {
    "021901": ("tblzTLNH7f13uWQN", "P2"),
    "022301": ("tblzTLNH7f13uWQN", "P2"),
    "021301": ("tblVmeDtBDKsAEfz", "P4"),
    "021601": ("tblVmeDtBDKsAEfz", "P4"),
    "021801": ("tblVmeDtBDKsAEfz", "P4"),
}

for sid, (table_id, label) in broken.items():
    # NOTE(review): only the first page (page_size=50) is searched; a record
    # beyond it would be reported as not found -- confirm the tables are small.
    url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records?page_size=50"
    resp = api_call(url)
    matched = False
    for item in resp['data']['items']:
        if item['fields'].get('题目集合 ID', '') != sid:
            continue
        matched = True
        rid = item['record_id']
        jd_str = item['fields']['jsonData']

        jd, msg = fix_json_str(jd_str)
        if jd is None:
            print(f" ❌ {label} {sid}: {msg}")
            continue

        # Also fix ability tags
        jd, ab_changed = fix_ability_tags(jd)

        new_jd = json.dumps(jd, ensure_ascii=False)
        result = update_record(table_id, rid, {"jsonData": new_jd})
        if result.get('code') != 0:
            # Do not report success for a write the API rejected.
            print(f" ❌ {label} {sid}: update failed - {result.get('msg')}")
            break

        # Verify by re-reading the record that was just written.
        url2 = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{table_id}/records/{rid}"
        v_resp = api_call(url2)
        # The single-record endpoint is expected to return the record under
        # data.record rather than data.items; accept either shape so the
        # check does not crash with KeyError after a successful write.
        v_data = v_resp.get('data') or {}
        v_record = v_data.get('record') or (v_data.get('items') or [{}])[0]
        v_jd = (v_record.get('fields') or {}).get('jsonData', '')
        try:
            json.loads(v_jd)
            detail = f"json={msg}, ab={'fixed' if ab_changed else 'ok'}"
            print(f" ✅ {label} {sid}: {detail}")
        except (json.JSONDecodeError, TypeError) as e:
            print(f" ⚠️ {label} {sid}: written but re-verify failed: {e}")
        break

    if not matched:
        print(f" ⚠️ {label} {sid}: record not found in the first 50 records")
|
|
|
|
# ===== STEP 2: Fix P5 032801 =====
# Remap the ability tags of the single P5 record whose question-set id is
# 032801 and write it back only when something actually changed.
print(f"\n{'='*60}")
print("STEP 2: Fix P5 032801")
print("=" * 60)

p5_table = "tblDssVmhGzc3UKd"

# NOTE(review): only the first page (page_size=50) is searched.
url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{p5_table}/records?page_size=50"
resp = api_call(url)
for item in resp['data']['items']:
    sid = item['fields'].get('题目集合 ID', '')
    if sid != '032801':
        continue

    rid = item['record_id']
    jd_str = item['fields']['jsonData']

    # fix_json_str() tries a plain json.loads first, so no separate pre-parse
    # (and no bare except around it) is needed here.
    jd, _ = fix_json_str(jd_str)
    if jd is None:
        print(f" ❌ P5 {sid}: JSON parse failed")
        break

    jd, ab_changed = fix_ability_tags(jd)
    if not ab_changed:
        print(f" ⏭️ P5 {sid}: no ability tag changes needed")
    else:
        new_jd = json.dumps(jd, ensure_ascii=False)
        result = update_record(p5_table, rid, {"jsonData": new_jd})
        print(f" {'✅' if result.get('code')==0 else '❌'} P5 {sid}: ability tags updated")
    break
else:
    # The loop ended without reaching the target record.
    print(" ⚠️ P5 032801: record not found in the first 50 records")
|
|
|
|
# ===== STEP 3: Fix P6 records =====
# Give every P6 question that has no ability tag a default one chosen from
# keywords in the question text, then write changed records back.
print(f"\n{'='*60}")
print("STEP 3: Fix P6 records (add ability tags)")
print("=" * 60)

p6_table = "tbloiMcD0sBtGSTq"

# Ordered keyword rules: the first rule with any keyword contained in the
# lower-cased question text wins. Matching is plain substring containment.
p6_tag_rules = (
    (('where', 'location', 'place'), '显性细节理解|数字/时间/地点'),
    (('how many', 'how much', 'number'), '显性细节理解|数字/时间/地点'),
    (('what color', 'which one'), '显性事实理解|关键词识别'),
    (('why', 'because'), '情绪/态度理解'),
    (('like', 'love', 'want'), '目的/偏好识别|显性 to/for/like'),
)
p6_default_tag = '显性事实理解|关键词识别'

# NOTE(review): only the first page (page_size=50) is processed.
url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{APP_TOKEN}/tables/{p6_table}/records?page_size=50"
resp = api_call(url)
for item in resp['data']['items']:
    sid = item['fields'].get('题目集合 ID', '')
    # Records whose id contains 010199 are deliberately left untouched.
    if '010199' in str(sid):
        continue

    jd_str = item['fields'].get('jsonData', '{}')
    if not jd_str or jd_str == 'None':
        continue

    # fix_json_str() tries a plain json.loads first, so valid JSON is
    # returned unchanged and broken JSON gets the repair strategies.
    jd, _ = fix_json_str(jd_str)
    if jd is None:
        print(f" ❌ P6 {sid}: JSON parse failed")
        continue

    # P6 has root-level questionSet
    changed = False
    for q in jd.get('questionSet', []):
        if q.get('ability'):
            continue  # already tagged

        text = q.get('question', '').lower()
        chosen = next(
            (rule_tag for words, rule_tag in p6_tag_rules
             if any(w in text for w in words)),
            p6_default_tag,
        )
        q['ability'] = [chosen]
        changed = True

    if changed:
        new_jd = json.dumps(jd, ensure_ascii=False)
        result = update_record(p6_table, item['record_id'], {"jsonData": new_jd})
        if result.get('code') == 0:
            tags_assigned = [q.get('ability', []) for q in jd.get('questionSet', [])]
            print(f" ✅ P6 {sid}: ability filled ({[t[0] for t in tags_assigned if t]})")
        else:
            print(f" ❌ P6 {sid}: update failed - {result.get('msg')}")
    else:
        print(f" ⏭️ P6 {sid}: no changes needed")

print(f"\n{'='*60}")
print("All fixes complete!")
print("=" * 60)
|