# ai_member_xiaoyan/scripts/update_speaking_qa_ability.py

#!/usr/bin/env python3
"""Update ability labels for speaking_qa records based on question content."""
import json, requests, time

# Path to the JSON credentials file; the script reads apps[0].appId / appSecret from it.
CRED = "/root/.openclaw/credentials/xiaoyan/config.json"
# App identifier interpolated into the Bitable records URL (the `apps/{...}` segment).
TOKEN_APP = "CMHSbUUjka3TrUsaxxEc297ongf"
# Table identifier interpolated into the Bitable records URL (the `tables/{...}` segment).
TBL = "tblRGv7k4WH58Jgq"

# Ability labels written into each question's "ability" list.
L_INFO = "基础信息表达｜个人信息问答"
L_PREF = "表达喜好与理由"
L_PAST = "过去经历描述｜Past Activities"
L_INTR = "互动应答｜问答交流"
L_EXCH = "信息交换｜双向问答"

# Ordered (label, keywords) rules. The first rule with any keyword contained
# in the normalized question wins, so the order below is significant: a few
# keywords in later rules are shadowed by broader ones in earlier rules
# (e.g. "what do people use" is always claimed by the L_EXCH rule).
_CLASSIFY_RULES = (
    # Interactive responses
    (L_INTR, (
        "what do you say", "what will you say", "what would you say",
        "what shouldn't we do", "what must we not do", "what should we do",
        "what do we do next", "what do the shop workers say",
    )),
    # Descriptions of past experiences
    (L_PAST, (
        "what did you", "when did you last", "who did you help",
        "last weekend", "what have you learned", "what have you decided",
        "what toy did you buy", "where did you meet",
        "what did your father say", "what did you first find",
        "what gift have you got", "what did you see",
        "what you're glad you could do",
    )),
    # Information exchange
    (L_EXCH, (
        "what do we need", "what things are there",
        "what will your friends bring", "what do people use",
        "what do we use", "what shall we", "whose bag",
        "whose schoolbag", "whose pen is this", "who is this gift for",
    )),
    # Personal information: name, age, origin, family, appearance
    (L_INFO, (
        "what's your name", "how old", "where are you from",
        "who do you live", "is your home in", "what rooms",
        "what does your dad look like", "what does your mother look like",
        "what is your mom's hair", "what are your mom's",
        "what does your sister wear", "what does a dinosaur look like",
        "what do the clouds look like", "what is the moon like",
        "what's your best friend like", "what does your cat look like",
        "how is the bear", "who is your best", "who is taller",
        "what are you good at", "what is your best friend good at",
        "what is your main study goal", "what is your goal",
        "what habit is very important", "what information",
        "what do you have to do every day",
        "tell me what you're glad", "please introduce your pet",
        "what pet do you love", "any animals. which one",
        "what toys do you have", "how many books does your dad have",
        "what is in your schoolbag", "what's in your schoolbag",
        "what can you find", "what is there in your schoolbag",
        "what is in your classroom", "what are in the classroom",
    )),
    # NOTE(review): this group was previously commented as an
    # information-exchange fallback yet maps to L_INTR — confirm the
    # intended label before changing it.
    (L_INTR, (
        "where is your book", "is this pen yours", "where are your books",
        "which page", "where should you put", "can you swim",
        "is there a book", "can you help me", "what do you say to make",
        "what do you say when others", "what do you say when your friend",
        "what do you say when you want", "what do you say to draw",
        "what do you say to introduce",
    )),
    # Expressing preferences and reasons
    (L_PREF, (
        "favorite", "favourite", "what do you like", "what kind of",
        "what do you think", "what do you want", "do you like",
        "what do you enjoy", "what books do you", "do you keep",
        "are your classmates", "are you afraid",
        "what do your family plan", "what do your parents plan",
        "what do you plan", "what are you going to",
        "what will you", "what would you", "what can you do",
        "how do you like", "what colour do you like", "what colour do you want",
        "what gift do you want", "what do you want to eat", "why do you like",
        "which car is faster", "what sport can't you do",
        "what can you see", "who will you sit with",
        "how is the ice cream", "what animal might it be",
        "what is the rabbit eating", "what is the bird",
        "what do you need", "what will we have at the party",
        "what must we protect", "what do you believe",
        "what makes your dad unhappy", "what do your friends",
        "whose pen do you think", "what hair has your dad",
        "what sports can you do", "what do you need for",
        "if you can't find", "where will we go", "what do people use",
        "the radio has no sound", "what do you need to do",
    )),
)

# Typographic apostrophes are folded to ASCII so that keywords such as
# "what's your name" also match text typed with curly quotes.
_APOSTROPHE_TABLE = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u02bc": "'",
    "\uff07": "'",
})


def classify(question):
    """Return the ability label for a speaking question.

    The question is lower-cased, typographic apostrophes are folded to ``'``,
    and the ordered keyword rules are scanned; the first rule with a keyword
    that occurs as a substring decides the label.

    Args:
        question: The question text. Non-string values (e.g. ``None`` from a
            JSON ``null``) are treated like an empty question.

    Returns:
        One of ``L_INFO``, ``L_PREF``, ``L_PAST``, ``L_INTR`` or ``L_EXCH``.
        ``L_INFO`` is the default when no keyword matches.
    """
    if not isinstance(question, str):
        return L_INFO

    q = question.lower().translate(_APOSTROPHE_TABLE)
    for label, keywords in _CLASSIFY_RULES:
        if any(kw in q for kw in keywords):
            return label
    return L_INFO

# Main
# Load the app credentials used to request a tenant access token.
# JSON text is UTF-8, so decode it explicitly instead of relying on the locale.
with open(CRED, encoding="utf-8") as f:
    cred = json.load(f)
# Only the first configured app is used.
_first_app = cred["apps"][0]
aid = _first_app["appId"]
asecret = _first_app["appSecret"]

def get_token():
    """Request a tenant access token from Feishu for the loaded app credentials.

    Returns:
        The ``tenant_access_token`` string from the response body.

    Raises:
        RuntimeError: If the response body carries no token (for example when
            the credentials are rejected); the API ``code``/``msg`` are
            included in the message.
        requests.RequestException: On transport failure or timeout.
    """
    r = requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
                      json={"app_id": aid, "app_secret": asecret},
                      timeout=30)  # never hang indefinitely on a stalled connection
    body = r.json()
    access_token = body.get("tenant_access_token")
    if not access_token:
        raise RuntimeError(
            f"tenant_access_token request failed: code={body.get('code')} msg={body.get('msg')}")
    return access_token

def _api_data(resp, action):
    """Return the ``data`` object of a Feishu response, or raise with its code/msg."""
    body = resp.json()
    payload = body.get("data")
    if body.get("code") not in (0, None) or payload is None:
        raise RuntimeError(f"{action} failed: code={body.get('code')} msg={body.get('msg')}")
    return payload


token = get_token()

# Shared by every Bitable request below.
records_url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{TOKEN_APP}/tables/{TBL}/records"
auth_headers = {"Authorization": f"Bearer {token}"}
HTTP_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the run

# Page through the table and collect every record.
all_items = []
page_token = None
while True:
    # Passing the token via `params` lets requests URL-encode it.
    params = {"page_size": 50}
    if page_token:
        params["page_token"] = page_token
    resp = requests.get(records_url, headers=auth_headers, params=params, timeout=HTTP_TIMEOUT)
    page = _api_data(resp, "list records")
    all_items.extend(page.get("items") or [])  # tolerate `items: null`
    if not page.get("has_more"):
        break
    page_token = page.get("page_token")
    if not page_token:
        # Without a continuation token the next request would fetch the
        # first page again, forever.
        break

# Labels that count as "already classified"; built once, not per question.
valid_labels = {L_INFO, L_PREF, L_PAST, L_INTR, L_EXCH}

updates = 0
stats = {}  # label -> number of questions (re)labelled in this run
for item in all_items:
    rid = item["record_id"]
    # NOTE(review): the list call above may already return the fields;
    # confirm before dropping this per-record fetch.
    r = requests.get(f"{records_url}/{rid}", headers=auth_headers, timeout=HTTP_TIMEOUT)
    fields = _api_data(r, f"get record {rid}")["record"]["fields"]
    fid = fields.get("题目集合 ID", "")
    jd = fields.get("jsonData", "")

    # Skip records without a payload and the excluded question sets.
    if not jd or fid in ("L1", "L2", None):
        continue

    try:
        j = json.loads(jd)
    except (TypeError, ValueError):
        # TypeError: jsonData is not a string; ValueError: malformed JSON.
        print(f"⚠️ {fid}: jsonData is not valid JSON, skipped")
        continue
    if not isinstance(j, dict):
        continue

    changed = False
    for grp_name in ("first", "second"):
        group = j.get(grp_name)
        if not isinstance(group, dict):
            continue
        for q in group.get("questionSet") or []:
            if not isinstance(q, dict):
                continue
            existing = q.get("ability", [])
            # Leave questions alone when every existing label is already valid.
            if (isinstance(existing, list) and existing
                    and all(isinstance(a, str) and a in valid_labels for a in existing)):
                continue

            content = q.get("content", q.get("question", ""))
            if not isinstance(content, str):
                content = ""  # e.g. JSON null; classify() expects text
            label = classify(content)
            q["ability"] = [label]
            changed = True
            # Counted at classification time, whether or not the write below succeeds.
            stats[label] = stats.get(label, 0) + 1

    if changed:
        new_jd = json.dumps(j, ensure_ascii=False)
        r = requests.put(
            f"{records_url}/{rid}",
            headers={**auth_headers, "Content-Type": "application/json"},
            json={"fields": {"jsonData": new_jd}},
            timeout=HTTP_TIMEOUT)
        code = r.json().get("code")
        if code == 0:
            updates += 1
            print(f"✅ {fid}: updated")
        else:
            print(f"❌ {fid}: code={code}")
        time.sleep(0.3)  # brief pause between writes

print(f"\nUpdated {updates} records")
print("\nLabel distribution:")
for label, count in sorted(stats.items(), key=lambda x: -x[1]):
    print(f"  {label}: {count}")