ai_member_xiaoyan/scripts/inject_text_rules.py

#!/usr/bin/env python3
"""Inject text output rules into all production skill SKILL.md files."""
import os, re

# Markdown section that insert_rules() splices into each SKILL.md. It is a
# level-2 heading ("text output specification, mandatory") followed by three
# numbered rules: no Markdown markup in output text, prefer British spelling,
# and keep full-width (Chinese) and half-width (English) punctuation apart.
# The literal starts with two newlines and ends with one.
# NOTE: has_text_rules() detects an existing section by searching for the
# phrases '禁止 Markdown 标记' and '英式拼写优先', so rewording rules 1 or 2
# here requires updating that check as well.
TEXT_RULES = """

## 文本输出规范（强制执行）

所有输出的文本内容（台词、题目、选项、解析、音频文本、阅读文章等）必须遵守以下规则：

1. 禁止 Markdown 标记：禁止使用 ** * __ _ 等加粗/斜体标识，也禁止使用 # > - 等块级 Markdown 语法。所有文本纯文本输出。
2. 英式拼写优先：单词涉及英美式拼写差异时（如 colour/color、centre/center、travelling/traveling），统一选择英式拼写。
3. 标点符号规范：严格区分全角/半角符号。中文内容使用全角标点（，。！？），英文内容使用半角标点（. , ! ?），不得混用。
"""

# Core skill files that were already updated and must be skipped.
# main() skips a file when its WORKSPACE-relative path *ends with* one of
# these entries (str.endswith), so each entry is a path suffix of the form
# '<skill directory>/SKILL.md'.
ALREADY_UPDATED = {
    'unit_challenge_core/SKILL.md',
    'unit_challenge_master/SKILL.md',
    'script-component-production/SKILL.md',
    'kids-english-script-production/SKILL.md',
    'core-content-json-standard/SKILL.md',
    'interactive-component-json/SKILL.md',
}

# Absolute root of the workspace. main() scans its 'skills' and
# 'business_production' subdirectories and computes relative paths against it.
WORKSPACE = '/root/.openclaw/workspace-xiaoyan'

def find_skill_files(base_dir):
    """Return the path of every ``SKILL.md`` file found under *base_dir*.

    Args:
        base_dir: Directory to walk recursively.

    Returns:
        A list of paths, each built as ``os.path.join(root, 'SKILL.md')``
        where ``root`` is the directory reported by ``os.walk``, in
        traversal order. The list is empty when nothing matches.
    """
    # The filename must match exactly (case-sensitive); look-alikes such as
    # 'SKILL.md.bak' are ignored.
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(base_dir)
        for name in files
        if name == 'SKILL.md'
    ]

def has_text_rules(content):
    """Report whether *content* already contains the text output rules.

    The presence of either marker phrase is treated as "already injected".
    """
    markers = ('禁止 Markdown 标记', '英式拼写优先')
    return any(marker in content for marker in markers)

def _fallback_insert_index(lines):
    """Return the insertion index to use when no anchor line matched.

    Looks for the second line consisting solely of '---' (presumably the
    closing front-matter fence) and then for the first '# ' heading after
    it. The returned index is directly *below* that heading, or directly
    below the fence when there is no such heading, so the rules never end
    up inside the fenced block. Returns None when there are fewer than two
    '---' lines.
    """
    fence_count = 0
    closing_idx = None
    for i, line in enumerate(lines):
        if line.strip() == '---':
            fence_count += 1
            if fence_count == 2:
                closing_idx = i
                break
    if closing_idx is None:
        return None

    for i in range(closing_idx + 1, len(lines)):
        if lines[i].startswith('# '):
            return i + 1
    return closing_idx + 1


def insert_rules(content, anchor_pattern, position='before'):
    """Return *content* with TEXT_RULES inserted, or None if nothing was done.

    Args:
        content: Full text of a SKILL.md file.
        anchor_pattern: Regular expression tested with ``re.search`` against
            each line; the first matching line is the anchor.
        position: ``'before'`` inserts directly above the anchor line.
            ``'after'`` inserts at the end of the anchor's section: above
            the next line starting with ``'## '`` (or at the end of the
            text), backing up over blank lines that trail the section.

    Returns:
        The new text, or None when *content* already contains the rules or
        when no insertion point exists (a message is printed in that case).
        When the anchor is not found, the rules are placed below the first
        ``'# '`` heading that follows the second ``'---'`` line, regardless
        of *position*.

    Raises:
        ValueError: If *position* is neither ``'before'`` nor ``'after'``.
    """
    if position not in ('before', 'after'):
        raise ValueError(
            f"position must be 'before' or 'after', got {position!r}"
        )

    if has_text_rules(content):
        return None  # Already has rules

    lines = content.split('\n')

    # First line matching the anchor pattern, if any.
    target_idx = next(
        (i for i, line in enumerate(lines) if re.search(anchor_pattern, line)),
        None,
    )

    if target_idx is None:
        insert_idx = _fallback_insert_index(lines)
        if insert_idx is None:
            print("  Could not find insertion point")
            return None
    elif position == 'before':
        insert_idx = target_idx
    else:
        # End of the anchor's section: the next level-2 heading, or the end
        # of the text when the anchor's section is the last one.
        insert_idx = next(
            (
                i for i in range(target_idx + 1, len(lines))
                if lines[i].startswith('## ')
            ),
            len(lines),
        )
        # Keep blank separator lines (and a trailing newline) below the
        # inserted rules, but never move above the anchor line itself.
        while insert_idx > target_idx + 1 and not lines[insert_idx - 1].strip():
            insert_idx -= 1

    return '\n'.join(lines[:insert_idx] + [TEXT_RULES] + lines[insert_idx:])

def main():
    """Inject the text rules into every eligible SKILL.md in the workspace.

    Collects SKILL.md files under WORKSPACE's 'skills' and
    'business_production' directories, skips files that are listed in
    ALREADY_UPDATED, belong to a non-text skill, already contain the rules,
    or offer no insertion point, and rewrites the remaining files in place
    (UTF-8). Prints one line per updated file and a final summary.
    """
    search_roots = [
        os.path.join(WORKSPACE, sub)
        for sub in ('skills', 'business_production')
    ]
    targets = []
    for root_dir in search_roots:
        if os.path.isdir(root_dir):
            targets.extend(find_skill_files(root_dir))

    # Skills that do not produce text content; matched as path substrings.
    non_text_skills = (
        'lark_bitable_operate_as_bot',
        'lark_wiki_operate_as_bot',
        'feishu-embedded-sheet',
        'bitable-reader',
        'vala_git_workspace_backup',
        'feishu-table-translate-fill',
    )

    # Ordered (path substrings, anchor regex) pairs; the first entry with a
    # substring present in the relative path wins. An anchor of None means
    # the file is skipped.
    anchor_table = (
        # Unit challenge question types
        (('questions/',), r'^##\s+核心功能'),
        # Dialogue component skills
        (('dialogue-', 'task-router'), r'^##\s+(配置格式|触发|核心|规则|组件)'),
        # Component production skills
        (('组件生产',), r'^##\s+核心功能'),
        # Audit skills
        (('audit',), r'^##\s+审校'),
        # Cambridge exam library
        (('cambridge',), r'^##\s+'),
        # Calculator - skip
        (('knowledge-mastery',), None),
    )
    # Everything else: anchor on the first level-2 heading.
    fallback_anchor = r'^##\s+'

    n_updated = 0
    n_skipped = 0

    for path in targets:
        rel = os.path.relpath(path, WORKSPACE)

        already_done = any(rel.endswith(done) for done in ALREADY_UPDATED)
        if already_done or any(name in rel for name in non_text_skills):
            n_skipped += 1
            continue

        with open(path, 'r', encoding='utf-8') as fh:
            text = fh.read()

        if has_text_rules(text):
            n_skipped += 1
            continue

        for needles, pattern in anchor_table:
            if any(needle in rel for needle in needles):
                anchor = pattern
                break
        else:
            anchor = fallback_anchor

        if anchor is None:
            n_skipped += 1
            continue

        result = insert_rules(text, anchor, 'before')
        if result is None:
            n_skipped += 1
            continue

        with open(path, 'w', encoding='utf-8') as fh:
            fh.write(result)

        print(f"  Updated: {rel}")
        n_updated += 1

    print(f"\nDone: {n_updated} updated, {n_skipped} skipped")

if __name__ == '__main__':
    main()