ai_member_xiaoyan/scripts/inject_text_rules.py

181 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Inject text output rules into all production skill SKILL.md files."""
import os, re
# Markdown section injected into each production SKILL.md.
# NOTE: has_text_rules() recognises an earlier injection by the phrases
# "禁止 Markdown 标记" and "英式拼写优先"; keep them in this text so the
# script stays idempotent.
TEXT_RULES = """
## 文本输出规范(强制执行)
所有输出的文本内容(台词、题目、选项、解析、音频文本、阅读文章等)必须遵守以下规则:
1. 禁止 Markdown 标记:禁止使用 ** * __ _ 等加粗/斜体标识,也禁止使用 # > - 等块级 Markdown 语法。所有文本纯文本输出。
2. 英式拼写优先:单词涉及英美式拼写差异时(如 colour/color、centre/center、travelling/traveling)统一选择英式拼写。
3. 标点符号规范:严格区分全角/半角符号。中文内容使用全角标点(,。!?),英文内容使用半角标点(. , ! ?),不得混用。
"""
# Core skill files that were updated by hand; main() skips every path that
# ends with one of these suffixes.
ALREADY_UPDATED = {
    "unit_challenge_core/SKILL.md",
    "unit_challenge_master/SKILL.md",
    "script-component-production/SKILL.md",
    "kids-english-script-production/SKILL.md",
    "core-content-json-standard/SKILL.md",
    "interactive-component-json/SKILL.md",
}

# Workspace root: the scanned skill directories live under it and reported
# paths are made relative to it.
WORKSPACE = "/root/.openclaw/workspace-xiaoyan"
def find_skill_files(base_dir):
    """Return the paths of all ``SKILL.md`` files found under *base_dir*.

    Args:
        base_dir: Directory to walk recursively.

    Returns:
        A list with one entry per directory that contains a file named
        exactly ``SKILL.md``; each entry is that directory joined with the
        file name, in ``os.walk`` traversal order.  The list is empty when
        nothing matches.
    """
    # File names are unique within a directory, so a membership test is
    # equivalent to scanning the names one by one.
    return [
        os.path.join(root, 'SKILL.md')
        for root, _dirs, files in os.walk(base_dir)
        if 'SKILL.md' in files
    ]
def has_text_rules(content):
    """Report whether *content* already carries the text output rules.

    Finding either of the two marker phrases anywhere in the text is
    enough to count as present.
    """
    markers = ('禁止 Markdown 标记', '英式拼写优先')
    return any(marker in content for marker in markers)
def _fallback_insert_index(lines):
    """Return the index just past the front matter and title, or ``None``.

    The result points directly after the first ``# `` heading that follows
    the second ``---`` line, or directly after that ``---`` line when no
    such heading exists.  ``None`` means there are fewer than two ``---``
    lines.
    """
    fences = [i for i, line in enumerate(lines) if line.strip() == '---']
    if len(fences) < 2:
        return None
    closing = fences[1]
    for i in range(closing + 1, len(lines)):
        if lines[i].startswith('# '):
            return i + 1
    return closing + 1


def insert_rules(content, anchor_pattern, position='before'):
    """Return *content* with TEXT_RULES inserted, or ``None`` if not possible.

    The insertion point is derived from the first line that matches
    *anchor_pattern*.  When no line matches, the rules are placed after the
    front matter instead: directly below the first ``# `` heading following
    the closing ``---``, or directly below the closing ``---`` itself.

    Args:
        content: Full text of the file to update.
        anchor_pattern: Regular expression searched against each line.
        position: ``'before'`` inserts immediately above the anchor line.
            ``'after'`` inserts at the end of the anchor's section, that is
            immediately above the next ``## `` heading, or at the end of
            the text when there is none.  Not used on the front-matter
            fallback path.

    Returns:
        The updated text, or ``None`` when the rules are already present or
        no insertion point could be found.

    Raises:
        ValueError: If *position* is neither ``'before'`` nor ``'after'``.
    """
    if position not in ('before', 'after'):
        raise ValueError(
            f"position must be 'before' or 'after', got {position!r}"
        )
    if has_text_rules(content):
        return None  # Already has rules

    lines = content.split('\n')
    anchor = re.compile(anchor_pattern)
    anchor_idx = next(
        (i for i, line in enumerate(lines) if anchor.search(line)),
        None,
    )

    if anchor_idx is None:
        # Never insert above the closing '---': that would place the rules
        # inside the front matter.
        insert_idx = _fallback_insert_index(lines)
        if insert_idx is None:
            print(" Could not find insertion point")
            return None
    elif position == 'before':
        insert_idx = anchor_idx
    else:
        # Slicing at the next heading's own index puts the rules after the
        # last line of the anchor's section; len(lines) appends at the end.
        insert_idx = next(
            (
                i
                for i in range(anchor_idx + 1, len(lines))
                if lines[i].startswith('## ')
            ),
            len(lines),
        )

    return '\n'.join(lines[:insert_idx] + [TEXT_RULES] + lines[insert_idx:])
def main():
    """Inject the text rules into every eligible SKILL.md in the workspace.

    Collects SKILL.md files from the ``skills`` and ``business_production``
    directories under WORKSPACE, skips the ones that are listed in
    ALREADY_UPDATED, belong to a non-text skill, or already contain the
    rules, rewrites the remaining files in place and prints a summary.
    """
    # Path fragments of skills that do not produce text content.
    non_text_skills = (
        'lark_bitable_operate_as_bot',
        'lark_wiki_operate_as_bot',
        'feishu-embedded-sheet',
        'bitable-reader',
        'vala_git_workspace_backup',
        'feishu-table-translate-fill',
    )
    # Ordered (path fragments, anchor regex) pairs: the first pair with a
    # fragment contained in the relative path decides the anchor.  An anchor
    # of None means the file is skipped (the calculator skill).
    anchor_table = (
        (('questions/',), r'^##\s+核心功能'),
        (('dialogue-', 'task-router'), r'^##\s+(配置格式|触发|核心|规则|组件)'),
        (('组件生产',), r'^##\s+核心功能'),
        (('audit',), r'^##\s+审校'),
        (('cambridge',), r'^##\s+'),
        (('knowledge-mastery',), None),
    )
    # Everything else: insert before the first "## " heading.
    fallback_anchor = r'^##\s+'

    skill_paths = []
    for subdir in ('skills', 'business_production'):
        search_root = os.path.join(WORKSPACE, subdir)
        if os.path.isdir(search_root):
            skill_paths += find_skill_files(search_root)

    n_updated = 0
    n_skipped = 0
    for path in skill_paths:
        rel = os.path.relpath(path, WORKSPACE)

        handled_manually = any(rel.endswith(sfx) for sfx in ALREADY_UPDATED)
        if handled_manually or any(frag in rel for frag in non_text_skills):
            n_skipped += 1
            continue

        with open(path, 'r', encoding='utf-8') as src:
            text = src.read()
        if has_text_rules(text):
            n_skipped += 1
            continue

        anchor = next(
            (
                regex
                for fragments, regex in anchor_table
                if any(frag in rel for frag in fragments)
            ),
            fallback_anchor,
        )
        rewritten = None
        if anchor is not None:
            rewritten = insert_rules(text, anchor, 'before')
        if rewritten is None:
            n_skipped += 1
            continue

        with open(path, 'w', encoding='utf-8') as dst:
            dst.write(rewritten)
        print(f" Updated: {rel}")
        n_updated += 1

    print(f"\nDone: {n_updated} updated, {n_skipped} skipped")


if __name__ == '__main__':
    main()