#!/usr/bin/env python3
"""Inject text output rules into all production skill SKILL.md files."""
import os
import re

# Markdown section that insert_rules() splices into a SKILL.md file.
# It opens with two newlines and closes with one, so once joined into the
# surrounding lines it is separated from the preceding text by blank lines.
# has_text_rules() looks for phrases from rule 1 and rule 2 of this text to
# detect files that already contain it.
TEXT_RULES = """

## 文本输出规范(强制执行)

所有输出的文本内容(台词、题目、选项、解析、音频文本、阅读文章等)必须遵守以下规则:

1. 禁止 Markdown 标记:禁止使用 ** * __ _ 等加粗/斜体标识,也禁止使用 # > - 等块级 Markdown 语法。所有文本纯文本输出。
2. 英式拼写优先:单词涉及英美式拼写差异时(如 colour/color、centre/center、travelling/traveling),统一选择英式拼写。
3. 标点符号规范:严格区分全角/半角符号。中文内容使用全角标点(,。!?),英文内容使用半角标点(. , ! ?),不得混用。
"""

# Core skill files that were already updated; main() skips every SKILL.md
# whose workspace-relative path ends with one of these suffixes.
ALREADY_UPDATED = {
    'unit_challenge_core/SKILL.md',
    'unit_challenge_master/SKILL.md',
    'script-component-production/SKILL.md',
    'kids-english-script-production/SKILL.md',
    'core-content-json-standard/SKILL.md',
    'interactive-component-json/SKILL.md',
}

# Workspace root: main() scans its 'skills' and 'business_production'
# subdirectories and reports paths relative to it.
WORKSPACE = '/root/.openclaw/workspace-xiaoyan'

def find_skill_files(base_dir):
    """Return the paths of every file named ``SKILL.md`` under *base_dir*.

    Args:
        base_dir: Directory to search recursively with ``os.walk``.

    Returns:
        A list of paths, each built by joining the walked directory with
        ``'SKILL.md'``. The list is empty when nothing matches; ordering
        follows ``os.walk`` and is not sorted.
    """
    # A directory holds at most one entry with a given name, so a membership
    # test on the file list is enough.
    return [
        os.path.join(root, 'SKILL.md')
        for root, _dirs, files in os.walk(base_dir)
        if 'SKILL.md' in files
    ]

def has_text_rules(content):
    """Tell whether *content* already contains the text output rules.

    The text counts as already updated when it contains either of the two
    marker phrases checked below.
    """
    markers = ('禁止 Markdown 标记', '英式拼写优先')
    return any(marker in content for marker in markers)

def insert_rules(content, anchor_pattern, position='before'):
    """Return *content* with TEXT_RULES spliced in, or None if nothing is inserted.

    Args:
        content: Full text of a SKILL.md file.
        anchor_pattern: Regular expression tried with ``re.search`` against
            each line; the first matching line is the anchor.
        position: ``'before'`` inserts directly above the anchor line.
            ``'after'`` inserts at the end of the anchor's section, i.e.
            above the next line starting with ``'## '`` (or at the end of
            the text when there is none).

    Returns:
        The updated text, or None when *content* already contains the rules
        or no insertion point could be found.

    Raises:
        ValueError: If *position* is neither ``'before'`` nor ``'after'``.

    When no line matches *anchor_pattern*, the rules are placed after the
    frontmatter instead (see ``_after_frontmatter_index``); *position* is
    not consulted in that case.
    """
    if position not in ('before', 'after'):
        raise ValueError(
            f"position must be 'before' or 'after', got {position!r}"
        )
    if has_text_rules(content):
        return None  # Already has rules

    lines = content.split('\n')
    anchor_idx = next(
        (i for i, line in enumerate(lines) if re.search(anchor_pattern, line)),
        None,
    )

    if anchor_idx is None:
        insert_idx = _after_frontmatter_index(lines)
        if insert_idx is None:
            print(" Could not find insertion point")
            return None
    elif position == 'before':
        insert_idx = anchor_idx
    else:
        insert_idx = _section_end_index(lines, anchor_idx)

    # TEXT_RULES is one multi-line element; its own leading and trailing
    # newlines provide the blank lines around the inserted section.
    return '\n'.join(lines[:insert_idx] + [TEXT_RULES] + lines[insert_idx:])


def _after_frontmatter_index(lines):
    """Return the fallback insertion index after the frontmatter, or None.

    The frontmatter is taken to end at the second line that is exactly
    ``---`` once stripped. The index returned is just below the first
    ``'# '`` heading that follows it, or just below the closing ``---``
    when there is no such heading, so the rules never land inside the
    frontmatter. None means fewer than two ``---`` lines were found.
    """
    fences = [i for i, line in enumerate(lines) if line.strip() == '---']
    if len(fences) < 2:
        return None
    closing_idx = fences[1]
    for i in range(closing_idx + 1, len(lines)):
        if lines[i].startswith('# '):
            return i + 1
    return closing_idx + 1


def _section_end_index(lines, anchor_idx):
    """Return the index at which the section starting at *anchor_idx* ends.

    The section runs up to the next line starting with ``'## '``, or to the
    end of *lines*. A blank line closing the section is left below the
    insertion point, but the result is always greater than *anchor_idx* so
    the insertion can never land above the anchor itself.
    """
    end = len(lines)
    for i in range(anchor_idx + 1, len(lines)):
        if lines[i].startswith('## '):
            end = i
            break
    if end - 1 > anchor_idx and lines[end - 1].strip() == '':
        end -= 1
    return end

def main():
    """Insert TEXT_RULES into every eligible SKILL.md under the workspace.

    Scans the 'skills' and 'business_production' directories of WORKSPACE
    (those that exist), and for each SKILL.md found either rewrites the
    file in place with the rules inserted or counts it as skipped. A file
    is skipped when its relative path ends with an ALREADY_UPDATED entry,
    contains one of the non-text-production skill names, already contains
    the rules, has no anchor strategy, or offers no insertion point.
    Prints one line per updated file and a final summary.
    """
    skill_dirs = [
        os.path.join(WORKSPACE, 'skills'),
        os.path.join(WORKSPACE, 'business_production'),
    ]

    all_files = []
    for d in skill_dirs:
        if os.path.isdir(d):
            all_files.extend(find_skill_files(d))

    # Loop-invariant lookups, built once instead of once per file.
    already_updated = tuple(ALREADY_UPDATED)
    # Skills that do not produce text content.
    skip_patterns = (
        'lark_bitable_operate_as_bot',
        'lark_wiki_operate_as_bot',
        'feishu-embedded-sheet',
        'bitable-reader',
        'vala_git_workspace_backup',
        'feishu-table-translate-fill',
    )

    updated = 0
    skipped = 0

    for filepath in all_files:
        rel = os.path.relpath(filepath, WORKSPACE)

        # str.endswith accepts a tuple of suffixes.
        if rel.endswith(already_updated) or any(p in rel for p in skip_patterns):
            skipped += 1
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        if has_text_rules(content):
            skipped += 1
            continue

        strategy = _select_anchor(rel)
        if strategy is None:
            skipped += 1
            continue
        anchor, position = strategy

        new_content = insert_rules(content, anchor, position)
        if new_content is None:
            skipped += 1
            continue

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)

        print(f" Updated: {rel}")
        updated += 1

    print(f"\nDone: {updated} updated, {skipped} skipped")


def _select_anchor(rel):
    """Return ``(anchor_regex, position)`` for a relative path, or None to skip.

    The checks run in a fixed order and the first match wins, so a path
    matching several categories uses the earliest one listed here.
    """
    if 'questions/' in rel:
        # Unit challenge question types - insert before "## 核心功能"
        return r'^##\s+核心功能', 'before'
    if 'dialogue-' in rel or 'task-router' in rel:
        # Dialogue component skills - insert before the first listed heading
        return r'^##\s+(配置格式|触发|核心|规则|组件)', 'before'
    if '组件生产' in rel:
        # Component production skills
        return r'^##\s+核心功能', 'before'
    if 'audit' in rel:
        # Audit skills
        return r'^##\s+审校', 'before'
    if 'cambridge' in rel:
        # Cambridge exam library
        return r'^##\s+', 'before'
    if 'knowledge-mastery' in rel:
        # Calculator - skip
        return None
    # Default: before the first "## " heading
    return r'^##\s+', 'before'


if __name__ == '__main__':
    main()