ai_member_xiaobian/scripts/word_allocation.py
2026-06-12 08:10:01 +08:00

50 lines
1.5 KiB
Python

#!/usr/bin/env python3
"""
Contextual word-selection helper for units U28-U36.

Loads the word bank, prints summary statistics (part-of-speech and theme
distribution, function-word count) and reports how many entries remain
selectable once the words locked for U29 are removed.
"""
from collections import Counter

# Tab-separated word bank; the first four columns are read as
# word, part of speech, meaning, theme.
WORDBANK_PATH = '/tmp/wordbank_all.tsv'

# Part-of-speech tags counted as function words.
FUNCTION_POS = ('adv', 'prep', 'pron', 'det')

# Overall number of words to allocate, as used in the summary line.
# NOTE(review): presumably 9 units x 20 words -- confirm against the outline.
TOTAL_TARGET = 180

# Words already locked for U29.
u29_locked = ['star', 'moon', 'forest', 'rainbow', 'hill', 'waterfall', 'jungle', 'leaf/leaves', 'lake', 'snow', 'ice']

# Outline hint words per unit (not consumed by the code in this script).
outline_hints = {
    'U30': ['circus', 'lion', 'cage', 'clown', 'parrot', 'fire', 'mistake', 'bad'],
    'U34': ['mountain', 'shout', 'opposite']
}


def load_words(path=WORDBANK_PATH):
    """Read the word bank at *path* and return a list of entry dicts.

    Each returned dict has the keys ``word``, ``pos``, ``meaning`` and
    ``theme`` taken from the first four tab-separated fields of a line.
    Lines that yield fewer than four fields after stripping surrounding
    whitespace are skipped.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    entries = []
    # Decode explicitly instead of relying on the locale default, which
    # differs between platforms (assumes the bank is UTF-8 -- confirm).
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) >= 4:
                entries.append({
                    'word': parts[0],
                    'pos': parts[1],
                    'meaning': parts[2],
                    'theme': parts[3],
                })
    return entries


def locked_forms(locked):
    """Return the set of word forms blocked by the *locked* entries.

    Every entry is blocked verbatim; an entry written as slash-separated
    variants (e.g. ``'leaf/leaves'``) additionally blocks each variant, so
    the bank may list the word either way.
    """
    forms = set()
    for entry in locked:
        forms.add(entry)
        forms.update(part for part in entry.split('/') if part)
    return forms


def remove_locked(words, locked):
    """Return the entries of *words* whose ``word`` is not locked."""
    blocked = locked_forms(locked)  # built once: O(1) membership tests
    return [w for w in words if w['word'] not in blocked]


def allocation_summary(locked_count, available_count, total=TOTAL_TARGET):
    """Format the final summary line.

    The remaining quota is derived from *total* and *locked_count* so the
    printed arithmetic always matches the actual locked list.
    """
    remaining = total - locked_count
    return (
        f"需分配: {total} - {locked_count}(U29锁定) = {remaining} "
        f"从 {available_count} 中选"
    )


def main():
    """Load the word bank and print the allocation statistics."""
    words = load_words()
    print(f"总可用词条: {len(words)}")

    # Distribution statistics
    pos_count = Counter(w['pos'] for w in words)
    theme_count = Counter(w['theme'] for w in words)
    print(f"\n词性分布: {dict(pos_count)}")
    print("\n主题Top10:")
    for theme, count in theme_count.most_common(10):
        print(f" {theme}: {count}")

    # Function words
    func_words = [w for w in words if w['pos'] in FUNCTION_POS]
    print(f"\n虚词总数: {len(func_words)}")

    # U29 locked words
    print(f"\nU29锁定词: {len(u29_locked)}")

    # Pool that is still available once the locked words are removed
    available = remove_locked(words, u29_locked)
    print(f"去除U29锁定后可用: {len(available)}")
    print(allocation_summary(len(u29_locked), len(available)))


if __name__ == "__main__":
    main()