#!/usr/bin/env python3
"""Contextual word-selection helper for units U28-U36.

Loads the word bank from a tab-separated file, prints summary statistics
(part-of-speech distribution, the ten most common themes, the number of
function words) and reports how many entries remain available once the
words locked for U29 are removed. This is a first step towards allocating
word-bank entries by each Unit's story theme.
"""

from collections import Counter

# Tab-separated word bank; the first four columns are word, pos, meaning, theme.
WORDBANK_PATH = '/tmp/wordbank_all.tsv'

# Part-of-speech tags that are counted as function words.
FUNC_POS = ('adv', 'prep', 'pron', 'det')

# Total number of words used in the allocation summary (the U29 locked words
# are subtracted from it).
# NOTE(review): presumably the overall target across all units - confirm.
TOTAL_TARGET = 180

# Words locked to U29.
# NOTE(review): 'leaf/leaves' only matches a word-bank entry spelled exactly
# that way; confirm how this entry is written in the word bank.
u29_locked = ['star', 'moon', 'forest', 'rainbow', 'hill', 'waterfall', 'jungle', 'leaf/leaves', 'lake', 'snow', 'ice']

# Hint words taken from the outline, keyed by unit.
outline_hints = {
    'U30': ['circus', 'lion', 'cage', 'clown', 'parrot', 'fire', 'mistake', 'bad'],
    'U34': ['mountain', 'shout', 'opposite'],
}


def load_words(path=WORDBANK_PATH):
    """Read the word bank and return a list of entry dicts.

    Args:
        path: Path of the tab-separated word-bank file.

    Returns:
        A list of dicts with the keys 'word', 'pos', 'meaning' and 'theme',
        in file order. Rows with fewer than four columns, or with an empty
        first column, are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    words = []
    # Explicit encoding: the platform default may not be able to decode the
    # non-ASCII text in the file.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Remove only the line terminator before splitting. Stripping the
            # whole line would also eat leading/trailing tabs, which shifts
            # the columns or drops rows whose last column is empty.
            parts = [field.strip() for field in line.rstrip('\n').split('\t')]
            if len(parts) < 4 or not parts[0]:
                continue
            words.append({
                'word': parts[0],
                'pos': parts[1],
                'meaning': parts[2],
                'theme': parts[3],
            })
    return words


def function_words(words):
    """Return the entries whose 'pos' is one of the function-word tags."""
    return [w for w in words if w['pos'] in FUNC_POS]


def exclude_locked(words, locked):
    """Return the entries whose 'word' is not listed in *locked*."""
    locked_set = set(locked)  # built once so each membership test is O(1)
    return [w for w in words if w['word'] not in locked_set]


def main():
    """Load the word bank and print the selection statistics."""
    # Load the word bank.
    words = load_words()
    print(f"总可用词条: {len(words)}")

    # Statistics.
    pos_count = Counter(w['pos'] for w in words)
    theme_count = Counter(w['theme'] for w in words)

    print(f"\n词性分布: {dict(pos_count)}")
    print("\n主题Top10:")
    for theme, count in theme_count.most_common(10):
        print(f" {theme}: {count}")

    # Function words.
    func_words = function_words(words)
    print(f"\n虚词总数: {len(func_words)}")

    # Words locked to U29.
    locked_total = len(u29_locked)
    print(f"\nU29锁定词: {locked_total}个")

    # Pool that is still available once the locked words are removed.
    available = exclude_locked(words, u29_locked)
    print(f"去除U29锁定后可用: {len(available)}")
    # The figures are derived from the constants so the message cannot drift
    # out of sync with the locked list.
    remaining = TOTAL_TARGET - locked_total
    print(f"需分配: {TOTAL_TARGET} - {locked_total}(U29锁定) = {remaining} 从 {len(available)} 中选")


if __name__ == '__main__':
    main()