#!/usr/bin/env python3
"""U28-U36 contextual word-selection helper.

Reads the word bank and prints summary statistics that support a
preliminary allocation of words to Units by story theme.
"""

from collections import Counter

# Tab-separated word bank; the first four columns are
# word, part of speech, meaning, theme.
WORDBANK_PATH = '/tmp/wordbank_all.tsv'

# Part-of-speech tags counted as function words.
FUNCTION_POS = ('adv', 'prep', 'pron', 'det')

# Total number of words to allocate, including the U29 locked words.
TOTAL_TARGET = 180

# Words already locked to U29.
# NOTE(review): 'leaf/leaves' is compared as one literal string, so it only
# excludes a word-bank entry spelled exactly that way - confirm how the
# word bank lists this word.
u29_locked = [
    'star', 'moon', 'forest', 'rainbow', 'hill', 'waterfall',
    'jungle', 'leaf/leaves', 'lake', 'snow', 'ice',
]

# Hint words from the outline, per Unit. Kept for reference; the statistics
# printed by main() do not use them.
outline_hints = {
    'U30': ['circus', 'lion', 'cage', 'clown', 'parrot', 'fire',
            'mistake', 'bad'],
    'U34': ['mountain', 'shout', 'opposite'],
}


def load_words(path=WORDBANK_PATH):
    """Load word-bank entries from a tab-separated file.

    Each line is stripped of surrounding whitespace and split on tabs.
    Lines with fewer than four fields are skipped; fields beyond the
    fourth are ignored.

    Args:
        path: Location of the TSV file.

    Returns:
        A list of dicts with keys 'word', 'pos', 'meaning' and 'theme'.

    Raises:
        OSError: If the file cannot be opened.
    """
    words = []
    # Decode explicitly as UTF-8: the file holds Chinese text, and the
    # locale default encoding is not UTF-8 on every platform.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) >= 4:
                words.append({
                    'word': parts[0],
                    'pos': parts[1],
                    'meaning': parts[2],
                    'theme': parts[3],
                })
    return words


def function_words(words):
    """Return the entries whose 'pos' is one of FUNCTION_POS."""
    return [w for w in words if w['pos'] in FUNCTION_POS]


def available_words(words, locked=u29_locked):
    """Return the entries whose 'word' is not in *locked*."""
    locked_set = set(locked)  # build once for O(1) membership tests
    return [w for w in words if w['word'] not in locked_set]


def main(path=WORDBANK_PATH):
    """Print word-bank statistics and the size of the remaining pool."""
    # Load the word bank.
    words = load_words(path)
    print(f"总可用词条: {len(words)}")

    # Distribution by part of speech and by theme.
    pos_count = Counter(w['pos'] for w in words)
    theme_count = Counter(w['theme'] for w in words)
    print(f"\n词性分布: {dict(pos_count)}")
    print("\n主题Top10:")
    for theme, count in theme_count.most_common(10):
        print(f" {theme}: {count}")

    # Function words.
    func_words = function_words(words)
    print(f"\n虚词总数: {len(func_words)}")

    # U29 locked words.
    print(f"\nU29锁定词: {len(u29_locked)}个")

    # Pool still available once the locked words are removed.
    available = available_words(words)
    print(f"去除U29锁定后可用: {len(available)}")

    # Derive the remaining quota from the locked list instead of
    # hard-coding it, so the summary stays correct if the list changes.
    remaining = TOTAL_TARGET - len(u29_locked)
    print(
        f"需分配: {TOTAL_TARGET} - {len(u29_locked)}(U29锁定) = {remaining} "
        f"从 {len(available)} 中选"
    )


if __name__ == '__main__':
    main()