#!/usr/bin/env python3
"""
U28-U36 word allocation - first settle the 20-word pool of every Unit.

Rules:
1. U29 has 11 locked words + 9 supplementary words.
2. Roughly 2 function words per Unit (18 function words over 9 Units).
3. Words hinted at by the syllabus outline are assigned first.
4. Each word may appear in only one Unit.
5. The same word with several parts of speech counts as separate entries,
   but those entries must not be placed in the same Lesson.

This part of the script loads the word bank, splits off the U29 locked
entries, and reports which of each Unit's candidate keywords exist in the
remaining pool.
"""

# Load the word bank: one entry per line, tab-separated columns
# word / part of speech / meaning / theme.
words = []
# The encoding is explicit so decoding does not depend on the platform
# locale (assumes the file is UTF-8 - TODO confirm).
with open('/tmp/wordbank_all.tsv', 'r', encoding='utf-8') as f:
    for line in f:
        # NOTE(review): strip() also removes leading/trailing tabs, so a row
        # whose first or last column is empty loses that column.
        parts = line.strip().split('\t')
        # Rows with fewer than four columns are skipped.
        if len(parts) >= 4:
            words.append({
                'word': parts[0],
                'pos': parts[1],
                'meaning': parts[2],
                'theme': parts[3],
                'id': f"{parts[0]}({parts[1]})",  # unique identifier
            })

# Words locked to U29 (these are removed from the available pool).
u29_locked_words = [
    'star', 'moon', 'forest', 'rainbow', 'hill', 'waterfall',
    'jungle', 'leaf/leaves', 'lake', 'snow', 'ice',
]

# Build the available pool (everything except the U29 locked words).
# A set gives constant-time membership tests; the list above is kept
# unchanged for any code that reads it.
u29_locked_lookup = set(u29_locked_words)
available = []
u29_pool = []
for w in words:
    if w['word'] in u29_locked_lookup:
        u29_pool.append(w)
    else:
        available.append(w)

print(f"可用池: {len(available)} 词条")
print(f"U29锁定: {len(u29_pool)} 词条")

# ========== Allocate 20 words per Unit ==========
# Strategy: first mark the words strongly associated with each Unit.

# Mapping of Unit -> description, strongly related themes, candidate keywords.
# NOTE(review): some keywords are listed under more than one Unit
# ('river', 'cloud', 'sky', 'weather', 'wind', 'rain' in U29 and U32;
# 'mountain' in U32 and U34; 'hurry' in U30 and U35; 'lovely' in U32 and
# U36). Rule 4 means a later step has to settle each of them on one Unit.
unit_themes = {
    'U28': {
        'desc': '乡村奥德赛-社区建筑',
        'strong_themes': ['地点与建筑', '交通与出行', '职业'],
        'keywords': [
            'building', 'café', 'farm', 'field', 'village', 'countryside',
            'town', 'square', 'elevator', 'lift', 'car park', 'pool', 'map',
            'trip', 'travel', 'ride', 'station', 'bus station', 'ticket',
            'tractor', 'farmer', 'worker', 'driver', 'address', 'centre',
            'town centre',
        ],
    },
    'U29': {
        'desc': '追逐星星-四季岛自然',
        'strong_themes': ['自然世界', '天气'],
        'keywords': [
            'cloud', 'sky', 'river', 'wave', 'weather', 'wind', 'rain',
            'cloudy', 'island',
        ],
    },
    'U30': {
        'desc': '马戏巡游',
        'strong_themes': ['爱好与休闲', '动物'],
        'keywords': [
            'circus', 'lion', 'cage', 'clown', 'parrot', 'fire', 'mistake',
            'band', 'net', 'climb', 'frightened', 'naughty', 'skip',
            'quickly', 'exciting', 'brave', 'careful', 'drop', 'hurry',
            'loud',
        ],
    },
    'U31': {
        'desc': '家与家人-离别',
        'strong_themes': ['家庭与朋友', '家居与房屋'],
        'keywords': [
            'parent', 'aunt', 'uncle', 'grandparent', 'grandson',
            'granddaughter', 'roof', 'stairs', 'floor', 'balcony',
            'basement', 'wish', 'dream', 'surprised', 'everyone', 'quiet',
            'think', 'different', 'only', 'share',
        ],
    },
    'U32': {
        'desc': '小小园地-打理生态球',
        'strong_themes': ['自然世界', '天气', '动物'],
        'keywords': [
            'plant', 'grow', 'ground', 'river', 'cloud', 'sky', 'mountain',
            'light', 'weather', 'temperature', 'wind', 'rain', 'kitten',
            'puppy', 'feed', 'water', 'build', 'fix', 'tidy', 'lovely',
        ],
    },
    'U33': {
        'desc': '森林派对-孤独',
        'strong_themes': ['沟通与社交', '食物与饮品', '家居物品'],
        'keywords': [
            'party', 'invite', 'send', 'message', 'breakfast', 'lunch',
            'dinner', 'pancake', 'salad', 'cup', 'bowl', 'plate', 'glass',
            'laugh', 'boring', 'noise', 'sometimes', 'nothing', 'picnic',
            'walk',
        ],
    },
    'U34': {
        'desc': '孤独空谷-情绪崩溃',
        'strong_themes': ['感受与情绪', '程度', '动作与行为'],
        'keywords': [
            'afraid', 'cry', 'bad', 'terrible', 'wrong', 'mountain', 'shout',
            'opposite', 'fall', 'lose', 'change', 'weak', 'difficult',
            'dark', 'asleep', 'badly', 'worse', 'worst', 'never', 'move',
        ],
    },
    'U35': {
        'desc': '永不离弃-解心结',
        'strong_themes': ['动作与行为', '沟通与社交', '状态与描述'],
        'keywords': [
            'bring', 'carry', 'hurry', 'wait', 'call', 'need', 'should',
            'must', 'idea', 'mean', 'safe', 'sure', 'strong', 'together',
            'someone', 'something', 'then', 'find', 'turn', 'open',
        ],
    },
    'U36': {
        'desc': '冬假快乐-冬季活动',
        'strong_themes': ['运动', '衣物与配饰', '程度'],
        'keywords': [
            'ice skates', 'ice skating', 'skate', 'coat', 'scarf', 'sweater',
            'wonderful', 'lovely', 'better', 'best', 'well', 'free', 'happy',
            'swim', 'roller skates', 'hop', 'score', 'win', 'cold',
            'blanket',
        ],
    },
}

# Check which keywords are present in the available pool.
# Index the pool by word once so each keyword is a dictionary lookup
# instead of a scan over the whole pool; entries keep their pool order.
available_by_word = {}
for w in available:
    available_by_word.setdefault(w['word'], []).append(w)

print("\n=== 各Unit关键词在词库中的匹配情况 ===\n")
for unit, info in unit_themes.items():
    found = []
    missing = []
    for kw in info['keywords']:
        # One keyword can match several entries (same word, different
        # part of speech), so every match is counted.
        matches = available_by_word.get(kw)
        if matches:
            found.extend(matches)
        else:
            missing.append(kw)
    print(f"{unit} ({info['desc']}): 匹配{len(found)}词, 缺失{len(missing)}词")
    if missing:
        # Only the first ten missing keywords are shown.
        print(f" 缺失: {missing[:10]}")
    print()