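#!/usr/bin/env python3
"""
Word selection for U28-U36: first settle the 20-word pool of every Unit.

Rules:
1. U29 has 11 locked words plus 9 supplementary words.
2. About 2 function words per Unit (18 function words spread over 9 Units).
3. Words hinted at by the syllabus are allocated first.
4. Each word may appear in only one Unit.
5. A word with several parts of speech counts as separate entries, but those
   entries must not be placed in the same Lesson.
"""
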
# Load the word bank. Each usable row is tab-separated with at least four
# fields: word, part of speech, meaning, theme.
words = []
# Decode explicitly instead of relying on the platform locale: keywords such
# as 'café' are later compared against these entries, so non-ASCII text must
# round-trip the same way on every machine.
# NOTE(review): assumes the TSV is stored as UTF-8 - confirm with its producer.
with open('/tmp/wordbank_all.tsv', 'r', encoding='utf-8') as f:
    for line in f:
        # strip() removes surrounding whitespace (including the newline)
        # before the row is split on tabs.
        parts = line.strip().split('\t')
        # Rows with fewer than four fields are skipped.
        if len(parts) >= 4:
            words.append({
                'word': parts[0],
                'pos': parts[1],
                'meaning': parts[2],
                'theme': parts[3],
                # Unique identifier: the same word with another part of
                # speech gets a different id.
                'id': f"{parts[0]}({parts[1]})",
            })

# Words locked to U29; their entries are kept out of the available pool.
u29_locked_words = [
    "star",
    "moon",
    "forest",
    "rainbow",
    "hill",
    "waterfall",
    "jungle",
    "leaf/leaves",
    "lake",
    "snow",
    "ice",
]

# Split the word bank: entries whose word is locked form the U29 pool, every
# other entry forms the available pool. Both keep the word-bank order.
u29_pool = [entry for entry in words if entry["word"] in u29_locked_words]
available = [entry for entry in words if entry["word"] not in u29_locked_words]

print(f"可用池: {len(available)} 词条")
print(f"U29锁定: {len(u29_pool)} 词条")

# ========== Allocate 20 words per Unit ==========
# Strategy: first mark the words that are strongly associated with each Unit.

# Per-Unit profile, keyed by Unit code in U28..U36 order. Each profile holds:
#   desc          - short description of the Unit
#   strong_themes - theme labels strongly associated with the Unit
#   keywords      - candidate words for the Unit, in priority order as listed
unit_themes = {
    "U28": {
        "desc": "乡村奥德赛-社区建筑",
        "strong_themes": ["地点与建筑", "交通与出行", "职业"],
        "keywords": [
            "building", "café", "farm", "field", "village",
            "countryside", "town", "square", "elevator", "lift",
            "car park", "pool", "map", "trip", "travel",
            "ride", "station", "bus station", "ticket", "tractor",
            "farmer", "worker", "driver", "address", "centre",
            "town centre",
        ],
    },
    "U29": {
        "desc": "追逐星星-四季岛自然",
        "strong_themes": ["自然世界", "天气"],
        "keywords": [
            "cloud", "sky", "river", "wave", "weather",
            "wind", "rain", "cloudy", "island",
        ],
    },
    "U30": {
        "desc": "马戏巡游",
        "strong_themes": ["爱好与休闲", "动物"],
        "keywords": [
            "circus", "lion", "cage", "clown", "parrot",
            "fire", "mistake", "band", "net", "climb",
            "frightened", "naughty", "skip", "quickly", "exciting",
            "brave", "careful", "drop", "hurry", "loud",
        ],
    },
    "U31": {
        "desc": "家与家人-离别",
        "strong_themes": ["家庭与朋友", "家居与房屋"],
        "keywords": [
            "parent", "aunt", "uncle", "grandparent", "grandson",
            "granddaughter", "roof", "stairs", "floor", "balcony",
            "basement", "wish", "dream", "surprised", "everyone",
            "quiet", "think", "different", "only", "share",
        ],
    },
    "U32": {
        "desc": "小小园地-打理生态球",
        "strong_themes": ["自然世界", "天气", "动物"],
        "keywords": [
            "plant", "grow", "ground", "river", "cloud",
            "sky", "mountain", "light", "weather", "temperature",
            "wind", "rain", "kitten", "puppy", "feed",
            "water", "build", "fix", "tidy", "lovely",
        ],
    },
    "U33": {
        "desc": "森林派对-孤独",
        "strong_themes": ["沟通与社交", "食物与饮品", "家居物品"],
        "keywords": [
            "party", "invite", "send", "message", "breakfast",
            "lunch", "dinner", "pancake", "salad", "cup",
            "bowl", "plate", "glass", "laugh", "boring",
            "noise", "sometimes", "nothing", "picnic", "walk",
        ],
    },
    "U34": {
        "desc": "孤独空谷-情绪崩溃",
        "strong_themes": ["感受与情绪", "程度", "动作与行为"],
        "keywords": [
            "afraid", "cry", "bad", "terrible", "wrong",
            "mountain", "shout", "opposite", "fall", "lose",
            "change", "weak", "difficult", "dark", "asleep",
            "badly", "worse", "worst", "never", "move",
        ],
    },
    "U35": {
        "desc": "永不离弃-解心结",
        "strong_themes": ["动作与行为", "沟通与社交", "状态与描述"],
        "keywords": [
            "bring", "carry", "hurry", "wait", "call",
            "need", "should", "must", "idea", "mean",
            "safe", "sure", "strong", "together", "someone",
            "something", "then", "find", "turn", "open",
        ],
    },
    "U36": {
        "desc": "冬假快乐-冬季活动",
        "strong_themes": ["运动", "衣物与配饰", "程度"],
        "keywords": [
            "ice skates", "ice skating", "skate", "coat", "scarf",
            "sweater", "wonderful", "lovely", "better", "best",
            "well", "free", "happy", "swim", "roller skates",
            "hop", "score", "win", "cold", "blanket",
        ],
    },
}

# Check which of each Unit's keywords are present in the available pool and
# print a per-Unit summary.
print("\n=== 各Unit关键词在词库中的匹配情况 ===\n")

# Index the pool by word once, so a keyword lookup no longer rescans the whole
# `available` list. One word may map to several entries (e.g. one per part of
# speech); entries keep their order from `available`.
available_by_word = {}
for w in available:
    available_by_word.setdefault(w['word'], []).append(w)

for unit, info in unit_themes.items():
    found = []
    missing = []
    for kw in info['keywords']:
        matches = available_by_word.get(kw)
        if matches:
            # Every entry sharing this word counts as a match.
            found.extend(matches)
        else:
            missing.append(kw)
    print(f"{unit} ({info['desc']}): 匹配{len(found)}词, 缺失{len(missing)}词")
    if missing:
        # Only the first ten missing keywords are listed.
        print(f" 缺失: {missing[:10]}")
    print()