96 lines
3.2 KiB
Python
96 lines
3.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract course outline (core words, sentence patterns) from vala_bak database."""
|
|
import json
import os
import re

import pymysql
|
|
|
|
# Open the connection to the `vala_bak` database (as the `read_only` user by
# default). Every setting can be overridden through an environment variable;
# the literals are kept only as fallbacks so existing invocations keep working.
# NOTE(review): a database password is committed in this file - rotate it and
# remove the fallback once VALA_DB_PASSWORD is provisioned wherever this runs.
conn = pymysql.connect(
    host=os.environ.get('VALA_DB_HOST', 'bj-cdb-8frbdwju.sql.tencentcdb.com'),
    port=int(os.environ.get('VALA_DB_PORT', '25413')),
    user=os.environ.get('VALA_DB_USER', 'read_only'),
    password=os.environ.get('VALA_DB_PASSWORD', 'fdsfiidier^$*hjfdijjd232'),
    database=os.environ.get('VALA_DB_NAME', 'vala_bak'),
    charset='utf8mb4',
)
|
|
|
|
# game_ids to export, grouped by season and kept in the original order.
# NOTE(review): game_id 17 is not part of L2-S1 and 12 is listed before 10 -
# confirm both are intentional.
all_game_ids = [
    12, 10, 16, *range(18, 27),  # L2-S1 chapters: 12, 10, 16, 18-26
    *range(27, 39),              # L2-S2 chapters: 27-38
    *range(39, 44),              # L2-S3 chapters: 39-43
]
|
|
|
|
cursor = conn.cursor()

# Fetch the chapters with lesson_type = 1 for the selected games, ordered by
# game id and then by the chapter's index within the game.
# Only the list of '%s' placeholders is interpolated into the SQL text; the
# ids themselves are passed to execute() as bound parameters.
placeholders = ','.join(['%s'] * len(all_game_ids))
cursor.execute(f"""
    SELECT gi.id as game_id, gi.cn_name as unit_name, gi.game_code,
    gc.id as chapter_id, gc.cn_name as lesson_name, gc.index as lesson_idx
    FROM vala_game_info gi
    JOIN vala_game_chapter gc ON gc.game_id = gi.id
    WHERE gi.id IN ({placeholders}) AND gc.lesson_type = 1
    ORDER BY gi.id, gc.index
""", all_game_ids)

# Row layout: (game_id, unit_name, game_code, chapter_id, lesson_name, lesson_idx)
chapters = cursor.fetchall()

# Load the text_parse payload of every chapter found above.
chapter_ids = [row[3] for row in chapters]  # column 3 is chapter_id

# chapter_id -> text_parse. Always bound, so later code can rely on the name
# even when no chapter matched (an empty IN () would be invalid SQL, hence the
# guard around the second query).
text_parses = {}
if chapter_ids:
    placeholders2 = ','.join(['%s'] * len(chapter_ids))
    cursor.execute(f"""
        SELECT chapter_id, text_parse FROM unit_chapter_text_parse WHERE chapter_id IN ({placeholders2})
    """, chapter_ids)
    # Each row is a (chapter_id, text_parse) pair; a later duplicate wins.
    text_parses = dict(cursor.fetchall())
|
|
|
|
def extract_from_text_parse(tp_str):
    """Extract core words and sentence patterns from a text_parse JSON string.

    The payload is expected to be a JSON array of objects. For every entry of
    an object's ``textParse`` list:

    * each ``slices`` element with ``type == 1`` and a ``meaning`` key
      contributes its ``slice`` text as a core word;
    * each ``slices`` element with ``type == 2`` and a ``meaning`` key
      contributes its ``slice`` text as a sentence pattern;
    * each ``keySlices`` element contributes its ``slice`` text as a core
      word, whether or not it carries a ``meaning``.

    Args:
        tp_str: JSON text, or a falsy value (``None``/``''``) when there is
            nothing to parse.

    Returns:
        A ``(words, sentences)`` tuple of lists, each de-duplicated and in
        first-seen order. Both lists are empty when the input is missing, is
        not valid JSON, or is not a JSON array.
    """
    if not tp_str:
        return [], []

    try:
        data = json.loads(tp_str)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers input that
        # is not str/bytes. Anything else (e.g. KeyboardInterrupt) propagates
        # instead of being swallowed by a bare except.
        return [], []

    if not isinstance(data, list):
        return [], []

    # Dicts preserve insertion order, so they act as ordered sets here.
    words = {}
    sentences = {}

    for item in data:
        if not isinstance(item, dict):
            continue
        # `or []` also tolerates an explicit JSON null for the key.
        for tp in item.get('textParse') or []:
            if not isinstance(tp, dict):
                continue

            for sl in tp.get('slices') or []:
                if not isinstance(sl, dict) or 'meaning' not in sl:
                    continue
                text = sl.get('slice', '')
                if not text:
                    continue
                kind = sl.get('type')
                if kind == 1:
                    words.setdefault(text)
                elif kind == 2:
                    sentences.setdefault(text)

            # keySlices always count as core words.
            for ks in tp.get('keySlices') or []:
                if isinstance(ks, dict) and ks.get('slice'):
                    words.setdefault(ks['slice'])

    return list(words), list(sentences)
|
|
|
|
# Process each chapter
|
|
# Emit one pipe-delimited summary line per chapter.
for game_id, unit_name, game_code, chapter_id, lesson_name, lesson_idx in chapters:
    raw_parse = text_parses.get(chapter_id, '')
    core_words, patterns = extract_from_text_parse(raw_parse)

    # Keep at most the first 8 words and 4 patterns; joining an empty list
    # already yields '', so no separate empty check is needed.
    words_str = ', '.join(core_words[:8])
    sentences_str = ', '.join(patterns[:4])

    print(f"GAME={game_id}|UNIT={unit_name}|CODE={game_code}|CH={chapter_id}|L{lesson_idx}|NAME={lesson_name}|WORDS={words_str}|SENT={sentences_str}")

# Release the database resources once every chapter has been printed.
cursor.close()
conn.close()
|