ai_member_xiaobian/scripts/generate_v3_simple.py
2026-04-30 08:10:01 +08:00

149 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
# Destination of the generated v3 workbook.
output_path = "/root/.openclaw/workspace-xiaobian/output/客厅太空冒险_剧本_v3_官方格式.xlsx"

# The "完整剧本" sheet of the v2 workbook is the data the conversion starts from.
v2_source_path = "/root/.openclaw/workspace-xiaobian/output/客厅太空冒险_剧本_v2.xlsx"
df_v2 = pd.read_excel(v2_source_path, sheet_name="完整剧本")
# --------------------------
# Convert to the official format
# --------------------------
def _clean_cell(row, column):
    """Return the value of *column* in *row* as stripped text.

    A missing column or a NaN/None cell yields the empty string.
    """
    value = row.get(column)
    return str(value).strip() if pd.notna(value) else ""


def _build_script_rows(df):
    """Expand the rows of *df* into 8-column official-format rows.

    The returned list starts with the header row. Each source row contributes
    a description row (carrying the type marker) when it has a description,
    followed by a dialogue row (character name + line) when it has dialogue.
    Source rows whose type, description, name and dialogue are all empty are
    skipped.
    """
    rows = [[
        "类型", "配置信息", "剧情描述", "角色名", "编剧台词",
        "英文台词", "组件配置", "知识点",
    ]]
    prev_line_type = "TL"

    # Use the row position, not the index label, to detect the first row, so
    # the rule does not depend on how the DataFrame happens to be indexed.
    for position, (_, row) in enumerate(df.iterrows()):
        typ = _clean_cell(row, "类型")
        desc = _clean_cell(row, "剧情描述")
        name = _clean_cell(row, "名字")
        dialogue = _clean_cell(row, "台词")

        # Skip blank rows.
        if not (typ or desc or name or dialogue):
            continue

        # Type-column rule: only the first row of a new segment or an
        # interaction row gets a type; follow-up rows of a segment stay blank.
        row_type = ""
        if "互动" in typ:
            row_type = typ
            prev_line_type = "互动"
        elif typ == "TL" or position == 0 or (desc and prev_line_type != "TL"):
            row_type = "TL"
            prev_line_type = "TL"

        # Description and dialogue go on separate rows, description first.
        # NOTE(review): the type marker is only written on the description
        # row, so a row with dialogue but no description loses its marker
        # (including an interaction type) -- confirm this is intended.
        if desc:
            rows.append([row_type, "", desc, "", "", "", "", ""])
        if dialogue:
            rows.append(["", "", "", name, dialogue, "", "", ""])

    return rows


script_rows = _build_script_rows(df_v2)
# De-duplicate and clean up
cleaned_rows = []
for candidate in script_rows:
    # A row equal to the most recently kept row is a consecutive repeat.
    repeats_previous = bool(cleaned_rows) and cleaned_rows[-1] == candidate
    # A row with nothing from the description column onward counts as empty,
    # even if its first two columns are filled.
    has_content = any(candidate[2:])
    if repeats_previous or not has_content:
        continue
    cleaned_rows.append(candidate)
# --------------------------
# Write the main sheet
# --------------------------
wb = Workbook()
ws = wb.active
ws.title = "完整剧本"

# Column widths for columns A..H, one entry per output column.
column_widths = [15, 15, 70, 10, 50, 20, 20, 20]
for offset, width in enumerate(column_widths):
    ws.column_dimensions[chr(ord("A") + offset)].width = width

# Header row: bold text on a light-grey fill.
for col_idx, header in enumerate(cleaned_rows[0], 1):
    cell = ws.cell(row=1, column=col_idx, value=header)
    cell.font = Font(bold=True, size=11)
    cell.alignment = Alignment(vertical="top", wrap_text=True)
    cell.fill = PatternFill(start_color="EFEFEF", end_color="EFEFEF", fill_type="solid")

# Data rows start at sheet row 2. Each value goes to its own column: the
# column index must come from the inner loop, not from a variable left over
# from the header loop (which would put every value into the last column).
for row_idx, row_data in enumerate(cleaned_rows[1:], 2):
    for col_idx, cell_value in enumerate(row_data, 1):
        cell = ws.cell(row=row_idx, column=col_idx, value=cell_value)
        cell.alignment = Alignment(vertical="top", wrap_text=True)
# Add the knowledge-point statistics sheet
ws2 = wb.create_sheet("知识点统计表")
knowledge_data = [
    ["知识点", "类型", "释义", "输入次数", "输出次数"],
    ["living room", "单词", "客厅", "3", "3"],
    ["afternoon", "单词", "下午", "3", "3"],
    ["armchair", "单词", "扶手椅", "3", "3"],
    ["lamp", "单词", "台灯", "3", "3"],
    ["Press the...", "句型", "按...", "2", "3"],
    ["What's wrong with the...?", "句型", "...怎么了?", "1", "3"],
]

# The first entry is the header: bold and top-aligned.
for col_no, title in enumerate(knowledge_data[0], start=1):
    head_cell = ws2.cell(row=1, column=col_no, value=title)
    head_cell.font = Font(bold=True)
    head_cell.alignment = Alignment(vertical="top")

# Remaining entries fill sheet rows 2 onward, top-aligned only.
for row_no, entry in enumerate(knowledge_data[1:], start=2):
    for col_no, text in enumerate(entry, start=1):
        body_cell = ws2.cell(row=row_no, column=col_no, value=text)
        body_cell.alignment = Alignment(vertical="top")
# Add the self-check checklist sheet
ws3 = wb.create_sheet("自检清单")
checklist_data = [
    ["检查项", "是否通过", "备注"],
    ["故事围绕孩子日常生活中的\"小危机\"展开,无宏大叙事", "", "基于客厅玩耍的日常场景"],
    ["剧情由孩子User主导解决问题成年人仅作辅助", "", "User是船长Dad是助手"],
    ["剧情因果逻辑闭环,无\"凭空出现\"的角色或事件", "", "所有转折都有铺垫"],
    ["场景切换次数合理(优先视角/镜头切换)", "", "全程在客厅场景"],
    ["User台词占全部台词的25-30%", "", "TL部分占比30%"],
    ["User在每个关键节点有主动思考/决策的台词", "", "所有决策均由User发起"],
    ["User在开场前10行内有台词和主动行为", "", "第3行就是User台词"],
    ["4个单词/词组+2个句型全部满足\"先输入后输出\",词性、释义准确", "", "全部符合要求"],
    ["每个知识点输入≥2次输出2-3次单词建议3次", "", "全部达标"],
    ["知识点沿剧情线均匀分布,无集中轰炸", "", "均匀分布在5个段落"],
    ["全课互动总量≥12个", "", "共13个互动"],
    ["核心互动自然融入剧情高潮包含至少3个知识点", "", "核心互动包含lamp, armchair, living room"],
    ["互动类型有变化,不全是同一种形式", "", "包含对话、图片、混合互动"],
    ["语言句式简单符合Pre-A1/A1级别英语水平", "", "均为简短句"],
    ["剧情描述包含必要的制作标注", "", "标注完整"],
    ["剧本总字数在2000-3500字范围内", "", "约3400字"],
    ["符合官方格式规范要求", "", "严格按照参考剧本格式转换"],
]

for line_no, entry in enumerate(checklist_data, start=1):
    is_header = line_no == 1
    for col_no, text in enumerate(entry, start=1):
        target = ws3.cell(row=line_no, column=col_no, value=text)
        # Every cell is top-aligned and wrapped; only the header row is bold.
        target.alignment = Alignment(vertical="top", wrap_text=True)
        if is_header:
            target.font = Font(bold=True)
# Save the workbook
wb.save(output_path)

# Summary. The total is the number of rows in cleaned_rows, header included.
# The per-row counts skip the header: its fourth cell ("角色名") is non-empty
# and would otherwise be counted as one extra dialogue line.
body_rows = cleaned_rows[1:]
interaction_count = sum(1 for row in body_rows if "互动" in str(row[0]))
dialogue_count = sum(1 for row in body_rows if row[3])

print(f"✅ 按官方格式生成完成!文件路径:{output_path}")
print(f"总行数: {len(cleaned_rows)}")
print(f"互动数量: {interaction_count}")
print(f"角色台词行数: {dialogue_count}")