From 7e0e163ef22800ce90378789bdf431fc0747993a Mon Sep 17 00:00:00 2001 From: wangxiaoming Date: Tue, 16 Jun 2026 14:02:48 +0800 Subject: [PATCH 1/4] feat(export): add video course export --- app/api/export/video-course/route.ts | 457 ++++++++++++++++++++ components/stage/header-controls.tsx | 113 ++++- lib/export/use-export-video-course.ts | 582 ++++++++++++++++++++++++++ lib/i18n/locales/ar-SA.json | 14 + lib/i18n/locales/en-US.json | 14 + lib/i18n/locales/ja-JP.json | 14 + lib/i18n/locales/pt-BR.json | 14 + lib/i18n/locales/ru-RU.json | 14 + lib/i18n/locales/zh-CN.json | 14 + lib/i18n/locales/zh-TW.json | 14 + 10 files changed, 1245 insertions(+), 5 deletions(-) create mode 100644 app/api/export/video-course/route.ts create mode 100644 lib/export/use-export-video-course.ts diff --git a/app/api/export/video-course/route.ts b/app/api/export/video-course/route.ts new file mode 100644 index 0000000000..57d0e994ac --- /dev/null +++ b/app/api/export/video-course/route.ts @@ -0,0 +1,457 @@ +import { execFile } from 'node:child_process'; +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; +import { promisify } from 'node:util'; +import { NextRequest, NextResponse } from 'next/server'; +import { apiError } from '@/lib/server/api-response'; + +export const runtime = 'nodejs'; +export const maxDuration = 300; + +const execFileAsync = promisify(execFile); + +interface VideoCourseManifest { + fileName?: string; + width?: number; + height?: number; + fps?: number; + scenes: VideoCourseScene[]; +} + +interface VideoCourseScene { + title?: string; + imageField: string; + fallbackMs?: number; + tracks?: VideoCourseTrack[]; +} + +type VideoCourseTrack = + | { + type: 'file'; + field: string; + durationMs?: number; + mimeType?: string; + required?: boolean; + } + | { + type: 'url'; + url: string; + durationMs?: number; + mimeType?: string; + required?: boolean; + } + | { + type: 'silence'; + 
durationMs: number; + }; + +interface PreparedTrack { + path?: string; + durationMs: number; + type: 'file' | 'silence'; +} + +const DEFAULT_WIDTH = 1280; +const DEFAULT_HEIGHT = 720; +const DEFAULT_FPS = 30; +const DEFAULT_SCENE_MS = 2500; +const MIN_SCENE_MS = 1000; + +function asFile(value: FormDataEntryValue | null): File | null { + return value instanceof File ? value : null; +} + +function clampNumber(value: unknown, fallback: number, min: number, max: number): number { + const n = typeof value === 'number' && Number.isFinite(value) ? value : fallback; + return Math.max(min, Math.min(max, Math.round(n))); +} + +function extFromMime(mimeType: string | undefined, fallback: string): string { + if (!mimeType) return fallback; + if (mimeType.includes('png')) return 'png'; + if (mimeType.includes('jpeg') || mimeType.includes('jpg')) return 'jpg'; + if (mimeType.includes('wav')) return 'wav'; + if (mimeType.includes('webm')) return 'webm'; + if (mimeType.includes('ogg')) return 'ogg'; + if (mimeType.includes('mp4') || mimeType.includes('m4a')) return 'm4a'; + if (mimeType.includes('mpeg') || mimeType.includes('mp3')) return 'mp3'; + return fallback; +} + +function extFromUrl(url: URL, fallback: string): string { + const ext = path.extname(url.pathname).toLowerCase().replace(/^\./, ''); + return ext || fallback; +} + +function quoteConcatPath(filePath: string): string { + return filePath.replace(/'/g, "'\\''"); +} + +function resolveSafeAudioUrl(rawUrl: string, request: NextRequest): URL | null { + try { + const parsed = new URL(rawUrl, request.url); + if (!parsed.pathname.startsWith('/api/classroom-media/')) return null; + if (!parsed.pathname.includes('/audio/')) return null; + + // Only trust the path. Server-generated classrooms may preserve the + // origin used at generation time, while the user later opens the app from + // localhost/127.0.0.1 or another forwarded host. 
Rebase to this request's + // origin so we still read from our own media route, never from an external + // host supplied by the client. + return new URL(`${parsed.pathname}${parsed.search}`, request.url); + } catch { + return null; + } +} + +async function runFfmpeg(args: string[]): Promise { + const binary = process.env.FFMPEG_PATH || 'ffmpeg'; + try { + await execFileAsync(binary, ['-hide_banner', '-loglevel', 'error', ...args], { + maxBuffer: 1024 * 1024 * 16, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`ffmpeg failed: ${message}`); + } +} + +async function probeDurationMs(filePath: string, fallbackMs?: number): Promise { + const binary = process.env.FFPROBE_PATH || 'ffprobe'; + try { + const { stdout } = await execFileAsync( + binary, + [ + '-v', + 'error', + '-show_entries', + 'format=duration', + '-of', + 'default=noprint_wrappers=1:nokey=1', + filePath, + ], + { maxBuffer: 1024 * 1024 }, + ); + const seconds = Number.parseFloat(stdout.trim()); + if (Number.isFinite(seconds) && seconds > 0) return Math.round(seconds * 1000); + } catch { + // Keep export moving when ffprobe cannot read a file; the caller's + // duration estimate is still good enough for a still-image segment. + } + return Math.max(MIN_SCENE_MS, fallbackMs ?? 
DEFAULT_SCENE_MS); +} + +async function writeUploadedFile(file: File, targetPath: string): Promise { + const bytes = Buffer.from(await file.arrayBuffer()); + await writeFile(targetPath, bytes); +} + +async function writeFetchedAudio(url: URL, targetPath: string): Promise { + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Audio download failed: ${response.status} ${response.statusText}`); + } + const bytes = Buffer.from(await response.arrayBuffer()); + await writeFile(targetPath, bytes); +} + +async function prepareTrack( + track: VideoCourseTrack, + formData: FormData, + workDir: string, + sceneIndex: number, + trackIndex: number, + request: NextRequest, +): Promise { + if (track.type === 'silence') { + return { + type: 'silence', + durationMs: Math.max(MIN_SCENE_MS, Math.round(track.durationMs || DEFAULT_SCENE_MS)), + }; + } + + if (track.type === 'url') { + const url = resolveSafeAudioUrl(track.url, request); + if (!url) { + if (track.required) { + throw new Error('Narration audio URL is not available'); + } + return { + type: 'silence', + durationMs: Math.max(MIN_SCENE_MS, Math.round(track.durationMs || DEFAULT_SCENE_MS)), + }; + } + + try { + const ext = extFromMime(track.mimeType, extFromUrl(url, 'audio')); + const filePath = path.join(workDir, `scene_${sceneIndex}_track_${trackIndex}.${ext}`); + await writeFetchedAudio(url, filePath); + return { + type: 'file', + path: filePath, + durationMs: await probeDurationMs(filePath, track.durationMs), + }; + } catch (error) { + if (track.required) { + throw error; + } + return { + type: 'silence', + durationMs: Math.max(MIN_SCENE_MS, Math.round(track.durationMs || DEFAULT_SCENE_MS)), + }; + } + } + + const file = asFile(formData.get(track.field)); + if (!file) { + if (track.required) { + throw new Error('Narration audio file is missing'); + } + return { + type: 'silence', + durationMs: Math.max(MIN_SCENE_MS, Math.round(track.durationMs || DEFAULT_SCENE_MS)), + }; + } + + const ext = 
extFromMime(track.mimeType || file.type, 'audio'); + const filePath = path.join(workDir, `scene_${sceneIndex}_track_${trackIndex}.${ext}`); + await writeUploadedFile(file, filePath); + + return { + type: 'file', + path: filePath, + durationMs: await probeDurationMs(filePath, track.durationMs), + }; +} + +async function buildSceneAudio( + preparedTracks: PreparedTrack[], + fallbackMs: number, + workDir: string, + sceneIndex: number, +): Promise<{ audioPath: string; durationMs: number }> { + const tracks = + preparedTracks.length > 0 + ? preparedTracks + : [{ type: 'silence' as const, durationMs: fallbackMs }]; + + const durationMs = Math.max( + MIN_SCENE_MS, + tracks.reduce((sum, track) => sum + Math.max(0, track.durationMs), 0), + ); + const audioPath = path.join(workDir, `scene_${sceneIndex}_audio.m4a`); + + if (tracks.length === 1 && tracks[0].type === 'silence') { + await runFfmpeg([ + '-y', + '-f', + 'lavfi', + '-t', + (durationMs / 1000).toFixed(3), + '-i', + 'anullsrc=channel_layout=stereo:sample_rate=44100', + '-c:a', + 'aac', + '-b:a', + '128k', + audioPath, + ]); + return { audioPath, durationMs }; + } + + if (tracks.length === 1 && tracks[0].type === 'file' && tracks[0].path) { + await runFfmpeg([ + '-y', + '-i', + tracks[0].path, + '-vn', + '-ac', + '2', + '-ar', + '44100', + '-c:a', + 'aac', + '-b:a', + '128k', + audioPath, + ]); + return { audioPath, durationMs }; + } + + const args = ['-y']; + const labels: string[] = []; + tracks.forEach((track, index) => { + if (track.type === 'silence') { + args.push( + '-f', + 'lavfi', + '-t', + (track.durationMs / 1000).toFixed(3), + '-i', + 'anullsrc=channel_layout=stereo:sample_rate=44100', + ); + } else if (track.path) { + args.push('-i', track.path); + } + labels.push(`[${index}:a]`); + }); + + args.push( + '-filter_complex', + `${labels.join('')}concat=n=${tracks.length}:v=0:a=1[a]`, + '-map', + '[a]', + '-ac', + '2', + '-ar', + '44100', + '-c:a', + 'aac', + '-b:a', + '128k', + audioPath, + ); + + await 
runFfmpeg(args); + return { audioPath, durationMs }; +} + +async function buildSceneVideo({ + imagePath, + audioPath, + outputPath, + durationMs, + width, + height, + fps, +}: { + imagePath: string; + audioPath: string; + outputPath: string; + durationMs: number; + width: number; + height: number; + fps: number; +}): Promise { + const duration = (durationMs / 1000).toFixed(3); + await runFfmpeg([ + '-y', + '-loop', + '1', + '-t', + duration, + '-i', + imagePath, + '-i', + audioPath, + '-vf', + `scale=${width}:${height}:force_original_aspect_ratio=decrease,pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:color=white,setsar=1,format=yuv420p`, + '-r', + String(fps), + '-c:v', + 'libx264', + '-preset', + 'veryfast', + '-tune', + 'stillimage', + '-c:a', + 'aac', + '-b:a', + '128k', + '-ar', + '44100', + '-ac', + '2', + '-shortest', + outputPath, + ]); +} + +async function concatSegments(segmentPaths: string[], outputPath: string, workDir: string) { + const listPath = path.join(workDir, 'segments.txt'); + await writeFile( + listPath, + segmentPaths.map((segmentPath) => `file '${quoteConcatPath(segmentPath)}'`).join('\n'), + ); + await runFfmpeg(['-y', '-f', 'concat', '-safe', '0', '-i', listPath, '-c', 'copy', outputPath]); +} + +export async function POST(request: NextRequest) { + let workDir: string | null = null; + + try { + const formData = await request.formData(); + const rawManifest = formData.get('manifest'); + if (typeof rawManifest !== 'string') { + return apiError('MISSING_REQUIRED_FIELD', 400, 'Missing manifest'); + } + + const manifest = JSON.parse(rawManifest) as VideoCourseManifest; + if (!Array.isArray(manifest.scenes) || manifest.scenes.length === 0) { + return apiError('INVALID_REQUEST', 400, 'No scenes to export'); + } + + const width = clampNumber(manifest.width, DEFAULT_WIDTH, 320, 3840); + const height = clampNumber(manifest.height, DEFAULT_HEIGHT, 180, 2160); + const fps = clampNumber(manifest.fps, DEFAULT_FPS, 10, 60); + + workDir = await 
mkdtemp(path.join(tmpdir(), 'maic-video-export-')); + const segmentPaths: string[] = []; + + for (let sceneIndex = 0; sceneIndex < manifest.scenes.length; sceneIndex++) { + const scene = manifest.scenes[sceneIndex]; + const imageFile = asFile(formData.get(scene.imageField)); + if (!imageFile) { + return apiError('INVALID_REQUEST', 400, `Missing image for scene ${sceneIndex + 1}`); + } + + const imagePath = path.join(workDir, `scene_${sceneIndex}.png`); + await writeUploadedFile(imageFile, imagePath); + + const fallbackMs = Math.max(MIN_SCENE_MS, Math.round(scene.fallbackMs || DEFAULT_SCENE_MS)); + const preparedTracks = await Promise.all( + (scene.tracks ?? []).map((track, trackIndex) => + prepareTrack(track, formData, workDir!, sceneIndex, trackIndex, request), + ), + ); + const { audioPath, durationMs } = await buildSceneAudio( + preparedTracks, + fallbackMs, + workDir, + sceneIndex, + ); + + const segmentPath = path.join(workDir, `segment_${sceneIndex}.mp4`); + await buildSceneVideo({ + imagePath, + audioPath, + outputPath: segmentPath, + durationMs, + width, + height, + fps, + }); + segmentPaths.push(segmentPath); + } + + const outputPath = path.join(workDir, 'course.mp4'); + await concatSegments(segmentPaths, outputPath, workDir); + const output = await readFile(outputPath); + + const fileName = (manifest.fileName || 'course').replace(/[\\/:*?"<>|]/g, '_') || 'course'; + return new NextResponse(output, { + headers: { + 'Content-Type': 'video/mp4', + 'Content-Disposition': `attachment; filename="${encodeURIComponent(fileName)}.mp4"`, + }, + }); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return apiError('INTERNAL_ERROR', 500, message); + } finally { + if (workDir) { + await rm(workDir, { recursive: true, force: true }); + } + } +} diff --git a/components/stage/header-controls.tsx b/components/stage/header-controls.tsx index 544e746f85..18d3506892 100644 --- a/components/stage/header-controls.tsx +++ b/components/stage/header-controls.tsx @@ -3,8 +3,10 @@ import { useCallback, useEffect, useRef, useState } from 'react'; import { Archive, + Check, Download, FileDown, + FileVideo, Loader2, Monitor, Moon, @@ -12,6 +14,15 @@ import { Settings, Sun, } from 'lucide-react'; +import { Button } from '@/components/ui/button'; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog'; import { Switch } from '@/components/ui/switch'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useTheme } from '@/lib/hooks/use-theme'; @@ -19,6 +30,12 @@ import { useStageStore } from '@/lib/store'; import { useMediaGenerationStore } from '@/lib/store/media-generation'; import { useExportPPTX } from '@/lib/export/use-export-pptx'; import { useExportClassroom } from '@/lib/export/use-export-classroom'; +import { + DEFAULT_VIDEO_COURSE_EXPORT_RESOLUTION_ID, + VIDEO_COURSE_EXPORT_RESOLUTIONS, + useExportVideoCourse, + type VideoCourseExportResolutionId, +} from '@/lib/export/use-export-video-course'; import { LanguageSwitcher } from '../language-switcher'; import { SettingsDialog } from '../settings'; import { @@ -77,8 +94,13 @@ export function HeaderControls({ const mediaTasks = useMediaGenerationStore((s) => s.tasks); const { exporting: isExporting, exportPPTX, exportResourcePack } = useExportPPTX(); const { exporting: isExportingZip, exportClassroomZip } = useExportClassroom(); + const { exporting: isExportingVideo, exportVideoCourse } = useExportVideoCourse(); const [exportMenuOpen, setExportMenuOpen] = useState(false); + const [videoResolutionDialogOpen, 
setVideoResolutionDialogOpen] = useState(false); + const [selectedVideoResolutionId, setSelectedVideoResolutionId] = + useState(DEFAULT_VIDEO_COURSE_EXPORT_RESOLUTION_ID); const exportRef = useRef(null); + const isAnyExporting = isExporting || isExportingZip || isExportingVideo; const canExport = scenes.length > 0 && @@ -231,27 +253,27 @@ export function HeaderControls({
+ + ); + })} + + + + + + + + + ); diff --git a/lib/export/use-export-video-course.ts b/lib/export/use-export-video-course.ts new file mode 100644 index 0000000000..0bfec557e8 --- /dev/null +++ b/lib/export/use-export-video-course.ts @@ -0,0 +1,582 @@ +'use client'; + +import { useCallback, useRef, useState } from 'react'; +import { saveAs } from 'file-saver'; +import { toast } from 'sonner'; +import { slideToPng } from '@maic/renderer/snapshot'; +import { useI18n } from '@/lib/hooks/use-i18n'; +import { + BROWSER_NATIVE_TTS_PROVIDER_ID, + isTTSProviderEnabled, +} from '@/lib/audio/provider-enablement'; +import { resolveAgentVoiceOptions, pickNarratorAgent } from '@/lib/audio/agent-voice'; +import { useAgentRegistry } from '@/lib/orchestration/registry/store'; +import { useStageStore } from '@/lib/store/stage'; +import { useSettingsStore } from '@/lib/store/settings'; +import { db } from '@/lib/utils/database'; +import type { SpeechAction } from '@/lib/types/action'; +import type { Scene, SlideContent } from '@/lib/types/stage'; + +type RendererSlide = Parameters[0]; + +const EXPORT_FPS = 30; +const DEFAULT_SCENE_MS = 2500; +const BASE_FRAME_WIDTH = 1280; +const BASE_FRAME_HEIGHT = 720; + +export type VideoCourseExportResolutionId = '720p' | '1080p' | '1440p' | '2160p'; + +export interface VideoCourseExportResolution { + id: VideoCourseExportResolutionId; + label: string; + width: number; + height: number; +} + +export const VIDEO_COURSE_EXPORT_RESOLUTIONS: VideoCourseExportResolution[] = [ + { id: '720p', label: '720p', width: 1280, height: 720 }, + { id: '1080p', label: '1080p', width: 1920, height: 1080 }, + { id: '1440p', label: '2K', width: 2560, height: 1440 }, + { id: '2160p', label: '4K', width: 3840, height: 2160 }, +]; + +export const DEFAULT_VIDEO_COURSE_EXPORT_RESOLUTION_ID: VideoCourseExportResolutionId = '1080p'; + +export interface ExportVideoCourseOptions { + resolutionId?: VideoCourseExportResolutionId; +} + +type VideoCourseTrack = + | { + type: 
'file'; + field: string; + durationMs?: number; + mimeType?: string; + required?: boolean; + } + | { + type: 'url'; + url: string; + durationMs?: number; + mimeType?: string; + required?: boolean; + } + | { + type: 'silence'; + durationMs: number; + }; + +interface VideoCourseManifest { + fileName: string; + width: number; + height: number; + fps: number; + scenes: Array<{ + title: string; + imageField: string; + fallbackMs: number; + tracks: VideoCourseTrack[]; + }>; +} + +interface AudioAsset { + blob: Blob; + durationMs?: number; + mimeType?: string; + extension: string; +} + +function safeFileName(name: string | undefined, fallback: string): string { + return (name || fallback).replace(/[\\/:*?"<>|]/g, '_') || fallback; +} + +function videoCourseFileName( + name: string | undefined, + resolution: VideoCourseExportResolution, +): string { + return `${safeFileName(name, 'course')}-${resolution.label}`; +} + +function estimateSpeechMs(text: string): number { + const cjkCount = ( + text.match(/[\u4e00-\u9fff\u3400-\u4dbf\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) || [] + ).length; + const isCJK = cjkCount > text.length * 0.3; + const rawMs = isCJK + ? 
Math.max(2000, text.length * 150) + : Math.max(2000, text.split(/\s+/).filter(Boolean).length * 240); + return Math.round(rawMs); +} + +function audioMimeFromFormat(format?: string, blobType?: string): string { + if (blobType) return blobType; + switch ((format || '').toLowerCase()) { + case 'wav': + return 'audio/wav'; + case 'webm': + return 'audio/webm'; + case 'm4a': + case 'mp4': + return 'audio/mp4'; + case 'ogg': + return 'audio/ogg'; + case 'mp3': + case 'mpeg': + default: + return 'audio/mpeg'; + } +} + +function audioExtFromMime(mimeType?: string): string { + if (!mimeType) return 'mp3'; + if (mimeType.includes('wav')) return 'wav'; + if (mimeType.includes('webm')) return 'webm'; + if (mimeType.includes('ogg')) return 'ogg'; + if (mimeType.includes('mp4') || mimeType.includes('m4a')) return 'm4a'; + return 'mp3'; +} + +async function blobFromDataUrl(dataUrl: string): Promise { + const res = await fetch(dataUrl); + return await res.blob(); +} + +async function canvasToBlob(canvas: HTMLCanvasElement): Promise { + return new Promise((resolve, reject) => { + canvas.toBlob((blob) => { + if (blob) resolve(blob); + else reject(new Error('Unable to render placeholder frame')); + }, 'image/png'); + }); +} + +function resolveVideoCourseExportResolution( + resolutionId?: VideoCourseExportResolutionId, +): VideoCourseExportResolution { + return ( + VIDEO_COURSE_EXPORT_RESOLUTIONS.find((resolution) => resolution.id === resolutionId) ?? + VIDEO_COURSE_EXPORT_RESOLUTIONS.find( + (resolution) => resolution.id === DEFAULT_VIDEO_COURSE_EXPORT_RESOLUTION_ID, + )! + ); +} + +function drawWrappedText( + ctx: CanvasRenderingContext2D, + text: string, + x: number, + y: number, + maxWidth: number, + lineHeight: number, + maxLines: number, +): number { + const words = text.split(/\s+/); + const hasSpaces = words.length > 1; + const units = hasSpaces ? words : Array.from(text); + let line = ''; + let lines = 0; + + for (const unit of units) { + const next = hasSpaces ? (line ? 
`${line} ${unit}` : unit) : line + unit; + if (ctx.measureText(next).width > maxWidth && line) { + ctx.fillText(line, x, y); + y += lineHeight; + lines++; + line = unit; + if (lines >= maxLines - 1) break; + } else { + line = next; + } + } + + if (line && lines < maxLines) { + ctx.fillText(line, x, y); + y += lineHeight; + } + return y; +} + +async function createStaticSceneFrame( + scene: Scene, + index: number, + resolution: VideoCourseExportResolution, +): Promise { + const { width, height } = resolution; + const canvas = document.createElement('canvas'); + canvas.width = width; + canvas.height = height; + const ctx = canvas.getContext('2d'); + if (!ctx) throw new Error('Canvas is not available'); + + ctx.fillStyle = '#ffffff'; + ctx.fillRect(0, 0, width, height); + ctx.save(); + ctx.scale(width / BASE_FRAME_WIDTH, height / BASE_FRAME_HEIGHT); + + const accent = + scene.type === 'quiz' + ? '#7c3aed' + : scene.type === 'interactive' + ? '#2563eb' + : scene.type === 'pbl' + ? '#059669' + : '#f59e0b'; + ctx.fillStyle = '#f8fafc'; + ctx.fillRect(0, 0, BASE_FRAME_WIDTH, BASE_FRAME_HEIGHT); + ctx.fillStyle = accent; + ctx.fillRect(0, 0, 14, BASE_FRAME_HEIGHT); + + ctx.fillStyle = '#e5e7eb'; + ctx.fillRect(120, 142, BASE_FRAME_WIDTH - 240, 2); + + ctx.fillStyle = accent; + ctx.beginPath(); + ctx.roundRect(120, 96, 150, 44, 22); + ctx.fill(); + + ctx.fillStyle = '#ffffff'; + ctx.font = '600 22px "Noto Sans SC", "Microsoft YaHei", sans-serif'; + ctx.textBaseline = 'middle'; + ctx.fillText(`第 ${index + 1} 页`, 150, 118); + + ctx.fillStyle = '#111827'; + ctx.font = '700 52px "Noto Sans SC", "Microsoft YaHei", sans-serif'; + ctx.textBaseline = 'alphabetic'; + drawWrappedText(ctx, scene.title || '课程页面', 120, 250, BASE_FRAME_WIDTH - 240, 68, 3); + + ctx.fillStyle = '#6b7280'; + ctx.font = '400 24px "Noto Sans SC", "Microsoft YaHei", sans-serif'; + const typeLabel = + scene.type === 'quiz' + ? '小测页面' + : scene.type === 'interactive' + ? '互动页面' + : scene.type === 'pbl' + ? 
'项目页面' + : '课程页面'; + ctx.fillText(typeLabel, 120, 520); + ctx.restore(); + + return await canvasToBlob(canvas); +} + +async function renderSceneFrame( + scene: Scene, + index: number, + resolution: VideoCourseExportResolution, +): Promise { + if (scene.content.type === 'slide') { + const slide = (scene.content as SlideContent).canvas as unknown as RendererSlide; + const image = await slideToPng(slide, { + width: resolution.width, + pixelRatio: 1, + backgroundColor: '#ffffff', + format: 'blob', + timeoutMs: 10000, + }); + return typeof image === 'string' ? await blobFromDataUrl(image) : image; + } + + return await createStaticSceneFrame(scene, index, resolution); +} + +async function resolveSpeechAudioFromDb(audioId?: string): Promise { + if (!audioId) return null; + const record = await db.audioFiles.get(audioId); + if (!record) return null; + const mimeType = audioMimeFromFormat(record.format, record.blob.type); + return { + blob: record.blob, + durationMs: record.duration ? Math.round(record.duration * 1000) : undefined, + mimeType, + extension: audioExtFromMime(mimeType), + }; +} + +function getSpeechActions(scene: Scene): SpeechAction[] { + return (scene.actions ?? []).filter((action): action is SpeechAction => action.type === 'speech'); +} + +function getSpeechExportAudioId(scene: Scene, speech: SpeechAction, speechIndex: number): string { + return speech.audioId || `tts_export_s${scene.order}_${speech.id || speechIndex}`; +} + +function isExportableAudioUrl(audioUrl?: string): audioUrl is string { + if (!audioUrl) return false; + try { + const parsed = new URL( + audioUrl, + typeof window === 'undefined' ? 
'http://localhost' : window.location.href, + ); + return ( + parsed.pathname.startsWith('/api/classroom-media/') && parsed.pathname.includes('/audio/') + ); + } catch { + return false; + } +} + +async function hasExistingExportableNarration( + scene: Scene, + speech: SpeechAction, + speechIndex: number, +): Promise { + if (isExportableAudioUrl(speech.audioUrl)) return true; + + const exportAudioId = getSpeechExportAudioId(scene, speech, speechIndex); + if (await db.audioFiles.get(exportAudioId)) return true; + if (speech.audioId && speech.audioId !== exportAudioId) { + return !!(await db.audioFiles.get(speech.audioId)); + } + return false; +} + +async function generateSpeechAudioForExport( + audioId: string, + text: string, + language?: string, +): Promise { + const settings = useSettingsStore.getState(); + if (settings.ttsProviderId === BROWSER_NATIVE_TTS_PROVIDER_ID) return null; + if ( + !isTTSProviderEnabled( + settings.ttsProviderId, + settings.ttsProvidersConfig?.[settings.ttsProviderId], + ) + ) { + return null; + } + + const ttsProviderConfig = settings.ttsProvidersConfig?.[settings.ttsProviderId]; + const teacher = pickNarratorAgent(useAgentRegistry.getState().listAgents()); + const providerOptions = await resolveAgentVoiceOptions(teacher, { + providerId: settings.ttsProviderId, + providerConfig: ttsProviderConfig, + voiceId: settings.ttsVoice, + language, + }); + + const response = await fetch('/api/generate/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + text, + audioId, + ttsProviderId: settings.ttsProviderId, + ttsModelId: ttsProviderConfig?.modelId, + ttsVoice: settings.ttsVoice, + ttsSpeed: settings.ttsSpeed, + ttsApiKey: ttsProviderConfig?.apiKey || undefined, + ttsBaseUrl: + ttsProviderConfig?.baseUrl || ttsProviderConfig?.customDefaultBaseUrl || undefined, + ttsProviderOptions: providerOptions, + }), + }); + + const data = await response + .json() + .catch(() => ({ success: false, error: 
response.statusText || 'Invalid TTS response' })); + if (!response.ok || !data.success || !data.base64 || !data.format) { + throw new Error(data.details || data.error || `TTS request failed: HTTP ${response.status}`); + } + + const binary = atob(data.base64); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + const blob = new Blob([bytes], { type: `audio/${data.format}` }); + await db.audioFiles.put({ + id: audioId, + blob, + format: data.format, + createdAt: Date.now(), + }); + + const mimeType = audioMimeFromFormat(data.format, blob.type); + return { + blob, + mimeType, + extension: audioExtFromMime(mimeType), + }; +} + +export function useExportVideoCourse() { + const [exporting, setExporting] = useState(false); + const exportingRef = useRef(false); + const { t } = useI18n(); + + const exportVideoCourse = useCallback( + async (options?: ExportVideoCourseOptions) => { + if (exportingRef.current) return; + const { stage, scenes } = useStageStore.getState(); + if (!stage?.id || scenes.length === 0) return; + + exportingRef.current = true; + setExporting(true); + const toastId = toast.loading(t('export.videoPreparing')); + + try { + const orderedScenes = [...scenes].sort((a, b) => a.order - b.order); + const exportResolution = resolveVideoCourseExportResolution(options?.resolutionId); + const settings = useSettingsStore.getState(); + const isBrowserNativeTTS = settings.ttsProviderId === BROWSER_NATIVE_TTS_PROVIDER_ID; + const speechEntries = orderedScenes.flatMap((scene) => + getSpeechActions(scene).map((speech, speechIndex) => ({ scene, speech, speechIndex })), + ); + + if (isBrowserNativeTTS && speechEntries.length > 0) { + let hasExistingAudio = false; + for (const { scene, speech, speechIndex } of speechEntries) { + if (await hasExistingExportableNarration(scene, speech, speechIndex)) { + hasExistingAudio = true; + break; + } + } + if (!hasExistingAudio) { + throw new 
Error(t('export.videoBrowserTTSUnsupported')); + } + } + + const formData = new FormData(); + let narrationTrackCount = 0; + let totalSpeechCount = 0; + let failedNarrationCount = 0; + const manifest: VideoCourseManifest = { + fileName: videoCourseFileName(stage.name, exportResolution), + width: exportResolution.width, + height: exportResolution.height, + fps: EXPORT_FPS, + scenes: [], + }; + + for (let sceneIndex = 0; sceneIndex < orderedScenes.length; sceneIndex++) { + const scene = orderedScenes[sceneIndex]; + toast.loading( + t('export.videoPreparingProgress', { + current: sceneIndex + 1, + total: orderedScenes.length, + }), + { id: toastId }, + ); + + const imageBlob = await renderSceneFrame(scene, sceneIndex, exportResolution); + const imageField = `image-${sceneIndex}`; + formData.append( + imageField, + imageBlob, + `scene-${String(sceneIndex + 1).padStart(3, '0')}.png`, + ); + + const tracks: VideoCourseTrack[] = []; + const speechActions = getSpeechActions(scene); + totalSpeechCount += speechActions.length; + + for (let speechIndex = 0; speechIndex < speechActions.length; speechIndex++) { + const speech = speechActions[speechIndex]; + const exportAudioId = getSpeechExportAudioId(scene, speech, speechIndex); + const audioUrl = isExportableAudioUrl(speech.audioUrl) ? 
speech.audioUrl : undefined; + let audio = await resolveSpeechAudioFromDb(exportAudioId); + if (!audio && speech.audioId && speech.audioId !== exportAudioId) { + audio = await resolveSpeechAudioFromDb(speech.audioId); + } + let generationFailed = false; + if (!audio && !audioUrl && !isBrowserNativeTTS) { + try { + toast.loading( + t('export.videoGeneratingNarration', { + current: sceneIndex + 1, + total: orderedScenes.length, + }), + { id: toastId }, + ); + audio = await generateSpeechAudioForExport( + exportAudioId, + speech.text, + stage.languageDirective, + ); + } catch { + generationFailed = true; + failedNarrationCount++; + } + } + if (audio) { + narrationTrackCount++; + const field = `audio-${sceneIndex}-${speechIndex}`; + formData.append(field, audio.blob, `${field}.${audio.extension}`); + tracks.push({ + type: 'file', + field, + durationMs: audio.durationMs, + mimeType: audio.mimeType, + required: true, + }); + } else if (audioUrl) { + narrationTrackCount++; + tracks.push({ + type: 'url', + url: audioUrl, + durationMs: estimateSpeechMs(speech.text), + required: true, + }); + } else { + if (!generationFailed) failedNarrationCount++; + tracks.push({ + type: 'silence', + durationMs: estimateSpeechMs(speech.text), + }); + } + } + + if (tracks.length === 0) { + tracks.push({ type: 'silence', durationMs: DEFAULT_SCENE_MS }); + } + + manifest.scenes.push({ + title: scene.title, + imageField, + fallbackMs: tracks.reduce((sum, track) => sum + (track.durationMs ?? 0), 0), + tracks, + }); + } + + if (totalSpeechCount > 0 && narrationTrackCount === 0) { + throw new Error(t('export.videoNarrationUnavailable')); + } + + formData.append('manifest', JSON.stringify(manifest)); + toast.loading(t('export.videoRendering'), { id: toastId }); + + const res = await fetch('/api/export/video-course', { + method: 'POST', + body: formData, + }); + + if (!res.ok) { + const payload = await res.json().catch(() => null); + const message = + payload && typeof payload.error === 'string' ? 
payload.error : t('export.exportFailed'); + throw new Error(message); + } + + const blob = await res.blob(); + saveAs(blob, `${videoCourseFileName(stage.name, exportResolution)}.mp4`); + toast.success(t('export.exportSuccess'), { id: toastId }); + if (narrationTrackCount === 0) { + toast.warning(t('export.videoNoNarration')); + } else if (failedNarrationCount > 0) { + toast.warning(t('export.videoPartialNarration', { count: failedNarrationCount })); + } + } catch (error) { + const message = error instanceof Error ? error.message : t('export.exportFailed'); + toast.error(message, { id: toastId }); + } finally { + exportingRef.current = false; + setExporting(false); + } + }, + [t], + ); + + return { exporting, exportVideoCourse }; +} diff --git a/lib/i18n/locales/ar-SA.json b/lib/i18n/locales/ar-SA.json index e873fa6e34..3937d6f242 100644 --- a/lib/i18n/locales/ar-SA.json +++ b/lib/i18n/locales/ar-SA.json @@ -41,6 +41,20 @@ "pptx": "تصدير PPTX", "resourcePack": "تصدير حزمة الموارد", "resourcePackDesc": "PPTX + صفحات تفاعلية", + "videoCourse": "تصدير دورة فيديو", + "videoCourseDesc": "شرائح + سرد صوتي MP4", + "videoPreparing": "جارٍ تجهيز مواد الفيديو...", + "videoPreparingProgress": "جارٍ تجهيز الصفحة {{current}} / {{total}}...", + "videoGeneratingNarration": "جارٍ إنشاء السرد للصفحة {{current}} / {{total}}...", + "videoRendering": "جارٍ إنشاء الفيديو...", + "videoNoNarration": "لم يتم العثور على صوت السرد؛ تم التصدير بصمت.", + "videoPartialNarration": "تعذّر إنشاء {{count}} عنصر سرد وتم استبداله بالصمت.", + "videoNarrationUnavailable": "لم يتم العثور على صوت سرد أو تعذّر إنشاؤه. يرجى إعداد خدمة TTS متاحة أولاً.", + "videoBrowserTTSUnsupported": "خدمة TTS الأصلية في المتصفح تشغّل الصوت محليًا فقط ولا تنشئ ملفات صوتية قابلة للتصدير. 
بدّل إلى خدمة TTS تنشئ ملفات صوتية مثل OpenAI أو Azure أو GLM أو Qwen أو MiniMax أو VoxCPM أو Doubao أو ElevenLabs أو Lemonade، ثم أعد التصدير.", + "videoResolutionTitle": "اختر جودة الفيديو", + "videoResolutionDesc": "كلما زادت الجودة، طال وقت التصدير وزاد حجم الملف.", + "videoResolutionRecommended": "موصى به", + "videoResolutionExport": "تصدير الفيديو", "classroomZip": "تصدير ملف الفصل ZIP", "classroomZipDesc": "هيكل المقرر + ملفات الوسائط", "exporting": "جارٍ التصدير...", diff --git a/lib/i18n/locales/en-US.json b/lib/i18n/locales/en-US.json index 4145d87875..1192db1481 100644 --- a/lib/i18n/locales/en-US.json +++ b/lib/i18n/locales/en-US.json @@ -41,6 +41,20 @@ "pptx": "Export PPTX", "resourcePack": "Export Resource Pack", "resourcePackDesc": "PPTX + interactive pages", + "videoCourse": "Export Video Course", + "videoCourseDesc": "Slide visuals + narration MP4", + "videoPreparing": "Preparing video assets...", + "videoPreparingProgress": "Preparing page {{current}} / {{total}}...", + "videoGeneratingNarration": "Generating narration for page {{current}} / {{total}}...", + "videoRendering": "Rendering video...", + "videoNoNarration": "No narration audio was found; exported with silence.", + "videoPartialNarration": "{{count}} narration item(s) could not be generated and were filled with silence.", + "videoNarrationUnavailable": "No narration audio was found or generated. Configure an available TTS service first.", + "videoBrowserTTSUnsupported": "Browser native TTS can play speech locally, but it cannot generate exportable audio files. 
Switch to a file-generating TTS service such as OpenAI, Azure, GLM, Qwen, MiniMax, VoxCPM, Doubao, ElevenLabs, or Lemonade, then export again.", + "videoResolutionTitle": "Choose Video Quality", + "videoResolutionDesc": "Higher quality takes longer to export and creates a larger file.", + "videoResolutionRecommended": "Recommended", + "videoResolutionExport": "Export Video", "exporting": "Exporting...", "exportSuccess": "Export successful", "exportFailed": "Export failed", diff --git a/lib/i18n/locales/ja-JP.json b/lib/i18n/locales/ja-JP.json index 7daea7110c..25d21ae394 100644 --- a/lib/i18n/locales/ja-JP.json +++ b/lib/i18n/locales/ja-JP.json @@ -41,6 +41,20 @@ "pptx": "PPTXエクスポート", "resourcePack": "リソースパックをエクスポート", "resourcePackDesc": "PPTX+インタラクティブページ", + "videoCourse": "動画コースをエクスポート", + "videoCourseDesc": "スライド画面 + ナレーション MP4", + "videoPreparing": "動画素材を準備中...", + "videoPreparingProgress": "{{current}} / {{total}} ページを準備中...", + "videoGeneratingNarration": "{{current}} / {{total}} ページのナレーションを生成中...", + "videoRendering": "動画を合成中...", + "videoNoNarration": "ナレーション音声が見つからなかったため、無音でエクスポートしました。", + "videoPartialNarration": "{{count}} 件のナレーションを生成できず、無音で補完しました。", + "videoNarrationUnavailable": "ナレーション音声が見つからず生成もできません。利用可能な TTS サービスを設定してください。", + "videoBrowserTTSUnsupported": "ブラウザー標準 TTS はローカル再生のみで、エクスポート可能な音声ファイルを生成できません。OpenAI、Azure、GLM、Qwen、MiniMax、VoxCPM、Doubao、ElevenLabs、Lemonade など音声ファイルを生成できる TTS サービスに切り替えてから再度エクスポートしてください。", + "videoResolutionTitle": "動画の画質を選択", + "videoResolutionDesc": "画質が高いほど、エクスポート時間とファイルサイズが増えます。", + "videoResolutionRecommended": "おすすめ", + "videoResolutionExport": "動画をエクスポート", "exporting": "エクスポート中...", "exportSuccess": "エクスポートが完了しました", "exportFailed": "エクスポートに失敗しました", diff --git a/lib/i18n/locales/pt-BR.json b/lib/i18n/locales/pt-BR.json index 7f8a112274..c28e73cd18 100644 --- a/lib/i18n/locales/pt-BR.json +++ b/lib/i18n/locales/pt-BR.json @@ -41,6 +41,20 @@ "pptx": "Exportar PPTX", "resourcePack": "Exportar Pacote de Recursos", 
"resourcePackDesc": "PPTX + páginas interativas", + "videoCourse": "Exportar Curso em Vídeo", + "videoCourseDesc": "Slides + narração em MP4", + "videoPreparing": "Preparando recursos do vídeo...", + "videoPreparingProgress": "Preparando página {{current}} / {{total}}...", + "videoGeneratingNarration": "Gerando narração da página {{current}} / {{total}}...", + "videoRendering": "Renderizando vídeo...", + "videoNoNarration": "Nenhuma narração foi encontrada; exportado com silêncio.", + "videoPartialNarration": "{{count}} item(ns) de narração não puderam ser gerados e foram preenchidos com silêncio.", + "videoNarrationUnavailable": "Nenhuma narração foi encontrada ou gerada. Configure primeiro um serviço TTS disponível.", + "videoBrowserTTSUnsupported": "O TTS nativo do navegador só reproduz a fala localmente e não gera arquivos de áudio exportáveis. Troque para um serviço TTS que gere arquivos, como OpenAI, Azure, GLM, Qwen, MiniMax, VoxCPM, Doubao, ElevenLabs ou Lemonade, e exporte novamente.", + "videoResolutionTitle": "Escolher Qualidade do Vídeo", + "videoResolutionDesc": "Quanto maior a qualidade, mais tempo a exportação leva e maior fica o arquivo.", + "videoResolutionRecommended": "Recomendado", + "videoResolutionExport": "Exportar Vídeo", "exporting": "Exportando...", "exportSuccess": "Exportação concluída", "exportFailed": "Falha na exportação", diff --git a/lib/i18n/locales/ru-RU.json b/lib/i18n/locales/ru-RU.json index e52663b94c..e1cadd41f8 100644 --- a/lib/i18n/locales/ru-RU.json +++ b/lib/i18n/locales/ru-RU.json @@ -41,6 +41,20 @@ "pptx": "Экспорт PPTX", "resourcePack": "Экспорт ресурсного пакета", "resourcePackDesc": "PPTX + интерактивные страницы", + "videoCourse": "Экспорт видеокурса", + "videoCourseDesc": "Слайды + озвучка MP4", + "videoPreparing": "Подготовка материалов видео...", + "videoPreparingProgress": "Подготовка страницы {{current}} / {{total}}...", + "videoGeneratingNarration": "Создание озвучки для страницы {{current}} / {{total}}...", + 
"videoRendering": "Сборка видео...", + "videoNoNarration": "Озвучка не найдена; видео экспортировано без звука.", + "videoPartialNarration": "Не удалось создать {{count}} элемент(ов) озвучки; они заменены тишиной.", + "videoNarrationUnavailable": "Озвучка не найдена и не может быть создана. Сначала настройте доступный TTS-сервис.", + "videoBrowserTTSUnsupported": "Встроенный TTS браузера может только воспроизводить речь локально и не создает аудиофайлы для экспорта. Переключитесь на TTS-сервис, который создает аудиофайлы, например OpenAI, Azure, GLM, Qwen, MiniMax, VoxCPM, Doubao, ElevenLabs или Lemonade, затем экспортируйте снова.", + "videoResolutionTitle": "Выберите качество видео", + "videoResolutionDesc": "Чем выше качество, тем дольше экспорт и больше размер файла.", + "videoResolutionRecommended": "Рекомендуется", + "videoResolutionExport": "Экспорт видео", "exporting": "Экспорт...", "exportSuccess": "Экспорт успешен", "exportFailed": "Ошибка экспорта", diff --git a/lib/i18n/locales/zh-CN.json b/lib/i18n/locales/zh-CN.json index e0c952be9c..8896ce6944 100644 --- a/lib/i18n/locales/zh-CN.json +++ b/lib/i18n/locales/zh-CN.json @@ -41,6 +41,20 @@ "pptx": "导出 PPTX", "resourcePack": "导出教学资源包", "resourcePackDesc": "PPTX + 交互式页面", + "videoCourse": "导出视频课程", + "videoCourseDesc": "课件画面 + 讲解音频 MP4", + "videoPreparing": "正在准备视频素材...", + "videoPreparingProgress": "正在准备第 {{current}} / {{total}} 页...", + "videoGeneratingNarration": "正在生成第 {{current}} / {{total}} 页讲解音频...", + "videoRendering": "正在合成视频...", + "videoNoNarration": "未找到讲解音频,已使用静音导出。", + "videoPartialNarration": "{{count}} 条讲解音频未生成,已用静音补齐。", + "videoNarrationUnavailable": "未找到或无法生成讲解音频,请先在设置中配置可用的 TTS 服务。", + "videoBrowserTTSUnsupported": "浏览器原生 TTS 只能在本机播放,不能生成可导出音频。请切换到 OpenAI、Azure、GLM、Qwen、MiniMax、VoxCPM、豆包、ElevenLabs 或 Lemonade 等可生成音频文件的 TTS 服务后重新导出。", + "videoResolutionTitle": "选择视频清晰度", + "videoResolutionDesc": "清晰度越高,导出时间和文件体积越大。", + "videoResolutionRecommended": "推荐", + "videoResolutionExport": "导出视频", 
"exporting": "正在导出...", "exportSuccess": "导出成功", "exportFailed": "导出失败", diff --git a/lib/i18n/locales/zh-TW.json b/lib/i18n/locales/zh-TW.json index f3ef14d959..e24826e7d0 100644 --- a/lib/i18n/locales/zh-TW.json +++ b/lib/i18n/locales/zh-TW.json @@ -41,6 +41,20 @@ "pptx": "匯出 PPTX", "resourcePack": "匯出教學資源包", "resourcePackDesc": "PPTX + 互動式頁面", + "videoCourse": "匯出影片課程", + "videoCourseDesc": "課件畫面 + 講解音訊 MP4", + "videoPreparing": "正在準備影片素材...", + "videoPreparingProgress": "正在準備第 {{current}} / {{total}} 頁...", + "videoGeneratingNarration": "正在生成第 {{current}} / {{total}} 頁講解音訊...", + "videoRendering": "正在合成影片...", + "videoNoNarration": "未找到講解音訊,已使用靜音匯出。", + "videoPartialNarration": "{{count}} 條講解音訊未生成,已用靜音補齊。", + "videoNarrationUnavailable": "未找到或無法生成講解音訊,請先在設定中配置可用的 TTS 服務。", + "videoBrowserTTSUnsupported": "瀏覽器原生 TTS 只能在本機播放,不能生成可匯出的音訊。請切換到 OpenAI、Azure、GLM、Qwen、MiniMax、VoxCPM、豆包、ElevenLabs 或 Lemonade 等可生成音訊檔案的 TTS 服務後重新匯出。", + "videoResolutionTitle": "選擇影片清晰度", + "videoResolutionDesc": "清晰度越高,匯出時間和檔案大小越大。", + "videoResolutionRecommended": "推薦", + "videoResolutionExport": "匯出影片", "exporting": "正在匯出...", "exportSuccess": "匯出成功", "exportFailed": "匯出失敗", From 5e4de7ea84219e2f45c4293db60e6b1b6ed65b9e Mon Sep 17 00:00:00 2001 From: wangxiaoming Date: Tue, 16 Jun 2026 15:26:42 +0800 Subject: [PATCH 2/4] fix(export): show video render failure details --- app/api/export/video-course/route.ts | 315 ++++++++++++++++++-------- lib/export/use-export-video-course.ts | 22 +- lib/i18n/locales/ar-SA.json | 1 + lib/i18n/locales/en-US.json | 1 + lib/i18n/locales/ja-JP.json | 1 + lib/i18n/locales/pt-BR.json | 1 + lib/i18n/locales/ru-RU.json | 1 + lib/i18n/locales/zh-CN.json | 1 + lib/i18n/locales/zh-TW.json | 1 + 9 files changed, 248 insertions(+), 96 deletions(-) diff --git a/app/api/export/video-course/route.ts b/app/api/export/video-course/route.ts index 57d0e994ac..be6df1f32e 100644 --- a/app/api/export/video-course/route.ts +++ b/app/api/export/video-course/route.ts @@ -10,6 
+10,7 @@ export const runtime = 'nodejs'; export const maxDuration = 300; const execFileAsync = promisify(execFile); +type ApiErrorCode = Parameters<typeof apiError>[0]; interface VideoCourseManifest { fileName?: string; @@ -57,6 +58,73 @@ const DEFAULT_HEIGHT = 720; const DEFAULT_FPS = 30; const DEFAULT_SCENE_MS = 2500; const MIN_SCENE_MS = 1000; +const MAX_ERROR_DETAILS_LENGTH = 3000; + +class VideoCourseExportError extends Error { + constructor( + message: string, + readonly details?: string, + readonly status: number = 500, + readonly errorCode: ApiErrorCode = 'INTERNAL_ERROR', + ) { + super(message); + this.name = 'VideoCourseExportError'; + } +} + +function outputToString(value: unknown): string { + if (!value) return ''; + if (typeof value === 'string') return value; + if (Buffer.isBuffer(value)) return value.toString('utf8'); + return String(value); +} + +function normalizeErrorDetails(value: string): string { + const lines = value + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean); + const normalized = lines.join('\n').trim(); + if (normalized.length <= MAX_ERROR_DETAILS_LENGTH) return normalized; + return `...${normalized.slice(normalized.length - MAX_ERROR_DETAILS_LENGTH)}`; +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +function execFailureDetails(error: unknown): string { + const err = error as Error & { + code?: string | number; + signal?: string; + stderr?: string | Buffer; + stdout?: string | Buffer; + }; + const parts = [ + errorMessage(error), + err.code !== undefined ? `exit code: ${err.code}` : '', + err.signal ? `signal: ${err.signal}` : '', + outputToString(err.stderr), + outputToString(err.stdout), + ].filter(Boolean); + return normalizeErrorDetails(parts.join('\n')); +} + +function sceneLabel(scene: VideoCourseScene, sceneIndex: number): string { + return `page ${sceneIndex + 1}${scene.title ? 
` (${scene.title})` : ''}`; +} + +function wrapExportError(message: string, error: unknown): VideoCourseExportError { + if (error instanceof VideoCourseExportError) { + return new VideoCourseExportError( + message, + normalizeErrorDetails([error.message, error.details].filter(Boolean).join('\n')), + error.status, + error.errorCode, + ); + } + return new VideoCourseExportError(message, normalizeErrorDetails(errorMessage(error))); +} function asFile(value: FormDataEntryValue | null): File | null { return value instanceof File ? value : null; @@ -105,15 +173,14 @@ function resolveSafeAudioUrl(rawUrl: string, request: NextRequest): URL | null { } } -async function runFfmpeg(args: string[]): Promise<void> { +async function runFfmpeg(args: string[], context: string): Promise<void> { const binary = process.env.FFMPEG_PATH || 'ffmpeg'; try { await execFileAsync(binary, ['-hide_banner', '-loglevel', 'error', ...args], { maxBuffer: 1024 * 1024 * 16, }); } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - throw new Error(`ffmpeg failed: ${message}`); + throw new VideoCourseExportError(`${context} failed`, execFailureDetails(error)); } } @@ -150,7 +217,10 @@ async function writeUploadedFile(file: File, targetPath: string): Promise async function writeFetchedAudio(url: URL, targetPath: string): Promise { const response = await fetch(url); if (!response.ok) { - throw new Error(`Audio download failed: ${response.status} ${response.statusText}`); + throw new VideoCourseExportError( + 'Narration audio download failed', + `HTTP ${response.status} ${response.statusText}: ${url.pathname}`, + ); } const bytes = Buffer.from(await response.arrayBuffer()); await writeFile(targetPath, bytes); @@ -243,39 +313,45 @@ async function buildSceneAudio( const audioPath = path.join(workDir, `scene_${sceneIndex}_audio.m4a`); if (tracks.length === 1 && tracks[0].type === 'silence') { - await runFfmpeg([ - '-y', - '-f', - 'lavfi', - '-t', - (durationMs / 1000).toFixed(3), - '-i', - 'anullsrc=channel_layout=stereo:sample_rate=44100', - '-c:a', - 'aac', - '-b:a', - '128k', - audioPath, - ]); + await runFfmpeg( + [ + '-y', + '-f', + 'lavfi', + '-t', + (durationMs / 1000).toFixed(3), + '-i', + 'anullsrc=channel_layout=stereo:sample_rate=44100', + '-c:a', + 'aac', + '-b:a', + '128k', + audioPath, + ], + `Build audio for page ${sceneIndex + 1}`, + ); return { audioPath, durationMs }; } if (tracks.length === 1 && tracks[0].type === 'file' && tracks[0].path) { - await runFfmpeg([ - '-y', - '-i', - tracks[0].path, - '-vn', - '-ac', - '2', - '-ar', - '44100', - '-c:a', - 'aac', - '-b:a', - '128k', - audioPath, - ]); + await runFfmpeg( + [ + '-y', + '-i', + tracks[0].path, + '-vn', + '-ac', + '2', + '-ar', + '44100', + '-c:a', + 'aac', + '-b:a', + '128k', + audioPath, + ], + `Convert narration audio for page ${sceneIndex + 1}`, + ); return { audioPath, durationMs }; } @@ -313,7 +389,7 @@ async function buildSceneAudio( audioPath, ); - await runFfmpeg(args); + await 
runFfmpeg(args, `Merge narration audio for page ${sceneIndex + 1}`); return { audioPath, durationMs }; } @@ -335,37 +411,40 @@ async function buildSceneVideo({ fps: number; }): Promise { const duration = (durationMs / 1000).toFixed(3); - await runFfmpeg([ - '-y', - '-loop', - '1', - '-t', - duration, - '-i', - imagePath, - '-i', - audioPath, - '-vf', - `scale=${width}:${height}:force_original_aspect_ratio=decrease,pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:color=white,setsar=1,format=yuv420p`, - '-r', - String(fps), - '-c:v', - 'libx264', - '-preset', - 'veryfast', - '-tune', - 'stillimage', - '-c:a', - 'aac', - '-b:a', - '128k', - '-ar', - '44100', - '-ac', - '2', - '-shortest', - outputPath, - ]); + await runFfmpeg( + [ + '-y', + '-loop', + '1', + '-t', + duration, + '-i', + imagePath, + '-i', + audioPath, + '-vf', + `scale=${width}:${height}:force_original_aspect_ratio=decrease,pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:color=white,setsar=1,format=yuv420p`, + '-r', + String(fps), + '-c:v', + 'libx264', + '-preset', + 'veryfast', + '-tune', + 'stillimage', + '-c:a', + 'aac', + '-b:a', + '128k', + '-ar', + '44100', + '-ac', + '2', + '-shortest', + outputPath, + ], + 'Build page video segment', + ); } async function concatSegments(segmentPaths: string[], outputPath: string, workDir: string) { @@ -374,7 +453,10 @@ async function concatSegments(segmentPaths: string[], outputPath: string, workDi listPath, segmentPaths.map((segmentPath) => `file '${quoteConcatPath(segmentPath)}'`).join('\n'), ); - await runFfmpeg(['-y', '-f', 'concat', '-safe', '0', '-i', listPath, '-c', 'copy', outputPath]); + await runFfmpeg( + ['-y', '-f', 'concat', '-safe', '0', '-i', listPath, '-c', 'copy', outputPath], + 'Concat video segments', + ); } export async function POST(request: NextRequest) { @@ -403,52 +485,99 @@ export async function POST(request: NextRequest) { const scene = manifest.scenes[sceneIndex]; const imageFile = asFile(formData.get(scene.imageField)); if (!imageFile) { - 
return apiError('INVALID_REQUEST', 400, `Missing image for scene ${sceneIndex + 1}`); + return apiError( + 'INVALID_REQUEST', + 400, + `Missing rendered image for ${sceneLabel(scene, sceneIndex)}`, + ); } const imagePath = path.join(workDir, `scene_${sceneIndex}.png`); - await writeUploadedFile(imageFile, imagePath); + try { + await writeUploadedFile(imageFile, imagePath); + } catch (error) { + throw wrapExportError( + `Failed to write rendered image for ${sceneLabel(scene, sceneIndex)}`, + error, + ); + } const fallbackMs = Math.max(MIN_SCENE_MS, Math.round(scene.fallbackMs || DEFAULT_SCENE_MS)); - const preparedTracks = await Promise.all( - (scene.tracks ?? []).map((track, trackIndex) => - prepareTrack(track, formData, workDir!, sceneIndex, trackIndex, request), - ), - ); - const { audioPath, durationMs } = await buildSceneAudio( - preparedTracks, - fallbackMs, - workDir, - sceneIndex, - ); + let preparedTracks: PreparedTrack[]; + try { + preparedTracks = await Promise.all( + (scene.tracks ?? 
[]).map((track, trackIndex) => + prepareTrack(track, formData, workDir!, sceneIndex, trackIndex, request), + ), + ); + } catch (error) { + throw wrapExportError( + `Failed to prepare narration audio for ${sceneLabel(scene, sceneIndex)}`, + error, + ); + } + + let sceneAudio: { audioPath: string; durationMs: number }; + try { + sceneAudio = await buildSceneAudio(preparedTracks, fallbackMs, workDir, sceneIndex); + } catch (error) { + throw wrapExportError( + `Failed to build narration audio for ${sceneLabel(scene, sceneIndex)}`, + error, + ); + } const segmentPath = path.join(workDir, `segment_${sceneIndex}.mp4`); - await buildSceneVideo({ - imagePath, - audioPath, - outputPath: segmentPath, - durationMs, - width, - height, - fps, - }); + try { + await buildSceneVideo({ + imagePath, + audioPath: sceneAudio.audioPath, + outputPath: segmentPath, + durationMs: sceneAudio.durationMs, + width, + height, + fps, + }); + } catch (error) { + throw wrapExportError( + `Failed to build video segment for ${sceneLabel(scene, sceneIndex)}`, + error, + ); + } segmentPaths.push(segmentPath); } const outputPath = path.join(workDir, 'course.mp4'); - await concatSegments(segmentPaths, outputPath, workDir); - const output = await readFile(outputPath); + try { + await concatSegments(segmentPaths, outputPath, workDir); + } catch (error) { + throw wrapExportError('Failed to concat video segments', error); + } + + let output: Buffer; + try { + output = await readFile(outputPath); + } catch (error) { + throw wrapExportError('Failed to read rendered video file', error); + } const fileName = (manifest.fileName || 'course').replace(/[\\/:*?"<>|]/g, '_') || 'course'; - return new NextResponse(output, { + return new NextResponse(new Uint8Array(output), { headers: { 'Content-Type': 'video/mp4', 'Content-Disposition': `attachment; filename="${encodeURIComponent(fileName)}.mp4"`, }, }); } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - return apiError('INTERNAL_ERROR', 500, message); + if (error instanceof VideoCourseExportError) { + return apiError(error.errorCode, error.status, error.message, error.details); + } + return apiError( + 'INTERNAL_ERROR', + 500, + 'Video course export failed', + normalizeErrorDetails(errorMessage(error)), + ); } finally { if (workDir) { await rm(workDir, { recursive: true, force: true }); diff --git a/lib/export/use-export-video-course.ts b/lib/export/use-export-video-course.ts index 0bfec557e8..cd3e904cc1 100644 --- a/lib/export/use-export-video-course.ts +++ b/lib/export/use-export-video-course.ts @@ -97,6 +97,19 @@ function videoCourseFileName( return `${safeFileName(name, 'course')}-${resolution.label}`; } +function errorPayloadText(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function compactErrorMessage(value: string): string { + const normalized = value + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean) + .join('\n'); + return normalized.length > 1800 ? `...${normalized.slice(normalized.length - 1800)}` : normalized; +} + function estimateSpeechMs(text: string): number { const cjkCount = ( text.match(/[\u4e00-\u9fff\u3400-\u4dbf\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) || [] @@ -554,9 +567,12 @@ export function useExportVideoCourse() { if (!res.ok) { const payload = await res.json().catch(() => null); - const message = - payload && typeof payload.error === 'string' ? payload.error : t('export.exportFailed'); - throw new Error(message); + const summary = errorPayloadText(payload?.error); + const details = errorPayloadText(payload?.details); + const reason = compactErrorMessage([summary, details].filter(Boolean).join('\n')); + throw new Error( + reason ? 
t('export.videoRenderFailedWithReason', { reason }) : t('export.exportFailed'), + ); } const blob = await res.blob(); diff --git a/lib/i18n/locales/ar-SA.json b/lib/i18n/locales/ar-SA.json index 3937d6f242..d676607f49 100644 --- a/lib/i18n/locales/ar-SA.json +++ b/lib/i18n/locales/ar-SA.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "جارٍ تجهيز الصفحة {{current}} / {{total}}...", "videoGeneratingNarration": "جارٍ إنشاء السرد للصفحة {{current}} / {{total}}...", "videoRendering": "جارٍ إنشاء الفيديو...", + "videoRenderFailedWithReason": "فشل إنشاء الفيديو: {{reason}}", "videoNoNarration": "لم يتم العثور على صوت السرد؛ تم التصدير بصمت.", "videoPartialNarration": "تعذّر إنشاء {{count}} عنصر سرد وتم استبداله بالصمت.", "videoNarrationUnavailable": "لم يتم العثور على صوت سرد أو تعذّر إنشاؤه. يرجى إعداد خدمة TTS متاحة أولاً.", diff --git a/lib/i18n/locales/en-US.json b/lib/i18n/locales/en-US.json index 1192db1481..3322aa98b7 100644 --- a/lib/i18n/locales/en-US.json +++ b/lib/i18n/locales/en-US.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "Preparing page {{current}} / {{total}}...", "videoGeneratingNarration": "Generating narration for page {{current}} / {{total}}...", "videoRendering": "Rendering video...", + "videoRenderFailedWithReason": "Video rendering failed: {{reason}}", "videoNoNarration": "No narration audio was found; exported with silence.", "videoPartialNarration": "{{count}} narration item(s) could not be generated and were filled with silence.", "videoNarrationUnavailable": "No narration audio was found or generated. 
Configure an available TTS service first.", diff --git a/lib/i18n/locales/ja-JP.json b/lib/i18n/locales/ja-JP.json index 25d21ae394..5859edec2e 100644 --- a/lib/i18n/locales/ja-JP.json +++ b/lib/i18n/locales/ja-JP.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "{{current}} / {{total}} ページを準備中...", "videoGeneratingNarration": "{{current}} / {{total}} ページのナレーションを生成中...", "videoRendering": "動画を合成中...", + "videoRenderFailedWithReason": "動画の合成に失敗しました:{{reason}}", "videoNoNarration": "ナレーション音声が見つからなかったため、無音でエクスポートしました。", "videoPartialNarration": "{{count}} 件のナレーションを生成できず、無音で補完しました。", "videoNarrationUnavailable": "ナレーション音声が見つからず生成もできません。利用可能な TTS サービスを設定してください。", diff --git a/lib/i18n/locales/pt-BR.json b/lib/i18n/locales/pt-BR.json index c28e73cd18..308b9b7877 100644 --- a/lib/i18n/locales/pt-BR.json +++ b/lib/i18n/locales/pt-BR.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "Preparando página {{current}} / {{total}}...", "videoGeneratingNarration": "Gerando narração da página {{current}} / {{total}}...", "videoRendering": "Renderizando vídeo...", + "videoRenderFailedWithReason": "Falha ao renderizar vídeo: {{reason}}", "videoNoNarration": "Nenhuma narração foi encontrada; exportado com silêncio.", "videoPartialNarration": "{{count}} item(ns) de narração não puderam ser gerados e foram preenchidos com silêncio.", "videoNarrationUnavailable": "Nenhuma narração foi encontrada ou gerada. 
Configure primeiro um serviço TTS disponível.", diff --git a/lib/i18n/locales/ru-RU.json b/lib/i18n/locales/ru-RU.json index e1cadd41f8..dd518fbe14 100644 --- a/lib/i18n/locales/ru-RU.json +++ b/lib/i18n/locales/ru-RU.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "Подготовка страницы {{current}} / {{total}}...", "videoGeneratingNarration": "Создание озвучки для страницы {{current}} / {{total}}...", "videoRendering": "Сборка видео...", + "videoRenderFailedWithReason": "Не удалось собрать видео: {{reason}}", "videoNoNarration": "Озвучка не найдена; видео экспортировано без звука.", "videoPartialNarration": "Не удалось создать {{count}} элемент(ов) озвучки; они заменены тишиной.", "videoNarrationUnavailable": "Озвучка не найдена и не может быть создана. Сначала настройте доступный TTS-сервис.", diff --git a/lib/i18n/locales/zh-CN.json b/lib/i18n/locales/zh-CN.json index 8896ce6944..379e9d2cf5 100644 --- a/lib/i18n/locales/zh-CN.json +++ b/lib/i18n/locales/zh-CN.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "正在准备第 {{current}} / {{total}} 页...", "videoGeneratingNarration": "正在生成第 {{current}} / {{total}} 页讲解音频...", "videoRendering": "正在合成视频...", + "videoRenderFailedWithReason": "视频合成失败:{{reason}}", "videoNoNarration": "未找到讲解音频,已使用静音导出。", "videoPartialNarration": "{{count}} 条讲解音频未生成,已用静音补齐。", "videoNarrationUnavailable": "未找到或无法生成讲解音频,请先在设置中配置可用的 TTS 服务。", diff --git a/lib/i18n/locales/zh-TW.json b/lib/i18n/locales/zh-TW.json index e24826e7d0..19d07713c5 100644 --- a/lib/i18n/locales/zh-TW.json +++ b/lib/i18n/locales/zh-TW.json @@ -47,6 +47,7 @@ "videoPreparingProgress": "正在準備第 {{current}} / {{total}} 頁...", "videoGeneratingNarration": "正在生成第 {{current}} / {{total}} 頁講解音訊...", "videoRendering": "正在合成影片...", + "videoRenderFailedWithReason": "影片合成失敗:{{reason}}", "videoNoNarration": "未找到講解音訊,已使用靜音匯出。", "videoPartialNarration": "{{count}} 條講解音訊未生成,已用靜音補齊。", "videoNarrationUnavailable": "未找到或無法生成講解音訊,請先在設定中配置可用的 TTS 服務。", From df0ce62cc15a2eb270a859acc400f2fe2b8fe215 Mon 
Sep 17 00:00:00 2001 From: wangxiaoming Date: Tue, 16 Jun 2026 15:47:45 +0800 Subject: [PATCH 3/4] fix(export): cache generated narration audio --- lib/export/use-export-video-course.ts | 165 ++++++++++++++++++++++---- 1 file changed, 143 insertions(+), 22 deletions(-) diff --git a/lib/export/use-export-video-course.ts b/lib/export/use-export-video-course.ts index cd3e904cc1..98186e262a 100644 --- a/lib/export/use-export-video-course.ts +++ b/lib/export/use-export-video-course.ts @@ -86,6 +86,22 @@ interface AudioAsset { extension: string; } +interface ExportTTSContext { + providerId: string; + modelId?: string; + voice?: string; + speed?: number; + baseUrl?: string; + providerOptions?: unknown; + providerConfig?: { + apiKey?: string; + baseUrl?: string; + customDefaultBaseUrl?: string; + modelId?: string; + }; + language?: string; +} + function safeFileName(name: string | undefined, fallback: string): string { return (name || fallback).replace(/[\\/:*?"<>|]/g, '_') || fallback; } @@ -110,6 +126,50 @@ function compactErrorMessage(value: string): string { return normalized.length > 1800 ? 
`...${normalized.slice(normalized.length - 1800)}` : normalized; } +function stableSerialize(value: unknown): string { + if (value === null || typeof value !== 'object') return JSON.stringify(value); + if (Array.isArray(value)) return `[${value.map(stableSerialize).join(',')}]`; + + const record = value as Record<string, unknown>; + return `{${Object.keys(record) + .filter((key) => record[key] !== undefined) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableSerialize(record[key])}`) + .join(',')}}`; +} + +function fallbackHash(input: string): string { + let hash = 0x811c9dc5; + for (let i = 0; i < input.length; i++) { + hash ^= input.charCodeAt(i); + hash = Math.imul(hash, 0x01000193); + } + return (hash >>> 0).toString(16).padStart(8, '0'); +} + +async function sha256Hex(input: string): Promise<string> { + if (!globalThis.crypto?.subtle) return fallbackHash(input); + const digest = await globalThis.crypto.subtle.digest('SHA-256', new TextEncoder().encode(input)); + return Array.from(new Uint8Array(digest)) + .map((byte) => byte.toString(16).padStart(2, '0')) + .join(''); +} + +async function exportSpeechCacheAudioId(text: string, context: ExportTTSContext): Promise<string> { + const seed = stableSerialize({ + version: 1, + text: text.trim(), + language: context.language || '', + providerId: context.providerId, + modelId: context.modelId || '', + voice: context.voice || '', + speed: context.speed ?? 1, + baseUrl: context.baseUrl || '', + providerOptions: context.providerOptions ?? 
null, + }); + return `tts_export_${(await sha256Hex(seed)).slice(0, 48)}`; +} + function estimateSpeechMs(text: string): number { const cjkCount = ( text.match(/[\u4e00-\u9fff\u3400-\u4dbf\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) || [] @@ -344,11 +404,7 @@ async function hasExistingExportableNarration( return false; } -async function generateSpeechAudioForExport( - audioId: string, - text: string, - language?: string, -): Promise { +async function resolveExportTTSContext(language?: string): Promise { const settings = useSettingsStore.getState(); if (settings.ttsProviderId === BROWSER_NATIVE_TTS_PROVIDER_ID) return null; if ( @@ -360,29 +416,45 @@ async function generateSpeechAudioForExport( return null; } - const ttsProviderConfig = settings.ttsProvidersConfig?.[settings.ttsProviderId]; + const providerConfig = settings.ttsProvidersConfig?.[settings.ttsProviderId]; const teacher = pickNarratorAgent(useAgentRegistry.getState().listAgents()); const providerOptions = await resolveAgentVoiceOptions(teacher, { providerId: settings.ttsProviderId, - providerConfig: ttsProviderConfig, + providerConfig, voiceId: settings.ttsVoice, language, }); + return { + providerId: settings.ttsProviderId, + modelId: providerConfig?.modelId, + voice: settings.ttsVoice, + speed: settings.ttsSpeed, + baseUrl: providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl || undefined, + providerOptions, + providerConfig, + language, + }; +} + +async function generateSpeechAudioForExport( + audioId: string, + text: string, + context: ExportTTSContext, +): Promise { const response = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text, audioId, - ttsProviderId: settings.ttsProviderId, - ttsModelId: ttsProviderConfig?.modelId, - ttsVoice: settings.ttsVoice, - ttsSpeed: settings.ttsSpeed, - ttsApiKey: ttsProviderConfig?.apiKey || undefined, - ttsBaseUrl: - ttsProviderConfig?.baseUrl || 
ttsProviderConfig?.customDefaultBaseUrl || undefined, - ttsProviderOptions: providerOptions, + ttsProviderId: context.providerId, + ttsModelId: context.modelId, + ttsVoice: context.voice, + ttsSpeed: context.speed, + ttsApiKey: context.providerConfig?.apiKey || undefined, + ttsBaseUrl: context.baseUrl, + ttsProviderOptions: context.providerOptions, }), }); @@ -403,6 +475,8 @@ async function generateSpeechAudioForExport( id: audioId, blob, format: data.format, + text, + voice: context.voice, createdAt: Date.now(), }); @@ -414,6 +488,22 @@ async function generateSpeechAudioForExport( }; } +function persistSpeechAudioId(sceneId: string, actionId: string, audioId: string): void { + const { getSceneById, updateScene } = useStageStore.getState(); + const scene = getSceneById(sceneId); + if (!scene?.actions) return; + + let changed = false; + const actions = scene.actions.map((action) => { + if (action.id !== actionId || action.type !== 'speech') return action; + if ((action as SpeechAction).audioId === audioId) return action; + changed = true; + return { ...action, audioId }; + }); + + if (changed) updateScene(sceneId, { actions }); +} + export function useExportVideoCourse() { const [exporting, setExporting] = useState(false); const exportingRef = useRef(false); @@ -434,6 +524,27 @@ export function useExportVideoCourse() { const exportResolution = resolveVideoCourseExportResolution(options?.resolutionId); const settings = useSettingsStore.getState(); const isBrowserNativeTTS = settings.ttsProviderId === BROWSER_NATIVE_TTS_PROVIDER_ID; + const ttsContext = isBrowserNativeTTS + ? 
null + : await resolveExportTTSContext(stage.languageDirective); + const pendingAudioById = new Map>(); + const loadOrGenerateCachedAudio = async ( + audioId: string, + text: string, + context: ExportTTSContext, + ): Promise => { + const pending = pendingAudioById.get(audioId); + if (pending) return pending; + + const existing = await resolveSpeechAudioFromDb(audioId); + if (existing) return existing; + + const generated = generateSpeechAudioForExport(audioId, text, context).finally(() => { + pendingAudioById.delete(audioId); + }); + pendingAudioById.set(audioId, generated); + return generated; + }; const speechEntries = orderedScenes.flatMap((scene) => getSpeechActions(scene).map((speech, speechIndex) => ({ scene, speech, speechIndex })), ); @@ -488,13 +599,25 @@ export function useExportVideoCourse() { for (let speechIndex = 0; speechIndex < speechActions.length; speechIndex++) { const speech = speechActions[speechIndex]; const exportAudioId = getSpeechExportAudioId(scene, speech, speechIndex); + const cachedAudioId = ttsContext + ? await exportSpeechCacheAudioId(speech.text, ttsContext) + : undefined; const audioUrl = isExportableAudioUrl(speech.audioUrl) ? speech.audioUrl : undefined; - let audio = await resolveSpeechAudioFromDb(exportAudioId); + let actionAudioId: string | undefined; + let audio = cachedAudioId ? 
await resolveSpeechAudioFromDb(cachedAudioId) : null; + if (audio && cachedAudioId) { + actionAudioId = cachedAudioId; + } + if (!audio) { + audio = await resolveSpeechAudioFromDb(exportAudioId); + if (audio) actionAudioId = exportAudioId; + } if (!audio && speech.audioId && speech.audioId !== exportAudioId) { audio = await resolveSpeechAudioFromDb(speech.audioId); + if (audio) actionAudioId = speech.audioId; } let generationFailed = false; - if (!audio && !audioUrl && !isBrowserNativeTTS) { + if (!audio && !audioUrl && ttsContext && cachedAudioId) { try { toast.loading( t('export.videoGeneratingNarration', { @@ -503,11 +626,8 @@ export function useExportVideoCourse() { }), { id: toastId }, ); - audio = await generateSpeechAudioForExport( - exportAudioId, - speech.text, - stage.languageDirective, - ); + audio = await loadOrGenerateCachedAudio(cachedAudioId, speech.text, ttsContext); + if (audio) actionAudioId = cachedAudioId; } catch { generationFailed = true; failedNarrationCount++; @@ -524,6 +644,7 @@ export function useExportVideoCourse() { mimeType: audio.mimeType, required: true, }); + if (actionAudioId) persistSpeechAudioId(scene.id, speech.id, actionAudioId); } else if (audioUrl) { narrationTrackCount++; tracks.push({ From 5905c1459b74ad7978ce4c51afefac02dec9ae27 Mon Sep 17 00:00:00 2001 From: wangxiaoming Date: Tue, 16 Jun 2026 17:19:49 +0800 Subject: [PATCH 4/4] fix(i18n): add Korean video export labels --- lib/i18n/locales/ko-KR.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/i18n/locales/ko-KR.json b/lib/i18n/locales/ko-KR.json index 7babc428cb..f61fd0aa62 100644 --- a/lib/i18n/locales/ko-KR.json +++ b/lib/i18n/locales/ko-KR.json @@ -41,6 +41,21 @@ "pptx": "PPTX 내보내기", "resourcePack": "리소스 팩 내보내기", "resourcePackDesc": "PPTX + 인터랙티브 페이지", + "videoCourse": "동영상 강좌 내보내기", + "videoCourseDesc": "슬라이드 화면 + 내레이션 MP4", + "videoPreparing": "동영상 리소스를 준비하는 중...", + "videoPreparingProgress": "{{current}} / {{total}} 페이지를 준비하는 중...", + 
"videoGeneratingNarration": "{{current}} / {{total}} 페이지 내레이션 오디오를 생성하는 중...", + "videoRendering": "동영상을 렌더링하는 중...", + "videoRenderFailedWithReason": "동영상 렌더링 실패: {{reason}}", + "videoNoNarration": "내레이션 오디오를 찾을 수 없어 무음으로 내보냈습니다.", + "videoPartialNarration": "{{count}}개의 내레이션 항목을 생성하지 못해 무음으로 채웠습니다.", + "videoNarrationUnavailable": "내레이션 오디오를 찾거나 생성할 수 없습니다. 먼저 사용 가능한 TTS 서비스를 설정하세요.", + "videoBrowserTTSUnsupported": "브라우저 기본 TTS는 로컬에서 음성을 재생할 수 있지만 내보낼 수 있는 오디오 파일을 생성할 수 없습니다. OpenAI, Azure, GLM, Qwen, MiniMax, VoxCPM, Doubao, ElevenLabs, Lemonade 등 파일을 생성하는 TTS 서비스로 전환한 뒤 다시 내보내세요.", + "videoResolutionTitle": "동영상 품질 선택", + "videoResolutionDesc": "품질이 높을수록 내보내기 시간이 길어지고 파일 크기가 커집니다.", + "videoResolutionRecommended": "추천", + "videoResolutionExport": "동영상 내보내기", "exporting": "내보내는 중...", "exportSuccess": "내보내기 완료", "exportFailed": "내보내기 실패",