From 7ac0c43e631c77ed9013faa2f1cc5cc27dcbb305 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Fri, 29 May 2026 16:47:35 -0700 Subject: [PATCH 1/3] feat(ffmpeg): add server-side FFmpeg media-processing block Add an FFmpeg block that processes video/audio files server-side (no external service or auth) via an internal /api/tools/ffmpeg/process route. Files flow in and out as standard UserFile objects. Operations: convert, extract audio, trim, compress/scale, probe, thumbnail, concatenate, adjust volume, change speed. --- apps/docs/components/icons.tsx | 10 + apps/docs/components/ui/icon-mapping.ts | 2 + apps/docs/content/docs/en/tools/ffmpeg.mdx | 315 ++++++++++ apps/docs/content/docs/en/tools/meta.json | 1 + .../integrations/data/icon-mapping.ts | 2 + .../integrations/data/integrations.json | 55 ++ .../sim/app/api/tools/ffmpeg/process/route.ts | 552 ++++++++++++++++++ apps/sim/blocks/blocks/ffmpeg.ts | 327 +++++++++++ apps/sim/blocks/registry.ts | 2 + apps/sim/blocks/types.ts | 1 + apps/sim/blocks/utils.ts | 1 + apps/sim/components/icons.tsx | 10 + .../lib/api/contracts/tools/media/ffmpeg.ts | 86 +++ .../lib/api/contracts/tools/media/index.ts | 1 + apps/sim/tools/ffmpeg/compress.ts | 68 +++ apps/sim/tools/ffmpeg/concat.ts | 39 ++ apps/sim/tools/ffmpeg/convert.ts | 60 ++ apps/sim/tools/ffmpeg/extract_audio.ts | 53 ++ apps/sim/tools/ffmpeg/index.ts | 9 + apps/sim/tools/ffmpeg/probe.ts | 39 ++ apps/sim/tools/ffmpeg/shared.ts | 125 ++++ apps/sim/tools/ffmpeg/speed.ts | 46 ++ apps/sim/tools/ffmpeg/thumbnail.ts | 53 ++ apps/sim/tools/ffmpeg/trim.ts | 53 ++ apps/sim/tools/ffmpeg/types.ts | 104 ++++ apps/sim/tools/ffmpeg/volume.ts | 46 ++ apps/sim/tools/registry.ts | 20 + scripts/check-api-validation-contracts.ts | 4 +- 28 files changed, 2082 insertions(+), 2 deletions(-) create mode 100644 apps/docs/content/docs/en/tools/ffmpeg.mdx create mode 100644 apps/sim/app/api/tools/ffmpeg/process/route.ts create mode 100644 apps/sim/blocks/blocks/ffmpeg.ts create mode 100644 
apps/sim/lib/api/contracts/tools/media/ffmpeg.ts create mode 100644 apps/sim/tools/ffmpeg/compress.ts create mode 100644 apps/sim/tools/ffmpeg/concat.ts create mode 100644 apps/sim/tools/ffmpeg/convert.ts create mode 100644 apps/sim/tools/ffmpeg/extract_audio.ts create mode 100644 apps/sim/tools/ffmpeg/index.ts create mode 100644 apps/sim/tools/ffmpeg/probe.ts create mode 100644 apps/sim/tools/ffmpeg/shared.ts create mode 100644 apps/sim/tools/ffmpeg/speed.ts create mode 100644 apps/sim/tools/ffmpeg/thumbnail.ts create mode 100644 apps/sim/tools/ffmpeg/trim.ts create mode 100644 apps/sim/tools/ffmpeg/types.ts create mode 100644 apps/sim/tools/ffmpeg/volume.ts diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index 7985328c089..ba42ffd9535 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -2432,6 +2432,16 @@ export function FathomIcon(props: SVGProps) { ) } +export function FFmpegIcon(props: SVGProps) { + return ( + + + + ) +} export function LinkupIcon(props: SVGProps) { return ( diff --git a/apps/docs/components/ui/icon-mapping.ts b/apps/docs/components/ui/icon-mapping.ts index b937ee02340..248d66ac877 100644 --- a/apps/docs/components/ui/icon-mapping.ts +++ b/apps/docs/components/ui/icon-mapping.ts @@ -59,6 +59,7 @@ import { ExtendIcon, EyeIcon, FathomIcon, + FFmpegIcon, FindymailIcon, FirecrawlIcon, FirefliesIcon, @@ -271,6 +272,7 @@ export const blockTypeToIconMap: Record = { extend: ExtendIcon, extend_v2: ExtendIcon, fathom: FathomIcon, + ffmpeg: FFmpegIcon, file: DocumentIcon, file_v2: DocumentIcon, file_v3: DocumentIcon, diff --git a/apps/docs/content/docs/en/tools/ffmpeg.mdx b/apps/docs/content/docs/en/tools/ffmpeg.mdx new file mode 100644 index 00000000000..4d419826237 --- /dev/null +++ b/apps/docs/content/docs/en/tools/ffmpeg.mdx @@ -0,0 +1,315 @@ +--- +title: FFmpeg +description: Process video and audio files with FFmpeg +--- + +import { BlockInfoCard } from "@/components/ui/block-info-card" + 
+ + +## Usage Instructions + +{/* MANUAL-CONTENT-START:usage */} +The FFmpeg block runs the FFmpeg media engine on Sim's servers — there is no external service, API key, or OAuth to configure. It takes a media file as input (uploaded directly or referenced from a previous block as a `UserFile`), processes it, and returns the result as a new `UserFile` you can pass to any downstream block. + +Pick an **Operation**, supply the input file, and fill in the fields shown for that operation: + +| Operation | What it does | Key inputs | Output | +| --- | --- | --- | --- | +| **Convert Format** | Transcode to a different container/codec | `Output Format` (required, e.g. `mp4`, `webm`, `mp3`); optional `Video Codec` / `Audio Codec` | Converted media file | +| **Extract Audio** | Pull the audio track out of a video | `Output Format` (defaults to `mp3`) | Audio file | +| **Trim / Cut** | Keep a segment of the media | `Start Time` and/or `Duration` (seconds or `HH:MM:SS`) | Trimmed media file | +| **Compress / Scale** | Reduce file size and/or rescale video | `Scale` (e.g. `1280x720`, `1280:-2`), `Quality (CRF)` 0–51, optional `Video Bitrate` | Compressed video file | +| **Get Media Info** | Inspect the file with `ffprobe` | — | Metadata only (duration, codecs, resolution, streams) — no file | +| **Extract Thumbnail** | Capture a single frame as an image | `Timestamp` (defaults to 1s), `Output Format` (`jpg`/`png`/`webp`) | Image file | +| **Concatenate** | Join multiple clips into one | `Media Files` (2+, **same codec/format**) | Joined media file | +| **Adjust Volume** | Change audio loudness | `Volume` — multiplier (`1.5`, `0.5`) or decibels (`10dB`, `-6dB`) | Media file | +| **Change Speed** | Speed up or slow down playback | `Speed` multiplier (`2` = 2× faster, `0.5` = half) | Retimed media file | + +**File handling.** Inputs and outputs are standard Sim `UserFile` objects, so the block chains naturally — e.g. 
*Convert → Trim → Extract Thumbnail*, or feed the output of an upload/HTTP block straight into FFmpeg. Every operation except **Get Media Info** returns a `file` output; **Get Media Info** returns structured metadata instead. + +**Notes & limits.** +- **Concatenate** uses FFmpeg's concat demuxer with stream copy, so all inputs must share the same codec, resolution, and format (typical for clips exported from one source). Mixed-codec inputs will fail. +- **Change Speed** automatically retimes whichever streams exist — video via `setpts`, audio via `atempo` (chained for speeds beyond 0.5×–2×). +- Input and output files are capped at 200 MB each. +- Requires the FFmpeg binary on the server (bundled via `ffmpeg-static`; `ffprobe` from the system PATH is needed for **Get Media Info**). +{/* MANUAL-CONTENT-END */} + + +Transcode, trim, compress, concatenate, and inspect video and audio files server-side with FFmpeg. Convert formats, extract audio, capture thumbnails, adjust volume, and change playback speed — no external service required. + + + +## Tools + +### `ffmpeg_convert` + +Convert (transcode) a video or audio file to a different container/format + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The media file to convert | +| `format` | string | Yes | Target output format/container \(e.g. mp4, webm, mov, mkv, mp3, wav\) | +| `videoCodec` | string | No | Optional video codec override \(e.g. libx264, vp9\) | +| `audioCodec` | string | No | Optional audio codec override \(e.g. 
aac, libmp3lame\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_extract_audio` + +Extract the audio track from a video file into an audio file + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The video file to extract audio from | +| `format` | string | No | Output audio format \(mp3, wav, aac, flac, ogg, m4a, opus\). Defaults to mp3 | +| `audioCodec` | string | No | Optional audio codec override \(e.g. 
aac, libmp3lame\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_trim` + +Cut a segment from a video or audio file using a start time and/or duration + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The media file to trim | +| `startTime` | string | No | Start offset in seconds or HH:MM:SS\(.ms\), e.g. 5 or 00:00:05 | +| `duration` | string | No | Duration to keep in seconds or HH:MM:SS\(.ms\), e.g. 
30 or 00:00:30 | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_compress` + +Compress and/or rescale a video to reduce file size + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The video file to compress | +| `scale` | string | No | Optional output dimensions, e.g. 1280x720, 1280:720, or 1280:-2 \(keep aspect ratio\) | +| `crf` | number | No | Constant Rate Factor \(0 = lossless, 23 = default, 51 = worst quality\) | +| `videoBitrate` | string | No | Optional target video bitrate, e.g. 
1M or 800k | +| `videoCodec` | string | No | Optional video codec override \(defaults to libx264\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_probe` + +Inspect a media file and return metadata (duration, format, codecs, resolution) + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The media file to inspect | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Not returned by this tool — Get Media Info outputs metadata only | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_thumbnail` + +Extract a single frame from a 
video at a given timestamp as an image + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The video file to extract a frame from | +| `time` | string | No | Timestamp in seconds or HH:MM:SS\(.ms\), e.g. 5 or 00:00:05. Defaults to 1s | +| `format` | string | No | Output image format \(jpg, png, webp\). Defaults to jpg | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_concat` + +Join multiple media files of the same format and codec into a single output file + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `files` | file[] | Yes | Two or more media files to join, in order. 
Files must share the same codec | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_volume` + +Adjust the audio volume of a video or audio file + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The media file to adjust | +| `volume` | string | Yes | Volume as a multiplier \(e.g. 1.5, 0.5\) or decibels \(e.g. 
10dB, -6dB\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + +### `ffmpeg_speed` + +Speed up or slow down playback of a video or audio file + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `file` | file | Yes | The media file to retime | +| `speed` | number | Yes | Playback speed multiplier \(0.5 = half speed, 2 = double speed\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Processed media file | +| `fileName` | string | Generated output file name | +| `format` | string | Output or detected container format | +| `size` | number | Output file size in bytes | +| `durationSeconds` | number | Media duration in seconds | +| `bitrate` | number | Overall bitrate in bits per second | +| `width` | number | Video width in pixels | +| `height` | number | Video height in pixels | +| `hasVideo` | boolean | Whether a video stream is present | +| `hasAudio` | boolean | Whether an audio stream is present | +| `videoCodec` | string | Primary video codec | +| `audioCodec` | string | Primary audio codec | +| `streams` | array | All detected media streams | + + diff --git a/apps/docs/content/docs/en/tools/meta.json 
b/apps/docs/content/docs/en/tools/meta.json index dbc2ef5b7f7..9e44a4d0925 100644 --- a/apps/docs/content/docs/en/tools/meta.json +++ b/apps/docs/content/docs/en/tools/meta.json @@ -54,6 +54,7 @@ "exa", "extend", "fathom", + "ffmpeg", "file", "findymail", "firecrawl", diff --git a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts index 81c24aeadd8..69b3542b8e5 100644 --- a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts +++ b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts @@ -58,6 +58,7 @@ import { ExaAIIcon, ExtendIcon, FathomIcon, + FFmpegIcon, FindymailIcon, FirecrawlIcon, FirefliesIcon, @@ -267,6 +268,7 @@ export const blockTypeToIconMap: Record = { exa: ExaAIIcon, extend_v2: ExtendIcon, fathom: FathomIcon, + ffmpeg: FFmpegIcon, file_v4: DocumentIcon, findymail: FindymailIcon, firecrawl: FirecrawlIcon, diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index 8fc5c3856ff..424e12ba24b 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -4156,6 +4156,61 @@ "integrationTypes": ["analytics", "communication", "documents", "productivity"], "tags": ["meeting", "note-taking"] }, + { + "type": "ffmpeg", + "slug": "ffmpeg", + "name": "FFmpeg", + "description": "Process video and audio files with FFmpeg", + "longDescription": "Transcode, trim, compress, concatenate, and inspect video and audio files server-side with FFmpeg. 
Convert formats, extract audio, capture thumbnails, adjust volume, and change playback speed — no external service required.", + "bgColor": "#FFFFFF", + "iconName": "FFmpegIcon", + "docsUrl": "https://docs.sim.ai/tools/ffmpeg", + "operations": [ + { + "name": "Convert Format", + "description": "Convert (transcode) a video or audio file to a different container/format" + }, + { + "name": "Extract Audio", + "description": "Extract the audio track from a video file into an audio file" + }, + { + "name": "Trim / Cut", + "description": "Cut a segment from a video or audio file using a start time and/or duration" + }, + { + "name": "Compress / Scale", + "description": "Compress and/or rescale a video to reduce file size" + }, + { + "name": "Get Media Info", + "description": "Inspect a media file and return metadata (duration, format, codecs, resolution)" + }, + { + "name": "Extract Thumbnail", + "description": "Extract a single frame from a video at a given timestamp as an image" + }, + { + "name": "Concatenate", + "description": "Join multiple media files of the same format and codec into a single output file" + }, + { + "name": "Adjust Volume", + "description": "Adjust the audio volume of a video or audio file" + }, + { + "name": "Change Speed", + "description": "Speed up or slow down playback of a video or audio file" + } + ], + "operationCount": 9, + "triggers": [], + "triggerCount": 0, + "authType": "none", + "category": "tools", + "integrationTypes": ["developer-tools", "documents"], + "tags": ["media-processing", "document-processing"] + }, { "type": "file_v4", "slug": "file", diff --git a/apps/sim/app/api/tools/ffmpeg/process/route.ts b/apps/sim/app/api/tools/ffmpeg/process/route.ts new file mode 100644 index 00000000000..8d46b5c73b7 --- /dev/null +++ b/apps/sim/app/api/tools/ffmpeg/process/route.ts @@ -0,0 +1,552 @@ +import { execSync } from 'node:child_process' +import fsSync from 'node:fs' +import fs from 'node:fs/promises' +import os from 'node:os' +import 
path from 'node:path' +import { createLogger } from '@sim/logger' +import { getErrorMessage } from '@sim/utils/errors' +import { generateId } from '@sim/utils/id' +import ffmpegStatic from 'ffmpeg-static' +import ffmpeg from 'fluent-ffmpeg' +import type { NextRequest } from 'next/server' +import { NextResponse } from 'next/server' +import { ffmpegToolContract } from '@/lib/api/contracts/tools/media/ffmpeg' +import { getValidationErrorMessage, parseRequest, validationErrorResponse } from '@/lib/api/server' +import { checkInternalAuth } from '@/lib/auth/hybrid' +import { getBaseUrl } from '@/lib/core/utils/urls' +import { withRouteHandler } from '@/lib/core/utils/with-route-handler' +import { StorageService } from '@/lib/uploads' +import type { ExecutionContext } from '@/lib/uploads/contexts/execution' +import { processFilesToUserFiles, type RawFileInput } from '@/lib/uploads/utils/file-utils' +import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server' +import type { UserFile } from '@/executor/types' + +const logger = createLogger('FfmpegProcessAPI') + +export const dynamic = 'force-dynamic' +export const maxDuration = 300 + +const MAX_FFMPEG_INPUT_BYTES = 200 * 1024 * 1024 +const MAX_FFMPEG_OUTPUT_BYTES = 200 * 1024 * 1024 + +let ffmpegInitialized = false +let ffmpegAvailable = false + +const FFMPEG_NOT_FOUND_ERROR = + 'FFmpeg not found. Install it on the server: apk add ffmpeg (Alpine) / apt-get install ffmpeg (Ubuntu) / brew install ffmpeg (macOS)' + +/** + * Lazily resolves the ffmpeg and ffprobe binaries. ffmpeg-static bundles ffmpeg + * but not ffprobe, so ffprobe is resolved from the system PATH when present. 
+ */
+function ensureFfmpeg(): void {
+  if (ffmpegInitialized) {
+    if (!ffmpegAvailable) throw new Error(FFMPEG_NOT_FOUND_ERROR)
+    return
+  }
+  // Mark as initialized up front so a failed lookup is not repeated on every call.
+  ffmpegInitialized = true
+
+  if (ffmpegStatic && typeof ffmpegStatic === 'string') {
+    try {
+      fsSync.accessSync(ffmpegStatic, fsSync.constants.X_OK)
+      ffmpeg.setFfmpegPath(ffmpegStatic)
+      ffmpegAvailable = true
+      logger.info('Using ffmpeg-static binary', { path: ffmpegStatic })
+    } catch {
+      // Fall through to system ffmpeg
+    }
+  }
+
+  if (!ffmpegAvailable) {
+    try {
+      const resolved = resolveBinaryOnPath('ffmpeg')
+      if (resolved) {
+        ffmpeg.setFfmpegPath(resolved)
+        ffmpegAvailable = true
+        logger.info('Using system ffmpeg binary', { path: resolved })
+      }
+    } catch {
+      // ffmpeg not on PATH
+    }
+  }
+
+  try {
+    const resolvedProbe = resolveBinaryOnPath('ffprobe')
+    if (resolvedProbe) {
+      ffmpeg.setFfprobePath(resolvedProbe)
+      logger.info('Using system ffprobe binary', { path: resolvedProbe })
+    }
+  } catch {
+    // ffprobe not on PATH — only the `probe` operation strictly requires it
+  }
+
+  if (!ffmpegAvailable) {
+    logger.warn('No ffmpeg binary found at initialization time')
+    throw new Error(FFMPEG_NOT_FOUND_ERROR)
+  }
+}
+
+/**
+ * Looks up a binary with `where` (Windows) or `which` (everywhere else) and returns
+ * the first match. Throws when the lookup command exits non-zero, so callers wrap it
+ * in try/catch to treat "not found" as a soft failure.
+ */
+function resolveBinaryOnPath(binary: string): string | undefined {
+  const lookup = process.platform === 'win32' ? 'where' : 'which'
+  // `where` separates multiple matches with CRLF; splitting on a bare '\n' would leave
+  // a trailing '\r' on the first path and make it unusable.
+  return execSync(`${lookup} ${binary}`, { encoding: 'utf-8' }).trim().split(/\r?\n/)[0]
+}
+
+/** Container extension -> MIME type for the video formats this route knows about. */
+const VIDEO_MIME: Record<string, string> = {
+  mp4: 'video/mp4',
+  webm: 'video/webm',
+  mov: 'video/quicktime',
+  mkv: 'video/x-matroska',
+  avi: 'video/x-msvideo',
+}
+
+/** Extension -> MIME type for the audio formats this route knows about. */
+const AUDIO_MIME: Record<string, string> = {
+  mp3: 'audio/mpeg',
+  wav: 'audio/wav',
+  aac: 'audio/aac',
+  flac: 'audio/flac',
+  ogg: 'audio/ogg',
+  m4a: 'audio/mp4',
+  opus: 'audio/opus',
+}
+
+/** Extension -> MIME type for the image formats this route knows about. */
+const IMAGE_MIME: Record<string, string> = {
+  jpg: 'image/jpeg',
+  jpeg: 'image/jpeg',
+  png: 'image/png',
+  webp: 'image/webp',
+  gif: 'image/gif',
+}
+
+/**
+ * Reads `key` from a lookup table while ignoring members inherited from
+ * Object.prototype, so request-supplied values such as `constructor` or `toString`
+ * are treated as unknown instead of resolving to a built-in function.
+ */
+function lookupOwn(table: Record<string, string>, key: string): string | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined
+}
+
+/**
+ * Maps a format/extension (case-insensitive) to its MIME type, checking the video,
+ * audio, then image tables. Unknown formats yield `application/octet-stream`.
+ */
+function getMimeForFormat(format: string): string {
+  const normalized = format.toLowerCase()
+  return (
+    lookupOwn(VIDEO_MIME, normalized) ||
+    lookupOwn(AUDIO_MIME, normalized) ||
+    lookupOwn(IMAGE_MIME, normalized) ||
+    'application/octet-stream'
+  )
+}
+
+/** Audio format/extension -> encoder name used when the request gives no codec. */
+const AUDIO_CODEC: Record<string, string> = {
+  mp3: 'libmp3lame',
+  wav: 'pcm_s16le',
+  flac: 'flac',
+  m4a: 'aac',
+  aac: 'aac',
+  ogg: 'libvorbis',
+  opus: 'libopus',
+}
+
+/**
+ * Returns the encoder for an audio format (case-insensitive), falling back to
+ * `libmp3lame` for formats that are not in the table.
+ */
+function getAudioCodec(format: string): string {
+  return lookupOwn(AUDIO_CODEC, format.toLowerCase()) || 'libmp3lame'
+}
+
+/**
+ * Derives a sensible file extension for an input temp file from its name or MIME type.
+ * Falls back to `dat` when neither provides one.
+ */
+function getInputExtension(file: UserFile): string {
+  const fromName = path
+    .extname(file.name || '')
+    .replace('.', '')
+    .toLowerCase()
+  if (fromName) return fromName
+  // Drop MIME parameters (`audio/ogg; codecs=opus` -> `ogg`) so they never end up in
+  // a temp file name or an output format.
+  const subtype = (file.type || '').split('/')[1]?.split(';')[0]?.trim()
+  return subtype ? subtype.toLowerCase() : 'dat'
+}
+
+/** Whether the extension (case-insensitive) is one of the known video containers. */
+function isVideoExtension(ext: string): boolean {
+  return lookupOwn(VIDEO_MIME, ext.toLowerCase()) !== undefined
+}
+
+/**
+ * Runs a configured fluent-ffmpeg command, resolving once the output file is written.
+ * Rejects with an `FFmpeg error: …` Error when the command emits `error`.
+ */
+function runFfmpeg(
+  configure: (command: ffmpeg.FfmpegCommand) => ffmpeg.FfmpegCommand
+): Promise<void> {
+  return new Promise((resolve, reject) => {
+    configure(ffmpeg())
+      .on('error', (err: Error) => reject(new Error(`FFmpeg error: ${err.message}`)))
+      .on('end', () => resolve())
+      .run()
+  })
+}
+
+/** Metadata summary built from ffprobe output; fields ffprobe omits are `null`. */
+interface ProbeResult {
+  durationSeconds: number | null
+  format: string | null
+  bitrate: number | null
+  width: number | null
+  height: number | null
+  hasVideo: boolean
+  hasAudio: boolean
+  videoCodec: string | null
+  audioCodec: string | null
+  streams: Array<{
+    index: number
+    type: string | null
+    codec: string | null
+    width: number | null
+    height: number | null
+  }>
+}
+
+/**
+ * Runs ffprobe on a local file and summarizes the result. Width, height, and codec
+ * fields describe the first video / first audio stream found. Rejects with an
+ * `FFprobe error: …` Error when ffprobe fails.
+ */
+function probeMedia(inputPath: string): Promise<ProbeResult> {
+  return new Promise((resolve, reject) => {
+    ffmpeg.ffprobe(inputPath, (err, metadata) => {
+      if (err) {
+        reject(new Error(`FFprobe error: ${err.message}`))
+        return
+      }
+      const videoStream = metadata.streams.find((s) => s.codec_type === 'video')
+      const audioStream = metadata.streams.find((s) => s.codec_type === 'audio')
+      resolve({
+        durationSeconds: metadata.format.duration ?? null,
+        format: metadata.format.format_name ?? null,
+        bitrate: metadata.format.bit_rate ? Number(metadata.format.bit_rate) : null,
+        width: videoStream?.width ?? null,
+        height: videoStream?.height ?? null,
+        hasVideo: Boolean(videoStream),
+        hasAudio: Boolean(audioStream),
+        videoCodec: videoStream?.codec_name ?? null,
+        audioCodec: audioStream?.codec_name ?? null,
+        streams: metadata.streams.map((s) => ({
+          index: s.index,
+          type: s.codec_type ?? null,
+          codec: s.codec_name ?? null,
+          width: s.width ?? null,
+          height: s.height ?? null,
+        })),
+      })
+    })
+  })
+}
+
+/**
+ * atempo only supports factors between 0.5 and 2.0; chain filters to reach
+ * arbitrary speeds (e.g. 4x -> "atempo=2.0,atempo=2.0").
+ *
+ * Throws when `speed` is not a positive, finite number.
+ */
+function buildAtempoChain(speed: number): string {
+  // Zero, negative, or infinite speeds would never leave the loops below (the factor
+  // list would grow until memory runs out); NaN would yield an invalid `atempo=NaN`.
+  if (!Number.isFinite(speed) || speed <= 0) {
+    throw new Error(`Invalid speed: ${speed}. Speed must be a positive, finite number`)
+  }
+  const factors: number[] = []
+  let remaining = speed
+  while (remaining > 2.0) {
+    factors.push(2.0)
+    remaining /= 2.0
+  }
+  while (remaining < 0.5) {
+    factors.push(0.5)
+    remaining /= 0.5
+  }
+  factors.push(remaining)
+  return factors.map((f) => `atempo=${f.toFixed(6)}`).join(',')
+}
+
+/**
+ * Normalizes a user-supplied scale value (`1280x720`, `1280:-2`, `1280:720`)
+ * into an ffmpeg scale filter expression (`width:height`).
+ */ +function normalizeScale(scale: string): string { + return scale.trim().replace(/x/gi, ':') +} + +async function storeOutputFile( + buffer: Buffer, + fileName: string, + mimeType: string, + executionContext: ExecutionContext | null, + userId?: string +): Promise { + if (executionContext) { + const { uploadExecutionFile } = await import('@/lib/uploads/contexts/execution') + return uploadExecutionFile(executionContext, buffer, fileName, mimeType, userId) + } + + const fileInfo = await StorageService.uploadFile({ + file: buffer, + fileName, + contentType: mimeType, + context: 'copilot', + }) + + return { + id: generateId(), + name: fileInfo.name, + url: `${getBaseUrl()}${fileInfo.path}`, + size: fileInfo.size, + type: mimeType, + key: fileInfo.key, + context: 'copilot', + } +} + +export const POST = withRouteHandler(async (request: NextRequest) => { + const requestId = generateId() + logger.info(`[${requestId}] FFmpeg process request started`) + + const authResult = await checkInternalAuth(request, { requireWorkflowId: false }) + if (!authResult.success) { + logger.error(`[${requestId}] Authentication failed`, { error: authResult.error }) + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) + } + + const parsed = await parseRequest( + ffmpegToolContract, + request, + {}, + { + validationErrorResponse: (error) => { + logger.warn(`[${requestId}] Invalid FFmpeg request`, { issues: error.issues }) + return validationErrorResponse( + error, + getValidationErrorMessage(error, 'Invalid request data') + ) + }, + } + ) + if (!parsed.success) return parsed.response + + const body = parsed.data.body + const { operation, workspaceId, workflowId, executionId } = body + const executionContext: ExecutionContext | null = + workspaceId && workflowId && executionId ? 
{ workspaceId, workflowId, executionId } : null + + let tempDir: string | null = null + + try { + ensureFfmpeg() + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'ffmpeg-')) + + if (operation === 'probe') { + const userFile = processFilesToUserFiles([body.file as RawFileInput], requestId, logger)[0] + if (!userFile) { + return NextResponse.json({ error: 'A valid media file is required' }, { status: 400 }) + } + const inputBuffer = await downloadFileFromStorage(userFile, requestId, logger, { + maxBytes: MAX_FFMPEG_INPUT_BYTES, + }) + const inputPath = path.join(tempDir, `input.${getInputExtension(userFile)}`) + await fs.writeFile(inputPath, inputBuffer) + + const probe = await probeMedia(inputPath) + logger.info(`[${requestId}] Probe completed`, { format: probe.format }) + return NextResponse.json({ success: true, output: probe }) + } + + if (operation === 'concat') { + const userFiles = processFilesToUserFiles( + (body.files ?? []) as RawFileInput[], + requestId, + logger + ) + if (userFiles.length < 2) { + return NextResponse.json( + { error: 'concat requires at least 2 valid media files' }, + { status: 400 } + ) + } + + const inputPaths: string[] = [] + for (let i = 0; i < userFiles.length; i++) { + const file = userFiles[i] + const buffer = await downloadFileFromStorage(file, requestId, logger, { + maxBytes: MAX_FFMPEG_INPUT_BYTES, + }) + const inputPath = path.join(tempDir, `concat-${i}.${getInputExtension(file)}`) + await fs.writeFile(inputPath, buffer) + inputPaths.push(inputPath) + } + + const outExt = getInputExtension(userFiles[0]) + const outputPath = path.join(tempDir, `output.${outExt}`) + const listPath = path.join(tempDir, 'concat-list.txt') + const listContent = inputPaths.map((p) => `file '${p.replace(/'/g, "'\\''")}'`).join('\n') + await fs.writeFile(listPath, listContent) + + await runFfmpeg((cmd) => + cmd + .input(listPath) + .inputOptions(['-f', 'concat', '-safe', '0']) + .outputOptions(['-c', 'copy']) + .output(outputPath) + ) + + const 
outputBuffer = await fs.readFile(outputPath) + const mimeType = getMimeForFormat(outExt) + const fileName = `ffmpeg-concat-${Date.now()}.${outExt}` + const file = await storeOutputFile( + outputBuffer, + fileName, + mimeType, + executionContext, + authResult.userId + ) + + return NextResponse.json({ + success: true, + output: { file, fileName, format: outExt, size: outputBuffer.length }, + }) + } + + // Single-input operations + const userFile = processFilesToUserFiles([body.file as RawFileInput], requestId, logger)[0] + if (!userFile) { + return NextResponse.json({ error: 'A valid media file is required' }, { status: 400 }) + } + const inputExt = getInputExtension(userFile) + const inputBuffer = await downloadFileFromStorage(userFile, requestId, logger, { + maxBytes: MAX_FFMPEG_INPUT_BYTES, + }) + const inputPath = path.join(tempDir, `input.${inputExt}`) + await fs.writeFile(inputPath, inputBuffer) + + let outExt = inputExt + let mimeType = getMimeForFormat(inputExt) + + if (operation === 'convert') { + if (!body.format) { + return NextResponse.json( + { error: 'format is required for the convert operation' }, + { status: 400 } + ) + } + outExt = body.format.trim().toLowerCase() + mimeType = getMimeForFormat(outExt) + const outputPath = path.join(tempDir, `output.${outExt}`) + await runFfmpeg((cmd) => { + let c = cmd.input(inputPath).toFormat(outExt) + if (body.videoCodec) c = c.videoCodec(body.videoCodec) + if (body.audioCodec) c = c.audioCodec(body.audioCodec) + return c.output(outputPath) + }) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + if (operation === 'extract_audio') { + outExt = (body.format?.trim() || 'mp3').toLowerCase() + mimeType = getMimeForFormat(outExt) + const outputPath = path.join(tempDir, `output.${outExt}`) + await runFfmpeg((cmd) => + cmd + .input(inputPath) + .noVideo() + .audioCodec(body.audioCodec || getAudioCodec(outExt)) + .toFormat(outExt) + .output(outputPath) + ) + return await 
finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + if (operation === 'trim') { + if (!body.startTime && !body.duration) { + return NextResponse.json( + { error: 'trim requires startTime and/or duration' }, + { status: 400 } + ) + } + const outputPath = path.join(tempDir, `output.${outExt}`) + await runFfmpeg((cmd) => { + let c = cmd.input(inputPath) + if (body.startTime) c = c.setStartTime(body.startTime) + if (body.duration) c = c.setDuration(body.duration) + return c.toFormat(outExt).output(outputPath) + }) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + if (operation === 'compress') { + const outputPath = path.join(tempDir, `output.${outExt}`) + await runFfmpeg((cmd) => { + let c = cmd.input(inputPath).videoCodec(body.videoCodec || 'libx264') + if (body.crf !== undefined) c = c.outputOptions(['-crf', String(body.crf)]) + if (body.videoBitrate) c = c.videoBitrate(body.videoBitrate) + if (body.scale) c = c.videoFilters(`scale=${normalizeScale(body.scale)}`) + return c.toFormat(outExt).output(outputPath) + }) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + if (operation === 'thumbnail') { + outExt = (body.format?.trim() || 'jpg').toLowerCase() + mimeType = getMimeForFormat(outExt) + const time = body.time || '00:00:01' + const outputPath = path.join(tempDir, `output.${outExt}`) + await runFfmpeg((cmd) => + cmd.input(inputPath).seekInput(time).outputOptions(['-frames:v', '1']).output(outputPath) + ) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + if (operation === 'volume') { + if (!body.volume) { + return NextResponse.json( + { error: 'volume is required for the volume operation' }, + { status: 400 } + ) + } + const outputPath = path.join(tempDir, `output.${outExt}`) + const isVideo = isVideoExtension(inputExt) + await runFfmpeg((cmd) => { + let c = 
cmd.input(inputPath).audioFilters(`volume=${body.volume}`) + if (isVideo) c = c.outputOptions(['-c:v', 'copy']) + return c.toFormat(outExt).output(outputPath) + }) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + if (operation === 'speed') { + if (body.speed === undefined) { + return NextResponse.json( + { error: 'speed is required for the speed operation' }, + { status: 400 } + ) + } + const speed = body.speed + const probe = await probeMedia(inputPath) + const outputPath = path.join(tempDir, `output.${outExt}`) + await runFfmpeg((cmd) => { + let c = cmd.input(inputPath) + if (probe.hasVideo) { + c = c.outputOptions(['-filter:v', `setpts=${(1 / speed).toFixed(6)}*PTS`]) + } + if (probe.hasAudio) { + c = c.outputOptions(['-filter:a', buildAtempoChain(speed)]) + } + return c.toFormat(outExt).output(outputPath) + }) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) + } + + return NextResponse.json({ error: `Unsupported operation: ${operation}` }, { status: 400 }) + } catch (error) { + const message = getErrorMessage(error, 'FFmpeg processing failed') + logger.error(`[${requestId}] FFmpeg processing failed`, { operation, error: message }) + return NextResponse.json({ error: message }, { status: 500 }) + } finally { + if (tempDir) { + await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {}) + } + } + + async function finalize( + outputPath: string, + format: string, + mimeType: string, + context: ExecutionContext | null, + userId?: string + ): Promise { + const outputBuffer = await fs.readFile(outputPath) + if (outputBuffer.length === 0) { + throw new Error('FFmpeg produced an empty output file') + } + if (outputBuffer.length > MAX_FFMPEG_OUTPUT_BYTES) { + throw new Error('Output file exceeds the maximum allowed size') + } + const fileName = `ffmpeg-${operation}-${Date.now()}.${format}` + const file = await storeOutputFile(outputBuffer, fileName, mimeType, context, 
userId) + logger.info(`[${requestId}] FFmpeg ${operation} completed`, { + fileName, + size: outputBuffer.length, + }) + return NextResponse.json({ + success: true, + output: { file, fileName, format, size: outputBuffer.length }, + }) + } +}) diff --git a/apps/sim/blocks/blocks/ffmpeg.ts b/apps/sim/blocks/blocks/ffmpeg.ts new file mode 100644 index 00000000000..95e5af87bce --- /dev/null +++ b/apps/sim/blocks/blocks/ffmpeg.ts @@ -0,0 +1,327 @@ +import { FFmpegIcon } from '@/components/icons' +import { type BlockConfig, IntegrationType } from '@/blocks/types' +import { normalizeFileInput, parseOptionalNumberInput } from '@/blocks/utils' +import type { FfmpegFileResponse } from '@/tools/ffmpeg/types' + +const SINGLE_FILE_OPS = [ + 'convert', + 'extract_audio', + 'trim', + 'compress', + 'probe', + 'thumbnail', + 'volume', + 'speed', +] +const ACCEPTED_MEDIA_TYPES = '.mp4,.mov,.avi,.mkv,.webm,.mp3,.m4a,.wav,.ogg,.flac,.aac,.opus' + +export const FfmpegBlock: BlockConfig = { + type: 'ffmpeg', + name: 'FFmpeg', + description: 'Process video and audio files with FFmpeg', + longDescription: + 'Transcode, trim, compress, concatenate, and inspect video and audio files server-side with FFmpeg. 
Convert formats, extract audio, capture thumbnails, adjust volume, and change playback speed — no external service required.', + docsLink: 'https://docs.sim.ai/tools/ffmpeg', + category: 'tools', + integrationType: IntegrationType.DeveloperTools, + tags: ['media-processing', 'document-processing'], + bgColor: '#FFFFFF', + icon: FFmpegIcon, + + subBlocks: [ + { + id: 'operation', + title: 'Operation', + type: 'dropdown', + options: [ + { label: 'Convert Format', id: 'convert' }, + { label: 'Extract Audio', id: 'extract_audio' }, + { label: 'Trim / Cut', id: 'trim' }, + { label: 'Compress / Scale', id: 'compress' }, + { label: 'Get Media Info', id: 'probe' }, + { label: 'Extract Thumbnail', id: 'thumbnail' }, + { label: 'Concatenate', id: 'concat' }, + { label: 'Adjust Volume', id: 'volume' }, + { label: 'Change Speed', id: 'speed' }, + ], + value: () => 'convert', + required: true, + }, + + // Single-file input (basic) + { + id: 'inputFile', + title: 'Media File', + type: 'file-upload', + canonicalParamId: 'file', + placeholder: 'Upload a video or audio file', + mode: 'basic', + multiple: false, + acceptedTypes: ACCEPTED_MEDIA_TYPES, + condition: { field: 'operation', value: 'concat', not: true }, + required: { field: 'operation', value: 'concat', not: true }, + }, + // Single-file input (advanced) + { + id: 'inputFileRef', + title: 'Media File', + type: 'short-input', + canonicalParamId: 'file', + placeholder: 'Reference a media file from a previous block', + mode: 'advanced', + condition: { field: 'operation', value: 'concat', not: true }, + required: { field: 'operation', value: 'concat', not: true }, + }, + + // Multi-file input for concat (basic) + { + id: 'inputFiles', + title: 'Media Files', + type: 'file-upload', + canonicalParamId: 'files', + placeholder: 'Upload two or more files to join', + mode: 'basic', + multiple: true, + acceptedTypes: ACCEPTED_MEDIA_TYPES, + condition: { field: 'operation', value: 'concat' }, + required: { field: 'operation', value: 
'concat' }, + }, + // Multi-file input for concat (advanced) + { + id: 'inputFilesRef', + title: 'Media Files', + type: 'short-input', + canonicalParamId: 'files', + placeholder: 'Reference media files from a previous block', + mode: 'advanced', + condition: { field: 'operation', value: 'concat' }, + required: { field: 'operation', value: 'concat' }, + }, + + // Output format + { + id: 'format', + title: 'Output Format', + type: 'short-input', + placeholder: 'convert: mp4, webm, mp3 · audio: mp3, wav · thumbnail: jpg, png', + condition: { field: 'operation', value: ['convert', 'extract_audio', 'thumbnail'] }, + required: { field: 'operation', value: 'convert' }, + }, + + // Trim fields + { + id: 'startTime', + title: 'Start Time', + type: 'short-input', + placeholder: 'e.g. 5 or 00:00:05', + condition: { field: 'operation', value: 'trim' }, + }, + { + id: 'duration', + title: 'Duration', + type: 'short-input', + placeholder: 'e.g. 30 or 00:00:30', + condition: { field: 'operation', value: 'trim' }, + }, + + // Compress fields + { + id: 'scale', + title: 'Scale', + type: 'short-input', + placeholder: 'e.g. 1280x720 or 1280:-2 (keep aspect ratio)', + condition: { field: 'operation', value: 'compress' }, + }, + { + id: 'crf', + title: 'Quality (CRF)', + type: 'slider', + min: 0, + max: 51, + step: 1, + integer: true, + defaultValue: 23, + condition: { field: 'operation', value: 'compress' }, + }, + { + id: 'videoBitrate', + title: 'Video Bitrate', + type: 'short-input', + placeholder: 'e.g. 1M or 800k', + mode: 'advanced', + condition: { field: 'operation', value: 'compress' }, + }, + + // Thumbnail timestamp + { + id: 'time', + title: 'Timestamp', + type: 'short-input', + placeholder: 'e.g. 
5 or 00:00:05', + condition: { field: 'operation', value: 'thumbnail' }, + }, + + // Volume + { + id: 'volume', + title: 'Volume', + type: 'short-input', + placeholder: 'Multiplier (1.5, 0.5) or decibels (10dB, -6dB)', + condition: { field: 'operation', value: 'volume' }, + required: { field: 'operation', value: 'volume' }, + }, + + // Speed + { + id: 'speed', + title: 'Speed', + type: 'short-input', + placeholder: 'Multiplier, e.g. 2 (faster) or 0.5 (slower)', + condition: { field: 'operation', value: 'speed' }, + required: { field: 'operation', value: 'speed' }, + }, + + // Codec overrides (advanced) + { + id: 'videoCodec', + title: 'Video Codec', + type: 'short-input', + placeholder: 'e.g. libx264, vp9', + mode: 'advanced', + condition: { field: 'operation', value: ['convert', 'compress'] }, + }, + { + id: 'audioCodec', + title: 'Audio Codec', + type: 'short-input', + placeholder: 'e.g. aac, libmp3lame', + mode: 'advanced', + condition: { field: 'operation', value: ['convert', 'extract_audio'] }, + }, + ], + + tools: { + access: [ + 'ffmpeg_convert', + 'ffmpeg_extract_audio', + 'ffmpeg_trim', + 'ffmpeg_compress', + 'ffmpeg_probe', + 'ffmpeg_thumbnail', + 'ffmpeg_concat', + 'ffmpeg_volume', + 'ffmpeg_speed', + ], + config: { + tool: (params) => `ffmpeg_${params.operation}`, + params: (params) => { + const file = + params.operation && SINGLE_FILE_OPS.includes(params.operation) + ? normalizeFileInput(params.file, { single: true }) + : undefined + const files = params.operation === 'concat' ? 
normalizeFileInput(params.files) : undefined + + return { + file, + files, + format: params.format, + videoCodec: params.videoCodec, + audioCodec: params.audioCodec, + startTime: params.startTime, + duration: params.duration, + scale: params.scale, + crf: parseOptionalNumberInput(params.crf, 'Quality (CRF)', { + integer: true, + min: 0, + max: 51, + }), + videoBitrate: params.videoBitrate, + time: params.time, + volume: params.volume, + speed: parseOptionalNumberInput(params.speed, 'Speed', { min: 0 }), + } + }, + }, + }, + + inputs: { + operation: { type: 'string', description: 'FFmpeg operation to perform' }, + file: { type: 'json', description: 'Input media file (UserFile)' }, + files: { type: 'json', description: 'Input media files for concatenation (UserFile[])' }, + format: { type: 'string', description: 'Output format/container' }, + videoCodec: { type: 'string', description: 'Video codec override' }, + audioCodec: { type: 'string', description: 'Audio codec override' }, + startTime: { type: 'string', description: 'Trim start offset' }, + duration: { type: 'string', description: 'Trim duration' }, + scale: { type: 'string', description: 'Output scale dimensions' }, + crf: { type: 'number', description: 'Constant Rate Factor (compress quality)' }, + videoBitrate: { type: 'string', description: 'Target video bitrate' }, + time: { type: 'string', description: 'Thumbnail timestamp' }, + volume: { type: 'string', description: 'Volume adjustment' }, + speed: { type: 'number', description: 'Playback speed multiplier' }, + }, + + outputs: { + file: { + type: 'file', + description: 'Processed media file', + condition: { field: 'operation', value: 'probe', not: true }, + }, + fileName: { + type: 'string', + description: 'Generated output file name', + condition: { field: 'operation', value: 'probe', not: true }, + }, + format: { type: 'string', description: 'Output or detected container format' }, + size: { + type: 'number', + description: 'Output file size in bytes', 
+ condition: { field: 'operation', value: 'probe', not: true }, + }, + durationSeconds: { + type: 'number', + description: 'Media duration in seconds', + condition: { field: 'operation', value: 'probe' }, + }, + bitrate: { + type: 'number', + description: 'Overall bitrate in bits per second', + condition: { field: 'operation', value: 'probe' }, + }, + width: { + type: 'number', + description: 'Video width in pixels', + condition: { field: 'operation', value: 'probe' }, + }, + height: { + type: 'number', + description: 'Video height in pixels', + condition: { field: 'operation', value: 'probe' }, + }, + hasVideo: { + type: 'boolean', + description: 'Whether a video stream is present', + condition: { field: 'operation', value: 'probe' }, + }, + hasAudio: { + type: 'boolean', + description: 'Whether an audio stream is present', + condition: { field: 'operation', value: 'probe' }, + }, + videoCodec: { + type: 'string', + description: 'Primary video codec', + condition: { field: 'operation', value: 'probe' }, + }, + audioCodec: { + type: 'string', + description: 'Primary audio codec', + condition: { field: 'operation', value: 'probe' }, + }, + streams: { + type: 'array', + description: 'All detected media streams', + condition: { field: 'operation', value: 'probe' }, + }, + }, +} diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 14f3bda53fd..6632d6fd971 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -57,6 +57,7 @@ import { EvernoteBlock } from '@/blocks/blocks/evernote' import { ExaBlock } from '@/blocks/blocks/exa' import { ExtendBlock, ExtendV2Block } from '@/blocks/blocks/extend' import { FathomBlock } from '@/blocks/blocks/fathom' +import { FfmpegBlock } from '@/blocks/blocks/ffmpeg' import { FileBlock, FileV2Block, FileV3Block, FileV4Block } from '@/blocks/blocks/file' import { FindymailBlock } from '@/blocks/blocks/findymail' import { FirecrawlBlock } from '@/blocks/blocks/firecrawl' @@ -305,6 +306,7 @@ 
export const registry: Record = { elasticsearch: ElasticsearchBlock, elevenlabs: ElevenLabsBlock, fathom: FathomBlock, + ffmpeg: FfmpegBlock, enrich: EnrichBlock, enrichment: EnrichmentBlock, evaluator: EvaluatorBlock, diff --git a/apps/sim/blocks/types.ts b/apps/sim/blocks/types.ts index 010be57690a..df6a627e806 100644 --- a/apps/sim/blocks/types.ts +++ b/apps/sim/blocks/types.ts @@ -55,6 +55,7 @@ export type IntegrationTag = | 'speech-to-text' | 'image-generation' | 'video-generation' + | 'media-processing' | 'cloud' | 'google-workspace' | 'microsoft-365' diff --git a/apps/sim/blocks/utils.ts b/apps/sim/blocks/utils.ts index 4a17b845263..bc5f7ff1acc 100644 --- a/apps/sim/blocks/utils.ts +++ b/apps/sim/blocks/utils.ts @@ -585,6 +585,7 @@ export function normalizeFileInput( */ export const BUILT_IN_TOOL_TYPES = new Set([ 'api', + 'ffmpeg', 'file', 'function', 'knowledge', diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index 7985328c089..ba42ffd9535 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -2432,6 +2432,16 @@ export function FathomIcon(props: SVGProps) { ) } +export function FFmpegIcon(props: SVGProps) { + return ( + + + + ) +} export function LinkupIcon(props: SVGProps) { return ( diff --git a/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts b/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts new file mode 100644 index 00000000000..ea5779549b8 --- /dev/null +++ b/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts @@ -0,0 +1,86 @@ +import { z } from 'zod' +import { userFileSchema } from '@/lib/api/contracts/primitives' +import { toolJsonResponseSchema } from '@/lib/api/contracts/tools/media/shared' +import { defineRouteContract } from '@/lib/api/contracts/types' + +/** + * Supported FFmpeg operations. Each maps to a tool id of the form `ffmpeg_<operation>`.
+ */ +export const ffmpegOperations = [ + 'convert', + 'extract_audio', + 'trim', + 'compress', + 'probe', + 'thumbnail', + 'concat', + 'volume', + 'speed', +] as const + +export type FfmpegOperation = (typeof ffmpegOperations)[number] + +const MISSING_FILE_ERROR = 'A media file is required' + +export const ffmpegFileSchema = userFileSchema.extend({ + type: z.string().optional().default(''), +}) + +export const ffmpegToolBodySchema = z + .object({ + operation: z.enum(ffmpegOperations, { + error: `operation must be one of: ${ffmpegOperations.join(', ')}`, + }), + /** Single input file (all operations except `concat`). */ + file: ffmpegFileSchema.optional(), + /** Multiple input files (`concat` operation). */ + files: z.array(ffmpegFileSchema).min(2, 'concat requires at least 2 files').optional(), + /** Output container/format, e.g. `mp4`, `webm`, `mp3`, `wav`. */ + format: z.string().min(1).max(16).optional(), + /** Explicit video codec override, e.g. `libx264`, `vp9`. */ + videoCodec: z.string().min(1).max(32).optional(), + /** Explicit audio codec override, e.g. `aac`, `libmp3lame`. */ + audioCodec: z.string().min(1).max(32).optional(), + /** Trim start offset in seconds or `HH:MM:SS(.ms)`. */ + startTime: z.string().min(1).max(32).optional(), + /** Trim duration in seconds or `HH:MM:SS(.ms)`. */ + duration: z.string().min(1).max(32).optional(), + /** Scale dimensions for compress, e.g. `1280:720`, `1280x720`, `1280:-2`, `50%`. */ + scale: z.string().min(1).max(32).optional(), + /** Constant Rate Factor for compress (0 = lossless, 51 = worst). */ + crf: z.coerce.number().int().min(0).max(51).optional(), + /** Target video bitrate for compress, e.g. `1M`, `800k`. */ + videoBitrate: z.string().min(1).max(16).optional(), + /** Timestamp for thumbnail extraction in seconds or `HH:MM:SS(.ms)`. */ + time: z.string().min(1).max(32).optional(), + /** Volume adjustment: a multiplier (`1.5`, `0.5`) or decibel value (`10dB`, `-6dB`). 
*/ + volume: z.string().min(1).max(16).optional(), + /** Playback speed multiplier for the `speed` operation (0.5 = half, 2 = double). */ + speed: z.coerce.number().positive().max(100).optional(), + workspaceId: z.string().optional(), + workflowId: z.string().optional(), + executionId: z.string().optional(), + }) + .passthrough() + .superRefine((data, ctx) => { + if (data.operation === 'concat') { + if (!data.files || data.files.length < 2) { + ctx.addIssue({ + code: 'custom', + path: ['files'], + message: 'concat requires at least 2 input files', + }) + } + } else if (!data.file) { + ctx.addIssue({ code: 'custom', path: ['file'], message: MISSING_FILE_ERROR }) + } + }) + +export type FfmpegToolBody = z.input + +export const ffmpegToolContract = defineRouteContract({ + method: 'POST', + path: '/api/tools/ffmpeg/process', + body: ffmpegToolBodySchema, + response: { mode: 'json', schema: toolJsonResponseSchema }, +}) diff --git a/apps/sim/lib/api/contracts/tools/media/index.ts b/apps/sim/lib/api/contracts/tools/media/index.ts index 922d0e5651e..0913527bd7a 100644 --- a/apps/sim/lib/api/contracts/tools/media/index.ts +++ b/apps/sim/lib/api/contracts/tools/media/index.ts @@ -1,4 +1,5 @@ export * from '@/lib/api/contracts/tools/media/document-parse' +export * from '@/lib/api/contracts/tools/media/ffmpeg' export * from '@/lib/api/contracts/tools/media/image' export * from '@/lib/api/contracts/tools/media/shared' export * from '@/lib/api/contracts/tools/media/stt' diff --git a/apps/sim/tools/ffmpeg/compress.ts b/apps/sim/tools/ffmpeg/compress.ts new file mode 100644 index 00000000000..9d9fc5ab793 --- /dev/null +++ b/apps/sim/tools/ffmpeg/compress.ts @@ -0,0 +1,68 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegCompressParams, FfmpegFileResponse } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegCompressTool: 
ToolConfig = { + id: 'ffmpeg_compress', + name: 'FFmpeg Compress', + description: 'Compress and/or rescale a video to reduce file size', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The video file to compress', + }, + scale: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: + 'Optional output dimensions, e.g. 1280x720, 1280:720, or 1280:-2 (keep aspect ratio)', + }, + crf: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Constant Rate Factor (0 = lossless, 23 = default, 51 = worst quality)', + }, + videoBitrate: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Optional target video bitrate, e.g. 1M or 800k', + }, + videoCodec: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Optional video codec override (defaults to libx264)', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'compress', + file: params.file, + scale: params.scale, + crf: params.crf, + videoBitrate: params.videoBitrate, + videoCodec: params.videoCodec, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/concat.ts b/apps/sim/tools/ffmpeg/concat.ts new file mode 100644 index 00000000000..bc4727e63cc --- /dev/null +++ b/apps/sim/tools/ffmpeg/concat.ts @@ -0,0 +1,39 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegConcatParams, FfmpegFileResponse } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegConcatTool: ToolConfig = { + id: 'ffmpeg_concat', + name: 'FFmpeg Concatenate', + description: 'Join multiple 
media files of the same format and codec into a single output file', + version: '1.0.0', + + params: { + files: { + type: 'file[]', + required: true, + visibility: 'user-only', + description: 'Two or more media files to join, in order. Files must share the same codec', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'concat', + files: params.files, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/convert.ts b/apps/sim/tools/ffmpeg/convert.ts new file mode 100644 index 00000000000..5d0fcd8bb33 --- /dev/null +++ b/apps/sim/tools/ffmpeg/convert.ts @@ -0,0 +1,60 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegConvertParams, FfmpegFileResponse } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegConvertTool: ToolConfig = { + id: 'ffmpeg_convert', + name: 'FFmpeg Convert', + description: 'Convert (transcode) a video or audio file to a different container/format', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The media file to convert', + }, + format: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Target output format/container (e.g. mp4, webm, mov, mkv, mp3, wav)', + }, + videoCodec: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Optional video codec override (e.g. libx264, vp9)', + }, + audioCodec: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Optional audio codec override (e.g. 
aac, libmp3lame)', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'convert', + file: params.file, + format: params.format, + videoCodec: params.videoCodec, + audioCodec: params.audioCodec, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/extract_audio.ts b/apps/sim/tools/ffmpeg/extract_audio.ts new file mode 100644 index 00000000000..d9064393d5f --- /dev/null +++ b/apps/sim/tools/ffmpeg/extract_audio.ts @@ -0,0 +1,53 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegExtractAudioParams, FfmpegFileResponse } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegExtractAudioTool: ToolConfig = { + id: 'ffmpeg_extract_audio', + name: 'FFmpeg Extract Audio', + description: 'Extract the audio track from a video file into an audio file', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The video file to extract audio from', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Output audio format (mp3, wav, aac, flac, ogg, m4a, opus). Defaults to mp3', + }, + audioCodec: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Optional audio codec override (e.g. 
aac, libmp3lame)', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'extract_audio', + file: params.file, + format: params.format, + audioCodec: params.audioCodec, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/index.ts b/apps/sim/tools/ffmpeg/index.ts new file mode 100644 index 00000000000..6f55c55484c --- /dev/null +++ b/apps/sim/tools/ffmpeg/index.ts @@ -0,0 +1,9 @@ +export { ffmpegCompressTool } from '@/tools/ffmpeg/compress' +export { ffmpegConcatTool } from '@/tools/ffmpeg/concat' +export { ffmpegConvertTool } from '@/tools/ffmpeg/convert' +export { ffmpegExtractAudioTool } from '@/tools/ffmpeg/extract_audio' +export { ffmpegProbeTool } from '@/tools/ffmpeg/probe' +export { ffmpegSpeedTool } from '@/tools/ffmpeg/speed' +export { ffmpegThumbnailTool } from '@/tools/ffmpeg/thumbnail' +export { ffmpegTrimTool } from '@/tools/ffmpeg/trim' +export { ffmpegVolumeTool } from '@/tools/ffmpeg/volume' diff --git a/apps/sim/tools/ffmpeg/probe.ts b/apps/sim/tools/ffmpeg/probe.ts new file mode 100644 index 00000000000..37f266bb86e --- /dev/null +++ b/apps/sim/tools/ffmpeg/probe.ts @@ -0,0 +1,39 @@ +import { + FFMPEG_PROBE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegProbeResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegProbeParams, FfmpegProbeResponse } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegProbeTool: ToolConfig = { + id: 'ffmpeg_probe', + name: 'FFmpeg Probe', + description: 'Inspect a media file and return metadata (duration, format, codecs, resolution)', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The media file to inspect', + }, + }, + + request: { + url: 
FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'probe', + file: params.file, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegProbeResponse, + + outputs: FFMPEG_PROBE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/shared.ts b/apps/sim/tools/ffmpeg/shared.ts new file mode 100644 index 00000000000..088ab379c01 --- /dev/null +++ b/apps/sim/tools/ffmpeg/shared.ts @@ -0,0 +1,125 @@ +import type { FfmpegContext, FfmpegFileResponse, FfmpegProbeResponse } from '@/tools/ffmpeg/types' +import type { OutputProperty } from '@/tools/types' + +export const FFMPEG_PROCESS_URL = '/api/tools/ffmpeg/process' + +/** + * Extracts the execution context fields injected into tool params at runtime. + */ +export function ffmpegContextBody(params: FfmpegContext): { + workspaceId?: string + workflowId?: string + executionId?: string +} { + return { + workspaceId: params._context?.workspaceId, + workflowId: params._context?.workflowId, + executionId: params._context?.executionId, + } +} + +/** + * Shared transform for FFmpeg operations that produce a single output file. + */ +export async function transformFfmpegFileResponse(response: Response): Promise { + const data = await response.json() + + if (!response.ok || data.error || data.success === false) { + return { + success: false, + error: data.error || 'FFmpeg processing failed', + output: { file: undefined as never, fileName: '', format: '', size: 0 }, + } + } + + const output = data.output ?? {} + return { + success: true, + output: { + file: output.file, + fileName: output.fileName ?? '', + format: output.format ?? '', + size: output.size ?? 0, + }, + } +} + +/** + * Transform for the FFmpeg probe operation that returns media metadata. 
+ */ +export async function transformFfmpegProbeResponse( + response: Response +): Promise { + const data = await response.json() + + const emptyOutput = { + durationSeconds: null, + format: null, + bitrate: null, + width: null, + height: null, + hasVideo: false, + hasAudio: false, + videoCodec: null, + audioCodec: null, + streams: [], + } + + if (!response.ok || data.error || data.success === false) { + return { + success: false, + error: data.error || 'FFmpeg probe failed', + output: emptyOutput, + } + } + + const output = data.output ?? {} + return { + success: true, + output: { + durationSeconds: output.durationSeconds ?? null, + format: output.format ?? null, + bitrate: output.bitrate ?? null, + width: output.width ?? null, + height: output.height ?? null, + hasVideo: Boolean(output.hasVideo), + hasAudio: Boolean(output.hasAudio), + videoCodec: output.videoCodec ?? null, + audioCodec: output.audioCodec ?? null, + streams: Array.isArray(output.streams) ? output.streams : [], + }, + } +} + +export const FFMPEG_FILE_OUTPUTS: Record = { + file: { type: 'file', description: 'The processed media file for use in downstream blocks' }, + fileName: { type: 'string', description: 'Generated output file name' }, + format: { type: 'string', description: 'Output container/format' }, + size: { type: 'number', description: 'Output file size in bytes' }, +} + +export const FFMPEG_PROBE_OUTPUTS: Record = { + durationSeconds: { type: 'number', description: 'Media duration in seconds', nullable: true }, + format: { type: 'string', description: 'Container format name', nullable: true }, + bitrate: { type: 'number', description: 'Overall bitrate in bits per second', nullable: true }, + width: { type: 'number', description: 'Video width in pixels', nullable: true }, + height: { type: 'number', description: 'Video height in pixels', nullable: true }, + hasVideo: { type: 'boolean', description: 'Whether the media contains a video stream' }, + hasAudio: { type: 'boolean', description: 
'Whether the media contains an audio stream' }, + videoCodec: { type: 'string', description: 'Primary video codec', nullable: true }, + audioCodec: { type: 'string', description: 'Primary audio codec', nullable: true }, + streams: { + type: 'array', + description: 'All detected media streams', + items: { + type: 'object', + properties: { + index: { type: 'number', description: 'Stream index' }, + type: { type: 'string', description: 'Stream type (video, audio, subtitle)' }, + codec: { type: 'string', description: 'Stream codec name' }, + width: { type: 'number', description: 'Stream width in pixels' }, + height: { type: 'number', description: 'Stream height in pixels' }, + }, + }, + }, +} diff --git a/apps/sim/tools/ffmpeg/speed.ts b/apps/sim/tools/ffmpeg/speed.ts new file mode 100644 index 00000000000..9ba968e382d --- /dev/null +++ b/apps/sim/tools/ffmpeg/speed.ts @@ -0,0 +1,46 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegFileResponse, FfmpegSpeedParams } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegSpeedTool: ToolConfig = { + id: 'ffmpeg_speed', + name: 'FFmpeg Change Speed', + description: 'Speed up or slow down playback of a video or audio file', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The media file to retime', + }, + speed: { + type: 'number', + required: true, + visibility: 'user-or-llm', + description: 'Playback speed multiplier (0.5 = half speed, 2 = double speed)', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'speed', + file: params.file, + speed: params.speed, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, 
+} diff --git a/apps/sim/tools/ffmpeg/thumbnail.ts b/apps/sim/tools/ffmpeg/thumbnail.ts new file mode 100644 index 00000000000..56debdb448a --- /dev/null +++ b/apps/sim/tools/ffmpeg/thumbnail.ts @@ -0,0 +1,53 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegFileResponse, FfmpegThumbnailParams } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegThumbnailTool: ToolConfig = { + id: 'ffmpeg_thumbnail', + name: 'FFmpeg Thumbnail', + description: 'Extract a single frame from a video at a given timestamp as an image', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The video file to extract a frame from', + }, + time: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Timestamp in seconds or HH:MM:SS(.ms), e.g. 5 or 00:00:05. Defaults to 1s', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Output image format (jpg, png, webp). 
Defaults to jpg', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'thumbnail', + file: params.file, + time: params.time, + format: params.format, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/trim.ts b/apps/sim/tools/ffmpeg/trim.ts new file mode 100644 index 00000000000..341d574b3eb --- /dev/null +++ b/apps/sim/tools/ffmpeg/trim.ts @@ -0,0 +1,53 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegFileResponse, FfmpegTrimParams } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegTrimTool: ToolConfig = { + id: 'ffmpeg_trim', + name: 'FFmpeg Trim', + description: 'Cut a segment from a video or audio file using a start time and/or duration', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The media file to trim', + }, + startTime: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Start offset in seconds or HH:MM:SS(.ms), e.g. 5 or 00:00:05', + }, + duration: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Duration to keep in seconds or HH:MM:SS(.ms), e.g. 
30 or 00:00:30', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'trim', + file: params.file, + startTime: params.startTime, + duration: params.duration, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/ffmpeg/types.ts b/apps/sim/tools/ffmpeg/types.ts new file mode 100644 index 00000000000..a5e4685cf83 --- /dev/null +++ b/apps/sim/tools/ffmpeg/types.ts @@ -0,0 +1,104 @@ +import type { UserFile } from '@/executor/types' +import type { ToolResponse } from '@/tools/types' + +/** + * Execution context injected into tool params at runtime. + */ +export interface FfmpegContext { + _context?: { + workspaceId?: string + workflowId?: string + executionId?: string + } +} + +export interface FfmpegFileOutput { + /** The processed media file, stored and available to downstream blocks. */ + file: UserFile + /** Generated output file name. */ + fileName: string + /** Output container/format (e.g. `mp4`, `mp3`). */ + format: string + /** Output file size in bytes. 
*/ + size: number +} + +export interface FfmpegFileResponse extends ToolResponse { + output: FfmpegFileOutput +} + +export interface FfmpegProbeStream { + index: number + type: string | null + codec: string | null + width: number | null + height: number | null +} + +export interface FfmpegProbeOutput { + durationSeconds: number | null + format: string | null + bitrate: number | null + width: number | null + height: number | null + hasVideo: boolean + hasAudio: boolean + videoCodec: string | null + audioCodec: string | null + streams: FfmpegProbeStream[] +} + +export interface FfmpegProbeResponse extends ToolResponse { + output: FfmpegProbeOutput +} + +export interface FfmpegConvertParams extends FfmpegContext { + file?: UserFile + format: string + videoCodec?: string + audioCodec?: string +} + +export interface FfmpegExtractAudioParams extends FfmpegContext { + file?: UserFile + format?: string + audioCodec?: string +} + +export interface FfmpegTrimParams extends FfmpegContext { + file?: UserFile + startTime?: string + duration?: string +} + +export interface FfmpegCompressParams extends FfmpegContext { + file?: UserFile + scale?: string + crf?: number + videoBitrate?: string + videoCodec?: string +} + +export interface FfmpegProbeParams extends FfmpegContext { + file?: UserFile +} + +export interface FfmpegThumbnailParams extends FfmpegContext { + file?: UserFile + time?: string + format?: string +} + +export interface FfmpegConcatParams extends FfmpegContext { + files?: UserFile[] +} + +export interface FfmpegVolumeParams extends FfmpegContext { + file?: UserFile + volume: string +} + +export interface FfmpegSpeedParams extends FfmpegContext { + file?: UserFile + speed: number +} diff --git a/apps/sim/tools/ffmpeg/volume.ts b/apps/sim/tools/ffmpeg/volume.ts new file mode 100644 index 00000000000..bbca6b71085 --- /dev/null +++ b/apps/sim/tools/ffmpeg/volume.ts @@ -0,0 +1,46 @@ +import { + FFMPEG_FILE_OUTPUTS, + FFMPEG_PROCESS_URL, + ffmpegContextBody, + 
transformFfmpegFileResponse, +} from '@/tools/ffmpeg/shared' +import type { FfmpegFileResponse, FfmpegVolumeParams } from '@/tools/ffmpeg/types' +import type { ToolConfig } from '@/tools/types' + +export const ffmpegVolumeTool: ToolConfig = { + id: 'ffmpeg_volume', + name: 'FFmpeg Adjust Volume', + description: 'Adjust the audio volume of a video or audio file', + version: '1.0.0', + + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'The media file to adjust', + }, + volume: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Volume as a multiplier (e.g. 1.5, 0.5) or decibels (e.g. 10dB, -6dB)', + }, + }, + + request: { + url: FFMPEG_PROCESS_URL, + method: 'POST', + headers: () => ({ 'Content-Type': 'application/json' }), + body: (params) => ({ + operation: 'volume', + file: params.file, + volume: params.volume, + ...ffmpegContextBody(params), + }), + }, + + transformResponse: transformFfmpegFileResponse, + + outputs: FFMPEG_FILE_OUTPUTS, +} diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index d6558b49423..3846f50dd04 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -662,6 +662,17 @@ import { fathomListTeamMembersTool, fathomListTeamsTool, } from '@/tools/fathom' +import { + ffmpegCompressTool, + ffmpegConcatTool, + ffmpegConvertTool, + ffmpegExtractAudioTool, + ffmpegProbeTool, + ffmpegSpeedTool, + ffmpegThumbnailTool, + ffmpegTrimTool, + ffmpegVolumeTool, +} from '@/tools/ffmpeg' import { fileAppendTool, fileFetchTool, @@ -5040,6 +5051,15 @@ export const tools: Record = { fathom_get_transcript: fathomGetTranscriptTool, fathom_list_team_members: fathomListTeamMembersTool, fathom_list_teams: fathomListTeamsTool, + ffmpeg_convert: ffmpegConvertTool, + ffmpeg_extract_audio: ffmpegExtractAudioTool, + ffmpeg_trim: ffmpegTrimTool, + ffmpeg_compress: ffmpegCompressTool, + ffmpeg_probe: ffmpegProbeTool, + ffmpeg_thumbnail: ffmpegThumbnailTool, + 
ffmpeg_concat: ffmpegConcatTool, + ffmpeg_volume: ffmpegVolumeTool, + ffmpeg_speed: ffmpegSpeedTool, findymail_find_email_from_linkedin: findymailFindEmailFromLinkedInTool, findymail_find_email_from_name: findymailFindEmailFromNameTool, findymail_find_emails_by_domain: findymailFindEmailsByDomainTool, diff --git a/scripts/check-api-validation-contracts.ts b/scripts/check-api-validation-contracts.ts index 4fb99356eed..5868ac3ec30 100644 --- a/scripts/check-api-validation-contracts.ts +++ b/scripts/check-api-validation-contracts.ts @@ -9,8 +9,8 @@ const QUERY_HOOKS_DIR = path.join(ROOT, 'apps/sim/hooks/queries') const SELECTOR_HOOKS_DIR = path.join(ROOT, 'apps/sim/hooks/selectors') const BASELINE = { - totalRoutes: 758, - zodRoutes: 758, + totalRoutes: 759, + zodRoutes: 759, nonZodRoutes: 0, } as const From 52323f640314253cd60c00b283075fbd4e239a55 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Fri, 29 May 2026 17:31:20 -0700 Subject: [PATCH 2/3] fix(ffmpeg): guard concat output size, validate scale/volume, clarify ffprobe errors - Route concat output through finalize() so it enforces the empty-file and 200 MB output-size checks like every other operation - Validate scale (width:height) and volume (multiplier/dB) against strict patterns before interpolating into the filter graph (prevents filter injection) - Default compress audio to -c:a copy so audio isn't silently re-encoded - Surface a clear 'ffprobe not found' message and document that Change Speed also requires ffprobe --- apps/docs/content/docs/en/tools/ffmpeg.mdx | 5 +- .../sim/app/api/tools/ffmpeg/process/route.ts | 58 +++++++++++++------ 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/apps/docs/content/docs/en/tools/ffmpeg.mdx b/apps/docs/content/docs/en/tools/ffmpeg.mdx index 4d419826237..ec1bd36a0ef 100644 --- a/apps/docs/content/docs/en/tools/ffmpeg.mdx +++ b/apps/docs/content/docs/en/tools/ffmpeg.mdx @@ -34,8 +34,9 @@ Pick an **Operation**, supply the input file, and fill in the fields 
shown for t **Notes & limits.** - **Concatenate** uses FFmpeg's concat demuxer with stream copy, so all inputs must share the same codec, resolution, and format (typical for clips exported from one source). Mixed-codec inputs will fail. - **Change Speed** automatically retimes whichever streams exist — video via `setpts`, audio via `atempo` (chained for speeds beyond 0.5×–2×). -- Input and output files are capped at 200 MB each. -- Requires the FFmpeg binary on the server (bundled via `ffmpeg-static`; `ffprobe` from the system PATH is needed for **Get Media Info**). +- Input and output files are capped at 200 MB each (this includes the joined output of **Concatenate**). +- **Scale** and **Volume** accept only `width:height` dimensions and numeric/decibel values respectively — other characters are rejected. +- Requires the FFmpeg binary on the server (bundled via `ffmpeg-static`). `ffprobe` from the system PATH is additionally needed for **Get Media Info** and **Change Speed** (which inspects streams before retiming). {/* MANUAL-CONTENT-END */} diff --git a/apps/sim/app/api/tools/ffmpeg/process/route.ts b/apps/sim/app/api/tools/ffmpeg/process/route.ts index 8d46b5c73b7..db48b1800b0 100644 --- a/apps/sim/app/api/tools/ffmpeg/process/route.ts +++ b/apps/sim/app/api/tools/ffmpeg/process/route.ts @@ -192,7 +192,10 @@ function probeMedia(inputPath: string): Promise { return new Promise((resolve, reject) => { ffmpeg.ffprobe(inputPath, (err, metadata) => { if (err) { - reject(new Error(`FFprobe error: ${err.message}`)) + const message = /cannot find ffprobe|ENOENT|not found/i.test(err.message) + ? 'ffprobe binary not found. Install it on the server (it ships with a full ffmpeg install: apk add ffmpeg / apt-get install ffmpeg / brew install ffmpeg).' 
+ : `FFprobe error: ${err.message}` + reject(new Error(message)) return } const videoStream = metadata.streams.find((s) => s.codec_type === 'video') @@ -246,6 +249,18 @@ function normalizeScale(scale: string): string { return scale.trim().replace(/x/gi, ':') } +/** + * Strict `width:height` form (each a positive integer or a negative auto value + * like -1/-2). Rejects anything that could append extra filter stages. + */ +const SCALE_FILTER_PATTERN = /^-?\d{1,5}:-?\d{1,5}$/ + +/** + * A linear multiplier (`1.5`, `0.5`) or a decibel value (`10dB`, `-6dB`). + * Rejects commas, brackets, and any other filter-graph metacharacters. + */ +const VOLUME_FILTER_PATTERN = /^-?\d+(\.\d+)?(dB)?$/i + async function storeOutputFile( buffer: Buffer, fileName: string, @@ -367,21 +382,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => { .output(outputPath) ) - const outputBuffer = await fs.readFile(outputPath) const mimeType = getMimeForFormat(outExt) - const fileName = `ffmpeg-concat-${Date.now()}.${outExt}` - const file = await storeOutputFile( - outputBuffer, - fileName, - mimeType, - executionContext, - authResult.userId - ) - - return NextResponse.json({ - success: true, - output: { file, fileName, format: outExt, size: outputBuffer.length }, - }) + return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) } // Single-input operations @@ -451,12 +453,25 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } if (operation === 'compress') { + let scaleFilter: string | undefined + if (body.scale) { + scaleFilter = normalizeScale(body.scale) + if (!SCALE_FILTER_PATTERN.test(scaleFilter)) { + return NextResponse.json( + { error: 'Invalid scale. Use width:height with integers, e.g. 
1280:720 or 1280:-2' }, + { status: 400 } + ) + } + } const outputPath = path.join(tempDir, `output.${outExt}`) await runFfmpeg((cmd) => { - let c = cmd.input(inputPath).videoCodec(body.videoCodec || 'libx264') + let c = cmd + .input(inputPath) + .videoCodec(body.videoCodec || 'libx264') + .audioCodec(body.audioCodec || 'copy') if (body.crf !== undefined) c = c.outputOptions(['-crf', String(body.crf)]) if (body.videoBitrate) c = c.videoBitrate(body.videoBitrate) - if (body.scale) c = c.videoFilters(`scale=${normalizeScale(body.scale)}`) + if (scaleFilter) c = c.videoFilters(`scale=${scaleFilter}`) return c.toFormat(outExt).output(outputPath) }) return await finalize(outputPath, outExt, mimeType, executionContext, authResult.userId) @@ -480,10 +495,17 @@ export const POST = withRouteHandler(async (request: NextRequest) => { { status: 400 } ) } + const volume = body.volume.trim() + if (!VOLUME_FILTER_PATTERN.test(volume)) { + return NextResponse.json( + { error: 'Invalid volume. Use a multiplier (e.g. 1.5) or decibels (e.g. 
10dB, -6dB)' }, + { status: 400 } + ) + } const outputPath = path.join(tempDir, `output.${outExt}`) const isVideo = isVideoExtension(inputExt) await runFfmpeg((cmd) => { - let c = cmd.input(inputPath).audioFilters(`volume=${body.volume}`) + let c = cmd.input(inputPath).audioFilters(`volume=${volume}`) if (isVideo) c = c.outputOptions(['-c:v', 'copy']) return c.toFormat(outExt).output(outputPath) }) From 7108486567f8c0e319c93ded9a48734d45b98642 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Fri, 29 May 2026 18:23:27 -0700 Subject: [PATCH 3/3] fix(ffmpeg): sanitize output extension, stat before read, bound speed - Sanitize all temp-file extensions (input name/MIME and output format) to [a-z0-9] so a crafted format like ../../../t.jpg cannot escape the temp dir (path traversal) - finalize() now checks output size via fs.stat before fs.readFile, so an oversized output is rejected without loading the whole file into memory - Bound the speed multiplier to [0.1, 100] in the contract (and block) to prevent tiny values producing pathologically large outputs --- .../sim/app/api/tools/ffmpeg/process/route.ts | 43 ++++++++++++------- apps/sim/blocks/blocks/ffmpeg.ts | 2 +- .../lib/api/contracts/tools/media/ffmpeg.ts | 6 ++- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/apps/sim/app/api/tools/ffmpeg/process/route.ts b/apps/sim/app/api/tools/ffmpeg/process/route.ts index db48b1800b0..1e055bc50b5 100644 --- a/apps/sim/app/api/tools/ffmpeg/process/route.ts +++ b/apps/sim/app/api/tools/ffmpeg/process/route.ts @@ -139,16 +139,26 @@ function getAudioCodec(format: string): string { } /** - * Derives a sensible file extension for an input temp file from its name or MIME type. + * Reduces a user- or filename-derived extension to a safe `[a-z0-9]` token. + * Strips path separators, dots, and other metacharacters so the value can be + * interpolated into a temp file name without enabling path traversal. 
*/ -function getInputExtension(file: UserFile): string { - const fromName = path - .extname(file.name || '') - .replace('.', '') +function safeExtension(value: string | undefined): string { + return (value ?? '') + .trim() .toLowerCase() + .replace(/[^a-z0-9]/g, '') + .slice(0, 8) +} + +/** + * Derives a safe file extension for an input temp file from its name or MIME type. + */ +function getInputExtension(file: UserFile): string { + const fromName = safeExtension(path.extname(file.name || '')) if (fromName) return fromName - const subtype = (file.type || '').split('/')[1] - return subtype ? subtype.toLowerCase() : 'dat' + const subtype = safeExtension((file.type || '').split('/')[1]) + return subtype || 'dat' } function isVideoExtension(ext: string): boolean { @@ -402,13 +412,13 @@ export const POST = withRouteHandler(async (request: NextRequest) => { let mimeType = getMimeForFormat(inputExt) if (operation === 'convert') { - if (!body.format) { + outExt = safeExtension(body.format) + if (!outExt) { return NextResponse.json( - { error: 'format is required for the convert operation' }, + { error: 'A valid output format is required for the convert operation (e.g. 
mp4, mp3)' }, { status: 400 } ) } - outExt = body.format.trim().toLowerCase() mimeType = getMimeForFormat(outExt) const outputPath = path.join(tempDir, `output.${outExt}`) await runFfmpeg((cmd) => { @@ -421,7 +431,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } if (operation === 'extract_audio') { - outExt = (body.format?.trim() || 'mp3').toLowerCase() + outExt = safeExtension(body.format) || 'mp3' mimeType = getMimeForFormat(outExt) const outputPath = path.join(tempDir, `output.${outExt}`) await runFfmpeg((cmd) => @@ -478,7 +488,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } if (operation === 'thumbnail') { - outExt = (body.format?.trim() || 'jpg').toLowerCase() + outExt = safeExtension(body.format) || 'jpg' mimeType = getMimeForFormat(outExt) const time = body.time || '00:00:01' const outputPath = path.join(tempDir, `output.${outExt}`) @@ -553,13 +563,16 @@ export const POST = withRouteHandler(async (request: NextRequest) => { context: ExecutionContext | null, userId?: string ): Promise { - const outputBuffer = await fs.readFile(outputPath) - if (outputBuffer.length === 0) { + // Check size via stat before reading so an oversized output is rejected + // without first pulling the entire file into memory. 
+ const { size: outputSize } = await fs.stat(outputPath) + if (outputSize === 0) { throw new Error('FFmpeg produced an empty output file') } - if (outputBuffer.length > MAX_FFMPEG_OUTPUT_BYTES) { + if (outputSize > MAX_FFMPEG_OUTPUT_BYTES) { throw new Error('Output file exceeds the maximum allowed size') } + const outputBuffer = await fs.readFile(outputPath) const fileName = `ffmpeg-${operation}-${Date.now()}.${format}` const file = await storeOutputFile(outputBuffer, fileName, mimeType, context, userId) logger.info(`[${requestId}] FFmpeg ${operation} completed`, { diff --git a/apps/sim/blocks/blocks/ffmpeg.ts b/apps/sim/blocks/blocks/ffmpeg.ts index 95e5af87bce..bd3e56c5685 100644 --- a/apps/sim/blocks/blocks/ffmpeg.ts +++ b/apps/sim/blocks/blocks/ffmpeg.ts @@ -238,7 +238,7 @@ export const FfmpegBlock: BlockConfig = { videoBitrate: params.videoBitrate, time: params.time, volume: params.volume, - speed: parseOptionalNumberInput(params.speed, 'Speed', { min: 0 }), + speed: parseOptionalNumberInput(params.speed, 'Speed', { min: 0.1, max: 100 }), } }, }, diff --git a/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts b/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts index ea5779549b8..12d486535b4 100644 --- a/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts +++ b/apps/sim/lib/api/contracts/tools/media/ffmpeg.ts @@ -56,7 +56,11 @@ export const ffmpegToolBodySchema = z /** Volume adjustment: a multiplier (`1.5`, `0.5`) or decibel value (`10dB`, `-6dB`). */ volume: z.string().min(1).max(16).optional(), /** Playback speed multiplier for the `speed` operation (0.5 = half, 2 = double). */ - speed: z.coerce.number().positive().max(100).optional(), + speed: z.coerce + .number() + .min(0.1, 'speed must be at least 0.1 (10x slower)') + .max(100, 'speed must be at most 100 (100x faster)') + .optional(), workspaceId: z.string().optional(), workflowId: z.string().optional(), executionId: z.string().optional(),