|
549 | 549 | }, |
550 | 550 | "/api/content": { |
551 | 551 | "patch": { |
552 | | - "description": "Apply edits to content — trim, crop, resize, overlay text, or add an audio track. Accepts video, image, or audio input. Pass a `template` for a preset edit pipeline, or build your own with an `operations` array.", |
| 552 | + "description": "Apply edits to content — trim, crop, resize, overlay text, or add an audio track. Accepts video or audio input. Pass a `template` for a preset edit pipeline, or build your own with an `operations` array.", |
553 | 553 | "security": [ |
554 | 554 | { |
555 | 555 | "apiKeyAuth": [] |
|
2103 | 2103 | "minItems": 1, |
2104 | 2104 | "description": "Audio file URLs to transcribe" |
2105 | 2105 | }, |
| 2106 | + "language": { |
| 2107 | + "type": "string", |
| 2108 | + "default": "en", |
| 2109 | + "description": "Language code for transcription (e.g. en, es, fr). Defaults to en." |
| 2110 | + }, |
| 2111 | + "chunk_level": { |
| 2112 | + "type": "string", |
| 2113 | + "enum": [ |
| 2114 | + "none", |
| 2115 | + "segment", |
| 2116 | + "word" |
| 2117 | + ], |
| 2118 | + "default": "word", |
| 2119 | + "description": "Granularity of timestamp chunks. Defaults to word-level." |
| 2120 | + }, |
| 2121 | + "diarize": { |
| 2122 | + "type": "boolean", |
| 2123 | + "default": false, |
| 2124 | + "description": "Enable speaker diarization. Defaults to false." |
| 2125 | + }, |
2106 | 2126 | "model": { |
2107 | 2127 | "type": "string", |
2108 | 2128 | "description": "fal.ai model ID. Defaults to fal-ai/whisper" |
|
2112 | 2132 | "ContentCreateAudioResponse": { |
2113 | 2133 | "type": "object", |
2114 | 2134 | "required": [ |
2115 | | - "songUrl", |
| 2135 | + "audioUrl", |
2116 | 2136 | "fullLyrics", |
2117 | 2137 | "segments", |
2118 | 2138 | "segmentCount" |
2119 | 2139 | ], |
2120 | 2140 | "properties": { |
2121 | | - "songUrl": { |
| 2141 | + "audioUrl": { |
2122 | 2142 | "type": "string", |
2123 | | - "description": "URL of the transcribed song" |
| 2143 | + "description": "URL of the transcribed audio" |
2124 | 2144 | }, |
2125 | 2145 | "fullLyrics": { |
2126 | 2146 | "type": "string", |
|
2163 | 2183 | }, |
2164 | 2184 | "ContentCreateEditRequest": { |
2165 | 2185 | "type": "object", |
2166 | | - "description": "Must provide at least one input (video_url, image_url, or audio_url)", |
| 2186 | + "description": "Must provide at least one input (video_url or audio_url)", |
2167 | 2187 | "properties": { |
2168 | 2188 | "video_url": { |
2169 | 2189 | "type": "string", |
2170 | 2190 | "format": "uri", |
2171 | 2191 | "description": "Input video URL" |
2172 | 2192 | }, |
2173 | | - "image_url": { |
2174 | | - "type": "string", |
2175 | | - "format": "uri", |
2176 | | - "description": "Input image URL" |
2177 | | - }, |
2178 | 2193 | "audio_url": { |
2179 | 2194 | "type": "string", |
2180 | 2195 | "format": "uri", |
|
2359 | 2374 | }, |
2360 | 2375 | "description": "Optional reference image URLs to guide generation" |
2361 | 2376 | }, |
| 2377 | + "num_images": { |
| 2378 | + "type": "integer", |
| 2379 | + "minimum": 1, |
| 2380 | + "maximum": 4, |
| 2381 | + "default": 1, |
| 2382 | + "description": "Number of images to generate (1-4)" |
| 2383 | + }, |
| 2384 | + "aspect_ratio": { |
| 2385 | + "type": "string", |
| 2386 | + "enum": ["auto", "21:9", "16:9", "3:2", "4:3", "5:4", "1:1", "4:5", "3:4", "2:3", "9:16", "4:1", "1:4", "8:1", "1:8"], |
| 2387 | + "default": "auto", |
| 2388 | + "description": "Aspect ratio of the generated image" |
| 2389 | + }, |
| 2390 | + "resolution": { |
| 2391 | + "type": "string", |
| 2392 | + "enum": ["0.5K", "1K", "2K", "4K"], |
| 2393 | + "default": "1K", |
| 2394 | + "description": "Output resolution" |
| 2395 | + }, |
2362 | 2396 | "model": { |
2363 | 2397 | "type": "string", |
2364 | | - "description": "fal.ai model ID. Defaults to fal-ai/nano-banana-pro/edit" |
| 2398 | + "description": "fal.ai model ID. Defaults to fal-ai/nano-banana-2" |
2365 | 2399 | } |
2366 | 2400 | } |
2367 | 2401 | }, |
2368 | 2402 | "ContentCreateImageResponse": { |
2369 | 2403 | "type": "object", |
2370 | 2404 | "required": [ |
2371 | | - "imageUrl" |
| 2405 | + "imageUrl", |
| 2406 | + "images" |
2372 | 2407 | ], |
2373 | 2408 | "properties": { |
2374 | 2409 | "imageUrl": { |
2375 | 2410 | "type": "string", |
2376 | 2411 | "format": "uri", |
2377 | | - "description": "URL of the generated image" |
| 2412 | + "description": "URL of the first generated image" |
| 2413 | + }, |
| 2414 | + "images": { |
| 2415 | + "type": "array", |
| 2416 | + "items": { |
| 2417 | + "type": "string", |
| 2418 | + "format": "uri" |
| 2419 | + }, |
| 2420 | + "description": "URLs of all generated images. Contains one element when num_images is 1." |
2378 | 2421 | } |
2379 | 2422 | } |
2380 | 2423 | }, |
|
2551 | 2594 | "color": { |
2552 | 2595 | "type": "string", |
2553 | 2596 | "description": "Text color as a CSS color value", |
2554 | | - "example": "#FFFFFF" |
| 2597 | + "example": "white" |
2555 | 2598 | }, |
2556 | 2599 | "borderColor": { |
2557 | 2600 | "type": "string", |
2558 | 2601 | "description": "Text border/stroke color as a CSS color value", |
2559 | | - "example": "#000000" |
| 2602 | + "example": "black" |
2560 | 2603 | }, |
2561 | 2604 | "maxFontSize": { |
2562 | 2605 | "type": "number", |
2563 | 2606 | "description": "Maximum font size in pixels", |
2564 | | - "example": 48 |
| 2607 | + "example": 42 |
2565 | 2608 | } |
2566 | 2609 | } |
2567 | 2610 | }, |
|
2584 | 2627 | "video" |
2585 | 2628 | ], |
2586 | 2629 | "description": "Whether the input is an image or video" |
| 2630 | + }, |
| 2631 | + "upscale_factor": { |
| 2632 | + "type": "number", |
| 2633 | + "minimum": 1, |
| 2634 | + "maximum": 4, |
| 2635 | + "default": 2, |
| 2636 | + "description": "Upscale multiplier (1-4x). Defaults to 2x." |
| 2637 | + }, |
| 2638 | + "target_resolution": { |
| 2639 | + "type": "string", |
| 2640 | + "enum": [ |
| 2641 | + "720p", |
| 2642 | + "1080p", |
| 2643 | + "1440p", |
| 2644 | + "2160p" |
| 2645 | + ], |
| 2646 | + "description": "Target output resolution. When provided, overrides upscale_factor with a resolution-based upscale." |
2587 | 2647 | } |
2588 | 2648 | } |
2589 | 2649 | }, |
|
2602 | 2662 | }, |
2603 | 2663 | "ContentCreateVideoRequest": { |
2604 | 2664 | "type": "object", |
| 2665 | + "required": [ |
| 2666 | + "image_url" |
| 2667 | + ], |
2605 | 2668 | "properties": { |
2606 | 2669 | "template": { |
2607 | 2670 | "type": "string", |
|
2626 | 2689 | "image_url": { |
2627 | 2690 | "type": "string", |
2628 | 2691 | "format": "uri", |
2629 | | - "description": "Image URL. Used as the first frame (animate), style reference (reference), start frame (first-last), or face source (lipsync)" |
| 2692 | + "description": "Required. Image URL used as the input frame. The underlying model (fal-ai/veo3.1/fast/image-to-video) requires an image — text-only prompt mode is not supported. Generate an image first via POST /api/content/image if needed." |
2630 | 2693 | }, |
2631 | 2694 | "end_image_url": { |
2632 | 2695 | "type": "string", |
|
2692 | 2755 | "ContentCreateVideoResponse": { |
2693 | 2756 | "type": "object", |
2694 | 2757 | "required": [ |
2695 | | - "videoUrl" |
| 2758 | + "videoUrl", |
| 2759 | + "mode" |
2696 | 2760 | ], |
2697 | 2761 | "properties": { |
2698 | 2762 | "videoUrl": { |
2699 | 2763 | "type": "string", |
2700 | 2764 | "format": "uri", |
2701 | 2765 | "description": "URL of the generated video" |
| 2766 | + }, |
| 2767 | + "mode": { |
| 2768 | + "type": "string", |
| 2769 | + "enum": [ |
| 2770 | + "prompt", |
| 2771 | + "animate", |
| 2772 | + "reference", |
| 2773 | + "extend", |
| 2774 | + "first-last", |
| 2775 | + "lipsync" |
| 2776 | + ], |
| 2777 | + "description": "The resolved video generation mode (explicit or inferred from inputs)" |
2702 | 2778 | } |
2703 | 2779 | } |
2704 | 2780 | }, |
|