# OpenAPI 3.0 description of the "AI/ML Gateway" HTTP API.
openapi: 3.0.0
paths:
  # Playground image generation (v2 route). The required JSON body follows
  # Image.v1.GenerateImageDTO; the 200 response is declared as
  # Image.v1.GenerateImageResponseDTO.
  /v2/playground/images/generations:
    post:
      operationId: PlaygroundController_generateImageV2_v2
      # The &ref_0 anchor is aliased by the other Playground operations in
      # this file, so it has to stay on this first occurrence.
      tags: &ref_0
        - Playground
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Image.v1.GenerateImageDTO"
              $refId: Image.v1.GenerateImageDTO
      responses:
        "200":
          description: Successfully generated image
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Image.v1.GenerateImageResponseDTO"
  # Playground video generation (v2 route). The required JSON body follows
  # Video.v2.SubmitVideoPayload; the 200 response is declared as
  # Video.v2.PollVideoResponseDTO.
  /v2/playground/videos/generations:
    post:
      operationId: PlaygroundController_generateVideoV2_v2
      tags: *ref_0
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Video.v2.SubmitVideoPayload"
              $refId: Video.v2.SubmitVideoPayload
      responses:
        "200":
          description: Successfully generated video
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Video.v2.PollVideoResponseDTO"
  # Playground audio generation (v1 route). The required JSON body follows
  # Audio.v2.SubmitGenerationPayloadDTO. Only a bare 201 response is declared:
  # its description is empty and it has no response body schema.
  /v1/playground/audio/generations:
    post:
      operationId: PlaygroundController_generateAudio_v1
      tags: *ref_0
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Audio.v2.SubmitGenerationPayloadDTO"
              $refId: Audio.v2.SubmitGenerationPayloadDTO
      responses:
        "201":
          description: ""
  # Playground speech-to-text (v1 route). The required JSON body follows
  # Voice.v1.SpeechToTextPayloadDTO. Only a bare 201 response is declared:
  # its description is empty and it has no response body schema.
  /v1/playground/stt:
    post:
      operationId: PlaygroundController_generateSpeechToText_v1
      tags: *ref_0
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Voice.v1.SpeechToTextPayloadDTO"
              $refId: Voice.v1.SpeechToTextPayloadDTO
      responses:
        "201":
          description: ""
  # Playground text-to-speech (v1 route). The required JSON body follows
  # Voice.v1.TextToSpeechPayload. Only a bare 201 response is declared:
  # its description is empty and it has no response body schema.
  /v1/playground/tts:
    post:
      operationId: PlaygroundController_generateTextToSpeech_v1
      tags: *ref_0
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Voice.v1.TextToSpeechPayload"
              $refId: Voice.v1.TextToSpeechPayload
      responses:
        "201":
          description: ""
  # Playground embeddings (v1 route). The required JSON body follows
  # Embedding.v1.CreateEmbeddingsDTO; the 200 response is declared as
  # Embedding.v1.CreateEmbeddingsResponseDTO.
  /v1/playground/embeddings:
    post:
      operationId: PlaygroundController_generateEmbeddings_v1
      tags: *ref_0
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Embedding.v1.CreateEmbeddingsDTO"
              $refId: Embedding.v1.CreateEmbeddingsDTO
      responses:
        "200":
          description: Successfully generated embeddings
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Embedding.v1.CreateEmbeddingsResponseDTO"
  # Playground OCR (v1 route). The required JSON body follows
  # Vision.v1.OCRPayloadDTO.
  /v1/playground/ocr:
    post:
      operationId: PlaygroundController_generateOcr_v1
      tags: *ref_0
      x-hideTryItPanel: true
      parameters: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Vision.v1.OCRPayloadDTO"
              $refId: Vision.v1.OCRPayloadDTO
      responses:
        "200":
          description: Successfully generated ocr
          content:
            application/json:
              schema:
                # NOTE(review): the schema name "undefined" looks like a
                # generator artifact (a missing response DTO name) rather
                # than a real component — confirm the intended schema and
                # that this reference resolves.
                $ref: "#/components/schemas/undefined"
  # Models endpoint (v1 route). Takes no parameters; the 200 response is
  # declared as Model.v1.ModelsResponseDto.
  /v1/models:
    get:
      operationId: ModelsController_getModels_v1
      # The &ref_1 anchor is aliased by the other Models operations in this
      # file, so it has to stay on this first occurrence.
      tags: &ref_1
        - Models
      x-hideTryItPanel: true
      parameters: []
      responses:
        "200":
          description: ""
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Model.v1.ModelsResponseDto"
  # Unversioned models route. It declares the same 200 response schema as
  # /v1/models (Model.v1.ModelsResponseDto) and takes no parameters.
  /models:
    get:
      operationId: ModelsController_getModels
      tags: *ref_1
      x-hideTryItPanel: true
      parameters: []
      responses:
        "200":
          description: ""
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Model.v1.ModelsResponseDto"
  # Models-with-details endpoint (v1 route). Takes no parameters; the 200
  # response is declared as Model.v1.ModelDetailedDto.
  /v1/models/with-details:
    get:
      operationId: ModelsController_getModelsWithDetails_v1
      tags: *ref_1
      x-hideTryItPanel: true
      parameters: []
      responses:
        "200":
          description: ""
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Model.v1.ModelDetailedDto"
  # Unversioned models-with-details route. It declares the same 200 response
  # schema as /v1/models/with-details (Model.v1.ModelDetailedDto) and takes
  # no parameters.
  /models/with-details:
    get:
      operationId: ModelsController_getModelsWithDetails
      tags: *ref_1
      x-hideTryItPanel: true
      parameters: []
      responses:
        "200":
          description: ""
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Model.v1.ModelDetailedDto"
# Document-level metadata. The description and contact entries are present
# but empty.
info:
  title: AI/ML Gateway
  version: "1.0"
  description: ""
  contact: {}
# Declare every tag that operations under `paths` reference (Playground and
# Models), so tag-aware tooling can resolve them and display them in this
# order.
tags:
  - name: Playground
  - name: Models
# Base URL that the entries under `paths` are resolved against.
servers:
  - url: https://api.aimlapi.com
components:
  securitySchemes:
    # HTTP bearer authentication scheme, registered as "access-token".
    # NOTE(review): bearerFormat carries a placeholder-looking string rather
    # than a token format name — confirm this is intentional for the docs
    # renderer.
    access-token:
      type: http
      scheme: bearer
      bearerFormat: <YOUR_AIMLAPI_KEY>
      description: Bearer key
  schemas:
    Image.v1.GenerateImageDTO:
      anyOf:
        - type: object
          properties:
            model:
              type: string
              enum:
                - dall-e-2
            prompt:
              type: string
              maxLength: 1000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            'n':
              type: integer
              minimum: 1
              maximum: 10
              default: 1
              description: The number of images to generate.
            size:
              type: string
              enum:
                - 1024x1024
                - 512x512
                - 256x256
              default: 1024x1024
              description: The size of the generated image.
            response_format:
              type: string
              enum: &ref_2
                - url
                - b64_json
              default: url
              description: The format in which the generated images are returned.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - dall-e-3
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number. Only the value 1 is
            # allowed for this model.
            'n':
              type: integer
              enum:
                - 1
              default: 1
              description: The number of images to generate.
            quality:
              type: string
              enum:
                - standard
                - hd
              default: standard
              description: The quality of the image that will be generated.
            size:
              type: string
              enum:
                - 1024x1024
                - 1024x1792
                - 1792x1024
              default: 1024x1024
              description: The size of the generated image.
            style:
              type: string
              enum:
                - vivid
                - natural
              default: vivid
              description: The style of the generated images.
            response_format:
              type: string
              enum: *ref_2
              default: url
              description: The format in which the generated images are returned.
          required:
            - model
            - prompt
        - $ref: '#/components/schemas/Image.v1.GptImage'
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/schnell
            image_size:
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      default: 1024
                    height:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: &ref_3
                    - square_hd
                    - square
                    - portrait_4_3
                    - portrait_16_9
                    - landscape_4_3
                    - landscape_16_9
                  description: The size of the generated image.
              default: landscape_4_3
            num_inference_steps:
              type: integer
              minimum: 1
              description: The number of inference steps to perform.
              maximum: 12
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux-pro
            image_size:
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 256
                      maximum: 1440
                      default: 1024
                    height:
                      type: integer
                      minimum: 256
                      maximum: 1440
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: landscape_4_3
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              description: The number of inference steps to perform.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            # The allowed levels run from '1' to '6', so the description names
            # 6 (not 5) as the most permissive one. The &ref_4 anchor is
            # aliased by later schemas and must stay on this occurrence.
            safety_tolerance:
              type: string
              enum: &ref_4
                - '1'
                - '2'
                - '3'
                - '4'
                - '5'
                - '6'
              default: '2'
              description: >-
                The safety tolerance level for the generated image. 1 being the
                most strict and 6 being the most permissive.
            output_format:
              type: string
              enum: &ref_5
                - jpeg
                - png
              default: jpeg
              description: The format of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux-pro/v1.1
            image_size:
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 256
                      maximum: 1440
                      default: 1024
                    height:
                      type: integer
                      minimum: 256
                      maximum: 1440
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: landscape_4_3
            # The aliased enum (*ref_4) lists levels '1' through '6', so the
            # description names 6 (not 5) as the most permissive one.
            safety_tolerance:
              type: string
              enum: *ref_4
              default: '2'
              description: >-
                The safety tolerance level for the generated image. 1 being the
                most strict and 6 being the most permissive.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux-pro/v1.1-ultra
            # The aliased enum (*ref_4) lists levels '1' through '6', so the
            # description names 6 (not 5) as the most permissive one.
            safety_tolerance:
              type: string
              enum: *ref_4
              default: '2'
              description: >-
                The safety tolerance level for the generated image. 1 being the
                most strict and 6 being the most permissive.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            aspect_ratio:
              type: string
              enum: &ref_6
                - '21:9'
                - '16:9'
                - '4:3'
                - '3:2'
                - '1:1'
                - '2:3'
                - '3:4'
                - '9:16'
                - '9:21'
              default: '16:9'
              description: The aspect ratio of the generated image.
            raw:
              type: boolean
              enum:
                - false
              default: false
              description: Generate less processed, more natural-looking images.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux-pro/v1.1-ultra-raw
            # The aliased enum (*ref_4) lists levels '1' through '6', so the
            # description names 6 (not 5) as the most permissive one.
            safety_tolerance:
              type: string
              enum: *ref_4
              default: '2'
              description: >-
                The safety tolerance level for the generated image. 1 being the
                most strict and 6 being the most permissive.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            aspect_ratio:
              type: string
              enum: *ref_6
              default: '16:9'
              description: The aspect ratio of the generated image.
            raw:
              type: boolean
              enum:
                - true
              default: true
              description: Generate less processed, more natural-looking images.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/dev
            image_size:
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1536
                      default: 1024
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1536
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: landscape_4_3
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              description: The number of inference steps to perform.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/dev/image-to-image
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              description: The number of inference steps to perform.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            image_url:
              type: string
              format: uri
              description: The URL of the reference image.
            strength:
              type: number
              default: 0.95
              description: Determines how much the prompt influences the generated image.
          required:
            - model
            - prompt
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/srpo
            image_size:
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1536
                      default: 1024
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1536
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: landscape_4_3
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              default: 28
              description: The number of inference steps to perform.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              default: 4.5
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            acceleration:
              type: string
              enum: &ref_7
                - none
                - regular
                - high
              default: regular
              description: >-
                The speed of the generation. The higher the speed, the faster
                the generation.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/srpo/image-to-image
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              default: 40
              description: The number of inference steps to perform.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              default: 4.5
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            acceleration:
              type: string
              enum: *ref_7
              default: regular
              description: >-
                The speed of the generation. The higher the speed, the faster
                the generation.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            image_url:
              type: string
              format: uri
              description: The URL of the reference image.
            strength:
              type: number
              minimum: 0
              maximum: 1
              default: 0.95
              description: Determines how much the prompt influences the generated image.
          required:
            - model
            - prompt
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/kontext-pro/text-to-image
                - flux/kontext-max/text-to-image
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            # The allowed levels run from '1' to '6', so the description names
            # 6 (not 5) as the most permissive one. The &ref_8 anchor is
            # aliased by a later schema and must stay on this occurrence.
            safety_tolerance:
              type: string
              enum: &ref_8
                - '1'
                - '2'
                - '3'
                - '4'
                - '5'
                - '6'
              default: '2'
              description: >-
                The safety tolerance level for the generated image. 1 being the
                most strict and 6 being the most permissive.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            aspect_ratio:
              type: string
              enum: *ref_6
              default: '16:9'
              description: The aspect ratio of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # An image count is a whole number, so it is declared as an
            # integer rather than a general number.
            num_images:
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux/kontext-pro/image-to-image
                - flux/kontext-max/image-to-image
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            # The aliased enum (*ref_8) lists levels '1' through '6', so the
            # description names 6 (not 5) as the most permissive one.
            safety_tolerance:
              type: string
              enum: *ref_8
              default: '2'
              description: >-
                The safety tolerance level for the generated image. 1 being the
                most strict and 6 being the most permissive.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            aspect_ratio:
              type: string
              enum: *ref_6
              default: '16:9'
              description: The aspect ratio of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              type: number
              minimum: 1
              maximum: 4
              default: 1
              description: >-
                Number of image variations to generate. Each image is a
                different attempt to combine the reference images (from the
                image_url parameter) according to the prompt.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            image_url:
              anyOf:
                - type: string
                  format: uri
                - type: array
                  items:
                    type: string
                    format: uri
                  maxItems: 4
              description: >-
                One or more image URLs used as visual references. The model
                merges them into a single image following the prompt
                instructions.
          required:
            - model
            - prompt
            - image_url
        # The Flux 2 payload variants (base, LoRA, Pro, Max, each with an edit
        # counterpart) are declared as named component schemas and referenced
        # here instead of being inlined.
        - $ref: '#/components/schemas/Image.v1.flux2Payload'
        - $ref: '#/components/schemas/Image.v1.flux2EditPayload'
        - $ref: '#/components/schemas/Image.v1.flux2LoraPayload'
        - $ref: '#/components/schemas/Image.v1.flux2LoraEditPayload'
        - $ref: '#/components/schemas/Image.v1.flux2ProPayload'
        - $ref: '#/components/schemas/Image.v1.flux2ProEditPayload'
        - $ref: '#/components/schemas/Image.v1.flux2MaxPayload'
        - $ref: '#/components/schemas/Image.v1.flux2MaxEditPayload'
        # Payload variant for the stable-diffusion-v3-medium model.
        # Requires model and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - stable-diffusion-v3-medium
            image_size:
              # Either explicit pixel dimensions or one of the named presets.
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      # Enforces the multiple-of-32 rule stated in the
                      # description below.
                      multipleOf: 32
                      default: 1024
                    height:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      multipleOf: 32
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: square_hd
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated image.
            prompt_expansion:
              type: boolean
              description: If set to True, prompt will be upsampled with more details.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              description: The number of inference steps to perform.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        # Payload variant for the stable-diffusion-v35-large model.
        # Requires model and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - stable-diffusion-v35-large
            image_size:
              # Either explicit pixel dimensions or one of the named presets.
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      # Enforces the multiple-of-32 rule stated in the
                      # description below.
                      multipleOf: 32
                      default: 1024
                    height:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      multipleOf: 32
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: square_hd
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated image.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              description: The number of inference steps to perform.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
          required:
            - model
            - prompt
        # Payload variant for the flux-realism model.
        # Requires model and prompt; num_images is pinned to 1.
        - type: object
          properties:
            model:
              type: string
              enum:
                - flux-realism
            image_size:
              # Either explicit pixel dimensions or one of the named presets.
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1536
                      # Enforces the multiple-of-32 rule stated in the
                      # description below.
                      multipleOf: 32
                      default: 1024
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1536
                      multipleOf: 32
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: landscape_4_3
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              description: The number of inference steps to perform.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            num_images:
              # A count of images cannot be fractional, so use integer.
              # The single-value enum fixes it at 1.
              type: integer
              enum:
                - 1
              default: 1
              description: >-
                The number of images to generate. This is always set to 1 for
                streaming output. Default value: 1.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        # Payload variant for the triposr model. It takes a reference image
        # and produces a mesh file (glb or obj). Requires model and image_url;
        # there is no prompt field.
        - type: object
          properties:
            model:
              type: string
              enum:
                - triposr
            image_url:
              type: string
              format: uri
              description: The URL of the reference image.
            output_format:
              type: string
              enum:
                - glb
                - obj
              default: glb
              # glb and obj are 3D model formats, so the description names the
              # model file rather than an image.
              description: The format of the generated 3D model.
            do_remove_background:
              type: boolean
              description: Enables removing the background from the input image.
            foreground_ratio:
              type: number
              minimum: 0.5
              maximum: 1
              default: 0.9
              description: Ratio of the foreground image to the original image.
            mc_resolution:
              type: integer
              minimum: 32
              maximum: 1024
              default: 256
              description: Resolution of the marching cubes. Above 512 is not recommended.
          required:
            - model
            - image_url
        # Payload variant for the recraft-v3 model. Adds style presets and a
        # list of preferred RGB colors. Requires model and prompt; num_images
        # is pinned to 1.
        - type: object
          properties:
            model:
              type: string
              enum:
                - recraft-v3
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            image_size:
              # Either explicit pixel dimensions or one of the named presets.
              anyOf:
                - type: object
                  properties:
                    width:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      # Enforces the multiple-of-32 rule stated in the
                      # description below.
                      multipleOf: 32
                      default: 1024
                    height:
                      type: integer
                      minimum: 64
                      maximum: 1536
                      multipleOf: 32
                      default: 768
                  description: >-
                    For both height and width, the value must be a multiple of
                    32.
                - type: string
                  enum: *ref_3
                  description: The size of the generated image.
              default: square_hd
            style:
              type: string
              # Values are a base style, optionally followed by "/<sub-style>".
              enum:
                - any
                - realistic_image
                - digital_illustration
                - vector_illustration
                - realistic_image/b_and_w
                - realistic_image/hard_flash
                - realistic_image/hdr
                - realistic_image/natural_light
                - realistic_image/studio_portrait
                - realistic_image/enterprise
                - realistic_image/motion_blur
                - digital_illustration/pixel_art
                - digital_illustration/hand_drawn
                - digital_illustration/grain
                - digital_illustration/infantile_sketch
                - digital_illustration/2d_art_poster
                - digital_illustration/handmade_3d
                - digital_illustration/hand_drawn_outline
                - digital_illustration/engraving_color
                - digital_illustration/2d_art_poster_2
                - vector_illustration/engraving
                - vector_illustration/line_art
                - vector_illustration/line_circuit
                - vector_illustration/linocut
              default: realistic_image
              description: The style of the generated images.
            colors:
              type: array
              # Each entry is an RGB triple with 8-bit channels (0-255).
              items:
                type: object
                properties:
                  r:
                    type: integer
                    minimum: 0
                    maximum: 255
                  g:
                    type: integer
                    minimum: 0
                    maximum: 255
                  b:
                    type: integer
                    minimum: 0
                    maximum: 255
                required:
                  - r
                  - g
                  - b
              default: []
              description: An array of preferred colors.
            num_images:
              # A count of images cannot be fractional, so use integer.
              # The single-value enum fixes it at 1.
              type: integer
              enum:
                - 1
              default: 1
              description: The number of images to generate.
          required:
            - model
            - prompt
        # Payload variant for the alibaba/qwen-image model.
        # Requires model and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/qwen-image
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            output_format:
              type: string
              enum: *ref_5
              default: jpeg
              description: The format of the generated image.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated image.
          required:
            - model
            - prompt
        # Payload variant for the google/gemini-2.5-flash-image model.
        # Requires model and prompt. Unlike most variants, prompt declares no
        # maxLength here.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-2.5-flash-image
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
          required:
            - model
            - prompt
        # Payload variant for the google/gemini-2.5-flash-image-edit model.
        # Requires model, prompt and image_urls.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-2.5-flash-image-edit
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            image_urls:
              type: array
              items:
                type: string
                format: uri
              # image_urls is required, so an empty list is rejected; this
              # matches the other edit payloads in this list.
              # NOTE(review): no upper bound is documented here - confirm.
              minItems: 1
              description: List of URLs or local Base64 encoded images to edit.
            aspect_ratio:
              type: string
              # Ratios are quoted so YAML 1.1 parsers do not read them as
              # sexagesimal integers.
              enum:
                - '21:9'
                - '1:1'
                - '4:3'
                - '3:2'
                - '2:3'
                - '5:4'
                - '4:5'
                - '3:4'
                - '16:9'
                - '9:16'
              default: '1:1'
              description: The aspect ratio of the generated image.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
          required:
            - model
            - prompt
            - image_urls
        # Payload variant accepted under two model ids:
        # google/gemini-3-pro-image-preview and google/nano-banana-pro.
        # Requires model and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-3-pro-image-preview
                - google/nano-banana-pro
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            aspect_ratio:
              type: string
              # Ratios are quoted so YAML 1.1 parsers do not read them as
              # sexagesimal integers.
              enum:
                - '21:9'
                - '1:1'
                - '4:3'
                - '3:2'
                - '2:3'
                - '5:4'
                - '4:5'
                - '3:4'
                - '16:9'
                - '9:16'
              default: '1:1'
              description: The aspect ratio of the generated image.
            resolution:
              type: string
              enum:
                - 1K
                - 2K
                - 4K
              default: 1K
              description: The size of the generated image.
          required:
            - model
            - prompt
        # Payload variant accepted under two model ids:
        # google/gemini-3-pro-image-preview-edit and
        # google/nano-banana-pro-edit. Requires model, prompt and image_urls.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-3-pro-image-preview-edit
                - google/nano-banana-pro-edit
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            image_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              # Enforces the 14-image limit stated in the description.
              maxItems: 14
              description: >-
                List of URLs or local Base64 encoded images to edit. Supports up
                to 14 images.
            aspect_ratio:
              type: string
              # Same ratios as the non-edit variant, plus "auto" (the default).
              enum:
                - auto
                - '21:9'
                - '1:1'
                - '4:3'
                - '3:2'
                - '2:3'
                - '5:4'
                - '4:5'
                - '3:4'
                - '16:9'
                - '9:16'
              default: auto
              description: The aspect ratio of the generated image.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            resolution:
              type: string
              enum:
                - 1K
                - 2K
                - 4K
              default: 1K
              description: The size of the generated image.
          required:
            - model
            - prompt
            - image_urls
        # Payload variant for the bytedance/uso model.
        # Requires model, image_urls and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - bytedance/uso
            image_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              maxItems: 3
              description: >-
                An array of up to 3 image URLs. The first image is always
                treated as the primary input for image-to-image generation,
                while the remaining images (if provided) serve as visual style
                references for the output.
            image_size:
              # Either a named preset or explicit dimensions (both required).
              anyOf:
                - type: string
                  enum:
                    - square_hd
                    - square
                    - portrait_4_3
                    - portrait_16_9
                    - landscape_4_3
                    - landscape_16_9
                - type: object
                  properties:
                    # Integer, matching the other image_size schemas in this
                    # list that declare width/height as integers.
                    width:
                      type: integer
                    height:
                      type: integer
                  required:
                    - width
                    - height
              default: square_hd
              description: The size of the generated image.
            negative_prompt:
              type: string
              default: ''
              description: The description of elements to avoid in the generated image.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              default: 28
              description: The number of inference steps to perform.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              default: 4
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            # NOTE(review): keep_size has no description or default in the
            # spec; presumably it preserves the input image size - confirm
            # and document.
            keep_size:
              type: boolean
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            sync_mode:
              type: boolean
              default: false
              description: >-
                If set to true, the function will wait for the image to be
                generated and uploaded before returning the response. This will
                increase the latency of the function but it allows you to get
                the image directly in the response without going through the
                CDN.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            output_format:
              type: string
              enum:
                - jpeg
                - png
              default: png
              description: The format of the generated image.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
          required:
            - model
            - image_urls
            - prompt
        # Payload variant for the bytedance/seedream-v4-edit model.
        # Requires model, image_urls and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - bytedance/seedream-v4-edit
            image_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              maxItems: 10
              description: List of URLs or local Base64 encoded images to edit.
            image_size:
              # Either a named preset or explicit dimensions (both required).
              anyOf:
                - type: string
                  enum:
                    - square_hd
                    - square
                    - portrait_4_3
                    - portrait_16_9
                    - landscape_4_3
                    - landscape_16_9
                - type: object
                  properties:
                    # Integer, matching the other image_size schemas in this
                    # list that declare width/height as integers.
                    width:
                      type: integer
                      minimum: 1024
                      maximum: 4096
                    height:
                      type: integer
                      minimum: 1024
                      maximum: 4096
                  required:
                    - width
                    - height
              default: square_hd
              description: The size of the generated image.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            sync_mode:
              type: boolean
              default: false
              description: >-
                If set to true, the function will wait for the image to be
                generated and uploaded before returning the response. This will
                increase the latency of the function but it allows you to get
                the image directly in the response without going through the
                CDN.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
          required:
            - model
            - image_urls
            - prompt
        # Payload variant for the bytedance/seedream-v4-text-to-image model.
        # Requires model and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - bytedance/seedream-v4-text-to-image
            image_size:
              # Either a named preset or explicit dimensions (both required).
              anyOf:
                - type: string
                  enum:
                    - square_hd
                    - square
                    - portrait_4_3
                    - portrait_16_9
                    - landscape_4_3
                    - landscape_16_9
                - type: object
                  properties:
                    # Integer, matching the other image_size schemas in this
                    # list that declare width/height as integers.
                    width:
                      type: integer
                      minimum: 1024
                      maximum: 4096
                    height:
                      type: integer
                      minimum: 1024
                      maximum: 4096
                  required:
                    - width
                    - height
              default: square_hd
              description: The size of the generated image.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            sync_mode:
              type: boolean
              default: false
              description: >-
                If set to true, the function will wait for the image to be
                generated and uploaded before returning the response. This will
                increase the latency of the function but it allows you to get
                the image directly in the response without going through the
                CDN.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
          required:
            - model
            - prompt
        # Payload variant for the hunyuan/hunyuan-image-v3-text-to-image model.
        # Requires model and prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - hunyuan/hunyuan-image-v3-text-to-image
            negative_prompt:
              type: string
              default: ''
              description: The description of elements to avoid in the generated image.
            image_size:
              # Either a named preset or explicit dimensions (both required).
              anyOf:
                - type: string
                  enum:
                    - square_hd
                    - square
                    - portrait_4_3
                    - portrait_16_9
                    - landscape_4_3
                    - landscape_16_9
                - type: object
                  properties:
                    # Integer, matching the other image_size schemas in this
                    # list that declare width/height as integers.
                    width:
                      type: integer
                    height:
                      type: integer
                  required:
                    - width
                    - height
              default: square_hd
              description: The size of the generated image.
            num_inference_steps:
              type: integer
              minimum: 1
              maximum: 50
              default: 28
              description: The number of inference steps to perform.
            guidance_scale:
              type: number
              minimum: 1
              maximum: 20
              default: 7.5
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt when looking
                for a related image to show you.
            enable_safety_checker:
              type: boolean
              default: true
              description: If set to True, the safety checker will be enabled.
            sync_mode:
              type: boolean
              default: false
              description: >-
                If set to true, the function will wait for the image to be
                generated and uploaded before returning the response. This will
                increase the latency of the function but it allows you to get
                the image directly in the response without going through the
                CDN.
            output_format:
              type: string
              enum:
                - jpeg
                - png
              default: png
              description: The format of the generated image.
            enable_prompt_expansion:
              type: boolean
              description: If set to True, prompt will be upsampled with more details.
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            num_images:
              # A count of images cannot be fractional, so use integer.
              type: integer
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 1
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
          required:
            - model
            - prompt
        # Alternatives whose definitions live under components/schemas and are
        # pulled in by reference rather than written inline.
        - $ref: '#/components/schemas/Image.v1.hunyuanPartPayload'
        - $ref: '#/components/schemas/Image.v1.zImageTurboPayload'
        - $ref: '#/components/schemas/Image.v1.zImageTurboLoraPayload'
        # Schema alternative for `model: klingai/image-o1`.
        # `model`, `prompt` and `image_urls` are required; the remaining
        # properties are optional and declare defaults.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/image-o1
            prompt:
              type: string
              maxLength: 4000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            image_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              maxItems: 10
              description: List of URLs or local Base64 encoded images to edit.
            aspect_ratio:
              type: string
              enum:
                - '21:9'
                - '16:9'
                - '4:3'
                - '3:2'
                - '1:1'
                - '2:3'
                - '3:4'
                - '9:16'
              default: '16:9'
              description: The aspect ratio of the generated image.
            resolution:
              type: string
              enum:
                - 1K
                - 2K
              default: 1K
              description: The resolution of the output image.
            output_format:
              type: string
              enum:
                - jpeg
                - png
                - webp
              default: png
              description: The format of the generated image.
            num_images:
              # An image count is a whole number, so fractional values such as
              # 1.5 must not validate.
              type: integer
              minimum: 1
              maximum: 9
              default: 1
              description: The number of images to generate.
          required:
            - model
            - prompt
            - image_urls
        # Schema alternative for `model: imagen-3.0-generate-002`.
        # Only `model` and `prompt` are required. The enum lists anchored here
        # (ref_9, ref_10, ref_11) are aliased by later schemas in this file, so
        # the anchors must stay on this first occurrence.
        - type: object
          properties:
            model:
              type: string
              enum:
                - imagen-3.0-generate-002
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              # Shared aspect-ratio list (aliased as *ref_9 further down).
              enum: &ref_9
                - '1:1'
                - '9:16'
                - '16:9'
                - '3:4'
                - '4:3'
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              # Shared person-generation list (aliased as *ref_10 further down).
              enum: &ref_10
                - dont_allow
                - allow_adult
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              # Shared safety-level list (aliased as *ref_11 further down).
              enum: &ref_11
                - block_low_and_above
                - block_medium_and_above
                - block_only_high
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: imagen-4.0-ultra-generate-preview-06-06`.
        # Only `model` and `prompt` are required. The *ref_9, *ref_10 and
        # *ref_11 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - imagen-4.0-ultra-generate-preview-06-06
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_9
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: google/imagen-4.0-generate-001` or
        # `google/imagen4/preview`.
        # Only `model` and `prompt` are required. The *ref_9, *ref_10 and
        # *ref_11 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/imagen-4.0-generate-001
                - google/imagen4/preview
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_9
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: google/imagen-4.0-fast-generate-001`.
        # Only `model` and `prompt` are required. The *ref_9, *ref_10 and
        # *ref_11 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/imagen-4.0-fast-generate-001
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_9
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: google/imagen-4.0-ultra-generate-001`.
        # Only `model` and `prompt` are required. The *ref_9, *ref_10 and
        # *ref_11 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/imagen-4.0-ultra-generate-001
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_9
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: google/gemini-2.5-flash-image`.
        # Only `model` and `prompt` are required. The *ref_9, *ref_10 and
        # *ref_11 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-2.5-flash-image
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_9
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: google/gemini-2.5-flash-image-edit`.
        # `model`, `image_urls` and `prompt` are required. The aspect-ratio
        # list anchored here (ref_12) adds `auto` and is aliased by a later
        # schema, so the anchor must stay on this occurrence.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-2.5-flash-image-edit
            image_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              description: URLs of the source images to edit
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              # Shared aspect-ratio list with `auto` (aliased as *ref_12 later).
              enum: &ref_12
                - auto
                - '1:1'
                - '9:16'
                - '16:9'
                - '3:4'
                - '4:3'
              default: auto
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - image_urls
            - prompt
        # Schema alternative for `model: google/gemini-3-pro-image-preview` or
        # `google/nano-banana-pro`.
        # Only `model` and `prompt` are required. The *ref_9, *ref_10 and
        # *ref_11 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-3-pro-image-preview
                - google/nano-banana-pro
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_9
              default: '1:1'
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - prompt
        # Schema alternative for `model: google/gemini-3-pro-image-preview-edit`
        # or `google/nano-banana-pro-edit`.
        # `model`, `image_urls` and `prompt` are required. The *ref_10, *ref_11
        # and *ref_12 enums alias lists anchored earlier in this file.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/gemini-3-pro-image-preview-edit
                - google/nano-banana-pro-edit
            image_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              description: URLs of the source images to edit
            prompt:
              type: string
              maxLength: 400
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            convert_base64_to_url:
              type: boolean
              default: true
              description: >-
                If True, the URL to the image will be returned; otherwise, the
                file will be provided in base64 format.
            num_images:
              type: integer
              # A request must ask for at least one image.
              minimum: 1
              maximum: 4
              default: 1
              description: The number of images to generate.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                The same seed and the same prompt given to the same version of
                the model will output the same image every time.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                Optional parameter to use an LLM-based prompt rewriting feature
                for higher-quality images that better match the original prompt.
                Disabling it may affect image quality and prompt alignment.
            aspect_ratio:
              type: string
              enum: *ref_12
              default: auto
              description: The aspect ratio of the generated image.
            person_generation:
              type: string
              enum: *ref_10
              default: allow_adult
              description: Allow generation of people.
            safety_setting:
              type: string
              enum: *ref_11
              default: block_medium_and_above
              description: Adds a filter level to safety filtering.
            add_watermark:
              type: boolean
              default: false
              description: Add an invisible watermark to the generated images.
          required:
            - model
            - image_urls
            - prompt
        # ByteDance Seedream alternatives, one entry per `model` value.
        # Both entries share the same property set; `provider` is flagged as
        # deprecated and only `model` and `prompt` are required.
        - oneOf:
            # Variant for `model: bytedance/seedream-4-5`.
            - type: object
              properties:
                provider:
                  type: string
                  enum:
                    - bytedance
                  default: bytedance
                  deprecated: true
                model:
                  type: string
                  enum:
                    - bytedance/seedream-4-5
                prompt:
                  type: string
                  description: >-
                    The text prompt describing the content, style, or
                    composition of the image to be generated.
                image_urls:
                  type: array
                  items:
                    type: string
                    format: uri
                  minItems: 1
                  maxItems: 14
                  description: List of URLs or local Base64 encoded images to edit.
                image_size:
                  # Either explicit pixel dimensions or a named size preset.
                  anyOf:
                    - type: object
                      properties:
                        width:
                          type: integer
                          minimum: 1440
                          maximum: 4096
                          default: 2048
                        height:
                          type: integer
                          minimum: 1440
                          maximum: 4096
                          default: 2048
                    - type: string
                      enum:
                        - 2K
                        - 4K
                  description: The size of the generated image.
                response_format:
                  type: string
                  enum:
                    - url
                    - b64_json
                  default: url
                  description: The format in which the generated images are returned.
                seed:
                  type: integer
                  description: >-
                    The same seed and the same prompt given to the same version
                    of the model will output the same image every time.
                watermark:
                  type: boolean
                  default: false
                  description: Add an invisible watermark to the generated images.
              required:
                - model
                - prompt
            # Variant for `model: bytedance/seedream-5-0-lite-preview`.
            - type: object
              properties:
                provider:
                  type: string
                  enum:
                    - bytedance
                  default: bytedance
                  deprecated: true
                model:
                  type: string
                  enum:
                    - bytedance/seedream-5-0-lite-preview
                prompt:
                  type: string
                  description: >-
                    The text prompt describing the content, style, or
                    composition of the image to be generated.
                image_urls:
                  type: array
                  items:
                    type: string
                    format: uri
                  minItems: 1
                  maxItems: 14
                  description: List of URLs or local Base64 encoded images to edit.
                image_size:
                  # Either explicit pixel dimensions or a named size preset.
                  anyOf:
                    - type: object
                      properties:
                        width:
                          type: integer
                          minimum: 1440
                          maximum: 4096
                          default: 2048
                        height:
                          type: integer
                          minimum: 1440
                          maximum: 4096
                          default: 2048
                    - type: string
                      enum:
                        - 2K
                        - 4K
                  description: The size of the generated image.
                response_format:
                  type: string
                  enum:
                    - url
                    - b64_json
                  default: url
                  description: The format in which the generated images are returned.
                seed:
                  type: integer
                  description: >-
                    The same seed and the same prompt given to the same version
                    of the model will output the same image every time.
                watermark:
                  type: boolean
                  default: false
                  description: Add an invisible watermark to the generated images.
              required:
                - model
                - prompt
        # Alibaba alternatives, one entry per `model` value. `provider` is
        # flagged as deprecated in both entries.
        - oneOf:
            # Variant for `model: alibaba/qwen-image-edit`: takes a single
            # `image` string; `model`, `prompt` and `image` are required.
            - type: object
              properties:
                model:
                  type: string
                  enum:
                    - alibaba/qwen-image-edit
                provider:
                  type: string
                  enum:
                    - alibaba
                  default: alibaba
                  deprecated: true
                prompt:
                  type: string
                  maxLength: 800
                  description: >-
                    A positive prompt that describes the desired elements and
                    visual features in the edited image. Maximum 800 characters.
                image:
                  type: string
                  description: >-
                    The image to be edited. Enter the Base64 encoding of the
                    picture or an accessible URL. Image URL: Make sure that the
                    image URL is accessible. Base64-encoded content: The format
                    must be in lowercase.
                negative_prompt:
                  type: string
                  maxLength: 500
                  description: The description of elements to avoid in the generated image.
                watermark:
                  type: boolean
                  default: false
                  description: Add an invisible watermark to the generated images.
              required:
                - model
                - prompt
                - image
            # Variant for `model: alibaba/wan-2-6-image`: takes an optional
            # `image_urls` list; only `model` and `prompt` are required.
            - type: object
              properties:
                provider:
                  type: string
                  enum:
                    - alibaba
                  default: alibaba
                  deprecated: true
                model:
                  type: string
                  enum:
                    - alibaba/wan-2-6-image
                prompt:
                  type: string
                  maxLength: 2000
                  description: >-
                    A positive prompt that describes the desired elements and
                    visual features in the edited image.
                image_urls:
                  type: array
                  items:
                    type: string
                    format: uri
                  minItems: 1
                  maxItems: 3
                  description: List of URLs or local Base64 encoded images to edit.
                image_size:
                  # Either explicit pixel dimensions or a named size preset.
                  anyOf:
                    - type: object
                      properties:
                        width:
                          type: integer
                          minimum: 512
                          maximum: 1440
                        height:
                          type: integer
                          minimum: 512
                          maximum: 1440
                      required:
                        - width
                        - height
                      description: >-
                        For both height and width, the value must be a multiple
                        of 32.
                    - type: string
                      # Preset list is anchored as ref_13 and aliased by a later
                      # schema in this file; keep the anchor on this occurrence.
                      enum: &ref_13
                        - square_hd
                        - square
                        - portrait_4_3
                        - portrait_16_9
                        - landscape_4_3
                        - landscape_16_9
                      description: The size of the generated image.
                  default: landscape_4_3
                  description: The size of the generated image.
                enhance_prompt:
                  type: boolean
                  default: true
                  description: >-
                    Optional parameter to use an LLM-based prompt rewriting
                    feature for higher-quality images that better match the
                    original prompt. Disabling it may affect image quality and
                    prompt alignment.
                negative_prompt:
                  type: string
                  maxLength: 500
                  description: The description of elements to avoid in the generated image.
                seed:
                  type: integer
                  minimum: 0
                  maximum: 2147483647
                  description: >-
                    The same seed and the same prompt given to the same version
                    of the model will output the same image every time.
                watermark:
                  type: boolean
                  default: false
                  description: Add an invisible watermark to the generated images.
              required:
                - model
                - prompt
        # Schema alternative accepted when `model` is alibaba/qwen-image.
        - type: object
          required: [prompt, model]
          properties:
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 800
            negative_prompt:
              type: string
              description: >-
                The description of elements to avoid in the generated
                image.
              maxLength: 500
            num_images:
              type: integer
              description: The number of images to generate.
              default: 1
              minimum: 1
              maximum: 4
            image_size:
              description: The size of the generated image.
              default:
                width: 1664
                height: 928
              anyOf:
                # Explicit pixel dimensions.
                - type: object
                  description: >-
                    For both height and width, the value must be a
                    multiple of 32.
                  required: [width, height]
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1664
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1664
                # Named preset; the values come from the ref_13 anchor.
                - type: string
                  description: The size of the generated image.
                  enum: *ref_13
            watermark:
              type: boolean
              description: Add an invisible watermark to the generated images.
              default: false
            seed:
              type: integer
              description: >-
                The same seed and the same prompt given to the same
                version of the model will output the same image every
                time.
              minimum: 0
              maximum: 2147483647
            model:
              type: string
              enum: [alibaba/qwen-image]
            output_format:
              type: string
              description: The format of the generated image.
              default: jpeg
              enum: [jpeg, png]
            prompt_extend:
              type: boolean
              description: If set to True, prompt will be upsampled with more details.
              default: true
            enable_safety_checker:
              type: boolean
              description: If set to True, the safety checker will be enabled.
              default: true
            guidance_scale:
              type: number
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of
                how close you want the model to stick to your prompt
                when looking for a related image to show you.
              minimum: 1
              maximum: 20
        # Schema alternative accepted when `model` is alibaba/z-image-turbo.
        - type: object
          required: [prompt, model]
          properties:
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 800
            negative_prompt:
              type: string
              description: >-
                The description of elements to avoid in the generated
                image.
              maxLength: 500
            num_images:
              type: integer
              description: The number of images to generate.
              default: 1
              minimum: 1
              maximum: 4
            image_size:
              description: The size of the generated image.
              default:
                width: 1024
                height: 1536
              anyOf:
                # Explicit pixel dimensions.
                - type: object
                  description: >-
                    For both height and width, the value must be a
                    multiple of 32.
                  required: [width, height]
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 2048
                    height:
                      type: integer
                      minimum: 512
                      maximum: 2048
                # Named preset; the values come from the ref_13 anchor.
                - type: string
                  description: The size of the generated image.
                  enum: *ref_13
            watermark:
              type: boolean
              description: Add an invisible watermark to the generated images.
              default: false
            seed:
              type: integer
              description: >-
                The same seed and the same prompt given to the same
                version of the model will output the same image every
                time.
              minimum: 0
              maximum: 2147483647
            model:
              type: string
              enum: [alibaba/z-image-turbo]
            output_format:
              type: string
              description: The format of the generated image.
              default: png
              enum: [jpeg, png, webp]
            num_inference_steps:
              type: integer
              description: The number of inference steps to perform.
              minimum: 1
              maximum: 8
            enable_safety_checker:
              type: boolean
              description: If set to True, the safety checker will be enabled.
              default: true
            acceleration:
              type: string
              description: >-
                The speed of the generation. The higher the speed, the
                faster the generation.
              default: regular
              enum:
                - none
                - regular
                - high
            prompt_extend:
              type: boolean
              description: If set to True, prompt will be upsampled with more details.
              default: false
        # Schema alternative accepted when `model` is
        # alibaba/wan2.5-t2i-preview.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [alibaba/wan2.5-t2i-preview]
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 2000
            negative_prompt:
              type: string
              description: >-
                The description of elements to avoid in the generated
                image.
              maxLength: 500
            num_images:
              type: integer
              description: The number of images to generate.
              default: 1
              minimum: 1
              maximum: 4
            image_size:
              description: The size of the generated image.
              default: landscape_4_3
              anyOf:
                # Explicit pixel dimensions.
                - type: object
                  description: >-
                    For both height and width, the value must be a
                    multiple of 32.
                  required: [width, height]
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1440
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1440
                # Named preset; the values come from the ref_13 anchor.
                - type: string
                  description: The size of the generated image.
                  enum: *ref_13
            enhance_prompt:
              type: boolean
              description: >-
                Optional parameter to use an LLM-based prompt rewriting
                feature for higher-quality images that better match the
                original prompt. Disabling it may affect image quality
                and prompt alignment.
              default: true
            watermark:
              type: boolean
              description: Add an invisible watermark to the generated images.
              default: false
            seed:
              type: integer
              description: >-
                The same seed and the same prompt given to the same
                version of the model will output the same image every
                time.
              minimum: 0
              maximum: 2147483647
        # Schema alternative accepted when `model` is alibaba/wan2.2-t2i-plus.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [alibaba/wan2.2-t2i-plus]
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 2000
            negative_prompt:
              type: string
              description: >-
                The description of elements to avoid in the generated
                image.
              maxLength: 500
            num_images:
              type: integer
              description: The number of images to generate.
              default: 1
              minimum: 1
              maximum: 4
            image_size:
              description: The size of the generated image.
              default: landscape_4_3
              anyOf:
                # Explicit pixel dimensions.
                - type: object
                  description: >-
                    For both height and width, the value must be a
                    multiple of 32.
                  required: [width, height]
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1440
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1440
                # Named preset; the values come from the ref_13 anchor.
                - type: string
                  description: The size of the generated image.
                  enum: *ref_13
            enhance_prompt:
              type: boolean
              description: >-
                Optional parameter to use an LLM-based prompt rewriting
                feature for higher-quality images that better match the
                original prompt. Disabling it may affect image quality
                and prompt alignment.
              default: true
            watermark:
              type: boolean
              description: Add an invisible watermark to the generated images.
              default: false
            seed:
              type: integer
              description: >-
                The same seed and the same prompt given to the same
                version of the model will output the same image every
                time.
              minimum: 0
              maximum: 2147483647
        # Schema alternative accepted when `model` is alibaba/wan2.2-t2i-flash.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [alibaba/wan2.2-t2i-flash]
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 2000
            negative_prompt:
              type: string
              description: >-
                The description of elements to avoid in the generated
                image.
              maxLength: 500
            num_images:
              type: integer
              description: The number of images to generate.
              default: 1
              minimum: 1
              maximum: 4
            image_size:
              description: The size of the generated image.
              default: landscape_4_3
              anyOf:
                # Explicit pixel dimensions.
                - type: object
                  description: >-
                    For both height and width, the value must be a
                    multiple of 32.
                  required: [width, height]
                  properties:
                    width:
                      type: integer
                      minimum: 512
                      maximum: 1440
                    height:
                      type: integer
                      minimum: 512
                      maximum: 1440
                # Named preset; the values come from the ref_13 anchor.
                - type: string
                  description: The size of the generated image.
                  enum: *ref_13
            enhance_prompt:
              type: boolean
              description: >-
                Optional parameter to use an LLM-based prompt rewriting
                feature for higher-quality images that better match the
                original prompt. Disabling it may affect image quality
                and prompt alignment.
              default: true
            watermark:
              type: boolean
              description: Add an invisible watermark to the generated images.
              default: false
            seed:
              type: integer
              description: >-
                The same seed and the same prompt given to the same
                version of the model will output the same image every
                time.
              minimum: 0
              maximum: 2147483647
        # Schema alternative accepted when `model` is openai/gpt-image-1.
        # This entry defines the anchors ref_17, ref_18 and ref_19, which the
        # Image.v1.GptImage schema further down aliases; keep the anchored
        # lists here so those aliases continue to resolve.
        - type: object
          properties:
            model:
              type: string
              enum:
                - openai/gpt-image-1
            prompt:
              type: string
              maxLength: 32000
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            background:
              type: string
              enum:
                - transparent
                - opaque
                - auto
              default: auto
              description: >-
                Allows to set transparency for the background of the generated
                image(s). When auto is used, the model will automatically
                determine the best background for the image.

                If transparent, the output format needs to support transparency,
                so it should be set to either png (default value) or webp.
            moderation:
              type: string
              enum:
                - low
                - auto
              default: auto
              description: Control the content-moderation level for images.
            # The key is quoted because a bare `n` is a boolean in YAML 1.1.
            # The enum permits only the value 1 and the field is deprecated.
            'n':
              type: number
              enum:
                - 1
              default: 1
              description: The number of images to generate.
              deprecated: true
            output_compression:
              type: integer
              minimum: 0
              maximum: 100
              default: 100
              description: The compression level (0-100%) for the generated images.
            output_format:
              type: string
              # Anchor ref_17: reused by Image.v1.GptImage.output_format.
              enum: &ref_17
                - png
                - jpeg
                - webp
              default: png
              description: The format of the generated image.
            quality:
              type: string
              # Anchor ref_18: reused by Image.v1.GptImage.quality.
              enum: &ref_18
                - low
                - medium
                - high
              default: medium
              description: The quality of the image that will be generated.
            size:
              type: string
              # Anchor ref_19: reused by Image.v1.GptImage.size.
              enum: &ref_19
                - 1024x1024
                - 1024x1536
                - 1536x1024
              default: 1024x1024
              description: The size of the generated image.
            response_format:
              type: string
              enum:
                - url
                - b64_json
              default: url
              description: The format in which the generated images are returned.
          # Only the model selector and the prompt are mandatory.
          required:
            - model
            - prompt
        # Schema alternative accepted when `model` is bytedance/seedream-3.0.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [bytedance/seedream-3.0]
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
            aspect_ratio:
              type: string
              description: The aspect ratio of the generated image.
              default: '1:1'
              enum: ['1:1', '16:9', '9:16', '3:4', '4:3']
            seed:
              type: integer
              description: >-
                The same seed and the same prompt given to the same
                version of the model will output the same image every
                time.
            guidance_scale:
              type: number
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of
                how close you want the model to stick to your prompt
                when looking for a related image to show you.
              default: 2.5
              minimum: 1
              maximum: 10
            response_format:
              type: string
              description: The format in which the generated images are returned.
              default: url
              enum: [url, b64_json]
            # Deprecated field.
            size:
              type: string
              deprecated: true
              description: >-
                Specifies the dimensions (width x height in pixels) of
                the generated image. Must be between [512x512,
                2048x2048].
              example: 1024x1024
            # Deprecated field.
            watermark:
              type: boolean
              deprecated: true
              description: Add an invisible watermark to the generated images.
              default: false
        # Schema alternative accepted when `model` is reve/create-image.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [reve/create-image]
            aspect_ratio:
              type: string
              description: The aspect ratio of the generated image.
              default: '3:2'
              enum: ['16:9', '9:16', '3:2', '2:3', '4:3', '3:4', '1:1']
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 2560
            convert_base64_to_url:
              type: boolean
              description: >-
                If True, the URL to the image will be returned;
                otherwise, the file will be provided in base64 format.
              default: true
        # Schema alternative accepted when `model` is reve/edit-image.
        - type: object
          required: [model, image_url, prompt]
          properties:
            model:
              type: string
              enum: [reve/edit-image]
            image_url:
              type: string
              description: The URL of the reference image.
              format: uri
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 2560
            convert_base64_to_url:
              type: boolean
              description: >-
                If True, the URL to the image will be returned;
                otherwise, the file will be provided in base64 format.
              default: true
        # Schema alternative accepted when `model` is reve/remix-edit-image.
        - type: object
          required: [model, image_urls, prompt]
          properties:
            model:
              type: string
              enum: [reve/remix-edit-image]
            image_urls:
              type: array
              description: List of URLs or local Base64 encoded images to edit.
              minItems: 1
              maxItems: 4
              items:
                type: string
                format: uri
            aspect_ratio:
              type: string
              description: The aspect ratio of the generated image.
              default: '3:2'
              enum: ['16:9', '9:16', '3:2', '2:3', '4:3', '3:4', '1:1']
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or
                composition of the image to be generated.
              maxLength: 2560
            convert_base64_to_url:
              type: boolean
              description: >-
                If True, the URL to the image will be returned;
                otherwise, the file will be provided in base64 format.
              default: true
        # Schema alternative accepted when `model` is topaz-labs/sharpen.
        # This entry defines the anchors ref_14 and ref_15, which the
        # topaz-labs/sharpen-gen entry that follows aliases; keep the anchored
        # lists here so those aliases continue to resolve.
        - type: object
          properties:
            model:
              type: string
              enum:
                - topaz-labs/sharpen
            # Required; no default is declared, so callers must choose a mode.
            mode:
              type: string
              enum:
                - Standard
                - Strong
                - Lens Blur
                - Lens Blur V2
                - Motion Blur
                - Natural
                - Refocus
            image_url:
              type: string
              format: uri
              description: The URL of the reference image.
            strength:
              type: number
              minimum: 0.01
              maximum: 1
              description: >-
                Defines the overall intensity of the sharpening effect.
                Increases details. Too much sharpening can create an unrealistic
                result.
            minor_denoise:
              type: number
              minimum: 0.01
              maximum: 1
              description: >-
                Removes noisy pixels to increase clarity. Can slightly increase
                image sharpness.
            output_format:
              type: string
              # Anchor ref_14: reused by topaz-labs/sharpen-gen output_format.
              enum: &ref_14
                - jpeg
                - jpg
                - png
                - tiff
                - tif
              default: jpeg
              description: The format of the generated image.
            subject_detection:
              type: string
              # Anchor ref_15: reused by topaz-labs/sharpen-gen
              # subject_detection.
              enum: &ref_15
                - All
                - Foreground
                - Background
              default: All
              description: >-
                Specifies which subjects to detect and process. Options: 'All'
                (detect all subjects), 'Foreground' (detect only foreground
                subjects), 'Background' (detect background subjects).
            face_enhancement:
              type: boolean
              default: true
              description: >-
                Whether to enhance faces in the image. When true, the model
                applies face-specific improvements.
            face_enhancement_creativity:
              type: number
              minimum: 0
              maximum: 1
              default: 0
              description: >-
                Level of creativity for face enhancement (0-1). Higher values
                allow more creative, less conservative changes.
            face_enhancement_strength:
              type: number
              minimum: 0
              maximum: 1
              default: 0.8
              description: >-
                How sharp enhanced faces are relative to background (0-1). Lower
                values blend changes subtly; higher values make faces more
                pronounced.
          # The model selector, mode and source image are mandatory.
          required:
            - model
            - mode
            - image_url
        # Schema alternative accepted when `model` is topaz-labs/sharpen-gen.
        - type: object
          required: [model, mode, image_url]
          properties:
            model:
              type: string
              enum: [topaz-labs/sharpen-gen]
            mode:
              type: string
              enum:
                - Super Focus
                - Super Focus V2
            image_url:
              type: string
              description: The URL of the reference image.
              format: uri
            strength:
              type: number
              description: >-
                Defines the overall intensity of the sharpening effect.
                Increases details. Too much sharpening can create an
                unrealistic result.
              minimum: 0
              maximum: 1
            focus_boost:
              type: number
              description: >-
                Corrects images that are missing detail by downscaling
                your image then upscaling the results back to the
                original size. Use on very blurry images!
              minimum: 0.25
              maximum: 1
            seed:
              type: integer
              description: Optional fixed seed for repeatable results.
            output_format:
              type: string
              description: The format of the generated image.
              default: jpeg
              # Values come from the ref_14 anchor.
              enum: *ref_14
            subject_detection:
              type: string
              description: >-
                Specifies which subjects to detect and process. Options:
                'All' (detect all subjects), 'Foreground' (detect only
                foreground subjects), 'Background' (detect background
                subjects).
              default: All
              # Values come from the ref_15 anchor.
              enum: *ref_15
            face_enhancement:
              type: boolean
              description: >-
                Whether to enhance faces in the image. When true, the
                model applies face-specific improvements.
              default: true
            face_enhancement_creativity:
              type: number
              description: >-
                Level of creativity for face enhancement (0-1). Higher
                values allow more creative, less conservative changes.
              default: 0
              minimum: 0
              maximum: 1
            face_enhancement_strength:
              type: number
              description: >-
                How sharp enhanced faces are relative to background
                (0-1). Lower values blend changes subtly; higher values
                make faces more pronounced.
              default: 0.8
              minimum: 0
              maximum: 1
        # Schema alternative accepted when `model` is x-ai/grok-2-image.
        # This entry defines the anchor ref_16, which the x-ai/grok-imagine
        # entry that follows aliases; keep the anchored list here so that
        # alias continues to resolve.
        - type: object
          properties:
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # The key is quoted because a bare `n` is a boolean in YAML 1.1.
            'n':
              type: number
              minimum: 1
              maximum: 10
              default: 1
              description: The number of images to generate.
            response_format:
              type: string
              # Anchor ref_16: reused by the x-ai/grok-imagine-image entry.
              enum: &ref_16
                - url
                - b64_json
              default: url
              description: The format in which the generated images are returned.
            model:
              type: string
              enum:
                - x-ai/grok-2-image
          # Only the prompt and the model selector are mandatory.
          required:
            - prompt
            - model
        # Schema alternative accepted when `model` is x-ai/grok-imagine-image
        # or x-ai/grok-imagine-image-pro.
        - type: object
          properties:
            prompt:
              type: string
              description: >-
                The text prompt describing the content, style, or composition of
                the image to be generated.
            # The key is quoted because a bare `n` is a boolean in YAML 1.1.
            # NOTE(review): typed as `number` although it counts images;
            # confirm whether `integer` is intended.
            'n':
              type: number
              minimum: 1
              maximum: 10
              default: 1
              description: The number of images to generate.
            response_format:
              type: string
              # Values come from the ref_16 anchor.
              enum: *ref_16
              default: url
              description: The format in which the generated images are returned.
            model:
              type: string
              enum:
                - x-ai/grok-imagine-image
                - x-ai/grok-imagine-image-pro
            aspect_ratio:
              type: string
              # Every ratio is quoted so that no entry relies on a parser's
              # implicit typing of digit-and-colon scalars.
              enum:
                - '1:1'
                - '3:4'
                - '4:3'
                - '9:16'
                - '16:9'
                - '2:3'
                - '3:2'
                - '9:19.5'
                - '19.5:9'
                - '9:20'
                - '20:9'
                - '1:2'
                - '2:1'
                - auto
              default: '16:9'
              description: The aspect ratio of the generated image.
            resolution:
              type: string
              enum:
                - 1k
                - 2k
              default: 2k
              description: The resolution of the output image.
          # Only the prompt and the model selector are mandatory.
          required:
            - prompt
            - model
        # Schema alternative accepted when `model` is magic/image-to-3d.
        # Only the front image is required; the other three are optional.
        - type: object
          required: [model, front_image_url]
          properties:
            model:
              type: string
              enum: [magic/image-to-3d]
            front_image_url:
              format: uri
              type: string
            left_image_url:
              format: uri
              type: string
            back_image_url:
              format: uri
              type: string
            right_image_url:
              format: uri
              type: string
    # Request schema for the OpenAI GPT image models listed under `model`.
    # The output_format, quality, size and response_format enums are aliases
    # of anchors (ref_17, ref_18, ref_19, ref_2) defined earlier in the file.
    Image.v1.GptImage:
      type: object
      required: [model, prompt]
      properties:
        model:
          type: string
          enum:
            - openai/gpt-image-1
            - openai/gpt-image-1-mini
            - openai/gpt-image-1-5
        prompt:
          type: string
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          maxLength: 32000
        background:
          type: string
          description: >-
            Allows to set transparency for the background of the
            generated image(s). When auto is used, the model will
            automatically determine the best background for the image.

            If transparent, the output format needs to support
            transparency, so it should be set to either png (default
            value) or webp.
          default: auto
          enum: [transparent, opaque, auto]
        moderation:
          type: string
          description: Control the content-moderation level for images.
          default: auto
          enum: [low, auto]
        # The key is quoted because a bare `n` is a boolean in YAML 1.1.
        'n':
          type: number
          deprecated: true
          description: The number of images to generate.
          default: 1
          enum: [1]
        output_compression:
          type: integer
          description: The compression level (0-100%) for the generated images.
          default: 100
          minimum: 0
          maximum: 100
        output_format:
          type: string
          description: The format of the generated image.
          default: png
          enum: *ref_17
        quality:
          type: string
          description: The quality of the image that will be generated.
          default: medium
          enum: *ref_18
        size:
          type: string
          description: The size of the generated image.
          default: 1024x1024
          enum: *ref_19
        response_format:
          type: string
          description: The format in which the generated images are returned.
          default: url
          enum: *ref_2
    # Text-to-image request schema for the `blackforestlabs/flux-2` model.
    # `model` and `prompt` are the only required fields.
    Image.v1.flux2Payload:
      type: object
      required:
        - model
        - prompt
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2
        prompt:
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          type: string
          maxLength: 4000
        num_images:
          description: The number of images to generate.
          type: number
          default: 1
          minimum: 1
          maximum: 4
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
        guidance_scale:
          description: >-
            The CFG (Classifier Free Guidance) scale is a measure of how
            close you want the model to stick to your prompt when looking
            for a related image to show you.
          type: number
          minimum: 0
          maximum: 20
        num_inference_steps:
          description: The number of inference steps to perform.
          type: integer
          minimum: 4
          maximum: 50
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          default: landscape_4_3
          anyOf:
            - description: For both height and width, the value must be a multiple of 32.
              type: object
              properties:
                width:
                  type: integer
                  default: 1024
                  minimum: 512
                  maximum: 2048
                height:
                  type: integer
                  default: 768
                  minimum: 512
                  maximum: 2048
            - description: The size of the generated image.
              type: string
              enum: *ref_3
        acceleration:
          description: >-
            The speed of the generation. The higher the speed, the faster
            the generation.
          type: string
          default: regular
          enum:
            - none
            - regular
            - high
        enable_prompt_expansion:
          description: If set to True, prompt will be upsampled with more details.
          type: boolean
        enable_safety_checker:
          description: If set to True, the safety checker will be enabled.
          type: boolean
          default: true
        output_format:
          description: The format of the generated image.
          type: string
          default: png
          enum:
            - jpeg
            - png
            - webp
    # Image-editing request schema for `blackforestlabs/flux-2-edit`.
    # Requires `model`, `prompt` and between one and three `image_urls`.
    Image.v1.flux2EditPayload:
      type: object
      required:
        - model
        - prompt
        - image_urls
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-edit
        prompt:
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          type: string
          maxLength: 4000
        image_urls:
          description: >-
            An array of up to 3 image URLs. The first image is always
            treated as the primary input for image-to-image generation,
            while the remaining images (if provided) serve as visual style
            references for the output.
          type: array
          minItems: 1
          maxItems: 3
          items:
            type: string
            format: uri
        num_images:
          description: The number of images to generate.
          type: number
          default: 1
          minimum: 1
          maximum: 4
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
        guidance_scale:
          description: >-
            The CFG (Classifier Free Guidance) scale is a measure of how
            close you want the model to stick to your prompt when looking
            for a related image to show you.
          type: number
          minimum: 0
          maximum: 20
        num_inference_steps:
          description: The number of inference steps to perform.
          type: integer
          minimum: 4
          maximum: 50
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          default: landscape_4_3
          anyOf:
            - description: For both height and width, the value must be a multiple of 32.
              type: object
              properties:
                width:
                  type: integer
                  default: 1024
                  minimum: 512
                  maximum: 2048
                height:
                  type: integer
                  default: 768
                  minimum: 512
                  maximum: 2048
            - description: The size of the generated image.
              type: string
              enum: *ref_3
        acceleration:
          description: >-
            The speed of the generation. The higher the speed, the faster
            the generation.
          type: string
          default: regular
          enum:
            - none
            - regular
            - high
        enable_prompt_expansion:
          description: If set to True, prompt will be upsampled with more details.
          type: boolean
        enable_safety_checker:
          description: If set to True, the safety checker will be enabled.
          type: boolean
          default: true
        output_format:
          description: The format of the generated image.
          type: string
          default: png
          enum:
            - jpeg
            - png
            - webp
    # Text-to-image request schema for `blackforestlabs/flux-2-lora`.
    #
    # Declared as a standalone object instead of `allOf` over
    # Image.v1.flux2Payload: that schema pins `model` to
    # `blackforestlabs/flux-2`, and `allOf` requires every subschema to
    # validate, so combining it with the enum below could never be satisfied.
    # The fields mirror Image.v1.flux2Payload, with `model` replaced and
    # `loras` added.
    Image.v1.flux2LoraPayload:
      type: object
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-lora
        prompt:
          type: string
          maxLength: 4000
          description: >-
            The text prompt describing the content, style, or composition of the
            image to be generated.
        num_images:
          type: number
          minimum: 1
          maximum: 4
          default: 1
          description: The number of images to generate.
        seed:
          type: integer
          minimum: 1
          description: >-
            The same seed and the same prompt given to the same version of the
            model will output the same image every time.
        guidance_scale:
          type: number
          minimum: 0
          maximum: 20
          description: >-
            The CFG (Classifier Free Guidance) scale is a measure of how close
            you want the model to stick to your prompt when looking for a
            related image to show you.
        num_inference_steps:
          type: integer
          minimum: 4
          maximum: 50
          description: The number of inference steps to perform.
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          anyOf:
            - type: object
              properties:
                width:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 1024
                height:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 768
              description: For both height and width, the value must be a multiple of 32.
            - type: string
              enum: *ref_3
              description: The size of the generated image.
          default: landscape_4_3
        acceleration:
          type: string
          enum:
            - none
            - regular
            - high
          default: regular
          description: >-
            The speed of the generation. The higher the speed, the faster the
            generation.
        enable_prompt_expansion:
          type: boolean
          description: If set to True, prompt will be upsampled with more details.
        enable_safety_checker:
          type: boolean
          default: true
          description: If set to True, the safety checker will be enabled.
        output_format:
          type: string
          enum:
            - jpeg
            - png
            - webp
          default: png
          description: The format of the generated image.
        loras:
          type: array
          items:
            type: object
            properties:
              path:
                type: string
                description: URL, HuggingFace repo ID (owner/repo).
              scale:
                type: number
                minimum: 0
                maximum: 4
                description: Scale factor for LoRA application.
            required:
              - path
          maxItems: 3
          description: >-
            List of LoRA weights to apply (maximum 3). Each LoRA can be a URL,
            HuggingFace repo ID, or local path.
      required:
        - model
        - prompt
    # Image-editing request schema for `blackforestlabs/flux-2-lora-edit`.
    #
    # Declared as a standalone object instead of `allOf` over
    # Image.v1.flux2EditPayload: that schema pins `model` to
    # `blackforestlabs/flux-2-edit`, and `allOf` requires every subschema to
    # validate, so combining it with the enum below could never be satisfied.
    # The fields mirror Image.v1.flux2EditPayload, with `model` replaced and
    # `loras` added.
    Image.v1.flux2LoraEditPayload:
      type: object
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-lora-edit
        prompt:
          type: string
          maxLength: 4000
          description: >-
            The text prompt describing the content, style, or composition of the
            image to be generated.
        image_urls:
          type: array
          items:
            type: string
            format: uri
          minItems: 1
          maxItems: 3
          description: >-
            An array of up to 3 image URLs. The first image is always treated as
            the primary input for image-to-image generation, while the remaining
            images (if provided) serve as visual style references for the
            output.
        num_images:
          type: number
          minimum: 1
          maximum: 4
          default: 1
          description: The number of images to generate.
        seed:
          type: integer
          minimum: 1
          description: >-
            The same seed and the same prompt given to the same version of the
            model will output the same image every time.
        guidance_scale:
          type: number
          minimum: 0
          maximum: 20
          description: >-
            The CFG (Classifier Free Guidance) scale is a measure of how close
            you want the model to stick to your prompt when looking for a
            related image to show you.
        num_inference_steps:
          type: integer
          minimum: 4
          maximum: 50
          description: The number of inference steps to perform.
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          anyOf:
            - type: object
              properties:
                width:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 1024
                height:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 768
              description: For both height and width, the value must be a multiple of 32.
            - type: string
              enum: *ref_3
              description: The size of the generated image.
          default: landscape_4_3
        acceleration:
          type: string
          enum:
            - none
            - regular
            - high
          default: regular
          description: >-
            The speed of the generation. The higher the speed, the faster the
            generation.
        enable_prompt_expansion:
          type: boolean
          description: If set to True, prompt will be upsampled with more details.
        enable_safety_checker:
          type: boolean
          default: true
          description: If set to True, the safety checker will be enabled.
        output_format:
          type: string
          enum:
            - jpeg
            - png
            - webp
          default: png
          description: The format of the generated image.
        loras:
          type: array
          items:
            type: object
            properties:
              path:
                type: string
                description: URL, HuggingFace repo ID (owner/repo).
              scale:
                type: number
                minimum: 0
                maximum: 4
                description: Scale factor for LoRA application.
            required:
              - path
          maxItems: 3
          description: >-
            List of LoRA weights to apply (maximum 3). Each LoRA can be a URL,
            HuggingFace repo ID, or local path.
      required:
        - model
        - prompt
        - image_urls
    # Text-to-image request schema for `blackforestlabs/flux-2-pro`.
    # `model` and `prompt` are the only required fields.
    Image.v1.flux2ProPayload:
      type: object
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-pro
        prompt:
          type: string
          maxLength: 4000
          description: >-
            The text prompt describing the content, style, or composition of the
            image to be generated.
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          anyOf:
            - type: object
              properties:
                width:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 1024
                height:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 768
              description: For both height and width, the value must be a multiple of 32.
            - type: string
              enum: *ref_3
              description: The size of the generated image.
          default: landscape_4_3
        enable_safety_checker:
          type: boolean
          default: true
          description: If set to True, the safety checker will be enabled.
        seed:
          type: integer
          minimum: 1
          description: >-
            The same seed and the same prompt given to the same version of the
            model will output the same image every time.
        # Levels are strings, not integers. The description names 6 as the upper
        # bound so that it agrees with the enum, which is what validators enforce.
        # NOTE(review): confirm the backend really accepts '6'; if it does not,
        # drop '6' from the enum instead.
        safety_tolerance:
          type: string
          enum:
            - '1'
            - '2'
            - '3'
            - '4'
            - '5'
            - '6'
          default: '2'
          description: >-
            The safety tolerance level for the generated image. 1 being the most
            strict and 6 being the most permissive.
        output_format:
          type: string
          enum:
            - jpeg
            - png
            - webp
          default: png
          description: The format of the generated image.
      required:
        - model
        - prompt
    # Image-editing request schema for `blackforestlabs/flux-2-pro-edit`.
    #
    # Declared as a standalone object instead of `allOf` over
    # Image.v1.flux2ProPayload: that schema pins `model` to
    # `blackforestlabs/flux-2-pro`, and `allOf` requires every subschema to
    # validate, so combining it with the enum below could never be satisfied.
    # The fields mirror Image.v1.flux2ProPayload, with `model` replaced and a
    # required `image_urls` added.
    Image.v1.flux2ProEditPayload:
      type: object
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-pro-edit
        prompt:
          type: string
          maxLength: 4000
          description: >-
            The text prompt describing the content, style, or composition of the
            image to be generated.
        image_urls:
          type: array
          items:
            type: string
            format: uri
          minItems: 1
          maxItems: 3
          description: >-
            An array of up to 3 image URLs. The first image is always
            treated as the primary input for image-to-image generation,
            while the remaining images (if provided) serve as visual style
            references for the output.
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          anyOf:
            - type: object
              properties:
                width:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 1024
                height:
                  type: integer
                  minimum: 512
                  maximum: 2048
                  default: 768
              description: For both height and width, the value must be a multiple of 32.
            - type: string
              enum: *ref_3
              description: The size of the generated image.
          default: landscape_4_3
        enable_safety_checker:
          type: boolean
          default: true
          description: If set to True, the safety checker will be enabled.
        seed:
          type: integer
          minimum: 1
          description: >-
            The same seed and the same prompt given to the same version of the
            model will output the same image every time.
        # Same level set as Image.v1.flux2ProPayload; the description names 6 as
        # the upper bound so that it agrees with the enum.
        # NOTE(review): confirm the backend really accepts '6'.
        safety_tolerance:
          type: string
          enum:
            - '1'
            - '2'
            - '3'
            - '4'
            - '5'
            - '6'
          default: '2'
          description: >-
            The safety tolerance level for the generated image. 1 being the most
            strict and 6 being the most permissive.
        output_format:
          type: string
          enum:
            - jpeg
            - png
            - webp
          default: png
          description: The format of the generated image.
      required:
        - model
        - prompt
        - image_urls
    # Text-to-image request schema for `blackforestlabs/flux-2-max`.
    # `model` and `prompt` are the only required fields; output is jpeg or png.
    Image.v1.flux2MaxPayload:
      type: object
      required:
        - model
        - prompt
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-max
        prompt:
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          type: string
          maxLength: 4000
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          default: landscape_4_3
          anyOf:
            - description: For both height and width, the value must be a multiple of 32.
              type: object
              properties:
                width:
                  type: integer
                  default: 1024
                  minimum: 512
                  maximum: 2048
                height:
                  type: integer
                  default: 768
                  minimum: 512
                  maximum: 2048
            - description: The size of the generated image.
              type: string
              enum: *ref_3
        enable_safety_checker:
          description: If set to True, the safety checker will be enabled.
          type: boolean
          default: true
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
        # Levels are quoted so they stay strings rather than integers.
        safety_tolerance:
          description: >-
            The safety tolerance level for the generated image. 1 being
            the most strict and 5 being the most permissive.
          type: string
          default: '2'
          enum:
            - '1'
            - '2'
            - '3'
            - '4'
            - '5'
        output_format:
          description: The format of the generated image.
          type: string
          default: jpeg
          enum:
            - jpeg
            - png
    # Image-editing request schema for `blackforestlabs/flux-2-max-edit`.
    # Requires `model`, `prompt` and at least one entry in `image_urls`
    # (no upper bound is declared on the array).
    Image.v1.flux2MaxEditPayload:
      type: object
      required:
        - model
        - prompt
        - image_urls
      properties:
        model:
          type: string
          enum:
            - blackforestlabs/flux-2-max-edit
        prompt:
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          type: string
          maxLength: 4000
        image_urls:
          description: List of URLs or local Base64 encoded images to edit.
          type: array
          minItems: 1
          items:
            type: string
            format: uri
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          default: landscape_4_3
          anyOf:
            - description: For both height and width, the value must be a multiple of 32.
              type: object
              properties:
                width:
                  type: integer
                  default: 1024
                  minimum: 512
                  maximum: 2048
                height:
                  type: integer
                  default: 768
                  minimum: 512
                  maximum: 2048
            - description: The size of the generated image.
              type: string
              enum: *ref_3
        enable_safety_checker:
          description: If set to True, the safety checker will be enabled.
          type: boolean
          default: true
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
        # Levels are quoted so they stay strings rather than integers.
        safety_tolerance:
          description: >-
            The safety tolerance level for the generated image. 1 being
            the most strict and 5 being the most permissive.
          type: string
          default: '2'
          enum:
            - '1'
            - '2'
            - '3'
            - '4'
            - '5'
        output_format:
          description: The format of the generated image.
          type: string
          default: jpeg
          enum:
            - jpeg
            - png
    # Request schema for `tencent/hunyuan-part`, which takes a 3D mesh URL and
    # an optional point prompt for segmentation. Requires `model` and
    # `mesh_url`.
    Image.v1.hunyuanPartPayload:
      type: object
      required:
        - model
        - mesh_url
      properties:
        model:
          type: string
          enum:
            - tencent/hunyuan-part
        mesh_url:
          description: URL of the 3D model file (.glb or .obj) to process for segmentation.
          type: string
          format: uri
        # Point-prompt coordinates; each axis is bounded to [-1, 1].
        point_prompt_x:
          description: X coordinate of the point prompt for segmentation.
          type: number
          minimum: -1
          maximum: 1
        point_prompt_y:
          description: Y coordinate of the point prompt for segmentation.
          type: number
          minimum: -1
          maximum: 1
        point_prompt_z:
          description: Z coordinate of the point prompt for segmentation.
          type: number
          minimum: -1
          maximum: 1
        point_num:
          description: Number of points to sample from the mesh.
          type: integer
          default: 100000
        use_normal:
          description: Whether to use normal information for segmentation.
          type: boolean
          default: true
        noise_std:
          description: Standard deviation of noise to add to sampled points.
          type: number
        # NOTE(review): the wording below mentions a prompt and an image, but
        # this schema has no `prompt` field - confirm the intended text.
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
    # Text-to-image request schema for `alibaba/z-image-turbo`.
    # `model` and `prompt` are the only required fields.
    Image.v1.zImageTurboPayload:
      type: object
      required:
        - model
        - prompt
      properties:
        model:
          type: string
          enum:
            - alibaba/z-image-turbo
        prompt:
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          type: string
          maxLength: 4000
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          default: landscape_4_3
          anyOf:
            - description: For both height and width, the value must be a multiple of 32.
              type: object
              properties:
                width:
                  type: integer
                  default: 1024
                  minimum: 512
                  maximum: 2048
                height:
                  type: integer
                  default: 768
                  minimum: 512
                  maximum: 2048
            - description: The size of the generated image.
              type: string
              enum: *ref_3
        num_inference_steps:
          description: The number of inference steps to perform.
          type: integer
          minimum: 1
          maximum: 8
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
        num_images:
          description: The number of images to generate.
          type: number
          default: 1
          minimum: 1
          maximum: 4
        enable_safety_checker:
          description: If set to True, the safety checker will be enabled.
          type: boolean
          default: true
        enable_prompt_expansion:
          description: If set to True, prompt will be upsampled with more details.
          type: boolean
          default: true
        acceleration:
          description: >-
            The speed of the generation. The higher the speed, the faster
            the generation.
          type: string
          default: regular
          # Anchor `ref_20` is aliased by later schemas; keep it on this list.
          enum: &ref_20
            - none
            - regular
            - high
        output_format:
          description: The format of the generated image.
          type: string
          default: png
          # Anchor `ref_21` is aliased by later schemas; keep it on this list.
          enum: &ref_21
            - jpeg
            - png
            - webp
    # Text-to-image request schema for `alibaba/z-image-turbo-lora`: the same
    # field set as Image.v1.zImageTurboPayload with its own `model` enum and an
    # additional `loras` array (at most 3 entries).
    Image.v1.zImageTurboLoraPayload:
      type: object
      required:
        - prompt
        - model
      properties:
        model:
          type: string
          enum:
            - alibaba/z-image-turbo-lora
        prompt:
          description: >-
            The text prompt describing the content, style, or composition
            of the image to be generated.
          type: string
          maxLength: 4000
        # Either explicit pixel dimensions or a named preset (alias `ref_3`,
        # anchored earlier in the file).
        image_size:
          default: landscape_4_3
          anyOf:
            - description: For both height and width, the value must be a multiple of 32.
              type: object
              properties:
                width:
                  type: integer
                  default: 1024
                  minimum: 512
                  maximum: 2048
                height:
                  type: integer
                  default: 768
                  minimum: 512
                  maximum: 2048
            - description: The size of the generated image.
              type: string
              enum: *ref_3
        num_inference_steps:
          description: The number of inference steps to perform.
          type: integer
          minimum: 1
          maximum: 8
        seed:
          description: >-
            The same seed and the same prompt given to the same version
            of the model will output the same image every time.
          type: integer
          minimum: 1
        num_images:
          description: The number of images to generate.
          type: number
          default: 1
          minimum: 1
          maximum: 4
        enable_safety_checker:
          description: If set to True, the safety checker will be enabled.
          type: boolean
          default: true
        enable_prompt_expansion:
          description: If set to True, prompt will be upsampled with more details.
          type: boolean
          default: true
        # `ref_20` / `ref_21` are the enum lists anchored in
        # Image.v1.zImageTurboPayload.
        acceleration:
          description: >-
            The speed of the generation. The higher the speed, the faster
            the generation.
          type: string
          default: regular
          enum: *ref_20
        output_format:
          description: The format of the generated image.
          type: string
          default: png
          enum: *ref_21
        loras:
          description: >-
            List of LoRA weights to apply (maximum 3). Each LoRA can be a
            URL, HuggingFace repo ID, or local path.
          type: array
          maxItems: 3
          items:
            type: object
            required:
              - path
            properties:
              path:
                description: URL, HuggingFace repo ID (owner/repo).
                type: string
              scale:
                description: Scale factor for LoRA application.
                type: number
                minimum: 0
                maximum: 4
    # Response schema for image generation: a nullable `data` array of
    # generated images (each with a nullable `url` and `b64_json`) and a
    # nullable `meta` object carrying usage information.
    Image.v1.GenerateImageResponseDTO:
      type: object
      properties:
        data:
          description: The list of generated images.
          type: array
          nullable: true
          items:
            type: object
            properties:
              url:
                description: The URL where the file can be downloaded from.
                type: string
                nullable: true
                example: 'https://cdn.aimlapi.com/generations/hedgehog/1749730923700-29fe35d2-4aef-4bc5-a911-6c39884d16a8.png'
              b64_json:
                description: The base64-encoded JSON of the generated image.
                type: string
                nullable: true
                example: null
        meta:
          description: Additional details about the generation.
          type: object
          nullable: true
          properties:
            usage:
              type: object
              nullable: true
              required:
                - credits_used
              properties:
                credits_used:
                  description: The number of tokens consumed during generation.
                  type: number
                  example: 120000
    Video.v2.SubmitVideoPayload:
      anyOf:
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan2.1-t2v-plus
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            resolution:
              type: string
              enum:
                - 720P
              default: 720P
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
                - '4:3'
                - '3:4'
              default: '16:9'
              description: The aspect ratio of the generated video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            watermark:
              type: boolean
              default: false
              description: Whether the video contains a watermark.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan2.1-t2v-turbo
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            resolution:
              type: string
              enum:
                - 480P
                - 720P
              default: 720P
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
                - '4:3'
                - '3:4'
              default: '16:9'
              description: The aspect ratio of the generated video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            watermark:
              type: boolean
              default: false
              description: Whether the video contains a watermark.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan2.2-t2v-plus
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            resolution:
              type: string
              enum:
                - 480P
                - 1080P
              default: 1080P
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
                - '4:3'
                - '3:4'
              default: '16:9'
              description: The aspect ratio of the generated video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            watermark:
              type: boolean
              default: false
              description: Whether the video contains a watermark.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan2.2-i2v-plus
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            resolution:
              type: string
              enum:
                - 480P
                - 1080P
              default: 1080P
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            watermark:
              type: boolean
              default: false
              description: Whether the video contains a watermark.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
          required:
            - model
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - kling-video/v1.6/standard/multi-image-to-video
            type:
              type: string
              enum:
                - multi-image-to-video
            image_list:
              type: array
              items:
                type: string
                format: uri
              minItems: 2
              maxItems: 4
              description: >-
                Array of image URLs (2-10 images) for multi-image-to-video
                generation
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
              default: '16:9'
              description: The aspect ratio of the generated video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: &ref_22
                - 5
                - 10
            external_task_id:
              type: string
              description: Customized Task ID
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
          required:
            - model
            - image_list
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/kling-video-v1.6-pro-effects
                - klingai/kling-video-v1.6-standard-effects
            type:
              type: string
              enum:
                - effects
            effect_scene:
              type: string
              enum:
                - magic_fireball
                - pet_moto_rider
                - media_interview
                - pet_lion
                - pet_delivery
                - pet_chef
                - santa_gifts
                - santa_hug
                - girlfriend
                - boyfriend
                - heart_gesture_1
                - pet_wizard
                - smoke_smoke
                - thumbs_up
                - instant_kid
                - dollar_rain
                - cry_cry
                - building_collapse
                - gun_shot
                - mushroom
                - double_gun
                - pet_warrior
                - lightning_power
                - jesus_hug
                - shark_alert
                - long_hair
                - lie_flat
                - polar_bear_hug
                - brown_bear_hug
                - jazz_jazz
                - office_escape_plow
                - fly_fly
                - watermelon_bomb
                - pet_dance
                - boss_coming
                - wool_curly
                - iron_warrior
                - pet_bee
                - marry_me
                - swing_swing
                - day_to_night
                - piggy_morph
                - wig_out
                - car_explosion
                - ski_ski
                - tiger_hug
                - siblings
                - construction_worker
                - let's_ride
                - snatched
                - magic_broom
                - felt_felt
                - jumpdrop
                - celebration
                - splashsplash
                - hula
                - surfsurf
                - fairy_wing
                - angel_wing
                - dark_wing
                - skateskate
                - plushcut
                - jelly_press
                - jelly_slice
                - jelly_squish
                - jelly_jiggle
                - pixelpixel
                - yearbook
                - instant_film
                - anime_figure
                - rocketrocket
                - bloombloom
                - dizzydizzy
                - fuzzyfuzzy
                - squish
                - expansion
                - hug
                - kiss
                - heart_gesture
                - fight
              description: Video effect scene type
            image_url:
              anyOf:
                - type: string
                  format: uri
                - type: array
                  items:
                    type: string
                    format: uri
              description: >-
                For hug, kiss, and heart_gesture effects, pass an array
                containing exactly two image URLs. For squish or expansion, only
                one image URL is required.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_22
          required:
            - model
            - effect_scene
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - kling-video/v1/standard/image-to-video
                - kling-video/v1/pro/image-to-video
                - kling-video/v1.5/standard/image-to-video
                - kling-video/v1.5/pro/image-to-video
                - kling-video/v1.6/pro/image-to-video
            image_url:
              type: string
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            type:
              type: string
              enum:
                - image-to-video
            tail_image_url:
              type: string
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            static_mask:
              type: string
              description: >-
                URL of the image for Static Brush Application Area (Mask image
                created by users using the motion brush).
            dynamic_masks:
              type: array
              items:
                type: object
                properties:
                  mask:
                    type: string
                  trajectories:
                    type: array
                    items:
                      type: object
                      properties:
                        x:
                          type: integer
                        'y':
                          type: integer
                      required:
                        - x
                        - 'y'
                    minItems: 2
                    maxItems: 77
                required:
                  - mask
                  - trajectories
              maxItems: 6
              description: List of dynamic masks.
            camera_control:
              type: object
              properties:
                type:
                  type: string
                  enum: &ref_23
                    - simple
                    - down_back
                    - forward_up
                    - right_turn_forward
                    - left_turn_forward
                config:
                  type: object
                  properties:
                    horizontal:
                      type: number
                      minimum: -10
                      maximum: 10
                    vertical:
                      type: number
                      minimum: -10
                      maximum: 10
                    pan:
                      type: number
                      minimum: -10
                      maximum: 10
                    tilt:
                      type: number
                      minimum: -10
                      maximum: 10
                    roll:
                      type: number
                      minimum: -10
                      maximum: 10
                    zoom:
                      type: number
                      minimum: -10
                      maximum: 10
              description: Advanced camera control parameters.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_22
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              default: 0.5
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            external_task_id:
              type: string
              description: Customized Task ID
          required:
            - model
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/v2-master-image-to-video
                - kling-video/v2.1/standard/image-to-video
                - klingai/v2.1-master-image-to-video
                - kling-video/v2.1/pro/image-to-video
            image_url:
              type: string
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            type:
              type: string
              enum:
                - image-to-video
            static_mask:
              type: string
              description: >-
                URL of the image for Static Brush Application Area (Mask image
                created by users using the motion brush).
            dynamic_masks:
              type: array
              items:
                type: object
                properties:
                  mask:
                    type: string
                  trajectories:
                    type: array
                    items:
                      type: object
                      properties:
                        x:
                          type: integer
                        'y':
                          type: integer
                      required:
                        - x
                        - 'y'
                    minItems: 2
                    maxItems: 77
                required:
                  - mask
                  - trajectories
              maxItems: 6
              description: List of dynamic masks.
            camera_control:
              type: object
              properties:
                type:
                  type: string
                  enum: *ref_23
                config:
                  type: object
                  properties:
                    horizontal:
                      type: number
                      minimum: -10
                      maximum: 10
                    vertical:
                      type: number
                      minimum: -10
                      maximum: 10
                    pan:
                      type: number
                      minimum: -10
                      maximum: 10
                    tilt:
                      type: number
                      minimum: -10
                      maximum: 10
                    roll:
                      type: number
                      minimum: -10
                      maximum: 10
                    zoom:
                      type: number
                      minimum: -10
                      maximum: 10
              description: Advanced camera control parameters.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_22
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              default: 0.5
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            external_task_id:
              type: string
              description: Customized Task ID
          required:
            - model
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - kling-video/v1/standard/text-to-video
                - kling-video/v1/pro/text-to-video
                - kling-video/v1.5/pro/text-to-video
                - kling-video/v1.6/pro/text-to-video
                - klingai/v2-master-text-to-video
                - klingai/v2.1-master-text-to-video
            type:
              type: string
              enum:
                - text-to-video
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
              default: '16:9'
              description: The aspect ratio of the generated video.
            camera_control:
              type: object
              properties:
                type:
                  type: string
                  enum: *ref_23
                config:
                  type: object
                  properties:
                    horizontal:
                      type: number
                      minimum: -10
                      maximum: 10
                    vertical:
                      type: number
                      minimum: -10
                      maximum: 10
                    pan:
                      type: number
                      minimum: -10
                      maximum: 10
                    tilt:
                      type: number
                      minimum: -10
                      maximum: 10
                    roll:
                      type: number
                      minimum: -10
                      maximum: 10
                    zoom:
                      type: number
                      minimum: -10
                      maximum: 10
              description: Camera control parameters.
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_22
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              default: 0.5
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            external_task_id:
              type: string
              description: Customized Task ID
          required:
            - model
            - prompt
        - $ref: '#/components/schemas/Kling.v2.klingV16StandardImageToVideo'
        - $ref: '#/components/schemas/Kling.v2.klingTextToVideoPayload'
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/v2.5-turbo/pro/image-to-video
            # NOTE(review): `format: uri` and the description disagree on what is
            # accepted: a bare Base64 string is not a URI, so validators that
            # enforce formats would reject it. Presumably a `data:` URI is meant;
            # confirm with the backend before tightening either side.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: &ref_24
                - 5
                - 10
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
          required:
            - model
            - image_url
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/v2.5-turbo/pro/text-to-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_24
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            aspect_ratio:
              type: string
              enum: &ref_25
                - '16:9'
                - '9:16'
                - '1:1'
              description: The aspect ratio of the generated video.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-v2-6-pro-motion-control
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            video_url:
              type: string
              format: uri
              description: >-
                An HTTPS URL pointing to a video or a data URI containing a
                video. This video will be used as a reference during generation.
            character_orientation:
              type: string
              enum:
                - image
                - video
              default: image
              description: >-
                Selects whether the generated character's orientation follows
                the reference image or the reference video:

                - image: the character keeps the orientation of the person in
                the image; in this mode the reference video duration should not
                exceed 10 seconds.

                - video: the character follows the orientation of the
                characters in the video; in this mode the reference video
                duration should not exceed 30 seconds.
            keep_audio:
              type: boolean
              default: true
              description: Whether to keep the original audio from the video.
          required:
            - model
            - image_url
            - video_url
        # NOTE(review): this variant repeats the klingai/v2.5-turbo/pro/image-to-video
        # entry that appears earlier in this same list (same model enum, same
        # properties, same required fields). If the enclosing combinator is
        # `oneOf`, a valid payload would match both entries and fail validation;
        # under `anyOf` it is only redundant. Confirm, then dedupe at the
        # generator rather than by hand.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/v2.5-turbo/pro/image-to-video
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_24
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
          required:
            - model
            - image_url
            - prompt
        # NOTE(review): this variant repeats the klingai/v2.5-turbo/pro/text-to-video
        # entry that appears earlier in this same list (same model enum, same
        # properties, same required fields). If the enclosing combinator is
        # `oneOf`, a valid payload would match both entries and fail validation;
        # under `anyOf` it is only redundant. Confirm, then dedupe at the
        # generator rather than by hand.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/v2.5-turbo/pro/text-to-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_24
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            aspect_ratio:
              type: string
              enum: *ref_25
              description: The aspect ratio of the generated video.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - video-01-live2d
            prompt:
              type: string
              maxLength: 2000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the first frame for the video.

                Image specifications: 

                - format must be JPG, JPEG, or PNG; 

                - aspect ratio should be greater than 2:5 and less than 5:2; 

                - the shorter side must exceed 300 pixels; 

                - file size must not exceed 20MB.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                If True, the incoming prompt will be automatically optimized to
                improve generation quality when needed. For more precise
                control, set it to False — the model will then follow the
                instructions more strictly.
          required:
            - model
            - prompt
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - video-01
            prompt:
              type: string
              maxLength: 2000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the first frame for the video.

                Image specifications: 

                - format must be JPG, JPEG, or PNG; 

                - aspect ratio should be greater than 2:5 and less than 5:2; 

                - the shorter side must exceed 300 pixels; 

                - file size must not exceed 20MB.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                If True, the incoming prompt will be automatically optimized to
                improve generation quality when needed. For more precise
                control, set it to False — the model will then follow the
                instructions more strictly.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - minimax/hailuo-02
            prompt:
              type: string
              maxLength: 2000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the first frame for the video.

                Image specifications: 

                - format must be JPG, JPEG, or PNG; 

                - aspect ratio should be greater than 2:5 and less than 5:2; 

                - the shorter side must exceed 300 pixels; 

                - file size must not exceed 20MB.
            last_image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 6
                - 10
            resolution:
              type: string
              enum:
                - 768P
                - 1080P
              default: 768P
              description: >-
                The resolution of the output video. 1080P corresponds to 1920 x
                1080 pixels; 768P corresponds to 1366 x 768 pixels.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                If True, the incoming prompt will be automatically optimized to
                improve generation quality when needed. For more precise
                control, set it to False — the model will then follow the
                instructions more strictly.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - minimax/hailuo-2.3
            prompt:
              type: string
              maxLength: 2000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the first frame for the video.

                Image specifications: 

                - format must be JPG, JPEG, or PNG; 

                - aspect ratio should be greater than 2:5 and less than 5:2; 

                - the shorter side must exceed 300 pixels; 

                - file size must not exceed 20MB.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 6
                - 10
            resolution:
              type: string
              enum:
                - 768P
                - 1080P
              default: 768P
              description: >-
                The resolution of the output video. 1080P corresponds to 1920 x
                1080 pixels; 768P corresponds to 1366 x 768 pixels.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                If True, the incoming prompt will be automatically optimized to
                improve generation quality when needed. For more precise
                control, set it to False — the model will then follow the
                instructions more strictly.
          required:
            - model
            - prompt
        - type: object
          properties:
            model:
              type: string
              enum:
                - minimax/hailuo-2.3-fast
            prompt:
              type: string
              maxLength: 2000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the first frame for the video.

                Image specifications: 

                - format must be JPG, JPEG, or PNG; 

                - aspect ratio should be greater than 2:5 and less than 5:2; 

                - the shorter side must exceed 300 pixels; 

                - file size must not exceed 20MB.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 6
                - 10
            resolution:
              type: string
              enum:
                - 768P
                - 1080P
              default: 768P
              description: >-
                The resolution of the output video. 1080P corresponds to 1920 x
                1080 pixels; 768P corresponds to 1366 x 768 pixels.
            enhance_prompt:
              type: boolean
              default: true
              description: >-
                If True, the incoming prompt will be automatically optimized to
                improve generation quality when needed. For more precise
                control, set it to False — the model will then follow the
                instructions more strictly.
          required:
            - model
            - prompt
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - gen3a_turbo
            prompt:
              type: string
              maxLength: 1000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                An HTTPS URL or data URI containing an encoded image to be used
                as the first frame of the generated video.
            tail_image_url:
              type: string
              format: uri
              description: >-
                An HTTPS URL or data URI containing an encoded image to be used
                as the last frame of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: &ref_26
                - 5
                - 10
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              default: '16:9'
              description: The aspect ratio of the generated video.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
          required:
            - model
            - image_url
        - type: object
          properties:
            model:
              type: string
              enum:
                - runway/gen4_turbo
            prompt:
              type: string
              maxLength: 1000
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                An HTTPS URL or data URI containing an encoded image to be used
                as the first frame of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_26
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '4:3'
                - '3:4'
                - '1:1'
                - '21:9'
              default: '16:9'
              description: The aspect ratio of the generated video.
            seed:
              type: integer
              minimum: 0
              maximum: 4294967295
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
          required:
            - model
            - image_url
        # Schema variant whose `model` is pinned to `runway/gen4_aleph`.
        # A source video and a prompt are both required.
        - type: object
          required: [model, video_url, prompt]
          properties:
            model:
              type: string
              enum: [runway/gen4_aleph]
            video_url:
              description: >-
                A HTTPS URL pointing to a video or a data URI containing a
                video. This video will be used as a reference during generation.
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
              maxLength: 1000
            duration:
              # NOTE(review): typed `number` here, whereas the neighbouring
              # variants type `duration` as `integer` - confirm this is intended.
              description: The length of the output video in seconds.
              type: number
              enum: [5]
              default: 5
            frame_size:
              description: The width and height of the video.
              type: string
              # Anchored as ref_27 so that later variants can alias this list.
              enum: &ref_27
                - '1280:720'
                - '720:1280'
                - '1104:832'
                - '832:1104'
                - '960:960'
                - '1584:672'
                - '848:480'
                - '640:480'
              default: '1280:720'
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
              minimum: 0
              maximum: 4294967295
            references:
              description: >-
                Passing an image reference allows the model to emulate the style
                or content of the reference in the output.
              type: array
              items:
                type: object
                required: [type, url]
                properties:
                  type:
                    type: string
                    enum: [image]
                  url:
                    type: string
                    format: uri
        # Schema variant whose `model` is pinned to `runway/act_two`.
        # Both `character` and `reference` must accompany `model`.
        - type: object
          required: [model, character, reference]
          properties:
            model:
              type: string
              enum: [runway/act_two]
            # `character` accepts one of two object shapes, distinguished by
            # the literal carried in their `type` field (video or image).
            character:
              description: >-
                The character to control. You can either provide a video or an
                image. A visually recognizable face must be visible and stay
                within the frame.
              oneOf:
                - description: >-
                    A video of your character. In the output, the character will
                    use the reference video performance in its original animated
                    environment and some of the character's own movements.
                  type: object
                  required: [type, url]
                  properties:
                    type:
                      type: string
                      enum: [video]
                    url:
                      type: string
                      format: uri
                - description: >-
                    An image of your character. In the output, the character
                    will use the reference video performance in its original
                    static environment.
                  type: object
                  required: [type, url]
                  properties:
                    type:
                      type: string
                      enum: [image]
                    url:
                      type: string
                      format: uri
            reference:
              description: >-
                Passing a video reference allows the model to emulate the style
                or content of the reference in the output.
              type: object
              required: [type, url]
              properties:
                type:
                  type: string
                  enum: [video]
                url:
                  type: string
                  format: uri
            frame_size:
              description: The width and height of the video.
              type: string
              # Same list of sizes as the node anchored as ref_27.
              enum: *ref_27
              default: '1280:720'
            body_control:
              description: >-
                A boolean indicating whether to enable body control. When
                enabled, non-facial movements and gestures will be applied to
                the character in addition to facial expressions.
              type: boolean
            expression_intensity:
              description: >-
                An integer between 1 and 5 (inclusive). A larger value increases
                the intensity of the character's expression.
              type: integer
              minimum: 1
              maximum: 5
              default: 3
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
              minimum: 0
              maximum: 4294967295
        # Schema variant whose `model` is pinned to
        # `bytedance/seedance-1-0-lite-t2v`; `model` and `prompt` are required.
        # The resolution, aspect-ratio and duration lists are anchored here
        # (ref_28, ref_29, ref_30) and aliased by later variants.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [bytedance/seedance-1-0-lite-t2v]
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: &ref_28 [480p, 720p]
              default: 720p
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: &ref_29 ['16:9', '4:3', '1:1', '3:4', '9:16']
              default: '16:9'
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: &ref_30 [5, 10]
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
            camera_fixed:
              description: >-
                Whether to fix the camera position.

                - true: Fix the camera position. The platform will append
                instructions to fix the camera position in the user's prompt,
                but the actual effect is not guaranteed.

                - false: Do not fix the camera position.
              type: boolean
              default: false
            watermark:
              description: Whether the video contains a watermark.
              type: boolean
              default: false
              deprecated: true
        # Schema variant whose `model` is pinned to
        # `bytedance/seedance-1-0-pro-t2v`; `model` and `prompt` are required.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [bytedance/seedance-1-0-pro-t2v]
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: *ref_28
              default: 720p
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_29
              default: '16:9'
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_30
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
            camera_fixed:
              description: >-
                Whether to fix the camera position.

                - true: Fix the camera position. The platform will append
                instructions to fix the camera position in the user's prompt,
                but the actual effect is not guaranteed.

                - false: Do not fix the camera position.
              type: boolean
              default: false
            watermark:
              description: Whether the video contains a watermark.
              type: boolean
              default: false
              deprecated: true
        # Schema variant whose `model` is pinned to
        # `bytedance/seedance-1-0-lite-i2v`; it requires `model`, `image_url`
        # and `prompt`.
        - type: object
          required: [model, image_url, prompt]
          properties:
            model:
              type: string
              enum: [bytedance/seedance-1-0-lite-i2v]
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: *ref_28
              default: 720p
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_30
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
            camera_fixed:
              description: >-
                Whether to fix the camera position.

                - true: Fix the camera position. The platform will append
                instructions to fix the camera position in the user's prompt,
                but the actual effect is not guaranteed.

                - false: Do not fix the camera position.
              type: boolean
              default: false
            watermark:
              description: Whether the video contains a watermark.
              type: boolean
              default: false
              deprecated: true
        # Schema variant whose `model` is pinned to
        # `bytedance/seedance-1-0-pro-i2v`; it requires `model`, `image_url`
        # and `prompt`.
        - type: object
          required: [model, image_url, prompt]
          properties:
            model:
              type: string
              enum: [bytedance/seedance-1-0-pro-i2v]
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: *ref_28
              default: 720p
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_30
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
            camera_fixed:
              description: >-
                Whether to fix the camera position.

                - true: Fix the camera position. The platform will append
                instructions to fix the camera position in the user's prompt,
                but the actual effect is not guaranteed.

                - false: Do not fix the camera position.
              type: boolean
              default: false
            watermark:
              description: Whether the video contains a watermark.
              type: boolean
              default: false
              deprecated: true
        # Schema variant whose `model` is pinned to
        # `bytedance/seedance-1-0-pro-fast`. `image_url` is declared but not
        # listed as required; only `model` and `prompt` are.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [bytedance/seedance-1-0-pro-fast]
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: *ref_28
              default: 720p
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_29
              default: '16:9'
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_30
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
            camera_fixed:
              description: >-
                Whether to fix the camera position.

                - true: Fix the camera position. The platform will append
                instructions to fix the camera position in the user's prompt,
                but the actual effect is not guaranteed.

                - false: Do not fix the camera position.
              type: boolean
              default: false
        # The Pixverse request variants are declared under components/schemas
        # and pulled in by reference rather than being written inline here.
        - $ref: '#/components/schemas/Pixverse.v2.pixverseTransition'
        - $ref: '#/components/schemas/Pixverse.v2.pixverseImageToVideo'
        - $ref: '#/components/schemas/Pixverse.v2.pixverseTextToVideo'
        - $ref: '#/components/schemas/Pixverse.v2.pixverseV55TextToVideo'
        - $ref: '#/components/schemas/Pixverse.v2.pixverseV55ImageToVideo'
        # Schema variant whose `model` is pinned to
        # `alibaba/wan2.5-t2v-preview`; `model` and `prompt` are required.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [alibaba/wan2.5-t2v-preview]
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
              minLength: 1
              maxLength: 800
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: ['16:9', '9:16', '1:1']
              default: '16:9'
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: [480p, 720p, 1080p]
              default: 720p
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: [5, 10]
            negative_prompt:
              description: The description of elements to avoid in the generated video.
              type: string
            enable_prompt_expansion:
              description: Whether to enable prompt expansion.
              type: boolean
              default: true
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
        # Schema variant whose `model` is pinned to
        # `alibaba/wan2.5-i2v-preview`; it requires `model`, `prompt` and
        # `image_url`.
        - type: object
          required: [model, prompt, image_url]
          properties:
            model:
              type: string
              enum: [alibaba/wan2.5-i2v-preview]
            prompt:
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
              type: string
              minLength: 1
              maxLength: 800
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
              type: string
              format: uri
            resolution:
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
              type: string
              enum: [480p, 720p, 1080p]
              default: 720p
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: [5, 10]
            negative_prompt:
              description: The description of elements to avoid in the generated video.
              type: string
            enable_prompt_expansion:
              description: Whether to enable prompt expansion.
              type: boolean
              default: true
            seed:
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
              type: integer
        # Schema variant whose `model` is pinned to `alibaba/wan-2-6-t2v`;
        # `model` and `prompt` are required. The resolution, duration and
        # shot-type lists are anchored here (ref_31, ref_32, ref_33) so that a
        # later variant can alias them.
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan-2-6-t2v
            prompt:
              type: string
              minLength: 1
              maxLength: 800
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            audio_url:
              type: string
              format: uri
              description: >-
                The URL of the audio file. The model will use this audio to
                generate the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
                - '4:3'
                - '3:4'
              default: '16:9'
              description: The aspect ratio of the generated video.
            resolution:
              type: string
              enum: &ref_31
                - 720p
                - 1080p
              default: 1080p
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: &ref_32
                - 5
                - 10
                - 15
            # The description names `enable_prompt_expansion`, the prompt
            # expansion flag this schema actually declares.
            shot_type:
              type: string
              enum: &ref_33
                - single
                - multi
              default: single
              description: >-
                Specifies the shot type of the generated video, that is, whether
                the video consists of a single continuous shot or multiple
                switched shots.

                This parameter takes effect only when 'enable_prompt_expansion'
                is set to true.

                - single: (default) Outputs a single-shot video.

                - multi: Outputs a multi-shot video.
            generate_audio:
              type: boolean
              default: true
              description: >-
                Specifies whether to automatically add audio to the generated
                video.

                This parameter takes effect only when 'audio_url' is not
                provided.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            enable_prompt_expansion:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
          required:
            - model
            - prompt
        # Schema variant whose `model` is pinned to `alibaba/wan-2-6-i2v`;
        # it requires `model`, `image_url` and `prompt`.
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan-2-6-i2v
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            prompt:
              type: string
              minLength: 1
              maxLength: 800
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            audio_url:
              type: string
              format: uri
              description: >-
                The URL of the audio file. The model will use this audio to
                generate the video.
            resolution:
              type: string
              enum: *ref_31
              default: 1080p
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum: *ref_32
            # The description names `enable_prompt_expansion`, the prompt
            # expansion flag this schema actually declares.
            shot_type:
              type: string
              enum: *ref_33
              default: single
              description: >-
                Specifies the shot type of the generated video, that is, whether
                the video consists of a single continuous shot or multiple
                switched shots.

                This parameter takes effect only when 'enable_prompt_expansion'
                is set to true.

                - single: (default) Outputs a single-shot video.

                - multi: Outputs a multi-shot video.
            generate_audio:
              type: boolean
              default: true
              description: >-
                Specifies whether to automatically add audio to the generated
                video.

                This parameter takes effect only when 'audio_url' is not
                provided.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            enable_prompt_expansion:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
          required:
            - model
            - image_url
            - prompt
        # Schema variant whose `model` is pinned to
        # `alibaba/wan-2-6-image-to-video-flash`. Only `model` and `image_url`
        # are required; `prompt` is optional for this variant.
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan-2-6-image-to-video-flash
            prompt:
              type: string
              minLength: 1
              maxLength: 800
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            audio_url:
              type: string
              format: uri
              description: >-
                The URL of the audio file. The model will use this audio to
                generate the video.
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 720p
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            duration:
              type: integer
              description: >-
                Duration of the generated video in seconds (up to 15 seconds for
                Flash model).
              enum:
                - 5
                - 10
                - 15
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            enable_prompt_expansion:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            # The description names `enable_prompt_expansion`, the prompt
            # expansion flag this schema actually declares.
            shot_type:
              type: string
              enum:
                - single
                - multi
              default: single
              description: >-
                Specifies the shot type of the generated video, that is, whether
                the video consists of a single continuous shot or multiple
                switched shots.

                This parameter takes effect only when 'enable_prompt_expansion'
                is set to true.

                - single: (default) Outputs a single-shot video.

                - multi: Outputs a multi-shot video.
            generate_audio:
              type: boolean
              default: true
              description: >-
                Specifies whether to automatically add audio to the generated
                video.

                This parameter takes effect only when 'audio_url' is not
                provided.
          required:
            - model
            - image_url
        # Schema variant whose `model` is pinned to `alibaba/wan-2-6-r2v`;
        # it requires `model`, `prompt` and between one and three reference
        # videos in `video_urls`.
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/wan-2-6-r2v
            prompt:
              type: string
              minLength: 1
              maxLength: 800
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            video_urls:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              maxItems: 3
              description: >-
                An array of URLs for the uploaded reference video files. This
                parameter is used to extract the character's appearance and
                voice (if any) to generate a video that matches the reference
                features.

                Each reference video must contain only one character. For
                example, character1 is a little girl and character2 is an alarm
                clock.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
                - '4:3'
                - '3:4'
              default: '16:9'
              description: The aspect ratio of the generated video.
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: >-
                An enumeration where the short side of the video frame
                determines the resolution.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 5
                - 10
            # The description names `enable_prompt_expansion`, the prompt
            # expansion flag this schema actually declares.
            shot_type:
              type: string
              enum:
                - single
                - multi
              default: single
              description: >-
                Specifies the shot type of the generated video, that is, whether
                the video consists of a single continuous shot or multiple
                switched shots.

                This parameter takes effect only when 'enable_prompt_expansion'
                is set to true.

                - single: (default) Outputs a single-shot video.

                - multi: Outputs a multi-shot video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enable_prompt_expansion:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
          required:
            - model
            - prompt
            - video_urls
        # Variant defined by reference to the `Bytedance.v2.omnihuman`
        # component schema under `#/components/schemas`.
        - $ref: '#/components/schemas/Bytedance.v2.omnihuman'
        # Schema variant for the `veed/fabric-1.0` model.
        # All four declared properties are required.
        - type: object
          required: [model, image_url, audio_url, resolution]
          properties:
            model:
              type: string
              enum: [veed/fabric-1.0]
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local
                image that will serve as the visual base or the first frame
                for the video.
            # NOTE(review): the description speaks of a "reference song";
            # confirm that wording matches this model's audio input.
            audio_url:
              type: string
              format: uri
              description: >-
                Reference song, should contain music and vocals. Must be a
                .wav or .mp3 file longer than 15 seconds.
            resolution:
              type: string
              enum: [480p, 720p]
              description: The resolution of the generated video.
        # Schema variant for the `veed/fabric-1.0-fast` model.
        # Declares the same properties and required list as the
        # `veed/fabric-1.0` variant; only the `model` enum value differs.
        - type: object
          required: [model, image_url, audio_url, resolution]
          properties:
            model:
              type: string
              enum: [veed/fabric-1.0-fast]
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local
                image that will serve as the visual base or the first frame
                for the video.
            audio_url:
              type: string
              format: uri
              description: >-
                Reference song, should contain music and vocals. Must be a
                .wav or .mp3 file longer than 15 seconds.
            resolution:
              type: string
              enum: [480p, 720p]
              description: The resolution of the generated video.
        # Schema variant for the `tencent/hunyuan-video-foley` model.
        # Only `model` and `video_url` are required; every other property is
        # optional.
        - type: object
          required: [model, video_url]
          properties:
            model:
              type: string
              enum: [tencent/hunyuan-video-foley]
            video_url:
              type: string
              format: uri
              description: >-
                A HTTPS URL pointing to a video or a data URI containing a video.
                This video will be used as a reference during generation.
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
            negative_prompt:
              type: string
              # Quoted so the comma is unmistakably part of one string value.
              default: 'noisy, harsh'
              description: The description of elements to avoid in the generated video.
            guidance_scale:
              type: number
              default: 4.5
              description: >-
                Classifier-free guidance scale. Controls prompt
                adherence / creativity.
            num_inference_steps:
              type: integer
              default: 50
              description: >-
                Number of inference steps for sampling. Higher values
                give better quality but take longer.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results
                for the same other request parameters. Using the same value
                for an identical request will produce similar results. If
                unspecified, a random number is chosen.
        # Variants defined by reference to component schemas under
        # `#/components/schemas`: five `Alibaba.v2.wan22Vace*` entries followed
        # by two `Alibaba.v2.wan22Animate*` entries.
        - $ref: '#/components/schemas/Alibaba.v2.wan22VaceDepth'
        - $ref: '#/components/schemas/Alibaba.v2.wan22VacePose'
        - $ref: '#/components/schemas/Alibaba.v2.wan22VaceInpainting'
        - $ref: '#/components/schemas/Alibaba.v2.wan22VaceOutpainting'
        - $ref: '#/components/schemas/Alibaba.v2.wan22VaceReframe'
        - $ref: '#/components/schemas/Alibaba.v2.wan22AnimateMove'
        - $ref: '#/components/schemas/Alibaba.v2.wan22AnimateReplace'
        # Schema variant for the `google/veo-3.1-t2v` model.
        # Required: model, prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-t2v
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 4
                - 6
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            auto_fix:
              type: boolean
              default: true
              description: >-
                Whether to automatically attempt to fix prompts that fail
                content policy or other validation checks by rewriting them.
          required:
            - model
            - prompt
        # Schema variant for the `google/veo-3.1-i2v` model.
        # Required: model, prompt, image_url. `duration` only allows 8.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-i2v
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                URL of the input image to animate. Should be 720p or higher
                resolution.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
        # Schema variant for the `google/veo-3.1-t2v-fast` model.
        # Required: model, prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-t2v-fast
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 4
                - 6
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            auto_fix:
              type: boolean
              default: true
              description: >-
                Whether to automatically attempt to fix prompts that fail
                content policy or other validation checks by rewriting them.
          required:
            - model
            - prompt
        # Schema variant for the `google/veo-3.1-i2v-fast` model.
        # Required: model, prompt, image_url. `duration` only allows 8.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-i2v-fast
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                URL of the input image to animate. Should be 720p or higher
                resolution.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
        # Schema variant for the
        # `google/veo-3.1-first-last-image-to-video-fast` model.
        # Required: model, prompt, image_url, last_image_url.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-first-last-image-to-video-fast
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                URL of the input image to animate. Should be 720p or higher
                resolution.
            last_image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
            - last_image_url
        # Schema variant for the `google/veo-3.1-first-last-image-to-video`
        # model.
        # Required: model, prompt, image_url, last_image_url.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-first-last-image-to-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                URL of the input image to animate. Should be 720p or higher
                resolution.
            last_image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
            - last_image_url
        # Schema variant for the `google/veo-3.1-reference-to-video` model.
        # Required: model, prompt, image_urls (an array of URIs).
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.1-reference-to-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_urls:
              type: array
              items:
                type: string
                format: uri
              # NOTE(review): the previous text was the singular `image_url`
              # wording; reworded for the array. Confirm the exact role of
              # these images with the model's upstream documentation.
              description: >-
                An array of URLs of the input images to use as references for
                the video. Each image should be 720p or higher resolution.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 8
            resolution:
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_urls
        # Schema variant for the `google/veo3-1-extend-video` model.
        # Required: model, prompt, video_url. `duration` only allows 7 and
        # `resolution` only allows 720p.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo3-1-extend-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            video_url:
              type: string
              format: uri
              description: >-
                A HTTPS URL pointing to a video or a data URI containing a
                video. This video will be used as a reference during generation.
            aspect_ratio:
              type: string
              enum:
                - auto
                - '16:9'
                - '9:16'
              default: auto
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 7
            resolution:
              type: string
              enum:
                - 720p
              default: 720p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
            auto_fix:
              type: boolean
              default: false
              description: >-
                Whether to automatically attempt to fix prompts that fail
                content policy or other validation checks by rewriting them.
          required:
            - model
            - prompt
            - video_url
        # Schema variant for the `google/veo3-1-fast-extend-video` model.
        # Required: model, prompt, video_url. `duration` only allows 7 and
        # `resolution` only allows 720p.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo3-1-fast-extend-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            video_url:
              type: string
              format: uri
              description: >-
                A HTTPS URL pointing to a video or a data URI containing a
                video. This video will be used as a reference during generation.
            aspect_ratio:
              type: string
              enum:
                - auto
                - '16:9'
                - '9:16'
              default: auto
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 7
            resolution:
              type: string
              enum:
                - 720p
              default: 720p
              description: The resolution of the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
            auto_fix:
              type: boolean
              default: false
              description: >-
                Whether to automatically attempt to fix prompts that fail
                content policy or other validation checks by rewriting them.
          required:
            - model
            - prompt
            - video_url
        # Variants defined by reference to the two `Video.v2.kandinsky5*`
        # component schemas under `#/components/schemas`.
        - $ref: '#/components/schemas/Video.v2.kandinsky5TextToVideo'
        - $ref: '#/components/schemas/Video.v2.kandinsky5TextToVideoDistill'
        # Schema variant for the `krea/krea-wan-14b/text-to-video` model.
        # Only `model` and `prompt` are required.
        - type: object
          required: [model, prompt]
          properties:
            model:
              type: string
              enum: [krea/krea-wan-14b/text-to-video]
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
            num_frames:
              type: integer
              # Bounds and default all have the form 12*k + 6
              # (18 = 12*1 + 6, 78 = 12*6 + 6, 162 = 12*13 + 6).
              minimum: 18
              maximum: 162
              default: 78
              description: >-
                Number of frames to generate. Must be a multiple of 12
                plus 6, for example 18, 30, 42, etc.
            enable_prompt_expansion:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results
                for the same other request parameters. Using the same value
                for an identical request will produce similar results. If
                unspecified, a random number is chosen.
        # Schema variant for the `krea/krea-wan-14b/video-to-video` model.
        # Required: model, prompt, video_url.
        - type: object
          required: [model, prompt, video_url]
          properties:
            model:
              type: string
              enum: [krea/krea-wan-14b/video-to-video]
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
            video_url:
              type: string
              format: uri
              description: >-
                A HTTPS URL pointing to a video or a data URI containing a video.
                This video will be used as a reference during generation.
            strength:
              type: number
              minimum: 0
              maximum: 1
              default: 0.85
              description: >-
                Denoising strength for the video-to-video generation.
                0.0 preserves the original, 1.0 completely remakes the video.
            enable_prompt_expansion:
              type: boolean
              default: true
              description: Whether to enable prompt expansion.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results
                for the same other request parameters. Using the same value
                for an identical request will produce similar results. If
                unspecified, a random number is chosen.
        # Schema variant for the `veo2/image-to-video` model.
        # Required: model, prompt, image_url. `tail_image_url` is optional.
        #
        # This block defines the YAML anchors `&ref_34` (aspect-ratio enum)
        # and `&ref_35` (duration enum). Later variants in this list alias
        # them, so the anchors must stay on these keys.
        - type: object
          properties:
            model:
              type: string
              enum:
                - veo2/image-to-video
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            tail_image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            aspect_ratio:
              type: string
              # Anchor definition: the list '16:9', '9:16'.
              enum: &ref_34
                - '16:9'
                - '9:16'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              # Anchor definition: the list 5, 6, 7, 8.
              enum: &ref_35
                - 5
                - 6
                - 7
                - 8
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
          required:
            - model
            - prompt
            - image_url
        # Schema variant for the `veo2` model.
        # Required: model, prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - veo2
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            aspect_ratio:
              type: string
              # Alias of the enum anchored as `&ref_34` in the
              # `veo2/image-to-video` variant ('16:9', '9:16').
              enum: *ref_34
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              # Alias of the enum anchored as `&ref_35` in the
              # `veo2/image-to-video` variant (5, 6, 7, 8).
              enum: *ref_35
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
          required:
            - model
            - prompt
        # Schema variant for the `google/veo3` model.
        # Required: model, prompt.
        #
        # This block defines the YAML anchor `&ref_36` (resolution enum) that
        # later variants alias, so the anchor must stay on that key.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo3
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            aspect_ratio:
              type: string
              # Alias of the enum anchored as `&ref_34` ('16:9', '9:16').
              enum: *ref_34
              description: The aspect ratio of the generated video.
            resolution:
              type: string
              # Values are spelled with an upper-case `P` in this variant.
              enum: &ref_36
                - 720P
                - 1080P
              default: 720P
              description: The resolution of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 4
                - 6
                - 8
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
        # Schema variant for the `google/veo-3.0-i2v` model.
        # Required: model, prompt, image_url.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.0-i2v
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            aspect_ratio:
              type: string
              # Alias of the enum anchored as `&ref_34` ('16:9', '9:16').
              enum: *ref_34
              description: The aspect ratio of the generated video.
            resolution:
              type: string
              # Alias of the enum anchored as `&ref_36` (720P, 1080P).
              enum: *ref_36
              default: 720P
              description: The resolution of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 4
                - 6
                - 8
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
        # Schema variant for the `google/veo-3.0-fast` model.
        # Required: model, prompt.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.0-fast
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            aspect_ratio:
              type: string
              # Alias of the enum anchored as `&ref_34` ('16:9', '9:16').
              enum: *ref_34
              description: The aspect ratio of the generated video.
            resolution:
              type: string
              # Alias of the enum anchored as `&ref_36` (720P, 1080P).
              enum: *ref_36
              default: 720P
              description: The resolution of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 4
                - 6
                - 8
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
        # Schema variant for the `google/veo-3.0-i2v-fast` model.
        # Required: model, prompt, image_url.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/veo-3.0-i2v-fast
            prompt:
              type: string
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            aspect_ratio:
              type: string
              # Alias of the enum anchored as `&ref_34` ('16:9', '9:16').
              enum: *ref_34
              description: The aspect ratio of the generated video.
            resolution:
              type: string
              # Alias of the enum anchored as `&ref_36` (720P, 1080P).
              enum: *ref_36
              default: 720P
              description: The resolution of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 4
                - 6
                - 8
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            seed:
              type: integer
              description: >-
                Varying the seed integer is a way to get different results for
                the same other request parameters. Using the same value for an
                identical request will produce similar results. If unspecified,
                a random number is chosen.
            enhance_prompt:
              type: boolean
              default: true
              description: Whether to enhance the video generation.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
        # Text-to-video request variant for openai/sora-2-t2v.
        # The duration and aspect_ratio enums carry the anchors ref_37 and
        # ref_38, which later entries in this list alias; keep them here.
        - type: object
          required:
            - model
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-t2v
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: &ref_37
                - 4
                - 8
                - 12
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: &ref_38
                - '16:9'
                - '9:16'
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
              default: 720p
        # Image-to-video request variant for openai/sora-2-i2v.
        # Required: model, image_url, prompt.
        # NOTE(review): the image_url description lists 1080p sizes, but
        # resolution in this variant only allows 720p - confirm which is right.
        - type: object
          required:
            - model
            - image_url
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-i2v
            image_url:
              description: >-
                A URL or a Base64-encoded image file used as the initial
                frame for video generation.

                The image dimensions must match the selected video
                resolution and aspect ratio.

                Supported configurations include:

                720p with aspect ratios:

                - 16:9 — 1280x720

                - 9:16 — 720x1280


                1080p with aspect ratios:

                - 16:9 — 1792x1024

                - 9:16 — 1024x1792
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_37
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_38
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
              default: 720p
        # Text-to-video request variant for openai/sora-2-pro-t2v.
        # Required: model, prompt. Resolution accepts 720p or 1080p.
        - type: object
          required:
            - model
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-pro-t2v
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_37
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_38
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
        # Image-to-video request variant for openai/sora-2-pro-i2v.
        # Required: model, image_url, prompt. Resolution accepts 720p or 1080p.
        - type: object
          required:
            - model
            - image_url
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-pro-i2v
            image_url:
              description: >-
                A URL or a Base64-encoded image file used as the initial
                frame for video generation.

                The image dimensions must match the selected video
                resolution and aspect ratio.

                Supported configurations include:

                720p with aspect ratios:

                - 16:9 — 1280x720

                - 9:16 — 720x1280


                1080p with aspect ratios:

                - 16:9 — 1792x1024

                - 9:16 — 1024x1792
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_37
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_38
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
        # Text-to-video request variant for openai/sora-2-t2v.
        # NOTE(review): an entry for this same model with the same fields
        # appears earlier in this list - confirm the duplicate is intended.
        # The anchors ref_39 and ref_40 defined here are aliased by the
        # entries that follow, so they must stay in this entry.
        - type: object
          required:
            - model
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-t2v
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: &ref_39
                - 4
                - 8
                - 12
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: &ref_40
                - '16:9'
                - '9:16'
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
              default: 720p
        # Image-to-video request variant for openai/sora-2-i2v.
        # NOTE(review): an entry for this same model with the same fields
        # appears earlier in this list - confirm the duplicate is intended.
        # NOTE(review): the image_url description lists 1080p sizes, but
        # resolution in this variant only allows 720p - confirm which is right.
        - type: object
          required:
            - model
            - image_url
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-i2v
            image_url:
              description: >-
                A URL or a Base64-encoded image file used as the initial
                frame for video generation.

                The image dimensions must match the selected video
                resolution and aspect ratio.

                Supported configurations include:

                720p with aspect ratios:

                - 16:9 — 1280x720

                - 9:16 — 720x1280


                1080p with aspect ratios:

                - 16:9 — 1792x1024

                - 9:16 — 1024x1792
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_39
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_40
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
              default: 720p
        # Text-to-video request variant for openai/sora-2-pro-t2v.
        # NOTE(review): an entry for this same model with the same fields
        # appears earlier in this list - confirm the duplicate is intended.
        - type: object
          required:
            - model
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-pro-t2v
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_39
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_40
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
        # Image-to-video request variant for openai/sora-2-pro-i2v.
        # NOTE(review): an entry for this same model with the same fields
        # appears earlier in this list - confirm the duplicate is intended.
        - type: object
          required:
            - model
            - image_url
            - prompt
          properties:
            model:
              type: string
              enum:
                - openai/sora-2-pro-i2v
            image_url:
              description: >-
                A URL or a Base64-encoded image file used as the initial
                frame for video generation.

                The image dimensions must match the selected video
                resolution and aspect ratio.

                Supported configurations include:

                720p with aspect ratios:

                - 16:9 — 1280x720

                - 9:16 — 720x1280


                1080p with aspect ratios:

                - 16:9 — 1792x1024

                - 9:16 — 1024x1792
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum: *ref_39
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum: *ref_40
              default: '16:9'
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 720p
                - 1080p
              default: 1080p
        # Request variant shared by luma/ray-2 and luma/ray-flash-2.
        # Required: prompt, model. Properties not listed here are rejected
        # (additionalProperties: false).
        - type: object
          additionalProperties: false
          required:
            - prompt
            - model
          properties:
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum:
                - '1:1'
                - '16:9'
                - '9:16'
                - '4:3'
                - '3:4'
                - '21:9'
                - '9:21'
            loop:
              description: Whether to loop the video
              type: boolean
              default: false
            keyframes:
              description: Keyframes for image-to-video, extend, or interpolate
              type: object
              properties:
                # frame0 and frame1 each accept either a reference to a
                # previous generation (by id) or an image (by url).
                frame0:
                  anyOf:
                    - type: object
                      required:
                        - type
                        - id
                      properties:
                        type:
                          type: string
                          enum:
                            - generation
                        id:
                          description: Id of a previous generation to extend
                          type: string
                    - type: object
                      required:
                        - type
                        - url
                      properties:
                        type:
                          type: string
                          enum:
                            - image
                        url:
                          description: >-
                            A direct link to an online image or a
                            Base64-encoded local image that will serve as the
                            visual base or the first frame for the video.
                          type: string
                          format: uri
                frame1:
                  anyOf:
                    - type: object
                      required:
                        - type
                        - id
                      properties:
                        type:
                          type: string
                          enum:
                            - generation
                        id:
                          description: Id of a previous generation to extend
                          type: string
                    - type: object
                      required:
                        - type
                        - url
                      properties:
                        type:
                          type: string
                          enum:
                            - image
                        url:
                          description: >-
                            A direct link to an online image or a
                            Base64-encoded local image that will serve as the
                            visual base or the first frame for the video.
                          type: string
                          format: uri
            model:
              type: string
              enum:
                - luma/ray-2
                - luma/ray-flash-2
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 540p
                - 720p
                - 1080p
                - 4k
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum:
                - 5
                - 9
        # Avatar request variant for klingai/avatar-standard.
        # Required: model, image_url, audio_url; prompt is optional and capped
        # at 2500 characters.
        - type: object
          required:
            - model
            - image_url
            - audio_url
          properties:
            model:
              type: string
              enum:
                - klingai/avatar-standard
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local
                image that will serve as the visual base or the first frame
                for the video.
              type: string
              format: uri
            audio_url:
              description: >-
                The URL of the audio file. Supported formats: MP3, WAV,
                M4A, AAC. Maximum file size: 5 MB.
              type: string
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
              maxLength: 2500
        # Avatar request variant for klingai/avatar-pro.
        # Required: model, image_url, audio_url; prompt is optional and capped
        # at 2500 characters.
        - type: object
          required:
            - model
            - image_url
            - audio_url
          properties:
            model:
              type: string
              enum:
                - klingai/avatar-pro
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local
                image that will serve as the visual base or the first frame
                for the video.
              type: string
              format: uri
            audio_url:
              description: >-
                The URL of the audio file. Supported formats: MP3, WAV,
                M4A, AAC. Maximum file size: 5 MB.
              type: string
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
              maxLength: 2500
        # Request variant for ltxv/ltxv-2.
        # Required: model, prompt. image_url is declared but not required.
        - type: object
          required:
            - model
            - prompt
          properties:
            model:
              type: string
              enum:
                - ltxv/ltxv-2
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local
                image that will serve as the visual base or the first frame
                for the video.
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum:
                - 6
                - 8
                - 10
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 1080p
                - 1440p
                - 2160p
              default: 1080p
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum:
                - '16:9'
              default: '16:9'
            fps:
              description: Frames per second of the generated video.
              type: integer
              enum:
                - 25
                - 50
            generate_audio:
              description: Whether to generate audio for the video.
              type: boolean
              default: true
        # Request variant for ltxv/ltxv-2-fast.
        # Required: model, prompt. image_url is declared but not required.
        - type: object
          required:
            - model
            - prompt
          properties:
            model:
              type: string
              enum:
                - ltxv/ltxv-2-fast
            image_url:
              description: >-
                A direct link to an online image or a Base64-encoded local
                image that will serve as the visual base or the first frame
                for the video.
              type: string
              format: uri
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
            duration:
              description: The length of the output video in seconds.
              type: integer
              enum:
                - 6
                - 8
                - 10
            resolution:
              description: >-
                The resolution of the output video, where the number refers
                to the short side in pixels.
              type: string
              enum:
                - 1080p
                - 1440p
                - 2160p
              default: 1080p
            aspect_ratio:
              description: The aspect ratio of the generated video.
              type: string
              enum:
                - '16:9'
              default: '16:9'
            fps:
              description: Frames per second of the generated video.
              type: integer
              enum:
                - 25
                - 50
            generate_audio:
              description: Whether to generate audio for the video.
              type: boolean
              default: true
        # Variant pulled in by reference from components/schemas rather than
        # being written inline like the neighbouring entries.
        - $ref: '#/components/schemas/Video.v2.longcatDistilled'
        # Image-to-video request variant for klingai/video-o1-image-to-video.
        # Required: model, prompt, image_url. last_image_url and duration are
        # optional.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-o1-image-to-video
            prompt:
              type: string
              maxLength: 2500
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            last_image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 5
                - 10
              # Unquoted so the default is an integer, matching `type: integer`
              # and the enum members above.
              default: 5
          required:
            - model
            - prompt
            - image_url
        # Reference-to-video request variant for
        # klingai/video-o1-reference-to-video.
        # Required: model, prompt. image_list (1-7 URLs) and elements (up to 4)
        # are optional reference inputs.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-o1-reference-to-video
            prompt:
              type: string
              maxLength: 2500
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_list:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              maxItems: 7
              description: Array of image URLs for multi-image-to-video generation.
            elements:
              type: array
              items:
                type: object
                properties:
                  reference_image_urls:
                    type: array
                    items:
                      type: string
                      format: uri
                    minItems: 1
                    maxItems: 4
                    description: Additional reference images from different angles.
                  frontal_image_url:
                    type: string
                    format: uri
                    description: The frontal image of the element (main view).
                required:
                  - reference_image_urls
                  - frontal_image_url
              maxItems: 4
              description: Elements (characters/objects) to include in the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
              default: '16:9'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 5
                - 10
              # Unquoted so the default is an integer, matching `type: integer`
              # and the enum members above.
              default: 5
          required:
            - model
            - prompt
        # Video-edit request variant for klingai/video-o1-video-to-video-edit.
        # Required: model, prompt, video_url. image_list, elements and
        # keep_audio are optional.
        - type: object
          required:
            - model
            - prompt
            - video_url
          properties:
            model:
              type: string
              enum:
                - klingai/video-o1-video-to-video-edit
            prompt:
              description: >-
                The text description of the scene, subject, or action
                to generate in the video.
              type: string
              maxLength: 2500
            video_url:
              description: >-
                A HTTPS URL pointing to a video or a data URI containing
                a video. This video will be used as a reference during
                generation.
              type: string
              format: uri
            image_list:
              description: Array of image URLs for multi-image-to-video generation.
              type: array
              minItems: 1
              maxItems: 7
              items:
                type: string
                format: uri
            elements:
              description: Elements (characters/objects) to include in the video.
              type: array
              maxItems: 4
              items:
                type: object
                required:
                  - reference_image_urls
                  - frontal_image_url
                properties:
                  reference_image_urls:
                    description: Additional reference images from different angles.
                    type: array
                    minItems: 1
                    maxItems: 4
                    items:
                      type: string
                      format: uri
                  frontal_image_url:
                    description: The frontal image of the element (main view).
                    type: string
                    format: uri
            keep_audio:
              description: Whether to keep the original audio from the video.
              type: boolean
              default: false
        # Video-reference request variant for
        # klingai/video-o1-video-to-video-reference.
        # Required: model, prompt, video_url, image_list. Note image_list is
        # capped at 4 items in this variant.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-o1-video-to-video-reference
            prompt:
              type: string
              maxLength: 2500
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            video_url:
              type: string
              format: uri
              description: >-
                A HTTPS URL pointing to a video or a data URI containing a
                video. This video will be used as a reference during generation.
            image_list:
              type: array
              items:
                type: string
                format: uri
              minItems: 1
              maxItems: 4
              description: Array of image URLs for multi-image-to-video generation.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
              default: '16:9'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 5
                - 10
              # Unquoted so the default is an integer, matching `type: integer`
              # and the enum members above.
              default: 5
            elements:
              type: array
              items:
                type: object
                properties:
                  reference_image_urls:
                    type: array
                    items:
                      type: string
                      format: uri
                    minItems: 1
                    maxItems: 4
                    description: Additional reference images from different angles.
                  frontal_image_url:
                    type: string
                    format: uri
                    description: The frontal image of the element (main view).
                required:
                  - reference_image_urls
                  - frontal_image_url
              maxItems: 4
              description: Elements (characters/objects) to include in the video.
            keep_audio:
              type: boolean
              default: false
              description: Whether to keep the original audio from the video.
          required:
            - model
            - prompt
            - video_url
            - image_list
        # Text-to-video request variant for
        # klingai/video-v2-6-pro-text-to-video.
        # Required: model, prompt. cfg_scale is bounded to the range 0..1.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-v2-6-pro-text-to-video
            prompt:
              type: string
              maxLength: 2500
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 5
                - 10
              # Unquoted so the default is an integer, matching `type: integer`
              # and the enum members above.
              default: 5
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
        # Image-to-video request variant for
        # klingai/video-v2-6-pro-image-to-video.
        # Required: model, prompt, image_url.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-v2-6-pro-image-to-video
            prompt:
              type: string
              maxLength: 2500
              description: >-
                The text description of the scene, subject, or action to
                generate in the video.
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              enum:
                - 5
                - 10
              # Unquoted so the default is an integer, matching `type: integer`
              # and the enum members above.
              default: 5
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
          required:
            - model
            - prompt
            - image_url
        # Union variant for the `klingai/video-v3-standard-image-to-video` and
        # `klingai/video-v3-pro-image-to-video` model ids.
        # Mandatory fields: model, image_url.
        - type: object
          properties:
            prompt:
              type: string
              description: >-
                Text prompt for video generation. Either prompt or multi_prompt
                must be provided, but not both.
            multi_prompt:
              type: array
              items:
                type: string
              description: >-
                List of prompts for multi-shot video generation. If provided,
                overrides the single prompt and divides the video into multiple
                shots with specified prompts and durations.
            duration:
              type: integer
              description: The length of the output video in seconds.
              # Anchored as ref_41 so the v3 text-to-video variant can alias
              # the same list of allowed durations.
              enum: &ref_41
                - 3
                - 4
                - 5
                - 6
                - 7
                - 8
                - 9
                - 10
                - 11
                - 12
                - 13
                - 14
                - 15
              # Integer literal: a default has to satisfy `type: integer` and
              # be one of the enum members above (a quoted '5' is a string).
              default: 5
            shot_type:
              type: string
              # Anchored as ref_42; aliased by the v3 text-to-video variant.
              enum: &ref_42
                - customize
                - intelligent
              default: customize
              description: The type of multi-shot video generation
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
            model:
              type: string
              enum:
                - klingai/video-v3-standard-image-to-video
                - klingai/video-v3-pro-image-to-video
            image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                that will serve as the visual base or the first frame for the
                video.
            tail_image_url:
              type: string
              format: uri
              description: >-
                A direct link to an online image or a Base64-encoded local image
                to be used as the last frame of the video.
            elements:
              type: array
              items:
                type: object
                properties:
                  reference_image_urls:
                    type: array
                    items:
                      type: string
                      format: uri
                    minItems: 1
                    maxItems: 4
                    description: Additional reference images from different angles.
                  frontal_image_url:
                    type: string
                    format: uri
                    description: The frontal image of the element (main view).
                  video_url:
                    type: string
                    format: uri
                    description: >-
                      The video URL of the element. A request can only have one
                      element with a video.
                # NOTE(review): the `elements` description allows a video-only
                # element, yet `reference_image_urls` is mandatory here --
                # confirm which of the two is intended.
                required:
                  - reference_image_urls
              maxItems: 4
              description: >-
                Elements (characters/objects) to include in the video. Each
                example can either be an image set (frontal + reference images)
                or a video
          required:
            - model
            - image_url
        # Union variant for the `klingai/video-v3-standard-text-to-video` and
        # `klingai/video-v3-pro-text-to-video` model ids.
        # Only `model` is mandatory at the schema level.
        - type: object
          properties:
            model:
              type: string
              enum:
                - klingai/video-v3-standard-text-to-video
                - klingai/video-v3-pro-text-to-video
            prompt:
              type: string
              description: >-
                Text prompt for video generation. Either prompt or multi_prompt
                must be provided, but not both.
            multi_prompt:
              type: array
              items:
                type: string
              description: >-
                List of prompts for multi-shot video generation. If provided,
                overrides the single prompt and divides the video into multiple
                shots with specified prompts and durations.
            aspect_ratio:
              type: string
              enum:
                - '16:9'
                - '9:16'
                - '1:1'
              default: '16:9'
              description: The aspect ratio of the generated video.
            duration:
              type: integer
              description: The length of the output video in seconds.
              # Alias of the integer list 3..15 anchored as ref_41 earlier in
              # this file.
              enum: *ref_41
              # Integer literal: a default has to satisfy `type: integer` and
              # be one of the aliased enum members (a quoted '5' is a string).
              default: 5
            shot_type:
              type: string
              enum: *ref_42
              default: customize
              description: The type of multi-shot video generation
            generate_audio:
              type: boolean
              default: true
              description: Whether to generate audio for the video.
            negative_prompt:
              type: string
              description: The description of elements to avoid in the generated video.
            cfg_scale:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
          required:
            - model
        # Union variant keyed by the `magic/text-to-video` model id.
        # `model` and `prompt` are mandatory; `template` is optional and
        # defaults to its single allowed value.
        - type: object
          properties:
            model:
              type: string
              enum: [magic/text-to-video]
            prompt:
              type: string
            template:
              type: string
              enum: ['Shanghai Drone Show']
              default: 'Shanghai Drone Show'
          required: [model, prompt]
        # Union variant keyed by the `magic/image-to-video` model id.
        # `model` and `image_url` are mandatory; `template` is optional and
        # defaults to "Thailand Street".
        - type: object
          properties:
            model:
              type: string
              enum: [magic/image-to-video]
            image_url:
              type: string
              format: uri
            template:
              type: string
              # Allowed template names (21 entries).
              enum:
                - 'Thailand Street'
                - 'Times Square Billboard'
                - 'New York Times Square (77)'
                - 'Phone Social'
                - 'Art Gallery'
                - 'New York Times Square (66)'
                - 'Dubai Museum'
                - 'Digital Float'
                - 'Rotating Cards'
                - 'Desktop Reveal'
                - 'Egypt Pyramid'
                - 'Frames Drop'
                - 'Cappadocia Balloons'
                - 'Times Square Round Screen'
                - 'Stockholm Metro'
                - 'Tokyo Billboard'
                - 'San Francisco Skyscrapers'
                - 'Malaysia Shop'
                - 'Las Vegas LED'
                - 'Phone App'
                - 'Paris Eiffel Tower'
              default: 'Thailand Street'
          required: [model, image_url]
        # Union variant keyed by the `magic/video-to-video` model id.
        # `model` and `video_url` are mandatory; `template` is optional and
        # defaults to "Thailand Street".
        - type: object
          properties:
            model:
              type: string
              enum: [magic/video-to-video]
            video_url:
              type: string
              format: uri
            template:
              type: string
              # Allowed template names (19 entries).
              enum:
                - 'Thailand Street'
                - 'Times Square Billboard'
                - 'New York Times Square (78)'
                - 'Phone Social'
                - 'Art Gallery'
                - 'New York Times Square (67)'
                - 'Dubai Museum'
                - 'Rotating Cards'
                - 'Desktop Reveal'
                - 'Egypt Pyramid'
                - 'Cappadocia Balloons'
                - 'Times Square Round Screen'
                - 'Stockholm Metro'
                - 'Tokyo Billboard'
                - 'San Francisco Skyscrapers'
                - 'Malaysia Shop'
                - 'Las Vegas LED'
                - 'Phone App'
                - 'Paris Eiffel Tower'
              default: 'Thailand Street'
          required: [model, video_url]
    # Object schema whose `model` is fixed to
    # `kling-video/v1.6/standard/image-to-video`.
    # Mandatory fields: model, image_url, prompt.
    Kling.v2.klingV16StandardImageToVideo:
      type: object
      properties:
        model:
          type: string
          enum:
            - kling-video/v1.6/standard/image-to-video
        image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image
            that will serve as the visual base or the first frame for the video.
        # Both `ratio` and `aspect_ratio` are flagged deprecated in this schema.
        # NOTE(review): Kling.v2.klingTextToVideoPayload deprecates only
        # `ratio` -- confirm that deprecating `aspect_ratio` here is intended.
        ratio:
          type: string
          enum:
            - '16:9'
            - '9:16'
            - '1:1'
          deprecated: true
        aspect_ratio:
          type: string
          enum:
            - '16:9'
            - '9:16'
            - '1:1'
          deprecated: true
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        duration:
          type: integer
          description: The length of the output video in seconds.
          # Anchored as ref_43; Kling.v2.klingTextToVideoPayload aliases this
          # list, so the anchor name must stay in place.
          enum: &ref_43
            - 5
            - 10
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        # Bounded by minimum 0 and maximum 1.
        cfg_scale:
          type: number
          minimum: 0
          maximum: 1
          description: >-
            The CFG (Classifier Free Guidance) scale is a measure of how close
            you want the model to stick to your prompt.
      required:
        - model
        - image_url
        - prompt
    # Object schema whose `model` is fixed to
    # `kling-video/v1.6/standard/text-to-video`.
    # Mandatory fields: model, prompt.
    Kling.v2.klingTextToVideoPayload:
      type: object
      properties:
        model:
          type: string
          enum:
            - kling-video/v1.6/standard/text-to-video
        # `ratio` is flagged deprecated; `aspect_ratio` accepts the same three
        # values and is not deprecated.
        ratio:
          type: string
          enum:
            - '16:9'
            - '9:16'
            - '1:1'
          deprecated: true
        aspect_ratio:
          type: string
          enum:
            - '16:9'
            - '9:16'
            - '1:1'
          description: The aspect ratio of the generated video.
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        duration:
          type: integer
          description: The length of the output video in seconds.
          # Alias of the list anchored as ref_43 in
          # Kling.v2.klingV16StandardImageToVideo (values 5 and 10).
          enum: *ref_43
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        # Bounded by minimum 0 and maximum 1.
        cfg_scale:
          type: number
          minimum: 0
          maximum: 1
          description: >-
            The CFG (Classifier Free Guidance) scale is a measure of how close
            you want the model to stick to your prompt.
      required:
        - model
        - prompt
    # Object schema whose `model` is fixed to `pixverse/v5/transition`.
    # Mandatory fields: model, image_url, tail_image_url, prompt.
    Pixverse.v2.pixverseTransition:
      type: object
      properties:
        model:
          type: string
          enum:
            - pixverse/v5/transition
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        tail_image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image to
            be used as the last frame of the video.
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        resolution:
          type: string
          # Anchored as ref_44; aliased by Pixverse.v2.pixverseImageToVideo and
          # Pixverse.v2.pixverseTextToVideo.
          enum: &ref_44
            - 360p
            - 540p
            - 720p
            - 1080p
          default: 720p
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        duration:
          type: integer
          description: >-
            The output video length in seconds. The 1080p quality option does
            not support 8-second videos.
          # Anchored as ref_45; aliased by the same two sibling schemas.
          enum: &ref_45
            - 5
            - 8
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        style:
          type: string
          # Anchored as ref_46; aliased by the same two sibling schemas.
          enum: &ref_46
            - anime
            - 3d_animation
            - clay
            - comic
            - cyberpunk
          description: The style of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
      required:
        - model
        - image_url
        - tail_image_url
        - prompt
    # Object schema whose `model` is fixed to `pixverse/v5/image-to-video`.
    # Mandatory fields: model, image_url, prompt.
    Pixverse.v2.pixverseImageToVideo:
      type: object
      properties:
        model:
          type: string
          enum:
            - pixverse/v5/image-to-video
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        aspect_ratio:
          type: string
          # Anchored as ref_47; aliased by Pixverse.v2.pixverseTextToVideo.
          enum: &ref_47
            - '16:9'
            - '4:3'
            - '1:1'
            - '3:4'
            - '9:16'
          default: '16:9'
          description: The aspect ratio of the generated video.
        resolution:
          type: string
          # ref_44, ref_45 and ref_46 below are anchored in
          # Pixverse.v2.pixverseTransition.
          enum: *ref_44
          default: 720p
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        duration:
          type: integer
          description: >-
            The output video length in seconds. The 1080p quality option does
            not support 8-second videos.
          enum: *ref_45
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        style:
          type: string
          enum: *ref_46
          description: The style of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
      required:
        - model
        - image_url
        - prompt
    # Object schema whose `model` is fixed to `pixverse/v5/text-to-video`.
    # Mandatory fields: model, prompt.
    Pixverse.v2.pixverseTextToVideo:
      type: object
      properties:
        model:
          type: string
          enum:
            - pixverse/v5/text-to-video
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        aspect_ratio:
          type: string
          # ref_47 is anchored in Pixverse.v2.pixverseImageToVideo.
          enum: *ref_47
          default: '16:9'
          description: The aspect ratio of the generated video.
        resolution:
          type: string
          # ref_44, ref_45 and ref_46 below are anchored in
          # Pixverse.v2.pixverseTransition.
          enum: *ref_44
          default: 720p
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        duration:
          type: integer
          description: >-
            The output video length in seconds. The 1080p quality option does
            not support 8-second videos.
          enum: *ref_45
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        style:
          type: string
          enum: *ref_46
          description: The style of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
      required:
        - model
        - prompt
    # Object schema whose `model` is fixed to `pixverse/v5-5-text-to-video`.
    # Mandatory fields: model, prompt.
    # The enums anchored here (ref_48 .. ref_52) are aliased by
    # Pixverse.v2.pixverseV55ImageToVideo, so the anchor names must stay.
    Pixverse.v2.pixverseV55TextToVideo:
      type: object
      properties:
        model:
          type: string
          enum:
            - pixverse/v5-5-text-to-video
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        aspect_ratio:
          type: string
          enum: &ref_48
            - '16:9'
            - '4:3'
            - '1:1'
            - '3:4'
            - '9:16'
          default: '16:9'
          description: The aspect ratio of the generated video.
        resolution:
          type: string
          enum: &ref_49
            - 360p
            - 540p
            - 720p
            - 1080p
          default: 720p
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        duration:
          type: integer
          description: >-
            The output video length in seconds. The 1080p quality option does
            not support 8-second videos.
          # NOTE(review): this enum also allows 10, while the description only
          # mentions the 8-second restriction -- confirm how 1080p treats 10.
          enum: &ref_50
            - 5
            - 8
            - 10
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        style:
          type: string
          enum: &ref_51
            - anime
            - 3d_animation
            - clay
            - comic
            - cyberpunk
          description: The style of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        generate_audio_switch:
          type: boolean
          description: >-
            Audio switch. Controls whether the video has audio. true: Audio on,
            false: Audio off
        generate_multi_clip_switch:
          type: boolean
          description: >-
            Single or multi-clip switch. Controls single-clip and multi-clip
            generation modes. true: Multi-clip, false: Single-clip
        thinking_type:
          type: string
          enum: &ref_52
            - enabled
            - disabled
            - auto
          description: >-
            Prompt reasoning enhancement. Controls whether the system should
            enhance your prompt with internal reasoning and optimization.
      required:
        - model
        - prompt
    # Object schema whose `model` is fixed to `pixverse/v5-5-image-to-video`.
    # Mandatory fields: model, image_url, prompt.
    # Every aliased enum below (ref_48 .. ref_52) is anchored in
    # Pixverse.v2.pixverseV55TextToVideo.
    Pixverse.v2.pixverseV55ImageToVideo:
      type: object
      properties:
        model:
          type: string
          enum:
            - pixverse/v5-5-image-to-video
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        aspect_ratio:
          type: string
          enum: *ref_48
          default: '16:9'
          description: The aspect ratio of the generated video.
        resolution:
          type: string
          enum: *ref_49
          default: 720p
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        duration:
          type: integer
          description: >-
            The output video length in seconds. The 1080p quality option does
            not support 8-second videos.
          enum: *ref_50
        negative_prompt:
          type: string
          description: The description of elements to avoid in the generated video.
        style:
          type: string
          enum: *ref_51
          description: The style of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        generate_audio_switch:
          type: boolean
          description: >-
            Audio switch. Controls whether the video has audio. true: Audio on,
            false: Audio off
        generate_multi_clip_switch:
          type: boolean
          description: >-
            Single or multi-clip switch. Controls single-clip and multi-clip
            generation modes. true: Multi-clip, false: Single-clip
        thinking_type:
          type: string
          enum: *ref_52
          description: >-
            Prompt reasoning enhancement. Controls whether the system should
            enhance your prompt with internal reasoning and optimization.
      required:
        - model
        - image_url
        - prompt
    # Object schema accepting the `bytedance/omnihuman` and
    # `bytedance/omnihuman/v1.5` model ids.
    # All three properties (model, image_url, audio_url) are mandatory.
    Bytedance.v2.omnihuman:
      type: object
      properties:
        model:
          type: string
          enum: [bytedance/omnihuman, bytedance/omnihuman/v1.5]
        image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local
            image that will serve as the visual base or the first frame
            for the video.
        audio_url:
          type: string
          format: uri
          description: >-
            The URL of the audio file for lip-sync animation. The model
            detects spoken parts and syncs the character's mouth to them.
            Audio must be under 30s long.
      required: [model, image_url, audio_url]
    # Object schema whose `model` is fixed to
    # `alibaba/wan2.2-vace-fun-a14b-depth`.
    # Mandatory fields: model, video_url, prompt.
    # The enums anchored here (ref_53 .. ref_58) are aliased by
    # Alibaba.v2.wan22VacePose, so the anchor names must stay in place.
    Alibaba.v2.wan22VaceDepth:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-vace-fun-a14b-depth
        video_url:
          type: string
          format: uri
          description: URL to the source video file. Required for depth task
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        negative_prompt:
          type: string
          default: >-
            letterboxing, borders, black bars, bright colors, overexposed,
            static, blurred details, subtitles, style, artwork, painting,
            picture, still, overall gray, worst quality, low quality, JPEG
            compression residue, ugly, incomplete, extra fingers, poorly drawn
            hands, poorly drawn faces, deformed, disfigured, malformed limbs,
            fused fingers, still picture, cluttered background, three legs, many
            people in the background, walking backwards
          description: The description of elements to avoid in the generated video.
        match_input_num_frames:
          type: boolean
          # Description added to mirror `match_input_frames_per_second`.
          description: Whether to match the input video's number of frames.
        num_frames:
          type: integer
          minimum: 81
          maximum: 241
          default: 81
          description: Number of frames to generate.
        match_input_frames_per_second:
          type: boolean
          description: Whether to match the input video's frames per second (FPS).
        frames_per_second:
          type: integer
          minimum: 5
          maximum: 30
          default: 16
          description: Frames per second of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        resolution:
          type: string
          enum: &ref_53
            - auto
            - 240p
            - 360p
            - 480p
            - 580p
            - 720p
          default: auto
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        aspect_ratio:
          type: string
          enum: &ref_54
            - auto
            - '16:9'
            - '1:1'
            - '9:16'
          default: auto
          description: The aspect ratio of the generated video.
        num_inference_steps:
          type: integer
          default: 30
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer.
        guidance_scale:
          type: number
          default: 5
          description: >-
            Classifier-free guidance scale. Controls prompt adherence /
            creativity.
        shift:
          type: number
          default: 5
          description: Noise schedule shift parameter. Affects temporal dynamics.
        image_list:
          type: array
          items:
            type: string
            format: uri
          description: Array of image URLs for multi-image-to-video generation.
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        last_image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image to
            be used as the last frame of the video.
        enable_safety_checker:
          type: boolean
          description: If set to true, the safety checker will be enabled.
        enable_prompt_expansion:
          type: boolean
          description: Whether to enable prompt expansion.
        preprocess:
          type: boolean
          description: Whether to preprocess the input video.
        acceleration:
          type: string
          enum: &ref_55
            - none
            - regular
          default: regular
          description: Acceleration to use for inference.
        video_quality:
          type: string
          enum: &ref_56
            - low
            - medium
            - high
            - maximum
          default: high
          description: The quality of the generated video.
        video_write_mode:
          type: string
          enum: &ref_57
            - fast
            - balanced
            - small
          default: balanced
          description: The method used to write the video.
        num_interpolated_frames:
          type: integer
          description: Number of frames to interpolate between the original frames.
        temporal_downsample_factor:
          type: integer
          description: Temporal downsample factor for the video.
        enable_auto_downsample:
          type: boolean
          # This is the on/off flag; the FPS floor itself is the separate
          # `auto_downsample_min_fps` property below.
          description: Whether to enable automatic downsampling of the video.
        auto_downsample_min_fps:
          type: number
          default: 15
          description: The minimum frames per second to downsample the video to.
        interpolator_model:
          type: string
          enum: &ref_58
            - rife
            - film
          default: film
          description: The model to use for interpolation. Rife, or film are available.
        # NOTE(review): the description talks about "Loose or tight" modes, but
        # the property is declared boolean -- confirm the intended meaning and
        # reword the description accordingly.
        sync_mode:
          type: boolean
          description: >-
            The synchronization mode for audio and video. Loose or tight are
            available.
      required:
        - model
        - video_url
        - prompt
    # Object schema whose `model` is fixed to
    # `alibaba/wan2.2-vace-fun-a14b-pose`.
    # Mandatory fields: model, video_url, prompt.
    # Every aliased enum below (ref_53 .. ref_58) is anchored in
    # Alibaba.v2.wan22VaceDepth.
    Alibaba.v2.wan22VacePose:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-vace-fun-a14b-pose
        video_url:
          type: string
          format: uri
          description: URL to the source video file. Required for pose task
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        negative_prompt:
          type: string
          default: >-
            letterboxing, borders, black bars, bright colors, overexposed,
            static, blurred details, subtitles, style, artwork, painting,
            picture, still, overall gray, worst quality, low quality, JPEG
            compression residue, ugly, incomplete, extra fingers, poorly drawn
            hands, poorly drawn faces, deformed, disfigured, malformed limbs,
            fused fingers, still picture, cluttered background, three legs, many
            people in the background, walking backwards
          description: The description of elements to avoid in the generated video.
        match_input_num_frames:
          type: boolean
          # Description added to mirror `match_input_frames_per_second`.
          description: Whether to match the input video's number of frames.
        num_frames:
          type: integer
          minimum: 81
          maximum: 241
          default: 81
          description: Number of frames to generate.
        match_input_frames_per_second:
          type: boolean
          description: Whether to match the input video's frames per second (FPS).
        frames_per_second:
          type: integer
          minimum: 5
          maximum: 30
          default: 16
          description: Frames per second of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        resolution:
          type: string
          enum: *ref_53
          default: auto
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        aspect_ratio:
          type: string
          enum: *ref_54
          default: auto
          description: The aspect ratio of the generated video.
        num_inference_steps:
          type: integer
          default: 30
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer.
        guidance_scale:
          type: number
          default: 5
          description: >-
            Classifier-free guidance scale. Controls prompt adherence /
            creativity.
        shift:
          type: number
          default: 5
          description: Noise schedule shift parameter. Affects temporal dynamics.
        image_list:
          type: array
          items:
            type: string
            format: uri
          description: Array of image URLs for multi-image-to-video generation.
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        last_image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image to
            be used as the last frame of the video.
        enable_safety_checker:
          type: boolean
          description: If set to true, the safety checker will be enabled.
        enable_prompt_expansion:
          type: boolean
          description: Whether to enable prompt expansion.
        preprocess:
          type: boolean
          description: Whether to preprocess the input video.
        acceleration:
          type: string
          enum: *ref_55
          default: regular
          description: Acceleration to use for inference.
        video_quality:
          type: string
          enum: *ref_56
          default: high
          description: The quality of the generated video.
        video_write_mode:
          type: string
          enum: *ref_57
          default: balanced
          description: The method used to write the video.
        num_interpolated_frames:
          type: integer
          description: Number of frames to interpolate between the original frames.
        temporal_downsample_factor:
          type: integer
          description: Temporal downsample factor for the video.
        enable_auto_downsample:
          type: boolean
          # This is the on/off flag; the FPS floor itself is the separate
          # `auto_downsample_min_fps` property below.
          description: Whether to enable automatic downsampling of the video.
        auto_downsample_min_fps:
          type: number
          default: 15
          description: The minimum frames per second to downsample the video to.
        interpolator_model:
          type: string
          enum: *ref_58
          default: film
          description: The model to use for interpolation. Rife, or film are available.
        # NOTE(review): the description talks about "Loose or tight" modes, but
        # the property is declared boolean -- confirm the intended meaning and
        # reword the description accordingly.
        sync_mode:
          type: boolean
          description: >-
            The synchronization mode for audio and video. Loose or tight are
            available.
      required:
        - model
        - video_url
        - prompt
    # Payload schema for the alibaba/wan2.2-vace-fun-a14b-inpainting model; the
    # single-value `model` enum pins this schema to that model.
    Alibaba.v2.wan22VaceInpainting:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-vace-fun-a14b-inpainting
        video_url:
          type: string
          format: uri
          description: URL to the source video file. Required for inpainting
        # NOTE(review): described as required, yet not listed under `required`
        # below — presumably because mask_image_url may be sent instead; confirm.
        mask_video_url:
          type: string
          format: uri
          description: URL to the source mask file. Required for inpainting
        mask_image_url:
          type: string
          format: uri
          description: >-
            URL to the guiding mask file. If provided, the model will use this
            mask as a reference to create masked video using salient mask
            tracking. Will be ignored if mask_video_url is provided
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        negative_prompt:
          type: string
          default: >-
            letterboxing, borders, black bars, bright colors, overexposed,
            static, blurred details, subtitles, style, artwork, painting,
            picture, still, overall gray, worst quality, low quality, JPEG
            compression residue, ugly, incomplete, extra fingers, poorly drawn
            hands, poorly drawn faces, deformed, disfigured, malformed limbs,
            fused fingers, still picture, cluttered background, three legs, many
            people in the background, walking backwards
          description: The description of elements to avoid in the generated video.
        match_input_num_frames:
          type: boolean
          description: Whether to match the input video's number of frames.
        num_frames:
          type: integer
          minimum: 81
          maximum: 241
          default: 81
          description: Number of frames to generate.
        match_input_frames_per_second:
          type: boolean
          description: Whether to match the input video's frames per second (FPS).
        frames_per_second:
          type: integer
          minimum: 5
          maximum: 30
          default: 16
          description: Frames per second of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        resolution:
          type: string
          enum: *ref_53
          default: auto
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        aspect_ratio:
          type: string
          enum: *ref_54
          default: auto
          description: The aspect ratio of the generated video.
        num_inference_steps:
          type: integer
          default: 30
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer.
        guidance_scale:
          type: number
          default: 5
          description: >-
            Classifier-free guidance scale. Controls prompt adherence /
            creativity.
        shift:
          type: number
          default: 5
          description: Noise schedule shift parameter. Affects temporal dynamics.
        image_list:
          type: array
          items:
            type: string
            format: uri
          description: Array of image URLs for multi-image-to-video generation.
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        last_image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image to
            be used as the last frame of the video.
        enable_safety_checker:
          type: boolean
          description: If set to true, the safety checker will be enabled.
        enable_prompt_expansion:
          type: boolean
          description: Whether to enable prompt expansion.
        preprocess:
          type: boolean
          description: Whether to preprocess the input video.
        acceleration:
          type: string
          enum: *ref_55
          default: regular
          description: Acceleration to use for inference.
        video_quality:
          type: string
          enum: *ref_56
          default: high
          description: The quality of the generated video.
        video_write_mode:
          type: string
          enum: *ref_57
          default: balanced
          description: The method used to write the video.
        num_interpolated_frames:
          type: integer
          description: Number of frames to interpolate between the original frames.
        temporal_downsample_factor:
          type: integer
          description: Temporal downsample factor for the video.
        # Boolean switch; the numeric floor lives in auto_downsample_min_fps.
        # The previous description was a copy of that sibling's text.
        enable_auto_downsample:
          type: boolean
          description: >-
            Whether to automatically downsample the video;
            auto_downsample_min_fps sets the lowest frame rate it may be
            reduced to.
        auto_downsample_min_fps:
          type: number
          default: 15
          description: The minimum frames per second to downsample the video to.
        interpolator_model:
          type: string
          enum: *ref_58
          default: film
          description: The model to use for interpolation. Rife, or film are available.
        # NOTE(review): declared as boolean, yet the description speaks of
        # "loose"/"tight" modes — confirm the intended type and wording.
        sync_mode:
          type: boolean
          description: >-
            The synchronization mode for audio and video. Loose or tight are
            available.
      required:
        - model
        - video_url
        - prompt
    # Payload schema for the alibaba/wan2.2-vace-fun-a14b-outpainting model; the
    # single-value `model` enum pins this schema to that model.
    Alibaba.v2.wan22VaceOutpainting:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-vace-fun-a14b-outpainting
        video_url:
          type: string
          format: uri
          description: URL to the source video file. Required for outpainting
        expand_left:
          type: boolean
          default: true
          description: Whether to expand the video to the left
        expand_right:
          type: boolean
          default: true
          description: Whether to expand the video to the right
        expand_top:
          type: boolean
          default: true
          description: Whether to expand the video to the top
        expand_bottom:
          type: boolean
          default: true
          description: Whether to expand the video to the bottom
        # Bounds encode the "between 0 and 1" range stated in the description.
        # NOTE(review): inclusive bounds assumed — confirm against the backend.
        expand_ratio:
          type: number
          minimum: 0
          maximum: 1
          default: 0.25
          description: >-
            Amount of expansion. This is a float value between 0 and 1, where
            0.25 adds 25% to the original video size on the specified sides
        prompt:
          type: string
          description: >-
            The text description of the scene, subject, or action to generate in
            the video.
        negative_prompt:
          type: string
          default: >-
            letterboxing, borders, black bars, bright colors, overexposed,
            static, blurred details, subtitles, style, artwork, painting,
            picture, still, overall gray, worst quality, low quality, JPEG
            compression residue, ugly, incomplete, extra fingers, poorly drawn
            hands, poorly drawn faces, deformed, disfigured, malformed limbs,
            fused fingers, still picture, cluttered background, three legs, many
            people in the background, walking backwards
          description: The description of elements to avoid in the generated video.
        match_input_num_frames:
          type: boolean
          description: Whether to match the input video's number of frames.
        num_frames:
          type: integer
          minimum: 81
          maximum: 241
          default: 81
          description: Number of frames to generate.
        match_input_frames_per_second:
          type: boolean
          description: Whether to match the input video's frames per second (FPS).
        frames_per_second:
          type: integer
          minimum: 5
          maximum: 30
          default: 16
          description: Frames per second of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        resolution:
          type: string
          enum: *ref_53
          default: auto
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        aspect_ratio:
          type: string
          enum: *ref_54
          default: auto
          description: The aspect ratio of the generated video.
        num_inference_steps:
          type: integer
          default: 30
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer.
        guidance_scale:
          type: number
          default: 5
          description: >-
            Classifier-free guidance scale. Controls prompt adherence /
            creativity.
        shift:
          type: number
          default: 5
          description: Noise schedule shift parameter. Affects temporal dynamics.
        image_list:
          type: array
          items:
            type: string
            format: uri
          description: Array of image URLs for multi-image-to-video generation.
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        last_image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image to
            be used as the last frame of the video.
        enable_safety_checker:
          type: boolean
          description: If set to true, the safety checker will be enabled.
        enable_prompt_expansion:
          type: boolean
          description: Whether to enable prompt expansion.
        preprocess:
          type: boolean
          description: Whether to preprocess the input video.
        acceleration:
          type: string
          enum: *ref_55
          default: regular
          description: Acceleration to use for inference.
        video_quality:
          type: string
          enum: *ref_56
          default: high
          description: The quality of the generated video.
        video_write_mode:
          type: string
          enum: *ref_57
          default: balanced
          description: The method used to write the video.
        num_interpolated_frames:
          type: integer
          description: Number of frames to interpolate between the original frames.
        temporal_downsample_factor:
          type: integer
          description: Temporal downsample factor for the video.
        # Boolean switch; the numeric floor lives in auto_downsample_min_fps.
        # The previous description was a copy of that sibling's text.
        enable_auto_downsample:
          type: boolean
          description: >-
            Whether to automatically downsample the video;
            auto_downsample_min_fps sets the lowest frame rate it may be
            reduced to.
        auto_downsample_min_fps:
          type: number
          default: 15
          description: The minimum frames per second to downsample the video to.
        interpolator_model:
          type: string
          enum: *ref_58
          default: film
          description: The model to use for interpolation. Rife, or film are available.
        # NOTE(review): declared as boolean, yet the description speaks of
        # "loose"/"tight" modes — confirm the intended type and wording.
        sync_mode:
          type: boolean
          description: >-
            The synchronization mode for audio and video. Loose or tight are
            available.
      required:
        - model
        - video_url
        - prompt
    # Payload schema for the alibaba/wan2.2-vace-fun-a14b-reframe model; the
    # single-value `model` enum pins this schema to that model. Unlike the
    # prompt field's description suggests for other tasks, `prompt` is not
    # listed under `required` here.
    Alibaba.v2.wan22VaceReframe:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-vace-fun-a14b-reframe
        video_url:
          type: string
          format: uri
          description: >-
            URL to the source video file. This video will be used as a reference
            for the reframe task
        zoom_factor:
          type: number
          description: >-
            Zoom factor for the video. When this value is greater than 0, the
            video will be zoomed in by this factor (in relation to the canvas
            size,) cutting off the edges of the video. A value of 0 means no
            zoom
        trim_borders:
          type: boolean
          default: true
          description: Whether to trim borders from the video
        prompt:
          type: string
          description: The text prompt to guide video generation. Optional for reframing
        negative_prompt:
          type: string
          default: >-
            letterboxing, borders, black bars, bright colors, overexposed,
            static, blurred details, subtitles, style, artwork, painting,
            picture, still, overall gray, worst quality, low quality, JPEG
            compression residue, ugly, incomplete, extra fingers, poorly drawn
            hands, poorly drawn faces, deformed, disfigured, malformed limbs,
            fused fingers, still picture, cluttered background, three legs, many
            people in the background, walking backwards
          description: The description of elements to avoid in the generated video.
        match_input_num_frames:
          type: boolean
          default: true
          description: Whether to match the input video's number of frames.
        num_frames:
          type: integer
          minimum: 81
          maximum: 241
          default: 81
          description: Number of frames to generate.
        match_input_frames_per_second:
          type: boolean
          default: true
          description: Whether to match the input video's frames per second (FPS).
        frames_per_second:
          type: integer
          minimum: 5
          maximum: 30
          default: 16
          description: Frames per second of the generated video.
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        resolution:
          type: string
          enum: *ref_53
          default: auto
          description: >-
            An enumeration where the short side of the video frame determines
            the resolution.
        aspect_ratio:
          type: string
          enum: *ref_54
          default: auto
          description: The aspect ratio of the generated video.
        num_inference_steps:
          type: integer
          default: 30
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer.
        guidance_scale:
          type: number
          default: 5
          description: >-
            Classifier-free guidance scale. Controls prompt adherence /
            creativity.
        shift:
          type: number
          default: 5
          description: Noise schedule shift parameter. Affects temporal dynamics.
        image_list:
          type: array
          items:
            type: string
            format: uri
          description: Array of image URLs for multi-image-to-video generation.
        image_url:
          type: string
          format: uri
          description: URL of the image to be used as the first frame of the video.
        last_image_url:
          type: string
          format: uri
          description: >-
            A direct link to an online image or a Base64-encoded local image to
            be used as the last frame of the video.
        enable_safety_checker:
          type: boolean
          description: If set to true, the safety checker will be enabled.
        enable_prompt_expansion:
          type: boolean
          description: Whether to enable prompt expansion.
        preprocess:
          type: boolean
          description: Whether to preprocess the input video.
        acceleration:
          type: string
          enum: *ref_55
          default: regular
          description: Acceleration to use for inference.
        video_quality:
          type: string
          enum: *ref_56
          default: high
          description: The quality of the generated video.
        video_write_mode:
          type: string
          enum: *ref_57
          default: balanced
          description: The method used to write the video.
        num_interpolated_frames:
          type: integer
          description: Number of frames to interpolate between the original frames.
        temporal_downsample_factor:
          type: integer
          description: Temporal downsample factor for the video.
        # Boolean switch; the numeric floor lives in auto_downsample_min_fps.
        # The previous description was a copy of that sibling's text.
        enable_auto_downsample:
          type: boolean
          description: >-
            Whether to automatically downsample the video;
            auto_downsample_min_fps sets the lowest frame rate it may be
            reduced to.
        auto_downsample_min_fps:
          type: number
          default: 15
          description: The minimum frames per second to downsample the video to.
        interpolator_model:
          type: string
          enum: *ref_58
          default: film
          description: The model to use for interpolation. Rife, or film are available.
        # NOTE(review): declared as boolean, yet the description speaks of
        # "loose"/"tight" modes — confirm the intended type and wording.
        sync_mode:
          type: boolean
          description: >-
            The synchronization mode for audio and video. Loose or tight are
            available.
      required:
        - model
        - video_url
    # Payload schema for the alibaba/wan2.2-14b-animate-move model; requires an
    # input video and an input image in addition to the model id.
    # The &ref_59 / &ref_60 / &ref_61 anchors defined here are aliased by
    # Alibaba.v2.wan22AnimateReplace, so they must stay in this block.
    Alibaba.v2.wan22AnimateMove:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-14b-animate-move
        video_url:
          type: string
          format: uri
          description: URL of the input video
        image_url:
          type: string
          format: uri
          description: >-
            URL of the input image. If the input image does not match the chosen
            aspect ratio, it is resized and center cropped
        resolution:
          type: string
          enum: &ref_59
            - 480p
            - 580p
            - 720p
          default: 480p
          description: Resolution of the generated video (480p, 580p, or 720p)
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        num_inference_steps:
          type: integer
          default: 20
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer
        enable_safety_checker:
          type: boolean
          description: >-
            If set to true, input data will be checked for safety before
            processing
        # Bounds encode the "between 1.0 and 10.0" range stated in the
        # description so validators enforce what the text already promises.
        shift:
          type: number
          minimum: 1
          maximum: 10
          default: 5
          description: Shift value for the video. Must be between 1.0 and 10.0
        video_quality:
          type: string
          enum: &ref_60
            - low
            - medium
            - high
            - maximum
          default: high
          description: >-
            The quality of the output video. Higher quality means better visual
            quality but larger file size
        video_write_mode:
          type: string
          enum: &ref_61
            - fast
            - balanced
            - small
          default: balanced
          description: >-
            The write mode of the output video. Faster write mode means faster
            results but larger file size, balanced write mode is a good
            compromise between speed and quality, and small write mode is the
            slowest but produces the smallest file size
      required:
        - model
        - video_url
        - image_url
    # Payload schema for the alibaba/wan2.2-14b-animate-replace model; requires
    # an input video and an input image in addition to the model id.
    # The enum lists are YAML aliases (*ref_59, *ref_60, *ref_61) of anchors
    # defined in Alibaba.v2.wan22AnimateMove.
    Alibaba.v2.wan22AnimateReplace:
      type: object
      properties:
        model:
          type: string
          enum:
            - alibaba/wan2.2-14b-animate-replace
        video_url:
          type: string
          format: uri
          description: URL of the input video
        image_url:
          type: string
          format: uri
          description: >-
            URL of the input image. If the input image does not match the chosen
            aspect ratio, it is resized and center cropped
        resolution:
          type: string
          enum: *ref_59
          default: 480p
          description: Resolution of the generated video (480p, 580p, or 720p)
        seed:
          type: integer
          description: >-
            Varying the seed integer is a way to get different results for the
            same other request parameters. Using the same value for an identical
            request will produce similar results. If unspecified, a random
            number is chosen.
        num_inference_steps:
          type: integer
          default: 20
          description: >-
            Number of inference steps for sampling. Higher values give better
            quality but take longer
        enable_safety_checker:
          type: boolean
          description: >-
            If set to true, input data will be checked for safety before
            processing
        # Bounds encode the "between 1.0 and 10.0" range stated in the
        # description so validators enforce what the text already promises.
        shift:
          type: number
          minimum: 1
          maximum: 10
          default: 5
          description: Shift value for the video. Must be between 1.0 and 10.0
        video_quality:
          type: string
          enum: *ref_60
          default: high
          description: >-
            The quality of the output video. Higher quality means better visual
            quality but larger file size
        video_write_mode:
          type: string
          enum: *ref_61
          default: balanced
          description: >-
            The write mode of the output video. Faster write mode means faster
            results but larger file size, balanced write mode is a good
            compromise between speed and quality, and small write mode is the
            slowest but produces the smallest file size
      required:
        - model
        - video_url
        - image_url
    # Schema for payloads addressed to the sber-ai/kandinsky5-t2v model.
    # Only `model` and `prompt` are mandatory.
    Video.v2.kandinsky5TextToVideo:
      type: object
      required: [model, prompt]
      properties:
        model:
          type: string
          enum: ['sber-ai/kandinsky5-t2v']
        prompt:
          description: The text description of the scene, subject, or action to generate in the video.
          type: string
        aspect_ratio:
          description: The aspect ratio of the generated video.
          type: string
          enum: ['16:9', '9:16', '1:1']
          default: '16:9'
        # Restricted to the two listed values; no default is declared.
        duration:
          description: The length of the output video in seconds.
          type: integer
          enum: [5, 10]
        num_inference_steps:
          description: >-
            Number of inference steps for sampling.
            Higher values give better quality but take longer.
          type: integer
          default: 30
    # Schema for payloads addressed to the sber-ai/kandinsky5-distill-t2v model.
    # Only `model` and `prompt` are mandatory.
    Video.v2.kandinsky5TextToVideoDistill:
      type: object
      required: [model, prompt]
      properties:
        model:
          type: string
          enum: ['sber-ai/kandinsky5-distill-t2v']
        prompt:
          description: The text description of the scene, subject, or action to generate in the video.
          type: string
        aspect_ratio:
          description: The aspect ratio of the generated video.
          type: string
          enum: ['16:9', '9:16', '1:1']
          default: '16:9'
        # Restricted to the two listed values; no default is declared.
        duration:
          description: The length of the output video in seconds.
          type: integer
          enum: [5, 10]
    # Schema for payloads addressed to the longcat/distilled models (480p and
    # 720p variants share this schema). Only `model` and `prompt` are mandatory.
    Video.v2.longcatDistilled:
      type: object
      required: [model, prompt]
      properties:
        model:
          type: string
          enum: ['longcat/distilled/480p', 'longcat/distilled/720p']
        prompt:
          description: The text description of the scene, subject, or action to generate in the video.
          type: string
        image_url:
          description: >-
            A direct link to an online image or a Base64-encoded local image
            that will serve as the visual base
            or the first frame for the video.
          type: string
          format: uri
        fps:
          description: Frames per second of the generated video.
          type: integer
          default: 15
          minimum: 1
          maximum: 60
        num_frames:
          description: Number of frames to generate.
          type: integer
          default: 162
          minimum: 17
          maximum: 961
        aspect_ratio:
          description: >-
            The aspect ratio of the generated video.
            This parameter is ignored if image_url is provided.
          type: string
          enum: ['16:9', '9:16', '1:1']
          default: '16:9'
        num_inference_steps:
          description: >-
            Number of inference steps for sampling.
            Higher values give better quality but take longer.
          type: integer
          default: 12
          minimum: 2
          maximum: 16
        seed:
          description: >-
            Varying the seed integer is a way to get different results
            for the same other request parameters. Using the same value
            for an identical request will produce similar results.
            If unspecified, a random number is chosen.
          type: integer
        enable_prompt_expansion:
          description: Whether to enable prompt expansion.
          type: boolean
          default: false
        enable_safety_checker:
          description: If set to true, the safety checker will be enabled.
          type: boolean
          default: true
        # Values carry the container extension in parentheses; quoted so the
        # spaces and parentheses are unmistakably part of each string.
        video_output_type:
          description: The output type of the generated video.
          type: string
          enum:
            - 'X264 (.mp4)'
            - 'VP9 (.webm)'
            - 'PRORES4444 (.mov)'
            - 'GIF (.gif)'
          default: 'X264 (.mp4)'
        video_quality:
          description: The quality of the generated video.
          type: string
          enum: [low, medium, high, maximum]
          default: high
        video_write_mode:
          description: The method used to write the video.
          type: string
          enum: [fast, balanced, small]
          default: balanced
    # Response object for a video generation task: `id` and `status` are always
    # present; `video`, `duration`, `error` and `meta` are nullable.
    Video.v2.PollVideoResponseDTO:
      type: object
      required: [id, status]
      properties:
        id:
          description: The ID of the generated video.
          type: string
          example: '60ac7c34-3224-4b14-8e7d-0aa0db708325'
        status:
          description: The current status of the generation task.
          type: string
          enum: [queued, generating, completed, error]
          example: completed
        video:
          type: object
          nullable: true
          required: [url]
          properties:
            url:
              description: The URL where the file can be downloaded from.
              type: string
              format: uri
              example: 'https://cdn.aimlapi.com/generations/hedgehog/1759866285599-0cdfb138-c03a-49d4-a601-4f6413e27b15.mp4'
            duration:
              description: The duration of the video.
              type: number
              nullable: true
              example: 8
        # Same shape as video.duration, exposed at the top level as well.
        duration:
          description: The duration of the video.
          type: number
          nullable: true
          example: 8
        # No `type` is declared for this property.
        error:
          description: Description of the error, if any.
          nullable: true
        meta:
          description: Additional details about the generation.
          type: object
          nullable: true
          properties:
            usage:
              type: object
              nullable: true
              required: [credits_used]
              properties:
                credits_used:
                  description: The number of tokens consumed during generation.
                  type: number
                  example: 120000
    # Request body schema for audio generation (referenced by the
    # /v1/playground/audio/generations operation). Each `oneOf` branch lists
    # the parameters accepted for one model; the branches are told apart by
    # their single-value `model` enum, so a payload can match only one branch.
    Audio.v2.SubmitGenerationPayloadDTO:
      oneOf:
        # Branch: elevenlabs/eleven_music — free-form prompt plus an optional
        # target track length.
        - type: object
          properties:
            model:
              type: string
              enum:
                - elevenlabs/eleven_music
            prompt:
              type: string
              maxLength: 2000
              description: >-
                A text description that can define the genre, mood, instruments,
                vocals, tempo, structure, and even lyrics of the track. It can
                be high-level (“peaceful meditation with voiceover”) or detailed
                (“solo piano in C minor, 90 BPM, raw and emotional”). Use
                keywords to control genre, emotional tone, vocals (e.g., a
                cappella, two singers harmonizing), structure (e.g., “lyrics
                begin at 15 seconds”), or provide custom lyrics directly in the
                prompt.
            music_length_ms:
              type: integer
              minimum: 10000
              maximum: 300000
              default: 10000
              description: >-
                The length of the song to generate in milliseconds. This
                parameter may not always be respected by the model, and the
                actual audio length can differ.
              # `milliseconds` is not one of the formats defined by OpenAPI;
              # it is kept as an informational hint for consumers.
              format: milliseconds
          required:
            - model
            - prompt
        # Branch: stable-audio — prompt plus clip window and denoising steps.
        - type: object
          properties:
            model:
              type: string
              enum:
                - stable-audio
            prompt:
              type: string
              description: The prompt to generate audio.
            # NOTE(review): minimum of 1 means a clip cannot start at second
            # 0 — confirm this lower bound is intended.
            seconds_start:
              type: integer
              maximum: 47
              minimum: 1
              description: The start point of the audio clip to generate.
            seconds_total:
              type: integer
              maximum: 47
              minimum: 1
              default: 30
              description: The duration of the audio clip to generate.
            steps:
              type: integer
              minimum: 1
              maximum: 1000
              default: 100
              description: The number of steps to denoise the audio.
          required:
            - model
            - prompt
        # Branch: minimax-music — lyrics prompt plus a mandatory reference
        # track.
        - type: object
          properties:
            model:
              type: string
              enum:
                - minimax-music
            # NOTE(review): the description states a 600-character cap but no
            # `maxLength` is declared here — confirm whether the schema should
            # enforce it.
            prompt:
              type: string
              description: >-
                Lyrics with optional formatting. You can use a newline to
                separate each line of lyrics. You can use two newlines to add a
                pause between lines. You can use double hash marks (##) at the
                beginning and end of the lyrics to add accompaniment. Maximum
                600 characters.
            reference_audio_url:
              type: string
              format: uri
              description: >-
                Reference song, should contain music and vocals. Must be a .wav
                or .mp3 file longer than 15 seconds.
          required:
            - model
            - prompt
            - reference_audio_url
        # Branch: google/lyria2 — prompt with optional negative prompt and
        # seed.
        - type: object
          properties:
            model:
              type: string
              enum:
                - google/lyria2
            prompt:
              type: string
              description: The prompt to generate audio.
            negative_prompt:
              type: string
              description: A description of what to exclude from the generated audio
            seed:
              type: integer
              minimum: 0
              description: >-
                A seed for deterministic generation. If provided, the model will
                attempt to produce the same audio given the same prompt and
                other parameters.
          required:
            - model
            - prompt
        # Branch: minimax/music-1.5 — style prompt (10–300 chars), required
        # lyrics, and optional output audio settings.
        - type: object
          properties:
            model:
              type: string
              enum:
                - minimax/music-1.5
            prompt:
              type: string
              minLength: 10
              maxLength: 300
              description: >-
                A description of the music, specifying style, mood, and
                scenario. Length: 10–300 characters.
            lyrics:
              type: string
              minLength: 10
              maxLength: 3000
              # The `\n` in the text below is the literal two-character escape
              # shown to readers; a blank line inside this folded scalar would
              # instead become a real line break and hide the separator.
              description: >-
                Lyrics of the song. Use (\n) to separate lines. You may add
                structure tags like [Intro], [Verse], [Chorus], [Bridge],
                [Outro] to enhance the arrangement. Length: 10–3000 characters.
              example: |-
                [Verse]
                Streetlights flicker, the night breeze sighs
                Shadows stretch as I walk alone
                An old coat wraps my silent sorrow
                Wandering, longing, where should I go
                [Chorus]
                Pushing the wooden door, the aroma spreads
                In a familiar corner, a stranger gazes
            audio_setting:
              type: object
              properties:
                sample_rate:
                  type: integer
                  description: The sampling rate of the generated music.
                  # Anchored so the minimax/music-2.0 branch below can alias
                  # the same list of values.
                  enum: &ref_62
                    - 16000
                    - 24000
                    - 32000
                    - 44100
                bitrate:
                  type: integer
                  description: The bit rate of the generated music.
                  # Anchored; aliased by the minimax/music-2.0 branch below.
                  enum: &ref_63
                    - 32000
                    - 64000
                    - 128000
                    - 256000
                format:
                  type: string
                  # Anchored; aliased by the minimax/music-2.0 branch below.
                  enum: &ref_64
                    - mp3
                    - wav
                    - pcm
                  description: The format of the generated music.
              required:
                - format
          required:
            - model
            - prompt
            - lyrics
        # Branch: minimax/music-2.0 — same shape as minimax/music-1.5 but the
        # prompt may be up to 2000 characters.
        - type: object
          properties:
            model:
              type: string
              enum:
                - minimax/music-2.0
            prompt:
              type: string
              minLength: 10
              maxLength: 2000
              description: >-
                A description of the music, specifying style, mood, and
                scenario. Length: 10–2000 characters.
            lyrics:
              type: string
              minLength: 10
              maxLength: 3000
              # The `\n` in the text below is the literal two-character escape
              # shown to readers; a blank line inside this folded scalar would
              # instead become a real line break and hide the separator.
              description: >-
                Lyrics of the song. Use (\n) to separate lines. You may add
                structure tags like [Intro], [Verse], [Chorus], [Bridge],
                [Outro] to enhance the arrangement. Length: 10–3000 characters.
              example: |-
                [Verse]
                Streetlights flicker, the night breeze sighs
                Shadows stretch as I walk alone
                An old coat wraps my silent sorrow
                Wandering, longing, where should I go
                [Chorus]
                Pushing the wooden door, the aroma spreads
                In a familiar corner, a stranger gazes
            audio_setting:
              type: object
              properties:
                sample_rate:
                  type: integer
                  description: The sampling rate of the generated music.
                  # Alias of the list anchored in the minimax/music-1.5 branch.
                  enum: *ref_62
                bitrate:
                  type: integer
                  description: The bit rate of the generated music.
                  # Alias of the list anchored in the minimax/music-1.5 branch.
                  enum: *ref_63
                format:
                  type: string
                  # Alias of the list anchored in the minimax/music-1.5 branch.
                  enum: *ref_64
                  description: The format of the generated music.
              required:
                - format
          required:
            - model
            - prompt
            - lyrics
    Voice.v1.SpeechToTextPayloadDTO:
      anyOf:
        - type: object
          properties:
            model:
              type: string
              enum:
                - '#g1_nova-2-general'
                - '#g1_nova-2-meeting'
                - '#g1_nova-2-phonecall'
                - '#g1_nova-2-voicemail'
                - '#g1_nova-2-finance'
                - '#g1_nova-2-conversationalai'
                - '#g1_nova-2-video'
                - '#g1_nova-2-medical'
                - '#g1_nova-2-drivethru'
                - '#g1_nova-2-automotive'
                - '#g1_whisper-large'
                - '#g1_whisper-medium'
                - '#g1_whisper-small'
                - '#g1_whisper-tiny'
                - '#g1_whisper-base'
            custom_intent:
              anyOf:
                - type: string
                - type: array
                  items:
                    type: string
              description: >-
                A custom intent you want the model to detect within your input
                audio if present. Submit up to 100.
            custom_topic:
              anyOf:
                - type: string
                - type: array
                  items:
                    type: string
              description: >-
                A custom topic you want the model to detect within your input
                audio if present. Submit up to 100.
            custom_intent_mode:
              type: string
              enum: &ref_65
                - strict
                - extended
              description: >-
                Sets how the model will interpret strings submitted to the
                custom_intent param. When strict, the model will only return
                intents submitted using the custom_intent param. When extended,
                the model will return its own detected intents in addition to
                those submitted using the custom_intent param.
            custom_topic_mode:
              type: string
              enum: &ref_66
                - strict
                - extended
              description: >-
                Sets how the model will interpret strings submitted to the
                custom_topic param. When strict, the model will only return
                topics submitted using the custom_topic param. When extended,
                the model will return its own detected topics in addition to
                those submitted using the custom_topic param.
            detect_language:
              type: boolean
              description: >-
                Enables language detection to identify the dominant language
                spoken in the submitted audio.
            detect_entities:
              type: boolean
              description: >-
                When Entity Detection is enabled, the Punctuation feature will
                be enabled by default.
            detect_topics:
              type: boolean
              description: >-
                Detects the most important and relevant topics that are
                referenced in speech within the audio.
            diarize:
              type: boolean
              description: >-
                Recognizes speaker changes. Each word in the transcript will be
                assigned a speaker number starting at 0.
            dictation:
              type: boolean
              description: >-
                Identifies and extracts key entities from content in submitted
                audio.
            diarize_version:
              type: string
              description: ''
            extra:
              type: string
              description: >-
                Arbitrary key-value pairs that are attached to the API response
                for usage in downstream processing.
            filler_words:
              type: boolean
              description: >-
                Filler Words can help transcribe interruptions in your audio,
                like “uh” and “um”.
            intents:
              type: boolean
              description: Recognizes speaker intent throughout a transcript or text.
            keywords:
              type: string
              description: >-
                Keywords can boost or suppress specialized terminology and
                brands.
            language:
              type: string
              description: >-
                The BCP-47 language tag that hints at the primary spoken
                language. Depending on the Model and API endpoint you choose
                only certain languages are available
            measurements:
              type: boolean
              description: >-
                Spoken measurements will be converted to their corresponding
                abbreviations
            multi_channel:
              type: boolean
              description: Transcribes each audio channel independently
            numerals:
              type: boolean
              description: >-
                Numerals converts numbers from written format to numerical
                format
            paragraphs:
              type: boolean
              description: Splits audio into paragraphs to improve transcript readability
            profanity_filter:
              type: boolean
              description: >-
                Profanity Filter looks for recognized profanity and converts it
                to the nearest recognized non-profane word or removes it from
                the transcript completely
            punctuate:
              type: boolean
              description: Adds punctuation and capitalization to the transcript
            search:
              type: string
              description: Search for terms or phrases in submitted audio
            sentiment:
              type: boolean
              description: Recognizes the sentiment throughout a transcript or text
            smart_format:
              type: boolean
              description: >-
                Applies formatting to transcript output. When set to true,
                additional formatting will be applied to transcripts to improve
                readability
            summarize:
              type: string
              description: >-
                Summarizes content. For Listen API, supports string version
                option. For Read API, accepts boolean only.
            tag:
              type: array
              items:
                type: string
              description: >-
                Labels your requests for the purpose of identification during
                usage reporting
            topics:
              type: boolean
              description: Detects topics throughout a transcript or text
            utterances:
              type: boolean
              description: Segments speech into meaningful semantic units
            utt_split:
              type: number
              description: >-
                Seconds to wait before detecting a pause between words in
                submitted audio
            url:
              type: string
              format: uri
          required:
            - model
            - url
        - type: object
          properties:
            model:
              type: string
              enum:
                - '#g1_nova-2-general'
                - '#g1_nova-2-meeting'
                - '#g1_nova-2-phonecall'
                - '#g1_nova-2-voicemail'
                - '#g1_nova-2-finance'
                - '#g1_nova-2-conversationalai'
                - '#g1_nova-2-video'
                - '#g1_nova-2-medical'
                - '#g1_nova-2-drivethru'
                - '#g1_nova-2-automotive'
                - '#g1_whisper-large'
                - '#g1_whisper-medium'
                - '#g1_whisper-small'
                - '#g1_whisper-tiny'
                - '#g1_whisper-base'
            custom_intent:
              anyOf:
                - type: string
                - type: array
                  items:
                    type: string
              description: >-
                A custom intent you want the model to detect within your input
                audio if present. Submit up to 100.
            custom_topic:
              anyOf:
                - type: string
                - type: array
                  items:
                    type: string
              description: >-
                A custom topic you want the model to detect within your input
                audio if present. Submit up to 100.
            custom_intent_mode:
              type: string
              enum: *ref_65
              description: >-
                Sets how the model will interpret strings submitted to the
                custom_intent param. When strict, the model will only return
                intents submitted using the custom_intent param. When extended,
                the model will return its own detected intents in addition to
                those submitted using the custom_intent param.
            custom_topic_mode:
              type: string
              enum: *ref_66
              description: >-
                Sets how the model will interpret strings submitted to the
                custom_topic param. When strict, the model will only return
                topics submitted using the custom_topic param. When extended,
                the model will return its own detected topics in addition to
                those submitted using the custom_topic param.
            detect_language:
              type: boolean
              description: >-
                Enables language detection to identify the dominant language
                spoken in the submitted audio.
            detect_entities:
              type: boolean
              description: >-
                When Entity Detection is enabled, the Punctuation feature will
                be enabled by default.
            detect_topics:
              type: boolean
              description: >-
                Detects the most important and relevant topics that are
                referenced in speech within the audio.
            diarize:
              type: boolean
              description: >-
                Recognizes speaker changes. Each word in the transcript will be
                assigned a speaker number starting at 0.
            dictation:
              type: boolean
              description: >-
                Identifies and extracts key entities from content in submitted
                audio.
            diarize_version:
              type: string
              description: ''
            extra:
              type: string
              description: >-
                Arbitrary key-value pairs that are attached to the API response
                for usage in downstream processing.
            filler_words:
              type: boolean
              description: >-
                Filler Words can help transcribe interruptions in your audio,
                like “uh” and “um”.
            intents:
              type: boolean
              description: Recognizes speaker intent throughout a transcript or text.
            keywords:
              type: string
              description: >-
                Keywords can boost or suppress specialized terminology and
                brands.
            language:
              type: string
              description: >-
                The BCP-47 language tag that hints at the primary spoken
                language. Depending on the Model and API endpoint you choose
                only certain languages are available
            measurements:
              type: boolean
              description: >-
                Spoken measurements will be converted to their corresponding
                abbreviations
            multi_channel:
              type: boolean
              description: Transcribes each audio channel independently
            numerals:
              type: boolean
              description: >-
                Numerals converts numbers from written format to numerical
                format
            paragraphs:
              type: boolean
              description: Splits audio into paragraphs to improve transcript readability
            profanity_filter:
              type: boolean
              description: >-
                Profanity Filter looks for recognized profanity and converts it
                to the nearest recognized non-profane word or removes it from
                the transcript completely
            punctuate:
              type: boolean
              description: Adds punctuation and capitalization to the transcript
            search:
              type: string
              description: Search for terms or phrases in submitted audio
            sentiment:
              type: boolean
              description: Recognizes the sentiment throughout a transcript or text
            smart_format:
              type: boolean
              description: >-
                Applies formatting to transcript output. When set to true,
                additional formatting will be applied to transcripts to improve
                readability
            summarize:
              type: string
              description: >-
                Summarizes content. For Listen API, supports string version
                option. For Read API, accepts boolean only.
            tag:
              type: array
              items:
                type: string
              description: >-
                Labels your requests for the purpose of identification during
                usage reporting
            topics:
              type: boolean
              description: Detects topics throughout a transcript or text
            utterances:
              type: boolean
              description: Segments speech into meaningful semantic units
            utt_split:
              type: number
              description: >-
                Seconds to wait before detecting a pause between words in
                submitted audio
          required:
            - model
        - type: object
          properties:
            model:
              type: string
              enum:
                - aai/slam-1
                - aai/universal
            audio:
              type: object
              properties:
                buffer:
                  nullable: true
                mimetype:
                  type: string
                size:
                  type: integer
                originalname:
                  type: string
                encoding:
                  type: string
                fieldname:
                  type: string
              required:
                - mimetype
                - originalname
                - encoding
                - fieldname
              description: The audio file to transcribe.
            audio_start_from:
              type: integer
              description: >-
                The point in time, in milliseconds, in the file at which the
                transcription was started.
            audio_end_at:
              type: integer
              description: >-
                The point in time, in milliseconds, in the file at which the
                transcription was terminated.
            language_code:
              type: string
              description: >-
                The language of your audio file. Possible values are found in
                Supported Languages. The default value is 'en_us'.
            language_confidence_threshold:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The confidence threshold for the automatically detected
                language. An error will be returned if the language confidence
                is below this threshold. Defaults to 0.
            language_detection:
              type: boolean
              description: >-
                Enable Automatic language detection, either true or false.
                Available for universal model only.
            punctuate:
              type: boolean
              nullable: true
              default: null
              description: Adds punctuation and capitalization to the transcript
            format_text:
              type: boolean
              default: true
              description: Enable Text Formatting, can be true or false.
            disfluencies:
              type: boolean
              default: false
              description: >-
                Transcribe Filler Words, like "umm", in your media file; can be
                true or false.
            multichannel:
              type: boolean
              default: false
              description: Enable Multichannel transcription, can be true or false.
            speaker_labels:
              type: boolean
              nullable: true
              default: null
              description: Enable Speaker diarization, can be true or false.
            speakers_expected:
              type: integer
              nullable: true
              default: null
              description: >-
                Tell the speaker label model how many speakers it should attempt
                to identify. See Speaker diarization for more details.
            content_safety:
              type: boolean
              default: false
              description: Enable Content Moderation, can be true or false.
            iab_categories:
              type: boolean
              default: false
              description: Enable Topic Detection, can be true or false.
            custom_spelling:
              type: array
              items:
                type: object
                properties:
                  from:
                    type: string
                  to:
                    type: string
                required:
                  - from
                  - to
              description: >-
                Customize how words are spelled and formatted using to and from
                values.
            auto_highlights:
              type: boolean
              default: false
              description: Enable Key Phrases, either true or false.
            word_boost:
              type: array
              items:
                type: string
              description: >-
                The list of custom vocabulary to boost transcription probability
                for.
            boost_param:
              type: string
              enum:
                - low
                - default
                - high
              description: >-
                How much to boost specified words. Allowed values: low, default,
                high.
            filter_profanity:
              type: boolean
              default: false
              description: >-
                Filter profanity from the transcribed text, can be true or
                false.
            redact_pii:
              type: boolean
              default: false
              description: >-
                Redact PII from the transcribed text using the Redact PII model,
                can be true or false.
            redact_pii_audio:
              type: boolean
              default: false
              description: >-
                Generate a copy of the original media file with spoken PII
                "beeped" out, can be true or false. See PII redaction for more
                details.
            redact_pii_audio_quality:
              type: string
              enum:
                - mp3
                - wav
              description: >-
                Controls the filetype of the audio created by redact_pii_audio.
                Currently supports mp3 (default) and wav. See PII redaction for
                more details.
            redact_pii_policies:
              type: array
              items:
                type: string
                enum:
                  - account_number
                  - banking_information
                  - blood_type
                  - credit_card_cvv
                  - credit_card_expiration
                  - credit_card_number
                  - date
                  - date_interval
                  - date_of_birth
                  - drivers_license
                  - drug
                  - duration
                  - email_address
                  - event
                  - filename
                  - gender_sexuality
                  - healthcare_number
                  - injury
                  - ip_address
                  - language
                  - location
                  - marital_status
                  - medical_condition
                  - medical_process
                  - money_amount
                  - nationality
                  - number_sequence
                  - occupation
                  - organization
                  - passport_number
                  - password
                  - person_age
                  - person_name
                  - phone_number
                  - physical_attribute
                  - political_affiliation
                  - religion
                  - statistics
                  - time
                  - url
                  - us_social_security_number
                  - username
                  - vehicle_id
                  - zodiac_sign
              description: >-
                The list of PII Redaction policies to enable. See PII redaction
                for more details.
            redact_pii_sub:
              type: string
              enum:
                - entity_name
                - hash
              description: >-
                The replacement logic for detected PII, can be `entity_name` or
                `hash`. See PII redaction for more details.
            sentiment_analysis:
              type: boolean
              default: false
              description: Enable Sentiment Analysis, can be true or false.
            entity_detection:
              type: boolean
              default: false
              description: Enable Entity Detection, can be true or false.
            summarization:
              type: boolean
              default: false
              description: Enable Summarization, can be true or false.
            summary_model:
              type: string
              enum:
                - informative
                - conversational
                - catchy
              description: >-
                The model to summarize the transcript. Allowed values:
                informative, conversational, catchy.
            summary_type:
              type: string
              enum:
                - bullets
                - bullets_verbose
                - gist
                - headline
                - paragraph
              description: >-
                The type of summary. Allowed values: bullets, bullets_verbose,
                gist, headline, paragraph.
            auto_chapters:
              type: boolean
              default: false
              description: Enable Auto Chapters, either true or false.
            speech_threshold:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                Reject audio files that contain less than this fraction of
                speech. Valid values are in the range [0, 1] inclusive.
          required:
            - model
            - audio
        - type: object
          properties:
            model:
              type: string
              enum:
                - aai/slam-1
                - aai/universal
            url:
              type: string
              format: uri
              description: URL of the input audio file.
            audio_start_from:
              type: integer
              description: >-
                The point in time, in milliseconds, in the file at which the
                transcription was started.
            audio_end_at:
              type: integer
              description: >-
                The point in time, in milliseconds, in the file at which the
                transcription was terminated.
            language_code:
              type: string
              description: >-
                The language of your audio file. Possible values are found in
                Supported Languages. The default value is 'en_us'.
            language_confidence_threshold:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                The confidence threshold for the automatically detected
                language. An error will be returned if the language confidence
                is below this threshold. Defaults to 0.
            language_detection:
              type: boolean
              description: >-
                Enable Automatic language detection, either true or false.
                Available for universal model only.
            punctuate:
              type: boolean
              nullable: true
              default: null
              description: Adds punctuation and capitalization to the transcript
            format_text:
              type: boolean
              default: true
              description: Enable Text Formatting, can be true or false.
            disfluencies:
              type: boolean
              default: false
              description: >-
                Transcribe Filler Words, like "umm", in your media file; can be
                true or false.
            multichannel:
              type: boolean
              default: false
              description: Enable Multichannel transcription, can be true or false.
            speaker_labels:
              type: boolean
              nullable: true
              default: false
              description: Enable Speaker diarization, can be true or false.
            speakers_expected:
              type: integer
              nullable: true
              default: null
              description: >-
                Tell the speaker label model how many speakers it should attempt
                to identify. See Speaker diarization for more details.
            content_safety:
              type: boolean
              default: false
              description: Enable Content Moderation, can be true or false.
            iab_categories:
              type: boolean
              default: false
              description: Enable Topic Detection, can be true or false.
            custom_spelling:
              type: array
              items:
                type: object
                properties:
                  from:
                    type: string
                  to:
                    type: string
                required:
                  - from
                  - to
              description: >-
                Customize how words are spelled and formatted using to and from
                values.
            auto_highlights:
              type: boolean
              default: false
              description: Enable Key Phrases, either true or false.
            word_boost:
              type: array
              items:
                type: string
              description: >-
                The list of custom vocabulary to boost transcription probability
                for.
            boost_param:
              type: string
              enum:
                - low
                - default
                - high
              description: >-
                How much to boost specified words. Allowed values: low, default,
                high.
            filter_profanity:
              type: boolean
              default: false
              description: >-
                Filter profanity from the transcribed text, can be true or
                false.
            redact_pii:
              type: boolean
              default: false
              description: >-
                Redact PII from the transcribed text using the Redact PII model,
                can be true or false.
            redact_pii_audio:
              type: boolean
              default: false
              description: >-
                Generate a copy of the original media file with spoken PII
                "beeped" out, can be true or false. See PII redaction for more
                details.
            redact_pii_audio_quality:
              type: string
              enum:
                - mp3
                - wav
              description: >-
                Controls the filetype of the audio created by redact_pii_audio.
                Currently supports mp3 (default) and wav. See PII redaction for
                more details.
            redact_pii_policies:
              type: array
              items:
                type: string
                enum:
                  - account_number
                  - banking_information
                  - blood_type
                  - credit_card_cvv
                  - credit_card_expiration
                  - credit_card_number
                  - date
                  - date_interval
                  - date_of_birth
                  - drivers_license
                  - drug
                  - duration
                  - email_address
                  - event
                  - filename
                  - gender_sexuality
                  - healthcare_number
                  - injury
                  - ip_address
                  - language
                  - location
                  - marital_status
                  - medical_condition
                  - medical_process
                  - money_amount
                  - nationality
                  - number_sequence
                  - occupation
                  - organization
                  - passport_number
                  - password
                  - person_age
                  - person_name
                  - phone_number
                  - physical_attribute
                  - political_affiliation
                  - religion
                  - statistics
                  - time
                  - url
                  - us_social_security_number
                  - username
                  - vehicle_id
                  - zodiac_sign
              description: >-
                The list of PII Redaction policies to enable. See PII redaction
                for more details.
            # Replacement strategy for detected PII. The values named in the
            # description must stay in sync with the enum below.
            redact_pii_sub:
              type: string
              enum:
                - entity_name
                - hash
              description: >-
                The replacement logic for detected PII, can be `entity_name` or
                `hash`. See PII redaction for more details.
            sentiment_analysis:
              type: boolean
              default: false
              description: Enable Sentiment Analysis, can be true or false.
            entity_detection:
              type: boolean
              default: false
              description: Enable Entity Detection, can be true or false.
            summarization:
              type: boolean
              default: false
              description: Enable Summarization, can be true or false.
            summary_model:
              type: string
              enum:
                - informative
                - conversational
                - catchy
              description: >-
                The model to summarize the transcript. Allowed values:
                informative, conversational, catchy.
            summary_type:
              type: string
              enum:
                - bullets
                - bullets_verbose
                - gist
                - headline
                - paragraph
              description: >-
                The type of summary. Allowed values: bullets, bullets_verbose,
                gist, headline, paragraph.
            auto_chapters:
              type: boolean
              default: false
              description: Enable Auto Chapters, either true or false.
            speech_threshold:
              type: number
              minimum: 0
              maximum: 1
              description: >-
                Reject audio files that contain less than this fraction of
                speech. Valid values are in the range [0, 1] inclusive.
          required:
            - model
            - url
        - type: object
          properties:
            model:
              type: string
              enum:
                - openai/gpt-4o-transcribe
                - openai/gpt-4o-mini-transcribe
            language:
              type: string
              description: >-
                The BCP-47 language tag that hints at the primary spoken
                language. Depending on the Model and API endpoint you choose
                only certain languages are available
            prompt:
              type: string
              description: >-
                An optional text to guide the model's style or continue a
                previous audio segment. The prompt should match the audio
                language.
            temperature:
              type: number
              minimum: 0
              maximum: 1
              default: 0
              description: >-
                The sampling temperature, between 0 and 1. Higher values like
                0.8 will make the output more random, while lower values like
                0.2 will make it more focused and deterministic.
            url:
              type: string
              format: uri
              description: URL of the input audio file.
          required:
            - model
            - url
        # Same models and optional fields as the preceding variant, but with no
        # `url` property declared; only `model` is required.
        # NOTE(review): every payload accepted by the preceding variant also
        # satisfies this one (no `additionalProperties` restriction is visible),
        # so `url` is effectively optional for these models - confirm that this
        # is intended.
        - type: object
          properties:
            model:
              type: string
              enum:
                - openai/gpt-4o-transcribe
                - openai/gpt-4o-mini-transcribe
            language:
              type: string
              description: >-
                The BCP-47 language tag that hints at the primary spoken
                language. Depending on the Model and API endpoint you choose
                only certain languages are available
            prompt:
              type: string
              description: >-
                An optional text to guide the model's style or continue a
                previous audio segment. The prompt should match the audio
                language.
            temperature:
              type: number
              minimum: 0
              maximum: 1
              default: 0
              description: >-
                The sampling temperature, between 0 and 1. Higher values like
                0.8 will make the output more random, while lower values like
                0.2 will make it more focused and deterministic.
          required:
            - model
    Voice.v1.TextToSpeechPayload:
      anyOf:
        - type: object
          properties:
            model:
              type: string
              enum:
                - '#g1_aura-asteria-en'
                - '#g1_aura-hera-en'
                - '#g1_aura-luna-en'
                - '#g1_aura-stella-en'
                - '#g1_aura-athena-en'
                - '#g1_aura-zeus-en'
                - '#g1_aura-orion-en'
                - '#g1_aura-arcas-en'
                - '#g1_aura-perseus-en'
                - '#g1_aura-angus-en'
                - '#g1_aura-orpheus-en'
                - '#g1_aura-helios-en'
                - '#g1_aura-2-amalthea-en'
                - '#g1_aura-2-andromeda-en'
                - '#g1_aura-2-apollo-en'
                - '#g1_aura-2-arcas-en'
                - '#g1_aura-2-aries-en'
                - '#g1_aura-2-asteria-en'
                - '#g1_aura-2-athena-en'
                - '#g1_aura-2-atlas-en'
                - '#g1_aura-2-aurora-en'
                - '#g1_aura-2-callista-en'
                - '#g1_aura-2-cora-en'
                - '#g1_aura-2-cordelia-en'
                - '#g1_aura-2-delia-en'
                - '#g1_aura-2-draco-en'
                - '#g1_aura-2-electra-en'
                - '#g1_aura-2-harmonia-en'
                - '#g1_aura-2-helena-en'
                - '#g1_aura-2-hera-en'
                - '#g1_aura-2-hermes-en'
                - '#g1_aura-2-hyperion-en'
                - '#g1_aura-2-iris-en'
                - '#g1_aura-2-janus-en'
                - '#g1_aura-2-juno-en'
                - '#g1_aura-2-jupiter-en'
                - '#g1_aura-2-luna-en'
                - '#g1_aura-2-mars-en'
                - '#g1_aura-2-minerva-en'
                - '#g1_aura-2-neptune-en'
                - '#g1_aura-2-odysseus-en'
                - '#g1_aura-2-ophelia-en'
                - '#g1_aura-2-orion-en'
                - '#g1_aura-2-orpheus-en'
                - '#g1_aura-2-pandora-en'
                - '#g1_aura-2-phoebe-en'
                - '#g1_aura-2-pluto-en'
                - '#g1_aura-2-saturn-en'
                - '#g1_aura-2-selene-en'
                - '#g1_aura-2-thalia-en'
                - '#g1_aura-2-theia-en'
                - '#g1_aura-2-vesta-en'
                - '#g1_aura-2-zeus-en'
                - '#g1_aura-2-celeste-es'
                - '#g1_aura-2-estrella-es'
                - '#g1_aura-2-nestor-es'
            text:
              type: string
              description: The text content to be converted to speech.
            container:
              type: string
              description: >-
                The file format wrapper for the output audio. The available
                options depend on the encoding type.
            encoding:
              type: string
              enum:
                - linear16
                - mulaw
                - alaw
                - mp3
                - opus
                - flac
                - aac
              default: linear16
              description: Specifies the expected encoding of your audio output
            # NOTE(review): typed as a string although the description gives a
            # unit of Hz; confirm whether numeric strings are intended here.
            sample_rate:
              type: string
              description: Audio sample rate in Hz.
          required:
            - model
            - text
        - oneOf:
            - type: object
              properties:
                model:
                  type: string
                  enum:
                    - elevenlabs/eleven_multilingual_v2
                    - elevenlabs/eleven_turbo_v2_5
                text:
                  type: string
                  description: The text content to be converted to speech.
                voice:
                  type: string
                  enum: &ref_67
                    - Rachel
                    - Drew
                    - Clyde
                    - Paul
                    - Aria
                    - Domi
                    - Dave
                    - Roger
                    - Fin
                    - Sarah
                    - Antoni
                    - Laura
                    - Thomas
                    - Charlie
                    - George
                    - Emily
                    - Elli
                    - Callum
                    - Patrick
                    - River
                    - Harry
                    - Liam
                    - Dorothy
                    - Josh
                    - Arnold
                    - Charlotte
                    - Alice
                    - Matilda
                    - James
                    - Joseph
                    - Will
                    - Jeremy
                    - Jessica
                    - Eric
                    - Michael
                    - Ethan
                    - Chris
                    - Gigi
                    - Freya
                    - Santa Claus
                    - Brian
                    - Grace
                    - Daniel
                    - Lily
                    - Serena
                    - Adam
                    - Nicole
                    - Bill
                    - Jessie
                    - Sam
                    - Glinda
                    - Giovanni
                    - Mimi
                  default: Rachel
                  description: Name of the voice to be used.
                apply_text_normalization:
                  type: string
                  enum: &ref_68
                    - auto
                    - 'on'
                    - 'off'
                  description: >-
                    This parameter controls text normalization with three modes:
                    'auto', 'on', and 'off'. When set to 'auto', the system will
                    automatically decide whether to apply text normalization
                    (e.g., spelling out numbers). With 'on', text normalization
                    will always be applied, while with 'off', it will be
                    skipped.
                next_text:
                  type: string
                  description: >-
                    The text that comes after the text of the current request.
                    Can be used to improve the speech's continuity when
                    concatenating together multiple generations or to influence
                    the speech's continuity in the current generation.
                previous_text:
                  type: string
                  description: >-
                    The text that came before the text of the current request.
                    Can be used to improve the speech's continuity when
                    concatenating together multiple generations or to influence
                    the speech's continuity in the current generation.
                output_format:
                  type: string
                  enum: &ref_69
                    - mp3_22050_32
                    - mp3_44100_32
                    - mp3_44100_64
                    - mp3_44100_96
                    - mp3_44100_128
                    - mp3_44100_192
                    - pcm_8000
                    - pcm_16000
                    - pcm_22050
                    - pcm_24000
                    - pcm_44100
                    - pcm_48000
                    - ulaw_8000
                    - alaw_8000
                    - opus_48000_32
                    - opus_48000_64
                    - opus_48000_96
                    - opus_48000_128
                    - opus_48000_192
                  description: >-
                    Format of the output content for non-streaming requests.
                    Controls how the generated audio data is encoded in the
                    response.
                voice_settings:
                  type: object
                  properties:
                    stability:
                      type: number
                      description: >-
                        Determines how stable the voice is and the randomness
                        between each generation. Lower values introduce broader
                        emotional range for the voice. Higher values can result
                        in a monotonous voice with limited emotion.
                    use_speaker_boost:
                      type: boolean
                      description: >-
                        This setting boosts the similarity to the original
                        speaker. Using this setting requires a slightly higher
                        computational load, which in turn increases latency.
                    similarity_boost:
                      type: number
                      description: >-
                        Determines how closely the AI should adhere to the
                        original voice when attempting to replicate it.
                    style:
                      type: number
                      description: >-
                        Determines the style exaggeration of the voice. This
                        setting attempts to amplify the style of the original
                        speaker. It does consume additional computational
                        resources and might increase latency if set to anything
                        other than 0.
                    speed:
                      type: number
                      description: >-
                        Adjusts the speed of the voice. A value of 1.0 is the
                        default speed, while values less than 1.0 slow down the
                        speech, and values greater than 1.0 speed it up.
                  description: >-
                    Voice settings overriding stored settings for the given
                    voice. They are applied only on the given request.
                seed:
                  type: integer
                  description: >-
                    If specified, our system will make a best effort to sample
                    deterministically, such that repeated requests with the same
                    seed and parameters should return the same result.
                    Determinism is not guaranteed.
              required:
                - model
                - text
            - type: object
              properties:
                model:
                  type: string
                  enum:
                    - elevenlabs/v3_alpha
                text:
                  type: string
                  description: The text content to be converted to speech.
                voice:
                  type: string
                  enum: *ref_67
                  default: Rachel
                  description: Name of the voice to be used.
                apply_text_normalization:
                  type: string
                  enum: *ref_68
                  description: >-
                    This parameter controls text normalization with three modes:
                    'auto', 'on', and 'off'. When set to 'auto', the system will
                    automatically decide whether to apply text normalization
                    (e.g., spelling out numbers). With 'on', text normalization
                    will always be applied, while with 'off', it will be
                    skipped.
                output_format:
                  type: string
                  enum: *ref_69
                  description: >-
                    Format of the output content for non-streaming requests.
                    Controls how the generated audio data is encoded in the
                    response.
                voice_settings:
                  type: object
                  properties:
                    stability:
                      type: number
                      description: >-
                        Determines how stable the voice is and the randomness
                        between each generation. Lower values introduce broader
                        emotional range for the voice. Higher values can result
                        in a monotonous voice with limited emotion.
                    use_speaker_boost:
                      type: boolean
                      description: >-
                        This setting boosts the similarity to the original
                        speaker. Using this setting requires a slightly higher
                        computational load, which in turn increases latency.
                    similarity_boost:
                      type: number
                      description: >-
                        Determines how closely the AI should adhere to the
                        original voice when attempting to replicate it.
                    style:
                      type: number
                      description: >-
                        Determines the style exaggeration of the voice. This
                        setting attempts to amplify the style of the original
                        speaker. It does consume additional computational
                        resources and might increase latency if set to anything
                        other than 0.
                    speed:
                      type: number
                      description: >-
                        Adjusts the speed of the voice. A value of 1.0 is the
                        default speed, while values less than 1.0 slow down the
                        speech, and values greater than 1.0 speed it up.
                  description: >-
                    Voice settings overriding stored settings for the given
                    voice. They are applied only on the given request.
                seed:
                  type: integer
                  description: >-
                    If specified, our system will make a best effort to sample
                    deterministically, such that repeated requests with the same
                    seed and parameters should return the same result.
                    Determinism is not guaranteed.
              required:
                - model
                - text
        - $ref: '#/components/schemas/Minimax.v2.TextToSpeechPayloadDTO'
        - type: object
          properties:
            model:
              type: string
              enum:
                - microsoft/vibevoice-7b
                - microsoft/vibevoice-1.5b
            script:
              type: string
              minLength: 1
              maxLength: 5000
              description: >-
                The script to convert to speech. Can be formatted with "Speaker
                X:" prefixes for multi-speaker dialogues.
            # Voices used to render the script: a list of 1 to 4 entries, each
            # naming a built-in preset and/or pointing at a voice sample URL.
            # NOTE(review): the description says speakers are inferred when the
            # field is omitted, yet a `default` with a single preset is declared;
            # confirm which behaviour applies.
            speakers:
              type: array
              items:
                # NOTE(review): no `required` list, so an entry with neither
                # `preset` nor `audio_url` passes validation - confirm.
                type: object
                properties:
                  preset:
                    type: string
                    enum:
                      - Alice [EN]
                      - Alice [EN] (Background Music)
                      - Carter [EN]
                      - Frank [EN]
                      - Maya [EN]
                      - Anchen [ZH] (Background Music)
                      - Bowen [ZH]
                      - Xinran [ZH]
                    description: >-
                      Default voice preset to use for the speaker. Not used if
                      audio_url is provided.
                  # Takes precedence over `preset` when both are given (see the
                  # two descriptions).
                  audio_url:
                    type: string
                    format: uri
                    description: >-
                      URL to a voice sample audio file. If provided, preset will
                      be ignored.
              minItems: 1
              maxItems: 4
              default:
                - preset: Alice [EN]
              description: >-
                List of speakers to use for the script. If not provided, will be
                inferred from the script or voice samples.
            seed:
              type: integer
              description: >-
                If specified, our system will make a best effort to sample
                deterministically, such that repeated requests with the same
                seed and parameters should return the same result. Determinism
                is not guaranteed.
            cfg_scale:
              type: number
              minimum: 0.1
              maximum: 2
              default: 1.3
              description: >-
                The CFG (Classifier Free Guidance) scale is a measure of how
                close you want the model to stick to your prompt.
          required:
            - model
            - script
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/qwen3-tts-flash
            text:
              type: string
              minLength: 1
              maxLength: 600
              description: The text content to be converted to speech.
            voice:
              type: string
              enum:
                - Cherry
                - Ethan
                - Nofish
                - Jennifer
                - Ryan
                - Katerina
                - Elias
                - Jada
                - Dylan
                - Sunny
                - Li
                - Marcus
                - Roy
                - Peter
                - Rocky
                - Kiki
                - Eric
              default: Cherry
              description: Name of the voice to be used.
          required:
            - model
            - text
        - type: object
          properties:
            model:
              type: string
              enum:
                - openai/gpt-4o-mini-tts
                - openai/tts-1
                - openai/tts-1-hd
            text:
              type: string
              minLength: 1
              maxLength: 4096
              description: The text content to be converted to speech.
            voice:
              type: string
              enum:
                - alloy
                - ash
                - ballad
                - coral
                - echo
                - fable
                - nova
                - onyx
                - sage
                - shimmer
                - verse
              default: alloy
              description: Name of the voice to be used.
            # NOTE(review): the description speaks of a numeric setting
            # ("anything other than 0") while this property is typed as a
            # string; the wording is identical to voice_settings.style in the
            # ElevenLabs variants. Confirm what `style` means for these models.
            style:
              type: string
              description: >-
                Determines the style exaggeration of the voice. This setting
                attempts to amplify the style of the original speaker. It does
                consume additional computational resources and might increase
                latency if set to anything other than 0.
            response_format:
              type: string
              enum:
                - mp3
                - opus
                - aac
                - flac
                - wav
                - pcm
              default: mp3
              description: >-
                Format of the output content for non-streaming requests.
                Controls how the generated audio data is encoded in the
                response.
            speed:
              type: number
              minimum: 0.25
              maximum: 4
              default: 1
              description: >-
                Adjusts the speed of the voice. A value of 1.0 is the default
                speed, while values less than 1.0 slow down the speech, and
                values greater than 1.0 speed it up.
          required:
            - model
            - text
        - type: object
          properties:
            model:
              type: string
              enum:
                - hume/octave-2
            text:
              type: string
              minLength: 1
              maxLength: 500000
              description: The text content to be converted to speech.
            voice:
              type: string
              enum:
                - Vince Douglas
                - Male English Actor
                - Ava Song
                - Campfire Narrator
                - TikTok Fashion Influencer
                - Colton Rivers
                - Literature Professor
                - Booming American Narrator
                - Imani Carter
                - Terrence Bentley
                - Nature Documentary Narrator
                - Alice Bennett
                - Sitcom Girl
                - Unserious Movie Trailer Narrator
                - Articulate ASMR British Narrator
                - Big Dicky
                - English Children's Book Narrator
                - Sebastian Lockwood
                - Donovan Sinclair
                - Booming British Narrator
                - Relaxing ASMR Woman
                - Lady Elizabeth
                - Male Protagonist
                - Tough Guy
                - French Chef
                - Spanish Instructor
                - Charming Cowgirl
              default: Vince Douglas
              description: Name of the voice to be used.
            # Output format of the synthesized audio; only the values listed in
            # the enum are accepted, so the description covers exactly those.
            format:
              type: string
              enum:
                - wav
                - mp3
              description: >-
                Audio output format. WAV delivers uncompressed audio in a widely
                supported container format, while MP3 provides good compression
                and compatibility.
          required:
            - model
            - text
        - type: object
          properties:
            model:
              type: string
              enum:
                - inworld/tts-1
                - inworld/tts-1-max
                - inworld/tts-1-5-max
                - inworld/tts-1-5-mini
            text:
              type: string
              minLength: 1
              maxLength: 500000
              description: The text content to be converted to speech.
            voice:
              type: string
              enum:
                - Alex
                - Ashley
                - Craig
                - Deborah
                - Dennis
                - Dominus
                - Edward
                - Elizabeth
                - Hades
                - Heitor
                - Julia
                - Maitê
                - Mark
                - Olivia
                - Pixie
                - Priya
                - Ronald
                - Sarah
                - Shaun
                - Theodore
                - Timothy
                - Wendy
              default: Alex
              description: Name of the voice to be used.
            format:
              type: string
              enum:
                - wav
                - mp3
              default: mp3
              description: >-
                Audio output format. WAV delivers uncompressed audio in a widely
                supported container format, while MP3 provides good compression
                and compatibility.
          required:
            - model
            - text
    # Request payload for Minimax text-to-speech synthesis. Only `model` and
    # `text` are required; every other property is optional.
    Minimax.v2.TextToSpeechPayloadDTO:
      type: object
      properties:
        model:
          type: string
          enum:
            - minimax/speech-2.5-turbo-preview
            - minimax/speech-2.5-hd-preview
            - minimax/speech-2.6-hd
            - minimax/speech-2.6-turbo
          description: TTS model to use for synthesis
        # Input text, limited to 1-5000 characters.
        text:
          type: string
          minLength: 1
          maxLength: 5000
          description: The text content to be converted to speech.
        # Per-request voice parameters; the object default presets only voice_id.
        voice_setting:
          type: object
          properties:
            # Accepts one of the listed system voice names or any other
            # 1-64 character string (presumably a custom voice ID; confirm).
            voice_id:
              anyOf:
                - type: string
                  # Anchored as ref_70 so that timbre_weights[].voice_id below
                  # can reuse the same list through the *ref_70 alias.
                  enum: &ref_70
                    - Wise_Woman
                    - Friendly_Person
                    - Inspirational_girl
                    - Deep_Voice_Man
                    - Calm_Woman
                    - Casual_Guy
                    - Lively_Girl
                    - Patient_Man
                    - Young_Knight
                    - Determined_Man
                    - Lovely_Girl
                    - Decent_Boy
                    - Imposing_Manner
                    - Elegant_Man
                    - Abbess
                    - Sweet_Girl_2
                    - Exuberant_Girl
                - type: string
                  minLength: 1
                  maxLength: 64
              default: Wise_Woman
              description: A predefined system voice for text-to-speech synthesis.
            speed:
              type: number
              minimum: 0.5
              maximum: 2
              default: 1
              description: >-
                Adjusts the speed of the voice. A value of 1.0 is the default
                speed, while values less than 1.0 slow down the speech, and
                values greater than 1.0 speed it up.
            # The inclusive minimum of 0.01 stands in for the exclusive lower
            # bound of the documented (0, 10] range.
            vol:
              type: number
              minimum: 0.01
              maximum: 10
              default: 1
              description: >-
                The volume of the generated speech. Range: (0, 10]. Larger
                values indicate larger volumes.
            # Distinct from voice_modify.pitch further down, which is an integer
            # in [-100, 100].
            pitch:
              type: number
              minimum: -12
              maximum: 12
              default: 0
              description: >-
                The pitch of the generated speech. Range: [-12, 12]. 0 = default
                voice output.
            emotion:
              type: string
              enum:
                - happy
                - sad
                - angry
                - fearful
                - disgusted
                - surprised
                - neutral
              description: >-
                Emotional tone to apply to the synthesized speech. Controls the
                emotional expression of the generated voice output.
            text_normalization:
              type: boolean
              default: false
              description: >-
                English text normalization support. Improves number-reading but
                increases latency.
          default:
            voice_id: Wise_Woman
          description: >-
            Voice settings overriding stored settings for the given voice. They
            are applied only on the given request.
        # Output encoding options; none of the sub-properties is required and
        # only `format` declares a default.
        audio_setting:
          type: object
          properties:
            sample_rate:
              type: integer
              description: Audio sample rate in Hz.
              enum:
                - 8000
                - 16000
                - 22050
                - 24000
                - 32000
                - 44100
            bitrate:
              type: integer
              description: >-
                Audio bitrate in bits per second. Controls the compression level
                and audio quality. Higher bitrates provide better quality but
                larger file sizes.
              enum:
                - 32000
                - 64000
                - 128000
                - 256000
            format:
              type: string
              enum:
                - mp3
                - pcm
                - flac
              default: mp3
              description: >-
                Audio output format. MP3 provides good compression and
                compatibility, PCM offers uncompressed high quality, and FLAC
                provides lossless compression.
            channel:
              type: integer
              description: >-
                Number of audio channels. 1 for mono (single channel), 2 for
                stereo (dual channel) output.
              enum:
                - 1
                - 2
          description: Audio output configuration
        # Optional as a whole, but `tone` becomes mandatory once the object is
        # supplied.
        pronunciation_dict:
          type: object
          properties:
            tone:
              type: array
              items:
                type: string
              description: >-
                Replacement of text and pronunciations. Format:
                ["燕少飞/(yan4)(shao3)(fei1)", "达菲/(da2)(fei1)", "omg/oh my god"]
          required:
            - tone
          description: >-
            Custom pronunciation dictionary for handling specific words or
            phrases. Allows fine-tuning of how certain text should be pronounced
            using phonetic representations.
        # Up to four {voice_id, weight} entries; both fields are required in
        # each entry.
        timbre_weights:
          type: array
          items:
            type: object
            properties:
              voice_id:
                anyOf:
                  - type: string
                    # Same system voice list as voice_setting.voice_id.
                    enum: *ref_70
                  - type: string
                    minLength: 1
                    maxLength: 64
                description: A predefined system voice for text-to-speech synthesis.
              weight:
                type: integer
                minimum: 1
                maximum: 100
                description: >-
                  Weight for voice mixing. Range: [1, 100]. Higher weights are
                  sampled more heavily.
            required:
              - voice_id
              - weight
          maxItems: 4
          description: >-
            Voice mixing configuration allowing combination of up to 4 different
            voices with specified weights. Each voice contributes to the final
            output based on its weight value (1-100).
        stream:
          type: boolean
          default: false
          description: >-
            Enable streaming mode for real-time audio generation. When enabled,
            audio is generated and delivered in chunks as it's processed.
        language_boost:
          type: string
          enum:
            - Chinese
            - Chinese,Yue
            - English
            - Arabic
            - Russian
            - Spanish
            - French
            - Portuguese
            - German
            - Turkish
            - Dutch
            - Ukrainian
            - Vietnamese
            - Indonesian
            - Japanese
            - Italian
            - Korean
            - Thai
            - Polish
            - Romanian
            - Greek
            - Czech
            - Finnish
            - Hindi
            - Bulgarian
            - Danish
            - Hebrew
            - Malay
            - Persian
            - Slovak
            - Swedish
            - Croatian
            - Filipino
            - Hungarian
            - Norwegian
            - Slovenian
            - Catalan
            - Nynorsk
            - Tamil
            - Afrikaans
            - auto
          description: Language recognition enhancement option.
        # Post-processing controls; every sub-property is optional and the three
        # numeric ones share the integer range [-100, 100].
        voice_modify:
          type: object
          properties:
            pitch:
              type: integer
              minimum: -100
              maximum: 100
              description: Pitch level (-100 to 100)
            intensity:
              type: integer
              minimum: -100
              maximum: 100
              description: Intensity level (-100 to 100)
            timbre:
              type: integer
              minimum: -100
              maximum: 100
              description: Timbre level (-100 to 100)
            sound_effects:
              type: string
              enum:
                - spacious_echo
                - auditorium_echo
                - lofi_telephone
                - robotic
              description: >-
                Audio effects to apply to the synthesized speech. Includes
                options like spacious_echo, auditorium_echo, lofi_telephone, and
                robotic effects.
          description: >-
            Voice modification settings for adjusting pitch, intensity, timbre,
            and applying sound effects to customize the voice characteristics.
        # NOTE(review): the "non-streaming only" restriction stated in the
        # description is not expressed as a schema constraint against `stream`.
        subtitle_enable:
          type: boolean
          default: false
          description: >-
            Enable subtitle generation service. Only available for non-streaming
            requests. Generates timing information for the synthesized speech.
        # NOTE(review): likewise documented for non-streaming requests only; the
        # schema itself does not tie this property to `stream`.
        output_format:
          type: string
          enum:
            - url
            - hex
          default: hex
          description: >-
            Format of the output content for non-streaming requests. Controls
            how the generated audio data is encoded in the response.
      required:
        - model
        - text
    # Request body for creating embeddings. Exactly one variant must match
    # (oneOf); the variants list disjoint `model` values, so the chosen model
    # determines which additional parameters are accepted. `model` and `input`
    # are required in every variant.
    Embedding.v1.CreateEmbeddingsDTO:
      oneOf:
        # text-embedding-* models. This is the only variant that rejects unknown
        # properties (additionalProperties: false).
        - type: object
          properties:
            model:
              type: string
              enum:
                - text-embedding-3-small
                - text-embedding-3-large
                - text-embedding-ada-002
            input:
              anyOf:
                - type: string
                  minLength: 1
                - type: array
                  items:
                    type: string
                  minItems: 1
              description: Input text to embed, given as a string or an array of strings.
            # NOTE(review): `nullable: true` is combined with an enum that does
            # not list null; some OpenAPI 3.0 validators will still reject null
            # here - confirm the intended behaviour.
            encoding_format:
              type: string
              nullable: true
              enum:
                - float
                - base64
              default: float
              description: The format in which to return the embeddings.
            # NOTE(review): typed as `number` (fractions allowed) whereas the
            # alibaba/* variant below uses a bounded `integer`; the default is
            # only stated in prose, not via a `default` keyword.
            dimensions:
              type: number
              nullable: true
              description: The number of dimensions for the embedding. Default is 1024.
          required:
            - model
            - input
          additionalProperties: false
        # voyage-* models.
        - type: object
          properties:
            model:
              type: string
              enum:
                - voyage-large-2-instruct
                - voyage-finance-2
                - voyage-multilingual-2
                - voyage-law-2
                - voyage-code-2
                - voyage-large-2
                - voyage-2
            # NOTE(review): a single string may be up to 8000 characters while
            # each array item is capped at 800, and the array has no minItems
            # unlike the other variants - confirm these limits are intentional.
            input:
              anyOf:
                - type: string
                  minLength: 1
                  maxLength: 8000
                - type: array
                  items:
                    type: string
                    maxLength: 800
              description: Input text to embed, given as a string or an array of strings.
            input_type:
              type: string
              enum:
                - document
              default: document
              description: The type of input data for the model.
          required:
            - model
            - input
        # togethercomputer/m2-bert-80M-32k-retrieval: no extra parameters.
        - type: object
          properties:
            model:
              type: string
              enum:
                - togethercomputer/m2-bert-80M-32k-retrieval
            input:
              anyOf:
                - type: string
                  minLength: 1
                - type: array
                  items:
                    type: string
                  minItems: 1
              description: Input text to embed, given as a string or an array of strings.
          required:
            - model
            - input
        # text-multilingual-embedding-002: adds truncation, task type and title.
        - type: object
          properties:
            model:
              type: string
              enum:
                - text-multilingual-embedding-002
            input:
              anyOf:
                - type: string
                  minLength: 1
                - type: array
                  items:
                    type: string
                  minItems: 1
              description: Input text to embed, given as a string or an array of strings.
            dimensions:
              type: number
              nullable: true
              description: The number of dimensions for the embedding. Default is 1024.
            auto_truncate:
              type: boolean
              default: true
              description: >-
                If enabled, this parameter automatically truncates the input
                text to fit within the model’s maximum token limit. It helps
                ensure that longer texts are processed without errors.
            task_type:
              type: string
              enum:
                - RETRIEVAL_QUERY
                - RETRIEVAL_DOCUMENT
                - SEMANTIC_SIMILARITY
                - CLASSIFICATION
                - CLUSTERING
                - QUESTION_ANSWERING
                - FACT_VERIFICATION
              description: Optional task type for which the embeddings will be used.
            # Literal block scalar: keeps the paragraph break before the note
            # without leaking source indentation or whitespace-only lines into
            # the rendered description.
            title:
              type: string
              description: |-
                An optional title for the text. Only applicable when task_type is RETRIEVAL_DOCUMENT.

                Note: Specifying a title for RETRIEVAL_DOCUMENT provides better quality embeddings for retrieval.
          required:
            - model
            - input
        # alibaba/qwen-text-embedding-* models: `dimensions` is an integer in
        # [64, 2048] with an explicit default of 1024.
        - type: object
          properties:
            model:
              type: string
              enum:
                - alibaba/qwen-text-embedding-v4
                - alibaba/qwen-text-embedding-v3
            input:
              anyOf:
                - type: string
                  minLength: 1
                - type: array
                  items:
                    type: string
                  minItems: 1
              description: Input text to embed, given as a string or an array of strings.
            dimensions:
              type: integer
              minimum: 64
              maximum: 2048
              default: 1024
              description: The number of dimensions for the embedding. Default is 1024.
          required:
            - model
            - input
    # Response body for an embeddings request. All four top-level fields are
    # mandatory.
    Embedding.v1.CreateEmbeddingsResponseDTO:
      required:
        - object
        - data
        - model
        - usage
      type: object
      properties:
        # Fixed marker: the only permitted value is the string `object`.
        object:
          type: string
          enum:
            - object
        # One entry per embedding; each entry must carry all three fields.
        data:
          type: array
          items:
            required:
              - object
              - index
              - embedding
            type: object
            properties:
              # Fixed marker: the only permitted value is `embedding`.
              object:
                type: string
                enum:
                  - embedding
              index:
                type: number
              # The embedding itself, as an array of numbers.
              embedding:
                type: array
                items:
                  type: number
        model:
          type: string
        # `usage` must be present, but its total_tokens member is optional and
        # may be null.
        usage:
          type: object
          properties:
            total_tokens:
              type: number
              nullable: true
    # OCR request body: valid when it matches at least one of the referenced
    # request DTOs (anyOf), each defined under components/schemas.
    Vision.v1.OCRPayloadDTO:
      anyOf:
        - $ref: '#/components/schemas/Vision.v1.OCRGoogleRequestDTO'
        - $ref: '#/components/schemas/Vision.v1.OCRMistralRequestDTO'
        - $ref: '#/components/schemas/Vision.v1.OCRZhipuRequestDTO'
    # OCR request for the google/gc-document-ai model. Only `document` is
    # required, and unknown properties are rejected (additionalProperties:
    # false).
    Vision.v1.OCRGoogleRequestDTO:
      type: object
      properties:
        # Single-value enum with a matching default; the property is flagged as
        # deprecated.
        model:
          type: string
          enum:
            - google/gc-document-ai
          default: google/gc-document-ai
          deprecated: true
        # The second branch accepts any string, so the `uri` format on the first
        # branch does not restrict what validates.
        document:
          anyOf:
            - type: string
              format: uri
            - type: string
          description: The document file to be processed by the OCR model.
        mimeType:
          type: string
          enum:
            - application/pdf
            - image/gif
            - image/tiff
            - image/jpeg
            - image/png
            - image/bmp
            - image/webp
            - text/html
          description: The MIME type of the document.
        # Page selector with four shapes, told apart by the `type` field:
        # start, end, range, or indices. Page numbers have a minimum of 1.
        pages:
          anyOf:
            # `start`: presumably "from this page onward" - confirm.
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - start
                start:
                  type: integer
                  minimum: 1
              required:
                - type
                - start
            # `end`: presumably "up to this page" - confirm.
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - end
                end:
                  type: integer
                  minimum: 1
              required:
                - type
                - end
            # `range`: both bounds required; `end` has a minimum of 2.
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - range
                start:
                  type: integer
                  minimum: 1
                end:
                  type: integer
                  minimum: 2
              required:
                - type
                - start
                - end
            # `indices`: explicit page list, at most 15 entries.
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - indices
                indices:
                  type: array
                  items:
                    type: integer
                    minimum: 1
                  maxItems: 15
              required:
                - type
                - indices
          description: Specific pages you want to process
      required:
        - document
      additionalProperties: false
    # OCR request for the mistral/mistral-ocr-latest model. Only `document` is
    # required.
    Vision.v1.OCRMistralRequestDTO:
      type: object
      properties:
        model:
          type: string
          enum:
            - mistral/mistral-ocr-latest
          default: mistral/mistral-ocr-latest
          description: Model ID
        # Exactly one of two shapes, selected by `type`: a document URL or an
        # image URL. Each shape requires `type` plus its matching URL field.
        document:
          oneOf:
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - document_url
                  description: Type of document.
                document_url:
                  type: string
                  format: uri
                  description: Document URL.
              required:
                - type
                - document_url
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - image_url
                  description: Type of document.
                image_url:
                  type: string
                  format: uri
                  description: Image URL.
              required:
                - type
                - image_url
          description: Document to run OCR
        # NOTE(review): the last anyOf branch has no `type`. In OpenAPI 3.0
        # `nullable` only extends an explicit `type`, so validators may read
        # this branch as accepting any value - confirm that only null was
        # intended.
        pages:
          anyOf:
            - type: string
            - type: array
              items:
                type: integer
            - nullable: true
          description: Specific pages you want to process
          example: '"3" or "0-2" or [0, 3, 4]'
        include_image_base64:
          type: boolean
          nullable: true
          description: Include base64 images in response
        image_limit:
          type: integer
          nullable: true
          description: Max images to extract
        image_min_size:
          type: integer
          nullable: true
          description: Minimum height and width of image to extract
      required:
        - document
    # OCR request for the zhipu/glm-ocr model. Only `document` is required.
    Vision.v1.OCRZhipuRequestDTO:
      type: object
      properties:
        model:
          type: string
          enum:
            - zhipu/glm-ocr
          default: zhipu/glm-ocr
          description: Model ID
        # Exactly one of two shapes, selected by `type`: a document URL or an
        # image URL. Each shape requires `type` plus its matching URL field.
        document:
          oneOf:
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - document_url
                  description: Type of document.
                document_url:
                  type: string
                  format: uri
                  description: >-
                    URL of a document file to be processed by the OCR model.
                    Supported file formats: PDF ≤ 50MB.
              required:
                - type
                - document_url
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - image_url
                  description: Type of document.
                image_url:
                  type: string
                  format: uri
                  description: >-
                    URL of a single image to be processed by the OCR model.
                    Supported file formats: JPG, PNG. Single image ≤10MB.
              required:
                - type
                - image_url
          description: Document to run OCR.
        # NOTE(review): the last anyOf branch has no `type`. In OpenAPI 3.0
        # `nullable` only extends an explicit `type`, so validators may read
        # this branch as accepting any value - confirm that only null was
        # intended.
        pages:
          anyOf:
            - type: string
            - type: array
              items:
                type: integer
            - nullable: true
          description: Specific pages to process, e.g. "3", "0-2", [0, 3, 4].
        include_image_base64:
          type: boolean
          nullable: true
          description: Include base64 images in response.
        image_limit:
          type: integer
          nullable: true
          description: Max images to extract.
        image_min_size:
          type: integer
          nullable: true
          description: Minimum height and width of image to extract.
        return_crop_images:
          type: boolean
          nullable: true
          description: Whether to return screenshot information
        need_layout_visualization:
          type: boolean
          nullable: true
          description: Whether to return detailed layout image result information
      required:
        - document
    # Envelope for a model listing: an `object` string plus a `data` array of
    # list items. Both fields are mandatory.
    Model.v1.ModelsResponseDto:
      required:
        - object
        - data
      type: object
      properties:
        object:
          type: string
        # Each element follows Model.v1.ModelListItemDto.
        data:
          type: array
          items:
            $ref: '#/components/schemas/Model.v1.ModelListItemDto'
    # One entry of the model listing. Every property is mandatory.
    Model.v1.ModelListItemDto:
      required:
        - id
        - type
        - info
        - features
      type: object
      properties:
        id:
          type: string
          description: name of model
        type:
          type: string
        # Descriptive metadata, defined by Model.v1.ModelInfoDto.
        info:
          $ref: '#/components/schemas/Model.v1.ModelInfoDto'
        # Array of free-form strings; no enum is declared for the values.
        features:
          type: array
          items:
            type: string
    # Descriptive metadata about a model, referenced by the list item and
    # detailed DTOs. Every property is mandatory.
    Model.v1.ModelInfoDto:
      required:
        - name
        - developer
        - description
        - contextLength
        - url
      type: object
      properties:
        name:
          type: string
        developer:
          type: string
        description:
          type: string
        # Declared as `number`; the unit is not stated in the schema.
        contextLength:
          type: number
        # Plain string; no `format: uri` is declared.
        url:
          type: string
    # Detailed view of a single model. Every property is mandatory.
    Model.v1.ModelDetailedDto:
      required:
        - name
        - provider
        - type
        - info
        - features
        - tags
        - isPremium
        - isFree
      type: object
      properties:
        name:
          type: string
        provider:
          type: string
        type:
          type: string
        # Descriptive metadata, defined by Model.v1.ModelInfoDto.
        info:
          $ref: '#/components/schemas/Model.v1.ModelInfoDto'
        # `features` and `tags` are both arrays of free-form strings.
        features:
          type: array
          items:
            type: string
        tags:
          type: array
          items:
            type: string
        # Two independent boolean flags; the schema does not declare them
        # mutually exclusive.
        isPremium:
          type: boolean
        isFree:
          type: boolean
