Add Veo3 video generation node with audio support (#9110)

- Create new Veo3VideoGenerationNode that extends VeoVideoGenerationNode
- Add support for generateAudio parameter (only for Veo3 models)
- Support new Veo3 models: veo-3.0-generate-001, veo-3.0-fast-generate-001
- Fix Veo3 duration constraint to 8 seconds only
- Update original node to be clearly Veo 2 only
- Update API paths to use model parameter: /proxy/veo/{model}/generate
- Regenerate API types from staging to include generateAudio parameter
- Fix TripoModelVersion enum reference after regeneration
- Mark generated API types file in .gitattributes
2025-10-24 23:44:20 +00:00 · 2025-08-04 22:52:25 -07:00
parent c012400240
commit f69609bbd6
4 changed files with 2664 additions and 93 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,3 @@
 /web/assets/** linguist-generated
 /web/** linguist-vendored
+comfy_api_nodes/apis/__init__.py linguist-generated
--- a/comfy_api_nodes/apis/__init__.py
+++ b/comfy_api_nodes/apis/__init__.py
--- a/comfy_api_nodes/apis/tripo_api.py
+++ b/comfy_api_nodes/apis/tripo_api.py
@@ -127,7 +127,7 @@ class TripoTextToModelRequest(BaseModel):
    type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description='Type of task')
    prompt: str = Field(..., description='The text prompt describing the model to generate', max_length=1024)
    negative_prompt: Optional[str] = Field(None, description='The negative text prompt', max_length=1024)
-    model_version: Optional[TripoModelVersion] = TripoModelVersion.V2_5
+    model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123
    face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to')
    texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model')
    pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model')
--- a/comfy_api_nodes/nodes_veo2.py
+++ b/comfy_api_nodes/nodes_veo2.py
@@ -8,10 +8,10 @@ from typing import Optional
 from comfy.comfy_types.node_typing import IO, ComfyNodeABC
 from comfy_api.input_impl.video_types import VideoFromFile
 from comfy_api_nodes.apis import (
-    Veo2GenVidRequest,
-    Veo2GenVidResponse,
-    Veo2GenVidPollRequest,
-    Veo2GenVidPollResponse
+    VeoGenVidRequest,
+    VeoGenVidResponse,
+    VeoGenVidPollRequest,
+    VeoGenVidPollResponse
 )
 from comfy_api_nodes.apis.client import (
    ApiEndpoint,
@@ -35,7 +35,7 @@ def convert_image_to_base64(image: torch.Tensor):
    return tensor_to_base64_string(scaled_image)


-def get_video_url_from_response(poll_response: Veo2GenVidPollResponse) -> Optional[str]:
+def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optional[str]:
    if (
        poll_response.response
        and hasattr(poll_response.response, "videos")
@@ -130,6 +130,14 @@ class VeoVideoGenerationNode(ComfyNodeABC):
                    "default": None,
                    "tooltip": "Optional reference image to guide video generation",
                }),
+                "model": (
+                    IO.COMBO,
+                    {
+                        "options": ["veo-2.0-generate-001"],
+                        "default": "veo-2.0-generate-001",
+                        "tooltip": "Veo 2 model to use for video generation",
+                    },
+                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
@@ -141,7 +149,7 @@ class VeoVideoGenerationNode(ComfyNodeABC):
    RETURN_TYPES = (IO.VIDEO,)
    FUNCTION = "generate_video"
    CATEGORY = "api node/video/Veo"
-    DESCRIPTION = "Generates videos from text prompts using Google's Veo API"
+    DESCRIPTION = "Generates videos from text prompts using Google's Veo 2 API"
    API_NODE = True

    def generate_video(
@@ -154,6 +162,8 @@ class VeoVideoGenerationNode(ComfyNodeABC):
        person_generation="ALLOW",
        seed=0,
        image=None,
+        model="veo-2.0-generate-001",
+        generate_audio=False,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
@@ -188,16 +198,19 @@ class VeoVideoGenerationNode(ComfyNodeABC):
            parameters["negativePrompt"] = negative_prompt
        if seed > 0:
            parameters["seed"] = seed
+        # Only add generateAudio for Veo 3 models
+        if "veo-3.0" in model:
+            parameters["generateAudio"] = generate_audio

        # Initial request to start video generation
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
-                path="/proxy/veo/generate",
+                path=f"/proxy/veo/{model}/generate",
                method=HttpMethod.POST,
-                request_model=Veo2GenVidRequest,
-                response_model=Veo2GenVidResponse
+                request_model=VeoGenVidRequest,
+                response_model=VeoGenVidResponse
            ),
-            request=Veo2GenVidRequest(
+            request=VeoGenVidRequest(
                instances=instances,
                parameters=parameters
            ),
@@ -223,16 +236,16 @@ class VeoVideoGenerationNode(ComfyNodeABC):
        # Define the polling operation
        poll_operation = PollingOperation(
            poll_endpoint=ApiEndpoint(
-                path="/proxy/veo/poll",
+                path=f"/proxy/veo/{model}/poll",
                method=HttpMethod.POST,
-                request_model=Veo2GenVidPollRequest,
-                response_model=Veo2GenVidPollResponse
+                request_model=VeoGenVidPollRequest,
+                response_model=VeoGenVidPollResponse
            ),
            completed_statuses=["completed"],
            failed_statuses=[],  # No failed statuses, we'll handle errors after polling
            status_extractor=status_extractor,
            progress_extractor=progress_extractor,
-            request=Veo2GenVidPollRequest(
+            request=VeoGenVidPollRequest(
                operationName=operation_name
            ),
            auth_kwargs=kwargs,
@@ -298,11 +311,64 @@ class VeoVideoGenerationNode(ComfyNodeABC):
        return (VideoFromFile(video_io),)


-# Register the node
+class Veo3VideoGenerationNode(VeoVideoGenerationNode):
+    """
+    Generates videos from text prompts using Google's Veo 3 API.
+
+    Supported models:
+    - veo-3.0-generate-001
+    - veo-3.0-fast-generate-001
+
+    This node extends the base Veo node with Veo 3 specific features including
+    audio generation and fixed 8-second duration.
+    """
+
+    @classmethod
+    def INPUT_TYPES(s):
+        parent_input = super().INPUT_TYPES()
+
+        # Update model options for Veo 3
+        parent_input["optional"]["model"] = (
+            IO.COMBO,
+            {
+                "options": ["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
+                "default": "veo-3.0-generate-001",
+                "tooltip": "Veo 3 model to use for video generation",
+            },
+        )
+
+        # Add generateAudio parameter
+        parent_input["optional"]["generate_audio"] = (
+            IO.BOOLEAN,
+            {
+                "default": False,
+                "tooltip": "Generate audio for the video. Supported by all Veo 3 models.",
+            }
+        )
+
+        # Update duration constraints for Veo 3 (only 8 seconds supported)
+        parent_input["optional"]["duration_seconds"] = (
+            IO.INT,
+            {
+                "default": 8,
+                "min": 8,
+                "max": 8,
+                "step": 1,
+                "display": "number",
+                "tooltip": "Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
+            },
+        )
+
+        return parent_input
+
+
+# Register the nodes
 NODE_CLASS_MAPPINGS = {
    "VeoVideoGenerationNode": VeoVideoGenerationNode,
+    "Veo3VideoGenerationNode": Veo3VideoGenerationNode,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
-    "VeoVideoGenerationNode": "Google Veo2 Video Generation",
+    "VeoVideoGenerationNode": "Google Veo 2 Video Generation",
+    "Veo3VideoGenerationNode": "Google Veo 3 Video Generation",
 }