Add Veo3 video generation node with audio support (#9110)

- Create new Veo3VideoGenerationNode that extends VeoVideoGenerationNode
- Add support for generateAudio parameter (only for Veo3 models)
- Support new Veo3 models: veo-3.0-generate-001, veo-3.0-fast-generate-001
- Fix Veo3 duration constraint to 8 seconds only
- Update original node to be clearly Veo 2 only
- Update API paths to use model parameter: /proxy/veo/{model}/generate
- Regenerate API types from staging to include generateAudio parameter
- Fix TripoModelVersion enum reference after regeneration
- Mark generated API types file in .gitattributes
This commit is contained in:
Christian Byrne 2025-08-04 22:52:25 -07:00 committed by GitHub
parent c012400240
commit f69609bbd6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 2664 additions and 93 deletions

1
.gitattributes vendored
View File

@ -1,2 +1,3 @@
/web/assets/** linguist-generated
/web/** linguist-vendored
comfy_api_nodes/apis/__init__.py linguist-generated

File diff suppressed because it is too large Load Diff

View File

@ -127,7 +127,7 @@ class TripoTextToModelRequest(BaseModel):
type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description='Type of task')
prompt: str = Field(..., description='The text prompt describing the model to generate', max_length=1024)
negative_prompt: Optional[str] = Field(None, description='The negative text prompt', max_length=1024)
model_version: Optional[TripoModelVersion] = TripoModelVersion.V2_5
model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123
face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to')
texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model')
pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model')

View File

@ -8,10 +8,10 @@ from typing import Optional
from comfy.comfy_types.node_typing import IO, ComfyNodeABC
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis import (
Veo2GenVidRequest,
Veo2GenVidResponse,
Veo2GenVidPollRequest,
Veo2GenVidPollResponse
VeoGenVidRequest,
VeoGenVidResponse,
VeoGenVidPollRequest,
VeoGenVidPollResponse
)
from comfy_api_nodes.apis.client import (
ApiEndpoint,
@ -35,7 +35,7 @@ def convert_image_to_base64(image: torch.Tensor):
return tensor_to_base64_string(scaled_image)
def get_video_url_from_response(poll_response: Veo2GenVidPollResponse) -> Optional[str]:
def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optional[str]:
if (
poll_response.response
and hasattr(poll_response.response, "videos")
@ -130,6 +130,14 @@ class VeoVideoGenerationNode(ComfyNodeABC):
"default": None,
"tooltip": "Optional reference image to guide video generation",
}),
"model": (
IO.COMBO,
{
"options": ["veo-2.0-generate-001"],
"default": "veo-2.0-generate-001",
"tooltip": "Veo 2 model to use for video generation",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
@ -141,7 +149,7 @@ class VeoVideoGenerationNode(ComfyNodeABC):
RETURN_TYPES = (IO.VIDEO,)
FUNCTION = "generate_video"
CATEGORY = "api node/video/Veo"
DESCRIPTION = "Generates videos from text prompts using Google's Veo API"
DESCRIPTION = "Generates videos from text prompts using Google's Veo 2 API"
API_NODE = True
def generate_video(
@ -154,6 +162,8 @@ class VeoVideoGenerationNode(ComfyNodeABC):
person_generation="ALLOW",
seed=0,
image=None,
model="veo-2.0-generate-001",
generate_audio=False,
unique_id: Optional[str] = None,
**kwargs,
):
@ -188,16 +198,19 @@ class VeoVideoGenerationNode(ComfyNodeABC):
parameters["negativePrompt"] = negative_prompt
if seed > 0:
parameters["seed"] = seed
# Only add generateAudio for Veo 3 models
if "veo-3.0" in model:
parameters["generateAudio"] = generate_audio
# Initial request to start video generation
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path="/proxy/veo/generate",
path=f"/proxy/veo/{model}/generate",
method=HttpMethod.POST,
request_model=Veo2GenVidRequest,
response_model=Veo2GenVidResponse
request_model=VeoGenVidRequest,
response_model=VeoGenVidResponse
),
request=Veo2GenVidRequest(
request=VeoGenVidRequest(
instances=instances,
parameters=parameters
),
@ -223,16 +236,16 @@ class VeoVideoGenerationNode(ComfyNodeABC):
# Define the polling operation
poll_operation = PollingOperation(
poll_endpoint=ApiEndpoint(
path="/proxy/veo/poll",
path=f"/proxy/veo/{model}/poll",
method=HttpMethod.POST,
request_model=Veo2GenVidPollRequest,
response_model=Veo2GenVidPollResponse
request_model=VeoGenVidPollRequest,
response_model=VeoGenVidPollResponse
),
completed_statuses=["completed"],
failed_statuses=[], # No failed statuses, we'll handle errors after polling
status_extractor=status_extractor,
progress_extractor=progress_extractor,
request=Veo2GenVidPollRequest(
request=VeoGenVidPollRequest(
operationName=operation_name
),
auth_kwargs=kwargs,
@ -298,11 +311,64 @@ class VeoVideoGenerationNode(ComfyNodeABC):
return (VideoFromFile(video_io),)
# Register the node
class Veo3VideoGenerationNode(VeoVideoGenerationNode):
"""
Generates videos from text prompts using Google's Veo 3 API.
Supported models:
- veo-3.0-generate-001
- veo-3.0-fast-generate-001
This node extends the base Veo node with Veo 3 specific features including
audio generation and fixed 8-second duration.
"""
@classmethod
def INPUT_TYPES(s):
parent_input = super().INPUT_TYPES()
# Update model options for Veo 3
parent_input["optional"]["model"] = (
IO.COMBO,
{
"options": ["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
"default": "veo-3.0-generate-001",
"tooltip": "Veo 3 model to use for video generation",
},
)
# Add generateAudio parameter
parent_input["optional"]["generate_audio"] = (
IO.BOOLEAN,
{
"default": False,
"tooltip": "Generate audio for the video. Supported by all Veo 3 models.",
}
)
# Update duration constraints for Veo 3 (only 8 seconds supported)
parent_input["optional"]["duration_seconds"] = (
IO.INT,
{
"default": 8,
"min": 8,
"max": 8,
"step": 1,
"display": "number",
"tooltip": "Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
},
)
return parent_input
# Register the nodes
NODE_CLASS_MAPPINGS = {
"VeoVideoGenerationNode": VeoVideoGenerationNode,
"Veo3VideoGenerationNode": Veo3VideoGenerationNode,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"VeoVideoGenerationNode": "Google Veo2 Video Generation",
"VeoVideoGenerationNode": "Google Veo 2 Video Generation",
"Veo3VideoGenerationNode": "Google Veo 3 Video Generation",
}