"""ComfyUI API nodes for Vidu video generation.

Each node builds a ``TaskCreationRequest``, submits it to one of the
``/proxy/vidu/...`` endpoints, polls the task status endpoint until the task
reaches a terminal state, and downloads the first generated video.
"""

import logging
from enum import Enum
from typing import Any, Callable, Optional, Literal, TypeVar
from typing_extensions import override

import torch
from pydantic import BaseModel, Field

from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api_nodes.util.validation_utils import (
    validate_aspect_ratio_closeness,
    validate_image_dimensions,
    validate_image_aspect_ratio_range,
    get_number_of_images,
)
from comfy_api_nodes.apis.client import (
    ApiEndpoint,
    HttpMethod,
    SynchronousOperation,
    PollingOperation,
    EmptyRequest,
)
from comfy_api_nodes.apinode_utils import download_url_to_video_output, upload_images_to_comfyapi


# Task-creation endpoints, one per generation mode.
VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
VIDU_IMAGE_TO_VIDEO = "/proxy/vidu/img2video"
VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
# Status endpoint; "%s" is filled with the task id returned on creation.
VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"

R = TypeVar("R")


class VideoModelName(str, Enum):
    """Model identifiers accepted in the request's ``model`` field."""

    vidu_q1 = 'viduq1'


class AspectRatio(str, Enum):
    """Output aspect ratios offered by the nodes."""

    r_16_9 = "16:9"
    r_9_16 = "9:16"
    r_1_1 = "1:1"


class Resolution(str, Enum):
    """Output resolutions offered by the nodes."""

    r_1080p = "1080p"


class MovementAmplitude(str, Enum):
    """Values for the ``movement_amplitude`` request field."""

    auto = "auto"
    small = "small"
    medium = "medium"
    large = "large"


class TaskCreationRequest(BaseModel):
    """Request body shared by all four task-creation endpoints.

    Note that the model selector field is named ``model``; construct this
    class with ``model=...`` so the selection is actually stored.
    """

    model: VideoModelName = VideoModelName.vidu_q1
    prompt: Optional[str] = Field(None, max_length=1500)
    duration: Optional[Literal[5]] = 5
    seed: Optional[int] = Field(0, ge=0, le=2147483647)
    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
    resolution: Optional[Resolution] = Resolution.r_1080p
    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
    images: Optional[list[str]] = Field(None, description="Base64 encoded string or image URL")


class TaskStatus(str, Enum):
    """Task states reported by the creation and status endpoints."""

    created = "created"
    queueing = "queueing"
    processing = "processing"
    success = "success"
    failed = "failed"


class TaskCreationResponse(BaseModel):
    """Response returned when a task is created."""

    task_id: str = Field(...)
    state: TaskStatus = Field(...)
    created_at: str = Field(...)
    code: Optional[int] = Field(None, description="Error code")


class TaskResult(BaseModel):
    """A single generated result attached to a finished task."""

    id: str = Field(..., description="Creation id")
    url: str = Field(..., description="The URL of the generated results, valid for one hour")
    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")


class TaskStatusResponse(BaseModel):
    """Response returned by the task status endpoint."""

    state: TaskStatus = Field(...)
    err_code: Optional[str] = Field(None)
    creations: list[TaskResult] = Field(..., description="Generated results")


async def poll_until_finished(
    auth_kwargs: dict[str, str],
    api_endpoint: ApiEndpoint[Any, R],
    result_url_extractor: Optional[Callable[[R], str]] = None,
    estimated_duration: Optional[int] = None,
    node_id: Optional[str] = None,
) -> R:
    """Poll ``api_endpoint`` until the task state is ``success`` or ``failed``.

    Args:
        auth_kwargs: Authentication values forwarded to the polling operation.
        api_endpoint: Endpoint to poll; its response must expose ``state``.
        result_url_extractor: Optional callable that pulls a result URL out of
            a response; forwarded unchanged to the polling operation.
        estimated_duration: Forwarded unchanged to the polling operation.
        node_id: Forwarded unchanged to the polling operation.

    Returns:
        Whatever ``PollingOperation.execute()`` returns for the endpoint.
    """
    return await PollingOperation(
        poll_endpoint=api_endpoint,
        completed_statuses=[TaskStatus.success.value],
        failed_statuses=[TaskStatus.failed.value],
        status_extractor=lambda response: response.state.value,
        auth_kwargs=auth_kwargs,
        result_url_extractor=result_url_extractor,
        estimated_duration=estimated_duration,
        node_id=node_id,
        poll_interval=16.0,
        max_poll_attempts=256,
    ).execute()


def get_video_url_from_response(response: TaskStatusResponse) -> Optional[str]:
    """Return the URL of the first creation, or ``None`` if there are none."""
    if response.creations:
        return response.creations[0].url
    return None


def get_video_from_response(response: TaskStatusResponse) -> TaskResult:
    """Return the first creation of a finished task.

    Raises:
        RuntimeError: If the response contains no creations.
    """
    if not response.creations:
        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
        # This path raises, so log it at the same level as other failures here.
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    first_result = response.creations[0]
    logging.info("Vidu task %s succeeded. Video URL: %s", first_result.id, first_result.url)
    return first_result


async def execute_task(
    vidu_endpoint: str,
    auth_kwargs: Optional[dict[str, str]],
    payload: TaskCreationRequest,
    estimated_duration: int,
    node_id: str,
) -> TaskStatusResponse:
    """Create a Vidu task and wait for it to finish.

    Args:
        vidu_endpoint: Path of the task-creation endpoint to POST to.
        auth_kwargs: Authentication values forwarded to both API operations.
        payload: Request body for the creation call.
        estimated_duration: Forwarded to the polling operation.
        node_id: Forwarded to the polling operation.

    Returns:
        The final status response produced by polling.

    Raises:
        RuntimeError: If the creation response already reports ``failed``.
    """
    response = await SynchronousOperation(
        endpoint=ApiEndpoint(
            path=vidu_endpoint,
            method=HttpMethod.POST,
            request_model=TaskCreationRequest,
            response_model=TaskCreationResponse,
        ),
        request=payload,
        auth_kwargs=auth_kwargs,
    ).execute()
    if response.state == TaskStatus.failed:
        error_msg = f"Vidu request failed. Code: {response.code}"
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    return await poll_until_finished(
        auth_kwargs,
        ApiEndpoint(
            path=VIDU_GET_GENERATION_STATUS % response.task_id,
            method=HttpMethod.GET,
            request_model=EmptyRequest,
            response_model=TaskStatusResponse,
        ),
        result_url_extractor=get_video_url_from_response,
        estimated_duration=estimated_duration,
        node_id=node_id,
    )


class ViduTextToVideoNode(comfy_io.ComfyNode):
    """Node that generates a video from a text prompt."""

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs and hidden auth values."""
        return comfy_io.Schema(
            node_id="ViduTextToVideoNode",
            display_name="Vidu Text To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from text prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[model.value for model in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Submit a text-to-video task and return the downloaded video.

        Raises:
            ValueError: If ``prompt`` is empty.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        payload = TaskCreationRequest(
            # The request field is `model`; a `model_name=` keyword would not
            # populate it and the selected model would be lost.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))


class ViduImageToVideoNode(comfy_io.ComfyNode):
    """Node that generates a video from a start image and an optional prompt."""

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs and hidden auth values."""
        return comfy_io.Schema(
            node_id="ViduImageToVideoNode",
            display_name="Vidu Image To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from image and optional prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[model.value for model in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="An image to be used as the start frame of the generated video",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Submit an image-to-video task and return the downloaded video.

        Raises:
            ValueError: If more than one input image is supplied.
        """
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
        payload = TaskCreationRequest(
            # The request field is `model`; a `model_name=` keyword would not
            # populate it and the selected model would be lost.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            image,
            max_images=1,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))


class ViduReferenceVideoNode(comfy_io.ComfyNode):
    """Node that generates a video from up to seven reference images and a prompt."""

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs and hidden auth values."""
        return comfy_io.Schema(
            node_id="ViduReferenceVideoNode",
            display_name="Vidu Reference To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from multiple images and prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[model.value for model in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "images",
                    tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        images: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Submit a reference-to-video task and return the downloaded video.

        Raises:
            ValueError: If ``prompt`` is empty or more than 7 images are given.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        image_count = get_number_of_images(images)
        if image_count > 7:
            raise ValueError("Too many images, maximum allowed is 7.")
        # Validate every reference image individually before uploading any.
        for image in images:
            validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
            validate_image_dimensions(image, min_width=128, min_height=128)
        payload = TaskCreationRequest(
            # The request field is `model`; a `model_name=` keyword would not
            # populate it and the selected model would be lost.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            images,
            max_images=7,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))


class ViduStartEndToVideoNode(comfy_io.ComfyNode):
    """Node that generates a video from a start frame, an end frame and a prompt."""

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs and hidden auth values."""
        return comfy_io.Schema(
            node_id="ViduStartEndToVideoNode",
            display_name="Vidu Start End To Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from start and end frames and a prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[model.value for model in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "first_frame",
                    tooltip="Start frame",
                ),
                comfy_io.Image.Input(
                    "end_frame",
                    tooltip="End frame",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: torch.Tensor,
        end_frame: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Submit a start/end-frame task and return the downloaded video."""
        validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
        payload = TaskCreationRequest(
            # The request field is `model`; a `model_name=` keyword would not
            # populate it and the selected model would be lost.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # Upload the frames one at a time, start frame first, keeping the
        # first returned entry of each upload.
        payload.images = [
            (await upload_images_to_comfyapi(frame, max_images=1, mime_type="image/png", auth_kwargs=auth))[0]
            for frame in (first_frame, end_frame)
        ]
        results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))


class ViduExtension(ComfyExtension):
    """Extension that registers the Vidu nodes."""

    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        return [
            ViduTextToVideoNode,
            ViduImageToVideoNode,
            ViduReferenceVideoNode,
            ViduStartEndToVideoNode,
        ]


async def comfy_entrypoint() -> ViduExtension:
    """Create the extension instance."""
    return ViduExtension()