mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-09-10 19:46:38 +00:00
623 lines
22 KiB
Python
623 lines
22 KiB
Python
import logging
|
|
from enum import Enum
|
|
from typing import Any, Callable, Optional, Literal, TypeVar
|
|
from typing_extensions import override
|
|
|
|
import torch
|
|
from pydantic import BaseModel, Field
|
|
|
|
from comfy_api.latest import ComfyExtension, io as comfy_io
|
|
from comfy_api_nodes.util.validation_utils import (
|
|
validate_aspect_ratio_closeness,
|
|
validate_image_dimensions,
|
|
validate_image_aspect_ratio_range,
|
|
get_number_of_images,
|
|
)
|
|
from comfy_api_nodes.apis.client import (
|
|
ApiEndpoint,
|
|
HttpMethod,
|
|
SynchronousOperation,
|
|
PollingOperation,
|
|
EmptyRequest,
|
|
)
|
|
from comfy_api_nodes.apinode_utils import download_url_to_video_output, upload_images_to_comfyapi
|
|
|
|
|
|
# Endpoint paths used for creating Vidu tasks (one per generation mode).
VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
VIDU_IMAGE_TO_VIDEO = "/proxy/vidu/img2video"
VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
# Status path template; the "%s" placeholder is filled with the task id.
VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"

# Type variable for the response model carried by an ApiEndpoint.
R = TypeVar("R")
|
|
|
|
class VideoModelName(str, Enum):
    """Model identifiers selectable for a Vidu generation request."""

    vidu_q1 = "viduq1"
|
|
|
|
|
|
class AspectRatio(str, Enum):
    """Aspect ratios offered for the output video."""

    r_16_9 = "16:9"
    r_9_16 = "9:16"
    r_1_1 = "1:1"
|
|
|
|
|
|
class Resolution(str, Enum):
    """Output resolutions offered for the generated video."""

    r_1080p = "1080p"
|
|
|
|
|
|
class MovementAmplitude(str, Enum):
    """Choices for how strongly objects in the frame should move."""

    auto = "auto"
    small = "small"
    medium = "medium"
    large = "large"
|
|
|
|
|
|
class TaskCreationRequest(BaseModel):
    """Request body sent to the Vidu task-creation endpoints.

    Every field carries a default, so callers only need to pass the values
    they want to override.
    """

    model: VideoModelName = VideoModelName.vidu_q1
    prompt: Optional[str] = Field(default=None, max_length=1500)
    # The only duration this model accepts is 5.
    duration: Optional[Literal[5]] = 5
    seed: Optional[int] = Field(default=0, ge=0, le=2147483647)
    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
    resolution: Optional[Resolution] = Resolution.r_1080p
    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
    images: Optional[list[str]] = Field(default=None, description="Base64 encoded string or image URL")
|
|
|
|
|
|
class TaskStatus(str, Enum):
    """States a Vidu task can report."""

    created = "created"
    queueing = "queueing"
    processing = "processing"
    success = "success"
    failed = "failed"
|
|
|
|
|
|
class TaskCreationResponse(BaseModel):
    """Response model for a task-creation request."""

    # Required fields: no default is supplied.
    task_id: str
    state: TaskStatus
    created_at: str
    code: Optional[int] = Field(default=None, description="Error code")
|
|
|
|
|
|
class TaskResult(BaseModel):
    """One generated result ("creation") of a Vidu task."""

    id: str = Field(default=..., description="Creation id")
    url: str = Field(default=..., description="The URL of the generated results, valid for one hour")
    cover_url: str = Field(default=..., description="The cover URL of the generated results, valid for one hour")
|
|
|
|
|
|
class TaskStatusResponse(BaseModel):
    """Response model for the task status endpoint."""

    state: TaskStatus
    err_code: Optional[str] = None
    creations: list[TaskResult] = Field(default=..., description="Generated results")
|
|
|
|
|
|
async def poll_until_finished(
    auth_kwargs: dict[str, str],
    api_endpoint: ApiEndpoint[Any, R],
    result_url_extractor: Optional[Callable[[R], str]] = None,
    estimated_duration: Optional[int] = None,
    node_id: Optional[str] = None,
) -> R:
    """Run a PollingOperation against ``api_endpoint`` and return its result.

    The operation treats ``TaskStatus.success`` as completed and
    ``TaskStatus.failed`` as failed, reading the status from the ``state``
    attribute of each response.

    Args:
        auth_kwargs: Authentication values forwarded to the operation.
        api_endpoint: Endpoint to poll.
        result_url_extractor: Optional callable forwarded to the operation.
        estimated_duration: Optional estimate forwarded to the operation.
        node_id: Optional node id forwarded to the operation.

    Returns:
        Whatever ``PollingOperation.execute()`` returns.
    """

    def _state_value(response) -> str:
        # The response's `state` is an enum; the operation compares raw values.
        return response.state.value

    operation = PollingOperation(
        poll_endpoint=api_endpoint,
        completed_statuses=[TaskStatus.success.value],
        failed_statuses=[TaskStatus.failed.value],
        status_extractor=_state_value,
        auth_kwargs=auth_kwargs,
        result_url_extractor=result_url_extractor,
        estimated_duration=estimated_duration,
        node_id=node_id,
        poll_interval=16.0,  # presumably seconds — confirm against PollingOperation
        max_poll_attempts=256,
    )
    return await operation.execute()
|
|
|
|
|
|
def get_video_url_from_response(response) -> Optional[str]:
    """Return the URL of the first creation in ``response``, if any.

    Args:
        response: Object exposing a ``creations`` sequence whose items have
            a ``url`` attribute.

    Returns:
        The first creation's URL, or ``None`` when ``creations`` is empty
        or otherwise falsy.
    """
    return response.creations[0].url if response.creations else None
|
|
|
|
|
|
def get_video_from_response(response) -> "TaskResult":
    """Return the first creation of a Vidu status response.

    Args:
        response: Object exposing ``creations``, ``state`` and ``err_code``
            (in this file, a ``TaskStatusResponse``).

    Returns:
        The first entry of ``response.creations``.

    Raises:
        RuntimeError: If ``response.creations`` is empty.
    """
    if not response.creations:
        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
        # This path raises, so report it at ERROR like execute_task does for
        # failed requests, rather than at INFO.
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    first_creation = response.creations[0]
    logging.info("Vidu task %s succeeded. Video URL: %s", first_creation.id, first_creation.url)
    return first_creation
|
|
|
|
|
|
async def execute_task(
    vidu_endpoint: str,
    auth_kwargs: Optional[dict[str, str]],
    payload: TaskCreationRequest,
    estimated_duration: int,
    node_id: str,
) -> TaskStatusResponse:
    """Create a Vidu task and poll it until it finishes.

    Args:
        vidu_endpoint: Path of the task-creation endpoint to POST to.
        auth_kwargs: Authentication values forwarded to both operations.
        payload: Request body for the creation call.
        estimated_duration: Estimate forwarded to the polling helper.
        node_id: Node id forwarded to the polling helper.

    Returns:
        The final status response returned by ``poll_until_finished``.

    Raises:
        RuntimeError: If the creation response already reports a failed state.
    """
    creation_endpoint = ApiEndpoint(
        path=vidu_endpoint,
        method=HttpMethod.POST,
        request_model=TaskCreationRequest,
        response_model=TaskCreationResponse,
    )
    response = await SynchronousOperation(
        endpoint=creation_endpoint,
        request=payload,
        auth_kwargs=auth_kwargs,
    ).execute()

    # The creation call can fail immediately; don't start polling in that case.
    if response.state == TaskStatus.failed:
        error_msg = f"Vidu request failed. Code: {response.code}"
        logging.error(error_msg)
        raise RuntimeError(error_msg)

    status_endpoint = ApiEndpoint(
        path=VIDU_GET_GENERATION_STATUS % response.task_id,
        method=HttpMethod.GET,
        request_model=EmptyRequest,
        response_model=TaskStatusResponse,
    )
    return await poll_until_finished(
        auth_kwargs,
        status_endpoint,
        result_url_extractor=get_video_url_from_response,
        estimated_duration=estimated_duration,
        node_id=node_id,
    )
|
|
|
|
|
|
class ViduTextToVideoNode(comfy_io.ComfyNode):
    """API node that generates a video from a text prompt via Vidu."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduTextToVideoNode",
            display_name="Vidu Text To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from text prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Submit a text-to-video task and return the downloaded video.

        Raises:
            ValueError: If ``prompt`` is empty.
            RuntimeError: Propagated from the task helpers when the task
                fails or yields no results.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        # TaskCreationRequest declares this field as `model`. An undeclared
        # keyword such as `model_name` would not set it, so the request would
        # always carry the default model.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
|
class ViduImageToVideoNode(comfy_io.ComfyNode):
    """API node that generates a video from one start image and an optional prompt."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduImageToVideoNode",
            display_name="Vidu Image To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from image and optional prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="An image to be used as the start frame of the generated video",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload the start image, submit an image-to-video task and return the video.

        Raises:
            ValueError: If more than one image is supplied.
            RuntimeError: Propagated from the task helpers when the task
                fails or yields no results.
        """
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        # Presumably restricts the aspect ratio to the 1:4 .. 4:1 range —
        # confirm against validate_image_aspect_ratio_range.
        validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
        # TaskCreationRequest declares this field as `model`. An undeclared
        # keyword such as `model_name` would not set it, so the request would
        # always carry the default model.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # Images are uploaded first; the request then carries what the upload returns.
        payload.images = await upload_images_to_comfyapi(
            image,
            max_images=1,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
|
class ViduReferenceVideoNode(comfy_io.ComfyNode):
    """API node that generates a video from up to seven reference images and a prompt."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduReferenceVideoNode",
            display_name="Vidu Reference To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from multiple images and prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "images",
                    tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        images: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload the reference images, submit a reference-to-video task and return the video.

        Raises:
            ValueError: If ``prompt`` is empty or more than 7 images are supplied.
            RuntimeError: Propagated from the task helpers when the task
                fails or yields no results.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        image_count = get_number_of_images(images)
        if image_count > 7:
            raise ValueError("Too many images, maximum allowed is 7.")
        # Each reference image is validated individually before anything is uploaded.
        for image in images:
            validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
            validate_image_dimensions(image, min_width=128, min_height=128)
        # TaskCreationRequest declares this field as `model`. An undeclared
        # keyword such as `model_name` would not set it, so the request would
        # always carry the default model.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            images,
            max_images=7,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
|
class ViduStartEndToVideoNode(comfy_io.ComfyNode):
    """API node that generates a video from a start frame, an end frame and a prompt."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduStartEndToVideoNode",
            display_name="Vidu Start End To Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from start and end frames and a prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "first_frame",
                    tooltip="Start frame",
                ),
                comfy_io.Image.Input(
                    "end_frame",
                    tooltip="End frame",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: torch.Tensor,
        end_frame: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload both frames, submit a start/end-to-video task and return the video.

        Raises:
            RuntimeError: Propagated from the task helpers when the task
                fails or yields no results.
        """
        # The two frames must have similar aspect ratios (relative bounds 0.8 .. 1.25).
        validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
        # TaskCreationRequest declares this field as `model`. An undeclared
        # keyword such as `model_name` would not set it, so the request would
        # always carry the default model.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # Upload sequentially so the list order is always [start, end].
        frame_refs = []
        for frame in (first_frame, end_frame):
            uploaded = await upload_images_to_comfyapi(frame, max_images=1, mime_type="image/png", auth_kwargs=auth)
            frame_refs.append(uploaded[0])
        payload.images = frame_refs
        results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
|
class ViduExtension(ComfyExtension):
    """Extension exposing the four Vidu video-generation nodes."""

    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        node_classes: list[type[comfy_io.ComfyNode]] = [
            ViduTextToVideoNode,
            ViduImageToVideoNode,
            ViduReferenceVideoNode,
            ViduStartEndToVideoNode,
        ]
        return node_classes
|
|
|
|
async def comfy_entrypoint() -> ViduExtension:
    """Build and return a fresh ``ViduExtension`` instance."""
    extension = ViduExtension()
    return extension
|