"""Kling API Nodes For source of truth on the allowed permutations of request fields, please reference: - [Compatibility Table](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) """ from __future__ import annotations from typing import Optional, TypeVar, Any import math import logging import torch from comfy_api_nodes.apis import ( KlingTaskStatus, KlingCameraControl, KlingCameraConfig, KlingCameraControlType, KlingVideoGenDuration, KlingVideoGenMode, KlingVideoGenAspectRatio, KlingVideoGenModelName, KlingText2VideoRequest, KlingText2VideoResponse, KlingImage2VideoRequest, KlingImage2VideoResponse, KlingVideoExtendRequest, KlingVideoExtendResponse, KlingLipSyncVoiceLanguage, KlingLipSyncInputObject, KlingLipSyncRequest, KlingLipSyncResponse, KlingVirtualTryOnModelName, KlingVirtualTryOnRequest, KlingVirtualTryOnResponse, KlingVideoResult, KlingImageResult, KlingImageGenerationsRequest, KlingImageGenerationsResponse, KlingImageGenImageReferenceType, KlingImageGenModelName, KlingImageGenAspectRatio, KlingVideoEffectsRequest, KlingVideoEffectsResponse, KlingDualCharacterEffectsScene, KlingSingleImageEffectsScene, KlingDualCharacterEffectInput, KlingSingleImageEffectInput, KlingCharacterEffectModelName, KlingSingleImageEffectModelName, ) from comfy_api_nodes.apis.client import ( ApiEndpoint, HttpMethod, SynchronousOperation, PollingOperation, EmptyRequest, ) from comfy_api_nodes.apinode_utils import ( tensor_to_base64_string, download_url_to_video_output, upload_video_to_comfyapi, upload_audio_to_comfyapi, download_url_to_image_tensor, ) from comfy_api_nodes.mapper_utils import model_field_to_node_input from comfy_api.input.basic_types import AudioInput from comfy_api.input.video_types import VideoInput from comfy_api.input_impl import VideoFromFile from comfy.comfy_types.node_typing import IO, InputTypeOptions, ComfyNodeABC KLING_API_VERSION = "v1" PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video" PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video" PATH_VIDEO_EXTEND = f"/proxy/kling/{KLING_API_VERSION}/videos/video-extend" PATH_LIP_SYNC = f"/proxy/kling/{KLING_API_VERSION}/videos/lip-sync" PATH_VIDEO_EFFECTS = f"/proxy/kling/{KLING_API_VERSION}/videos/effects" PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations" PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on" PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations" MAX_PROMPT_LENGTH_T2V = 2500 MAX_PROMPT_LENGTH_I2V = 500 MAX_PROMPT_LENGTH_IMAGE_GEN = 500 MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200 MAX_PROMPT_LENGTH_LIP_SYNC = 120 R = TypeVar("R") class KlingApiError(Exception): """Base exception for Kling API errors.""" pass def poll_until_finished(auth_kwargs: dict[str,str], api_endpoint: ApiEndpoint[Any, R]) -> R: """Polls the Kling API endpoint until the task reaches a terminal state, then returns the response.""" return PollingOperation( poll_endpoint=api_endpoint, completed_statuses=[ KlingTaskStatus.succeed.value, ], failed_statuses=[KlingTaskStatus.failed.value], status_extractor=lambda response: ( response.data.task_status.value if response.data and response.data.task_status else None ), auth_kwargs=auth_kwargs, ).execute() def is_valid_camera_control_configs(configs: list[float]) -> bool: """Verifies that at least one camera control configuration is non-zero.""" return any(not math.isclose(value, 0.0) for value in configs) def is_valid_prompt(prompt: str) -> bool: """Verifies that the prompt is not empty.""" return bool(prompt) def is_valid_task_creation_response(response: KlingText2VideoResponse) -> bool: """Verifies that the initial response contains a task ID.""" return bool(response.data.task_id) def is_valid_video_response(response: KlingText2VideoResponse) -> bool: """Verifies that the response contains a task result with at least one video.""" return ( response.data is not None and response.data.task_result is not None and response.data.task_result.videos is not None and len(response.data.task_result.videos) > 0 ) def is_valid_image_response(response: KlingVirtualTryOnResponse) -> bool: """Verifies that the response contains a task result with at least one image.""" return ( response.data is not None and response.data.task_result is not None and response.data.task_result.images is not None and len(response.data.task_result.images) > 0 ) def validate_prompts(prompt: str, negative_prompt: str, max_length: int) -> bool: """Verifies that the positive prompt is not empty and that neither promt is too long.""" if not prompt: raise ValueError("Positive prompt is empty") if len(prompt) > max_length: raise ValueError(f"Positive prompt is too long: {len(prompt)} characters") if negative_prompt and len(negative_prompt) > max_length: raise ValueError( f"Negative prompt is too long: {len(negative_prompt)} characters" ) return True def validate_task_creation_response(response) -> None: """Validates that the Kling task creation request was successful.""" if not is_valid_task_creation_response(response): error_msg = f"Kling initial request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" logging.error(error_msg) raise KlingApiError(error_msg) def validate_video_result_response(response) -> None: """Validates that the Kling task result contains a video.""" if not is_valid_video_response(response): error_msg = f"Kling task {response.data.task_id} succeeded but no video data found in response." logging.error(f"Error: {error_msg}.\nResponse: {response}") raise KlingApiError(error_msg) def validate_image_result_response(response) -> None: """Validates that the Kling task result contains an image.""" if not is_valid_image_response(response): error_msg = f"Kling task {response.data.task_id} succeeded but no image data found in response." logging.error(f"Error: {error_msg}.\nResponse: {response}") raise KlingApiError(error_msg) def validate_input_image(image: torch.Tensor) -> None: """ Validates the input image adheres to the expectations of the Kling API: - The image resolution should not be less than 300*300px - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1 See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo """ if len(image.shape) == 4: height, width = image.shape[1], image.shape[2] elif len(image.shape) == 3: height, width = image.shape[0], image.shape[1] else: raise ValueError("Invalid image tensor shape.") # Ensure minimum resolution is met if height < 300: raise ValueError("Image height must be at least 300px") if width < 300: raise ValueError("Image width must be at least 300px") # Ensure aspect ratio is within acceptable range aspect_ratio = width / height if aspect_ratio < 1 / 2.5 or aspect_ratio > 2.5: raise ValueError("Image aspect ratio must be between 1:2.5 and 2.5:1") def get_camera_control_input_config( tooltip: str, default: float = 0.0 ) -> tuple[IO, InputTypeOptions]: """Returns common InputTypeOptions for Kling camera control configurations.""" input_config = { "default": default, "min": -10.0, "max": 10.0, "step": 0.25, "display": "slider", "tooltip": tooltip, } return IO.FLOAT, input_config def get_video_from_response(response) -> KlingVideoResult: """Returns the first video object from the Kling video generation task result.""" video = response.data.task_result.videos[0] logging.info( "Kling task %s succeeded. Video URL: %s", response.data.task_id, video.url ) return video def get_images_from_response(response) -> list[KlingImageResult]: images = response.data.task_result.images logging.info("Kling task %s succeeded. Images: %s", response.data.task_id, images) return images def video_result_to_node_output( video: KlingVideoResult, ) -> tuple[VideoFromFile, str, str]: """Converts a KlingVideoResult to a tuple of (VideoFromFile, str, str) to be used as a ComfyUI node output.""" return ( download_url_to_video_output(video.url), str(video.id), str(video.duration), ) def image_result_to_node_output( images: list[KlingImageResult], ) -> torch.Tensor: """ Converts a KlingImageResult to a tuple containing a [B, H, W, C] tensor. If multiple images are returned, they will be stacked along the batch dimension. """ if len(images) == 1: return download_url_to_image_tensor(images[0].url) else: return torch.cat([download_url_to_image_tensor(image.url) for image in images]) class KlingNodeBase(ComfyNodeABC): """Base class for Kling nodes.""" FUNCTION = "api_call" CATEGORY = "api node/video/Kling" API_NODE = True class KlingCameraControls(KlingNodeBase): """Kling Camera Controls Node""" @classmethod def INPUT_TYPES(cls): return { "required": { "camera_control_type": model_field_to_node_input( IO.COMBO, KlingCameraControl, "type", enum_type=KlingCameraControlType, ), "horizontal_movement": get_camera_control_input_config( "Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right" ), "vertical_movement": get_camera_control_input_config( "Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward." ), "pan": get_camera_control_input_config( "Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", default=0.5, ), "tilt": get_camera_control_input_config( "Controls camera's rotation in horizontal plane (y-axis). Negative indicates left rotation, positive indicates right rotation.", ), "roll": get_camera_control_input_config( "Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.", ), "zoom": get_camera_control_input_config( "Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.", ), } } DESCRIPTION = "Allows specifying configuration options for Kling Camera Controls and motion control effects." RETURN_TYPES = ("CAMERA_CONTROL",) RETURN_NAMES = ("camera_control",) FUNCTION = "main" @classmethod def VALIDATE_INPUTS( cls, horizontal_movement: float, vertical_movement: float, pan: float, tilt: float, roll: float, zoom: float, ) -> bool | str: if not is_valid_camera_control_configs( [ horizontal_movement, vertical_movement, pan, tilt, roll, zoom, ] ): return "Invalid camera control configs: at least one of the values must be non-zero" return True def main( self, camera_control_type: str, horizontal_movement: float, vertical_movement: float, pan: float, tilt: float, roll: float, zoom: float, ) -> tuple[KlingCameraControl]: return ( KlingCameraControl( type=KlingCameraControlType(camera_control_type), config=KlingCameraConfig( horizontal=horizontal_movement, vertical=vertical_movement, pan=pan, roll=roll, tilt=tilt, zoom=zoom, ), ), ) class KlingTextToVideoNode(KlingNodeBase): """Kling Text to Video Node""" @staticmethod def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]: """ Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples. Only includes config combos that support the `image_tail` request field. See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) """ return { "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"), "standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"), "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"), "pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"), "standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"), "standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"), "pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"), "pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"), "standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"), "standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"), } @classmethod def INPUT_TYPES(s): modes = list(KlingTextToVideoNode.get_mode_string_mapping().keys()) return { "required": { "prompt": model_field_to_node_input( IO.STRING, KlingText2VideoRequest, "prompt", multiline=True ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingText2VideoRequest, "negative_prompt", multiline=True ), "cfg_scale": model_field_to_node_input( IO.FLOAT, KlingText2VideoRequest, "cfg_scale", default=1.0, min=0.0, max=1.0, ), "aspect_ratio": model_field_to_node_input( IO.COMBO, KlingText2VideoRequest, "aspect_ratio", enum_type=KlingVideoGenAspectRatio, ), "mode": ( modes, { "default": modes[4], "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.", }, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } RETURN_TYPES = ("VIDEO", "STRING", "STRING") RETURN_NAMES = ("VIDEO", "video_id", "duration") DESCRIPTION = "Kling Text to Video Node" def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingText2VideoResponse: return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_TEXT_TO_VIDEO}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, response_model=KlingText2VideoResponse, ), ) def api_call( self, prompt: str, negative_prompt: str, cfg_scale: float, mode: str, aspect_ratio: str, camera_control: Optional[KlingCameraControl] = None, model_name: Optional[str] = None, duration: Optional[str] = None, **kwargs, ) -> tuple[VideoFromFile, str, str]: validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) if model_name is None: mode, duration, model_name = self.get_mode_string_mapping()[mode] initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_TEXT_TO_VIDEO, method=HttpMethod.POST, request_model=KlingText2VideoRequest, response_model=KlingText2VideoResponse, ), request=KlingText2VideoRequest( prompt=prompt if prompt else None, negative_prompt=negative_prompt if negative_prompt else None, duration=KlingVideoGenDuration(duration), mode=KlingVideoGenMode(mode), model_name=KlingVideoGenModelName(model_name), cfg_scale=cfg_scale, aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), camera_control=camera_control, ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_video_result_response(final_response) video = get_video_from_response(final_response) return video_result_to_node_output(video) class KlingCameraControlT2VNode(KlingTextToVideoNode): """ Kling Text to Video Camera Control Node. This node is a text to video node, but it supports controlling the camera. Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02. """ @classmethod def INPUT_TYPES(s): return { "required": { "prompt": model_field_to_node_input( IO.STRING, KlingText2VideoRequest, "prompt", multiline=True ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingText2VideoRequest, "negative_prompt", multiline=True, ), "cfg_scale": model_field_to_node_input( IO.FLOAT, KlingText2VideoRequest, "cfg_scale", default=0.75, min=0.0, max=1.0, ), "aspect_ratio": model_field_to_node_input( IO.COMBO, KlingText2VideoRequest, "aspect_ratio", enum_type=KlingVideoGenAspectRatio, ), "camera_control": ( "CAMERA_CONTROL", { "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", }, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text." def api_call( self, prompt: str, negative_prompt: str, cfg_scale: float, aspect_ratio: str, camera_control: Optional[KlingCameraControl] = None, **kwargs, ): return super().api_call( model_name=KlingVideoGenModelName.kling_v1, cfg_scale=cfg_scale, mode=KlingVideoGenMode.std, aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), duration=KlingVideoGenDuration.field_5, prompt=prompt, negative_prompt=negative_prompt, camera_control=camera_control, **kwargs, ) class KlingImage2VideoNode(KlingNodeBase): """Kling Image to Video Node""" @classmethod def INPUT_TYPES(s): return { "required": { "start_frame": model_field_to_node_input( IO.IMAGE, KlingImage2VideoRequest, "image", tooltip="The reference image used to generate the video.", ), "prompt": model_field_to_node_input( IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingImage2VideoRequest, "negative_prompt", multiline=True, ), "model_name": model_field_to_node_input( IO.COMBO, KlingImage2VideoRequest, "model_name", enum_type=KlingVideoGenModelName, ), "cfg_scale": model_field_to_node_input( IO.FLOAT, KlingImage2VideoRequest, "cfg_scale", default=0.8, min=0.0, max=1.0, ), "mode": model_field_to_node_input( IO.COMBO, KlingImage2VideoRequest, "mode", enum_type=KlingVideoGenMode, ), "aspect_ratio": model_field_to_node_input( IO.COMBO, KlingImage2VideoRequest, "aspect_ratio", enum_type=KlingVideoGenAspectRatio, ), "duration": model_field_to_node_input( IO.COMBO, KlingImage2VideoRequest, "duration", enum_type=KlingVideoGenDuration, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } RETURN_TYPES = ("VIDEO", "STRING", "STRING") RETURN_NAMES = ("VIDEO", "video_id", "duration") DESCRIPTION = "Kling Image to Video Node" def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingImage2VideoResponse: return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}", method=HttpMethod.GET, request_model=KlingImage2VideoRequest, response_model=KlingImage2VideoResponse, ), ) def api_call( self, start_frame: torch.Tensor, prompt: str, negative_prompt: str, model_name: str, cfg_scale: float, mode: str, aspect_ratio: str, duration: str, camera_control: Optional[KlingCameraControl] = None, end_frame: Optional[torch.Tensor] = None, **kwargs, ) -> tuple[VideoFromFile]: validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V) validate_input_image(start_frame) if camera_control is not None: # Camera control type for image 2 video is always `simple` camera_control.type = KlingCameraControlType.simple initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_IMAGE_TO_VIDEO, method=HttpMethod.POST, request_model=KlingImage2VideoRequest, response_model=KlingImage2VideoResponse, ), request=KlingImage2VideoRequest( model_name=KlingVideoGenModelName(model_name), image=tensor_to_base64_string(start_frame), image_tail=( tensor_to_base64_string(end_frame) if end_frame is not None else None ), prompt=prompt, negative_prompt=negative_prompt if negative_prompt else None, cfg_scale=cfg_scale, mode=KlingVideoGenMode(mode), duration=KlingVideoGenDuration(duration), camera_control=camera_control, ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_video_result_response(final_response) video = get_video_from_response(final_response) return video_result_to_node_output(video) class KlingCameraControlI2VNode(KlingImage2VideoNode): """ Kling Image to Video Camera Control Node. This node is a image to video node, but it supports controlling the camera. Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02. """ @classmethod def INPUT_TYPES(s): return { "required": { "start_frame": model_field_to_node_input( IO.IMAGE, KlingImage2VideoRequest, "image" ), "prompt": model_field_to_node_input( IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingImage2VideoRequest, "negative_prompt", multiline=True, ), "cfg_scale": model_field_to_node_input( IO.FLOAT, KlingImage2VideoRequest, "cfg_scale", default=0.75, min=0.0, max=1.0, ), "aspect_ratio": model_field_to_node_input( IO.COMBO, KlingImage2VideoRequest, "aspect_ratio", enum_type=KlingVideoGenAspectRatio, ), "camera_control": ( "CAMERA_CONTROL", { "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", }, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image." def api_call( self, start_frame: torch.Tensor, prompt: str, negative_prompt: str, cfg_scale: float, aspect_ratio: str, camera_control: KlingCameraControl, **kwargs, ): return super().api_call( model_name=KlingVideoGenModelName.kling_v1_5, start_frame=start_frame, cfg_scale=cfg_scale, mode=KlingVideoGenMode.pro, aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), duration=KlingVideoGenDuration.field_5, prompt=prompt, negative_prompt=negative_prompt, camera_control=camera_control, **kwargs, ) class KlingStartEndFrameNode(KlingImage2VideoNode): """ Kling First Last Frame Node. This node allows creation of a video from a first and last frame. It calls the normal image to video endpoint, but only allows the subset of input options that support the `image_tail` request field. """ @staticmethod def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]: """ Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples. Only includes config combos that support the `image_tail` request field. See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) """ return { "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"), "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"), "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"), "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"), "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"), "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"), } @classmethod def INPUT_TYPES(s): modes = list(KlingStartEndFrameNode.get_mode_string_mapping().keys()) return { "required": { "start_frame": model_field_to_node_input( IO.IMAGE, KlingImage2VideoRequest, "image" ), "end_frame": model_field_to_node_input( IO.IMAGE, KlingImage2VideoRequest, "image_tail" ), "prompt": model_field_to_node_input( IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingImage2VideoRequest, "negative_prompt", multiline=True, ), "cfg_scale": model_field_to_node_input( IO.FLOAT, KlingImage2VideoRequest, "cfg_scale", default=0.5, min=0.0, max=1.0, ), "aspect_ratio": model_field_to_node_input( IO.COMBO, KlingImage2VideoRequest, "aspect_ratio", enum_type=KlingVideoGenAspectRatio, ), "mode": ( modes, { "default": modes[2], "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.", }, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last." def api_call( self, start_frame: torch.Tensor, end_frame: torch.Tensor, prompt: str, negative_prompt: str, cfg_scale: float, aspect_ratio: str, mode: str, **kwargs, ): mode, duration, model_name = KlingStartEndFrameNode.get_mode_string_mapping()[ mode ] return super().api_call( prompt=prompt, negative_prompt=negative_prompt, model_name=model_name, start_frame=start_frame, cfg_scale=cfg_scale, mode=mode, aspect_ratio=aspect_ratio, duration=duration, end_frame=end_frame, **kwargs, ) class KlingVideoExtendNode(KlingNodeBase): @classmethod def INPUT_TYPES(s): return { "required": { "prompt": model_field_to_node_input( IO.STRING, KlingVideoExtendRequest, "prompt", multiline=True ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingVideoExtendRequest, "negative_prompt", multiline=True, ), "cfg_scale": model_field_to_node_input( IO.FLOAT, KlingVideoExtendRequest, "cfg_scale", default=0.5, min=0.0, max=1.0, ), "video_id": model_field_to_node_input( IO.STRING, KlingVideoExtendRequest, "video_id", forceInput=True ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } RETURN_TYPES = ("VIDEO", "STRING", "STRING") RETURN_NAMES = ("VIDEO", "video_id", "duration") DESCRIPTION = "Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes." def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoExtendResponse: return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_VIDEO_EXTEND}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, response_model=KlingVideoExtendResponse, ), ) def api_call( self, prompt: str, negative_prompt: str, cfg_scale: float, video_id: str, **kwargs, ) -> tuple[VideoFromFile, str, str]: validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_VIDEO_EXTEND, method=HttpMethod.POST, request_model=KlingVideoExtendRequest, response_model=KlingVideoExtendResponse, ), request=KlingVideoExtendRequest( prompt=prompt if prompt else None, negative_prompt=negative_prompt if negative_prompt else None, cfg_scale=cfg_scale, video_id=video_id, ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_video_result_response(final_response) video = get_video_from_response(final_response) return video_result_to_node_output(video) class KlingVideoEffectsBase(KlingNodeBase): """Kling Video Effects Base""" RETURN_TYPES = ("VIDEO", "STRING", "STRING") RETURN_NAMES = ("VIDEO", "video_id", "duration") def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoEffectsResponse: return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_VIDEO_EFFECTS}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, response_model=KlingVideoEffectsResponse, ), ) def api_call( self, dual_character: bool, effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene, model_name: str, duration: KlingVideoGenDuration, image_1: torch.Tensor, image_2: Optional[torch.Tensor] = None, mode: Optional[KlingVideoGenMode] = None, **kwargs, ): if dual_character: request_input_field = KlingDualCharacterEffectInput( model_name=model_name, mode=mode, images=[ tensor_to_base64_string(image_1), tensor_to_base64_string(image_2), ], duration=duration, ) else: request_input_field = KlingSingleImageEffectInput( model_name=model_name, image=tensor_to_base64_string(image_1), duration=duration, ) initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_VIDEO_EFFECTS, method=HttpMethod.POST, request_model=KlingVideoEffectsRequest, response_model=KlingVideoEffectsResponse, ), request=KlingVideoEffectsRequest( effect_scene=effect_scene, input=request_input_field, ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_video_result_response(final_response) video = get_video_from_response(final_response) return video_result_to_node_output(video) class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase): """Kling Dual Character Video Effect Node""" @classmethod def INPUT_TYPES(s): return { "required": { "image_left": (IO.IMAGE, {"tooltip": "Left side image"}), "image_right": (IO.IMAGE, {"tooltip": "Right side image"}), "effect_scene": model_field_to_node_input( IO.COMBO, KlingVideoEffectsRequest, "effect_scene", enum_type=KlingDualCharacterEffectsScene, ), "model_name": model_field_to_node_input( IO.COMBO, KlingDualCharacterEffectInput, "model_name", enum_type=KlingCharacterEffectModelName, ), "mode": model_field_to_node_input( IO.COMBO, KlingDualCharacterEffectInput, "mode", enum_type=KlingVideoGenMode, ), "duration": model_field_to_node_input( IO.COMBO, KlingDualCharacterEffectInput, "duration", enum_type=KlingVideoGenDuration, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite." RETURN_TYPES = ("VIDEO", "STRING") RETURN_NAMES = ("VIDEO", "duration") def api_call( self, image_left: torch.Tensor, image_right: torch.Tensor, effect_scene: KlingDualCharacterEffectsScene, model_name: KlingCharacterEffectModelName, mode: KlingVideoGenMode, duration: KlingVideoGenDuration, **kwargs, ): video, _, duration = super().api_call( dual_character=True, effect_scene=effect_scene, model_name=model_name, mode=mode, duration=duration, image_1=image_left, image_2=image_right, **kwargs, ) return video, duration class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase): """Kling Single Image Video Effect Node""" @classmethod def INPUT_TYPES(s): return { "required": { "image": ( IO.IMAGE, { "tooltip": " Reference Image. URL or Base64 encoded string (without data:image prefix). File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1" }, ), "effect_scene": model_field_to_node_input( IO.COMBO, KlingVideoEffectsRequest, "effect_scene", enum_type=KlingSingleImageEffectsScene, ), "model_name": model_field_to_node_input( IO.COMBO, KlingSingleImageEffectInput, "model_name", enum_type=KlingSingleImageEffectModelName, ), "duration": model_field_to_node_input( IO.COMBO, KlingSingleImageEffectInput, "duration", enum_type=KlingVideoGenDuration, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene." def api_call( self, image: torch.Tensor, effect_scene: KlingSingleImageEffectsScene, model_name: KlingSingleImageEffectModelName, duration: KlingVideoGenDuration, **kwargs, ): return super().api_call( dual_character=False, effect_scene=effect_scene, model_name=model_name, duration=duration, image_1=image, **kwargs, ) class KlingLipSyncBase(KlingNodeBase): """Kling Lip Sync Base""" RETURN_TYPES = ("VIDEO", "STRING", "STRING") RETURN_NAMES = ("VIDEO", "video_id", "duration") def validate_text(self, text: str): if not text: raise ValueError("Text is required") if len(text) > MAX_PROMPT_LENGTH_LIP_SYNC: raise ValueError( f"Text is too long. Maximum length is {MAX_PROMPT_LENGTH_LIP_SYNC} characters." ) def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingLipSyncResponse: """Polls the Kling API endpoint until the task reaches a terminal state.""" return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_LIP_SYNC}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, response_model=KlingLipSyncResponse, ), ) def api_call( self, video: VideoInput, audio: Optional[AudioInput] = None, voice_language: Optional[str] = None, mode: Optional[str] = None, text: Optional[str] = None, voice_speed: Optional[float] = None, voice_id: Optional[str] = None, **kwargs ) -> tuple[VideoFromFile, str, str]: if text: self.validate_text(text) # Upload video to Comfy API and get download URL video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs) logging.info("Uploaded video to Comfy API. URL: %s", video_url) # Upload the audio file to Comfy API and get download URL if audio: audio_url = upload_audio_to_comfyapi(audio, auth_kwargs=kwargs) logging.info("Uploaded audio to Comfy API. URL: %s", audio_url) else: audio_url = None initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_LIP_SYNC, method=HttpMethod.POST, request_model=KlingLipSyncRequest, response_model=KlingLipSyncResponse, ), request=KlingLipSyncRequest( input=KlingLipSyncInputObject( video_url=video_url, mode=mode, text=text, voice_language=voice_language, voice_speed=voice_speed, audio_type="url", audio_url=audio_url, voice_id=voice_id, ), ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_video_result_response(final_response) video = get_video_from_response(final_response) return video_result_to_node_output(video) class KlingLipSyncAudioToVideoNode(KlingLipSyncBase): """Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file.""" @classmethod def INPUT_TYPES(s): return { "required": { "video": (IO.VIDEO, {}), "audio": (IO.AUDIO, {}), "voice_language": model_field_to_node_input( IO.COMBO, KlingLipSyncInputObject, "voice_language", enum_type=KlingLipSyncVoiceLanguage, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file." def api_call( self, video: VideoInput, audio: AudioInput, voice_language: str, **kwargs, ): return super().api_call( video=video, audio=audio, voice_language=voice_language, mode="audio2video", **kwargs, ) class KlingLipSyncTextToVideoNode(KlingLipSyncBase): """Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt.""" @staticmethod def get_voice_config() -> dict[str, tuple[str, str]]: return { # English voices "Melody": ("girlfriend_4_speech02", "en"), "Sunny": ("genshin_vindi2", "en"), "Sage": ("zhinen_xuesheng", "en"), "Ace": ("AOT", "en"), "Blossom": ("ai_shatang", "en"), "Peppy": ("genshin_klee2", "en"), "Dove": ("genshin_kirara", "en"), "Shine": ("ai_kaiya", "en"), "Anchor": ("oversea_male1", "en"), "Lyric": ("ai_chenjiahao_712", "en"), "Tender": ("chat1_female_new-3", "en"), "Siren": ("chat_0407_5-1", "en"), "Zippy": ("cartoon-boy-07", "en"), "Bud": ("uk_boy1", "en"), "Sprite": ("cartoon-girl-01", "en"), "Candy": ("PeppaPig_platform", "en"), "Beacon": ("ai_huangzhong_712", "en"), "Rock": ("ai_huangyaoshi_712", "en"), "Titan": ("ai_laoguowang_712", "en"), "Grace": ("chengshu_jiejie", "en"), "Helen": ("you_pingjing", "en"), "Lore": ("calm_story1", "en"), "Crag": ("uk_man2", "en"), "Prattle": ("laopopo_speech02", "en"), "Hearth": ("heainainai_speech02", "en"), "The Reader": ("reader_en_m-v1", "en"), "Commercial Lady": ("commercial_lady_en_f-v1", "en"), # Chinese voices "阳光少年": ("genshin_vindi2", "zh"), "懂事小弟": ("zhinen_xuesheng", "zh"), "运动少年": ("tiyuxi_xuedi", "zh"), "青春少女": ("ai_shatang", "zh"), "温柔小妹": ("genshin_klee2", "zh"), "元气少女": ("genshin_kirara", "zh"), "阳光男生": ("ai_kaiya", "zh"), "幽默小哥": ("tiexin_nanyou", "zh"), "文艺小哥": ("ai_chenjiahao_712", "zh"), "甜美邻家": ("girlfriend_1_speech02", "zh"), "温柔姐姐": ("chat1_female_new-3", "zh"), "职场女青": ("girlfriend_2_speech02", "zh"), "活泼男童": ("cartoon-boy-07", "zh"), "俏皮女童": ("cartoon-girl-01", "zh"), "稳重老爸": ("ai_huangyaoshi_712", "zh"), "温柔妈妈": ("you_pingjing", "zh"), "严肃上司": ("ai_laoguowang_712", "zh"), "优雅贵妇": ("chengshu_jiejie", "zh"), "慈祥爷爷": ("zhuxi_speech02", "zh"), "唠叨爷爷": ("uk_oldman3", "zh"), "唠叨奶奶": ("laopopo_speech02", "zh"), "和蔼奶奶": ("heainainai_speech02", "zh"), "东北老铁": ("dongbeilaotie_speech02", "zh"), "重庆小伙": ("chongqingxiaohuo_speech02", "zh"), "四川妹子": ("chuanmeizi_speech02", "zh"), "潮汕大叔": ("chaoshandashu_speech02", "zh"), "台湾男生": ("ai_taiwan_man2_speech02", "zh"), "西安掌柜": ("xianzhanggui_speech02", "zh"), "天津姐姐": ("tianjinjiejie_speech02", "zh"), "新闻播报男": ("diyinnansang_DB_CN_M_04-v2", "zh"), "译制片男": ("yizhipiannan-v1", "zh"), "撒娇女友": ("tianmeixuemei-v1", "zh"), "刀片烟嗓": ("daopianyansang-v1", "zh"), "乖巧正太": ("mengwa-v1", "zh"), } @classmethod def INPUT_TYPES(s): voice_options = list(s.get_voice_config().keys()) return { "required": { "video": (IO.VIDEO, {}), "text": model_field_to_node_input( IO.STRING, KlingLipSyncInputObject, "text", multiline=True ), "voice": (voice_options, {"default": voice_options[0]}), "voice_speed": model_field_to_node_input( IO.FLOAT, KlingLipSyncInputObject, "voice_speed", slider=True ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt." def api_call( self, video: VideoInput, text: str, voice: str, voice_speed: float, **kwargs, ): voice_id, voice_language = KlingLipSyncTextToVideoNode.get_voice_config()[voice] return super().api_call( video=video, text=text, voice_language=voice_language, voice_id=voice_id, voice_speed=voice_speed, mode="text2video", **kwargs, ) class KlingImageGenerationBase(KlingNodeBase): """Kling Image Generation Base Node.""" RETURN_TYPES = ("IMAGE",) CATEGORY = "api node/image/Kling" def validate_prompt(self, prompt: str, negative_prompt: Optional[str] = None): if not prompt or len(prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN: raise ValueError( f"Prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters" ) if negative_prompt and len(negative_prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN: raise ValueError( f"Negative prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters" ) class KlingVirtualTryOnNode(KlingImageGenerationBase): """Kling Virtual Try On Node.""" @classmethod def INPUT_TYPES(s): return { "required": { "human_image": (IO.IMAGE, {}), "cloth_image": (IO.IMAGE, {}), "model_name": model_field_to_node_input( IO.COMBO, KlingVirtualTryOnRequest, "model_name", enum_type=KlingVirtualTryOnModelName, ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human." def get_response( self, task_id: str, auth_kwargs: dict[str,str] = None ) -> KlingVirtualTryOnResponse: return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, response_model=KlingVirtualTryOnResponse, ), ) def api_call( self, human_image: torch.Tensor, cloth_image: torch.Tensor, model_name: KlingVirtualTryOnModelName, **kwargs, ): initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_VIRTUAL_TRY_ON, method=HttpMethod.POST, request_model=KlingVirtualTryOnRequest, response_model=KlingVirtualTryOnResponse, ), request=KlingVirtualTryOnRequest( human_image=tensor_to_base64_string(human_image), cloth_image=tensor_to_base64_string(cloth_image), model_name=model_name, ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_image_result_response(final_response) images = get_images_from_response(final_response) return (image_result_to_node_output(images),) class KlingImageGenerationNode(KlingImageGenerationBase): """Kling Image Generation Node. Generate an image from a text prompt with an optional reference image.""" @classmethod def INPUT_TYPES(s): return { "required": { "prompt": model_field_to_node_input( IO.STRING, KlingImageGenerationsRequest, "prompt", multiline=True, max_length=MAX_PROMPT_LENGTH_IMAGE_GEN, ), "negative_prompt": model_field_to_node_input( IO.STRING, KlingImageGenerationsRequest, "negative_prompt", multiline=True, ), "image_type": model_field_to_node_input( IO.COMBO, KlingImageGenerationsRequest, "image_reference", enum_type=KlingImageGenImageReferenceType, ), "image_fidelity": model_field_to_node_input( IO.FLOAT, KlingImageGenerationsRequest, "image_fidelity", slider=True, step=0.01, ), "human_fidelity": model_field_to_node_input( IO.FLOAT, KlingImageGenerationsRequest, "human_fidelity", slider=True, step=0.01, ), "model_name": model_field_to_node_input( IO.COMBO, KlingImageGenerationsRequest, "model_name", enum_type=KlingImageGenModelName, ), "aspect_ratio": model_field_to_node_input( IO.COMBO, KlingImageGenerationsRequest, "aspect_ratio", enum_type=KlingImageGenAspectRatio, ), "n": model_field_to_node_input( IO.INT, KlingImageGenerationsRequest, "n", ), }, "optional": { "image": (IO.IMAGE, {}), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", }, } DESCRIPTION = "Kling Image Generation Node. Generate an image from a text prompt with an optional reference image." def get_response( self, task_id: str, auth_kwargs: Optional[dict[str,str]] = None ) -> KlingImageGenerationsResponse: return poll_until_finished( auth_kwargs, ApiEndpoint( path=f"{PATH_IMAGE_GENERATIONS}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, response_model=KlingImageGenerationsResponse, ), ) def api_call( self, model_name: KlingImageGenModelName, prompt: str, negative_prompt: str, image_type: KlingImageGenImageReferenceType, image_fidelity: float, human_fidelity: float, n: int, aspect_ratio: KlingImageGenAspectRatio, image: Optional[torch.Tensor] = None, **kwargs, ): self.validate_prompt(prompt, negative_prompt) if image is not None: image = tensor_to_base64_string(image) initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_IMAGE_GENERATIONS, method=HttpMethod.POST, request_model=KlingImageGenerationsRequest, response_model=KlingImageGenerationsResponse, ), request=KlingImageGenerationsRequest( model_name=model_name, prompt=prompt, negative_prompt=negative_prompt, image=image, image_reference=image_type, image_fidelity=image_fidelity, human_fidelity=human_fidelity, n=n, aspect_ratio=aspect_ratio, ), auth_kwargs=kwargs, ) task_creation_response = initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id final_response = self.get_response(task_id, auth_kwargs=kwargs) validate_image_result_response(final_response) images = get_images_from_response(final_response) return (image_result_to_node_output(images),) NODE_CLASS_MAPPINGS = { "KlingCameraControls": KlingCameraControls, "KlingTextToVideoNode": KlingTextToVideoNode, "KlingImage2VideoNode": KlingImage2VideoNode, "KlingCameraControlI2VNode": KlingCameraControlI2VNode, "KlingCameraControlT2VNode": KlingCameraControlT2VNode, "KlingStartEndFrameNode": KlingStartEndFrameNode, "KlingVideoExtendNode": KlingVideoExtendNode, "KlingLipSyncAudioToVideoNode": KlingLipSyncAudioToVideoNode, "KlingLipSyncTextToVideoNode": KlingLipSyncTextToVideoNode, "KlingVirtualTryOnNode": KlingVirtualTryOnNode, "KlingImageGenerationNode": KlingImageGenerationNode, "KlingSingleImageVideoEffectNode": KlingSingleImageVideoEffectNode, "KlingDualCharacterVideoEffectNode": KlingDualCharacterVideoEffectNode, } NODE_DISPLAY_NAME_MAPPINGS = { "KlingCameraControls": "Kling Camera Controls", "KlingTextToVideoNode": "Kling Text to Video", "KlingImage2VideoNode": "Kling Image to Video", "KlingCameraControlI2VNode": "Kling Image to Video (Camera Control)", "KlingCameraControlT2VNode": "Kling Text to Video (Camera Control)", "KlingStartEndFrameNode": "Kling Start-End Frame to Video", "KlingVideoExtendNode": "Kling Video Extend", "KlingLipSyncAudioToVideoNode": "Kling Lip Sync Video with Audio", "KlingLipSyncTextToVideoNode": "Kling Lip Sync Video with Text", "KlingVirtualTryOnNode": "Kling Virtual Try On", "KlingImageGenerationNode": "Kling Image Generation", "KlingSingleImageVideoEffectNode": "Kling Video Effects", "KlingDualCharacterVideoEffectNode": "Kling Dual Character Video Effects", }