Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-06-08 15:17:14 +00:00)
* Add Ideogram generate node.
* Add staging api.
* Add API_NODE and common error for missing auth token (#5)
* Add Minimax Video Generation + Async Task queue polling example (#6)
* [Minimax] Show video preview and embed workflow in output (#7)
* Remove uv.lock
* Remove polling operations.
* Revert "Remove polling operations."
* Update stubs.
* Added Ideogram and Minimax back in.
* Added initial BFL Flux 1.1 [pro] Ultra node (#11)
* Add --comfy-api-base launch arg (#13)
* Add instructions for staging development. (#14)
* remove validation to make it easier to run against LAN copies of the API
* Manually add BFL polling status response schema (#15)
* Add function for uploading files. (#18)
* Add Luma nodes (#16)
* Refactor util functions (#20)
* Add VIDEO type (#21)
* Add rest of Luma node functionality (#19)
* Fix image_luma_ref not working (#28)
* [Bug] Remove duplicated option T2V-01 in MinimaxTextToVideoNode (#31)
* Add utils to map from pydantic model fields to comfy node inputs (#30)
* add veo2, bump av req (#32)
* Add Recraft nodes (#29)
* Add Kling Nodes (#12)
* Add Camera Concepts (luma_concepts) to Luma Video nodes (#33)
* Add Runway nodes (#17)
* Convert Minimax node to use VIDEO output type (#34)
* Standard `CATEGORY` system for api nodes (#35)
* Set `Content-Type` header when uploading files (#36)
* add better error propagation to veo2 (#37)
* Add Realistic Image and Logo Raster styles for Recraft v3 (#38)
* Fix runway image upload and progress polling (#39)
* Fix image upload for Luma: only include `Content-Type` header field if it's set explicitly (#40)
* Moved Luma nodes to nodes_luma.py (#47)
* Moved Recraft nodes to nodes_recraft.py (#48)
* Add Pixverse nodes (#46)
* Move and fix BFL nodes to node_bfl.py (#49)
* Move and edit Minimax node to nodes_minimax.py (#50)
* Add Minimax Image to Video node + Cleanup (#51)
* Add Recraft Text to Vector node, add Save SVG node to handle its output (#53)
* Added pixverse_template support to Pixverse Text to Video node (#54)
* Added Recraft Controls + Recraft Color RGB nodes (#57)
* split remaining nodes out of nodes_api, make utility lib, refactor ideogram (#61)
* Add types and docstrings to utils file (#64)
* Fix: `PollingOperation` progress bar update progress by absolute value (#65)
* Use common download function in kling nodes module (#67)
* Fix: Luma video nodes in `api nodes/image` category (#68)
* Set request type explicitly (#66)
* Add `control_after_generate` to all seed inputs (#69)
* Fix bug: deleting `Content-Type` when property does not exist (#73)
* Add preview to Save SVG node (#74)
* change default poll interval (#76), rework veo2
* Add Pixverse and updated Kling types (#75)
* Added Pixverse Image to Video node (#77)
* Add Pixverse Transition Video node (#79)
* Proper ray-1-6 support as fix has been applied in backend (#80)
* Added Recraft Style - Infinite Style Library node (#82)
* add ideogram v3 (#83)
* [Kling] Split Camera Control config to its own node (#81)
* Add Pika i2v and t2v nodes (#52)
* Temporary Fix for Runway (#87)
* Added Stability Stable Image Ultra node (#86)
* Remove Runway nodes (#88)
* Fix: Prompt text can't be validated in Kling nodes when using primitive nodes (#90)
* Fix: typo in node name "Stabiliy" => "Stability" (#91)
* Add String (Multiline) node (#93)
* Update Pika Duration and Resolution options (#94)
* Change base branch to master. Not main. (#95)
* Fix UploadRequest file_name param (#98)
* Removed Infinite Style Library until later (#99)
* fix ideogram style types (#100)
* fix multi image return (#101)
* add metadata saving to SVG (#102)
* Bump templates version to include API node template workflows (#104)
* Fix: `download_url_to_video_output` return type (#103)
* fix 4o generation bug (#106)
* Serve SVG files directly (#107)
* Add a bunch of nodes, 3 ready to use, the rest waiting for endpoint support (#108)
* Revert "Serve SVG files directly" (#111)
* Expose 4 remaining Recraft nodes (#112)
* [Kling] Add `Duration` and `Video ID` outputs (#105)
* Fix: datamodel-codegen sets string#binary type to non-existent `bytes_aliased` variable (#114)
* Fix: Dall-e 2 not setting request content-type dynamically (#113)
* Default request timeout: one hour. (#116)
* Add Kling nodes: camera control, start-end frame, lip-sync, video extend (#115)
* Add 8 nodes - 4 BFL, 4 Stability (#117)
* Fix error for Recraft ImageToImage error for nonexistent random_seed param (#118)
* Add remaining Pika nodes (#119)
* Make controls input work for Recraft Image to Image node (#120)
* Use upstream PR: Support saving Comfy VIDEO type to buffer (#123)
* Use Upstream PR: "Fix: Error creating video when sliced audio tensor chunks are non-c-contiguous" (#127)
* Improve audio upload utils (#128)
* Fix: Nested `AnyUrl` in request model cannot be serialized (Kling, Runway) (#129)
* Show errors and API output URLs to the user (change log levels) (#131)
* Fix: Luma I2I fails when weight is <=0.01 (#132)
* Change category of `LumaConcepts` node from image to video (#133)
* Fix: `image.shape` accessed before `image` is null-checked (#134)
* Apply small fixes and most prompt validation (if needed to avoid API error) (#135)
* Node name/category modifications (#140)
* Add back Recraft Style - Infinite Style Library node (#141)
* Fixed Kling: Check attributes of pydantic types. (#144)
* Bump `comfyui-workflow-templates` version (#142)
* [Kling] Print response data when error validating response (#146)
* Fix: error validating Kling image response, trying to use `"key" in` on Pydantic class instance (#147)
* [Kling] Fix: Correct/verify supported subset of input combos in Kling nodes (#149)
* [Kling] Fix typo in node description (#150)
* [Kling] Fix: CFG min/max not being enforced (#151)
* Rebase launch-rebase (private) on prep-branch (public copy of master) (#153)
* Bump templates version (#154)
* Fix: Kling image gen nodes don't return entire batch when `n` > 1 (#152)
* Remove pixverse_template from PixVerse Transition Video node (#155)
* Invert image_weight value on Luma Image to Image node (#156)
* Invert and resize mask for Ideogram V3 node to match masking conventions (#158)
* [Kling] Fix: image generation nodes not returning Tuple (#159)
* [Bug] [Kling] Fix Kling camera control (#161)
* Kling Image Gen v2 + improve node descriptions for Flux/OpenAI (#160)
* [Kling] Don't return video_id from dual effect video (#162)
* Bump frontend to 1.18.8 (#163)
* Use 3.9 compat syntax (#164)
* Use Python 3.10
* add example env var
* Update templates to 0.1.11
* Bump frontend to 1.18.9

---------

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>
Co-authored-by: Christian Byrne <cbyrne@comfy.org>
Co-authored-by: thot experiment <94414189+thot-experiment@users.noreply.github.com>
import io
from inspect import cleandoc

import numpy as np
import torch
from PIL import Image

from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict

from comfy_api_nodes.apis import (
    OpenAIImageGenerationRequest,
    OpenAIImageEditRequest,
    OpenAIImageGenerationResponse,
)

from comfy_api_nodes.apis.client import (
    ApiEndpoint,
    HttpMethod,
    SynchronousOperation,
)

from comfy_api_nodes.apinode_utils import (
    downscale_image_tensor,
    validate_and_cast_response,
    validate_string,
)
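
# Note: the apinode_utils helpers imported above are used as follows in this module
# (summarized from their call sites below; see apinode_utils for the authoritative behavior):
#   - validate_string(prompt, ...): reject empty/invalid prompt text before a request is built
#   - downscale_image_tensor(t): shrink an image tensor to a size the API accepts
#   - validate_and_cast_response(response): convert the API response into a ComfyUI IMAGE tensor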


class OpenAIDalle2(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 2 endpoint.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for DALL·E",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "not implemented yet in backend",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["256x256", "512x512", "1024x1024"],
                        "default": "1024x1024",
                        "tooltip": "Image size",
                    },
                ),
                "n": (
                    IO.INT,
                    {
                        "default": 1,
                        "min": 1,
                        "max": 8,
                        "step": 1,
                        "display": "number",
                        "tooltip": "How many images to generate",
                    },
                ),
                "image": (
                    IO.IMAGE,
                    {
                        "default": None,
                        "tooltip": "Optional reference image for image editing.",
                    },
                ),
                "mask": (
                    IO.MASK,
                    {
                        "default": None,
                        "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                    },
                ),
            },
            "hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
        }
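
    # The hidden "auth_token" entry above never appears in the UI; for API nodes it is
    # expected to be supplied by the ComfyUI runtime and is simply forwarded to the
    # SynchronousOperation in api_call below.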

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/OpenAI"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        image=None,
        mask=None,
        n=1,
        size="1024x1024",
        auth_token=None,
    ):
        validate_string(prompt, strip_whitespace=False)
        model = "dall-e-2"
        path = "/proxy/openai/images/generations"
        content_type = "application/json"
        request_class = OpenAIImageGenerationRequest
        img_binary = None
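
        # Editing path: when both an image and a mask are provided, the request is sent
        # to the image-edits endpoint as multipart/form-data. The image is packed into an
        # RGBA PNG whose alpha channel is set to (1 - mask), so white (1.0) mask pixels
        # become transparent and mark the regions the API should repaint.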
        if image is not None and mask is not None:
            path = "/proxy/openai/images/edits"
            content_type = "multipart/form-data"
            request_class = OpenAIImageEditRequest

            input_tensor = image.squeeze().cpu()
            height, width, channels = input_tensor.shape
            rgba_tensor = torch.ones(height, width, 4, device="cpu")
            rgba_tensor[:, :, :channels] = input_tensor

            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            rgba_tensor[:, :, 3] = 1 - mask.squeeze().cpu()

            rgba_tensor = downscale_image_tensor(rgba_tensor.unsqueeze(0)).squeeze()

            image_np = (rgba_tensor.numpy() * 255).astype(np.uint8)
            img = Image.fromarray(image_np)
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format="PNG")
            img_byte_arr.seek(0)
            img_binary = img_byte_arr  # .getvalue()
            img_binary.name = "image.png"
        elif image is not None or mask is not None:
            raise Exception("Dall-E 2 image editing requires an image AND a mask")

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                n=n,
                size=size,
                seed=seed,
            ),
            files=(
                {
                    "image": img_binary,
                }
                if img_binary
                else None
            ),
            content_type=content_type,
            auth_token=auth_token,
        )
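
        # execute() performs the HTTP call synchronously and returns the parsed
        # OpenAIImageGenerationResponse; validate_and_cast_response then converts the
        # returned image data into the IMAGE tensor ComfyUI expects.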
        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)


class OpenAIDalle3(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 3 endpoint.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for DALL·E",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "not implemented yet in backend",
                    },
                ),
                "quality": (
                    IO.COMBO,
                    {
                        "options": ["standard", "hd"],
                        "default": "standard",
                        "tooltip": "Image quality",
                    },
                ),
                "style": (
                    IO.COMBO,
                    {
                        "options": ["natural", "vivid"],
                        "default": "natural",
                        "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["1024x1024", "1024x1792", "1792x1024"],
                        "default": "1024x1024",
                        "tooltip": "Image size",
                    },
                ),
            },
            "hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/OpenAI"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        style="natural",
        quality="standard",
        size="1024x1024",
        auth_token=None,
    ):
        validate_string(prompt, strip_whitespace=False)
        model = "dall-e-3"
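
        # DALL·E 3 is text-to-image only in this node: there is no image/mask input and
        # no "n" parameter, so each call requests a single generated image.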
        # build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path="/proxy/openai/images/generations",
                method=HttpMethod.POST,
                request_model=OpenAIImageGenerationRequest,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=OpenAIImageGenerationRequest(
                model=model,
                prompt=prompt,
                quality=quality,
                size=size,
                style=style,
                seed=seed,
            ),
            auth_token=auth_token,
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)


class OpenAIGPTImage1(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's GPT Image 1 endpoint.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for GPT Image 1",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "not implemented yet in backend",
                    },
                ),
                "quality": (
                    IO.COMBO,
                    {
                        "options": ["low", "medium", "high"],
                        "default": "low",
                        "tooltip": "Image quality, affects cost and generation time.",
                    },
                ),
                "background": (
                    IO.COMBO,
                    {
                        "options": ["opaque", "transparent"],
                        "default": "opaque",
                        "tooltip": "Return image with or without background",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
                        "default": "auto",
                        "tooltip": "Image size",
                    },
                ),
                "n": (
                    IO.INT,
                    {
                        "default": 1,
                        "min": 1,
                        "max": 8,
                        "step": 1,
                        "display": "number",
                        "tooltip": "How many images to generate",
                    },
                ),
                "image": (
                    IO.IMAGE,
                    {
                        "default": None,
                        "tooltip": "Optional reference image for image editing.",
                    },
                ),
                "mask": (
                    IO.MASK,
                    {
                        "default": None,
                        "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                    },
                ),
            },
            "hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/OpenAI"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        quality="low",
        background="opaque",
        image=None,
        mask=None,
        n=1,
        size="1024x1024",
        auth_token=None,
    ):
        validate_string(prompt, strip_whitespace=False)
        model = "gpt-image-1"
        path = "/proxy/openai/images/generations"
        content_type = "application/json"
        request_class = OpenAIImageGenerationRequest
        img_binaries = []
        mask_binary = None
        files = []
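
        # Editing path: any reference image switches the request to the image-edits
        # endpoint as multipart/form-data. Each image in the batch is re-encoded as a
        # PNG; a single image is uploaded under the "image" field, while a batch uses
        # the "image[]" field name so the endpoint receives multiple files.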
        if image is not None:
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest
            content_type = "multipart/form-data"

            batch_size = image.shape[0]

            for i in range(batch_size):
                single_image = image[i : i + 1]
                scaled_image = downscale_image_tensor(single_image).squeeze()

                image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                img = Image.fromarray(image_np)
                img_byte_arr = io.BytesIO()
                img.save(img_byte_arr, format="PNG")
                img_byte_arr.seek(0)
                img_binary = img_byte_arr
                img_binary.name = f"image_{i}.png"

                img_binaries.append(img_binary)
                if batch_size == 1:
                    files.append(("image", img_binary))
                else:
                    files.append(("image[]", img_binary))
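
        # A mask requires exactly one input image of matching size. As in the DALL·E 2
        # edit path, it is sent as an RGBA PNG whose alpha channel is (1 - mask), so
        # white mask pixels mark the area to be regenerated.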
        if mask is not None:
            if image is None:
                raise Exception("Cannot use a mask without an input image")
            if image.shape[0] != 1:
                raise Exception("Cannot use a mask with multiple images")
            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            batch, height, width = mask.shape
            rgba_mask = torch.zeros(height, width, 4, device="cpu")
            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()

            scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze()

            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
            mask_img = Image.fromarray(mask_np)
            mask_img_byte_arr = io.BytesIO()
            mask_img.save(mask_img_byte_arr, format="PNG")
            mask_img_byte_arr.seek(0)
            mask_binary = mask_img_byte_arr
            mask_binary.name = "mask.png"
            files.append(("mask", mask_binary))

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                quality=quality,
                background=background,
                n=n,
                seed=seed,
                size=size,
            ),
            files=files if files else None,
            content_type=content_type,
            auth_token=auth_token,
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)


# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
    "OpenAIDalle2": OpenAIDalle2,
    "OpenAIDalle3": OpenAIDalle3,
    "OpenAIGPTImage1": OpenAIGPTImage1,
}

# A dictionary that contains the friendly/human-readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
    "OpenAIDalle2": "OpenAI DALL·E 2",
    "OpenAIDalle3": "OpenAI DALL·E 3",
    "OpenAIGPTImage1": "OpenAI GPT Image 1",
}
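
# Note: ComfyUI picks these nodes up by importing this module and reading
# NODE_CLASS_MAPPINGS / NODE_DISPLAY_NAME_MAPPINGS, which is why the keys above
# must stay globally unique across all loaded node modules.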