mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-09-15 05:57:57 +00:00
Take some code from chainner to implement ESRGAN and other upscale models.
This commit is contained in:
114
comfy_extras/chainner_models/architecture/SRVGG.py
Normal file
114
comfy_extras/chainner_models/architecture/SRVGG.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import math
|
||||
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class SRVGGNetCompact(nn.Module):
|
||||
"""A compact VGG-style network structure for super-resolution.
|
||||
It is a compact network structure, which performs upsampling in the last layer and no convolution is
|
||||
conducted on the HR feature space.
|
||||
Args:
|
||||
num_in_ch (int): Channel number of inputs. Default: 3.
|
||||
num_out_ch (int): Channel number of outputs. Default: 3.
|
||||
num_feat (int): Channel number of intermediate features. Default: 64.
|
||||
num_conv (int): Number of convolution layers in the body network. Default: 16.
|
||||
upscale (int): Upsampling factor. Default: 4.
|
||||
act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
state_dict,
|
||||
act_type: str = "prelu",
|
||||
):
|
||||
super(SRVGGNetCompact, self).__init__()
|
||||
self.model_arch = "SRVGG (RealESRGAN)"
|
||||
self.sub_type = "SR"
|
||||
|
||||
self.act_type = act_type
|
||||
|
||||
self.state = state_dict
|
||||
|
||||
if "params" in self.state:
|
||||
self.state = self.state["params"]
|
||||
|
||||
self.key_arr = list(self.state.keys())
|
||||
|
||||
self.in_nc = self.get_in_nc()
|
||||
self.num_feat = self.get_num_feats()
|
||||
self.num_conv = self.get_num_conv()
|
||||
self.out_nc = self.in_nc # :(
|
||||
self.pixelshuffle_shape = None # Defined in get_scale()
|
||||
self.scale = self.get_scale()
|
||||
|
||||
self.supports_fp16 = True
|
||||
self.supports_bfp16 = True
|
||||
self.min_size_restriction = None
|
||||
|
||||
self.body = nn.ModuleList()
|
||||
# the first conv
|
||||
self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1))
|
||||
# the first activation
|
||||
if act_type == "relu":
|
||||
activation = nn.ReLU(inplace=True)
|
||||
elif act_type == "prelu":
|
||||
activation = nn.PReLU(num_parameters=self.num_feat)
|
||||
elif act_type == "leakyrelu":
|
||||
activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
|
||||
self.body.append(activation) # type: ignore
|
||||
|
||||
# the body structure
|
||||
for _ in range(self.num_conv):
|
||||
self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1))
|
||||
# activation
|
||||
if act_type == "relu":
|
||||
activation = nn.ReLU(inplace=True)
|
||||
elif act_type == "prelu":
|
||||
activation = nn.PReLU(num_parameters=self.num_feat)
|
||||
elif act_type == "leakyrelu":
|
||||
activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
|
||||
self.body.append(activation) # type: ignore
|
||||
|
||||
# the last conv
|
||||
self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore
|
||||
# upsample
|
||||
self.upsampler = nn.PixelShuffle(self.scale)
|
||||
|
||||
self.load_state_dict(self.state, strict=False)
|
||||
|
||||
def get_num_conv(self) -> int:
|
||||
return (int(self.key_arr[-1].split(".")[1]) - 2) // 2
|
||||
|
||||
def get_num_feats(self) -> int:
|
||||
return self.state[self.key_arr[0]].shape[0]
|
||||
|
||||
def get_in_nc(self) -> int:
|
||||
return self.state[self.key_arr[0]].shape[1]
|
||||
|
||||
def get_scale(self) -> int:
|
||||
self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0]
|
||||
# Assume out_nc is the same as in_nc
|
||||
# I cant think of a better way to do that
|
||||
self.out_nc = self.in_nc
|
||||
scale = math.sqrt(self.pixelshuffle_shape / self.out_nc)
|
||||
if scale - int(scale) > 0:
|
||||
print(
|
||||
"out_nc is probably different than in_nc, scale calculation might be wrong"
|
||||
)
|
||||
scale = int(scale)
|
||||
return scale
|
||||
|
||||
def forward(self, x):
|
||||
out = x
|
||||
for i in range(0, len(self.body)):
|
||||
out = self.body[i](out)
|
||||
|
||||
out = self.upsampler(out)
|
||||
# add the nearest upsampled image, so that the network learns the residual
|
||||
base = F.interpolate(x, scale_factor=self.scale, mode="nearest")
|
||||
out += base
|
||||
return out
|
Reference in New Issue
Block a user