Take some code from chainner to implement ESRGAN and other upscale models.

2025-09-15 05:57:57 +00:00 · 2023-03-11 13:09:28 -05:00
parent 8c4ccb55d1
commit 905857edd8
45 changed files with 16654 additions and 0 deletions
--- a/comfy_extras/chainner_models/architecture/SRVGG.py
+++ b/comfy_extras/chainner_models/architecture/SRVGG.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import math
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SRVGGNetCompact(nn.Module):
+    """A compact VGG-style network structure for super-resolution.
+    It is a compact network structure, which performs upsampling in the last layer and no convolution is
+    conducted on the HR feature space.
+    Args:
+        num_in_ch (int): Channel number of inputs. Default: 3.
+        num_out_ch (int): Channel number of outputs. Default: 3.
+        num_feat (int): Channel number of intermediate features. Default: 64.
+        num_conv (int): Number of convolution layers in the body network. Default: 16.
+        upscale (int): Upsampling factor. Default: 4.
+        act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.
+    """
+
+    def __init__(
+        self,
+        state_dict,
+        act_type: str = "prelu",
+    ):
+        super(SRVGGNetCompact, self).__init__()
+        self.model_arch = "SRVGG (RealESRGAN)"
+        self.sub_type = "SR"
+
+        self.act_type = act_type
+
+        self.state = state_dict
+
+        if "params" in self.state:
+            self.state = self.state["params"]
+
+        self.key_arr = list(self.state.keys())
+
+        self.in_nc = self.get_in_nc()
+        self.num_feat = self.get_num_feats()
+        self.num_conv = self.get_num_conv()
+        self.out_nc = self.in_nc  # :(
+        self.pixelshuffle_shape = None  # Defined in get_scale()
+        self.scale = self.get_scale()
+
+        self.supports_fp16 = True
+        self.supports_bfp16 = True
+        self.min_size_restriction = None
+
+        self.body = nn.ModuleList()
+        # the first conv
+        self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1))
+        # the first activation
+        if act_type == "relu":
+            activation = nn.ReLU(inplace=True)
+        elif act_type == "prelu":
+            activation = nn.PReLU(num_parameters=self.num_feat)
+        elif act_type == "leakyrelu":
+            activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+        self.body.append(activation)  # type: ignore
+
+        # the body structure
+        for _ in range(self.num_conv):
+            self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1))
+            # activation
+            if act_type == "relu":
+                activation = nn.ReLU(inplace=True)
+            elif act_type == "prelu":
+                activation = nn.PReLU(num_parameters=self.num_feat)
+            elif act_type == "leakyrelu":
+                activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+            self.body.append(activation)  # type: ignore
+
+        # the last conv
+        self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1))  # type: ignore
+        # upsample
+        self.upsampler = nn.PixelShuffle(self.scale)
+
+        self.load_state_dict(self.state, strict=False)
+
+    def get_num_conv(self) -> int:
+        return (int(self.key_arr[-1].split(".")[1]) - 2) // 2
+
+    def get_num_feats(self) -> int:
+        return self.state[self.key_arr[0]].shape[0]
+
+    def get_in_nc(self) -> int:
+        return self.state[self.key_arr[0]].shape[1]
+
+    def get_scale(self) -> int:
+        self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0]
+        # Assume out_nc is the same as in_nc
+        # I cant think of a better way to do that
+        self.out_nc = self.in_nc
+        scale = math.sqrt(self.pixelshuffle_shape / self.out_nc)
+        if scale - int(scale) > 0:
+            print(
+                "out_nc is probably different than in_nc, scale calculation might be wrong"
+            )
+        scale = int(scale)
+        return scale
+
+    def forward(self, x):
+        out = x
+        for i in range(0, len(self.body)):
+            out = self.body[i](out)
+
+        out = self.upsampler(out)
+        # add the nearest upsampled image, so that the network learns the residual
+        base = F.interpolate(x, scale_factor=self.scale, mode="nearest")
+        out += base
+        return out