-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExampleTorchProcessors.py
More file actions
142 lines (125 loc) · 5.73 KB
/
ExampleTorchProcessors.py
File metadata and controls
142 lines (125 loc) · 5.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import cv2
import numpy as np
import torch
import torch.nn as nn
from SimpleFramework.Utility import MatrixRenderer
from SimpleFramework.SimpleFrameworkImageApplier import ImageProcessor
class Conv2DProcessor(ImageProcessor):
    """
    Multi-step processor:
    1. Original Image
    2. Grayscale Conversion
    3. Convolution Effect
    4. Max Pooling
    5. Batch Normalization

    Each ``applyN`` method takes a dict mapping label -> BGR image (uint8
    numpy array, as produced by OpenCV) and returns a dict mapping a
    step-suffixed label -> visualization image in the same BGR format.
    """

    # Default 3x3 sharpening kernel, used when the caller supplies none.
    _DEFAULT_KERNEL = np.array([[0, -1, 0],
                                [-1, 5, -1],
                                [0, -1, 0]], dtype=np.float32)

    def __init__(self, theName="sharping kernel conv2d", kernel=None):
        """
        Args:
            theName: Display name returned by :meth:`name`.
            kernel: Optional 2D convolution kernel (array-like, odd-sized
                per axis so `padding = size // 2` keeps spatial dims).
                Defaults to the 3x3 sharpening kernel.

        Note: the original implementation accepted ``kernel`` but always
        overwrote it with the hard-coded sharpening kernel; it is now
        honored. ``kernel=None`` replaces the mutable array default so no
        array instance is shared across calls.
        """
        super().__init__()
        self._name = theName
        if kernel is None:
            self.kernel = self._DEFAULT_KERNEL.copy()
        else:
            self.kernel = np.asarray(kernel, dtype=np.float32)
        # Per-axis padding generalizes to rectangular kernels.
        kh, kw = self.kernel.shape
        self.conv = nn.Conv2d(1, 1, kernel_size=(kh, kw),
                              padding=(kh // 2, kw // 2), bias=False)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.batchnorm = nn.BatchNorm2d(1)
        with torch.no_grad():
            weight = torch.from_numpy(self.kernel).unsqueeze(0).unsqueeze(0)
            self.conv.weight.copy_(weight)

    def name(self):
        """Return the processor's display name."""
        return self._name

    def step_names(self):
        """Return the ordered labels for the five processing steps."""
        return ["Original Image", "After Grayscale", "After Conv2D", "After MaxPool", "After BatchNorm"]

    def _to_gray_tensor(self, img):
        """Convert a BGR image to a (1, 1, H, W) float32 grayscale tensor."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)
        return torch.from_numpy(gray).unsqueeze(0).unsqueeze(0)

    @staticmethod
    def _to_display_bgr(arr):
        """Min-max normalize a 2D float array to uint8 and convert to BGR."""
        norm = cv2.normalize(arr, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        return cv2.cvtColor(norm, cv2.COLOR_GRAY2BGR)

    def apply1(self, images: dict) -> dict:
        """Step 1: pass the original images through unchanged (copied)."""
        results = {}
        for label, img in images.items():
            results[f"{label} + Original Image"] = img.copy()
        return results

    def apply2(self, images: dict) -> dict:
        """Step 2: grayscale each image (re-expanded to BGR for display)."""
        results = {}
        for label, img in images.items():
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            gray_bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
            results[f"{label} + After Grayscale"] = gray_bgr
        return results

    def apply3(self, images: dict) -> dict:
        """Step 3: apply the configured 2D convolution to the grayscale image."""
        results = {}
        for label, img in images.items():
            tensor = self._to_gray_tensor(img)
            # Inference only: no autograd graph needed.
            with torch.no_grad():
                out = self.conv(tensor).squeeze().numpy()
            results[f"{label} + After Conv2D"] = self._to_display_bgr(out)
        return results

    def apply4(self, images: dict) -> dict:
        """Step 4: 2x2 max-pool the grayscale image (halves each dimension)."""
        results = {}
        for label, img in images.items():
            tensor = self._to_gray_tensor(img)
            with torch.no_grad():
                pooled = self.pool(tensor).squeeze().numpy()
            results[f"{label} + After MaxPool"] = self._to_display_bgr(pooled)
        return results

    def apply5(self, images: dict) -> dict:
        """Step 5: batch-normalize the grayscale image.

        NOTE(review): the module stays in training mode, so this normalizes
        with the current batch's own statistics (and updates running stats)
        rather than using stored running averages — presumably intentional
        for a per-image demo; confirm if eval-mode behavior is wanted.
        """
        results = {}
        for label, img in images.items():
            tensor = self._to_gray_tensor(img)
            with torch.no_grad():
                bn = self.batchnorm(tensor).squeeze().numpy()
            results[f"{label} + After BatchNorm"] = self._to_display_bgr(bn)
        return results
class Conv2DProcessorValueRenderer(ImageProcessor):
    """
    Applies a fixed 3x3 sharpening kernel to a grayscale image,
    and provides visualizations of the pixel matrix before and after convolution.
    """

    def __init__(self):
        super().__init__()
        # Fixed sharpening kernel; its weights are copied into the conv layer.
        self.kernel = np.array([[0, -1, 0],
                                [-1, 5, -1],
                                [0, -1, 0]], dtype=np.float32)
        self.renderer = MatrixRenderer(
            cell_size=30, font_scale=0.5, thickness=1)
        # Single-channel conv layer sized to the kernel, padded to keep dims.
        size = self.kernel.shape[0]
        self.conv = nn.Conv2d(1, 1, kernel_size=size,
                              padding=size // 2, bias=False)
        with torch.no_grad():
            self.conv.weight.copy_(
                torch.from_numpy(self.kernel)[None, None, :, :])

    def name(self):
        """Display name of this processor."""
        return "Conv2D Effect"

    def step_names(self):
        """Labels for the two rendered matrices."""
        return ["Original Matrix", "After Conv2D"]

    def apply1(self, images: dict) -> dict:
        """Render each image's grayscale value matrix before and after the
        sharpening convolution.

        Args:
            images: dict mapping label -> BGR image (OpenCV format).

        Returns:
            dict mapping step-suffixed labels to rendered matrix images.
        """
        rendered = {}
        for label, frame in images.items():
            # Grayscale float matrix of the input frame.
            values = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32)
            before_vis = self.renderer.render(values)
            # Push through the PyTorch conv layer as a (1, 1, H, W) batch.
            batch = torch.from_numpy(values)[None, None, :, :]
            convolved = self.conv(batch).detach().squeeze().numpy()
            # Rescale to [0, 255] so the values render as pixel intensities.
            scaled = cv2.normalize(
                convolved, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
            rendered[f"{label} + Original Matrix"] = before_vis
            rendered[f"{label} + After Conv2D"] = self.renderer.render(scaled)
        return rendered