DEV Community

Super Kai (Kazuya Ito)
Super Kai (Kazuya Ito)

Posted on

RandomResizedCrop in PyTorch (1)

Buy Me a Coffee

*Memos:

RandomResizedCrop() can crop a random part of an image, then resize it to a given size as shown below:

*Memos:

  • The 1st argument for initialization is size(Required-Type:int or tuple/list(int) or size()): *Memos:
    • It's [height, width].
    • It must be 1 <= x.
    • A tuple/list must be 1D with 1 or 2 elements.
    • A single value(int or tuple/list(int)) means [size, size].
  • The 2nd argument for initialization is scale(Optional-Type:tuple/list(int or float)): *Memos:
    • It's [min, max] so it must be min <= max.
    • It must be 0 <= x.
    • A tuple/list must be 1D with 2 elements.
    • A pair of identical values that are 0 or 1 <= x (e.g. [0, 0], [1, 1] or [10, 10]) gets the same result.
  • The 3rd argument for initialization is ratio(Optional-Type:tuple/list(int or float)): *Memos:
    • It's [min, max] so it must be min <= max.
    • It must be 0 < x.
    • A tuple/list must be 1D with 2 elements.
  • The 4th argument for initialization is interpolation(Optional-Default:InterpolationMode.BILINEAR-Type:InterpolationMode).
  • The 5th argument for initialization is antialias(Optional-Default:True-Type:bool). *Even if it's set to False, it's always True if interpolation is InterpolationMode.BILINEAR or InterpolationMode.BICUBIC.
  • The 1st argument is img(Required-Type:PIL Image or tensor(int)): *Memos:
    • A tensor must be 3D.
    • Don't use img=.
  • Using v2 is recommended according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import RandomResizedCrop
from torchvision.transforms.functional import InterpolationMode

# Build the transform twice: first with only the required `size`, then with
# every optional argument spelled out. The repr printed below shows that the
# explicit values match what the transform reports for itself.
rrc = RandomResizedCrop(size=100)
rrc = RandomResizedCrop(size=100,
                        scale=(0.08, 1.0),
                        ratio=(0.75, 1.3333333333333333),
                        interpolation=InterpolationMode.BILINEAR,
                        antialias=True)
rrc
# RandomResizedCrop(size=(100, 100),
#                   scale=(0.08, 1.0),
#                   ratio=(0.75, 1.3333333333333333),
#                   interpolation=InterpolationMode.BILINEAR,
#                   antialias=True)

# A single int size is stored as (height, width).
rrc.size
# (100, 100)

rrc.scale
# (0.08, 1.0)

rrc.ratio
# (0.75, 1.3333333333333333)

# The attribute is `interpolation`; the previous `interpolationa` was a typo
# that raised AttributeError.
rrc.interpolation
# <InterpolationMode.BILINEAR: 'bilinear'>

rrc.antialias
# True

def _pets(transform=None):
    """Return an OxfordIIITPet dataset rooted at "data" using `transform`."""
    return OxfordIIITPet(root="data", transform=transform)


# Baseline: images are returned without any transform.
origin_data = _pets(None)

# `s` is size. A single int, a 1-element list and a 2-element list are
# interchangeable ways to ask for a square output.
s1000_data = _pets(RandomResizedCrop(size=1000))
# s1000_data = _pets(RandomResizedCrop(size=[1000]))
# s1000_data = _pets(RandomResizedCrop(size=[1000, 1000]))
s100_data = _pets(RandomResizedCrop(size=100))
s50_data = _pets(RandomResizedCrop(size=50))
s10_data = _pets(RandomResizedCrop(size=10))
s1_data = _pets(RandomResizedCrop(size=1))

# Non-square sizes are given as [height, width].
s200_300_data = _pets(RandomResizedCrop(size=[200, 300]))
s300_200_data = _pets(RandomResizedCrop(size=[300, 200]))

# `sc` is scale, given as [min, max].
s100sc0_1_data = _pets(RandomResizedCrop(size=100, scale=[0, 1]))
s100sc0_05_data = _pets(RandomResizedCrop(size=100, scale=[0, 0.5]))
s100sc05_1_data = _pets(RandomResizedCrop(size=100, scale=[0.5, 1]))

# Fixed scales (min == max), from 0 up to values far above 1.
s100sc0_0_data = _pets(RandomResizedCrop(size=100, scale=[0, 0]))
s100sc0001_0001_data = _pets(
    RandomResizedCrop(size=100, scale=[0.001, 0.001])
)
s100sc001_001_data = _pets(RandomResizedCrop(size=100, scale=[0.01, 0.01]))
s100sc01_01_data = _pets(RandomResizedCrop(size=100, scale=[0.1, 0.1]))
s100sc02_02_data = _pets(RandomResizedCrop(size=100, scale=[0.2, 0.2]))
s100sc03_03_data = _pets(RandomResizedCrop(size=100, scale=[0.3, 0.3]))
s100sc04_04_data = _pets(RandomResizedCrop(size=100, scale=[0.4, 0.4]))
s100sc05_05_data = _pets(RandomResizedCrop(size=100, scale=[0.5, 0.5]))
s100sc06_06_data = _pets(RandomResizedCrop(size=100, scale=[0.6, 0.6]))
s100sc07_07_data = _pets(RandomResizedCrop(size=100, scale=[0.7, 0.7]))
s100sc08_08_data = _pets(RandomResizedCrop(size=100, scale=[0.8, 0.8]))
s100sc09_09_data = _pets(RandomResizedCrop(size=100, scale=[0.9, 0.9]))
s100sc1_1_data = _pets(RandomResizedCrop(size=100, scale=[1, 1]))
s100sc10_10_data = _pets(RandomResizedCrop(size=100, scale=[10, 10]))
s100sc100_100_data = _pets(RandomResizedCrop(size=100, scale=[100, 100]))

import matplotlib.pyplot as plt

def show_images1(data, main_title=None):
    """Show the first five images of `data` side by side in one figure.

    Args:
        data: Iterable yielding (image, label) pairs; only the image is
            drawn.
        main_title: Title placed above the row of images.
    """
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    for column, (image, _label) in enumerate(data, start=1):
        plt.subplot(1, 5, column)
        plt.imshow(X=image)
        if column == 5:
            # Stop before pulling a sixth item out of `data`.
            break
    plt.tight_layout()
    plt.show()

# Output size: the same images cropped and resized to different square sizes.
show_images1(data=origin_data, main_title="origin_data")
show_images1(data=s1000_data, main_title="s1000_data")
show_images1(data=s100_data, main_title="s100_data")
show_images1(data=s50_data, main_title="s50_data")
show_images1(data=s10_data, main_title="s10_data")
show_images1(data=s1_data, main_title="s1_data")
print()
# Non-square [height, width] sizes.
# show_images1 is used here: show_images2 is only defined further down, so
# calling it at this point raised NameError.
show_images1(data=origin_data, main_title="origin_data")
show_images1(data=s200_300_data, main_title="s200_300_data")
show_images1(data=s300_200_data, main_title="s300_200_data")
print()
# Scale ranges with min < max.
show_images1(data=s100sc0_1_data, main_title="s100sc0_1_data")
show_images1(data=s100sc0_05_data, main_title="s100sc0_05_data")
show_images1(data=s100sc05_1_data, main_title="s100sc05_1_data")
print()
# Fixed scales (min == max).
show_images1(data=s100sc0_0_data, main_title="s100sc0_0_data")
show_images1(data=s100sc0001_0001_data, main_title="s100sc0001_0001_data")
show_images1(data=s100sc001_001_data, main_title="s100sc001_001_data")
show_images1(data=s100sc01_01_data, main_title="s100sc01_01_data")
show_images1(data=s100sc02_02_data, main_title="s100sc02_02_data")
show_images1(data=s100sc03_03_data, main_title="s100sc03_03_data")
show_images1(data=s100sc04_04_data, main_title="s100sc04_04_data")
show_images1(data=s100sc05_05_data, main_title="s100sc05_05_data")
show_images1(data=s100sc06_06_data, main_title="s100sc06_06_data")
show_images1(data=s100sc07_07_data, main_title="s100sc07_07_data")
show_images1(data=s100sc08_08_data, main_title="s100sc08_08_data")
show_images1(data=s100sc09_09_data, main_title="s100sc09_09_data")
show_images1(data=s100sc1_1_data, main_title="s100sc1_1_data")
show_images1(data=s100sc10_10_data, main_title="s100sc10_10_data")
show_images1(data=s100sc100_100_data, main_title="s100sc100_100_data")

# ↓ ↓ ↓ ↓ ↓ ↓ The code below is equivalent to the code above. ↓ ↓ ↓ ↓ ↓ ↓
def show_images2(data, main_title=None, s=None, sc=(0.08, 1.0),
                 r=(0.75, 1.3333333333333333),
                 ip=InterpolationMode.BILINEAR, a=True):
    """Show the first five images of `data`, optionally random-resized-cropped.

    Args:
        data: Iterable yielding (image, label) pairs; only the image is
            drawn.
        main_title: Title placed above the row of images.
        s: `size` passed to RandomResizedCrop. When falsy (the default
            None), the images are drawn unchanged and the remaining
            arguments are ignored.
        sc: `scale` passed to RandomResizedCrop.
        r: `ratio` passed to RandomResizedCrop.
        ip: `interpolation` passed to RandomResizedCrop.
        a: `antialias` passed to RandomResizedCrop.
    """
    # The transform's settings are the same for every image, so build it once
    # instead of once per loop iteration. Each call still draws a new random
    # crop.
    rrc = None
    if s:
        rrc = RandomResizedCrop(size=s, scale=sc, # Here
                                ratio=r, interpolation=ip,
                                antialias=a)
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    for i, (im, _) in zip(range(1, 6), data):
        plt.subplot(1, 5, i)
        if rrc is not None:
            plt.imshow(X=rrc(im)) # Here
        else:
            plt.imshow(X=im)
    plt.tight_layout()
    plt.show()

# Each entry is (figure title, crop settings forwarded to show_images2).
# A None entry prints a blank line to separate groups of figures.
_figures = [
    ("origin_data", {}),
    ("s1000_data", {"s": 1000}),
    ("s100_data", {"s": 100}),
    ("s50_data", {"s": 50}),
    ("s10_data", {"s": 10}),
    ("s1_data", {"s": 1}),
    None,
    ("origin_data", {}),
    ("s200_300_data", {"s": [200, 300]}),
    ("s300_200_data", {"s": [300, 200]}),
    None,
    ("s100sc0_1_data", {"s": 100, "sc": [0, 1]}),
    ("s100sc0_05_data", {"s": 100, "sc": [0, 0.5]}),
    ("s100sc05_1_data", {"s": 100, "sc": [0.5, 1]}),
    None,
    ("s100sc0_0_data", {"s": 100, "sc": [0, 0]}),
    ("s100sc0001_0001_data", {"s": 100, "sc": [0.001, 0.001]}),
    ("s100sc001_001_data", {"s": 100, "sc": [0.01, 0.01]}),
    ("s100sc01_01_data", {"s": 100, "sc": [0.1, 0.1]}),
    ("s100sc02_02_data", {"s": 100, "sc": [0.2, 0.2]}),
    ("s100sc03_03_data", {"s": 100, "sc": [0.3, 0.3]}),
    ("s100sc04_04_data", {"s": 100, "sc": [0.4, 0.4]}),
    ("s100sc05_05_data", {"s": 100, "sc": [0.5, 0.5]}),
    ("s100sc06_06_data", {"s": 100, "sc": [0.6, 0.6]}),
    ("s100sc07_07_data", {"s": 100, "sc": [0.7, 0.7]}),
    ("s100sc08_08_data", {"s": 100, "sc": [0.8, 0.8]}),
    ("s100sc09_09_data", {"s": 100, "sc": [0.9, 0.9]}),
    ("s100sc1_1_data", {"s": 100, "sc": [1, 1]}),
    ("s100sc10_10_data", {"s": 100, "sc": [10, 10]}),
    ("s100sc100_100_data", {"s": 100, "sc": [100, 100]}),
]

for _figure in _figures:
    if _figure is None:
        print()
        continue
    _title, _crop_settings = _figure
    # The crop is applied inside show_images2, so the untransformed dataset
    # is passed every time.
    show_images2(data=origin_data, main_title=_title, **_crop_settings)
Enter fullscreen mode Exit fullscreen mode

Image description

Image description

Image description

Image description

Image description

Image description


Image description

Image description

Image description


Image description

Image description

Image description


Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Top comments (0)