DEV Community

Super Kai (Kazuya Ito)
Super Kai (Kazuya Ito)

Posted on

Resize in PyTorch

Buy Me a Coffee

*Memos:

Resize() can resize zero or more images as shown below:

*Memos:

  • The 1st argument for initialization is size(Required-Type:int, tuple/list(int) or size()): *Memos:
    • It's [height, width].
    • It must be 1 <= x.
    • None can be explicitly set to it only if max_size isn't None.
    • A tuple/list must be 1D with 1 or 2 elements.
    • A single value (int or 1-element tuple/list(int)) is applied to the smaller edge (width or height) of an image, then the other, larger edge is resized to keep the aspect ratio: *Memos:
    • If an image's width is smaller than its height, it's [size * height / width, size].
    • If an image's width is larger than its height, it's [size, size * width / height].
    • If an image's width is equal to its height, it's [size, size].
  • The 2nd argument for initialization is interpolation(Optional-Default:InterpolationMode.BILINEAR-Type:InterpolationMode).
  • The 3rd argument for initialization is max_size(Optional-Default:None-Type:int): *Memos:
    • It's only supported if size is a single value (int or 1-element tuple/list(int)) or None.
    • If the larger edge (width or height) of an image exceeds it after size is applied, it's applied to that larger edge instead to limit the image size, and the other, smaller edge is scaled down with it, so it also becomes smaller than before.
  • The 4th argument for initialization is antialias(Optional-Default:True-Type:bool). *Even if False is set to it, antialiasing is always applied to a PIL image if interpolation is InterpolationMode.BILINEAR or InterpolationMode.BICUBIC.
  • The 1st argument is img(Required-Type:PIL Image or tensor(int, float, complex or bool)): *Memos:
    • A tensor must be 3D or more with one or more elements.
    • Don't use img=.
  • Using v2 is recommended according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import Resize
from torchvision.transforms.functional import InterpolationMode

# Minimal construction: only the required `size` argument is given.
resize = Resize(size=100)
# Construction with every initialization argument passed explicitly by keyword;
# this rebinding replaces the instance created on the previous line.
resize = Resize(size=100,
                interpolation=InterpolationMode.BILINEAR,
                max_size=None,
                antialias=True)
# The commented lines under each expression below record the value shown for it.
resize
# Resize(size=[100],
#        interpolation=InterpolationMode.BILINEAR,
#        antialias=True)

resize.size
# [100]

resize.interpolation
# <InterpolationMode.BILINEAR: 'bilinear'>

# print() is used here so that the None value is actually displayed.
print(resize.max_size)
# None

resize.antialias
# True

# Dataset with no transform (transform=None).
origin_data = OxfordIIITPet(
    root="data",
    transform=None
)

# Single-value size of 1000.
s1000_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=1000)
    # transform=Resize(size=[1000])
    # transform=Resize(size=[1000, 1000])
)

# Single-value size of 100.
s100_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=100)
)

# Single-value size of 50.
s50_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=50)
)

# Single-value size of 10.
s10_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=10)
)

# Explicit two-element size [100, 180].
s100_180_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=[100, 180])
)

# Explicit two-element size [180, 100].
s180_100_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=[180, 100])
)

# Single-value size of 100 combined with max_size=110.
s100ms110_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=100, max_size=110)
)

# size=None with max_size=110, so only max_size is specified.
sNonems110_data = OxfordIIITPet(
    root="data",
    transform=Resize(size=None, max_size=110)
)

import matplotlib.pyplot as plt

def show_images1(data, main_title=None):
    """Plot up to the first five images of ``data`` in a single-row figure.

    ``data`` is iterated as ``(image, label)`` pairs; the label is ignored.
    ``main_title`` is used as the figure's super-title.
    """
    columns = 5
    plt.figure(figsize=(10, 5))
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # range() is the first zip() argument, so iteration stops after `columns`
    # samples without pulling an extra item from `data`.
    for slot, (image, _) in zip(range(1, columns + 1), data):
        plt.subplot(1, columns, slot)
        plt.imshow(X=image)
    plt.tight_layout()
    plt.show()

# Group 1: the untransformed images followed by single-value sizes 1000, 100, 50 and 10.
show_images1(data=origin_data, main_title="origin_data")
show_images1(data=s1000_data, main_title="s1000_data")
show_images1(data=s100_data, main_title="s100_data")
show_images1(data=s50_data, main_title="s50_data")
show_images1(data=s10_data, main_title="s10_data")
# An empty line is printed between groups.
print()
# Group 2: the untransformed images followed by the two-element sizes [100, 180] and [180, 100].
show_images1(data=origin_data, main_title="origin_data")
show_images1(data=s100_180_data, main_title="s100_180_data")
show_images1(data=s180_100_data, main_title="s180_100_data")
print()
# Group 3: size=100 alone, size=100 with max_size=110, and max_size=110 with size=None.
show_images1(data=s100_data, main_title="s100_data")
show_images1(data=s100ms110_data, main_title="s100ms110_data")
show_images1(data=sNonems110_data, main_title="sNonems110_data")

# ↓ ↓ ↓ ↓ ↓ ↓ The code below is equivalent to the code above. ↓ ↓ ↓ ↓ ↓ ↓
def show_images2(data, main_title=None, s=None, ms=None):
    """Plot up to the first five images of ``data`` after resizing each one.

    ``data`` is iterated as ``(image, label)`` pairs; the label is ignored.
    ``s`` and ``ms`` are forwarded to ``Resize`` as ``size`` and ``max_size``.
    When both are falsy, each image is resized to its own dimensions instead.
    ``main_title`` is used as the figure's super-title.
    """
    columns = 5
    plt.figure(figsize=(10, 5))
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    for slot, (image, _) in zip(range(1, columns + 1), data):
        plt.subplot(1, columns, slot)
        # With neither s nor ms given, fall back to the image's own size with
        # the two entries of image.size swapped (assumes a PIL-style
        # (width, height) size, giving [height, width] — TODO confirm).
        target = s if (s or ms) else [image.size[1], image.size[0]]
        plt.imshow(X=Resize(size=target, max_size=ms)(image))  # Here
    plt.tight_layout()
    plt.show()

# Every call below reads from the untransformed dataset; the resize is applied
# inside show_images2 via the s (size) and ms (max_size) arguments.

# Group 1: no resize arguments, then single-value sizes 1000, 100, 50 and 10.
show_images2(data=origin_data, main_title="origin_data")
show_images2(data=origin_data, main_title="s1000_data", s=1000)
show_images2(data=origin_data, main_title="s100_data", s=100)
# Title corrected from "s100_data" so that it matches s=50.
show_images2(data=origin_data, main_title="s50_data", s=50)
show_images2(data=origin_data, main_title="s10_data", s=10)
# An empty line is printed between groups.
print()
# Group 2: no resize arguments, then the two-element sizes [100, 180] and [180, 100].
show_images2(data=origin_data, main_title="origin_data")
show_images2(data=origin_data, main_title="s100_180_data", s=[100, 180])
show_images2(data=origin_data, main_title="s180_100_data", s=[180, 100])
print()
# Group 3: size=100 alone, size=100 with max_size=110, and max_size=110 alone.
show_images2(data=origin_data, main_title="s100_data", s=100)
show_images2(data=origin_data, main_title="s100ms110_data", s=100, ms=110)
show_images2(data=origin_data, main_title="sNonems110_data", ms=110)
Enter fullscreen mode Exit fullscreen mode

Image description

Image description

Image description

Image description

Image description


Image description

Image description

Image description


Image description

Image description

Image description

Top comments (0)