DATA LOADING and transformation to TENSOR (Auto Encoder with 2 output images) TUTORIAL

Prerequisites

To run this tutorial, please make sure the following packages are installed:

- PyTorch 0.4.1
- TorchVision 0.2.1
- PIL: For image I/O and transforms
- Matplotlib: To generate plots, histograms, etc.

from torch.utils.data import Dataset
from torchvision import transforms
from os import listdir
from os.path import join
import matplotlib.pyplot as plt
from PIL import Image

DATASET CLASS

torch.utils.data.Dataset is an abstract class representing a dataset. Your custom dataset should inherit from Dataset and override the following methods:

- __len__, so that len(dataset) returns the size of the dataset.
- __getitem__, to support indexing so that dataset[i] returns the i-th sample.

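To make this contract concrete before we build the image dataset, here is a minimal sketch using a toy SquaresDataSet (a hypothetical class, not part of this tutorial's code) that wraps a plain Python list:

from torch.utils.data import Dataset


class SquaresDataSet(Dataset):
    # Toy dataset: sample i is the pair (i, i * i)
    def __init__(self, n):
        self.numbers = list(range(n))

    def __len__(self):
        # len(dataset) returns the number of samples
        return len(self.numbers)

    def __getitem__(self, index):
        # dataset[i] returns the i-th sample
        n = self.numbers[index]
        return n, n * n


squares = SquaresDataSet(5)
print(len(squares))  # 5
print(squares[3])    # (3, 9)
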
Let’s create a dataset class for our Auto Encoder dataset. We will read the 'Input' image directory and 'Ground Truth' image directory in __init__ but leave the reading of the images themselves to __getitem__. This is memory efficient because the images are not all stored in memory at once but read as required. A sample of our dataset will be a pair (img_in, img_gt). Our dataset takes optional transform_in and transform_gt arguments so that any required processing can be applied to each image.
class AutoEncoderDataSet(Dataset):
    def __init__(self, dir_in, dir_gt, transform_in=None, transform_gt=None):
        self.dir_in = self.load_dir_single(dir_in)
        self.dir_gt = self.load_dir_single(dir_gt)
        self.transform_in = transform_in
        self.transform_gt = transform_gt

    def is_image_file(self, filename):
        return any(filename.endswith(extension) for extension in [".png", ".PNG", ".jpg", ".JPG", ".jpeg", ".JPEG"])

    def load_img(self, filename):
        img = Image.open(filename)

        return img

    def load_dir_single(self, directory):
        # Sort the listing so input and ground-truth files pair up by name;
        # os.listdir returns entries in arbitrary order
        return [join(directory, x) for x in sorted(listdir(directory)) if self.is_image_file(x)]

    def __len__(self):
        return len(self.dir_in)

    def __getitem__(self, index):
        img_in = self.load_img(self.dir_in[index])
        img_gt = self.load_img(self.dir_gt[index])

        if self.transform_in:
            img_in = self.transform_in(img_in)
        if self.transform_gt:
            img_gt = self.transform_gt(img_gt)

        return img_in, img_gt
torchvision.transforms.Compose is a simple callable class which allows us to compose several transforms together. We will use the transforms.ToTensor() class, which converts a PIL Image or numpy.ndarray of shape (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. We pass the composed transform to our AutoEncoderDataSet class as:
    composed = transforms.Compose([transforms.ToTensor()])
    auto_encoder_dataset = AutoEncoderDataSet(ps['DIR_IMG_IN'], ps['DIR_IMG_GT'], composed, composed)
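
To see what ToTensor() does in isolation, here is a quick check on a dummy image (the numpy array and its 256x256 size are made up for illustration):

import numpy as np
from PIL import Image
from torchvision import transforms

# A dummy 256x256 RGB PIL image with uint8 values in [0, 255]
arr = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
img = Image.fromarray(arr)

tensor = transforms.ToTensor()(img)
print(tensor.size())                             # torch.Size([3, 256, 256]), i.e. (C x H x W)
print(tensor.min().item(), tensor.max().item())  # values scaled into [0.0, 1.0]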
Let’s instantiate this class and iterate through the data samples.
def main(ps):
    composed = transforms.Compose([transforms.ToTensor()])
    auto_encoder_dataset = AutoEncoderDataSet(ps['DIR_IMG_IN'], ps['DIR_IMG_GT'], composed, composed)
    for i in range(len(auto_encoder_dataset)):
        img_in, img_gt = auto_encoder_dataset[i]
        print(i, 'Input image:', img_in.size(), 'Ground truth image:', img_gt.size())
Out:
      
0 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
1 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
2 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
3 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
4 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
5 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
6 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
7 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
8 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
9 Input image: torch.Size([3, 256, 256]) Ground truth image: torch.Size([3, 256, 256])
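
Because matplotlib.pyplot is already imported, a sample pair can also be inspected visually. This is a sketch, assuming the auto_encoder_dataset instance built in main above; imshow expects (H x W x C), so each tensor is permuted back before plotting:

img_in, img_gt = auto_encoder_dataset[0]

fig, axes = plt.subplots(1, 2)
axes[0].imshow(img_in.permute(1, 2, 0).numpy())  # (C, H, W) -> (H, W, C)
axes[0].set_title('Input')
axes[1].imshow(img_gt.permute(1, 2, 0).numpy())
axes[1].set_title('Ground truth')
plt.show()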

The full example code:
from torch.utils.data import Dataset
from torchvision import transforms
from os import listdir
from os.path import join
from PIL import Image


class AutoEncoderDataSet(Dataset):
    def __init__(self, dir_in, dir_gt, transform_in=None, transform_gt=None):
        self.dir_in = self.load_dir_single(dir_in)
        self.dir_gt = self.load_dir_single(dir_gt)
        self.transform_in = transform_in
        self.transform_gt = transform_gt

    def is_image_file(self, filename):
        return any(filename.endswith(extension) for extension in [".png", ".PNG", ".jpg", ".JPG", ".jpeg", ".JPEG"])

    def load_img(self, filename):
        img = Image.open(filename)

        return img

    def load_dir_single(self, directory):
        # Sort the listing so input and ground-truth files pair up by name;
        # os.listdir returns entries in arbitrary order
        return [join(directory, x) for x in sorted(listdir(directory)) if self.is_image_file(x)]

    def __len__(self):
        return len(self.dir_in)

    def __getitem__(self, index):
        img_in = self.load_img(self.dir_in[index])
        img_gt = self.load_img(self.dir_gt[index])

        if self.transform_in:
            img_in = self.transform_in(img_in)
        if self.transform_gt:
            img_gt = self.transform_gt(img_gt)

        return img_in, img_gt


def main(ps):
    composed = transforms.Compose([transforms.ToTensor()])
    auto_encoder_dataset = AutoEncoderDataSet(ps['DIR_IMG_IN'], ps['DIR_IMG_GT'], composed, composed)
    for i in range(len(auto_encoder_dataset)):
        img_in, img_gt = auto_encoder_dataset[i]
        print(i, 'Input image:', img_in.size(), 'Ground truth image:', img_gt.size())


if __name__ == "__main__":
    ps = {
        'DIR_IMG_IN': 'img/tr/in/',
        'DIR_IMG_GT': 'img/tr/gt/'
    }
    main(ps)
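
In training code the dataset is usually not indexed by hand. As a closing sketch (not part of the example above), the dataset can be wrapped in torch.utils.data.DataLoader to get shuffled mini-batches; batch_size=4 is an arbitrary choice:

from torch.utils.data import DataLoader

loader = DataLoader(auto_encoder_dataset, batch_size=4, shuffle=True)

for batch_in, batch_gt in loader:
    # Samples are stacked along a new leading batch dimension:
    # here (4, 3, 256, 256), except possibly a smaller final batch
    print(batch_in.size(), batch_gt.size())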

REFERENCES:

1. torch.utils.data.Dataset, PyTorch documentation: https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset