ディープラーニング: フレームワークDeZeroで画像認識をする方法④ - ビジネスパーソン・ガジェット置場　empty lot for business

こちらのページはVGG16ではなく自分で何層かのCNNを作成して適用してみた結果についてまとめたものです。

モデルの作成

DeZeroでモデルを作成するには、Modelクラスを継承したクラスで下記を設定する

__init__()でレイヤーのインスタンスを生成
forward()でレイヤーインスタンスに入力値を渡し、活性化関数に通す

作成したモデルのインスタンスを生成し予測値を求める

テストモデル

前回まではVGG16を使用してきたが、今回は手始めに、畳み込み層4層と全結合層2層からなる計6層のモデルを自作して適用してみる

コード

※Google Colaboratory（ランタイムのタイプ= GPU)

※エポック数 10

import glob
from PIL import Image
import math
import numpy as np
import matplotlib.pyplot as plt
import dezero
import dezero.functions as F
import dezero.layers as L
from dezero import DataLoader, optimizers, Parameter, test_mode, Dataset, Model

# Root folder of the image data; each class has its own sub-folder
# under train/ and test/.
path = "./drive/MyDrive/Colab Notebooks/data/"

# Per-class file lists for the training split.
train_path_iso = glob.glob(f'{path}train/イソ/*')
train_path_wonyon = glob.glob(f'{path}train/ウォニョン/*')
train_path_gaul = glob.glob(f'{path}train/ガウル/*')
train_path_yujin = glob.glob(f'{path}train/ユジン/*')
train_path_lizu = glob.glob(f'{path}train/リズ/*')
train_path_rey = glob.glob(f'{path}train/レイ/*')

# Per-class file lists for the test split.
test_path_iso = glob.glob(f'{path}test/イソ/*')
test_path_wonyon = glob.glob(f'{path}test/ウォニョン/*')
test_path_gaul = glob.glob(f'{path}test/ガウル/*')
test_path_yujin = glob.glob(f'{path}test/ユジン/*')
test_path_lizu = glob.glob(f'{path}test/リズ/*')
test_path_rey = glob.glob(f'{path}test/レイ/*')

# The position of a list inside each tuple is its class id (0..5).
_train_groups = (
    train_path_iso,
    train_path_wonyon,
    train_path_gaul,
    train_path_yujin,
    train_path_lizu,
    train_path_rey,
)
_test_groups = (
    test_path_iso,
    test_path_wonyon,
    test_path_gaul,
    test_path_yujin,
    test_path_lizu,
    test_path_rey,
)

# Flatten the per-class lists, keeping class order, so that the i-th path
# lines up with the i-th label.
train_path = [file for group in _train_groups for file in group]
test_path = [file for group in _test_groups for file in group]

# One label per file: the class id repeated once for every file of that class.
_train_label = [cls for cls, group in enumerate(_train_groups) for _ in group]
train_label = np.array(_train_label)

_test_label = [cls for cls, group in enumerate(_test_groups) for _ in group]
test_label = np.array(_test_label)


class TestModel(Model):
    """Small CNN classifier: four conv/pooling stages and two linear layers.

    Each stage applies a 3x3 convolution (stride 1, pad 1), ReLU and a
    pooling call with kernel 2 / stride 2.  The flattened result goes
    through a 512-unit linear layer with ReLU and dropout, then a final
    linear layer that outputs 6 raw class scores (no softmax applied).
    """

    # Location of a pretrained weight file passed to ``load_weights``.
    # ``None`` means no pretrained weights are available for this model.
    WEIGHTS_PATH = None

    def __init__(self, pretrained=False):
        """Create the layers and optionally load pretrained weights.

        Args:
            pretrained: When true, load weights from ``WEIGHTS_PATH``.

        Raises:
            FileNotFoundError: If ``pretrained`` is true but ``WEIGHTS_PATH``
                is not set.
        """
        super().__init__()
        self.conv1 = L.Conv2d(32, kernel_size=3, stride=1, pad=1)
        self.conv2 = L.Conv2d(64, kernel_size=3, stride=1, pad=1)
        self.conv3 = L.Conv2d(128, kernel_size=3, stride=1, pad=1)
        self.conv4 = L.Conv2d(128, kernel_size=3, stride=1, pad=1)
        self.fc5 = L.Linear(512)
        self.fc6 = L.Linear(6)

        if pretrained:
            # Use the class-level path instead of a hard-coded empty string,
            # and fail with an explicit message when nothing is configured.
            weights_path = self.WEIGHTS_PATH
            if not weights_path:
                raise FileNotFoundError(
                    'pretrained=True was requested but '
                    '{}.WEIGHTS_PATH is not set'.format(type(self).__name__))
            self.load_weights(weights_path)

    def forward(self, x):
        """Return the 6 class scores for the input batch ``x``.

        Args:
            x: Input batch; the first axis is kept as the batch axis when
                the feature maps are flattened.
        """
        x = F.relu(self.conv1(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv3(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv4(x))
        x = F.pooling(x, 2, 2)
        # Flatten everything except the batch axis for the linear layers.
        x = F.reshape(x, (x.shape[0], -1))
        x = F.dropout(F.relu(self.fc5(x)))
        x = self.fc6(x)
        return x

    @staticmethod
    def preprocess(image, size=(224, 224), dtype=np.float32):
        """Convert a PIL image into a channel-first array for the network.

        Args:
            image: PIL image (anything supporting ``convert``/``resize``).
            size: Target size passed to ``image.resize``; a falsy value
                skips resizing.
            dtype: dtype of the returned array.

        Returns:
            Array with axes ordered (channel, height, width), with the
            channel order reversed from RGB and a constant subtracted
            from each channel.
        """
        image = image.convert('RGB')
        if size:
            image = image.resize(size)
        image = np.asarray(image, dtype=dtype)
        # Reverse the channel axis (RGB -> BGR).
        image = image[:, :, ::-1]
        # Per-channel offsets; presumably the ImageNet BGR means used by
        # VGG16-style preprocessing -- TODO confirm.
        image -= np.array([103.939, 116.779, 123.68], dtype=dtype)
        # (height, width, channel) -> (channel, height, width)
        image = image.transpose((2, 0, 1))
        return image


class IveDataset(Dataset):
    """Image dataset backed by the module-level path and label lists.

    Images are opened on access rather than loaded up front; ``self.data``
    only holds file paths.
    """

    def __init__(self, train=True, transform=TestModel.preprocess, target_transform=None):
        """Forward the split flag and transforms to the base ``Dataset``.

        Args:
            train: Use the training lists when true, the test lists otherwise.
            transform: Callable applied to each opened image.
            target_transform: Optional callable applied to each label.
        """
        super().__init__(train, transform, target_transform)

    def prepare(self):
        """Bind ``data``/``label`` to the lists of the selected split."""
        if self.train:
            self.data = train_path
            self.label = train_label
        else:
            self.data = test_path
            self.label = test_label

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for the sample at ``index``."""
        img = Image.open(self.data[index])
        img = self.transform(img)
        label = int(self.label[index])
        # Honour ``target_transform`` instead of silently dropping it.
        # NOTE(review): the base class presumably stores it as
        # ``self.target_transform`` (identity when None) -- getattr keeps
        # this safe if it does not.
        target_transform = getattr(self, 'target_transform', None)
        if target_transform is not None:
            label = target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.label)

# Train TestModel on the image dataset and report loss/accuracy on the
# training and test splits after every epoch.
batchsize = 8
train_set = IveDataset(train=True)
train_loader = DataLoader(train_set, batchsize, shuffle=True)

test_set = IveDataset(train=False)
test_loader = DataLoader(test_set, batchsize, shuffle=False)

# The loop below needs a model and an optimizer bound to it; neither was
# defined before, so the script failed with NameError.
# NOTE(review): Adam with default settings is an assumption -- swap in the
# optimizer that was actually used if it differs.
model = TestModel()
optimizer = optimizers.Adam().setup(model)

# Move the data loaders and the model to the GPU together so inputs and
# parameters live on the same device; skip when no GPU backend is available.
if dezero.cuda.gpu_enable:
    train_loader.to_gpu()
    test_loader.to_gpu()
    model.to_gpu()

max_epoch = 10

for epoch in range(max_epoch):
    # ---- training pass ----
    sum_loss, sum_acc = 0, 0
    for x, t in train_loader:
        y = model(x)
        loss = F.softmax_cross_entropy(y, t)
        acc = F.accuracy(y, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        # Weight by batch size so a smaller final batch is averaged correctly.
        sum_loss += float(loss.data) * len(t)
        sum_acc += float(acc.data) * len(t)

    print('epoch: {}'.format(epoch+1))
    print('train loss: {:.4f}, accuracy: {:.4f}'.format(sum_loss / len(train_set), sum_acc / len(train_set)))

    # ---- evaluation pass ----
    sum_loss, sum_acc = 0, 0

    # test_mode() switches the framework to inference behaviour for this pass.
    with test_mode():
        for x, t in test_loader:
            y = model(x)
            loss = F.softmax_cross_entropy(y, t)
            acc = F.accuracy(y, t)
            sum_loss += float(loss.data) * len(t)
            sum_acc += float(acc.data) * len(t)

    print('test loss: {:.4f}, accuracy: {:.4f}'.format(sum_loss / len(test_set), sum_acc / len(test_set)))

結果

epoch: 1
train loss: 17.8225, accuracy: 0.1900
test loss: 1.7513, accuracy: 0.2417
epoch: 2
train loss: 1.3803, accuracy: 0.5067
test loss: 1.8130, accuracy: 0.2667
epoch: 3
train loss: 0.8045, accuracy: 0.7167
test loss: 1.8307, accuracy: 0.3250
epoch: 4
train loss: 0.3319, accuracy: 0.9167
test loss: 2.2559, accuracy: 0.3167
epoch: 5
train loss: 0.1350, accuracy: 0.9800
test loss: 2.3326, accuracy: 0.3667
epoch: 6
train loss: 0.0555, accuracy: 0.9967
test loss: 2.4772, accuracy: 0.3333
epoch: 7
train loss: 0.0337, accuracy: 0.9967
test loss: 2.7865, accuracy: 0.3333
epoch: 8
train loss: 0.0235, accuracy: 0.9967
test loss: 2.4964, accuracy: 0.3333
epoch: 9
train loss: 0.0130, accuracy: 1.0000
test loss: 2.6056, accuracy: 0.3500
epoch: 10
train loss: 0.0069, accuracy: 1.0000
test loss: 2.7157, accuracy: 0.3667

[ ]