一、实战分析
背景:数据集8351张图,每张都是狗狗照片,共133种。
目标:用CNN实现狗类品种分类
方法:使用ImageNet上预先训练好的VGG16
分析场景:狗类数据集较小,与ImageNet相似度较高
将最后的全连接层删除,换成新的全连接层。冻结前面模型的权重,只训练最后这个新全连接层的权重。
我们将所有的图片都直接穿过之前的网络,一直到红色的最大池化层,并把该层的输出作为新的输入。这样,我们需要训练的网络只剩下2层。
二、实战
2.1 模型1
1、加载数据库
from glob import glob

import numpy as np
from keras.utils import np_utils
from sklearn.datasets import load_files


def load_dataset(path):
    """Load image file paths and one-hot breed labels from a directory tree.

    ``path`` is passed to ``sklearn.datasets.load_files``, which treats every
    sub-directory of ``path`` as one category.

    Returns a ``(dog_files, dog_targets)`` tuple: a NumPy array of file paths
    and the matching targets one-hot encoded over 133 classes.
    """
    # Only the paths and targets are used, so skip reading every image's
    # bytes into memory (load_files does that by default).
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), 133)
    return dog_files, dog_targets


# Load train, validation, and test datasets.
train_files, train_targets = load_dataset('dogImages/train')
valid_files, valid_targets = load_dataset('dogImages/valid')
test_files, test_targets = load_dataset('dogImages/test')

# Ordered list of dog names. glob() returns directories in arbitrary order,
# so sort them: load_files numbers the category folders alphabetically, and
# dog_names[i] must correspond to label index i.
# NOTE(review): 'dogImages/train/' is 16 characters long, so [25:-1] also
# drops the first 9 characters of every folder name -- confirm this matches
# the folder naming scheme, otherwise the breed names are truncated.
dog_names = [item[25:-1] for item in sorted(glob('dogImages/train/*/'))]

# Print statistics about the dataset.
print('There are %d total dog categories.' % len(dog_names))
print('There are %s total dog images.\n' % str(len(train_files) + len(valid_files) + len(test_files)))
print('There are %d training dog images.' % len(train_files))
print('There are %d validation dog images.' % len(valid_files))
print('There are %d test dog images.'% len(test_files))
2、可视化部分图片
import cv2 import matplotlib.pyplot as plt %matplotlib inline def visualize_img(img_path, ax): img = cv2.imread(img_path) ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) fig = plt.figure(figsize=(20, 10)) for i in range(12): ax = fig.add_subplot(3, 4, i + 1, xticks=[], yticks=[]) visualize_img(train_files[i], ax)
3、读入VGG16瓶颈特征
# Open the archive of precomputed VGG16 bottleneck features and pull out the
# arrays stored under the 'train', 'valid' and 'test' keys.
bottleneck_features = np.load('bottleneck_features/DogVGG16Data.npz')
train_vgg16, valid_vgg16, test_vgg16 = (
    bottleneck_features[split] for split in ('train', 'valid', 'test')
)
4、模型1
from keras.layers import Dense, Flatten
from keras.models import Sequential

# Model 1: flatten the (7, 7, 512) bottleneck features and classify them with
# a single 133-way softmax layer.
model = Sequential()
model.add(Flatten(input_shape=(7, 7, 512)))
model.add(Dense(133, activation='softmax'))

# The model is compiled in the next step (step 5), so it is not compiled a
# second time here; summary() does not require a compiled model.
model.summary()
5、模型编译
# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# and accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
6、模型训练
from keras.callbacks import ModelCheckpoint

# Write weights to disk only when the monitored score improves
# (save_best_only=True), logging each save (verbose=1).
checkpointer = ModelCheckpoint(
    filepath='dogvgg16.weights.best.hdf5',
    verbose=1,
    save_best_only=True,
)

# Train the model for 20 epochs on the bottleneck features, validating on the
# validation split after every epoch.
model.fit(
    train_vgg16,
    train_targets,
    epochs=20,
    validation_data=(valid_vgg16, valid_targets),
    callbacks=[checkpointer],
    verbose=1,
    shuffle=True,
)
7、加载最优模型
# Restore the checkpoint written during training. The ModelCheckpoint callback
# was created without a `monitor` argument, so "best" follows Keras's default
# criterion (validation loss), not validation accuracy.
model.load_weights('dogvgg16.weights.best.hdf5')
8、测试
# Index of the predicted dog breed for each image in the test set. The whole
# test array is predicted in one batched call instead of one predict() call
# per sample; argmax over axis 1 picks the most probable class per row.
vgg16_predictions = np.argmax(model.predict(test_vgg16), axis=1)

# Report test accuracy: percentage of predictions equal to the true class
# index recovered from the one-hot targets.
test_accuracy = 100 * np.sum(vgg16_predictions == np.argmax(test_targets, axis=1)) / len(vgg16_predictions)
print('\nTest accuracy: %.4f%%' % test_accuracy)
2.2 模型2
跟模型1的区别就是步骤4的模型不同,引入全局平均池化层 GAP减少训练参数。
1、加载数据库
from glob import glob

import numpy as np
from keras.utils import np_utils
from sklearn.datasets import load_files


def load_dataset(path):
    """Load image file paths and one-hot breed labels from a directory tree.

    ``path`` is passed to ``sklearn.datasets.load_files``, which treats every
    sub-directory of ``path`` as one category.

    Returns a ``(dog_files, dog_targets)`` tuple: a NumPy array of file paths
    and the matching targets one-hot encoded over 133 classes.
    """
    # Only the paths and targets are used, so skip reading every image's
    # bytes into memory (load_files does that by default).
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), 133)
    return dog_files, dog_targets


# Load train, validation, and test datasets.
train_files, train_targets = load_dataset('dogImages/train')
valid_files, valid_targets = load_dataset('dogImages/valid')
test_files, test_targets = load_dataset('dogImages/test')

# Ordered list of dog names. glob() returns directories in arbitrary order,
# so sort them: load_files numbers the category folders alphabetically, and
# dog_names[i] must correspond to label index i.
# NOTE(review): 'dogImages/train/' is 16 characters long, so [25:-1] also
# drops the first 9 characters of every folder name -- confirm this matches
# the folder naming scheme, otherwise the breed names are truncated.
dog_names = [item[25:-1] for item in sorted(glob('dogImages/train/*/'))]

# Print statistics about the dataset.
print('There are %d total dog categories.' % len(dog_names))
print('There are %s total dog images.\n' % str(len(train_files) + len(valid_files) + len(test_files)))
print('There are %d training dog images.' % len(train_files))
print('There are %d validation dog images.' % len(valid_files))
print('There are %d test dog images.'% len(test_files))
2、可视化部分图片
import cv2 import matplotlib.pyplot as plt %matplotlib inline def visualize_img(img_path, ax): img = cv2.imread(img_path) ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) fig = plt.figure(figsize=(20, 10)) for i in range(12): ax = fig.add_subplot(3, 4, i + 1, xticks=[], yticks=[]) visualize_img(train_files[i], ax)
3、读入VGG16瓶颈特征
# Open the archive of precomputed VGG16 bottleneck features and pull out the
# arrays stored under the 'train', 'valid' and 'test' keys.
bottleneck_features = np.load('bottleneck_features/DogVGG16Data.npz')
train_vgg16, valid_vgg16, test_vgg16 = (
    bottleneck_features[split] for split in ('train', 'valid', 'test')
)
4、模型2
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Sequential
# Model 2: average each of the 512 feature maps of the (7, 7, 512) bottleneck
# input down to one value, then classify with a 133-way softmax layer.
model = Sequential([
    GlobalAveragePooling2D(input_shape=(7, 7, 512)),
    Dense(133, activation='softmax'),
])
model.summary()
5、模型编译
# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# and accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
6、模型训练
from keras.callbacks import ModelCheckpoint

# Write weights to disk only when the monitored score improves
# (save_best_only=True), logging each save (verbose=1).
checkpointer = ModelCheckpoint(
    filepath='dogvgg16.weights.best.hdf5',
    verbose=1,
    save_best_only=True,
)

# Train the model for 20 epochs on the bottleneck features, validating on the
# validation split after every epoch.
model.fit(
    train_vgg16,
    train_targets,
    epochs=20,
    validation_data=(valid_vgg16, valid_targets),
    callbacks=[checkpointer],
    verbose=1,
    shuffle=True,
)
7、加载最优模型
# Restore the checkpoint written during training. The ModelCheckpoint callback
# was created without a `monitor` argument, so "best" follows Keras's default
# criterion (validation loss), not validation accuracy.
model.load_weights('dogvgg16.weights.best.hdf5')
8、测试
# Index of the predicted dog breed for each image in the test set. The whole
# test array is predicted in one batched call instead of one predict() call
# per sample; argmax over axis 1 picks the most probable class per row.
vgg16_predictions = np.argmax(model.predict(test_vgg16), axis=1)

# Report test accuracy: percentage of predictions equal to the true class
# index recovered from the one-hot targets.
test_accuracy = 100 * np.sum(vgg16_predictions == np.argmax(test_targets, axis=1)) / len(vgg16_predictions)
print('\nTest accuracy: %.4f%%' % test_accuracy)
三、优化
引入全局平均池化层GAP进行优化。最近MIT发现,包含GAP的CNN还具有目标定位的功能。
GAP简单说明:
全连接层:上面程序中,将(7, 7, 512)展开,得到7*7*512个元素,再softmax成133类
GAP全局平均池化:上面程序中,对(7,7,512)中每个7*7的特征图取平均,变成1个值,得到512个元素,再直接softmax成133类。
论文:http://cnnlocalization.csail.mit.edu/Zhou_Learning_Deep_Features_CVPR_2016_paper.pdf
提出用GAP层进行目标定位的第一篇论文
代码:https://github.com/alexisbcook/ResNetCAM-keras
使用CNN进行目标定位
视频:https://www.youtube.com/watch?v=fZvOy0VXWAI
使用CNN进行目标定位
代码:https://github.com/alexisbcook/keras_transfer_cifar10
用可视化技术更好地理解瓶颈特征