本文部分内容来自zouxy09的博客。谢谢。http://blog.csdn.net/zouxy09/article/details/9993371

以及斯坦福大学深度学习教程：http://ufldl.stanford.edu/wiki/index.php/UFLDL教程

由于权值共享，CNN结构中连接的数量远多于权值（可训练参数）的数量。CNN通过数据驱动的方式学习得到一组滤波器，并用这些滤波器来提取输入的特征。

典型CNN中，开始的几层都是卷积层和下采样层交替出现，最后是若干全连接层。

进入全连接层之前，所有二维特征map都已被展开并拼接成一维向量，作为全连接层的输入。

1、前向传播

假设该网络要处理一个c类分类问题，共有N个训练样本。

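定义平方误差代价函数：

$$E^N = \frac{1}{2}\sum_{n=1}^{N}\sum_{k=1}^{c}\left(t_k^n - y_k^n\right)^2$$

其中 $t_k^n$ 是第n个样本目标标签的第k维，$y_k^n$ 是网络对第n个样本输出的第k维。（下文 cnnbp.m 中的 net.L 在此基础上还除以了一批样本的个数。）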

2、反向传播

调整参数。

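批量梯度下降法是一种常用的优化目标函数的方法：先求目标函数关于参数的导数，再用导数来更新参数，使目标函数沿梯度下降的方向快速逼近最小值。每次迭代都按照如下公式对参数 $W$ 和 $b$ 进行更新（$\alpha$ 为学习率）：

$$W \leftarrow W - \alpha\,\frac{\partial E}{\partial W},\qquad b \leftarrow b - \alpha\,\frac{\partial E}{\partial b}$$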

反向传播算法的思路如下：

卷积神经网络(CNN)的训练及代码实现

3、卷积神经网络训练参数时的不同之处

3.1卷积层

CNN中卷积层的BP更新。

在卷积层，上一层的特征map被一个可学习的卷积核进行卷积，再通过一个激活函数，就可以得到输出特征map。

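每一个输出map可以组合多个输入map的卷积结果，正向传播时按如下公式计算：

$$x_j^{l} = f\left(\sum_{i \in M_j} x_i^{l-1} * k_{ij}^{l} + b_j^{l}\right)$$

其中 $M_j$ 表示第j个输出map所选用的输入map集合，$k_{ij}^{l}$ 是对应的卷积核，$b_j^{l}$ 是该输出map的偏置，$f$ 为激活函数。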

对于卷积层参数的调整：计算残差时看的是卷积层与下采样层之间的连接，调整参数时看的是上一层与卷积层之间的连接。

3.2下采样层

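对于下采样层，输入N个特征map，就输出N个map，只是每个输出map都变小了。正向传播时按如下公式计算：

$$x_j^{l} = f\left(\beta_j^{l}\,\mathrm{down}\left(x_j^{l-1}\right) + b_j^{l}\right)$$

其中 $\mathrm{down}(\cdot)$ 表示下采样函数。（下文 cnnff.m 的实现只做 scale×scale 的均值下采样，没有使用 $\beta$、偏置和激活函数。）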

（原文此处的公式图片在转载时丢失。）

卷积神经网络(CNN)的训练及代码实现

4、卷积神经网络代码实现

本代码需要下载MNIST数据，网上很容易搜到，这里就不具体给出了。另外，完整的CNN实现代码可以在此下载：https://github.com/rasmusbergpalm/DeepLearnToolbox

1、cnnapplygrads.m

function net = cnnapplygrads(net, opts)
%CNNAPPLYGRADS  Take one gradient-descent step on every trainable parameter.
%
%   net = CNNAPPLYGRADS(net, opts)
%
%   Inputs
%     net  - network struct. Every layer of type 'c' must carry kernels
%            k{i}{j}, biases b{j} and the matching gradients dk{i}{j},
%            db{j}; the struct must also carry ffW/ffb with gradients
%            dffW/dffb.
%     opts - options struct; only opts.alpha (the step size) is read.
%
%   Output
%     net  - same struct with parameter <- parameter - alpha * gradient
%            applied to the kernels and biases of all 'c' layers and to
%            ffW and ffb. Layers of any other type are left untouched.

    step = opts.alpha;

    for layerIdx = 2 : numel(net.layers)
        if ~strcmp(net.layers{layerIdx}.type, 'c')
            continue;   % only convolution layers are updated here
        end

        layer     = net.layers{layerIdx};
        numInMaps = numel(net.layers{layerIdx - 1}.a);

        for outIdx = 1 : numel(layer.a)
            % one kernel per (input map, output map) pair
            for inIdx = 1 : numInMaps
                layer.k{inIdx}{outIdx} = layer.k{inIdx}{outIdx} - step * layer.dk{inIdx}{outIdx};
            end
            % one bias per output map
            layer.b{outIdx} = layer.b{outIdx} - step * layer.db{outIdx};
        end

        net.layers{layerIdx} = layer;
    end

    % weights and biases feeding the output units
    net.ffW = net.ffW - step * net.dffW;
    net.ffb = net.ffb - step * net.dffb;
end

2、cnnbp.m

function net = cnnbp(net, y)
%CNNBP  Back-propagate the squared-error loss and compute all gradients.
%
%   net = CNNBP(net, y)
%
%   Inputs
%     net - network struct that already holds the results of a forward
%           pass: net.o, net.fv and the per-layer maps net.layers{l}.a.
%     y   - targets, combined element-wise with net.o (one column per
%           sample, as implied by the division by size(net.e, 2)).
%
%   Fields written
%     net.e, net.L          - output error and 1/2 * sum(e.^2) / #columns
%     net.od, net.fvd       - deltas at the output units / feature vector
%     net.layers{l}.d{j}    - delta of every map of every layer visited
%     net.layers{l}.dk, .db - kernel and bias gradients of 'c' layers
%     net.dffW, net.dffb    - gradients of the output weights and biases
%
%   The map sizes are read with a three-output size() call, so a batch
%   holding a single sample (2-D maps) is handled like any other batch.
%   Calls the helpers expand and flipall, which are not defined here.

    n = numel(net.layers);

    %  error
    net.e = net.o - y;

    %  loss function
    net.L = 1/2 * sum(net.e(:) .^ 2) / size(net.e, 2);

    %%  backprop deltas
    net.od = net.e .* (net.o .* (1 - net.o));   %  output delta
    net.fvd = (net.ffW' * net.od);              %  feature vector delta
    if strcmp(net.layers{n}.type, 'c')          %  only conv layers has sigm function
        net.fvd = net.fvd .* (net.fv .* (1 - net.fv));
    end

    %  reshape feature vector deltas into output map style
    %  (size() drops a trailing singleton dimension, so ask for the three
    %  extents explicitly instead of indexing the size vector with (3))
    [mapRows, mapCols, numSamples] = size(net.layers{n}.a{1});
    fvnum = mapRows * mapCols;
    for j = 1 : numel(net.layers{n}.a)
        net.layers{n}.d{j} = reshape(net.fvd(((j - 1) * fvnum + 1) : j * fvnum, :), mapRows, mapCols, numSamples);
    end

    for l = (n - 1) : -1 : 1
        if strcmp(net.layers{l}.type, 'c')
            %  reads .scale and .d of layer l + 1, i.e. the next layer is
            %  expected to be a subsampling layer
            scale = net.layers{l + 1}.scale;
            for j = 1 : numel(net.layers{l}.a)
                a     = net.layers{l}.a{j};
                dNext = net.layers{l + 1}.d{j};
                %  One replication factor per dimension of dNext: [s s 1]
                %  for the usual 3-D batch (unchanged), [s s] for a 2-D
                %  single-sample map.
                %  NOTE(review): assumes expand requires numel(factor) ==
                %  ndims(input) - confirm against the helper.
                reps = [scale scale ones(1, ndims(dNext) - 2)];
                net.layers{l}.d{j} = a .* (1 - a) .* (expand(dNext, reps) / scale ^ 2);
            end
        elseif strcmp(net.layers{l}.type, 's')
            %  reads .k and .d of layer l + 1, i.e. the next layer is
            %  expected to be a convolution layer
            for i = 1 : numel(net.layers{l}.a)
                z = zeros(size(net.layers{l}.a{1}));
                for j = 1 : numel(net.layers{l + 1}.a)
                    z = z + convn(net.layers{l + 1}.d{j}, rot180(net.layers{l + 1}.k{i}{j}), 'full');
                end
                net.layers{l}.d{i} = z;
            end
        end
    end

    %%  calc gradients
    for l = 2 : n
        if strcmp(net.layers{l}.type, 'c')
            for j = 1 : numel(net.layers{l}.a)
                %  dividing by the third extent of the delta averages over
                %  the samples of the batch
                batchDepth = size(net.layers{l}.d{j}, 3);
                for i = 1 : numel(net.layers{l - 1}.a)
                    net.layers{l}.dk{i}{j} = convn(flipall(net.layers{l - 1}.a{i}), net.layers{l}.d{j}, 'valid') / batchDepth;
                end
                net.layers{l}.db{j} = sum(net.layers{l}.d{j}(:)) / batchDepth;
            end
        end
    end
    net.dffW = net.od * (net.fv)' / size(net.od, 2);
    net.dffb = mean(net.od, 2);

    function X = rot180(X)
        %  flip along rows, then along columns
        X = flipdim(flipdim(X, 1), 2);
    end
end

3、cnnff.m

function net = cnnff(net, x)
%CNNFF  Forward pass: compute every layer's maps and the network output.
%
%   net = CNNFF(net, x)
%
%   Inputs
%     net - network struct with net.layers (types 'c' and 's' are
%           processed from layer 2 on), kernels k{i}{j} and biases b{j}
%           on 'c' layers, and output parameters net.ffW / net.ffb.
%     x   - input stored as the single map of layer 1; the third
%           dimension is treated as the sample index.
%
%   Fields written
%     net.layers{l}.a{j} - output map j of layer l
%     net.fv             - all maps of the last layer, flattened and
%                          stacked, one column per sample
%     net.o              - sigm(ffW * fv + ffb)
%
%   Map extents are read with a three-output size() call, so an input
%   holding a single sample (a 2-D array) is accepted as well.
%   Calls the helper sigm, which is not defined here.

    n = numel(net.layers);
    net.layers{1}.a{1} = x;
    inputmaps = 1;

    for l = 2 : n   %  for each layer
        if strcmp(net.layers{l}.type, 'c')
            ksz = net.layers{l}.kernelsize;
            %  size() drops a trailing singleton dimension, so request the
            %  three extents separately rather than subtracting a 3-vector
            %  from the size vector
            [inRows, inCols, numSamples] = size(net.layers{l - 1}.a{1});

            %  !!below can probably be handled by insane matrix operations
            for j = 1 : net.layers{l}.outputmaps   %  for each output map
                %  create temp output map ('valid' convolution shrinks each
                %  spatial extent by kernelsize - 1)
                z = zeros(inRows - ksz + 1, inCols - ksz + 1, numSamples);
                for i = 1 : inputmaps   %  for each input map
                    %  convolve with corresponding kernel and add to temp output map
                    z = z + convn(net.layers{l - 1}.a{i}, net.layers{l}.k{i}{j}, 'valid');
                end
                %  add bias, pass through nonlinearity
                net.layers{l}.a{j} = sigm(z + net.layers{l}.b{j});
            end
            %  set number of input maps to this layers number of outputmaps
            inputmaps = net.layers{l}.outputmaps;

        elseif strcmp(net.layers{l}.type, 's')
            %  downsample: average over scale-by-scale windows, then keep
            %  every scale-th row and column
            scale = net.layers{l}.scale;
            for j = 1 : inputmaps
                z = convn(net.layers{l - 1}.a{j}, ones(scale) / (scale ^ 2), 'valid');   %  !! replace with variable
                net.layers{l}.a{j} = z(1 : scale : end, 1 : scale : end, :);
            end
        end
    end

    %  concatenate all end layer feature maps into vector
    net.fv = [];
    for j = 1 : numel(net.layers{n}.a)
        [mapRows, mapCols, numSamples] = size(net.layers{n}.a{j});
        net.fv = [net.fv; reshape(net.layers{n}.a{j}, mapRows * mapCols, numSamples)];
    end

    %  feedforward into output perceptrons
    net.o = sigm(net.ffW * net.fv + repmat(net.ffb, 1, size(net.fv, 2)));
end

4、cnnnumgradcheck.m

function cnnnumgradcheck(net, x, y)
%CNNNUMGRADCHECK  Check stored gradients against central differences.
%
%   CNNNUMGRADCHECK(net, x, y) perturbs one parameter at a time by
%   +/- epsilon, re-runs cnnff and cnnbp on both perturbed copies and
%   compares (L_plus - L_minus) / (2 * epsilon) with the gradient already
%   stored in NET. An error is raised at the first entry whose absolute
%   difference exceeds the tolerance.
%
%   Parameters are visited in this order: net.ffb, net.ffW, then for each
%   'c' layer from last to first, per output map, the bias followed by
%   every kernel entry. Subsampling layers are not checked.

    epsilon = 1e-4;   % half-width of the finite-difference step
    er      = 1e-8;   % largest tolerated |numeric - stored|
    n = numel(net.layers);

    % biases of the output units
    for j = 1 : numel(net.ffb)
        net_p = net;  net_p.ffb(j) = net.ffb(j) + epsilon;
        net_m = net;  net_m.ffb(j) = net.ffb(j) - epsilon;
        check_gradient_(net_p, net_m, x, y, net.dffb(j), epsilon, er);
    end

    % weights of the output units
    for i = 1 : size(net.ffW, 1)
        for u = 1 : size(net.ffW, 2)
            net_p = net;  net_p.ffW(i, u) = net.ffW(i, u) + epsilon;
            net_m = net;  net_m.ffW(i, u) = net.ffW(i, u) - epsilon;
            check_gradient_(net_p, net_m, x, y, net.dffW(i, u), epsilon, er);
        end
    end

    % convolution layers, last to first
    for l = n : -1 : 2
        if ~strcmp(net.layers{l}.type, 'c')
            continue;   % nothing is checked for other layer types
        end

        for j = 1 : numel(net.layers{l}.a)
            % bias of output map j
            net_p = net;  net_p.layers{l}.b{j} = net.layers{l}.b{j} + epsilon;
            net_m = net;  net_m.layers{l}.b{j} = net.layers{l}.b{j} - epsilon;
            check_gradient_(net_p, net_m, x, y, net.layers{l}.db{j}, epsilon, er);

            % every entry of every kernel feeding output map j
            for i = 1 : numel(net.layers{l - 1}.a)
                for u = 1 : size(net.layers{l}.k{i}{j}, 1)
                    for v = 1 : size(net.layers{l}.k{i}{j}, 2)
                        net_p = net;
                        net_p.layers{l}.k{i}{j}(u, v) = net_p.layers{l}.k{i}{j}(u, v) + epsilon;
                        net_m = net;
                        net_m.layers{l}.k{i}{j}(u, v) = net_m.layers{l}.k{i}{j}(u, v) - epsilon;
                        check_gradient_(net_p, net_m, x, y, net.layers{l}.dk{i}{j}(u, v), epsilon, er);
                    end
                end
            end
        end
    end
end

function check_gradient_(net_p, net_m, x, y, stored, epsilon, er)
% Evaluate the loss on both perturbed networks and raise an error when the
% central difference is further than ER from the stored gradient.
    net_m = cnnff(net_m, x);  net_m = cnnbp(net_m, y);
    net_p = cnnff(net_p, x);  net_p = cnnbp(net_p, y);
    numeric = (net_p.L - net_m.L) / (2 * epsilon);
    if abs(numeric - stored) > er
        error('numerical gradient checking failed');
    end
end

5、cnnsetup.m

function net = cnnsetup(net, x, y)
%CNNSETUP  Initialise kernels, biases and output-layer weights of a CNN.
%
%   net = CNNSETUP(net, x, y)
%
%   Inputs
%     net - struct whose net.layers cell lists the layers; 'c' layers need
%           .outputmaps and .kernelsize, 's' layers need .scale.
%     x   - training inputs; only the size of x(:, :, 1) is used.
%     y   - training targets; only size(y, 1) is used (number of outputs).
%
%   Output
%     net - same struct with k{i}{j} and b{j} set on every 'c' layer,
%           b{j} set on every 's' layer, and net.ffW / net.ffb created.
%
%   Raises an error when running on an Octave older than 3.8.0, or when a
%   subsampling layer would produce a non-integer map size.
%   Calls the helpers isOctave and myOctaveVersion, which are not defined
%   here.

    % The message is one string; it is split with '...' only to keep the
    % source lines short.
    assert(~isOctave() || compare_versions(OCTAVE_VERSION, '3.8.0', '>='), ...
        ['Octave 3.8.0 or greater is required for CNNs as there is a bug in convolution in previous versions. ' ...
         'See http://savannah.gnu.org/bugs/?39314. Your version is ' myOctaveVersion]);

    inputmaps = 1;
    mapsize = size(squeeze(x(:, :, 1)));

    for l = 1 : numel(net.layers)   %  layer
        if strcmp(net.layers{l}.type, 's')
            mapsize = mapsize / net.layers{l}.scale;
            assert(all(floor(mapsize)==mapsize), ['Layer ' num2str(l) ' size must be integer. Actual: ' num2str(mapsize)]);
            for j = 1 : inputmaps
                net.layers{l}.b{j} = 0;
            end
        end

        if strcmp(net.layers{l}.type, 'c')
            %  'valid' convolution shrinks each extent by kernelsize - 1
            mapsize = mapsize - net.layers{l}.kernelsize + 1;
            fan_out = net.layers{l}.outputmaps * net.layers{l}.kernelsize ^ 2;
            fan_in  = inputmaps * net.layers{l}.kernelsize ^ 2;
            for j = 1 : net.layers{l}.outputmaps  %  output map
                for i = 1 : inputmaps  %  input map
                    %  uniform in +/- sqrt(6 / (fan_in + fan_out))
                    net.layers{l}.k{i}{j} = (rand(net.layers{l}.kernelsize) - 0.5) * 2 * sqrt(6 / (fan_in + fan_out));
                end
                net.layers{l}.b{j} = 0;
            end
            inputmaps = net.layers{l}.outputmaps;
        end
    end

    % 'onum' is the number of labels, that's why it is calculated using size(y, 1). If you have 20 labels so the output of the network will be 20 neurons.
    % 'fvnum' is the number of output neurons at the last layer, the layer just before the output layer.
    % 'ffb' is the biases of the output neurons.
    % 'ffW' is the weights between the last layer and the output neurons. Note that the last layer is fully connected to the output layer, that's why the size of the weights is (onum * fvnum)
    fvnum = prod(mapsize) * inputmaps;
    onum = size(y, 1);

    net.ffb = zeros(onum, 1);
    net.ffW = (rand(onum, fvnum) - 0.5) * 2 * sqrt(6 / (onum + fvnum));
end

6、cnntest.m

function [er, bad] = cnntest(net, x, y)
%CNNTEST  Classification error of a network on a labelled set.
%
%   [er, bad] = CNNTEST(net, x, y)
%
%   Inputs
%     net - network struct accepted by cnnff.
%     x   - inputs passed to cnnff unchanged.
%     y   - targets, one column per sample; the row holding the largest
%           value in a column is taken as that sample's class.
%
%   Outputs
%     er  - fraction of columns whose predicted class differs from the
%           target class (number of mismatches / size(y, 2)).
%     bad - column indices of the mismatching samples.

    %  feedforward
    net = cnnff(net, x);

    %  Reduce along dimension 1 explicitly: without the dimension argument
    %  max() works along the first non-singleton dimension, which for a
    %  one-row output matrix is the sample axis rather than the class axis.
    [~, h] = max(net.o, [], 1);
    [~, a] = max(y, [], 1);

    bad = find(h ~= a);
    er = numel(bad) / size(y, 2);
end

7、cnntrain.m

function net = cnntrain(net, x, y, opts)
%CNNTRAIN  Mini-batch training loop.
%
%   net = CNNTRAIN(net, x, y, opts)
%
%   Inputs
%     net  - network struct accepted by cnnff / cnnbp / cnnapplygrads.
%     x    - inputs; samples are indexed along the third dimension.
%     y    - targets; samples are indexed along the second dimension.
%     opts - options struct; reads opts.batchsize and opts.numepochs here
%            and is passed on to cnnapplygrads.
%
%   Output
%     net  - trained network. net.rL holds a running average of the loss
%            (0.99 * previous + 0.01 * current), one entry per batch after
%            an initial entry equal to the first batch's loss.
%
%   Raises an error when the number of samples is not a whole multiple of
%   opts.batchsize.

    m = size(x, 3);
    numbatches = m / opts.batchsize;
    if rem(numbatches, 1) ~= 0
        error('numbatches not integer');
    end

    net.rL = [];
    for epoch = 1 : opts.numepochs
        disp(['epoch ' num2str(epoch) '/' num2str(opts.numepochs)]);
        tic;

        % fresh random order of the samples for this epoch
        order = randperm(m);
        for b = 1 : numbatches
            firstIdx = (b - 1) * opts.batchsize + 1;
            lastIdx  = b * opts.batchsize;
            pick     = order(firstIdx : lastIdx);

            net = cnnff(net, x(:, :, pick));
            net = cnnbp(net, y(:, pick));
            net = cnnapplygrads(net, opts);

            % seed the running average with the very first loss value
            if isempty(net.rL)
                net.rL(1) = net.L;
            end
            net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;
        end

        toc;
    end
end

8、test_example_CNN.m

function test_example_CNN
% Train the example CNN on MNIST for one epoch and check its test error.
% Expects mnist_uint8 to provide train_x, train_y, test_x and test_y.
load mnist_uint8;

% images: transpose, reshape to 28x28xN and scale to [0, 1]
train_x = double(reshape(train_x', 28, 28, 60000)) / 255;
test_x  = double(reshape(test_x',  28, 28, 10000)) / 255;
% labels: transpose so that each sample is one column
train_y = double(train_y');
test_y  = double(test_y');

%% ex1 Train a 6c-2s-12c-2s Convolutional neural network
%will run 1 epoch in about 200 second and get around 11% error.
%With 100 epochs you'll get around 1.2% error
rand('state',0)

conv6  = struct('type', 'c', 'outputmaps', 6,  'kernelsize', 5);   %convolution layer
conv12 = struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5);   %convolution layer
pool2  = struct('type', 's', 'scale', 2);                          %sub sampling layer

cnn.layers = {
    struct('type', 'i')   %input layer
    conv6
    pool2
    conv12
    pool2
};

opts = struct('alpha', 1, 'batchsize', 50, 'numepochs', 1);

cnn = cnnsetup(cnn, train_x, train_y);
cnn = cnntrain(cnn, train_x, train_y, opts);

[er, bad] = cnntest(cnn, test_x, test_y);

% no semicolon: prints the error rate
er

%plot mean squared error
figure; plot(cnn.rL);

assert(er<0.12, 'Too big error');

注：CNN的具体MATLAB实现代码及详解请参照zouxy09的博客：http://blog.csdn.net/zouxy09/article/details/9993743/ 该作者在博客里解释得非常具体，还写了很多关于深度学习的笔记，都写得非常棒。在此对其表示膜拜和感谢。

秒客网

卷积神经网络(CNN)的训练及代码实现

1、前向传播

2、反向传播

3、卷积神经网络训练参数时的不同之处

3.1卷积层

3.2下采样层

4、卷积神经网络代码实现

相关文章