Logistic regression is a binary classification algorithm. Its goal is to minimize the error between its predictions and the training data. To train the parameters w and b of a logistic regression model, we need to define a cost function.
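Concretely, given a feature vector x, the model predicts the probability of class 1 via the sigmoid of a linear function (the same formula appears as a comment in the header below):

\hat{y} = a = \sigma(z), \quad z = w^T x + b, \quad \sigma(z) = \frac{1}{1 + e^{-z}}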
Cost function: defined over the entire training set; it measures how well the parameters w and b perform across all training samples.
Loss function (or error function): defined for a single training sample; it measures how well the algorithm does on that sample, i.e., how close the predicted output is to the actual value.
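For logistic regression these take the standard cross-entropy form (the same expressions appear as comments in train() below):

L(a, y) = -\left( y \log a + (1 - y) \log(1 - a) \right), \quad J(w, b) = \frac{1}{m} \sum_{i=1}^{m} L(a^{(i)}, y^{(i)})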
The core of gradient descent is minimizing the cost function. Gradient descent finds a local minimum of a function; since the logistic regression cost function is convex, this is also its global minimum.
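For this cost function the gradients have a simple closed form, and one gradient descent step with learning rate \alpha updates w and b as follows (these are exactly the quantities dz, dw, db computed in train() below):

dz^{(i)} = a^{(i)} - y^{(i)}, \quad dw_j = \frac{1}{m} \sum_{i=1}^{m} x_j^{(i)} \, dz^{(i)}, \quad db = \frac{1}{m} \sum_{i=1}^{m} dz^{(i)}

w_j := w_j - \alpha \, dw_j, \quad b := b - \alpha \, db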
For an introduction to logistic regression, see: http://blog.csdn.net/fengbingchun/article/details/78283675
For an introduction to gradient descent, see: http://blog.csdn.net/fengbingchun/article/details/75351323
For an introduction to the sigmoid activation function, see: http://blog.csdn.net/fengbingchun/article/details/73848734
For an introduction to the MNIST dataset, see: http://blog.csdn.net/fengbingchun/article/details/49611549
The derivation above follows Andrew Ng's deep learning video course. [Screenshots of the course slides omitted here.]
The code below implements the formulas derived above exactly. The training set consists of 10 images each of the digits 0 and 1, randomly selected from the MNIST train set; the test set consists of 10 images each of 0 and 1, randomly selected from the MNIST test set. In the accompanying figure (omitted here), the first row holds the zeros (first 10 used for training, last 10 for testing) and the second row holds the ones (first 10 for training, last 10 for testing).
logistic_regression2.hpp:
#ifndef FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
#define FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_

#include <vector>
#include <string>

namespace ANN {

template<typename T>
class LogisticRegression2 { // two categories
public:
    LogisticRegression2() = default;
    int init(const T* data, const T* labels, int train_num, int feature_length,
        T learning_rate = 0.00001, int iterations = 10000);
    int train(const std::string& model);
    int load_model(const std::string& model);
    T predict(const T* data, int feature_length) const; // y = 1/(1+exp(-(wx+b)))

private:
    int store_model(const std::string& model) const;
    T calculate_sigmoid(T value) const; // y = 1/(1+exp(-value))
    T calculate_z(const std::vector<T>& feature) const;

    std::vector<std::vector<T>> x; // training set
    std::vector<T> y; // ground truth labels
    int iterations = 1000;
    int m = 0; // train samples num
    int feature_length = 0;
    T alpha = (T)0.00001; // learning rate
    std::vector<T> w; // weights
    T b = (T)0.; // threshold
}; // class LogisticRegression2

} // namespace ANN

#endif // FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
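As a quick illustration of the public interface declared above, here is a minimal sketch that trains on a tiny made-up dataset of four 2-feature samples; the data values, hyperparameters, and model path are hypothetical and chosen only for illustration, not taken from the original test program:

#include "logistic_regression2.hpp"
#include <cstdio>

int main()
{
    // four 2-feature samples stored row-major, as init() expects; labels are 0 or 1
    const float data[] = { 0.0f, 0.1f,  0.2f, 0.0f,  0.9f, 1.0f,  1.0f, 0.8f };
    const float labels[] = { 0.f, 0.f, 1.f, 1.f };

    ANN::LogisticRegression2<float> lr;
    if (lr.init(data, labels, 4, 2, 0.1f, 5000) != 0) return -1; // 4 samples, 2 features each
    if (lr.train("toy.model") != 0) return -1; // trains and writes the model file

    const float sample[] = { 0.95f, 0.9f };
    // for this linearly separable toy data, the probability should be well above 0.5
    fprintf(stdout, "probability of class 1: %.6f\n", lr.predict(sample, 2));

    return 0;
}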
#include "logistic_regression2.hpp" #include <fstream> #include <algorithm> #include <random> #include <cmath> #include "common.hpp" namespace ANN { template<typename T> int LogisticRegression2<T>::init(const T* data, const T* labels, int train_num, int feature_length, T learning_rate, int iterations) { if (train_num < 2) { fprintf(stderr, "logistic regression train samples num is too little: %d\n", train_num); return -1; } if (learning_rate <= 0) { fprintf(stderr, "learning rate must be greater 0: %f\n", learning_rate); return -1; } if (iterations <= 0) { fprintf(stderr, "number of iterations cannot be zero or a negative number: %d\n", iterations); return -1; } this->alpha = learning_rate; this->iterations = iterations; this->m = train_num; this->feature_length = feature_length; this->x.resize(train_num); this->y.resize(train_num); for (int i = 0; i < train_num; ++i) { const T* p = data + i * feature_length; this->x[i].resize(feature_length); for (int j = 0; j < feature_length; ++j) { this->x[i][j] = p[j]; } this->y[i] = labels[i]; } return 0; } template<typename T> T LogisticRegression2<T>::calculate_z(const std::vector<T>& feature) const { T z{ 0. }; for (int i = 0; i < this->feature_length; ++i) { z += w[i] * feature[i]; } z += b; return z; } template<typename T> int LogisticRegression2<T>::train(const std::string& model) { CHECK(x.size() == y.size()); w.resize(this->feature_length, (T)0.); std::random_device rd; std::mt19937 generator(rd()); std::uniform_real_distribution<T> distribution(-0.1, 0.1); for (int i = 0; i < this->feature_length; ++i) { w[i] = distribution(generator); } b = distribution(generator); for (int iter = 0; iter < this->iterations; ++iter) { T J = (T)0., db = (T)0.; std::vector<T> dw(this->feature_length, (T)0.); std::vector<T> z(this->m, (T)0), a(this->m, (T)0), dz(this->m, (T)0); for (int i = 0; i < this->m; ++i) { z[i] = calculate_z(x[i]); // z(i)=w^T*x(i)+b a[i] = calculate_sigmoid(z[i]); // a(i)= 1/(1+e^(-z(i))) J += -(y[i] * std::log(a[i]) + (1 - y[i] * std::log(1 - a[i]))); // J+=-[y(i)*loga(i)+(1-y(i))*log(1-a(i))] dz[i] = a[i] - y[i]; // dz(i) = a(i)-y(i) for (int j = 0; j < this->feature_length; ++j) { dw[j] += x[i][j] * dz[i]; // dw(i)+=x(i)(j)*dz(i) } db += dz[i]; // db+=dz(i) } J /= this->m; for (int j = 0; j < this->feature_length; ++j) { dw[j] /= m; } db /= m; for (int j = 0; j < this->feature_length; ++j) { w[j] -= this->alpha * dw[j]; } b -= this->alpha*db; } CHECK(store_model(model) == 0); return 0; } template<typename T> int LogisticRegression2<T>::load_model(const std::string& model) { std::ifstream file; file.open(model.c_str(), std::ios::binary); if (!file.is_open()) { fprintf(stderr, "open file fail: %s\n", model.c_str()); return -1; } int length{ 0 }; file.read((char*)&length, sizeof(length)); this->w.resize(length); this->feature_length = length; file.read((char*)this->w.data(), sizeof(T)*this->w.size()); file.read((char*)&this->b, sizeof(T)); file.close(); return 0; } template<typename T> T LogisticRegression2<T>::predict(const T* data, int feature_length) const { CHECK(feature_length == this->feature_length); T value{ (T)0. 
}; for (int t = 0; t < this->feature_length; ++t) { value += data[t] * this->w[t]; } value += this->b; return (calculate_sigmoid(value)); } template<typename T> int LogisticRegression2<T>::store_model(const std::string& model) const { std::ofstream file; file.open(model.c_str(), std::ios::binary); if (!file.is_open()) { fprintf(stderr, "open file fail: %s\n", model.c_str()); return -1; } int length = w.size(); file.write((char*)&length, sizeof(length)); file.write((char*)w.data(), sizeof(T) * w.size()); file.write((char*)&b, sizeof(T)); file.close(); return 0; } template<typename T> T LogisticRegression2<T>::calculate_sigmoid(T value) const { return ((T)1 / ((T)1 + exp(-value))); } template class LogisticRegression2<float>; template class LogisticRegression2<double>; } // namespace ANNmain.cpp:
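Note the on-disk model format implied by store_model() and load_model(): an int holding the weight count (feature_length), followed by feature_length values of type T for w, followed by one value of type T for b. Because sizeof(T) differs, a model written with T = float cannot be read back with T = double.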
#include "funset.hpp" #include <iostream> #include "perceptron.hpp" #include "BP.hpp"" #include "CNN.hpp" #include "linear_regression.hpp" #include "naive_bayes_classifier.hpp" #include "logistic_regression.hpp" #include "common.hpp" #include "knn.hpp" #include "decision_tree.hpp" #include "pca.hpp" #include <opencv2/opencv.hpp> #include "logistic_regression2.hpp" // ================================ logistic regression ===================== int test_logistic_regression2_train() { const std::string image_path{ "E:/GitCode/NN_Test/data/images/digit/handwriting_0_and_1/" }; cv::Mat data, labels; for (int i = 1; i < 11; ++i) { const std::vector<std::string> label{ "0_", "1_" }; for (const auto& value : label) { std::string name = std::to_string(i); name = image_path + value + name + ".jpg"; cv::Mat image = cv::imread(name, 0); if (image.empty()) { fprintf(stderr, "read image fail: %s\n", name.c_str()); return -1; } data.push_back(image.reshape(0, 1)); } } data.convertTo(data, CV_32F); std::unique_ptr<float[]> tmp(new float[20]); for (int i = 0; i < 20; ++i) { if (i % 2 == 0) tmp[i] = 0.f; else tmp[i] = 1.f; } labels = cv::Mat(20, 1, CV_32FC1, tmp.get()); ANN::LogisticRegression2<float> lr; const float learning_rate{ 0.0001f }; const int iterations{ 10000 }; int ret = lr.init((float*)data.data, (float*)labels.data, data.rows, data.cols); if (ret != 0) { fprintf(stderr, "logistic regression init fail: %d\n", ret); return -1; } const std::string model{ "E:/GitCode/NN_Test/data/logistic_regression2.model" }; ret = lr.train(model); if (ret != 0) { fprintf(stderr, "logistic regression train fail: %d\n", ret); return -1; } return 0; } int test_logistic_regression2_predict() { const std::string image_path{ "E:/GitCode/NN_Test/data/images/digit/handwriting_0_and_1/" }; cv::Mat data, labels, result; for (int i = 11; i < 21; ++i) { const std::vector<std::string> label{ "0_", "1_" }; for (const auto& value : label) { std::string name = std::to_string(i); name = image_path + value + name + ".jpg"; cv::Mat image = cv::imread(name, 0); if (image.empty()) { fprintf(stderr, "read image fail: %s\n", name.c_str()); return -1; } data.push_back(image.reshape(0, 1)); } } data.convertTo(data, CV_32F); std::unique_ptr<int[]> tmp(new int[20]); for (int i = 0; i < 20; ++i) { if (i % 2 == 0) tmp[i] = 0; else tmp[i] = 1; } labels = cv::Mat(20, 1, CV_32SC1, tmp.get()); CHECK(data.rows == labels.rows); const std::string model{ "E:/GitCode/NN_Test/data/logistic_regression2.model" }; ANN::LogisticRegression2<float> lr; int ret = lr.load_model(model); if (ret != 0) { fprintf(stderr, "load logistic regression model fail: %d\n", ret); return -1; } for (int i = 0; i < data.rows; ++i) { float probability = lr.predict((float*)(data.row(i).data), data.cols); fprintf(stdout, "probability: %.6f, ", probability); if (probability > 0.5) fprintf(stdout, "predict result: 1, "); else fprintf(stdout, "predict result: 0, "); fprintf(stdout, "actual result: %d\n", ((int*)(labels.row(i).data))[0]); } return 0; }测试结果如下:由执行结果可知,测试图像全部分类正确。由于w和b初始值是随机产生的,因此每次执行的结果多少有些差异。
GitHub: https://github.com/fengbingchun/NN_Test