A demo: a multi-layer neural network built with Eigen, intended as the basis for a later handwritten-digit recognition demo.
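The class below implements a plain fully-connected sigmoid network trained with mini-batch gradient descent on a squared-error loss plus L2 weight decay. For reference, these are the standard equations it follows (activations and deltas hold one column per sample, so the matrix products already sum over the mini-batch of m samples; ⊙ is the element-wise product):

```
a^{(0)} = x, \qquad a^{(l)} = \sigma\!\left(W^{(l)} a^{(l-1)} + b^{(l)}\right), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}
\delta^{(L)} = -\left(y - a^{(L)}\right) \odot a^{(L)} \odot \left(1 - a^{(L)}\right)
\delta^{(l)} = \left(W^{(l+1)\top} \delta^{(l+1)}\right) \odot a^{(l)} \odot \left(1 - a^{(l)}\right)
W^{(l)} \leftarrow W^{(l)} - \eta\left(\tfrac{1}{m}\, \delta^{(l)} a^{(l-1)\top} + \lambda W^{(l)}\right), \qquad
b^{(l)} \leftarrow b^{(l)} - \tfrac{\eta}{m} \sum_{\text{batch}} \delta^{(l)}
```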
```
#include <iostream>
#include <Eigen/Dense>
#include <vector>
#include <cmath>
using Eigen::MatrixXd;
//-----------------------------------------------------
class NeuralNetwork
{
public:
NeuralNetwork(std::vector<int> _architecture
, const Eigen::MatrixXd _train_dataX
, const Eigen::MatrixXd _train_LabelY
, double _learning_rate = 0.05
, int _mini_batch_size = 30
, int _iteration_size = 1000
, double _lambda = 0.0);
// Run training
void train();
// Compute the output for a single sample
Eigen::MatrixXd predict(const Eigen::MatrixXd& _input);
// Evaluate the model; labels are one-hot encoded by default
double evaluate(const Eigen::MatrixXd& _test_dataX, const Eigen::MatrixXd& _test_dataY, bool one_hot = true);
private:
// _z is the linear combination from the previous layer: [z1, z2, z3, ...], e.g. z1 is the value computed for one sample
Eigen::MatrixXd sigmoid(const Eigen::MatrixXd& _z);
// Gradient of the activation function; _a is the activation output
Eigen::MatrixXd sigmoid_grad(const Eigen::MatrixXd& _a);
// Loss function
double loss(const Eigen::MatrixXd& pre_y, const Eigen::MatrixXd& ori_y, int m);
// Forward pass; _x is the sample matrix [x1, x2, x3, ...], e.g. x1 is one sample
Eigen::MatrixXd feedforword(const Eigen::MatrixXd& _x);
// Backpropagation; _x is the training samples, _y the corresponding labels
void backforward(const Eigen::MatrixXd& _x, const Eigen::MatrixXd& _y);
// Row index of the largest entry of a column vector
int argmax(const Eigen::MatrixXd& _y);
// Return a matrix consisting of _m copies of the column vector _bias
Eigen::MatrixXd replicate(const Eigen::MatrixXd& _bias, int _m);
private:
std::vector<int> architecture; // network architecture: (4, 4, 1) means an input layer of 4 neurons (matching the input dimension),
//one hidden layer of 4 neurons, and an output layer of 1 neuron
const Eigen::MatrixXd train_dataX; // training data (n, m): m training samples, each an n-dimensional column vector
const Eigen::MatrixXd train_dataY; // training labels
std::vector<Eigen::MatrixXd> train_weights; // weights
std::vector<Eigen::MatrixXd> train_weights_grad;// weight gradients
std::vector<Eigen::MatrixXd> train_bias; // biases
std::vector<Eigen::MatrixXd> train_bias_grad; // bias gradients
std::vector<Eigen::MatrixXd> feedforword_a; // intermediate activations from the forward pass
std::vector<Eigen::MatrixXd> error_term; // error terms (deltas)
std::vector<Eigen::MatrixXd> predict_a; // intermediate activations for single-sample prediction
double learning_rate; // learning rate for gradient descent
double lambda; // L2 regularization (weight decay) coefficient
int mini_batch_size; // number of samples per mini-batch
int iteration_size; // number of iterations
};
NeuralNetwork::NeuralNetwork(std::vector<int> _architecture
, const Eigen::MatrixXd _train_dataX
, const Eigen::MatrixXd _train_LabelY
, double _learning_rate
, int _mini_batch_size
, int _iteration_size
, double _lambda)
:architecture(_architecture)
, train_dataX(_train_dataX)
, train_dataY(_train_LabelY)
, learning_rate(_learning_rate)
, mini_batch_size(_mini_batch_size)
, iteration_size(_iteration_size)
, lambda(_lambda)
{
// Build the network layer by layer
for (int i = 1; i < architecture.size(); ++i)
{
// Initialize weights with small random values
Eigen::MatrixXd w(architecture[i], architecture[i - 1]);
w.setRandom();
train_weights.push_back(w);
// Initialize weight gradients to zero
Eigen::MatrixXd wd(architecture[i], architecture[i - 1]);
wd.setZero();
train_weights_grad.push_back(wd);
// Initialize biases with random values
Eigen::MatrixXd b(architecture[i], 1);
b.setRandom();
train_bias.push_back(b);
// Initialize bias gradients to zero
Eigen::MatrixXd bd(architecture[i], mini_batch_size);
bd.setZero();
train_bias_grad.push_back(bd);
// Initialize activations
Eigen::MatrixXd a(architecture[i], mini_batch_size);
a.setZero();
feedforword_a.push_back(a);
// Initialize error terms
Eigen::MatrixXd e(architecture[i], mini_batch_size);
e.setZero();
error_term.push_back(e);
// Initialize prediction intermediates
Eigen::MatrixXd pa(architecture[i], 1);
pa.setZero();
predict_a.push_back(pa);
}// for
}// end
// Forward pass; _x is the sample matrix [x1, x2, x3, ...], e.g. x1 is one sample
Eigen::MatrixXd NeuralNetwork::feedforword(const Eigen::MatrixXd& _x)
{
for (int i = 0; i < feedforword_a.size(); ++i)
{
if (i == 0) // the input to the first layer is the sample itself
{
feedforword_a.at(i) = sigmoid(train_weights.at(i) * _x + replicate(train_bias.at(i), mini_batch_size));// the bias is replicated so its column count matches the samples
}// if
else
{
feedforword_a.at(i) = sigmoid(train_weights.at(i) *
feedforword_a.at(i - 1) + replicate(train_bias.at(i), mini_batch_size));
}// else
}// for
return feedforword_a.at(feedforword_a.size() - 1);
}// end
// _z is the linear combination from the previous layer: [z1, z2, z3, ...]; _z.array() means element-wise operations
Eigen::MatrixXd NeuralNetwork::sigmoid(const Eigen::MatrixXd& _z)
{
return 1.0 / (1.0 + (-_z.array()).exp());
}// end
// Return a matrix consisting of _m copies of the column vector _bias
Eigen::MatrixXd NeuralNetwork::replicate(const Eigen::MatrixXd& _bias, int _m)
{
Eigen::MatrixXd ret_bias(_bias.rows(), _m);
for (int i = 0; i < _m; ++i)
{
ret_bias.col(i) = _bias;
}
return ret_bias;
}
// Backpropagation; _x is the training samples, _y the corresponding labels
void NeuralNetwork::backforward(const Eigen::MatrixXd& _x, const Eigen::MatrixXd& _y)
{
// 1. Compute the activations of layer 2 through the output layer
feedforword(_x);
// work backwards, layer by layer
for (int i = error_term.size() - 1; i >= 0; --i)
{
// 2. Compute the error terms (deltas) backwards
if (i == error_term.size() - 1) // output layer
{
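// output layer, squared-error loss: delta_L = -(y - a_L) .* a_L .* (1 - a_L)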
error_term.at(i) = -(_y.array() - feedforword_a.at(i).array())
* sigmoid_grad(feedforword_a.at(i)).array();
}// if
else
{
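// hidden layer: delta_l = (W_{l+1}^T * delta_{l+1}) .* a_l .* (1 - a_l)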
error_term.at(i) = (train_weights.at(i + 1).transpose()
* error_term.at(i + 1)).array() * sigmoid_grad(feedforword_a.at(i)).array();
}// else
// 3. Gradient computation; the bias result has mini_batch_size columns and is summed over the batch during the weight update
train_bias_grad.at(i) = error_term.at(i);
if (i > 0)
train_weights_grad.at(i) = error_term.at(i) * feedforword_a.at(i - 1).transpose();
else
train_weights_grad.at(i) = error_term.at(i) * _x.transpose();
}// for
}// end
// Gradient of the activation function; _a is the activation output
Eigen::MatrixXd NeuralNetwork::sigmoid_grad(const Eigen::MatrixXd& _a)
{
return _a.array() * (1.0 - _a.array());
}// end
// Run training
void NeuralNetwork::train()
{
std::cout << "training..." << std::endl;
for (int i = 0; i < train_weights.size(); ++i)
{
std::cout << "train_weights: " << train_weights.at(i) << std::endl;
std::cout << "train_bias: " << train_bias.at(i) << std::endl;
}// for
// mini-batch gradient descent iterations
for (int i = 0; i < iteration_size; ++i)
{
for (int k = 0; k + mini_batch_size <= train_dataX.cols(); k += mini_batch_size)
{
// take one mini-batch of samples
Eigen::MatrixXd mini_train_x = train_dataX.middleCols(k, mini_batch_size);
Eigen::MatrixXd mini_train_y = train_dataY.middleCols(k, mini_batch_size);
// compute the gradients
backforward(mini_train_x, mini_train_y);
// update the weights and biases
for (int j = 0; j < train_weights.size(); ++j)
{
// the sum over the batch is already produced by the matrix product in backpropagation
train_weights.at(j) = train_weights.at(j) -
learning_rate * (train_weights_grad.at(j) / mini_batch_size + lambda * train_weights.at(j));
Eigen::MatrixXd tempBias(mini_batch_size, 1);// ones vector used to sum the bias gradients over the batch
tempBias.setOnes();
train_bias.at(j) = train_bias.at(j) -
learning_rate * (train_bias_grad.at(j) * tempBias / mini_batch_size);
}// for
if (0 == i % 10) {//if110
if (0 == k % 1000) {//if220
std::cout << "iter " << i << "\t[k:" << k << "-->loss:\t" << loss(feedforword_a.at(feedforword_a.size() - 1), mini_train_y, mini_batch_size) << std::endl;
}//if220
}//if110
}// for
}// for
for (int i = 0; i < train_weights.size(); ++i)
{
std::cout << "train_weights: " << train_weights.at(i) << std::endl;
std::cout << "train_bias: " << train_bias.at(i) << std::endl;
}// for
std::cout << "trained..." << std::endl;
}// end
// Loss function: mean squared error plus an L2 regularization term
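// L = (1/m) * sum_i 0.5 * ||pre_y_i - ori_y_i||^2 + (lambda/2) * sum W^2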
double NeuralNetwork::loss(const Eigen::MatrixXd& pre_y, const Eigen::MatrixXd& ori_y, int m)
{
// squared-error term
double left_term = 0.0;
for (int i = 0; i < m; ++i)
{
Eigen::MatrixXd temp_m = (pre_y.col(i) - ori_y.col(i)).transpose() * (pre_y.col(i) - ori_y.col(i)) / 2.0;
left_term += temp_m(0, 0);
}
left_term /= m;
// regularization term
double norm_term = 0.0;
for (int i = 0; i < train_weights.size(); ++i)
{
Eigen::MatrixXd temp_m = train_weights.at(i);
for (int j = 0; j < temp_m.cols(); ++j)
{
for (int k = 0; k < temp_m.rows(); ++k)
{
norm_term += temp_m(k, j) * temp_m(k, j);
}// for
}// for
}// for
norm_term *= (lambda / 2);
return left_term + norm_term;
}// end
// Evaluate the model; labels are one-hot encoded by default
double NeuralNetwork::evaluate(const Eigen::MatrixXd& _test_dataX, const Eigen::MatrixXd& _test_dataY, bool one_hot)
{
int cnt = 0;
for (int i = 0; i < _test_dataX.cols(); ++i)
{
// take one test sample
Eigen::MatrixXd x = _test_dataX.col(i);
// run it through the network
Eigen::MatrixXd y_pre = predict(x);
if (one_hot)
{
if (argmax(y_pre) == argmax(_test_dataY.col(i)))
{
++cnt;
}// if
}// if
else
{
if (std::fabs(y_pre(0, 0) - _test_dataY(0, i)) < 0.1)
{
++cnt;
}// if
}// if
}// for
return cnt * 1.0 / _test_dataX.cols();
}// end
// Row index of the largest entry of a column vector
int NeuralNetwork::argmax(const Eigen::MatrixXd& _y)
{
double _max = _y(0, 0);
int ret = 0;
for (int i = 1; i < _y.rows(); ++i)
{
if (_y(i, 0) > _max)
{
_max = _y(i, 0);
ret = i;
}
}
return ret;
}
// Compute the output for a single sample
Eigen::MatrixXd NeuralNetwork::predict(const Eigen::MatrixXd& _input)
{
for (int i = 0; i < predict_a.size(); ++i)
{
if (i == 0)
{
predict_a.at(i) = sigmoid(train_weights.at(i) * _input + train_bias.at(i));
}// if
else
{
predict_a.at(i) = sigmoid(train_weights.at(i) * predict_a.at(i - 1) + train_bias.at(i));
}// else
}// for
return predict_a.at(predict_a.size() - 1);
}// end
class CustomData
{
public:
CustomData(int numberOfTrainData, int numberOfTestData);
const Eigen::MatrixXd getTrainData() const;
const Eigen::MatrixXd getTrainLabel() const;
const Eigen::MatrixXd getTestData() const;
const Eigen::MatrixXd getTestLabel() const;
private:
void generatorData(int numberOfTrainData, int numberOfTestData);
private:
Eigen::MatrixXd mtrain_x, mtrain_y, mtest_x, mtest_y;
};
CustomData::CustomData(int numberOfTrainData, int numberOfTestData)
{
generatorData(numberOfTrainData, numberOfTestData);
}
const Eigen::MatrixXd CustomData::getTrainData() const
{
return mtrain_x;
}
const Eigen::MatrixXd CustomData::getTrainLabel() const
{
return mtrain_y;
}
const Eigen::MatrixXd CustomData::getTestData() const
{
return mtest_x;
}
const Eigen::MatrixXd CustomData::getTestLabel() const
{
return mtest_y;
}
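// Generate a toy dataset (assumes numberOfTrainData is a multiple of 4). Each group of
// 4 training columns ends up as the 4-D patterns (9,9,0,0)->1, (9,0,9,0)->0, (0,0,9,9)->1,
// (0,9,0,9)->0; the test columns are noisy near-one-hot vectors with scalar labels (index + 1) / 4.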
void CustomData::generatorData(int numberOfTrainData, int numberOfTestData)
{
mtrain_x.resize(4, numberOfTrainData);
mtrain_x.setZero();
mtrain_y.resize(1, numberOfTrainData);
mtest_x.resize(4, numberOfTestData);
mtest_x.setZero();
mtest_y.resize(1, numberOfTestData);
for (int i = 0; i < numberOfTrainData; ++i)
{//for110i
int index = i % 4;
int indexx2 = (i / 4) * 4;
mtrain_x(index, i) = 1;
for (size_t j = 0; j != mtrain_x.rows(); ++j)
{
mtrain_x(j, i) += (5e-3 * rand() / RAND_MAX - 2.5e-3);
}
mtrain_y(0, i) = (index + 1) * 1.0 / 4;
//-------------------------------------------------------
mtrain_x(0, 0 + indexx2) = 9;
mtrain_x(1, 0 + indexx2) = 9;
mtrain_x(2, 0 + indexx2) = 0;
mtrain_x(3, 0 + indexx2) = 0;
mtrain_y(0, 0 + indexx2) = 1;
mtrain_x(0, 1 + indexx2) = 9;
mtrain_x(1, 1 + indexx2) = 0;
mtrain_x(2, 1 + indexx2) = 9;
mtrain_x(3, 1 + indexx2) = 0;
mtrain_y(0, 1 + indexx2) = 0;
mtrain_x(0, 2 + indexx2) = 0;
mtrain_x(1, 2 + indexx2) = 0;
mtrain_x(2, 2 + indexx2) = 9;
mtrain_x(3, 2 + indexx2) = 9;
mtrain_y(0, 2 + indexx2) = 1;
mtrain_x(0, 3 + indexx2) = 0;
mtrain_x(1, 3 + indexx2) = 9;
mtrain_x(2, 3 + indexx2) = 0;
mtrain_x(3, 3 + indexx2) = 9;
mtrain_y(0, 3 + indexx2) = 0;
//-------------------------------------------------------
}//for110i
//-----------------------------------------------------------
//-----------------------------------------------------------
for (int i = 0; i < numberOfTestData; ++i)
{
int index = i % 4;
mtest_x(index, i) = 1;
for (int j = 0; j < mtest_x.rows(); ++j)
{
mtest_x(j, i) += (5e-3 * rand() / RAND_MAX - 2.5e-3);
}
mtest_y(0, i) = (index + 1) * 1.0 / 4;
}
//----------------------------------------------------------
//==========================================================
}//void CustomData::generatorData
//=====================================================
int main(int argc, char** argv) {
CustomData data(1000, 300); //CustomData data(10000, 3000);
std::vector<int> architecture = { 4, 4, 1 };
NeuralNetwork network(architecture, data.getTrainData(), data.getTrainLabel());
network.train();
Eigen::MatrixXd input1(4, 1);
input1 << 0.990, 0.902, 0.003, 0.00013;
std::cout << "predict:" << network.predict(input1) << std::endl;
Eigen::MatrixXd input2(4, 1);
input2 << 0.0103, 0.987, 0.0006, 0.90014;
std::cout << "predict:" << network.predict(input2) << std::endl;
Eigen::MatrixXd input3(4, 1);
input3 << 0.9201, 0.002, 0.9579, 0.0015;
std::cout << "predict:" << network.predict(input3) << std::endl;
Eigen::MatrixXd input4(4, 1);
input4 << 0.004, 0.001, 0.905, 0.9399;
std::cout << "predict:" << network.predict(input4) << std::endl;
return 0;
}//
```
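Only the Eigen headers are needed to build this; with Eigen installed under, say, /usr/include/eigen3, a command along the lines of `g++ -O2 -I/usr/include/eigen3 demo.cpp -o demo` should work (adjust the path to your installation).

main() never exercises evaluate(); a minimal sketch of how it could be called on the generated test set is below. one_hot is false because CustomData produces scalar labels rather than one-hot vectors (note that the generated training and test sets follow different patterns, so the resulting accuracy on this toy data is not very meaningful):

```
// Hypothetical addition at the end of main(), before "return 0;":
// a prediction counts as correct when it is within 0.1 of the scalar label.
double acc = network.evaluate(data.getTestData(), data.getTestLabel(), false);
std::cout << "test accuracy: " << acc << std::endl;
```

Sample output from one run (weights and biases are randomly initialized, so the exact numbers will vary):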
```
train_weights: -0.663259 -0.807855 0.815424 -0.882382
0.476608 0.468307 0.613758 -0.983581
0.985534 0.403241 0.30015 0.855098
-0.229835 -0.860714 0.311869 0.283792
train_bias: -0.485397
-0.780755
-0.930723
0.16129
train_weights: -0.643422 0.164586 0.136265 -0.697195
train_bias: 0.7528
iter 0 [k:0-->loss: 0.00460512
iter 10 [k:0-->loss: 0.00174769
iter 20 [k:0-->loss: 0.00151385
iter 30 [k:0-->loss: 0.00138848
iter 40 [k:0-->loss: 0.00129441
iter 50 [k:0-->loss: 0.00120854
iter 60 [k:0-->loss: 0.0010464
iter 70 [k:0-->loss: 0.000946192
iter 80 [k:0-->loss: 0.00085975
iter 90 [k:0-->loss: 0.000779842
iter 100 [k:0-->loss: 0.000705283
iter 110 [k:0-->loss: 0.000631854
iter 120 [k:0-->loss: 0.000539578
iter 130 [k:0-->loss: 0.000428641
iter 140 [k:0-->loss: 0.000374152
iter 150 [k:0-->loss: 0.000344166
iter 160 [k:0-->loss: 0.000321507
iter 170 [k:0-->loss: 0.000302534
iter 180 [k:0-->loss: 0.00028602
iter 190 [k:0-->loss: 0.000271352
iter 200 [k:0-->loss: 0.000258158
iter 210 [k:0-->loss: 0.000246184
iter 220 [k:0-->loss: 0.000235249
iter 230 [k:0-->loss: 0.000225211
iter 240 [k:0-->loss: 0.000215958
iter 250 [k:0-->loss: 0.000207399
iter 260 [k:0-->loss: 0.000199458
iter 270 [k:0-->loss: 0.000192069
iter 280 [k:0-->loss: 0.000185178
iter 290 [k:0-->loss: 0.000178736
iter 300 [k:0-->loss: 0.000172702
iter 310 [k:0-->loss: 0.000167038
iter 320 [k:0-->loss: 0.000161713
iter 330 [k:0-->loss: 0.000156698
iter 340 [k:0-->loss: 0.000151967
iter 350 [k:0-->loss: 0.000147497
iter 360 [k:0-->loss: 0.000143268
iter 370 [k:0-->loss: 0.000139261
iter 380 [k:0-->loss: 0.00013546
iter 390 [k:0-->loss: 0.00013185
iter 400 [k:0-->loss: 0.000128418
iter 410 [k:0-->loss: 0.00012515
iter 420 [k:0-->loss: 0.000122035
iter 430 [k:0-->loss: 0.000119064
iter 440 [k:0-->loss: 0.000116227
iter 450 [k:0-->loss: 0.000113515
iter 460 [k:0-->loss: 0.00011092
iter 470 [k:0-->loss: 0.000108436
iter 480 [k:0-->loss: 0.000106055
iter 490 [k:0-->loss: 0.000103772
iter 500 [k:0-->loss: 0.00010158
iter 510 [k:0-->loss: 9.94744e-05
iter 520 [k:0-->loss: 9.74503e-05
iter 530 [k:0-->loss: 9.55033e-05
iter 540 [k:0-->loss: 9.3629e-05
iter 550 [k:0-->loss: 9.18236e-05
iter 560 [k:0-->loss: 9.00835e-05
iter 570 [k:0-->loss: 8.84052e-05
iter 580 [k:0-->loss: 8.67856e-05
iter 590 [k:0-->loss: 8.52218e-05
iter 600 [k:0-->loss: 8.37109e-05
iter 610 [k:0-->loss: 8.22504e-05
iter 620 [k:0-->loss: 8.08379e-05
iter 630 [k:0-->loss: 7.94712e-05
iter 640 [k:0-->loss: 7.8148e-05
iter 650 [k:0-->loss: 7.68663e-05
iter 660 [k:0-->loss: 7.56244e-05
iter 670 [k:0-->loss: 7.44204e-05
iter 680 [k:0-->loss: 7.32526e-05
iter 690 [k:0-->loss: 7.21194e-05
iter 700 [k:0-->loss: 7.10195e-05
iter 710 [k:0-->loss: 6.99513e-05
iter 720 [k:0-->loss: 6.89136e-05
iter 730 [k:0-->loss: 6.79051e-05
iter 740 [k:0-->loss: 6.69245e-05
iter 750 [k:0-->loss: 6.59709e-05
iter 760 [k:0-->loss: 6.5043e-05
iter 770 [k:0-->loss: 6.41399e-05
iter 780 [k:0-->loss: 6.32607e-05
iter 790 [k:0-->loss: 6.24044e-05
iter 800 [k:0-->loss: 6.15702e-05
iter 810 [k:0-->loss: 6.07571e-05
iter 820 [k:0-->loss: 5.99646e-05
iter 830 [k:0-->loss: 5.91917e-05
iter 840 [k:0-->loss: 5.84378e-05
iter 850 [k:0-->loss: 5.77022e-05
iter 860 [k:0-->loss: 5.69843e-05
iter 870 [k:0-->loss: 5.62834e-05
iter 880 [k:0-->loss: 5.5599e-05
iter 890 [k:0-->loss: 5.49305e-05
iter 900 [k:0-->loss: 5.42773e-05
iter 910 [k:0-->loss: 5.3639e-05
iter 920 [k:0-->loss: 5.3015e-05
iter 930 [k:0-->loss: 5.2405e-05
iter 940 [k:0-->loss: 5.18083e-05
iter 950 [k:0-->loss: 5.12247e-05
iter 960 [k:0-->loss: 5.06536e-05
iter 970 [k:0-->loss: 5.00947e-05
iter 980 [k:0-->loss: 4.95477e-05
iter 990 [k:0-->loss: 4.90121e-05
train_weights: -0.0223575 -0.849625 0.870558 -1.50992
0.774452 0.112707 1.46785 -0.782934
0.985799 0.404441 0.2995 0.855383
-0.666085 -0.994196 0.0593543 0.334045
train_bias: -0.483912
-0.725367
-0.930662
0.118402
train_weights: -5.4056 5.26012 -1.67297 1.2787
train_bias: -1.05641
trained...
predict:0.455409
predict:0.318009
predict:0.394684
predict:0.490456
```
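Since the longer-term goal is handwritten-digit recognition, the same class should in principle carry over to an MNIST-style setup once a data loader exists. A hypothetical sketch of what main() might then look like (the mnist_* matrices are placeholders; loading them from the MNIST files is not shown):

```
// Hypothetical MNIST-style main(): 28x28 = 784 inputs, 10 one-hot outputs.
// mnist_train_x is 784 x m (one image per column), mnist_train_y is 10 x m (one-hot).
int main() {
    Eigen::MatrixXd mnist_train_x, mnist_train_y, mnist_test_x, mnist_test_y;
    // ... fill the four matrices from the MNIST files ...
    std::vector<int> architecture = { 784, 30, 10 };
    NeuralNetwork network(architecture, mnist_train_x, mnist_train_y);
    network.train();
    // one-hot labels, so one_hot = true (argmax comparison)
    std::cout << "accuracy: " << network.evaluate(mnist_test_x, mnist_test_y, true) << std::endl;
    return 0;
}
```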