上一节我们匆匆忙忙地构建起了FCN16S网络,但是测试结果很不理想,跟论文里的结果相差甚远。这一节我们就来找出原因,使网络达到论文的精度,并解决数据输入慢的问题。
## 首先我们先在git仓库里新建一个分支。
![](https://box.kancloud.cn/dbdac0464fb5f3844b3da0ab0050b479_706x85.png)
然后把它同步到网上:
![](https://box.kancloud.cn/96ddfa23b868c77d3d12f04cbbc73dc6_708x54.png)
以后我们就在这个分支上修改代码
完整代码位置:https://github.com/tangzhenjie/FCN16S/tree/advance
*****
下面我们就来解决上一节中的问题
* [第一问题 数据载入慢](#第一节)
* [第二问题 精度上不去](#第二节)
* [第三问题 显示网络学习过程](#第三节)
<h3 id="第一节">第一问题 数据载入慢</h3>
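原因:上一节我们没有使用输入管道(input pipeline),而是直接把数据全部读入内存。
解决方法:使用TensorFlow的输入管道按批次读取数据(下面的代码采用基于队列的tf.train.slice_input_producer和tf.train.batch;tf.data.Dataset是另一种可选的方式)。
参考学习链接:https://www.tensorflow.org/guide/datasets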
首先我们修改read_MITSceneParsingData.py这个文件,为后面生成输入数据集做准备。
![](https://box.kancloud.cn/4eb75bc593aa46fd8f6dc8e3505ad97a_1216x345.png)
然后删除下图所示的文件,重新执行时会自动重新生成:![](https://box.kancloud.cn/3576724547b2ed1dbf6a2ab0207ee328_961x340.png)
我们在项目中新建一个文件:BatchReader.py然后添加如下代码:
```python
import tensorflow as tf
import read_MITSceneParsingData as Reader
import numpy as np
#dataset_dir = "D:\pycharm_program\FCN16S\Data_zoo\MIT_SceneParsing\\"
#测试
#train_filepaths, eval_filepaths = Reader.read_dataset(dataset_dir)
#train_filepaths = tf.convert_to_tensor(train_filepaths, dtype=tf.string)
#i = 0
#train_filepaths = np.array(train_filepaths)
#train_filepaths1 = train_filepaths[:, 1]
#print(train_filepaths1[0])
"""
读取batch数据
:param image_filepaths tensor dtype=string 图像路径
annotation_filepaths tensor dtype=string 标签图像路径
image_size 图像剪裁大小
batch_size batch大小
:return tuple
"""
def read_batch_image(image_filepaths, label_filepaths, image_size, batch_size=2):
    """Build a queue-based input pipeline yielding batches of images and labels.

    :param image_filepaths: string tensor (or array) of image file paths
    :param label_filepaths: string tensor (or array) of annotation file paths,
        aligned element-wise with ``image_filepaths``
    :param image_size: side length that both images and labels are resized to
    :param batch_size: number of samples per batch
    :return: tuple ``(X, Y)`` of the batched image tensor and the batched label tensor
    """
    # Slice the two path lists in lockstep so each image stays paired with its label.
    image_path, label_path = tf.train.slice_input_producer(
        [image_filepaths, label_filepaths], shuffle=True)

    # Read the image from disk and decode it as a 3-channel JPEG.
    image = tf.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    # Resize images to a common size.
    image = tf.image.resize_images(image, [image_size, image_size])
    # Normalization is intentionally left disabled here (may be revisited later):
    # image = image * 1.0 / 127.5 - 1.0

    # Read the label from disk and decode it as a single-channel PNG.
    label = tf.read_file(label_path)
    label = tf.image.decode_png(label, channels=1)
    # Labels are class ids, not intensities: the default bilinear resize would
    # blend neighbouring ids into values that belong to no class, so resize
    # with nearest-neighbour, which only ever copies existing ids.
    label = tf.image.resize_images(
        label, [image_size, image_size],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    X, Y = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        capacity=batch_size * 8,
        num_threads=4)
    return X, Y
```
下面我们就来替换:FCN16S.py中输入数据的方法:
首先我们把模块引进来去掉不用的模块:
![](https://box.kancloud.cn/fbf59812a6894d0af338d74afa906c41_716x319.png)
然后我们把FCN16S.py文件重新写了一下:
```python
from __future__ import print_function
import tensorflow as tf
import numpy as np
import TensorflowUtils as utils
from six.moves import xrange # 兼容python2和python3
import read_MITSceneParsingData as DatasetReader
import BatchReader as BatchReader
# Parameters used by the network (each can be overridden from the command line).
# Backslashes in the Windows paths are escaped explicitly; the resulting
# strings are unchanged, but no longer rely on invalid escape sequences.
FLAGS = tf.flags.FLAGS
# Batch size
tf.flags.DEFINE_integer("batch_size", 2, "batch size for training")
# Location of the log files
tf.flags.DEFINE_string("logs_dir", "D:\\pycharm_program\\FCN16S\\Logs\\", "path to logs directory")
# Location of the image dataset
tf.flags.DEFINE_string("data_dir", "D:\\pycharm_program\\FCN16S\\Data_zoo\\MIT_SceneParsing\\", "path to the dataset")
# Learning rate
tf.flags.DEFINE_float("learning_rate", 1e-4, "learning rate for Adam Optimizer")
# Location of the VGG16 model .mat file (VGG16 parameters trained with matlab)
tf.flags.DEFINE_string("model_dir", "D:\\pycharm_program\\FCN16S\\Model_zoo\\", "Path to vgg model mat")
# Whether to run in debug mode (debug mode records some extra information)
tf.flags.DEFINE_bool("debug", True, "Model Debug:True/ False")
# Run mode (train / test / visualize)
tf.flags.DEFINE_string("mode", "train", "Mode: train/ test/ visualize")
# Checkpoint directory
tf.flags.DEFINE_string("checkpoint_dir", "D:\\pycharm_program\\FCN16S\\Checkpoint\\", "path to the checkpoint")
# Directory where validation result images are saved
tf.flags.DEFINE_string("image_dir", "D:\\pycharm_program\\FCN16S\\Image\\", "path to the validation result images")
# URL of the pretrained model
MODEL_URL = "http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-16.mat"
# Maximum number of training iterations
MAX_ITERATION = int(1e5 + 1)
# Number of classes in the MIT dataset
NUM_OF_CLASSES = 151
# VGG16 takes 224*224 input images (in theory this network accepts any image size)
IMAGE_SIZE = 224
"""
首先定义该网络与VGG16相同的部分
:param weight 从.mat中获得的权重
image 网络输入的图像
:return 包括相同部分所有输出的数组
"""
def vgg_net(weights, image):
    """Build the layers that FCN16S shares with VGG16.

    :param weights: per-layer weights loaded from the .mat file, indexed in the
        same order as the layer names below
    :param image: input image tensor fed to the first layer
    :return: dict mapping every layer name to that layer's output tensor
    """
    # Names of the VGG16 layers reused by FCN16S, in forward order. The first
    # four characters of each name select which kind of layer gets built.
    layer_names = (
        "conv1_1", "relu1_1", "conv1_2", "relu1_2", "pool1",
        "conv2_1", "relu2_1", "conv2_2", "relu2_2", "pool2",
        "conv3_1", "relu3_1", "conv3_2", "relu3_2", "conv3_3", "relu3_3", "pool3",
        "conv4_1", "relu4_1", "conv4_2", "relu4_2", "conv4_3", "relu4_3", "pool4",
        "conv5_1", "relu5_1", "conv5_2", "relu5_2", "conv5_3", "relu5_3", "pool5"
    )

    # Output of every shared layer, keyed by layer name.
    outputs = {}
    # Tensor flowing into the layer currently being built.
    tensor = image

    for index, layer_name in enumerate(layer_names):
        if layer_name.startswith("conv"):
            params = weights[index][0][0][0][0]
            raw_kernels = params[0]
            raw_bias = params[1]
            print(raw_kernels.shape)
            print(raw_bias.shape)
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernel_var = utils.get_variable(
                np.transpose(raw_kernels, (1, 0, 2, 3)), name=layer_name + "_w")
            bias_var = utils.get_variable(raw_bias.reshape(-1), name=layer_name + "_b")
            tensor = utils.conv2d_basic(tensor, kernel_var, bias_var)
        elif layer_name.startswith("relu"):
            tensor = tf.nn.relu(tensor, name=layer_name)
            if FLAGS.debug:
                utils.add_activation_summary(tensor)
        elif layer_name.startswith("pool"):
            tensor = utils.max_pool_2x2(tensor)
        outputs[layer_name] = tensor

    return outputs
"""
构建FCN16S
:param image 网络输入的图像 [batch, height, width, channels]
:return 输出与image大小相同的tensor
"""
def fcn16s_net(image, keep_prob):
    """Build the FCN16S graph on top of the pretrained VGG16 layers.

    :param image: input image batch, [batch, height, width, channels]
    :param keep_prob: keep probability used by the two dropout layers
    :return: tuple ``(prediction, score)``; ``score`` holds the per-class
        scores cropped back to the spatial size of the un-padded input, and
        ``prediction`` is the argmax of ``score`` over the class axis with a
        trailing axis of size 1 added
    """
    # Load the pretrained model data used to build the shared VGG16 layers.
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    weights = model_data["layers"][0]
    mean = model_data['normalization'][0][0][0]
    # Average the normalization data over its first two axes.
    mean_pixel = np.mean(mean, axis=(0, 1))
    image = utils.process_image(image, mean_pixel)

    # Pad the image by 100 before it enters the network.
    image = utils.pading(image, 100)

    with tf.variable_scope("VGG16"):
        vgg16net_dict = vgg_net(weights, image)

    with tf.variable_scope("FCN16S"):
        pool5 = vgg16net_dict["pool5"]

        # fc6 layer, expressed as a 7x7 VALID convolution.
        w6 = utils.weight_variable([7, 7, 512, 4096], name="w6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = tf.nn.conv2d(pool5, w6, [1, 1, 1, 1], padding="VALID")
        conv_bias6 = tf.nn.bias_add(conv6, b6)
        relu6 = tf.nn.relu(conv_bias6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        # fc7 layer, expressed as a 1x1 convolution.
        w7 = utils.weight_variable([1, 1, 4096, 4096], name="w7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, w7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        conv_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # score_fr layer: 1x1 convolution down to one channel per class.
        w8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSES], name="w8")
        b8 = utils.bias_variable([NUM_OF_CLASSES], name="b8")
        score_fr = utils.conv2d_basic(conv_dropout7, w8, b8)

        # upscore2 layer: transposed convolution with the helper's defaults.
        # NOTE(review): presumably a 2x upsampling - confirm the default stride
        # and output shape in TensorflowUtils.conv2d_transpose_strided.
        w9 = utils.weight_variable([4, 4, NUM_OF_CLASSES, NUM_OF_CLASSES], name="w9")
        b9 = utils.bias_variable([NUM_OF_CLASSES], name="b9")
        upscore2 = utils.conv2d_transpose_strided(score_fr, w9, b9)

        # score_pool4 layer: 1x1 convolution on pool4 down to one channel per class.
        pool4_shape = vgg16net_dict["pool4"].get_shape()
        w10 = utils.weight_variable([1, 1, pool4_shape[3].value, NUM_OF_CLASSES], name="w10")
        b10 = utils.bias_variable([NUM_OF_CLASSES], name="b10")
        score_pool4 = utils.conv2d_basic(vgg16net_dict["pool4"], w10, b10)

        # score_pool4c layer: crop score_pool4 at offset (5, 5) to upscore2's spatial size.
        upscore2_shape = upscore2.get_shape()
        upscore2_target_height = upscore2_shape[1].value
        upscore2_target_width = upscore2_shape[2].value
        score_pool4c = tf.image.crop_to_bounding_box(
            score_pool4, 5, 5, upscore2_target_height, upscore2_target_width)

        # fuse_pool4 layer: element-wise sum of the two score maps.
        fuse_pool4 = tf.add(upscore2, score_pool4c, name="fuse_pool4")

        # upscore16 layer: stride-16 transposed convolution. The batch size is
        # read dynamically; height and width are 16x the static fuse_pool4 size.
        fuse_pool4_shape = fuse_pool4.get_shape()
        w11 = utils.weight_variable([32, 32, NUM_OF_CLASSES, NUM_OF_CLASSES], name="w11")
        b11 = utils.bias_variable([NUM_OF_CLASSES], name="b11")
        output_shape = tf.stack([
            tf.shape(fuse_pool4)[0],
            fuse_pool4_shape[1].value * 16,
            fuse_pool4_shape[2].value * 16,
            NUM_OF_CLASSES,
        ])
        upscore16 = utils.conv2d_transpose_strided(
            fuse_pool4, w11, b11, output_shape=output_shape, stride=16)

        # score layer: `image` is the padded input here, so subtract 200
        # (100 of padding on each side) to recover the original size.
        image_shape = image.get_shape()
        score_target_height = image_shape[1].value - 200
        score_target_width = image_shape[2].value - 200
        score = tf.image.crop_to_bounding_box(
            upscore16, 27, 27, score_target_height, score_target_width)

        # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
        annotation_pred = tf.argmax(score, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), score
def train(loss_val, var_list):
    """Create the training op that minimizes ``loss_val`` with Adam.

    :param loss_val: scalar loss tensor to minimize
    :param var_list: variables to compute gradients for and update
    :return: the op that applies the computed gradients
    """
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads_and_vars = adam.compute_gradients(loss_val, var_list=var_list)

    # In debug mode, record a summary for every (gradient, variable) pair.
    if FLAGS.debug:
        for gradient, variable in grads_and_vars:
            utils.add_gradient_summary(gradient, variable)

    return adam.apply_gradients(grads_and_vars)
def main(argv=None):
    """Build the FCN16S graph, then train or evaluate it according to FLAGS.mode.

    In "train" mode the training split is read and the training loop runs for
    MAX_ITERATION steps, saving a checkpoint and a summary every 500 steps.
    In any other mode the validation split is read; "visualize" evaluates the
    loss and mean IoU on a single batch. Other modes build the graph but run
    nothing.

    :param argv: command-line arguments passed in by tf.app.run (unused)
    """
    ########################## Build the network ####################
    # Inputs of the network.
    keep_probability = tf.placeholder(tf.float32, name="keep_probability")
    train_filepaths, eval_filepaths = DatasetReader.read_dataset(FLAGS.data_dir)

    # Training reads the training split; every other mode reads the validation split.
    selected_filepaths = train_filepaths if FLAGS.mode == "train" else eval_filepaths
    selected_filepaths = np.array(selected_filepaths, dtype=np.string_)
    image_filepaths = selected_filepaths[:, 0]
    label_filepaths = selected_filepaths[:, 1]

    images, labels = BatchReader.read_batch_image(
        image_filepaths, label_filepaths, IMAGE_SIZE, FLAGS.batch_size)
    labels = tf.cast(labels, tf.int64)
    tf.summary.image("images", images, max_outputs=3)
    tf.summary.image("labels", tf.cast(labels, tf.uint8), max_outputs=3)

    pred_annotation, logits = fcn16s_net(images, keep_probability)
    tf.summary.image("pre", tf.cast(pred_annotation, tf.uint8), max_outputs=3)

    # Drop the trailing channel axis once; `axis` replaces the deprecated
    # `squeeze_dims` keyword of tf.squeeze.
    labels_2d = tf.squeeze(labels, axis=[3])

    # Loss function.
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels_2d),
        name="entropy")
    if FLAGS.debug:
        tf.summary.scalar("loss", loss)

    # Mean IoU metric; `confusion_matrix` is the op that updates it.
    m_iou, confusion_matrix = tf.metrics.mean_iou(
        labels=labels_2d,
        predictions=tf.squeeze(pred_annotation, axis=[3]),
        num_classes=NUM_OF_CLASSES)
    if FLAGS.debug:
        tf.summary.scalar("m_iou", m_iou)

    # Variables to train.
    trainable_var = tf.trainable_variables()
    train_op = train(loss, trainable_var)

    # TensorBoard op.
    summary = tf.summary.merge_all()
    ################# Network construction finished #################

    ################### Build and run the session ##################
    sess = tf.Session()
    print("Setting up Saver.....")
    saver = tf.train.Saver()

    # Initialize global and local variables (tf.metrics.mean_iou uses local ones).
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    train_summary_writer = tf.summary.FileWriter(FLAGS.logs_dir + "\\train", sess.graph)

    # Restore from a checkpoint when one exists.
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored .....")

    # Start the data queue under a coordinator so that the reader threads can
    # be stopped and joined when the run ends or fails.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Start training or validation.
        if FLAGS.mode == "train":
            feed_dict = {keep_probability: 0.5}
            for itr in xrange(MAX_ITERATION):
                _, loss_value, mIOU, _ = sess.run(
                    [train_op, loss, m_iou, confusion_matrix], feed_dict=feed_dict)
                print("the %d time loss: %g" % (itr, loss_value))
                print("the %d time m_iou: %g" % (itr, mIOU))
                # Periodically save information that reflects training progress.
                if itr % 500 == 0:
                    saver.save(sess, FLAGS.checkpoint_dir + "model.ckpt", itr)
                    print("model saved")
                    summary_str = sess.run(summary, feed_dict={keep_probability: 1.0})
                    train_summary_writer.add_summary(summary_str, itr)
                    train_summary_writer.flush()
                    print("summary saved")
        elif FLAGS.mode == "visualize":
            feed_dict = {keep_probability: 1.0}
            loss_value, mIOU, _ = sess.run(
                [loss, m_iou, confusion_matrix], feed_dict=feed_dict)
            print("validate loss: %g" % loss_value)
            print("validate m_iou: %g" % mIOU)
    finally:
        # Release the reader threads, the summary writer and the session.
        coord.request_stop()
        coord.join(threads)
        train_summary_writer.close()
        sess.close()


if __name__ == "__main__":
    tf.app.run()
```
测试结果:
![](https://box.kancloud.cn/f3451ccb4bcd96c16e6d6b9517846096_435x276.png)
> 我们运行会发现运行快了很多
<h3 id="第二节">第二问题 精度上不去</h3>
经过排查代码没有问题,我们把代码放到华为云上可以明显看到m_iou在不断的升高。
![](https://box.kancloud.cn/ecfd0b22830086dae9d8bd60f6ef3602_1142x717.png)
我们增大批处理量后发现m-iou提升加快了。
![](https://box.kancloud.cn/c2180e39b9222d8ce0ac3a58beaed818_1142x717.png)
### 结果:
同时我们在上面的代码中添加了显示summary信息的代码。最后我们经过训练结果如下:
![](https://box.kancloud.cn/6ade79a68f4b4875e4cf398198e17138_805x851.png)
- 序言
- 第一章 机器学习概述
- 第二章 机器学习环境搭建
- 环境搭建
- 第三章 机器学习之基础算法
- 第一节:基础知识
- 第二节:k近邻算法
- 第三节:决策树算法
- 第四节:朴素贝叶斯
- 第五节:逻辑斯蒂回归
- 第六节:支持向量机
- 第四章 机器学习之深度学习算法
- 第一节: CNN
- 4.1.1 CNN介绍
- 4.1.2 CNN反向传播
- 4.1.3 DNN实例
- 4.1.4 CNN实例
- 第五章 机器学习论文与实践
- 第一节: 语义分割
- 5.1 FCN
- 5.1.1 FCN--------实现FCN16S
- 5.1.2 FCN--------优化FCN16S
- 5.2 DeepLab
- 5.2.1 DeepLabv2
- 第六章 机器学习在实际项目中的应用