# 数据建模-数据集特征获取
*****
**以下内容均按照作者的个人思路编排;如果你有更好的想法,请以自己的思路为准。**
*****
## 一、过滤图像数据集
待更新......
## 二、按类别求图像(集)的均值、方差
1. 求图像的均值、方差
```python
import cv2
import numpy as np  # required below; np.mean / np.var would otherwise raise NameError

# Path of the sample image to analyse.
img_file = "D:/Source Code/Python/Conda/AI/data/seg_data/seg_train/seg_train/forest/8.jpg"

# Flag 1 asks OpenCV for a 3-channel colour image (channels come back in B, G, R order).
img1 = cv2.imread(img_file, 1)
if img1 is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image: " + img_file)

print(img1[0][0])  # channel values of the top-left pixel of the original image

# axis=0 reduces over the rows, so each result has shape (width, channels):
# one mean / variance per column and channel.
mean_image = np.mean(img1, axis=0)
var_image = np.var(img1, axis=0)

print("-----------------------------------------------")
print("mean_image的形状以及数值")
print(mean_image.shape)
print(mean_image[0])
print("var_image的形状以及数值")
print(var_image.shape)
print(var_image[0])
print("-----------------------------------------------")
```
结果:(与之类似即可)
![](https://img.kancloud.cn/1f/38/1f3873a7336321ec98b55208b9821917_699x237.png)
2. 求图像集的均值、方差
```python
import os
from PIL import Image #加载图像
import matplotlib.pyplot as plt
import numpy as np
from imageio import imread #读取图像
# Directory holding the images of one class.
filepath = r'D:/Source Code/Python/Conda/AI/data/seg_data/seg_train/seg_train/forest'
# Names of every entry inside that directory.
pathDir = os.listdir(filepath)

# ---- Pass 1: per-channel mean -------------------------------------------
# An RGB image is a 3-D array (height, width, channel); every entry is the
# brightness of one colour at one position.
R_channel = 0
G_channel = 0
B_channel = 0
num = 0  # total number of pixels actually read, summed over all images
for filename in pathDir:
    # Scale pixel values into [0, 1] before accumulating.
    img = imread(os.path.join(filepath, filename)) / 255.0
    R_channel = R_channel + np.sum(img[:, :, 0])
    G_channel = G_channel + np.sum(img[:, :, 1])
    B_channel = B_channel + np.sum(img[:, :, 2])
    # Count the real pixels of this image instead of assuming a fixed size,
    # so the statistics stay correct whatever the image dimensions are.
    num = num + img.shape[0] * img.shape[1]

if num == 0:
    # Avoid a division by zero when the directory contains no images.
    raise ValueError("no pixels found in " + filepath)

R_mean = R_channel / num
G_mean = G_channel / num
B_mean = B_channel / num

# ---- Pass 2: per-channel spread around the mean ---------------------------
R_channel = 0
G_channel = 0
B_channel = 0
for filename in pathDir:
    img = imread(os.path.join(filepath, filename)) / 255.0
    # Accumulate the squared deviation of every pixel from the channel mean.
    R_channel = R_channel + np.sum((img[:, :, 0] - R_mean) ** 2)
    G_channel = G_channel + np.sum((img[:, :, 1] - G_mean) ** 2)
    B_channel = B_channel + np.sum((img[:, :, 2] - B_mean) ** 2)

# NOTE: because of the square root these values are standard deviations,
# even though the names and the printed labels say "var".
R_var = np.sqrt(R_channel / num)
G_var = np.sqrt(G_channel / num)
B_var = np.sqrt(B_channel / num)

print("R_mean is %f, G_mean is %f, B_mean is %f" % (R_mean, G_mean, B_mean))
print("R_var is %f, G_var is %f, B_var is %f" % (R_var, G_var, B_var))
```
结果:(与之类似即可)
![](https://img.kancloud.cn/56/62/5662588560d653da41cb4588be423aae_756x64.png)
* 拓展学习(统一图片尺寸):
```python
from PIL import Image
import os
def image_resize(image_path, new_path, size=(512, 512)):
    """Resize every image file in a folder to one common size.

    Args:
        image_path: Folder containing the source images.
        new_path: Folder the resized copies are written to; it is created
            when it does not exist yet.
        size: Target ``(width, height)`` passed to ``Image.resize``.
            Defaults to ``(512, 512)``.

    Returns:
        None. Each resized image is saved under its original file name.
    """
    print('============>>修改图片尺寸')
    os.makedirs(new_path, exist_ok=True)
    for img_name in os.listdir(image_path):
        img_path = os.path.join(image_path, img_name)  # full path of this entry
        if not os.path.isfile(img_path):
            # Sub-directories cannot be opened as images; skip them.
            continue
        # The context manager closes the source file handle after saving.
        with Image.open(img_path) as image:
            resized = image.resize(size)
            resized.save(os.path.join(new_path, img_name))
    print("end the processing!")
if __name__ == '__main__':
    # Script entry point: resize every image of the source folder into the target folder.
    print("ready for :::::::: ")
    # Folder with the input images (raw string, so the backslashes stay literal).
    ori_path = r"Z:\pycharm_projects\ssd\VOC2007\JPEGImages"
    # Folder for the converted images; this one uses forward slashes, so no raw string is needed.
    new_path = 'Z:/pycharm_projects/ssd/VOC2007/reshape'
    image_resize(ori_path, new_path)
```