基于h5py的使用及数据封装代码
1.h5py简单介绍
h5py文件是存放两类对象的容器,数据集(dataset)和组(group),dataset类似数组类的数据集合,和numpy的数组差不多。group是像文件夹一样的容器,它好比python中的字典,有键(key)和值(value)。group中可以存放dataset或者其他的group。"键"就是组成员的名称,"值"就是组成员对象本身(组或者数据集),下面来看下如何创建组和数据集。
1.1创建一个h5py文件
import h5py

# To read an existing file instead, replace "w" with "r".
# NOTE: remember to call f.close() (or use a `with` block) when done,
# otherwise the file handle stays open.
f = h5py.File("myh5py.hdf5", "w")
在当前目录下会生成一个myh5py.hdf5文件。
2.创建dataset数据集
import h5py

# "dset1" is the dataset name, (20,) is the dataset shape,
# and 'i' is the element dtype (32-bit integer).
with h5py.File("myh5py.hdf5", "w") as f:
    d1 = f.create_dataset("dset1", (20,), 'i')
    for key in f.keys():
        print(key)
        print(f[key].name)
        print(f[key].shape)
        # Dataset.value was removed in h5py >= 3.0; index with [()]
        # to read the entire dataset into a numpy array.
        print(f[key][()])
输出:
dset1
/dset1
(20,)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
import h5py
import numpy as np

# Create a dataset directly from an existing numpy array;
# the shape and dtype are inferred from the array.
with h5py.File("myh5py.hdf5", "w") as f:
    a = np.arange(20)
    d1 = f.create_dataset("dset1", data=a)
    for key in f.keys():
        print(f[key].name)
        # Dataset.value was removed in h5py >= 3.0; use [()] instead.
        print(f[key][()])
输出:
/dset1
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
2.hdf5用于封装训练集和测试集
# ============================================================
# This prepares the hdf5 datasets of the DRIVE database
# ============================================================
import os
import h5py
import numpy as np
from PIL import Image


def write_hdf5(arr, outfile):
    """Write the array `arr` to `outfile` as an HDF5 dataset named "image".

    The file is opened in "w" mode, so any existing file at `outfile`
    is overwritten; the context manager guarantees the handle is closed.
    """
    with h5py.File(outfile, "w") as f:
        f.create_dataset("image", data=arr, dtype=arr.dtype)
# ------------ Path of the images --------------------------------------------
# train
original_imgs_train = "./DRIVE/training/images/"
groundTruth_imgs_train = "./DRIVE/training/1st_manual/"
borderMasks_imgs_train = "./DRIVE/training/mask/"
# test
original_imgs_test = "./DRIVE/test/images/"
groundTruth_imgs_test = "./DRIVE/test/1st_manual/"
borderMasks_imgs_test = "./DRIVE/test/mask/"
# ----------------------------------------------------------------------------
# DRIVE dataset geometry: 20 RGB images of 584x565 pixels per split.
Nimgs = 20
channels = 3
height = 584
width = 565
# Output directory for the generated .hdf5 files.
dataset_path = "./DRIVE_datasets_training_testing/"
def get_datasets(imgs_dir, groundTruth_dir, borderMasks_dir, train_test="null"):
    """Load the DRIVE images, ground truths and border masks as numpy arrays.

    Parameters
    ----------
    imgs_dir : str
        Directory containing the original retina images.
    groundTruth_dir : str
        Directory containing the "<id>_manual1.gif" ground-truth images.
    borderMasks_dir : str
        Directory containing the border-mask images.
    train_test : str
        Either "train" or "test"; selects the border-mask filename pattern.

    Returns
    -------
    tuple of np.ndarray
        (imgs, groundTruth, border_masks) shaped (Nimgs, channels, height, width),
        (Nimgs, 1, height, width) and (Nimgs, 1, height, width) respectively.

    Raises
    ------
    ValueError
        If `train_test` is neither "train" nor "test".
    """
    imgs = np.empty((Nimgs, height, width, channels))
    groundTruth = np.empty((Nimgs, height, width))
    border_masks = np.empty((Nimgs, height, width))
    for path, subdirs, files in os.walk(imgs_dir):  # list all files, directories in the path
        for i in range(len(files)):
            # original image
            print("original image: " + files[i])
            img = Image.open(imgs_dir + files[i])
            imgs[i] = np.asarray(img)
            # corresponding ground truth: first two chars of the filename are the image id
            groundTruth_name = files[i][0:2] + "_manual1.gif"
            print("ground truth name: " + groundTruth_name)
            g_truth = Image.open(groundTruth_dir + groundTruth_name)
            groundTruth[i] = np.asarray(g_truth)
            # corresponding border mask (filename pattern depends on the split)
            if train_test == "train":
                border_masks_name = files[i][0:2] + "_training_mask.gif"
            elif train_test == "test":
                border_masks_name = files[i][0:2] + "_test_mask.gif"
            else:
                # original code printed a message and called exit();
                # raising is clearer and keeps the function importable.
                raise ValueError("specify if train or test!!")
            print("border masks name: " + border_masks_name)
            b_mask = Image.open(borderMasks_dir + border_masks_name)
            border_masks[i] = np.asarray(b_mask)
    print("imgs max: " + str(np.max(imgs)))
    print("imgs min: " + str(np.min(imgs)))
    # sanity check: masks/ground truths must be binary black-and-white images
    assert np.max(groundTruth) == 255 and np.max(border_masks) == 255
    assert np.min(groundTruth) == 0 and np.min(border_masks) == 0
    print("ground truth and border masks are correctly within pixel value range 0-255 (black-white)")
    # reshaping for my standard tensors: channels-first layout (N, C, H, W)
    imgs = np.transpose(imgs, (0, 3, 1, 2))
    assert imgs.shape == (Nimgs, channels, height, width)
    groundTruth = np.reshape(groundTruth, (Nimgs, 1, height, width))
    border_masks = np.reshape(border_masks, (Nimgs, 1, height, width))
    assert groundTruth.shape == (Nimgs, 1, height, width)
    assert border_masks.shape == (Nimgs, 1, height, width)
    return imgs, groundTruth, border_masks
# Create the output directory if it does not exist yet.
if not os.path.exists(dataset_path):
    os.makedirs(dataset_path)
# getting the training datasets
imgs_train, groundTruth_train, border_masks_train = get_datasets(
    original_imgs_train, groundTruth_imgs_train, borderMasks_imgs_train, "train")
print("saving train datasets")
write_hdf5(imgs_train, dataset_path + "DRIVE_dataset_imgs_train.hdf5")
write_hdf5(groundTruth_train, dataset_path + "DRIVE_dataset_groundTruth_train.hdf5")
write_hdf5(border_masks_train, dataset_path + "DRIVE_dataset_borderMasks_train.hdf5")
# getting the testing datasets
imgs_test, groundTruth_test, border_masks_test = get_datasets(
    original_imgs_test, groundTruth_imgs_test, borderMasks_imgs_test, "test")
print("saving test datasets")
write_hdf5(imgs_test, dataset_path + "DRIVE_dataset_imgs_test.hdf5")
write_hdf5(groundTruth_test, dataset_path + "DRIVE_dataset_groundTruth_test.hdf5")
write_hdf5(border_masks_test, dataset_path + "DRIVE_dataset_borderMasks_test.hdf5")
遍历文件夹下的所有文件os.walk(dir)
forparent,dir_names,file_namesinos.walk(parent_dir): foriinfile_names: printfile_name
parent:父路径
dir_names:子文件夹
file_names:文件名
以上这篇基于h5py的使用及数据封装代码就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。