基于h5py的使用及数据封装代码
1.h5py简单介绍
h5py文件是存放两类对象的容器:数据集(dataset)和组(group)。dataset类似数组类的数据集合,和numpy的数组差不多。group是像文件夹一样的容器,它好比python中的字典,有键(key)和值(value)。group中可以存放dataset或者其他的group。"键"就是组成员的名称,"值"就是组成员对象本身(组或者数据集)。下面来看下如何创建组和数据集。
1.1创建一个h5py文件
import h5py

# To read an existing file instead of creating one, change "w" to "r".
f = h5py.File("myh5py.hdf5", "w")
在当前目录下会生成一个myh5py.hdf5文件。
2.创建dataset数据集
import h5py

f = h5py.File("myh5py.hdf5", "w")
# "dset1" is the dataset name, (20,) is its shape, 'i' is the element dtype (int).
d1 = f.create_dataset("dset1", (20,), 'i')
for key in f.keys():
    print(key)
    print(f[key].name)
    print(f[key].shape)
    # NOTE: Dataset.value was removed in h5py 3.x; index with [()] to read all data.
    print(f[key][()])
输出:
dset1 /dset1 (20,) [00000000000000000000] importh5py importnumpyasnp f=h5py.File("myh5py.hdf5","w") a=np.arange(20) d1=f.create_dataset("dset1",data=a) forkeyinf.keys(): print(f[key].name) print(f[key].value)
输出:
/dset1 [012345678910111213141516171819] 2.hpf5用于封装训练集和测试集 #============================================================ #Thispreparethehdf5datasetsoftheDRIVEdatabase #============================================================ importos importh5py importnumpyasnp fromPILimportImage defwrite_hdf5(arr,outfile): withh5py.File(outfile,"w")asf: f.create_dataset("image",data=arr,dtype=arr.dtype) #------------Pathoftheimages-------------------------------------------------------------- #train original_imgs_train="./DRIVE/training/images/" groundTruth_imgs_train="./DRIVE/training/1st_manual/" borderMasks_imgs_train="./DRIVE/training/mask/" #test original_imgs_test="./DRIVE/test/images/" groundTruth_imgs_test="./DRIVE/test/1st_manual/" borderMasks_imgs_test="./DRIVE/test/mask/" #--------------------------------------------------------------------------------------------- Nimgs=20 channels=3 height=584 width=565 dataset_path="./DRIVE_datasets_training_testing/" defget_datasets(imgs_dir,groundTruth_dir,borderMasks_dir,train_test="null"): imgs=np.empty((Nimgs,height,width,channels)) groundTruth=np.empty((Nimgs,height,width)) border_masks=np.empty((Nimgs,height,width)) forpath,subdirs,filesinos.walk(imgs_dir):#listallfiles,directoriesinthepath foriinrange(len(files)): #original print"originalimage:"+files[i] img=Image.open(imgs_dir+files[i]) imgs[i]=np.asarray(img) #correspondinggroundtruth groundTruth_name=files[i][0:2]+"_manual1.gif" print"groundtruthname:"+groundTruth_name g_truth=Image.open(groundTruth_dir+groundTruth_name) groundTruth[i]=np.asarray(g_truth) #correspondingbordermasks border_masks_name="" iftrain_test=="train": border_masks_name=files[i][0:2]+"_training_mask.gif" eliftrain_test=="test": border_masks_name=files[i][0:2]+"_test_mask.gif" else: print"specifyiftrainortest!!" 
exit() print"bordermasksname:"+border_masks_name b_mask=Image.open(borderMasks_dir+border_masks_name) border_masks[i]=np.asarray(b_mask) print"imgsmax:"+str(np.max(imgs)) print"imgsmin:"+str(np.min(imgs)) assert(np.max(groundTruth)==255andnp.max(border_masks)==255) assert(np.min(groundTruth)==0andnp.min(border_masks)==0) print"groundtruthandbordermasksarecorrectlywithihpixelvaluerange0-255(black-white)" #reshapingformystandardtensors imgs=np.transpose(imgs,(0,3,1,2)) assert(imgs.shape==(Nimgs,channels,height,width)) groundTruth=np.reshape(groundTruth,(Nimgs,1,height,width)) border_masks=np.reshape(border_masks,(Nimgs,1,height,width)) assert(groundTruth.shape==(Nimgs,1,height,width)) assert(border_masks.shape==(Nimgs,1,height,width)) returnimgs,groundTruth,border_masks ifnotos.path.exists(dataset_path): os.makedirs(dataset_path) #gettingthetrainingdatasets imgs_train,groundTruth_train,border_masks_train=get_datasets(original_imgs_train,groundTruth_imgs_train,borderMasks_imgs_train,"train") print"savingtraindatasets" write_hdf5(imgs_train,dataset_path+"DRIVE_dataset_imgs_train.hdf5") write_hdf5(groundTruth_train,dataset_path+"DRIVE_dataset_groundTruth_train.hdf5") write_hdf5(border_masks_train,dataset_path+"DRIVE_dataset_borderMasks_train.hdf5") #gettingthetestingdatasets imgs_test,groundTruth_test,border_masks_test=get_datasets(original_imgs_test,groundTruth_imgs_test,borderMasks_imgs_test,"test") print"savingtestdatasets" write_hdf5(imgs_test,dataset_path+"DRIVE_dataset_imgs_test.hdf5") write_hdf5(groundTruth_test,dataset_path+"DRIVE_dataset_groundTruth_test.hdf5") write_hdf5(border_masks_test,dataset_path+"DRIVE_dataset_borderMasks_test.hdf5")
遍历文件夹下的所有文件:os.walk(dir)
# Iterate over every file under parent_dir (recursively).
for parent, dir_names, file_names in os.walk(parent_dir):
    # Bug fix: the original looped over an unused index and printed an
    # undefined name `file_name`; print each entry of file_names instead.
    for file_name in file_names:
        print(file_name)
parent:父路径
dir_names:子文件夹
file_names:文件名
以上这篇基于h5py的使用及数据封装代码就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。