您现在的位置：程式師世界 >> 編程語言 > >> 更多編程語言 >> Python

Python: splitting a dataset proportionally

編輯：Python

There is already a file, file.txt, that contains all the image names. Now it needs to be split proportionally.

The image names in file.txt are randomly split in the ratio train : test : val = 8 : 1 : 1.

First, read the image names in file.txt into a list. Then split that list randomly by proportion (shuffle the order first, then slice it according to the ratios) to obtain three sublists, one for each subset. Finally, save each sublist to its corresponding txt file.

# train/val/test = 8/1/1
# encoding: utf-8
import os
import random
def ran_split(full_list, shuffle=False, ratio1=0.8, ratio2=0.1):
    """Split a list into three consecutive sublists by ratio.

    Args:
        full_list: Items to split. The caller's list is never modified.
        shuffle: When true, the items are randomly reordered (on a copy)
            before being split.
        ratio1: Fraction of the items placed in the first sublist.
        ratio2: Fraction of the items placed in the second sublist.

    Returns:
        A list ``[sublist_1, sublist_2, sublist_3]``. The first two hold
        ``int(n * ratio1)`` and ``int(n * ratio2)`` items respectively and
        the third holds everything that is left. Three sublists are always
        returned, even for empty or very short input, so callers can index
        the result unconditionally.
    """
    # Work on a copy so shuffling does not reorder the caller's list.
    items = list(full_list)
    if shuffle:
        random.shuffle(items)  # randomize the order before slicing

    n_total = len(items)
    offset1 = int(n_total * ratio1)
    offset2 = int(n_total * ratio2) + offset1

    # Slicing copes with zero-length ranges, so no special case is needed
    # for empty input or inputs too short to fill the first sublist.
    return [items[:offset1], items[offset1:offset2], items[offset2:]]
def read_file(filepath):
    """Return the lines of the text file at *filepath* as a list of strings.

    Newline characters at the very beginning and end of the file content
    are removed before splitting, so leading and trailing blank lines are
    dropped. Blank lines in the middle of the file are kept as empty
    strings. The returned strings carry no line terminators.
    """
    with open(filepath, 'r') as handle:
        content = handle.read()
    # Trim only '\n' from both ends, then break the text into lines.
    return content.strip('\n').splitlines()
def write_file(dst1, txt):
    """Write each item of *txt* to the file *dst1*, one item per line.

    Args:
        dst1: Path of the output file; it is created or overwritten.
        txt: Iterable of items. Each item is converted with ``str()`` and
            followed by a newline.
    """
    # The context manager closes the file when the block exits (including
    # on error), which also flushes any buffered output to disk.
    with open(dst1, 'w') as fo:
        fo.writelines(str(item) + '\n' for item in txt)
if __name__ == "__main__":
    # Script entry: split the names listed in file.txt into three list files.
    root_path = r'F:\all_date\WHU'
    from_txt = 'file.txt'
    txts = ['train.txt', 'test.txt', 'val.txt']

    # Load every image name, then shuffle and split them with ratios 0.8/0.1.
    from_path = os.path.join(root_path, from_txt)
    txt_list = read_file(from_path)
    sublists = ran_split(txt_list, shuffle=True, ratio1=0.8, ratio2=0.1)

    # Note: the number of generated sublists must equal the number of
    # file names in txts, because each name is paired with a sublist by index.
    for i, txt_name in enumerate(txts):
        to_path = os.path.join(root_path, txt_name)
        write_file(to_path, sublists[i])