def create_tree(dataset, min_sup=1):
header_table = {
}
for trans in dataset:
for item in trans:
header_table[item] = header_table.get(item, 0) + dataset[trans]
temp_table = {
}
# Remove itemsets that do not meet the minimum support
for k in header_table.keys():
if header_table[k] >= min_sup:
temp_table[k] = header_table[k]
del(header_table)
header_table = temp_table
freq_item_set = set(header_table.keys())
# If no itemset meets the requirements , The exit
if len(freq_item_set) == 0:
return None, None
for k in header_table:
header_table[k] = [header_table[k], None]
ret_tree = treeNode('Null Set', 1, None)
for tran_set, count in dataset.items():
# The elements in each transaction are sorted according to the global frequency
local_D = {
}
for item in tran_set:
if item in freq_item_set:
local_D[item] = header_table[item][0]
if len(local_D) > 0:
ordered_items = [v[0] for v in sorted(local_D.items(), key=lambda p:p[1], reverse=True)]
update_tree(ordered_items, ret_tree, header_table, count)
return ret_tree, header_table
def update_tree(items, in_tree, header_table, count):
if items[0] in in_tree.children:
in_tree.children[items[0]].inc(count)
else:
in_tree.children[items[0]] = treeNode(items[0], count, in_tree)
if header_table[items[0]][1] == None:
header_table[items[0]][1] = in_tree.children[items[0]]
else:
update_header(header_table[items[0]][1], in_tree.children[items[0]])
if len(items) > 1:
# Iterate through the rest of the itemset update_tree function
update_tree(items[1::], in_tree.children[items[0]], header_table, count)
def createInitSet(dataSet):
retDict={
}
for trans in dataSet:
retDict[frozenset(trans)]=1
return retDict
if __name__=='__main__':
minSup=3# Minimum support
simDat=loadSimpDat()
initSet=createInitSet(simDat)
myFPtree,myHeaderTab=create_tree(initSet,minSup)
myFPtree.disp()# Display in text form FP Trees
print('~~~~~~~~')
myFreqList=[]
mine_tree(myFPtree,myHeaderTab,minSup,set([]),myFreqList)
#mineTree(myFPtree,myHeaderTab,minSup,set([]),myFreqList)
print(myFreqList)