def create_tree(dataset, min_sup=1):
header_table = {
}
for trans in dataset:
for item in trans:
header_table[item] = header_table.get(item, 0) + dataset[trans]
temp_table = {
}
# 移除不滿足最小支持度的項集
for k in header_table.keys():
if header_table[k] >= min_sup:
temp_table[k] = header_table[k]
del(header_table)
header_table = temp_table
freq_item_set = set(header_table.keys())
# 如果沒有項集滿足要求,則退出
if len(freq_item_set) == 0:
return None, None
for k in header_table:
header_table[k] = [header_table[k], None]
ret_tree = treeNode('Null Set', 1, None)
for tran_set, count in dataset.items():
# 根據全局頻率對每個事務中的元素進行排序
local_D = {
}
for item in tran_set:
if item in freq_item_set:
local_D[item] = header_table[item][0]
if len(local_D) > 0:
ordered_items = [v[0] for v in sorted(local_D.items(), key=lambda p:p[1], reverse=True)]
update_tree(ordered_items, ret_tree, header_table, count)
return ret_tree, header_table
def update_tree(items, in_tree, header_table, count):
if items[0] in in_tree.children:
in_tree.children[items[0]].inc(count)
else:
in_tree.children[items[0]] = treeNode(items[0], count, in_tree)
if header_table[items[0]][1] == None:
header_table[items[0]][1] = in_tree.children[items[0]]
else:
update_header(header_table[items[0]][1], in_tree.children[items[0]])
if len(items) > 1:
# 對剩下的項集迭代調用 update_tree 函數
update_tree(items[1::], in_tree.children[items[0]], header_table, count)