Lekcja – Binary Search Tree – balansowanie drzewa
from time import time, sleep
class Node:
def __init__(self, id, data):
self.id = id
self.data = data
self.left = None
self.right = None
self.depth = None
def __repr__(self):
return f"<{self.id}/{self.depth}>"
def insert(parent, child, depth=0):
if parent is None:
child.depth = depth
return child
elif parent.id == child.id:
return parent
elif child.id < parent.id:
parent.left = insert(parent.left, child, parent.depth+1)
else:
parent.right = insert(parent.right, child, parent.depth+1)
return parent
def load_stock_file(file_name):
bst = None
file = open(file_name, 'r')
line = file.readline()
line_number = 0
while True:
line = file.readline()
if not line:
break
data = line.split(',')
line_number +=1
#print(line_number, data[0], '/', data[1])
node = Node(data[0], data[1])
bst = insert(bst, node)
file.close()
return bst
# https://www.nasdaq.com/market-activity/stocks/screener?exchange=NASDAQ&render=download
stock_file = './data/stock.csv'
bst = load_stock_file(stock_file)
print('Done')
def max_depth(node):
if node is None:
return 0
else:
left_depth = max_depth(node.left) + 1
right_depth = max_depth(node.right) + 1
return max(left_depth, right_depth)
print(f"Max depth: {max_depth(bst)}")
print(f"Balance info: nodes on left: {max_depth(bst.left)} nodes on right: {max_depth(bst.right)}")
def count(node, delay=0):
sleep(delay)
if node is None:
return 0
else:
left_count = count(node.left, delay)
right_count = count(node.right, delay)
return left_count + right_count + 1
print(f"Count: {count(bst)}")
print(f"Balance info: nodes on left: {count(bst.left)} nodes on right: {count(bst.right)}")
# start_time = time()
# count(bst.left, 0.001)
# print(f"Operation on left {time() - start_time} s")
# start_time = time()
# count(bst.right, 0.001)
# print(f"Operation on right {time() - start_time} s")
def get_ordered_list(parent, nodes_list):
if not parent:
return
get_ordered_list(parent.left,nodes_list)
nodes_list.append(parent)
get_ordered_list(parent.right,nodes_list)
# nodes_list = []
# get_ordered_list(bst, nodes_list)
# for node in nodes_list:
# print(node)
def build_bst_partially(nodes_list, first_idx, last_idx, depth=0):
if first_idx > last_idx:
return None
middle_idx = (first_idx + last_idx) // 2
middle_node = nodes_list[middle_idx]
middle_node.left = build_bst_partially(nodes_list,first_idx,middle_idx-1, depth+1)
middle_node.right = build_bst_partially(nodes_list,middle_idx+1,last_idx, depth+1)
middle_node.depth = depth
return middle_node
def rebuild_bst(bst):
nodes_list=[]
get_ordered_list(bst,nodes_list)
first_idx = 0
last_idx = len(nodes_list) - 1
return build_bst_partially(nodes_list, first_idx, last_idx)
bst = rebuild_bst(bst)
print(f"Max depth: {max_depth(bst)}")
print(f"Balance info: nodes on left: {max_depth(bst.left)} nodes on right: {max_depth(bst.right)}")
print(f"Count: {count(bst)}")
print(f"Balance info: nodes on left: {count(bst.left)} nodes on right: {count(bst.right)}")
Lab
class City:
def __init__(self, id, country, population):
self.id = id
self.country = country
self.population = population
self.left = None
self.right = None
self.depth = None
def __repr__(self):
return f"<{self.id}/{self.depth}>"
def insert(parent, child, depth=0):
if parent is None:
child.depth = depth
return child
elif parent.id == child.id:
return parent
elif child.id < parent.id:
parent.left = insert(parent.left, child, parent.depth+1)
else:
parent.right = insert(parent.right, child, parent.depth+1)
return parent
def load_file(file_name):
bst = None
file = open(file_name, 'r', encoding="utf-8")
line = file.readline()
line_number = 0
while True:
line = file.readline()
if not line:
break
data = line.split(',')
line_number +=1
city_ascii = data[1].strip().replace('"','').replace("'",'').replace("`",'').upper()
country = data[4].replace('"','').replace("'",'').replace("`",'').upper()
try:
population = int(data[9].replace('"',''))
except ValueError:
population = -1
node = City(city_ascii, country, population)
bst = insert(bst, node)
file.close()
return bst
# https://www.kaggle.com/juanmah/world-cities
stock_file = './data/worldcities.csv'
bst = load_file(stock_file)
print('Done')
def max_depth(node):
if node is None:
return 0
else:
left_depth = max_depth(node.left) + 1
right_depth = max_depth(node.right) + 1
return max(left_depth, right_depth)
def get_ordered_list(parent, nodes_list):
if not parent:
return
get_ordered_list(parent.left,nodes_list)
nodes_list.append(parent)
get_ordered_list(parent.right,nodes_list)
def build_bst_partially(nodes_list, first_idx, last_idx, depth=0):
if first_idx > last_idx:
return None
middle_idx = (first_idx + last_idx) // 2
middle_node = nodes_list[middle_idx]
middle_node.left = build_bst_partially(nodes_list,first_idx,middle_idx-1, depth+1)
middle_node.right = build_bst_partially(nodes_list,middle_idx+1,last_idx, depth+1)
middle_node.depth = depth
return middle_node
def rebuild_bst(bst):
nodes_list=[]
get_ordered_list(bst,nodes_list)
first_idx = 0
last_idx = len(nodes_list) - 1
return build_bst_partially(nodes_list, first_idx, last_idx)
# Inital values
print(f'Depth on left {max_depth(bst.left)+1} and on right {max_depth(bst.right)+1}')
bst = rebuild_bst(bst)
print(f'Depth on left {max_depth(bst.left)+1} and on right {max_depth(bst.right)+1}')