Lekcja – Wyszukiwanie danych
import csv
from time import time, sleep


class Node:
    """One entry of a binary search tree of stock records.

    Attributes are plain fields: ``id`` is the key the tree is ordered by,
    ``data`` is the payload, ``left``/``right`` are the child links and
    ``depth`` is the distance from the root (``None`` until the node has
    been placed in a tree).
    """

    def __init__(self, id, data):
        self.id = id
        self.data = data
        self.left = None
        self.right = None
        self.depth = None

    def __repr__(self):
        return f"<{self.id}/{self.depth}>"


def insert(parent, child, depth=0):
    """Insert ``child`` into the subtree rooted at ``parent``.

    Args:
        parent: Root of the (sub)tree, or ``None`` for an empty tree.
        child: Node to insert; its ``depth`` is set when it is placed.
        depth: Depth to assign to ``child`` if ``parent`` is ``None``.

    Returns:
        The root of the (sub)tree after insertion. A child whose ``id`` is
        already present is ignored and the existing tree is returned.

    Note:
        The function recurses once per tree level, so a very unbalanced
        tree can run into Python's recursion limit.
    """
    if parent is None:
        child.depth = depth
        return child
    if parent.id == child.id:
        return parent
    if parent.depth is None:
        # A root created by hand has never been given a depth; without this
        # the ``parent.depth + 1`` below would raise TypeError.
        parent.depth = depth
    if child.id < parent.id:
        parent.left = insert(parent.left, child, parent.depth + 1)
    else:
        parent.right = insert(parent.right, child, parent.depth + 1)
    return parent


def load_stock_file(file_name):
    """Load a stock CSV file into a binary search tree.

    The first line is treated as a header and skipped. For every other row
    the first column becomes the node ``id`` and the second column the node
    ``data``. Rows with fewer than two columns (for example blank lines) are
    skipped.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The root ``Node`` of the tree, or ``None`` if the file has no data
        rows.
    """
    bst = None
    # ``with`` closes the file even if parsing fails; ``newline=''`` is what
    # the csv module expects so that quoted fields are handled correctly.
    with open(file_name, 'r', encoding='utf-8', newline='') as file:
        rows = csv.reader(file)
        next(rows, None)  # skip the header line
        for row in rows:
            if len(row) < 2:
                continue
            # csv.reader keeps quoted values that contain commas in one
            # piece, which a plain ``split(',')`` would cut in the middle.
            bst = insert(bst, Node(row[0], row[1]))
    return bst


def max_depth(node):
    """Return the number of levels in the subtree rooted at ``node``.

    An empty subtree (``None``) has 0 levels; a single node has 1.
    """
    if node is None:
        return 0
    left_depth = max_depth(node.left) + 1
    right_depth = max_depth(node.right) + 1
    return max(left_depth, right_depth)


def count(node, delay=0):
    """Return the number of nodes in the subtree rooted at ``node``.

    Args:
        node: Root of the subtree, or ``None``.
        delay: Seconds to sleep on every call (including calls made for
            empty children); used to make the cost of a traversal visible.
    """
    sleep(delay)
    if node is None:
        return 0
    left_count = count(node.left, delay)
    right_count = count(node.right, delay)
    return left_count + right_count + 1


def get_ordered_list(parent, nodes_list):
    """Append the nodes of the subtree to ``nodes_list`` in ascending order.

    Performs an in-order walk (left subtree, node, right subtree) and
    mutates ``nodes_list`` in place; nothing is returned.
    """
    if not parent:
        return
    get_ordered_list(parent.left, nodes_list)
    nodes_list.append(parent)
    get_ordered_list(parent.right, nodes_list)


def build_bst_partially(nodes_list, first_idx, last_idx, depth=0):
    """Build a balanced subtree from ``nodes_list[first_idx:last_idx + 1]``.

    The middle element becomes the subtree root and both halves are built
    recursively. The existing node objects are reused: their ``left``,
    ``right`` and ``depth`` fields are overwritten.

    Args:
        nodes_list: Nodes sorted by ``id``.
        first_idx: Index of the first node of the slice (inclusive).
        last_idx: Index of the last node of the slice (inclusive).
        depth: Depth assigned to the root of this subtree.

    Returns:
        The root of the built subtree, or ``None`` for an empty slice.
    """
    if first_idx > last_idx:
        return None
    middle_idx = (first_idx + last_idx) // 2
    middle_node = nodes_list[middle_idx]
    middle_node.left = build_bst_partially(
        nodes_list, first_idx, middle_idx - 1, depth + 1)
    middle_node.right = build_bst_partially(
        nodes_list, middle_idx + 1, last_idx, depth + 1)
    middle_node.depth = depth
    return middle_node


def rebuild_bst(bst):
    """Return a balanced tree built from the nodes of ``bst``.

    The nodes are collected in ascending order and re-linked, so the
    original tree structure is destroyed. Returns ``None`` for an empty
    tree.
    """
    nodes_list = []
    get_ordered_list(bst, nodes_list)
    return build_bst_partially(nodes_list, 0, len(nodes_list) - 1)


def find(parent, id, verbose=False):
    """Search the subtree rooted at ``parent`` for the node with key ``id``.

    Args:
        parent: Root of the subtree, or ``None``.
        id: Key to look for.
        verbose: When true, print every visited node (including the final
            ``None`` of an unsuccessful search).

    Returns:
        The matching node, or ``None`` if the key is not in the tree.
    """
    if verbose:
        print(f"Analyzing {parent}")
    if not parent:
        return None
    if parent.id == id:
        return parent
    if id < parent.id:
        return find(parent.left, id, verbose)
    return find(parent.right, id, verbose)


# The demonstration only runs when the file is executed as a script, so the
# definitions above can be imported without reading the data file.
if __name__ == "__main__":
    # https://www.nasdaq.com/market-activity/stocks/screener?exchange=NASDAQ&render=download
    stock_file = './data/stock.csv'
    bst = load_stock_file(stock_file)
    print('Done')

    # NOTE: the first "Balance info" line reports subtree depths, the second
    # one reports node counts, although both are labelled "nodes".
    print(f"Max depth: {max_depth(bst)}")
    print(f"Balance info: nodes on left: {max_depth(bst.left)} nodes on right: {max_depth(bst.right)}")
    print(f"Count: {count(bst)}")
    print(f"Balance info: nodes on left: {count(bst.left)} nodes on right: {count(bst.right)}")

    # Optional experiment: time a traversal of each side with a per-call delay.
    # start_time = time()
    # count(bst.left, 0.001)
    # print(f"Operation on left {time() - start_time} s")
    # start_time = time()
    # count(bst.right, 0.001)
    # print(f"Operation on right {time() - start_time} s")

    # Optional experiment: print all nodes in ascending order.
    # nodes_list = []
    # get_ordered_list(bst, nodes_list)
    # for node in nodes_list:
    #     print(node)

    bst = rebuild_bst(bst)

    print(f"Max depth: {max_depth(bst)}")
    print(f"Balance info: nodes on left: {max_depth(bst.left)} nodes on right: {max_depth(bst.right)}")
    print(f"Count: {count(bst)}")
    print(f"Balance info: nodes on left: {count(bst.left)} nodes on right: {count(bst.right)}")

    microsoft = find(bst, 'MSFT', True)
    print(microsoft.id, microsoft.data)

    transmedics = find(bst, 'TMDX', True)
    print(transmedics.id, transmedics.data)

    mars = find(bst, 'MARS', True)
    print(mars)
Lab
# Characters removed from city and country names before they are used as keys.
_QUOTE_CHARS = str.maketrans('', '', '"\'`')

# Column positions read from each data row of the world-cities file.
_CITY_ASCII_COL = 1
_COUNTRY_COL = 4
_POPULATION_COL = 9


class City:
    """One entry of a binary search tree of cities.

    ``id`` is the key the tree is ordered by, ``country`` and ``population``
    are the payload, ``left``/``right`` are the child links and ``depth`` is
    the distance from the root (``None`` until the node has been placed in a
    tree).
    """

    def __init__(self, id, country, population):
        self.id = id
        self.country = country
        self.population = population
        self.left = None
        self.right = None
        self.depth = None

    def __repr__(self):
        return f"<{self.id}/{self.depth}>"


def insert(parent, child, depth=0):
    """Insert ``child`` into the subtree rooted at ``parent``.

    Args:
        parent: Root of the (sub)tree, or ``None`` for an empty tree.
        child: Node to insert; its ``depth`` is set when it is placed.
        depth: Depth to assign to ``child`` if ``parent`` is ``None``.

    Returns:
        The root of the (sub)tree after insertion. A child whose ``id`` is
        already present is ignored and the existing tree is returned.

    Note:
        The function recurses once per tree level, so a very unbalanced
        tree can run into Python's recursion limit.
    """
    if parent is None:
        child.depth = depth
        return child
    if parent.id == child.id:
        return parent
    if parent.depth is None:
        # A root created by hand has never been given a depth; without this
        # the ``parent.depth + 1`` below would raise TypeError.
        parent.depth = depth
    if child.id < parent.id:
        parent.left = insert(parent.left, child, parent.depth + 1)
    else:
        parent.right = insert(parent.right, child, parent.depth + 1)
    return parent


def load_file(file_name):
    """Load a world-cities CSV file into a binary search tree.

    The first line is treated as a header and skipped. From every other row
    the ASCII city name (column 1), the country (column 4) and the
    population (column 9) are read. Names are upper-cased and stripped of
    ``"``, ``'`` and backtick characters; a population that is not an
    integer is stored as ``-1``. Rows with too few columns (for example
    blank lines) are skipped.

    Args:
        file_name: Path of the UTF-8 encoded CSV file to read.

    Returns:
        The root ``City`` of the tree, or ``None`` if the file has no data
        rows.
    """
    import csv  # local import: this block has no import section of its own

    bst = None
    # ``with`` closes the file even if parsing fails; ``newline=''`` is what
    # the csv module expects so that quoted fields are handled correctly.
    with open(file_name, 'r', encoding="utf-8", newline='') as file:
        rows = csv.reader(file)
        next(rows, None)  # skip the header line
        for row in rows:
            if len(row) <= _POPULATION_COL:
                continue
            # csv.reader keeps quoted values such as "Korea, South" in one
            # field; splitting on every comma would shift all later columns
            # and lose the population.
            city_ascii = row[_CITY_ASCII_COL].strip().translate(_QUOTE_CHARS).upper()
            country = row[_COUNTRY_COL].translate(_QUOTE_CHARS).upper()
            try:
                population = int(row[_POPULATION_COL])
            except ValueError:
                population = -1
            bst = insert(bst, City(city_ascii, country, population))
    return bst


def max_depth(node):
    """Return the number of levels in the subtree rooted at ``node``.

    An empty subtree (``None``) has 0 levels; a single node has 1.
    """
    if node is None:
        return 0
    left_depth = max_depth(node.left) + 1
    right_depth = max_depth(node.right) + 1
    return max(left_depth, right_depth)


def get_ordered_list(parent, nodes_list):
    """Append the nodes of the subtree to ``nodes_list`` in ascending order.

    Performs an in-order walk (left subtree, node, right subtree) and
    mutates ``nodes_list`` in place; nothing is returned.
    """
    if not parent:
        return
    get_ordered_list(parent.left, nodes_list)
    nodes_list.append(parent)
    get_ordered_list(parent.right, nodes_list)


def build_bst_partially(nodes_list, first_idx, last_idx, depth=0):
    """Build a balanced subtree from ``nodes_list[first_idx:last_idx + 1]``.

    The middle element becomes the subtree root and both halves are built
    recursively. The existing node objects are reused: their ``left``,
    ``right`` and ``depth`` fields are overwritten.

    Args:
        nodes_list: Nodes sorted by ``id``.
        first_idx: Index of the first node of the slice (inclusive).
        last_idx: Index of the last node of the slice (inclusive).
        depth: Depth assigned to the root of this subtree.

    Returns:
        The root of the built subtree, or ``None`` for an empty slice.
    """
    if first_idx > last_idx:
        return None
    middle_idx = (first_idx + last_idx) // 2
    middle_node = nodes_list[middle_idx]
    middle_node.left = build_bst_partially(
        nodes_list, first_idx, middle_idx - 1, depth + 1)
    middle_node.right = build_bst_partially(
        nodes_list, middle_idx + 1, last_idx, depth + 1)
    middle_node.depth = depth
    return middle_node


def rebuild_bst(bst):
    """Return a balanced tree built from the nodes of ``bst``.

    The nodes are collected in ascending order and re-linked, so the
    original tree structure is destroyed. Returns ``None`` for an empty
    tree.
    """
    nodes_list = []
    get_ordered_list(bst, nodes_list)
    return build_bst_partially(nodes_list, 0, len(nodes_list) - 1)


def find(parent, id, verbose=False):
    """Search the subtree rooted at ``parent`` for the node with key ``id``.

    Args:
        parent: Root of the subtree, or ``None``.
        id: Key to look for.
        verbose: When true, print every visited node (including the final
            ``None`` of an unsuccessful search).

    Returns:
        The matching node, or ``None`` if the key is not in the tree.
    """
    if verbose:
        print(f"Analyzing {parent}")
    if not parent:
        return None
    if parent.id == id:
        return parent
    if id < parent.id:
        return find(parent.left, id, verbose)
    return find(parent.right, id, verbose)


# The demonstration only runs when the file is executed as a script, so the
# definitions above can be imported without reading the data file.
if __name__ == "__main__":
    # https://www.kaggle.com/juanmah/world-cities
    stock_file = './data/worldcities.csv'
    bst = load_file(stock_file)
    print('Done')

    bst = rebuild_bst(bst)

    for city_name in ('BERLIN', 'WROCLAW', 'VINA TUSCA'):
        c = find(bst, city_name, False)
        if c:
            print(c.id, c.population)
        else:
            print('404 - not found')