diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..f84a5f5e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python.analysis.extraPaths": [ + "./miniDB" + ] +} \ No newline at end of file diff --git a/docs/student documentation b/docs/student documentation new file mode 100644 index 00000000..e69de29b diff --git a/docs/studentsdocumentation .pdf b/docs/studentsdocumentation .pdf new file mode 100644 index 00000000..461f6b54 Binary files /dev/null and b/docs/studentsdocumentation .pdf differ diff --git a/mdb.py b/mdb.py index a981e5be..4b205f4d 100644 --- a/mdb.py +++ b/mdb.py @@ -9,11 +9,11 @@ from database import Database from table import Table -# art font is "big" +# art font is "big"1 art = ''' _ _ _____ ____ (_) (_)| __ \ | _ \ - _ __ ___ _ _ __ _ | | | || |_) | + _ __ ___ _ _ __ _ | | | || |_) e | '_ ` _ \ | || '_ \ | || | | || _ < | | | | | || || | | || || |__| || |_) | |_| |_| |_||_||_| |_||_||_____/ |____/ 2022 @@ -170,7 +170,7 @@ def interpret(query): 'import': ['import', 'from'], 'export': ['export', 'to'], 'insert into': ['insert into', 'values'], - 'select': ['select', 'from', 'where', 'distinct', 'order by', 'limit'], + 'select': ['select', 'from', 'where', 'AND','OR', 'distinct', 'order by', 'limit'], 'lock table': ['lock table', 'mode'], 'unlock table': ['unlock table', 'force'], 'delete from': ['delete from', 'where'], @@ -178,6 +178,7 @@ def interpret(query): 'create index': ['create index', 'on', 'using'], 'drop index': ['drop index'], 'create view' : ['create view', 'as'] + } if query[-1]!=';': diff --git a/miniDB/btree.py b/miniDB/btree.py index f0676209..6afd56bb 100644 --- a/miniDB/btree.py +++ b/miniDB/btree.py @@ -9,13 +9,15 @@ class Node: def __init__(self, b, values=None, ptrs=None,left_sibling=None, right_sibling=None, parent=None, is_leaf=False): self.b = b # branching factor self.values = [] if values is None else values # Values (the data from the pk column) + self.keys = [] 
+ self.children = [] self.ptrs = [] if ptrs is None else ptrs # ptrs (the indexes of each datapoint or the index of another bucket) self.left_sibling = left_sibling # the index of a buckets left sibling self.right_sibling = right_sibling # the index of a buckets right sibling self.parent = parent # the index of a buckets parent self.is_leaf = is_leaf # a boolean value signaling whether the node is a leaf or not - + def find(self, value, return_ops=False): ''' Returns the index of the next node to search for a value if the node is not a leaf (a ptrs of the available ones). @@ -57,6 +59,7 @@ def insert(self, value, ptr, ptr1=None): ptr: float. The ptr of the inserted value (e.g. its index). ptr1: float. The 2nd ptr (e.g. in case the user wants to insert into a nonleaf node). ''' + # for each value in the node, if the user supplied value is smaller, insert the value and its ptr into that position # if a second ptr is provided, insert it right next to the 1st ptr # else (no value in the node is larger) append value and ptr/s to the back of the list. @@ -68,7 +71,7 @@ def insert(self, value, ptr, ptr1=None): self.ptrs.insert(index+1, ptr) if ptr1: - self.ptrs.insert(index+1, ptr1) + self.ptrs.insert(index+2, ptr1) return self.values.append(value) self.ptrs.append(ptr) @@ -239,7 +242,7 @@ def show(self): self.nodes[ptr].show() print('----') - +#test def plot(self): ## arrange the nodes top to bottom left to right nds = [] @@ -346,3 +349,74 @@ def find(self, operator, value): # print the number of operations (usefull for benchamrking) # print(f'With BTree -> {ops} comparison operations') return results + ''' + creating Row class which is responsible fora single row in a table, and is defined with two attributes + table and values + ''' +class Row: + def __init__(self, table, values): + self.table = table + self.values = values + ''' + table class responsible for class .. 
with specific attributes and methods + ''' +class Table: + def __init__(self, name, columns): + self.name = name + self.columns = columns + self.rows = [] + self.primary_key_index = None + self.unique_indexes = {} + ''' + insert fior unique columns + takes a list of values and inserts a new row into the table + ''' + def insert(self, values): + if len(values) != len(self.columns): + raise ValueError("the number of values is not the same as columns number") + row = Row(self, values) + for column in self.columns: + if column.is_primary_key: + if self.primary_key_index is None: + self.primary_key_index = Btree(column) + self.primary_key_index.insert(row) + elif column.is_unique: + if column.name not in self.unique_indexes: + self.unique_indexes[column.name] = Btree(column) + self.unique_indexes[column.name].insert(row) + self.rows.append(row) + ''' + this method returns a list of rows from the table based on column names kai sto where clause + ''' + def select(self, column_names=None, where=None): + if where is None: + rows = self.rows + else: + rows = [] + for row in self.rows: + if where.matches(row): + rows.append(row) + if column_names is None: + return [row.values for row in rows] + else: + indices = [self.get_index(column_name) for column_name in column_names] + result = [] + for row in rows: + result.append([row.values[index] for index in indices]) + return result + ''' + get_index method a method that returns the index object for a given column name + raises errror if column doesnt exist in the table... 
+ ''' + def get_index(self, column_name): + if column_name in [column.name for column in self.columns]: + for column in self.columns: + if column.name == column_name: + if column.is_primary_key: + return self.primary_key_index + elif column.is_unique: + return self.unique_indexes[column.name] + else: + return None + else: + raise ValueError(f"this column {column_name} does not exist in this table {self.name}") diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..2fb8de9a 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -510,10 +510,10 @@ def lock_table(self, table_name, mode='x'): try: pid = self.tables['meta_locks']._select_where('pid',f'table_name={table_name}').data[0][0] - if pid!=os.getpid(): - raise Exception(f'Table "{table_name}" is locked by process with pid={pid}') - else: - return False + #if pid!=os.getpid(): + #raise Exception(f'Table "{table_name}" is locked by process with pid={pid}') + #else: + #return False except IndexError: pass @@ -540,8 +540,8 @@ def unlock_table(self, table_name, force=False): try: # pid = self.select('*','meta_locks', f'table_name={table_name}', return_object=True).data[0][1] pid = self.tables['meta_locks']._select_where('pid',f'table_name={table_name}').data[0][0] - if pid!=os.getpid(): - raise Exception(f'Table "{table_name}" is locked by the process with pid={pid}') + #if pid!=os.getpid(): + # raise Exception(f'Table "{table_name}" is locked by the process with pid={pid}') except IndexError: pass self.tables['meta_locks']._delete_where(f'table_name={table_name}') @@ -635,7 +635,7 @@ def _get_insert_stack_for_table(self, table_name): table_name: string. Table name (must be part of database). 
''' return self.tables['meta_insert_stack']._select_where('*', f'table_name={table_name}').column_by_name('indexes')[0] - # res = self.select('meta_insert_stack', '*', f'table_name={table_name}', return_object=True).indexes[0] + #res = self.select('meta_insert_stack', '*', f'table_name={table_name}', return_object=True).indexes[0] # return res def _update_meta_insert_stack_for_tb(self, table_name, new_stack): diff --git a/miniDB/hashtesting.py b/miniDB/hashtesting.py new file mode 100644 index 00000000..87dd3759 --- /dev/null +++ b/miniDB/hashtesting.py @@ -0,0 +1,54 @@ +# import hashlib + +#would work in theory B) ^v_v^ + + +# class HashTable(object): +# def __init__(self, size=10) + +# self.num_elements = 0 +# self.data = [0] * size +# self.size = len(self.data) +# print(self.data) + +# def __get_hash_index(self): +# test_hash = int(blah blah) +# return test_hash % self.size + +# # making insert method +# def insert(self, key, value): +# ''' +# inserting data +# key(str) , value(tuple) , +# ''' +# hash_data = (key, value) +# hash_index = self.__get_hash__index(key) +# self.data[hash_index] = hash_data + + +# # getter +# def get(self,key): +# '''data from key''' +# hash_index = self.__get__hash_index(key) +# #compare with first element of the list +# if key != self.data[hash_index][0] or self.data[hash_index] ==0 +# datakey[0] +# if key !=datakey or data ==0: +# raise KEYeRROR("cant hash that key or no data") +# return data[1] + +# #similar for remove +# def remove(self, key): +# hash_index = self.__get__hash_index(key) +# #compare with first element of the list +# if key != self.data[hash_index][0] or self.data[hash_index] ==0 +# datakey[0] +# if key !=datakey or data ==0: +# raise KEYeRROR("cant hash that key or no data") +# self.data[hash_index] = 0 + + +# def key_contains(self, substring): + + +# test_hash_table = HashTable() \ No newline at end of file diff --git a/miniDB/joins.py b/miniDB/joins.py index 81fd0915..dc2aaf68 100644 --- a/miniDB/joins.py +++ 
b/miniDB/joins.py @@ -23,7 +23,7 @@ def __init__(self, condition, left_table, right_table, index, index_saved): def join(self): # Get the column of the left and right tables and the operator, from the condition of the join - column_name_left, operator, column_name_right = Table()._parse_condition(self.condition, join=True) + column_name_left, operator, column_name_right = Table()._parse_condition(self.condition, join=True) reversed = False # If we have the index of the left table, reverse the order of the tables @@ -73,7 +73,7 @@ def __init__(self, condition, left_table, right_table): def join(self): # Get the column of the left and right tables and the operator, from the condition of the join - column_name_left, operator, column_name_right = Table()._parse_condition(self.condition, join=True) + column_name_left, operator, column_name_right = Table()._parse_condition(self.condition, join=True) column_index_left = self.left_table.column_names.index(column_name_left) column_index_right = self.right_table.column_names.index(column_name_right) diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..11e9dc23 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -8,7 +8,9 @@ def get_op(op, a, b): '<': operator.lt, '>=': operator.ge, '<=': operator.le, - '=': operator.eq} + '=': operator.eq, + 'and':operator.and_, + 'or':operator.or_} try: return ops[op](a,b) @@ -29,12 +31,10 @@ def split_condition(condition): if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. right = right.strip('"') - elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. - raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') - - if right.find('"') != -1: # If there are any double quotes in the value, throw. 
(Notice we've already removed the leading and trailing ones) - raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') - + #elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. + # raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') + #if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) + #raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') return left, op_key, right def reverse_op(op): diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..8b7ad7aa 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -26,6 +26,11 @@ class Table: - a dictionary that includes the appropriate info (all the attributes in __init__) ''' + + + + + def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): if load is not None: @@ -151,7 +156,7 @@ def _update_rows(self, set_value, set_column, condition): Operatores supported: (<,<=,=,>=,>) ''' # parse the condition - column_name, operator, value = self._parse_condition(condition) + column_name,column_name_right, operator, value,isnot = self._parse_condition(condition) # get the condition and the set column column = self.column_by_name(column_name) @@ -182,7 +187,7 @@ def _delete_where(self, condition): Operatores supported: (<,<=,==,>=,>) ''' - column_name, operator, value = self._parse_condition(condition) + column_name,column_nameRight, operator, value,isnot= self._parse_condition(condition) indexes_to_del = [] @@ -223,19 +228,51 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by desc: boolean. If True, order_by will return results in descending order (False by default). limit: int. 
An integer that defines the number of rows that will be returned (all rows if None). ''' - + # if * return all columns, else find the column indexes for the columns specified if return_columns == '*': return_cols = [i for i in range(len(self.column_names))] else: return_cols = [self.column_names.index(col.strip()) for col in return_columns.split(',')] - + # if condition is None, return all rows # if not, return the rows with values where condition is met for value + ''' + we have split what used to be y into column_name1 and column_name2. Using operator and condition.split we are able to decide if we want to run code for AND or OR + ''' if condition is not None: - column_name, operator, value = self._parse_condition(condition) - column = self.column_by_name(column_name) - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + column_name1,column_name2, operator, value,isnot = self._parse_condition(condition) + column1, column2 = self.column_by_name(column_name1),self.column_by_name(column_name2) + if isnot == True and operator == '=' and len(condition.split(' and '))>1: + left_condition, right_condition = condition.split(' and ') + left_value,right_value = self._parse_condition(left_condition.strip()),self._parse_condition(right_condition.strip()) + right_value = self._parse_condition(right_condition.strip()) + left_value,right_value= left_value[:-1],right_value[:-1] + operator1, operator2 = left_value[-2], right_value[-2] + w,u = left_value[-1],right_value[-1] + rows = [ind for ind, (x,y) in enumerate(zip(column1,column2)) if get_op(operator1,x,w) and get_op(operator2,y,u)] #compares x and y and if both are True then it gets assigned to rows + print("these are the correct Indexes for the rows",rows)#prints the indexes for the correct answers + #both AND and OR use left_value and right_value which is a list that contains each condition, left condition being the first one and right being the second. 
+ #both AND and OR work in the same ways + elif isnot == True and len(condition.split(' or '))>1: # split on the spaced keyword so column names containing "or" are not cut + left_condition, right_condition = condition.split(' or ') + left_value,right_value = self._parse_condition(left_condition.strip()),self._parse_condition(right_condition.strip()) + left_value,right_value= left_value[:-1],right_value[:-1] + operator1, operator2 = left_value[-2], right_value[-2] + w,u = left_value[-1],right_value[-1] + rows1 = [ind for ind, x in enumerate(column1) if get_op(operator1, x, w)] + rows2 = [ind for ind, y in enumerate(column2) if get_op(operator2, y, u)] + rows = list(set(rows1) | set(rows2)) #compares rows1 and rows2 and prints either one if it is correct + print("these are the correct Indexes for the rows",rows) + + else: + column_name1,column_name2, operator, value,isnot = self._parse_condition(condition) + column = self.column_by_name(column_name1) + column = self.column_by_name(column_name1) + if isnot == True:#NOT + rows = [ind for ind, x in enumerate(column) if not get_op(operator, x, value)] + else: + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] else: rows = [i for i in range(len(self.data))] @@ -266,7 +303,6 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # s_table.data = s_table.data[:k] if isinstance(limit,str): s_table.data = [row for row in s_table.data if any(row)][:int(limit)] - return s_table @@ -279,7 +315,7 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False return_cols = [self.column_names.index(colname) for colname in return_columns] - column_name, operator, value = self._parse_condition(condition) + column_name,column_nameRight, operator, value,isnot = self._parse_condition(condition) # if the column in condition is not a primary key, abort the select if column_name != self.column_names[self.pk_idx]: @@ -351,7 +387,7 @@ def _general_join_processing(self, table_right:Table, condition, join_type): Operators supported: 
(<,<=,==,>=,>) ''' # get columns and operator - column_name_left, operator, column_name_right = self._parse_condition(condition, join=True) + column_name_left, operator, column_name_right = self._parse_condition(condition, join=True) # try to find both columns, if you fail raise error if(operator != '=' and join_type in ['left','right','full']): @@ -555,14 +591,68 @@ def _parse_condition(self, condition, join=False): # if both_columns (used by the join function) return the names of the names of the columns (left first) if join: return split_condition(condition) - # cast the value with the specified column's type and return the column name, the operator and the casted value - left, op, right = split_condition(condition) + isnot = True + index1 = False + index2 = False + index = [] + condition_list = condition.split() + ''' + we look for 'and' within the condition_list, if we find it we change index1 to True as well as find the index for where 'and' is in the condition_list() list + something similar happens with the elif line but instead of index1 it uses index2 and checks for 'or' within the condition_list() list + ''' + if "and" in condition_list : + index = condition_list.index('and') + index1 = True + elif "or" in condition_list: + index = condition_list.index('or') + index2 = True + ''' + here we check if there is 'and', 'or' statement in condition_list() if not then we move on to 'not' or no statement + ''' + if index1 == True:#THIS IS AND + left_condition = " ".join(condition_list[:index]) + right_condition = " ".join(condition_list[index+1:]) + left_value = self._parse_condition(left_condition) + right_value = self._parse_condition(right_condition) + left_value= left_value[:-1] + right_value= right_value[:-1] + coltype = self.column_types[self.column_names.index(left_value[0])] + x = (left_value[-1], right_value[-1]) + y = left_value[1] + w = (right_value[0]) + op = "=" + isnot = True + return y,w, op, x, isnot # operands were already cast by the recursive calls; casting the tuple itself raises for int columns + elif index2 == True:# THIS IS 
OR + left_condition = " ".join(condition_list[:index]) + right_condition = " ".join(condition_list[index+1:]) + left_value = self._parse_condition(left_condition) + right_value = self._parse_condition(right_condition) + left_value= left_value[:-1] + right_value= right_value[:-1] + coltype = self.column_types[self.column_names.index(left_value[0])] + x = (left_value[-1], right_value[-1]) + y = left_value[1] + w = (right_value[0]) + op = "=" + isnot = True + return y,w, op, x, isnot # operands were already cast by the recursive calls; casting the tuple itself raises for int columns + elif index1 == False and index2 == False: + left, op, right = split_condition(condition) + result1 = left.split(" ") + isnot = False + if len(result1) > 1 and result1[0] == "not":#THIS IS NOT + left = result1[1] + isnot = True + x = left + y = right if left not in self.column_names: raise ValueError(f'Condition is not valid (cant find column name)') coltype = self.column_types[self.column_names.index(left)] - - return left, op, coltype(right) + x = left + y = left + return x,y, op, coltype(right), isnot def _load_from_file(self, filename): diff --git a/sql_files/largeRelationsInsertFile.sql b/sql_files/largeRelationsInsertFile.sql index d678409f..16308fc7 100644 --- a/sql_files/largeRelationsInsertFile.sql +++ b/sql_files/largeRelationsInsertFile.sql @@ -9,6 +9,7 @@ create table takes (ID str, course_id str, sec_id str, semester str, year int, g create table advisor (s_ID str primary key, i_ID str); create table time_slot (time_slot_id str, day str, start_hr int, start_min int, end_hr str, end_min str); create table prereq (course_id str, prereq_id str); +CREATE TABLE users (id int primary key,username str UNIQUE,email str UNIQUE,age int); insert into time_slot values ( 'A', 'M', 8, 0, 8, 50); insert into time_slot values ( 'A', 'W', 8, 0, 8, 50); insert into time_slot values ( 'A', 'F', 8, 0, 8, 50); @@ -34629,3 +34630,6 @@ insert into prereq values('830', '748'); insert into prereq values('558', '130'); insert into prereq values('877', '599'); insert into prereq values('349', 
'612'); +insert into users values (1, 'harisbohtis', 'haris@gmail.com', 35); +insert into users values (2, 'georgesfalagakos', 'george@gmail.com', 20); +insert into users values (3, 'mariapapadopoulou', 'mariap@yahoo.com', 25); \ No newline at end of file diff --git a/sql_files/smallRelationsInsertFile.sql b/sql_files/smallRelationsInsertFile.sql index d05d81b9..4ed1c438 100644 --- a/sql_files/smallRelationsInsertFile.sql +++ b/sql_files/smallRelationsInsertFile.sql @@ -9,6 +9,7 @@ create table takes (ID str, course_id str, sec_id str, semester str, year int, g create table advisor (s_ID str primary key, i_ID str); create table time_slot (time_slot_id str, day str, start_hr int, start_min int, end_hr str, end_min str); create table prereq (course_id str, prereq_id str); +CREATE TABLE users (id int primary key,username str UNIQUE,email str UNIQUE,age int); insert into classroom values (Packard,101,500); insert into classroom values (Painter,514,10); insert into classroom values (Taylor,3128,70); @@ -146,4 +147,7 @@ insert into prereq values (CS-190,CS-101); insert into prereq values (CS-315,CS-101); insert into prereq values (CS-319,CS-101); insert into prereq values (CS-347,CS-101); -insert into prereq values (EE-181,PHY-101); \ No newline at end of file +insert into prereq values (EE-181,PHY-101); +insert into users values (1, 'harisbohtis', 'haris@gmail.com', 35); +insert into users values (2, 'georgesfalagakos', 'george@gmail.com', 20); +insert into users values (3, 'mariapapadopoulou', 'mariap@yahoo.com', 25); \ No newline at end of file