-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatabase.py
More file actions
298 lines (234 loc) · 10.1 KB
/
database.py
File metadata and controls
298 lines (234 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
"""
A module for the persistence layer
"""
import datetime
import sqlite3
import typing as t
import pandas as pd
from pathlib import Path
CURRENT_PATH_CWD = Path.cwd()
IMAGES_PATH = CURRENT_PATH_CWD / "images_and_graphs"
class DatabaseManager:
"""
A class specialized for the persistence layer using SQLite
"""
def __init__(self, database_filename: str):
"""
Initializes the connection with the SQLite database
"""
self.connection = sqlite3.connect(database_filename,check_same_thread=False)
self.key_conversion_needed=[]
def __del__(self):
""" Closes the connection when the database manager is no longer used """
self.connection.close()
def _execute(self, statement: str, values: t.Optional[t.Tuple[str]] = None) -> sqlite3.Cursor:
"""
Takes in a SQL statement and optionally the values for placeholders
and executes it with SQLite
Args:
statement:the command written in sql str format
values: what to replace the placeholders with, tuples of str
Returns:
a cursor, sqlite3 cursor
"""
try:
with self.connection:
cursor = self.connection.cursor()
cursor.execute(statement, values or [])
return cursor
except (sqlite3.IntegrityError, sqlite3.OperationalError):
print(
f"Something went wrong with the following transaction:\n{statement}"
)
raise
def _type_for_columns(self,columns_specs:dict)->dict:
"""
Module that receives a dict of column names and sample of column data in python and returns a dict with the ID key inserted
plus the same keys as before which have been converted to SQL data types
Args:
columns_specs: dict with key as column names and values as sample of column data
Returns:
new_columns:a dict with the same key as in input but the values contain the converted type of the value in sql equivalent
"""
new_columns={}
new_columns.setdefault("ID",'INTEGER PRIMARY KEY AUTOINCREMENT')
for key, value in list(columns_specs.items()):
current_type=type(value)
if current_type == str or current_type == chr or current_type == datetime or current_type== datetime.timedelta:
new_type="TEXT"
self.key_conversion_needed.append(key)
elif current_type == int:
new_type="INTEGER"
elif current_type == float:
new_type="REAL"
else:
new_type = "BLOB"
self.key_conversion_needed.append(key)
new_columns.setdefault(key,new_type)
return new_columns
def create_table_if_not_exists(self, table_name: str, columns: t.Dict[str, str]) -> None:
"""
Takes in a table name and the columns with names as keys and types as values and then creates
the CREATE TABLE statement to be executed with SQLite
Args:
table_name: string with name of the table
columns: dict with column names as keys and the data type in sql format for values
"""
columns=self._type_for_columns(columns)
columns_with_types = []
for column_name, data_type in columns.items():
current_column = f"{column_name} {data_type.upper()}"
columns_with_types.append(current_column)
columns_in_statement = ", ".join(columns_with_types)
statement = f"""
CREATE TABLE IF NOT EXISTS {table_name} (
{columns_in_statement}
);
"""
self._execute(statement)
def drop_table(self, table_name: str) -> None:
"""
Takes in a table name to delete using the DROP TABLE statement to be executed with SQLite
"""
statement = f"DROP TABLE {table_name};"
self._execute(statement)
def add(self, table_name: str, data: t.Dict[str, str]) -> int:
"""
Takes in a table name to INSERT data INTO and a data dictionary with columns
as keys and values as values.
It returns the last used id
And it works row by row
Args:
table_name: str -the name of the table where to add
data:dict, the data to be added organized with keys as column names and values as actual data to be added
Returns:
last row added id
"""
keys = data.keys()
for elem in set(self.key_conversion_needed):
if elem not in keys:
# print(f"key: {elem} not in data.keys()={keys}, skipping...")
continue
data[elem] = str(data[elem])
column_names = ", ".join(keys)
placeholders = ", ".join(["?"] * len(keys))
column_values = tuple(data.values())
statement = f"""
INSERT INTO
{table_name} (
{column_names}
) VALUES (
{placeholders}
);
"""
result = self._execute(statement, column_values)
#because the lastrow id will literally return the last- that means not 0 if it is first, but 1
return (result.lastrowid-1)
def delete(self, table_name: str, criteria: t.Dict[str, str]) -> None:
"""
Takes in a table name and a criteria to DELETE FROM
"""
placeholders = [f"{column} = ?" for column in criteria.keys()]
delete_criteria = " AND ".join(placeholders)
delete_criteria_values = tuple(criteria.values())
statement = f"""
DELETE FROM
{table_name}
WHERE
{delete_criteria};
"""
self._execute(statement, delete_criteria_values)
def _select_statement(
self,
table_name: str,
criteria: t.Dict[str, str] = {},
order_by: t.Optional[str] = None,
ordered_descending: bool = False,
) -> str:
"""
Takes in a table name and optionally a criteria as a dictionary, a column to order by
and a boolean flag to order it by that column descending or not
returns the select statement construction
Args:
table_name:str, the table name
criteria: dict where keys are column name, value are values that meet the criteria of equality
order_by: optional value to sort by
ordered_descending:optional value to change the way it is sorted
Returns:
statement: the string that is searches for
"""
select_criteria_values = tuple(criteria.values())
statement = f"SELECT * FROM {table_name}"
if criteria:
placeholders = [f"{column} = ?" for column in criteria.keys()]
select_criteria = " AND ".join(placeholders)
statement = statement + f" WHERE {select_criteria}"
if order_by:
statement = statement + f" ORDER BY {order_by}"
if ordered_descending:
statement = statement + " DESC"
statement = statement + ";"
# self._execute(statement,select_criteria_values)
return statement
def _write_to_xls_file(self, df:pd.DataFrame, name: str, sheet_name:t.Optional[str]="Sheet1"):
"""
Method for writing to xls directly from the DB manager.
It passes through a pandas Dataframe and it is written
Called by select_and_write_to_xls_file
Args:
df: a dataframe whill a values take with a fetchall in previous calls
name: str a name of the xls
"""
with pd.ExcelWriter(name, engine="openpyxl") as writer:
df.to_excel(writer, sheet_name=sheet_name, header=True, engine="openpyxl", index=True)
def select_and_write_to_xls_file(
self,
name:str,
table_name: str,
sheet_name:t.Optional[str]="Sheet1",
criteria: t.Dict[str, str] = {},
order_by: t.Optional[str] = None,
ordered_descending: bool = False,
):
"""
Method that recieves the imput for a querry in a DB, then generates the xls at given location
Args:
table_name:str, the table name
criteria: dict where keys are column name, value are values that meet the criteria of equality
order_by: optional value to sort by
ordered_descending:optional value to change the way it is sorted
Returns:
None- just creates the dataframe and the output is the xls file
"""
select_criteria_values = tuple(criteria.values())
for item in select_criteria_values:
new_statement=self._select_statement(table_name,criteria, order_by, ordered_descending).replace("?",str(item),1)
df=pd.read_sql_query(new_statement, self.connection)
self._write_to_xls_file(df, name,sheet_name)
def add_multiple_rows(self, table_name: str, data: t.Dict[str, t.List], tuple_data:t.List[t.Tuple]) -> None:
"""
Takes in a table name to INSERT data INTO and a data dictionary with columns
as keys and values as multiple rows and column values.
It returns the last used id
Args:
"""
keys = data.keys()
for elem in set(self.key_conversion_needed):
if elem not in keys:
# print(f"key: {elem} not in data.keys()={keys}, skipping...")
continue
data[elem] = str(data[elem])
column_names = ", ".join(keys)
placeholders=", ".join(["?"] * len(keys))
placeholders = f"({placeholders})"
placeholders=", ".join([f"{placeholders}"]*len(list(data.values())[0]))
column_values = tuple(tuple_data)
statement = f"""
INSERT INTO
{table_name} (
{column_names}
) VALUES
{placeholders}
;
"""
self._execute(statement, column_values)