-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbase.py
More file actions
71 lines (59 loc) · 1.85 KB
/
base.py
File metadata and controls
71 lines (59 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import io
import abc
import zipfile
import psycopg2
import requests
import pandas as pd
from column_mapping import *
# NOTE: pretty sure the cvm_dados class below isn't used anywhere besides historical -- TODO confirm before removing.
class cvm_dados:
    """Fetch a remote file over HTTP and read it as a zip archive.

    ``get_data`` must be called before ``list_files`` or ``select_file``:
    both read the response object stored on ``self.data``.
    """

    def __init__(self, link):
        # URL that ``get_data`` passes to ``requests.get``.
        self.link = link

    def get_data(self):
        """Request ``self.link`` and keep the response on ``self.data``."""
        response = requests.get(self.link)
        self.data = response

    def list_files(self):
        """Return the member names of the downloaded payload, opened as a zip."""
        payload = io.BytesIO(self.data.content)
        archive = zipfile.ZipFile(payload)
        return archive.namelist()

    def select_file(self, file_name):
        """Return the raw bytes of the archive member ``file_name``.

        Only meaningful when the downloaded payload is a zip archive. The
        archive object opened here is kept on ``self.zip``.
        """
        payload = io.BytesIO(self.data.content)
        self.zip = zipfile.ZipFile(payload)
        return self.zip.read(file_name)
class get_data(abc.ABC):
    """Abstract base for objects that fetch a resource identified by a URL.

    Subclasses must implement both ``download`` and ``make_df``; the class
    cannot be instantiated until they do.
    """

    def __init__(self, url):
        # Address of the remote resource, stored for the subclass to use.
        self.url = url

    @abc.abstractmethod
    def download(self):
        """Retrieve the resource at ``self.url`` (subclass responsibility)."""
        return None

    @abc.abstractmethod
    def make_df(self):
        """Produce the parsed result for the resource (subclass responsibility)."""
        return None
class zip_data(get_data):
    """Download a zip archive from ``self.url`` and parse its members as CSV.

    After ``make_df``:
      * ``self.data`` is a ``BytesIO`` (archive with one member) or a dict
        mapping member name -> ``BytesIO`` (archive with several members);
      * ``self.df`` mirrors that shape with ``pandas.DataFrame`` objects.
    """

    @staticmethod
    def _csv_encoding():
        """Return the encoding name handed to ``pd.read_csv``.

        ``"ansi"`` is an alias of the Windows-only ``mbcs`` codec, so on any
        other platform passing it straight to pandas raises ``LookupError``.
        Keep ``"ansi"`` where it exists and fall back elsewhere.
        """
        import codecs  # local import keeps this class self-contained

        try:
            codecs.lookup("ansi")
        except LookupError:
            # NOTE(review): cp1252 is the Western-European ANSI code page;
            # assumed to match what "ansi" meant for these files -- confirm.
            return "cp1252"
        return "ansi"

    def download(self):
        """Fetch the archive and load its member(s) into memory on ``self.data``.

        Raises:
            requests.HTTPError: if the server answers with an error status
                (previously this surfaced later as ``zipfile.BadZipFile``).
            zipfile.BadZipFile: if the payload is not a zip archive.
        """
        response = requests.get(self.url)
        response.raise_for_status()
        # The context manager closes the archive once every member is copied out.
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            names = archive.namelist()
            if len(names) > 1:
                self.data = {name: io.BytesIO(archive.read(name)) for name in names}
            else:
                self.data = io.BytesIO(archive.read(names[0]))

    def make_df(self):
        """Download the archive, parse it with ``sep=";"`` and return ``self``.

        The archive is downloaded exactly once per call; the result is stored
        on ``self.df`` (a DataFrame, or a dict of DataFrames keyed by member
        name when the archive holds several files).
        """
        self.download()
        encoding = self._csv_encoding()
        if isinstance(self.data, dict):
            self.df = {
                name: pd.read_csv(buffer, sep=";", encoding=encoding)
                for name, buffer in self.data.items()
            }
        else:
            # No second download here: the buffer from the call above is unread.
            self.df = pd.read_csv(self.data, sep=";", encoding=encoding)
        return self
class csv_data(get_data):
    """Download a ``;``-separated CSV file from ``self.url`` into a DataFrame."""

    @staticmethod
    def _csv_encoding():
        """Return the encoding name handed to ``pd.read_csv``.

        ``"ansi"`` is an alias of the Windows-only ``mbcs`` codec, so on any
        other platform passing it straight to pandas raises ``LookupError``.
        Keep ``"ansi"`` where it exists and fall back elsewhere.
        """
        import codecs  # local import keeps this class self-contained

        try:
            codecs.lookup("ansi")
        except LookupError:
            # NOTE(review): cp1252 is the Western-European ANSI code page;
            # assumed to match what "ansi" meant for these files -- confirm.
            return "cp1252"
        return "ansi"

    def download(self):
        """Fetch ``self.url`` and keep the body as a ``BytesIO`` on ``self.data``.

        Raises:
            requests.HTTPError: if the server answers with an error status, so
                an error page is never silently parsed as CSV.
        """
        response = requests.get(self.url)
        response.raise_for_status()
        self.data = io.BytesIO(response.content)

    def make_df(self):
        """Download the file, parse it into ``self.df`` and return ``self``."""
        self.download()
        self.df = pd.read_csv(self.data, sep=";", encoding=self._csv_encoding())
        return self
class data_factory:
    """Select a downloader class by format code.

    ``data_factory(fmt)`` does not build a ``data_factory`` instance:
    ``__new__`` hands back the downloader *class* itself (``zip_data`` for
    ``"ZIP"``, ``csv_data`` for ``"CSV"``), or ``None`` for any other value.
    """

    def __new__(cls, fmt):
        # Exact, case-sensitive comparison against the two known codes.
        if fmt == "ZIP":
            return zip_data
        if fmt == "CSV":
            return csv_data
        # Unrecognised format: no class is selected.
        return None