From 16becced46498bdf097ccacf80f7c8e678fb9b61 Mon Sep 17 00:00:00 2001 From: abbse Date: Mon, 17 Aug 2015 13:27:36 +0200 Subject: [PATCH 1/2] IMP Added support to both local/remote files and compressed/decompressed --- enerdata/profiles/profile.py | 144 +++++++++++++++++++++++------------ 1 file changed, 94 insertions(+), 50 deletions(-) diff --git a/enerdata/profiles/profile.py b/enerdata/profiles/profile.py index 642d660..3038446 100644 --- a/enerdata/profiles/profile.py +++ b/enerdata/profiles/profile.py @@ -2,6 +2,7 @@ import bisect import logging +import os try: from collections import namedtuple, Counter except ImportError: @@ -10,11 +11,13 @@ from multiprocessing import Lock from StringIO import StringIO from dateutil.relativedelta import relativedelta +from urlparse import urlparse from enerdata.profiles import Dragger from enerdata.contracts.tariff import Tariff from enerdata.datetime.timezone import TIMEZONE from enerdata.metering.measure import Measure, EnergyMeasure +from enerdata.utils.compress import is_compressed_file, get_compressed_file logger = logging.getLogger(__name__) @@ -168,11 +171,7 @@ def profile(self, tariff, measures, drag_method='hour'): ) -class REEProfile(object): - HOST = 'www.ree.es' - PATH = '/sites/default/files/simel/perff' - down_lock = Lock() - +class BaseProfile(object): _CACHE = {} @classmethod @@ -187,58 +186,103 @@ def get_range(cls, start, end): return cofs @classmethod - def get(cls, year, month): + def get(cls, f, header): + import csv + + m = StringIO(f.read()) + if is_compressed_file(m): + cf = get_compressed_file(m) + m = StringIO(cf.read(m)) + reader = csv.reader(m, delimiter=';') + cofs = [] + n_hour = 0 + for vals in reader: + if header: + header = False + continue + if int(vals[3]) == 1: + n_hour = 1 + dt = datetime( + int(vals[0]), int(vals[1]), int(vals[2]) + ) + day = TIMEZONE.localize(dt, is_dst=bool(not int(vals[4]))) + day += timedelta(hours=n_hour) + n_hour += 1 + cofs.append( + (TIMEZONE.normalize(day), dict( + (k, float(vals[i])) for i, k in enumerate('ABCD', 5) + )) + ) + return cofs + + @classmethod + def get_cached(cls, key): + if key in cls._CACHE: + return cls._CACHE[key] + + +class RemoteProfile(BaseProfile): + down_lock = Lock() + + @classmethod + def get(cls, year, month, uri=None, header=False): + key = '%(year)s%(month)02i' % locals() + cached = super(RemoteProfile, cls).get_cached(key) + if cached: + return cached + + if not uri: + raise Exception('Profile uri required') + url = urlparse(uri) + host = url.netloc + path = url.path + + cls.down_lock.acquire() + import httplib + conn = None try: - cls.down_lock.acquire() - import csv - import httplib - key = '%(year)s%(month)02i' % locals() - conn = None - if key in cls._CACHE: - logger.debug('Using CACHE for REEProfile {0}'.format(key)) - return cls._CACHE[key] - perff_file = 'PERFF_%(key)s.gz' % locals() - conn = httplib.HTTPConnection(cls.HOST) - conn.request('GET', '%s/%s' % (cls.PATH, perff_file)) - logger.debug('Downloading REEProfile from {0}/{1}'.format( - cls.PATH, perff_file - )) - r = conn.getresponse() - if r.msg.type == 'application/x-gzip': - import gzip - c = StringIO(r.read()) - m = StringIO(gzip.GzipFile(fileobj=c).read()) - c.close() - reader = csv.reader(m, delimiter=';') - header = True - cofs = [] - for vals in reader: - if header: - header = False - continue - if int(vals[3]) == 1: - n_hour = 1 - dt = datetime( - int(vals[0]), int(vals[1]), int(vals[2]) - ) - day = TIMEZONE.localize(dt, is_dst=bool(not int(vals[4]))) - day += timedelta(hours=n_hour) - n_hour += 1 - cofs.append(Coefficent( - TIMEZONE.normalize(day), dict( - (k, float(vals[i])) for i, k in enumerate('ABCD', 5) - )) - ) - cls._CACHE[key] = cofs - return cofs - else: - raise Exception('Profiles from REE not found') + conn = httplib.HTTPConnection(host) + conn.request('GET', path) + f = conn.getresponse() + cls._CACHE[key] = super(RemoteProfile, cls).get(f, header) + return cls._CACHE[key] finally: if conn is not None: conn.close() cls.down_lock.release() +class REEProfile(RemoteProfile): + HOST = 'http://www.ree.es' + PATH = '/sites/default/files/simel/perff' + + @classmethod + def get(cls, year, month): + key = '%(year)s%(month)02i' % locals() + perff_file = 'PERFF_%(key)s.gz' % locals() + uri = '/'.join([cls.HOST, cls.PATH, perff_file]) + return super(REEProfile, cls).get(year, month, uri, True) + + +class LocalProfile(BaseProfile): + + @classmethod + def get(cls, year, month, path=None, header=False): + key = '%(year)s%(month)02i' % locals() + cached = super(LocalProfile, cls).get_cached(key) + if cached: + return cached + + if not path: + raise Exception('Profile directory required') + if not os.path.isfile(path): + raise Exception('Profile file {path} not found'.format(**locals())) + + with open(path) as f: + cls._CACHE[key] = super(LocalProfile, cls).get(f, header) + return cls._CACHE[key] + + class ProfileHour(namedtuple('ProfileHour', ['date', 'measure', 'valid'])): __slots__ = () From ac35829338fc67cd6abe52fd29c04c31d07498f4 Mon Sep 17 00:00:00 2001 From: abbse Date: Mon, 24 Aug 2015 12:02:32 +0200 Subject: [PATCH 2/2] IMP Added support to REE losses and modular parser --- enerdata/losses/__init__.py | 0 enerdata/losses/profile.py | 59 +++++++++++++++++++++++++ enerdata/profiles/profile.py | 81 ++++------------------------------- enerdata/utils/__init__.py | 0 enerdata/utils/compress.py | 59 +++++++++++++++++++++++++ enerdata/utils/profile.py | 83 ++++++++++++++++++++++++++++++++++++ 6 files changed, 209 insertions(+), 73 deletions(-) create mode 100644 enerdata/losses/__init__.py create mode 100644 enerdata/losses/profile.py create mode 100644 enerdata/utils/__init__.py create mode 100644 enerdata/utils/compress.py create mode 100644 enerdata/utils/profile.py diff --git a/enerdata/losses/__init__.py b/enerdata/losses/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/enerdata/losses/profile.py b/enerdata/losses/profile.py new file mode 100644 index 0000000..5d11d22 --- /dev/null +++ b/enerdata/losses/profile.py @@ -0,0 +1,59 @@ +import os +import bisect +from datetime import datetime, date, timedelta +from multiprocessing import Lock +from StringIO import StringIO +from urlparse import urlparse + +from enerdata.datetime.timezone import TIMEZONE +from enerdata.metering.measure import Measure +from enerdata.utils.profile import * + +import xlrd + +SUPPORTED_TARIFFS = ['2.0A', '2.0.DHA', '2.0.DHS', + '2.1A', '2.1.DHA', '2.1.DHS', + '3.0A', '3.1A', + '6.1', '6.2', '6.3', '6.4'] + + +class REELossProfileParser(object): + + @classmethod + def get(cls, m, header=None): + book = xlrd.open_workbook(file_contents=m.read()) + tariffs = book.sheet_names() + coefs = dict() + for tariff in tariffs: + if tariff not in SUPPORTED_TARIFFS: + raise Exception('Not supported tariff: {tariff}'.format(**locals())) + + dataset = book.sheet_by_name(tariff) + _header= False + for row_idx in range(dataset.nrows): + row = dataset.row_values(row_idx) + if row[0].startswith('Fecha'): + _header = True + continue + if not _header: + continue + + date_raw = row[1] + date = datetime.strptime(date_raw, '%d/%m/%Y') + key = date.strftime('%Y%m%d') + coefs.setdefault(key, {}) + if tariff not in coefs[key]: + coefs[key][tariff] = row[4:] + return coefs + + +class REELossProfile(RemoteProfile): + HOST = 'http://www.esios.ree.es' + PATH = 'Solicitar?fileName={perff_file}&fileType=xls&idioma=es&tipoSolicitar=Publicaciones' + + @classmethod + def get(cls, year, month): + key = '%(year)s%(month)02i' % locals() + perff_file = 'COEF_PERD_PEN_MM_%(key)s' % locals() + uri = '/'.join([cls.HOST, cls.PATH.format(**locals())]) + return super(REELossProfile, cls).get('PERD', year, month, REELossProfileParser, uri, True) diff --git a/enerdata/profiles/profile.py b/enerdata/profiles/profile.py index 3038446..6d7f3b7 100644 --- a/enerdata/profiles/profile.py +++ b/enerdata/profiles/profile.py @@ -1,23 +1,17 @@ from __future__ import division -import bisect import logging -import os try: from collections import namedtuple, Counter except ImportError: from backport_collections import namedtuple, Counter -from datetime import datetime, date, timedelta -from multiprocessing import Lock -from StringIO import StringIO from dateutil.relativedelta import relativedelta -from urlparse import urlparse from enerdata.profiles import Dragger from enerdata.contracts.tariff import Tariff -from enerdata.datetime.timezone import TIMEZONE -from enerdata.metering.measure import Measure, EnergyMeasure -from enerdata.utils.compress import is_compressed_file, get_compressed_file +from enerdata.metering.measure import EnergyMeasure +from enerdata.utils.profile import * + logger = logging.getLogger(__name__) @@ -171,8 +165,7 @@ def profile(self, tariff, measures, drag_method='hour'): ) -class BaseProfile(object): - _CACHE = {} +class REEProfileParser(object): @classmethod def get_range(cls, start, end): @@ -186,13 +179,8 @@ def get_range(cls, start, end): return cofs @classmethod - def get(cls, f, header): + def get(cls, m, header): import csv - - m = StringIO(f.read()) - if is_compressed_file(m): - cf = get_compressed_file(m) - m = StringIO(cf.read(m)) reader = csv.reader(m, delimiter=';') cofs = [] n_hour = 0 @@ -215,42 +203,6 @@ def get(cls, f, header): ) return cofs - @classmethod - def get_cached(cls, key): - if key in cls._CACHE: - return cls._CACHE[key] - - -class RemoteProfile(BaseProfile): - down_lock = Lock() - - @classmethod - def get(cls, year, month, uri=None, header=False): - key = '%(year)s%(month)02i' % locals() - cached = super(RemoteProfile, cls).get_cached(key) - if cached: - return cached - - if not uri: - raise Exception('Profile uri required') - url = urlparse(uri) - host = url.netloc - path = url.path - - cls.down_lock.acquire() - import httplib - conn = None - try: - conn = httplib.HTTPConnection(host) - conn.request('GET', path) - f = conn.getresponse() - cls._CACHE[key] = super(RemoteProfile, cls).get(f, header) - return cls._CACHE[key] - finally: - if conn is not None: - conn.close() - cls.down_lock.release() - class REEProfile(RemoteProfile): HOST = 'http://www.ree.es' @@ -261,26 +213,9 @@ def get(cls, year, month): key = '%(year)s%(month)02i' % locals() perff_file = 'PERFF_%(key)s.gz' % locals() uri = '/'.join([cls.HOST, cls.PATH, perff_file]) - return super(REEProfile, cls).get(year, month, uri, True) - - -class LocalProfile(BaseProfile): - - @classmethod - def get(cls, year, month, path=None, header=False): - key = '%(year)s%(month)02i' % locals() - cached = super(LocalProfile, cls).get_cached(key) - if cached: - return cached - - if not path: - raise Exception('Profile directory required') - if not os.path.isfile(path): - raise Exception('Profile file {path} not found'.format(**locals())) - - with open(path) as f: - cls._CACHE[key] = super(LocalProfile, cls).get(f, header) - return cls._CACHE[key] + return super(REEProfile, cls).get( + 'PERFF', year, month, REEProfileParser, uri, True + ) class ProfileHour(namedtuple('ProfileHour', ['date', 'measure', 'valid'])): diff --git a/enerdata/utils/__init__.py b/enerdata/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/enerdata/utils/compress.py b/enerdata/utils/compress.py new file mode 100644 index 0000000..966e342 --- /dev/null +++ b/enerdata/utils/compress.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import struct +import gzip +import bz2 + + +class CompressedFile (object): + magic = None + file_type = None + mime_type = None + proper_extension = None + + @classmethod + def is_magic(cls, data): + return data.startswith(cls.magic) + + +class GZFile (CompressedFile): + magic = '\x1f\x8b\x08' + file_type = 'gz' + mime_type = 'compressed/gz' + + @classmethod + def read(cls, c): + return gzip.GzipFile(fileobj=c).read() + + +class BZFile (CompressedFile): + magic = '\x42\x5a\x68' + file_type = 'bz2' + mime_type = 'compressed/bz' + + @classmethod + def read(cls, c): + return bz2.decompress(c.buf) + + +def is_compressed_file(f): + start_of_file = f.read(5) + f.seek(0) + for cls in (GZFile, BZFile): + if cls.is_magic(start_of_file): + return True + return False + + +class FileTypeNotSupportedException(Exception): + pass + + +def get_compressed_file(f): + start_of_file = f.read(5) + f.seek(0) + for cls in (GZFile, BZFile): + if cls.is_magic(start_of_file): + return cls + raise FileTypeNotSupportedException diff --git a/enerdata/utils/profile.py b/enerdata/utils/profile.py new file mode 100644 index 0000000..03c8ae3 --- /dev/null +++ b/enerdata/utils/profile.py @@ -0,0 +1,83 @@ +import os +import bisect +from datetime import datetime, date, timedelta +from multiprocessing import Lock +from StringIO import StringIO +from urlparse import urlparse, urljoin + +from enerdata.datetime.timezone import TIMEZONE +from enerdata.metering.measure import Measure +from enerdata.utils.compress import is_compressed_file, get_compressed_file + + +class BaseProfile(object): + _CACHE = {} + + @classmethod + def get(cls, f, parser, header): + if not parser: + raise Exception('Parser required') + + m = StringIO(f.read()) + if is_compressed_file(m): + cf = get_compressed_file(m) + m = StringIO(cf.read(m)) + return parser.get(m, header) + + @classmethod + def get_cached(cls, key): + if key in cls._CACHE: + return cls._CACHE[key] + + +class RemoteProfile(BaseProfile): + down_lock = Lock() + + @classmethod + def get(cls, tag, year, month, parser=None, uri=None, header=False): + key = '%(tag)s%(year)s%(month)02i' % locals() + cached = super(RemoteProfile, cls).get_cached(key) + if cached: + return cached + + if not uri: + raise Exception('Profile uri required') + url = urlparse(uri) + host = url.netloc + path = url.path + '?' + url.query + + cls.down_lock.acquire() + import httplib + conn = None + try: + conn = httplib.HTTPConnection(host) + conn.request('GET', path) + f = conn.getresponse() + cls._CACHE[key] = super(RemoteProfile, cls).get(f, parser, header) + return cls._CACHE[key] + finally: + if conn is not None: + conn.close() + cls.down_lock.release() + + if not os.path.isdir(path): + raise Exception('Profile directory {path} not found'.format(**locals())) + + +class LocalProfile(BaseProfile): + + @classmethod + def get(cls, tag, year, month, parser=None, path=None, header=False): + key = '%(tag)s%(year)s%(month)02i' % locals() + cached = super(LocalProfile, cls).get_cached(key) + if cached: + return cached + + if not path: + raise Exception('Profile directory required') + if not os.path.isfile(path): + raise Exception('Profile file {path} not found'.format(**locals())) + + with open(path) as f: + cls._CACHE[key] = super(LocalProfile, cls).get(f, parser, header) + return cls._CACHE[key]