diff --git a/src/jane/__init__.py b/src/jane/__init__.py index 99c8c6e..39e146e 100644 --- a/src/jane/__init__.py +++ b/src/jane/__init__.py @@ -1,8 +1,34 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import +import warnings + from .exceptions import * # NOQA from .version import get_git_version +# monkey patch django's json field to get around django/django#6929 on +# django <1.11 +import django +try: + django_version_major_minor = list( + map(int, django.__version__.split('.')[:2])) +except ValueError: + msg = ("Failed to determine Django version. Django's json field will not " + "be patched (django/django#6929).") + warnings.warn(msg) +else: + if django_version_major_minor < [1, 11]: + import django.contrib.postgres.fields.jsonb + + django.contrib.postgres.fields.jsonb.KeyTransform._as_sql_original = \ + django.contrib.postgres.fields.jsonb.KeyTransform.as_sql + + def as_sql(self, *args, **kwargs): + _as_sql = self._as_sql_original(*args, **kwargs) + return '({})'.format(_as_sql[0]), _as_sql[1] + + django.contrib.postgres.fields.jsonb.KeyTransform.as_sql = as_sql + __version__ = get_git_version() +__all__ = ['__version__'] diff --git a/src/jane/documents/models.py b/src/jane/documents/models.py index 42c2fd7..da423e2 100644 --- a/src/jane/documents/models.py +++ b/src/jane/documents/models.py @@ -232,14 +232,6 @@ class DocumentIndexManager(models.GeoManager): """ Custom queryset manager for the document indices. 
""" - JSON_QUERY_TEMPLATE_MAP = { - "int": "CAST(json->>'%s' AS INTEGER) %s %s", - "float": "CAST(json->>'%s' AS REAL) %s %s", - "str": "LOWER(json->>'%s') %s LOWER('%s')", - "bool": "CAST(json->>'%s' AS BOOL) %s %s", - "UTCDateTime": "CAST(json->>'%s' AS TIMESTAMP) %s TIMESTAMP '%s'" - } - JSON_ORDERING_TEMPLATE = { "int": "CAST(json->>'%s' AS INTEGER)", "float": "CAST(json->>'%s' AS REAL)", @@ -260,9 +252,6 @@ def get_queryset(self): annotate(attachments_count=Count('attachments')) return queryset - def _get_json_query(self, key, operator, type, value): - return self.JSON_QUERY_TEMPLATE_MAP[type] % (key, operator, str(value)) - def apply_retrieve_permission(self, document_type, queryset, user): """ Apply potential additional restrictions based on the permissions. @@ -429,10 +418,6 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None, "bool": bool, "UTCDateTime": UTCDateTime } - - # Filter based on the attributes in the meta field. - where = [] - for key, value_type in meta.items(): # Handle strings. if value_type == "str": @@ -443,22 +428,24 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None, if name not in kwargs: continue value = kwargs[name] + method = 'exact' # Possible wildcards. if "*" in value or "?" in value: - value = value.replace("?", "_").replace("*", r"%%") - # PostgreSQL specific case insensitive LIKE statement. - if operator == "=": - where.append("json->>'%s' ILIKE '%s'" % (key, - value)) - elif operator == "!=": - where.append("json->>'%s' NOT ILIKE '%s'" % ( - key, value)) - else: - raise NotImplementedError() # pragma: no cover + value = value.replace("*", ".*") + method = 'iregex' + # the regex field lookup on JSON fields suffers from a + # bug on Django 1.9, see django/django#6929. + # We patch django's json field on django <1.11 in + # jane/__init__.py + # PostgreSQL specific case insensitive LIKE statement. 
+ if operator == "=": + queryset = queryset.filter(**{ + 'json__{}__{}'.format(key, method): value}) + elif operator == "!=": + queryset = queryset.exclude(**{ + 'json__{}__{}'.format(key, method): value}) else: - where.append( - self._get_json_query(key, operator, value_type, - value)) + raise NotImplementedError() # pragma: no cover # Handle integers, floats, and UTCDateTimes. elif value_type in ("int", "float", "UTCDateTime"): choices = (("min_%s", ">="), ("max_%s", "<="), ("%s", "="), @@ -467,9 +454,23 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None, name = name % key if name not in kwargs: continue - where.append(self._get_json_query( - key, operator, value_type, - type_map[value_type](kwargs[name]))) + value = type_map[value_type](kwargs[name]) + if value_type == "UTCDateTime": + value = value.datetime.isoformat() + if operator == "=": + queryset = queryset.filter(**{ + 'json__{}__exact'.format(key): value}) + elif operator == "!=": + queryset = queryset.exclude(**{ + 'json__{}__exact'.format(key): value}) + elif operator == ">=": + queryset = queryset.filter(**{ + 'json__{}__gte'.format(key): value}) + elif operator == "<=": + queryset = queryset.filter(**{ + 'json__{}__lte'.format(key): value}) + else: + raise NotImplementedError() # pragma: no cover # Handle bools. elif value_type == "bool": # Booleans can be searched for (in)equality. 
@@ -480,18 +481,16 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None, continue value = str(kwargs[name]).lower() if value in ["t", "true", "yes", "y"]: - value = "true" + value = True elif value in ["f", "false", "no", "n"]: - value = "false" + value = False else: raise NotImplementedError() # pragma: no cover - where.append(self._get_json_query( - key, operator, value_type, value)) + queryset = queryset.filter(**{ + 'json__{}__exact'.format(key): value}) else: raise NotImplementedError() # pragma: no cover - queryset = queryset.extra(where=where) - if "ordering" in kwargs and kwargs["ordering"] in meta: ord = kwargs["ordering"] queryset = queryset.order_by( diff --git a/src/jane/fdsnws/station_query.py b/src/jane/fdsnws/station_query.py index 6e04701..53a3c8e 100644 --- a/src/jane/fdsnws/station_query.py +++ b/src/jane/fdsnws/station_query.py @@ -13,24 +13,13 @@ import jane from jane.documents.models import DocumentIndex, DocumentType - - -def _get_json_query(key, operator, type, value): - return JSON_QUERY_TEMPLATE_MAP[type] % (key, operator, str(value)) +from jane.jane.utils import _queryset_filter_jsonfield_isnull def _format_time(value): return value.strftime("%Y-%m-%dT%H:%M:%S+00:00") -JSON_QUERY_TEMPLATE_MAP = { - int: "CAST(json->>'%s' AS INTEGER) %s %s", - float: "CAST(json->>'%s' AS REAL) %s %s", - str: "json->>'%s' %s '%s'", - UTCDateTime: "CAST(json->>'%s' AS TIMESTAMP) %s TIMESTAMP '%s'" -} - - # Define some constants for writing StationXML files. SOURCE = settings.JANE_FDSN_STATIONXML_SOURCE SENDER = settings.JANE_FDSN_STATIONXML_SENDER @@ -124,66 +113,59 @@ def query_stations(fh, url, nodata, level, format, user, starttime=None, query = DocumentIndex.objects.filter( document__document_type="stationxml") - where = [] if starttime: # If end_date is null it is assumed to be bigger. 
- where.append( - "((json->>'end_date') is null) OR (" + - _get_json_query("end_date", ">=", UTCDateTime, starttime) + ")") + query = (query.filter(json__end_date__gte=starttime.isoformat()) | + _queryset_filter_jsonfield_isnull(query, path=['end_date'], + isnull=True)).distinct() if endtime: - where.append( - _get_json_query("start_date", "<=", UTCDateTime, endtime)) + query = query.filter(json__start_date__lte=endtime.isoformat()) if startbefore: - where.append( - _get_json_query("start_date", "<", UTCDateTime, startbefore)) + query = query.filter(json__start_date__lte=startbefore.isoformat()) if startafter: - where.append( - _get_json_query("start_date", ">", UTCDateTime, startafter)) + query = query.filter(json__start_date__gt=startafter.isoformat()) if endbefore: # If end_date is null it is assumed to be bigger. We don't want that # here. - where.append( - "((json->>'end_date') is not null) AND (" + - _get_json_query("end_date", "<", UTCDateTime, endbefore) + ")") + query = (query.filter(json__end_date__lt=endbefore.isoformat()) & + _queryset_filter_jsonfield_isnull(query, path=['end_date'], + isnull=False)).distinct() if endafter: # If end_date is null it is assumed to be bigger. 
- where.append( - "((json->>'end_date') is null) OR (" + - _get_json_query("end_date", ">", UTCDateTime, endafter) + ")") + query = (query.filter(json__end_date__gt=endafter.isoformat()) | + _queryset_filter_jsonfield_isnull(query, path=['end_date'], + isnull=True)).distinct() if minlatitude is not None: - where.append( - _get_json_query("latitude", ">=", float, minlatitude)) + query = query.filter(json__latitude__gte=minlatitude) if maxlatitude is not None: - where.append( - _get_json_query("latitude", "<=", float, maxlatitude)) + query = query.filter(json__latitude__lte=maxlatitude) if minlongitude is not None: - where.append( - _get_json_query("longitude", ">=", float, minlongitude)) + query = query.filter(json__longitude__gte=minlongitude) if maxlongitude is not None: - where.append( - _get_json_query("longitude", "<=", float, maxlongitude)) + query = query.filter(json__longitude__lte=maxlongitude) for key in ["network", "station", "location", "channel"]: argument = locals()[key] - if argument is not None and '*' not in argument: - # Two percentage signs are needed (for escaping?) - argument = [_i.replace("?", "_").replace("*", r"%%") - for _i in argument] - # A minus sign negates the query. - n = [] - y = [] - for _i in argument: - if _i.startswith("-"): - n.append("json->>'%s' NOT LIKE '%s'" % (key, _i[1:])) + if argument is not None: + for argument_ in argument: + value = argument_ + if value.startswith('-'): + value = value[1:] + query_method = query.exclude else: - y.append("json->>'%s' LIKE '%s'" % (key, _i)) - if y: - where.append(" OR ".join(y)) - if n: - where.append(" AND ".join(n)) - - if where: - query = query.extra(where=where) + query_method = query.filter + method = 'exact' + # Possible wildcards. + if "*" in value or "?" in value: + value = value.replace("*", ".*").replace("?", ".") + method = 'iregex' + # the regex field lookup on JSON fields suffers from a + # bug on Django 1.9, see django/django#6929. 
+ # We patch django's json field on django <1.11 in + # jane/__init__.py + query = query & query_method(**{ + 'json__{}__{}'.format(key, method): value}) + query = query.distinct() # Radial queries - also apply the per-user filtering right here! if latitude is not None: diff --git a/src/jane/fdsnws/tests/test_station_1.py b/src/jane/fdsnws/tests/test_station_1.py index 15b479b..f97b95b 100644 --- a/src/jane/fdsnws/tests/test_station_1.py +++ b/src/jane/fdsnws/tests/test_station_1.py @@ -611,6 +611,55 @@ def test_total_and_selected_number_of_sta_and_cha(self): self.assertEqual(inv[0][0].total_number_of_channels, 3) self.assertEqual(inv[0][0].selected_number_of_channels, 3) + def test_comma_separated_list_queries(self): + """ + Test comma separated queries in NSCL parameters + """ + client = FDSNClient(self.live_server_url) + + inv = client.get_stations(level="channel", network="BW", + station="ALTM", location="", + channel="EHZ,EHN") + c = inv.get_contents() + self.assertEqual(c["channels"], + ['BW.ALTM..EHN', 'BW.ALTM..EHZ']) + + inv = client.get_stations(level="channel", network="BW", + station="ALTM", location="", + channel="EHZ,EH?") + c = inv.get_contents() + self.assertEqual(c["channels"], + ['BW.ALTM..EHE', 'BW.ALTM..EHN', 'BW.ALTM..EHZ']) + + # the following queries should be improved, we should have multiple + # different network/station codes in the test db + client = FDSNClient(self.live_server_url) + inv = client.get_stations(level="channel", network="AA,BW", + station="ALTM", location="--", channel="EH*") + c = inv.get_contents() + self.assertEqual(c["channels"], + ['BW.ALTM..EHE', 'BW.ALTM..EHN', 'BW.ALTM..EHZ']) + + # the following queries should be improved, we should have multiple + # different network/station codes in the test db + client = FDSNClient(self.live_server_url) + inv = client.get_stations(level="channel", network="BW", + station="ALTM", location="00,,10", + channel="EH*") + c = inv.get_contents() + self.assertEqual(c["channels"], + 
['BW.ALTM..EHE', 'BW.ALTM..EHN', 'BW.ALTM..EHZ']) + + # the following queries should be improved, we should have multiple + # different network/station codes in the test db + client = FDSNClient(self.live_server_url) + inv = client.get_stations(level="channel", network="BW", + station="XXX,YYY,ALTM", location="--", + channel="EHZ") + c = inv.get_contents() + self.assertEqual(c["channels"], + ['BW.ALTM..EHZ']) + def test_seed_code_queries(self): client = FDSNClient(self.live_server_url) diff --git a/src/jane/jane/utils.py b/src/jane/jane/utils.py index cd9e84c..c85ceff 100644 --- a/src/jane/jane/utils.py +++ b/src/jane/jane/utils.py @@ -17,3 +17,33 @@ class OptionalTrailingSlashSimpleRouter(SimpleRouter): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.trailing_slash = "/?" + + +def _queryset_filter_jsonfield_isnull(queryset, path, isnull, field='json'): + """ + Replaces the buggy isnull query on json fields, see + https://stackoverflow.com/q/38528516 + + :type queryset: :class:`django.db.models.query.QuerySet` + :param queryset: Django queryset object to do the query on. + :type field: str + :param field: Name of the field (column) in the database table that holds + the json data for the query. By default should always be "json" for + Jane. + :type path: list + :param path: List of field names as strings to traverse in the json field. + For example use ``field='json', path=['end_date'], isnull=True``, for a + query of type ``queryset.filter(json__end_date__isnull=True)``. + :type isnull: bool + :param isnull: Whether to return items with the respective field being + `null` (``isnull=True``) or it *not* being `null` (``isnull=False``). 
+ """ + key = '__'.join([field] + list(path[:-1]) + ['contains']) + if not path: + raise ValueError() + kwargs = {key: {path[-1]: None}} + if isnull: + method = queryset.filter + else: + method = queryset.exclude + return method(**kwargs) diff --git a/src/jane/quakeml/tests/test_quakeml.py b/src/jane/quakeml/tests/test_quakeml.py index 915f934..9b0c437 100644 --- a/src/jane/quakeml/tests/test_quakeml.py +++ b/src/jane/quakeml/tests/test_quakeml.py @@ -393,9 +393,11 @@ def test_quakeml_queries(self): **self.valid_auth_headers).json()["results"]), 0) # All authors are None - so as soon as one searches for an author, # only results with an author will return something. + # Changed: authors that are None will be returned if doing a search + # only excluding a specific author self.assertEqual(len(self.client.get( path + "?!author=random", - **self.valid_auth_headers).json()["results"]), 0) + **self.valid_auth_headers).json()["results"]), 2) # Test the ordering. ev = self.client.get(path + "?ordering=depth_in_m").json()["results"]