Skip to content
26 changes: 26 additions & 0 deletions src/jane/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,34 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import warnings

from .exceptions import *  # NOQA

from .version import get_git_version

# monkey patch django's json field to get around django/django#6929 on
# django <1.11
import django
try:
    # Parse e.g. "1.9.13" -> [1, 9]; only major/minor matter for the check.
    django_version_major_minor = list(
        map(int, django.__version__.split('.')[:2]))
except ValueError:
    # Non-numeric version string (e.g. a dev build) -- warn and leave the
    # field unpatched rather than crashing at import time.
    msg = ("Failed to determine Django version. Django's json field will not "
           "be patched (django/django#6929).")
    warnings.warn(msg)
else:
    if django_version_major_minor < [1, 11]:
        import django.contrib.postgres.fields.jsonb

        # Keep a reference to the original implementation so the wrapper
        # below can delegate to it.
        django.contrib.postgres.fields.jsonb.KeyTransform._as_sql_original = \
            django.contrib.postgres.fields.jsonb.KeyTransform.as_sql

        def as_sql(self, *args, **kwargs):
            # Wrap the generated SQL fragment in parentheses; as_sql()
            # returns a (sql, params) tuple and only the sql part changes.
            _as_sql = self._as_sql_original(*args, **kwargs)
            return '({})'.format(_as_sql[0]), _as_sql[1]

        django.contrib.postgres.fields.jsonb.KeyTransform.as_sql = as_sql

__version__ = get_git_version()
__all__ = ['__version__']
73 changes: 36 additions & 37 deletions src/jane/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,6 @@ class DocumentIndexManager(models.GeoManager):
"""
Custom queryset manager for the document indices.
"""
JSON_QUERY_TEMPLATE_MAP = {
"int": "CAST(json->>'%s' AS INTEGER) %s %s",
"float": "CAST(json->>'%s' AS REAL) %s %s",
"str": "LOWER(json->>'%s') %s LOWER('%s')",
"bool": "CAST(json->>'%s' AS BOOL) %s %s",
"UTCDateTime": "CAST(json->>'%s' AS TIMESTAMP) %s TIMESTAMP '%s'"
}

JSON_ORDERING_TEMPLATE = {
"int": "CAST(json->>'%s' AS INTEGER)",
"float": "CAST(json->>'%s' AS REAL)",
Expand All @@ -260,9 +252,6 @@ def get_queryset(self):
annotate(attachments_count=Count('attachments'))
return queryset

def _get_json_query(self, key, operator, type, value):
return self.JSON_QUERY_TEMPLATE_MAP[type] % (key, operator, str(value))

def apply_retrieve_permission(self, document_type, queryset, user):
"""
Apply potential additional restrictions based on the permissions.
Expand Down Expand Up @@ -429,10 +418,6 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None,
"bool": bool,
"UTCDateTime": UTCDateTime
}

# Filter based on the attributes in the meta field.
where = []

for key, value_type in meta.items():
# Handle strings.
if value_type == "str":
Expand All @@ -443,22 +428,24 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None,
if name not in kwargs:
continue
value = kwargs[name]
method = 'exact'
# Possible wildcards.
if "*" in value or "?" in value:
value = value.replace("?", "_").replace("*", r"%%")
# PostgreSQL specific case insensitive LIKE statement.
if operator == "=":
where.append("json->>'%s' ILIKE '%s'" % (key,
value))
elif operator == "!=":
where.append("json->>'%s' NOT ILIKE '%s'" % (
key, value))
else:
raise NotImplementedError() # pragma: no cover
value = value.replace("*", ".*")
method = 'iregex'
# the regex field lookup on JSON fields suffers from a
# bug on Django 1.9, see django/django#6929.
# We patch django's json field on django <1.11 in
# jane/__init__.py
# PostgreSQL specific case insensitive LIKE statement.
if operator == "=":
queryset = queryset.filter(**{
'json__{}__{}'.format(key, method): value})
elif operator == "!=":
queryset = queryset.exclude(**{
'json__{}__{}'.format(key, method): value})
else:
where.append(
self._get_json_query(key, operator, value_type,
value))
raise NotImplementedError() # pragma: no cover
# Handle integers, floats, and UTCDateTimes.
elif value_type in ("int", "float", "UTCDateTime"):
choices = (("min_%s", ">="), ("max_%s", "<="), ("%s", "="),
Expand All @@ -467,9 +454,23 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None,
name = name % key
if name not in kwargs:
continue
where.append(self._get_json_query(
key, operator, value_type,
type_map[value_type](kwargs[name])))
value = type_map[value_type](kwargs[name])
if value_type == "UTCDateTime":
value = value.datetime.isoformat()
if operator == "=":
queryset = queryset.filter(**{
'json__{}__exact'.format(key): value})
elif operator == "!=":
queryset = queryset.exclude(**{
'json__{}__exact'.format(key): value})
elif operator == ">=":
queryset = queryset.filter(**{
'json__{}__gte'.format(key): value})
elif operator == "<=":
queryset = queryset.filter(**{
'json__{}__lte'.format(key): value})
else:
raise NotImplementedError() # pragma: no cover
# Handle bools.
elif value_type == "bool":
# Booleans can be searched for (in)equality.
Expand All @@ -480,18 +481,16 @@ def get_filtered_queryset(self, document_type, queryset=None, user=None,
continue
value = str(kwargs[name]).lower()
if value in ["t", "true", "yes", "y"]:
value = "true"
value = True
elif value in ["f", "false", "no", "n"]:
value = "false"
value = False
else:
raise NotImplementedError() # pragma: no cover
where.append(self._get_json_query(
key, operator, value_type, value))
queryset = queryset.filter(**{
'json__{}__exact'.format(key): value})
else:
raise NotImplementedError() # pragma: no cover

queryset = queryset.extra(where=where)

if "ordering" in kwargs and kwargs["ordering"] in meta:
ord = kwargs["ordering"]
queryset = queryset.order_by(
Expand Down
90 changes: 36 additions & 54 deletions src/jane/fdsnws/station_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,13 @@

import jane
from jane.documents.models import DocumentIndex, DocumentType


def _get_json_query(key, operator, type, value):
return JSON_QUERY_TEMPLATE_MAP[type] % (key, operator, str(value))
from jane.jane.utils import _queryset_filter_jsonfield_isnull


def _format_time(value):
return value.strftime("%Y-%m-%dT%H:%M:%S+00:00")


JSON_QUERY_TEMPLATE_MAP = {
int: "CAST(json->>'%s' AS INTEGER) %s %s",
float: "CAST(json->>'%s' AS REAL) %s %s",
str: "json->>'%s' %s '%s'",
UTCDateTime: "CAST(json->>'%s' AS TIMESTAMP) %s TIMESTAMP '%s'"
}


# Define some constants for writing StationXML files.
SOURCE = settings.JANE_FDSN_STATIONXML_SOURCE
SENDER = settings.JANE_FDSN_STATIONXML_SENDER
Expand Down Expand Up @@ -124,66 +113,59 @@ def query_stations(fh, url, nodata, level, format, user, starttime=None,
query = DocumentIndex.objects.filter(
document__document_type="stationxml")

where = []
if starttime:
# If end_date is null it is assumed to be bigger.
where.append(
"((json->>'end_date') is null) OR (" +
_get_json_query("end_date", ">=", UTCDateTime, starttime) + ")")
query = (query.filter(json__end_date__gte=starttime.isoformat()) |
_queryset_filter_jsonfield_isnull(query, path=['end_date'],
isnull=True)).distinct()
if endtime:
where.append(
_get_json_query("start_date", "<=", UTCDateTime, endtime))
query = query.filter(json__start_date__lte=endtime.isoformat())
if startbefore:
where.append(
_get_json_query("start_date", "<", UTCDateTime, startbefore))
query = query.filter(json__start_date__lte=startbefore.isoformat())
if startafter:
where.append(
_get_json_query("start_date", ">", UTCDateTime, startafter))
query = query.filter(json__start_date__gt=startafter.isoformat())
if endbefore:
# If end_date is null it is assumed to be bigger. We don't want that
# here.
where.append(
"((json->>'end_date') is not null) AND (" +
_get_json_query("end_date", "<", UTCDateTime, endbefore) + ")")
query = (query.filter(json__end_date__lt=endbefore.isoformat()) &
_queryset_filter_jsonfield_isnull(query, path=['end_date'],
isnull=False)).distinct()
if endafter:
# If end_date is null it is assumed to be bigger.
where.append(
"((json->>'end_date') is null) OR (" +
_get_json_query("end_date", ">", UTCDateTime, endafter) + ")")
query = (query.filter(json__end_date__gt=endafter.isoformat()) |
_queryset_filter_jsonfield_isnull(query, path=['end_date'],
isnull=True)).distinct()
if minlatitude is not None:
where.append(
_get_json_query("latitude", ">=", float, minlatitude))
query = query.filter(json__latitude__gte=minlatitude)
if maxlatitude is not None:
where.append(
_get_json_query("latitude", "<=", float, maxlatitude))
query = query.filter(json__latitude__lte=maxlatitude)
if minlongitude is not None:
where.append(
_get_json_query("longitude", ">=", float, minlongitude))
query = query.filter(json__longitude__gte=minlongitude)
if maxlongitude is not None:
where.append(
_get_json_query("longitude", "<=", float, maxlongitude))
query = query.filter(json__longitude__lte=maxlongitude)

for key in ["network", "station", "location", "channel"]:
argument = locals()[key]
if argument is not None and '*' not in argument:
# Two percentage signs are needed (for escaping?)
argument = [_i.replace("?", "_").replace("*", r"%%")
for _i in argument]
# A minus sign negates the query.
n = []
y = []
for _i in argument:
if _i.startswith("-"):
n.append("json->>'%s' NOT LIKE '%s'" % (key, _i[1:]))
if argument is not None:
for argument_ in argument:
value = argument_
if value.startswith('-'):
value = value[1:]
query_method = query.exclude
else:
y.append("json->>'%s' LIKE '%s'" % (key, _i))
if y:
where.append(" OR ".join(y))
if n:
where.append(" AND ".join(n))

if where:
query = query.extra(where=where)
query_method = query.filter
method = 'exact'
# Possible wildcards.
if "*" in value or "?" in value:
value = value.replace("*", ".*").replace("?", ".")
method = 'iregex'
# the regex field lookup on JSON fields suffers from a
# bug on Django 1.9, see django/django#6929.
# We patch django's json field on django <1.11 in
# jane/__init__.py
query = query & query_method(**{
'json__{}__{}'.format(key, method): value})
query = query.distinct()

# Radial queries - also apply the per-user filtering right here!
if latitude is not None:
Expand Down
49 changes: 49 additions & 0 deletions src/jane/fdsnws/tests/test_station_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,55 @@ def test_total_and_selected_number_of_sta_and_cha(self):
self.assertEqual(inv[0][0].total_number_of_channels, 3)
self.assertEqual(inv[0][0].selected_number_of_channels, 3)

def test_comma_separated_list_queries(self):
    """
    Test comma separated queries in NSCL parameters
    """
    def _fetch_channels(fdsn_client, **query):
        # Run a channel-level station query and return the channel listing.
        inventory = fdsn_client.get_stations(level="channel", **query)
        return inventory.get_contents()["channels"]

    client = FDSNClient(self.live_server_url)

    self.assertEqual(
        _fetch_channels(client, network="BW", station="ALTM",
                        location="", channel="EHZ,EHN"),
        ['BW.ALTM..EHN', 'BW.ALTM..EHZ'])

    self.assertEqual(
        _fetch_channels(client, network="BW", station="ALTM",
                        location="", channel="EHZ,EH?"),
        ['BW.ALTM..EHE', 'BW.ALTM..EHN', 'BW.ALTM..EHZ'])

    # the following queries should be improved, we should have multiple
    # different network/station codes in the test db
    client = FDSNClient(self.live_server_url)
    self.assertEqual(
        _fetch_channels(client, network="AA,BW", station="ALTM",
                        location="--", channel="EH*"),
        ['BW.ALTM..EHE', 'BW.ALTM..EHN', 'BW.ALTM..EHZ'])

    # the following queries should be improved, we should have multiple
    # different network/station codes in the test db
    client = FDSNClient(self.live_server_url)
    self.assertEqual(
        _fetch_channels(client, network="BW", station="ALTM",
                        location="00,,10", channel="EH*"),
        ['BW.ALTM..EHE', 'BW.ALTM..EHN', 'BW.ALTM..EHZ'])

    # the following queries should be improved, we should have multiple
    # different network/station codes in the test db
    client = FDSNClient(self.live_server_url)
    self.assertEqual(
        _fetch_channels(client, network="BW", station="XXX,YYY,ALTM",
                        location="--", channel="EHZ"),
        ['BW.ALTM..EHZ'])

def test_seed_code_queries(self):
client = FDSNClient(self.live_server_url)

Expand Down
30 changes: 30 additions & 0 deletions src/jane/jane/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,33 @@ class OptionalTrailingSlashSimpleRouter(SimpleRouter):
def __init__(self, *args, **kwargs):
    """
    Initialize the router and make the trailing slash in generated URL
    patterns optional by using the ``/?`` regex fragment.
    """
    super().__init__(*args, **kwargs)
    # NOTE(review): set *after* super().__init__() so it overrides any
    # trailing_slash value the base router assigns during initialization.
    self.trailing_slash = "/?"


def _queryset_filter_jsonfield_isnull(queryset, path, isnull, field='json'):
"""
Replaces the buggy isnull query on json fields, see
https://stackoverflow.com/q/38528516

:type queryset: :class:`django.db.models.query.QuerySet`
:param queryset: Django queryset object to do the query on.
:type field: str
:param field: Name of the field (column) in the databse table that holds
the json data for the query. By default should always be "json" for
Jane.
:type path: list
:param path: List of field names as strings to traverse in the json field.
For example use ``field='json', path=['end_date'], isnull=True``, for a
query of type ``queryset.filter(json__end_date__isnull=True)``.
:type isnull: bool
:param isnull: Whether to return items with the respective field being
`null` (``isnull=True``) or it *not* being `null` (``isnull=False``).
"""
key = '__'.join([field] + list(path[:-1]) + ['contains'])
if not path:
raise ValueError()
kwargs = {key: {path[-1]: None}}
if isnull:
method = queryset.filter
else:
method = queryset.exclude
return method(**kwargs)
4 changes: 3 additions & 1 deletion src/jane/quakeml/tests/test_quakeml.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,11 @@ def test_quakeml_queries(self):
**self.valid_auth_headers).json()["results"]), 0)
# All authors are None - so as soon as one searches for an author,
# only results with an author will return something.
# Changed: authors that are None will be returned if doing a search
# only excluding a specific author
self.assertEqual(len(self.client.get(
path + "?!author=random",
**self.valid_auth_headers).json()["results"]), 0)
**self.valid_auth_headers).json()["results"]), 2)

# Test the ordering.
ev = self.client.get(path + "?ordering=depth_in_m").json()["results"]
Expand Down