Merged
Commits
22 commits
03e5978
Merge pull request #405 from ror-community/persist-es-vol
lizkrznarich Sep 18, 2024
9e80c6c
Added default sort for organization api on id field
ashwinisukale Sep 19, 2024
3e03b8c
Merge pull request #406 from ror-community/org_pagination_with_sort
lizkrznarich Sep 19, 2024
f7e5c0c
support indexing and returning records with v2.1 locations
lizkrznarich Nov 15, 2024
b14056c
update bulk create report and v2 test data
lizkrznarich Nov 15, 2024
5c5d60c
fix unit tests for v2.1 locations update
lizkrznarich Nov 18, 2024
c00dcd5
add new aggregations and update tests
lizkrznarich Nov 19, 2024
a9b7f7d
update filters and integration tests
lizkrznarich Nov 21, 2024
fe48347
update integration tests and v1 models
lizkrznarich Nov 21, 2024
6032b12
remove sort order from queries
lizkrznarich Nov 21, 2024
27d5352
Merge pull request #410 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
1447000
update dev workflow file to use test data dump with v2.1 data
lizkrznarich Dec 3, 2024
d9ae28c
Merge pull request #411 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
9624da5
remove sort order from unit tests
lizkrznarich Dec 3, 2024
3b02109
Merge pull request #412 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
f819ca8
change issue_url field to html_url in bulk update report
lizkrznarich Dec 3, 2024
58f5315
Merge pull request #413 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
1d1de03
update created last mod default schema version to 2.1
lizkrznarich Dec 11, 2024
635ac62
Merge pull request #414 from ror-community/schema-v2-1
lizkrznarich Dec 11, 2024
3f53d2e
add print for debug
lizkrznarich Dec 12, 2024
b943baf
Merge pull request #416 from ror-community/schema-v2-1
lizkrznarich Dec 12, 2024
162de35
Merge pull request #415 from ror-community/dev
lizkrznarich Dec 12, 2024
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
@@ -69,7 +69,7 @@ jobs:
- name: Setup
working-directory: ./ror-api
run: |
python manage.py setup v1.42-2024-02-21-ror-data -t
python manage.py setup v1.55-2024-10-31-ror-data -t
# Dump file temp hard coded for v2 beta
# Pulled from ror-data-test per settings.py config
- name: Test
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ docker-compose.override.yml
__pycache__/
*.pyc
rorapi/data/**
esdata/**
2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -17,6 +17,8 @@ services:
test: curl -f http://elastic:changeme@elasticsearch:8200
interval: 10s
timeout: 1s
volumes:
- ./esdata:/usr/share/elasticsearch/data
web:
container_name: rorapiweb
env_file: .env
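Note: this bind mount presumably persists the Elasticsearch index data under ./esdata across container restarts (the persist-es-vol change merged above), which would also explain the esdata/** entry added to .gitignore.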
2 changes: 1 addition & 1 deletion requirements.txt
@@ -20,7 +20,7 @@ boto3
pandas==1.4.1
numpy==1.22
titlecase==2.3
update_address @ git+https://github.com/ror-community/update_address.git@v2-locations
update_address @ git+https://github.com/ror-community/update_address.git@v2-1-locations
launchdarkly-server-sdk==7.6.1
jsonschema==3.2.0
python-magic
2 changes: 1 addition & 1 deletion rorapi/common/create_update.py
@@ -8,7 +8,7 @@
)
from rorapi.management.commands.generaterorid import check_ror_id

V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/master/ror_schema_v2_0.json")
V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/schema-v2-1/ror_schema_v2_1.json")


def update_record(json_input, existing_record):
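The V2_SCHEMA constant above now points at the v2.1 schema draft on the schema-v2-1 branch. As a rough sketch of how a candidate record could be checked against that schema with the jsonschema package pinned in requirements.txt (the helper below is illustrative, not the API's actual validation path):

import jsonschema
import requests

# Assumption: same v2.1 schema URL used in create_update.py above.
SCHEMA_URL = (
    "https://raw.githubusercontent.com/ror-community/ror-schema/"
    "refs/heads/schema-v2-1/ror_schema_v2_1.json"
)

def validation_errors(record):
    # Return a list of schema error messages for a candidate v2.1 record (empty if valid).
    schema = requests.get(SCHEMA_URL).json()
    validator = jsonschema.Draft7Validator(schema)
    return [error.message for error in validator.iter_errors(record)]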
7 changes: 5 additions & 2 deletions rorapi/common/csv_bulk.py
@@ -48,7 +48,7 @@ def process_csv(csv_file, version, validate_only):
success_msg = None
error = None
report = []
report_fields = ['row', 'ror_id', 'action', 'errors']
report_fields = ['row', 'html_url', 'ror_id', 'action', 'errors']
skipped_count = 0
updated_count = 0
new_count = 0
@@ -57,10 +57,13 @@
reader = csv.DictReader(io.StringIO(read_file))
row_num = 2
for row in reader:
html_url = None
ror_id = None
updated = False
print("Row data")
print(row)
if row['html_url']:
html_url = row['html_url']
if row['id']:
ror_id = row['id']
updated = True
@@ -86,7 +89,7 @@
skipped_count += 1
if validate_only and action == 'created':
ror_id = None
report.append({"row": row_num, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
report.append({"row": row_num, "html_url": html_url, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
row_num += 1
if new_count > 0 or updated_count > 0 or skipped_count > 0:
try:
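For reference, a sketch of the shape of one bulk-report row after this change, with the new html_url column next to the existing fields; all values below are made up, and the real rows are assembled in process_csv above:

# Illustrative only: one row as written to the bulk update report.
example_report_row = {
    "row": 2,
    "html_url": "https://github.com/ror-community/ror-updates/issues/0000",  # hypothetical request URL
    "ror_id": "https://ror.org/00xxxxx00",  # hypothetical ROR ID; blank for validate-only creates
    "action": "updated",
    "errors": "",
}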
3 changes: 3 additions & 0 deletions rorapi/common/es_utils.py
@@ -82,3 +82,6 @@ def paginate(self, page):

def get_query(self):
return self.search

def add_sort(self, field, order="asc"):
self.search = self.search.sort({field: {"order": order}})
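A minimal sketch of how the new add_sort helper might be chained when building a query, in line with the "default sort for organization api on id field" commit above; the builder class name and constructor arguments are assumptions, since only add_sort, paginate, and get_query appear in this hunk:

# Assumed usage; ESQueryBuilder and its constructor arguments are illustrative names.
qb = ESQueryBuilder("v2")
qb.add_sort("id", order="asc")   # new helper: adds an elasticsearch_dsl sort on the id field
qb.paginate(1)                   # paginate() and get_query() are shown in the hunk above
search = qb.get_query()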
9 changes: 0 additions & 9 deletions rorapi/common/models.py
@@ -40,15 +40,6 @@ def __init__(self, data):
self.count = data.doc_count


class Aggregations:
"""Aggregations model class"""

def __init__(self, data):
self.types = [TypeBucket(b) for b in data.types.buckets]
self.countries = [CountryBucket(b) for b in data.countries.buckets]
self.statuses = [StatusBucket(b) for b in data.statuses.buckets]


class Errors:
"""Errors model class"""

23 changes: 0 additions & 23 deletions rorapi/common/parsers.py

This file was deleted.

79 changes: 52 additions & 27 deletions rorapi/common/queries.py
@@ -18,7 +18,8 @@

from urllib.parse import unquote

ALLOWED_FILTERS = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status")
ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status")
ALLOWED_FILTERS_V2 = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status", "locations.geonames_details.continent_code", "locations.geonames_details.continent_name")
ALLOWED_PARAM_KEYS = ("query", "page", "filter", "query.advanced", "all_status")
ALLOWED_ALL_STATUS_VALUES = ("", "true", "false")
# includes deprecated ext id types
@@ -99,11 +100,15 @@
"links.type",
"links.value",
"locations.geonames_id",
"locations.geonames_details.name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.continent_code",
"locations.geonames_details.continent_name",
"locations.geonames_details.country_code",
"locations.geonames_details.country_name",
"locations.geonames_details.country_subdivision_code",
"locations.geonames_details.country_subdivision_name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.name",
"names.lang",
"names.types",
"names.value",
@@ -150,34 +155,46 @@ def check_status_adv_q(adv_q_string):
status_in_q = True
return status_in_q

def get_country_name_filters(country_name_field, filter_string, version):
country_name_filters = []
if version == "v1":
allowed_filters = ALLOWED_FILTERS_V1
else:
allowed_filters = ALLOWED_FILTERS_V2
search = re.findall(country_name_field + ":([^:]*)", filter_string)
if search:
for s in search:
if len(re.findall(",", s)) > 1:
s = s.rsplit(",", 1)[0]
for allowed_filter in allowed_filters:
if allowed_filter in s:
s = s.rsplit("," + allowed_filter, 1)[0]
country_name_filter = country_name_field + ":" + s
filter_string = filter_string.replace(country_name_filter, "")
country_name_filters.append(country_name_filter)
return country_name_filters


def filter_string_to_list(filter_string, version):
filter_list = []
if "country.country_code" in filter_string and version == "v2":
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if version == "v2":
if "country.country_code" in filter_string:
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if "country.country_name" in filter_string:
filter_string = filter_string.replace(
"country.country_name", "locations.geonames_details.country_name"
)
# some country names contain comma chars
# allow comma chars in country_name filter values only
# country.country_name:Germany,types:Company
if "country.country_name" in filter_string:
country_name_filters = []
search = re.findall("country.country_name:([^:]*)", filter_string)
if search:
for s in search:
if len(re.findall(",", s)) > 1:
s = s.rsplit(",", 1)[0]
for allowed_filter in ALLOWED_FILTERS:
if allowed_filter in s:
s = s.rsplit("," + allowed_filter, 1)[0]
country_name_filter = "country.country_name:" + s
v2_country_name_filter = "locations.geonames_details.country_name:" + s
filter_string = filter_string.replace(country_name_filter, "")
if version == "v2":
country_name_filters.append(v2_country_name_filter)
else:
country_name_filters.append(country_name_filter)

if version == "v1":
country_name_field = "country.country_name"
else:
country_name_field = "locations.geonames_details.country_code"
if country_name_field in filter_string:
country_name_filters = get_country_name_filters(country_name_field, filter_string, version)
filter_list = [f for f in filter_string.split(",") if f]
filter_list = filter_list + country_name_filters
else:
@@ -240,7 +257,11 @@ def validate(params, version):

valid_filters = [f for f in filters if ":" in f]
filter_keys = [f.split(":")[0] for f in valid_filters]
illegal_keys = [v for v in filter_keys if v not in ALLOWED_FILTERS]
if version == "v1":
allowed_filters = ALLOWED_FILTERS_V1
else:
allowed_filters = ALLOWED_FILTERS_V2
illegal_keys = [v for v in filter_keys if v not in allowed_filters]
errors.extend(["filter key '{}' is illegal".format(k) for k in illegal_keys])

if "page" in params:
@@ -323,6 +344,7 @@ def build_search_query(params, version):
[
("types", "types"),
("countries", "locations.geonames_details.country_code"),
("continents", "locations.geonames_details.continent_code"),
("statuses", "status"),
]
)
@@ -335,6 +357,9 @@
]
)

sort_field = params.get("sort", "id")
sort_order = params.get("order", "asc")

qb.paginate(int(params.get("page", 1)))
return qb.get_query()

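To make the per-version allow-lists concrete, a small self-contained sketch that mirrors the filter-key check in validate() above; it reuses the two tuples from this diff but is not the API's actual code path:

ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status")
ALLOWED_FILTERS_V2 = (
    "country.country_code", "locations.geonames_details.country_code", "types",
    "country.country_name", "locations.geonames_details.country_name", "status",
    "locations.geonames_details.continent_code", "locations.geonames_details.continent_name",
)

def illegal_filter_keys(filter_string, version):
    # Mirror of the key check in validate(); commas inside country_name values
    # are handled separately by get_country_name_filters() and ignored here.
    allowed = ALLOWED_FILTERS_V1 if version == "v1" else ALLOWED_FILTERS_V2
    keys = [f.split(":")[0] for f in filter_string.split(",") if ":" in f]
    return [k for k in keys if k not in allowed]

# illegal_filter_keys("locations.geonames_details.continent_code:EU,types:education", "v2") -> []
# illegal_filter_keys("locations.geonames_details.continent_code:EU", "v1")
#   -> ["locations.geonames_details.continent_code"]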
1 change: 1 addition & 0 deletions rorapi/common/record_utils.py
@@ -20,6 +20,7 @@ def get_lang_code(lang_string):
def get_file_from_url(url):
rsp = requests.get(url)
rsp.raise_for_status()
print(rsp.json())
return rsp.json()

def validate_record(data, schema):
6 changes: 0 additions & 6 deletions rorapi/common/serializers.py
@@ -13,11 +13,5 @@ class BucketSerializer(serializers.Serializer):
count = serializers.IntegerField()


class AggregationsSerializer(serializers.Serializer):
types = BucketSerializer(many=True)
countries = BucketSerializer(many=True)
statuses = BucketSerializer(many=True)


class ErrorsSerializer(serializers.Serializer):
errors = serializers.StringRelatedField(many=True)
6 changes: 3 additions & 3 deletions rorapi/tests/tests_integration/tests.py
@@ -220,9 +220,9 @@ def test_retrieval(self):

def test_query_grid_retrieval(self):
for test_org in requests.get(BASE_URL).json()['items']:
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)

def test_error(self):
output = requests.get(BASE_URL, {
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v1.py
@@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v2.py
@@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(