Merged
Commits
22 commits
03e5978
Merge pull request #405 from ror-community/persist-es-vol
lizkrznarich Sep 18, 2024
9e80c6c
Added default sort for organization api on id field
ashwinisukale Sep 19, 2024
3e03b8c
Merge pull request #406 from ror-community/org_pagination_with_sort
lizkrznarich Sep 19, 2024
f7e5c0c
support indexing and returning records with v2.1 locations
lizkrznarich Nov 15, 2024
b14056c
update bulk create report and v2 test data
lizkrznarich Nov 15, 2024
5c5d60c
fix unit tests for v2.1 locations update
lizkrznarich Nov 18, 2024
c00dcd5
add new aggregations and update tests
lizkrznarich Nov 19, 2024
a9b7f7d
update filters and integration tests
lizkrznarich Nov 21, 2024
fe48347
update integration tests and v1 models
lizkrznarich Nov 21, 2024
6032b12
remove sort order from queries
lizkrznarich Nov 21, 2024
27d5352
Merge pull request #410 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
1447000
update dev workflow file to use test data dump with v2.1 data
lizkrznarich Dec 3, 2024
d9ae28c
Merge pull request #411 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
9624da5
remove sort order from unit tests
lizkrznarich Dec 3, 2024
3b02109
Merge pull request #412 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
f819ca8
change issue_url field to html_url in bulk update report
lizkrznarich Dec 3, 2024
58f5315
Merge pull request #413 from ror-community/schema-v2-1
lizkrznarich Dec 3, 2024
1d1de03
update created last mod default schema version to 2.1
lizkrznarich Dec 11, 2024
635ac62
Merge pull request #414 from ror-community/schema-v2-1
lizkrznarich Dec 11, 2024
3f53d2e
add print for debug
lizkrznarich Dec 12, 2024
b943baf
Merge pull request #416 from ror-community/schema-v2-1
lizkrznarich Dec 12, 2024
162de35
Merge pull request #415 from ror-community/dev
lizkrznarich Dec 12, 2024
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
@@ -69,7 +69,7 @@ jobs:
- name: Setup
working-directory: ./ror-api
run: |
python manage.py setup v1.42-2024-02-21-ror-data -t
python manage.py setup v1.55-2024-10-31-ror-data -t
# Dump file temp hard coded for v2 beta
# Pulled from ror-data-test per settings.py config
- name: Test
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ docker-compose.override.yml
__pycache__/
*.pyc
rorapi/data/**
esdata/**
2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -17,6 +17,8 @@ services:
test: curl -f http://elastic:changeme@elasticsearch:8200
interval: 10s
timeout: 1s
volumes:
- ./esdata:/usr/share/elasticsearch/data
web:
container_name: rorapiweb
env_file: .env
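Note: this bind mount presumably persists the Elasticsearch index data under ./esdata across container restarts (the persist-es-vol change merged above), which would also explain the esdata/** entry added to .gitignore.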
2 changes: 1 addition & 1 deletion requirements.txt
@@ -20,7 +20,7 @@ boto3
pandas==1.4.1
numpy==1.22
titlecase==2.3
update_address @ git+https://github.com/ror-community/update_address.git@v2-locations
update_address @ git+https://github.com/ror-community/update_address.git@v2-1-locations
launchdarkly-server-sdk==7.6.1
jsonschema==3.2.0
python-magic
2 changes: 1 addition & 1 deletion rorapi/common/create_update.py
@@ -8,7 +8,7 @@
)
from rorapi.management.commands.generaterorid import check_ror_id

V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/master/ror_schema_v2_0.json")
V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/schema-v2-1/ror_schema_v2_1.json")


def update_record(json_input, existing_record):
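The V2_SCHEMA constant above now points at the v2.1 schema draft on the schema-v2-1 branch. As a rough sketch of how a candidate record could be checked against that schema with the jsonschema package pinned in requirements.txt (the helper below is illustrative, not the API's actual validation path):

import jsonschema
import requests

# Assumption: same v2.1 schema URL used in create_update.py above.
SCHEMA_URL = (
    "https://raw.githubusercontent.com/ror-community/ror-schema/"
    "refs/heads/schema-v2-1/ror_schema_v2_1.json"
)

def validation_errors(record):
    # Return a list of schema error messages for a candidate v2.1 record (empty if valid).
    schema = requests.get(SCHEMA_URL).json()
    validator = jsonschema.Draft7Validator(schema)
    return [error.message for error in validator.iter_errors(record)]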
7 changes: 5 additions & 2 deletions rorapi/common/csv_bulk.py
@@ -48,7 +48,7 @@ def process_csv(csv_file, version, validate_only):
success_msg = None
error = None
report = []
report_fields = ['row', 'ror_id', 'action', 'errors']
report_fields = ['row', 'html_url', 'ror_id', 'action', 'errors']
skipped_count = 0
updated_count = 0
new_count = 0
@@ -57,10 +57,13 @@
reader = csv.DictReader(io.StringIO(read_file))
row_num = 2
for row in reader:
html_url = None
ror_id = None
updated = False
print("Row data")
print(row)
if row['html_url']:
html_url = row['html_url']
if row['id']:
ror_id = row['id']
updated = True
@@ -86,7 +89,7 @@
skipped_count += 1
if validate_only and action == 'created':
ror_id = None
report.append({"row": row_num, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
report.append({"row": row_num, "html_url": html_url, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
row_num += 1
if new_count > 0 or updated_count > 0 or skipped_count > 0:
try:
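For reference, a sketch of the shape of one bulk-report row after this change, with the new html_url column next to the existing fields; all values below are made up, and the real rows are assembled in process_csv above:

# Illustrative only: one row as written to the bulk update report.
example_report_row = {
    "row": 2,
    "html_url": "https://github.com/ror-community/ror-updates/issues/0000",  # hypothetical request URL
    "ror_id": "https://ror.org/00xxxxx00",  # hypothetical ROR ID; blank for validate-only creates
    "action": "updated",
    "errors": "",
}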
3 changes: 3 additions & 0 deletions rorapi/common/es_utils.py
@@ -82,3 +82,6 @@ def paginate(self, page):

def get_query(self):
return self.search

def add_sort(self, field, order="asc"):
self.search = self.search.sort({field: {"order": order}})
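A minimal sketch of how the new add_sort helper might be chained when building a query, in line with the "default sort for organization api on id field" commit above; the builder class name and constructor arguments are assumptions, since only add_sort, paginate, and get_query appear in this hunk:

# Assumed usage; ESQueryBuilder and its constructor arguments are illustrative names.
qb = ESQueryBuilder("v2")
qb.add_sort("id", order="asc")   # new helper: adds an elasticsearch_dsl sort on the id field
qb.paginate(1)                   # paginate() and get_query() are shown in the hunk above
search = qb.get_query()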
9 changes: 0 additions & 9 deletions rorapi/common/models.py
@@ -40,15 +40,6 @@ def __init__(self, data):
self.count = data.doc_count


class Aggregations:
"""Aggregations model class"""

def __init__(self, data):
self.types = [TypeBucket(b) for b in data.types.buckets]
self.countries = [CountryBucket(b) for b in data.countries.buckets]
self.statuses = [StatusBucket(b) for b in data.statuses.buckets]


class Errors:
"""Errors model class"""

23 changes: 0 additions & 23 deletions rorapi/common/parsers.py

This file was deleted.

79 changes: 52 additions & 27 deletions rorapi/common/queries.py
@@ -18,7 +18,8 @@

from urllib.parse import unquote

ALLOWED_FILTERS = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status")
ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status")
ALLOWED_FILTERS_V2 = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status", "locations.geonames_details.continent_code", "locations.geonames_details.continent_name")
ALLOWED_PARAM_KEYS = ("query", "page", "filter", "query.advanced", "all_status")
ALLOWED_ALL_STATUS_VALUES = ("", "true", "false")
# includes deprecated ext id types
@@ -99,11 +100,15 @@
"links.type",
"links.value",
"locations.geonames_id",
"locations.geonames_details.name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.continent_code",
"locations.geonames_details.continent_name",
"locations.geonames_details.country_code",
"locations.geonames_details.country_name",
"locations.geonames_details.country_subdivision_code",
"locations.geonames_details.country_subdivision_name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.name",
"names.lang",
"names.types",
"names.value",
@@ -150,34 +155,46 @@ def check_status_adv_q(adv_q_string):
status_in_q = True
return status_in_q

def get_country_name_filters(country_name_field, filter_string, version):
country_name_filters = []
if version == "v1":
allowed_filters = ALLOWED_FILTERS_V1
else:
allowed_filters = ALLOWED_FILTERS_V2
search = re.findall(country_name_field + ":([^:]*)", filter_string)
if search:
for s in search:
if len(re.findall(",", s)) > 1:
s = s.rsplit(",", 1)[0]
for allowed_filter in allowed_filters:
if allowed_filter in s:
s = s.rsplit("," + allowed_filter, 1)[0]
country_name_filter = country_name_field + ":" + s
filter_string = filter_string.replace(country_name_filter, "")
country_name_filters.append(country_name_filter)
return country_name_filters


def filter_string_to_list(filter_string, version):
filter_list = []
if "country.country_code" in filter_string and version == "v2":
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if version == "v2":
if "country.country_code" in filter_string:
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if "country.country_name" in filter_string:
filter_string = filter_string.replace(
"country.country_name", "locations.geonames_details.country_name"
)
# some country names contain comma chars
# allow comma chars in country_name filter values only
# country.country_name:Germany,types:Company
if "country.country_name" in filter_string:
country_name_filters = []
search = re.findall("country.country_name:([^:]*)", filter_string)
if search:
for s in search:
if len(re.findall(",", s)) > 1:
s = s.rsplit(",", 1)[0]
for allowed_filter in ALLOWED_FILTERS:
if allowed_filter in s:
s = s.rsplit("," + allowed_filter, 1)[0]
country_name_filter = "country.country_name:" + s
v2_country_name_filter = "locations.geonames_details.country_name:" + s
filter_string = filter_string.replace(country_name_filter, "")
if version == "v2":
country_name_filters.append(v2_country_name_filter)
else:
country_name_filters.append(country_name_filter)

if version == "v1":
country_name_field = "country.country_name"
else:
country_name_field = "locations.geonames_details.country_code"
if country_name_field in filter_string:
country_name_filters = get_country_name_filters(country_name_field, filter_string, version)
filter_list = [f for f in filter_string.split(",") if f]
filter_list = filter_list + country_name_filters
else:
@@ -240,7 +257,11 @@ def validate(params, version):

valid_filters = [f for f in filters if ":" in f]
filter_keys = [f.split(":")[0] for f in valid_filters]
illegal_keys = [v for v in filter_keys if v not in ALLOWED_FILTERS]
if version == "v1":
allowed_filters = ALLOWED_FILTERS_V1
else:
allowed_filters = ALLOWED_FILTERS_V2
illegal_keys = [v for v in filter_keys if v not in allowed_filters]
errors.extend(["filter key '{}' is illegal".format(k) for k in illegal_keys])

if "page" in params:
@@ -323,6 +344,7 @@ def build_search_query(params, version):
[
("types", "types"),
("countries", "locations.geonames_details.country_code"),
("continents", "locations.geonames_details.continent_code"),
("statuses", "status"),
]
)
@@ -335,6 +357,9 @@
]
)

sort_field = params.get("sort", "id")
sort_order = params.get("order", "asc")

qb.paginate(int(params.get("page", 1)))
return qb.get_query()

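To make the per-version allow-lists concrete, a small self-contained sketch that mirrors the filter-key check in validate() above; it reuses the two tuples from this diff but is not the API's actual code path:

ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status")
ALLOWED_FILTERS_V2 = (
    "country.country_code", "locations.geonames_details.country_code", "types",
    "country.country_name", "locations.geonames_details.country_name", "status",
    "locations.geonames_details.continent_code", "locations.geonames_details.continent_name",
)

def illegal_filter_keys(filter_string, version):
    # Mirror of the key check in validate(); commas inside country_name values
    # are handled separately by get_country_name_filters() and ignored here.
    allowed = ALLOWED_FILTERS_V1 if version == "v1" else ALLOWED_FILTERS_V2
    keys = [f.split(":")[0] for f in filter_string.split(",") if ":" in f]
    return [k for k in keys if k not in allowed]

# illegal_filter_keys("locations.geonames_details.continent_code:EU,types:education", "v2") -> []
# illegal_filter_keys("locations.geonames_details.continent_code:EU", "v1")
#   -> ["locations.geonames_details.continent_code"]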
1 change: 1 addition & 0 deletions rorapi/common/record_utils.py
@@ -20,6 +20,7 @@ def get_lang_code(lang_string):
def get_file_from_url(url):
rsp = requests.get(url)
rsp.raise_for_status()
print(rsp.json())
return rsp.json()

def validate_record(data, schema):
6 changes: 0 additions & 6 deletions rorapi/common/serializers.py
@@ -13,11 +13,5 @@ class BucketSerializer(serializers.Serializer):
count = serializers.IntegerField()


class AggregationsSerializer(serializers.Serializer):
types = BucketSerializer(many=True)
countries = BucketSerializer(many=True)
statuses = BucketSerializer(many=True)


class ErrorsSerializer(serializers.Serializer):
errors = serializers.StringRelatedField(many=True)
6 changes: 3 additions & 3 deletions rorapi/tests/tests_integration/tests.py
@@ -220,9 +220,9 @@ def test_retrieval(self):

def test_query_grid_retrieval(self):
for test_org in requests.get(BASE_URL).json()['items']:
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)

def test_error(self):
output = requests.get(BASE_URL, {
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v1.py
@@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v2.py
@@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(