diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/CacheNoLandGeometry.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/CacheNoLandGeometry.java index c032c0ac..befb5d94 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/CacheNoLandGeometry.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/CacheNoLandGeometry.java @@ -38,7 +38,8 @@ public Map getAllNoLandGeometry() { null, null, null, - null); + null, + false); return result.collections .stream() diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java index 15779168..ff76b4ac 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java @@ -211,7 +211,8 @@ protected ElasticSearchBase.SearchResult searchCollectionsB null, createSortOptions(sortBy, CQLFields.class), null, - null); + null, + false); } @Override @@ -240,6 +241,11 @@ public ElasticSearchBase.SearchResult searchAllCollections( return searchCollectionsByIds(null, Boolean.FALSE, sortBy); } + /*** + * This function is used for searching by user input query. In such a case, the sorting is driven by two factors: the relevance score (powered by Elasticsearch) in the `._score` field, + * and the importance score (powered by IMOS internal ranking, which has a maximum value of 106). 
+ * The final score is relevance_score * (importance_score / max(importance_score)) + */ @Override public ElasticSearchBase.SearchResult searchByParameters(List keywords, String cql, List properties, String sortBy, CQLCrsType coor) throws CQLException { @@ -333,6 +339,14 @@ public ElasticSearchBase.SearchResult searchByParameters(Li .toList(); } + boolean useScriptScore = false; + if (sortBy != null && !sortBy.isEmpty()) { + // only use script_score if sortby contains "score" and should field is not empty + if (sortBy.toLowerCase().contains("score") && should != null && !should.isEmpty()) { + useScriptScore = true; + } + } + return searchCollectionBy( null, should, @@ -341,7 +355,8 @@ public ElasticSearchBase.SearchResult searchByParameters(Li searchAfter, createSortOptions(sortBy, CQLFields.class), score, - maxSize + maxSize, + useScriptScore ); } } diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearchBase.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearchBase.java index 75059eeb..eb495eb9 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearchBase.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearchBase.java @@ -156,8 +156,8 @@ protected SearchResult searchCollectionBy(final List final List searchAfter, final List sortOptions, final Double score, - final Long maxSize) { - + final Long maxSize, + final boolean useScriptScore) { Supplier builderSupplier = () -> { SearchRequest.Builder builder = new SearchRequest.Builder(); builder.index(indexName) @@ -165,23 +165,92 @@ protected SearchResult searchCollectionBy(final List // we use the smaller one. The internal page size is used to get the result by // batch, lets say page is 20 and internal is 10, then we do it in two batch. // But if we request 5 only, then there is no point to load 10 - .size(maxSize != null && maxSize < pageSize ? 
maxSize.intValue() : pageSize) - .query(q -> q.bool(createBoolQueryForProperties(queries, should, filters))); + .size(maxSize != null && maxSize < pageSize ? maxSize.intValue() : pageSize); + + // use script score if search with text, in such case, the final score depends on both relevance and metadata quality + // put query in script block + if (useScriptScore) { + builder.query(q -> q.scriptScore(ss -> ss + // to get the original _score from ELasticsearch + .query(bq -> bq.bool(createBoolQueryForProperties(queries, should, filters))) + .script(s -> s.inline(i -> i + .lang("painless") + .source( + // Step 1: Retrieve internal quality score from summaries.score field + // Default to 0 if field doesn't exist or is empty + "double internalScore = doc.containsKey('summaries.score') && " + + "!doc['summaries.score'].empty ? doc['summaries.score'].value : 0.0; " + + + // Step 2: Normalize internal score to 0-1 range + // Assuming summaries.score is in range 0-106 + "double normalizedScore = internalScore / 106.0; " + + + // Step 3: Ensure minimum multiplier to avoid zero scores + "double multiplier = Math.max(normalizedScore, 0.01); " + + + // Step 4: Calculate final score + // Final score = Elasticsearch relevance * normalized quality + "return _score * multiplier;" + ) + ) + )) + ); + } + // use original query logic + else { + builder.query(q -> q.bool(createBoolQueryForProperties(queries, should, filters))); + } if(searchAfter != null) { builder.searchAfter(searchAfter); } - if(sortOptions != null) { - builder.sort(sortOptions); - } + // to use sort by uuid as a tiebreaker + boolean hasUuidSort = false; - builder.sort(so -> so - // We need a unique key for the search, cannot use _id in v8 anymore, so we need - // to sort using the keyword, this field is not for search and therefore not in enum - .field(FieldSort.of(f -> f - .field(StacBasicField.UUID.sortField) - .order(SortOrder.Asc)))); + // apply sort options + if (useScriptScore) { + // add sort options + if 
(sortOptions != null && !sortOptions.isEmpty()) { + for (SortOptions sortOption : sortOptions) { + builder.sort(sortOption); + + // check if it has sort by id option + if (sortOption.isField() && + sortOption.field().field().equals(StacBasicField.UUID.sortField)) { + hasUuidSort = true; + } + } + } + } + else { + // when not using script_score, apply all sort options + if (sortOptions != null && !sortOptions.isEmpty()) { + for (SortOptions sortOption : sortOptions) { + builder.sort(sortOption); + + // check if it has sort by id option + if (sortOption.isField() && + sortOption.field().field().equals(StacBasicField.UUID.sortField)) { + hasUuidSort = true; + } + } + } + else if (should != null && !should.isEmpty()) { + // If no sortOptions provided but there are text queries, + // default to sorting by _score + builder.sort(so -> so.score(sc -> sc.order(SortOrder.Desc))); + } + } + // add sort by id as the final tiebreaker if it was not already applied + if (!hasUuidSort) { + builder.sort(so -> so + // We need a unique key for the search, cannot use _id in v8 anymore, so we need + // to sort using the keyword, this field is not for search and therefore not in enum + .field(FieldSort.of(f -> f + .field(StacBasicField.UUID.sortField) + .order(SortOrder.Asc)))); + } if(score != null) { // By default we do not setup any min_score, the api caller should pass it in so diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java index d6fae3fe..08ee28d4 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java @@ -526,7 +526,7 @@ public void verifyCQLPropertyScore() throws IOException { // Increase score will drop one record collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q='dataset includes'&filter=score>=3", Collections.class); - assertEquals(3, 
Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 3, with score 3"); + assertEquals(2, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 3, with score 3"); assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011"); } diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java index 68dda6c8..c357e1f5 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java @@ -63,11 +63,17 @@ public void verifyCorrectInternalPagingLargeData() throws IOException { // Given 6 records and we set page to 4, that means each query elastic return 4 record only // and the logic to load the reset can kick in. super.insertJsonToElasticRecordIndex( + // set test summaries.score 90 "5c418118-2581-4936-b6fd-d6bedfe74f62.json", + // set test summaries.score 106 "19da2ce7-138f-4427-89de-a50c724f5f54.json", + // set test summaries.score 70 "516811d7-cd1e-207a-e0440003ba8c79dd.json", + // set test summaries.score 60 "7709f541-fc0c-4318-b5b9-9053aa474e0e.json", + // set test summaries.score 50 "bc55eff4-7596-3565-e044-00144fdd4fa6.json", + // set test summaries.score 100 "bf287dfe-9ce4-4969-9c59-51c39ea4d011.json"); // Call rest api directly and get query result @@ -97,6 +103,7 @@ public void verifyCorrectInternalPagingLargeData() throws IOException { } /** * with page_size set, the max number of record return will equals page_size + * With default search, the sort should follow uuid order */ @Test public void verifyCorrectPageSizeDataReturn() throws IOException { @@ -105,11 +112,17 @@ public void verifyCorrectPageSizeDataReturn() throws IOException { // Given 6 records and we set page to 4, that means each query elastic return 4 
record only // and the logic to load the reset can kick in. super.insertJsonToElasticRecordIndex( + // set test summaries.score 90 "5c418118-2581-4936-b6fd-d6bedfe74f62.json", + // set test summaries.score 106 "19da2ce7-138f-4427-89de-a50c724f5f54.json", + // set test summaries.score 70 "516811d7-cd1e-207a-e0440003ba8c79dd.json", + // set test summaries.score 60 "7709f541-fc0c-4318-b5b9-9053aa474e0e.json", + // set test summaries.score 50 "bc55eff4-7596-3565-e044-00144fdd4fa6.json", + // set test summaries.score 100 "bf287dfe-9ce4-4969-9c59-51c39ea4d011.json"); // Call rest api directly and get query result @@ -132,12 +145,12 @@ public void verifyCorrectPageSizeDataReturn() throws IOException { assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); assertEquals("1.0", collections.getBody().getSearchAfter().get(0), "Search after 1 value"); assertEquals( - "100", + "90", collections.getBody().getSearchAfter().get(1), "search_after 2 arg" ); assertEquals( - "str:bf287dfe-9ce4-4969-9c59-51c39ea4d011", + "str:5c418118-2581-4936-b6fd-d6bedfe74f62", collections.getBody().getSearchAfter().get(2), "search_after 3 arg" ); @@ -185,6 +198,11 @@ public void verifyCorrectPageSizeDataReturn() throws IOException { * Extreme case, page size set to 1 and query text "dataset" and page one by one. Only part of the json * will be return, the sort value should give you the next item and you will be able to go to next one. 
* The first sort value is the relevant and because of query text the value will be something greater than 1.0 + * After weighted sorting, the actual order is (for the first 4 records): + * Document 0: UUID=bf287dfe-9ce4-4969-9c59-51c39ea4d011 + * Document 1: UUID=19da2ce7-138f-4427-89de-a50c724f5f54 + * Document 2: UUID=bc55eff4-7596-3565-e044-00144fdd4fa6 + * Document 3: UUID=7709f541-fc0c-4318-b5b9-9053aa474e0e */ @Test public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { @@ -193,11 +211,17 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { // Given 6 records and we set page to 4, that means each query elastic return 4 record only // and the logic to load the reset can kick in. super.insertJsonToElasticRecordIndex( + // set test summaries.score 90 "5c418118-2581-4936-b6fd-d6bedfe74f62.json", + // set test summaries.score 106 "19da2ce7-138f-4427-89de-a50c724f5f54.json", + // set test summaries.score 70 "516811d7-cd1e-207a-e0440003ba8c79dd.json", + // set test summaries.score 60 "7709f541-fc0c-4318-b5b9-9053aa474e0e.json", + // set test summaries.score 50 "bc55eff4-7596-3565-e044-00144fdd4fa6.json", + // set test summaries.score 100 "bf287dfe-9ce4-4969-9c59-51c39ea4d011.json"); // Call rest api directly and get query result with search on "dataset" @@ -220,7 +244,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); assertEquals( - "str:bc55eff4-7596-3565-e044-00144fdd4fa6", + "str:bf287dfe-9ce4-4969-9c59-51c39ea4d011", collections.getBody().getSearchAfter().get(2), "search_after 2 arg" ); @@ -232,7 +256,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { String.format("'%s||%s||%s'", collections.getBody().getSearchAfter().get(0), collections.getBody().getSearchAfter().get(1), - 
"bc55eff4-7596-3565-e044-00144fdd4fa6"), + "bf287dfe-9ce4-4969-9c59-51c39ea4d011"), HttpMethod.GET, null, new ParameterizedTypeReference<>() { @@ -249,7 +273,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); assertEquals( - "str:7709f541-fc0c-4318-b5b9-9053aa474e0e", + "str:19da2ce7-138f-4427-89de-a50c724f5f54", collections.getBody().getSearchAfter().get(2), "search_after 3 arg" ); @@ -278,7 +302,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); assertEquals( - "str:19da2ce7-138f-4427-89de-a50c724f5f54", + "str:5c418118-2581-4936-b6fd-d6bedfe74f62", collections.getBody().getSearchAfter().get(2), "search_after 3 value" ); @@ -286,6 +310,11 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { /** * Similar to verifyCorrectPageSizeDataReturnWithQuery and add score in the query, * this is used to verify a bug fix where page_size and score crash the query + * After weighted sorting, the actual order is (for the first 4 records): + * Document 0: UUID=bf287dfe-9ce4-4969-9c59-51c39ea4d011 + * Document 1: UUID=19da2ce7-138f-4427-89de-a50c724f5f54 + * Document 2: UUID=bc55eff4-7596-3565-e044-00144fdd4fa6 + * Document 3: UUID=7709f541-fc0c-4318-b5b9-9053aa474e0e */ @Test public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException { @@ -296,11 +325,17 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException { // Given 6 records and we set page to 4, that means each query elastic return 4 record only // and the logic to load the reset can kick in. 
super.insertJsonToElasticRecordIndex( + // set test summaries.score 90 "5c418118-2581-4936-b6fd-d6bedfe74f62.json", + // set test summaries.score 106 "19da2ce7-138f-4427-89de-a50c724f5f54.json", + // set test summaries.score 70 "516811d7-cd1e-207a-e0440003ba8c79dd.json", + // set test summaries.score 60 "7709f541-fc0c-4318-b5b9-9053aa474e0e.json", + // set test summaries.score 50 "bc55eff4-7596-3565-e044-00144fdd4fa6.json", + // set test summaries.score 100 "bf287dfe-9ce4-4969-9c59-51c39ea4d011.json"); // Call rest api directly and get query result with search on "dataset" @@ -329,12 +364,12 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException { logger.debug("verifyCorrectPageSizeAndScoreWithQuery - search after {}", collections.getBody().getSearchAfter()); assertEquals( - "80", + "100", collections.getBody().getSearchAfter().get(1), "search_after 2 value" ); assertEquals( - "str:bc55eff4-7596-3565-e044-00144fdd4fa6", + "str:bf287dfe-9ce4-4969-9c59-51c39ea4d011", collections.getBody().getSearchAfter().get(2), "search_after 3 value" ); @@ -346,7 +381,7 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException { String.format("'%s|| %s || %s'", collections.getBody().getSearchAfter().get(0), collections.getBody().getSearchAfter().get(1), - "bc55eff4-7596-3565-e044-00144fdd4fa6"), + "bf287dfe-9ce4-4969-9c59-51c39ea4d011"), HttpMethod.GET, null, new ParameterizedTypeReference<>() { diff --git a/server/src/test/resources/databag/19da2ce7-138f-4427-89de-a50c724f5f54.json b/server/src/test/resources/databag/19da2ce7-138f-4427-89de-a50c724f5f54.json index 7349641c..74116701 100644 --- a/server/src/test/resources/databag/19da2ce7-138f-4427-89de-a50c724f5f54.json +++ b/server/src/test/resources/databag/19da2ce7-138f-4427-89de-a50c724f5f54.json @@ -28,7 +28,7 @@ ] }, "summaries": { - "score": 100, + "score": 106, "status": "completed", "credits": [ "Australia’s Integrated Marine Observing System (IMOS) is enabled by the National 
Collaborative Research Infrastructure Strategy (NCRIS). It is operated by a consortium of institutions as an unincorporated joint venture, with the University of Tasmania as Lead Agent.", diff --git a/server/src/test/resources/databag/5c418118-2581-4936-b6fd-d6bedfe74f62.json b/server/src/test/resources/databag/5c418118-2581-4936-b6fd-d6bedfe74f62.json index 22d740a5..c4975058 100644 --- a/server/src/test/resources/databag/5c418118-2581-4936-b6fd-d6bedfe74f62.json +++ b/server/src/test/resources/databag/5c418118-2581-4936-b6fd-d6bedfe74f62.json @@ -244,7 +244,7 @@ } ], "summaries": { - "score": 100, + "score": 90, "dataset_provider": null, "dataset_group": "aodn", "proj:geometry": { diff --git a/server/src/test/resources/databag/7709f541-fc0c-4318-b5b9-9053aa474e0e.json b/server/src/test/resources/databag/7709f541-fc0c-4318-b5b9-9053aa474e0e.json index 25d814ca..e4ec40dc 100644 --- a/server/src/test/resources/databag/7709f541-fc0c-4318-b5b9-9053aa474e0e.json +++ b/server/src/test/resources/databag/7709f541-fc0c-4318-b5b9-9053aa474e0e.json @@ -28,7 +28,7 @@ ] }, "summaries": { - "score": 95, + "score": 60, "status": "completed", "credits": [ "Australian Climate Change Science Program", diff --git a/server/src/test/resources/databag/bc55eff4-7596-3565-e044-00144fdd4fa6.json b/server/src/test/resources/databag/bc55eff4-7596-3565-e044-00144fdd4fa6.json index 3c4284c3..6529575a 100644 --- a/server/src/test/resources/databag/bc55eff4-7596-3565-e044-00144fdd4fa6.json +++ b/server/src/test/resources/databag/bc55eff4-7596-3565-e044-00144fdd4fa6.json @@ -30,7 +30,7 @@ ] }, "summaries": { - "score": 80, + "score": 50, "status": "", "scope": { "code": "nonGeographicDataset",