diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml
index 6a66ac5fb5609..ec9de9c810540 100644
--- a/.github/workflows/assemble.yml
+++ b/.github/workflows/assemble.yml
@@ -3,7 +3,7 @@ on: [pull_request]
jobs:
assemble:
- if: github.repository == 'opensearch-project/OpenSearch'
+ if: github.repository == 'peteralfonsi/OpenSearch'
runs-on: ${{ matrix.os }}
strategy:
matrix:
@@ -24,3 +24,8 @@ jobs:
- name: Run Gradle (assemble)
run: |
./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE
+ - name: Upload artifact
+        uses: actions/upload-artifact@v4
+ with:
+ name: tiered-caching-artifact
+ path: distribution/archives/linux-tar/build/distributions/opensearch-min-3.0.0-SNAPSHOT-linux-x64.tar.gz
diff --git a/.gitignore b/.gitignore
index 7514d55cc3c9a..291a63cdeef92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,4 +64,5 @@ testfixtures_shared/
.ci/jobs/
# build files generated
-doc-tools/missing-doclet/bin/
\ No newline at end of file
+doc-tools/missing-doclet/bin/
+server/src/main/java/org/opensearch/indices/KLSPerformanceTest.java
diff --git a/.idea/runConfigurations/Debug_OpenSearch.xml b/.idea/runConfigurations/Debug_OpenSearch.xml
index 0d8bf59823acf..c18046f873477 100644
--- a/.idea/runConfigurations/Debug_OpenSearch.xml
+++ b/.idea/runConfigurations/Debug_OpenSearch.xml
@@ -6,6 +6,10 @@
+
+
+
+
-
+
\ No newline at end of file
diff --git a/server/build.gradle b/server/build.gradle
index fd15367553fe9..de8d2c53af3d3 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -161,6 +161,9 @@ dependencies {
api "org.ehcache:ehcache:${versions.ehcache}"
api "org.slf4j:slf4j-api:${versions.slf4j}"
+ // roaring bitmaps
+ api 'org.roaringbitmap:RoaringBitmap:0.9.49'
+ runtimeOnly 'org.roaringbitmap:shims:0.9.49'
testImplementation(project(":test:framework")) {
// tests use the locally compiled version of server
diff --git a/server/licenses/RoaringBitmap-0.9.49.jar.sha1 b/server/licenses/RoaringBitmap-0.9.49.jar.sha1
new file mode 100644
index 0000000000000..919a73c074b6a
--- /dev/null
+++ b/server/licenses/RoaringBitmap-0.9.49.jar.sha1
@@ -0,0 +1 @@
+b45b49c1ec5c5fc48580412d0ca635e1833110ea
\ No newline at end of file
diff --git a/server/licenses/RoaringBitmap-LICENSE.txt b/server/licenses/RoaringBitmap-LICENSE.txt
new file mode 100644
index 0000000000000..a890d4a062fad
--- /dev/null
+++ b/server/licenses/RoaringBitmap-LICENSE.txt
@@ -0,0 +1,191 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to software source code, documentation source, and configuration
+files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object code,
+generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made
+available under the License, as indicated by a copyright notice that is included
+in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version
+of the Work and any modifications or additions to that Work or Derivative Works
+thereof, that is intentionally submitted to Licensor for inclusion in the Work
+by the copyright owner or by an individual or Legal Entity authorized to submit
+on behalf of the copyright owner. For the purposes of this definition,
+"submitted" means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems, and
+issue tracking systems that are managed by, or on behalf of, the Licensor for
+the purpose of discussing and improving the Work, but excluding communication
+that is conspicuously marked or otherwise designated in writing by the copyright
+owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of Copyright License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the Work and such
+Derivative Works in Source or Object form.
+
+3. Grant of Patent License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable (except as stated in this section) patent license to make, have
+made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+such license applies only to those patent claims licensable by such Contributor
+that are necessarily infringed by their Contribution(s) alone or by combination
+of their Contribution(s) with the Work to which such Contribution(s) was
+submitted. If You institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+Contribution incorporated within the Work constitutes direct or contributory
+patent infringement, then any patent licenses granted to You under this License
+for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution.
+
+You may reproduce and distribute copies of the Work or Derivative Works thereof
+in any medium, with or without modifications, and in Source or Object form,
+provided that You meet the following conditions:
+
+You must give any other recipients of the Work or Derivative Works a copy of
+this License; and
+You must cause any modified files to carry prominent notices stating that You
+changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute,
+all copyright, patent, trademark, and attribution notices from the Source form
+of the Work, excluding those notices that do not pertain to any part of the
+Derivative Works; and
+If the Work includes a "NOTICE" text file as part of its distribution, then any
+Derivative Works that You distribute must include a readable copy of the
+attribution notices contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the Derivative Works, in at least one of the
+following places: within a NOTICE text file distributed as part of the
+Derivative Works; within the Source form or documentation, if provided along
+with the Derivative Works; or, within a display generated by the Derivative
+Works, if and wherever such third-party notices normally appear. The contents of
+the NOTICE file are for informational purposes only and do not modify the
+License. You may add Your own attribution notices within Derivative Works that
+You distribute, alongside or as an addendum to the NOTICE text from the Work,
+provided that such additional attribution notices cannot be construed as
+modifying the License.
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a whole,
+provided Your use, reproduction, and distribution of the Work otherwise complies
+with the conditions stated in this License.
+
+5. Submission of Contributions.
+
+Unless You explicitly state otherwise, any Contribution intentionally submitted
+for inclusion in the Work by You to the Licensor shall be under the terms and
+conditions of this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify the terms of
+any separate license agreement you may have executed with Licensor regarding
+such Contributions.
+
+6. Trademarks.
+
+This License does not grant permission to use the trade names, trademarks,
+service marks, or product names of the Licensor, except as required for
+reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty.
+
+Unless required by applicable law or agreed to in writing, Licensor provides the
+Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+including, without limitation, any warranties or conditions of TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+solely responsible for determining the appropriateness of using or
+redistributing the Work and assume any risks associated with Your exercise of
+permissions under this License.
+
+8. Limitation of Liability.
+
+In no event and under no legal theory, whether in tort (including negligence),
+contract, or otherwise, unless required by applicable law (such as deliberate
+and grossly negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special, incidental,
+or consequential damages of any character arising as a result of this License or
+out of the use or inability to use the Work (including but not limited to
+damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+any and all other commercial damages or losses), even if such Contributor has
+been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability.
+
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work
+
+To apply the Apache License to your work, attach the following boilerplate
+notice, with the fields enclosed by brackets "[]" replaced with your own
+identifying information. (Don't include the brackets!) The text should be
+enclosed in the appropriate comment syntax for the file format. We also
+recommend that a file or class name and description of purpose be included on
+the same "printed page" as the copyright notice for easier identification within
+third-party archives.
+
+ Copyright 2013-2016 the RoaringBitmap authors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/server/licenses/RoaringBitmap-NOTICE.txt b/server/licenses/RoaringBitmap-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/server/licenses/shims-0.9.49.jar.sha1 b/server/licenses/shims-0.9.49.jar.sha1
new file mode 100644
index 0000000000000..9e76614ca5207
--- /dev/null
+++ b/server/licenses/shims-0.9.49.jar.sha1
@@ -0,0 +1 @@
+8bd7794fbdaa9536354dd2d8d961d9503beb9460
\ No newline at end of file
diff --git a/server/licenses/shims-LICENSE.txt b/server/licenses/shims-LICENSE.txt
new file mode 100644
index 0000000000000..a890d4a062fad
--- /dev/null
+++ b/server/licenses/shims-LICENSE.txt
@@ -0,0 +1,191 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to software source code, documentation source, and configuration
+files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object code,
+generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made
+available under the License, as indicated by a copyright notice that is included
+in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version
+of the Work and any modifications or additions to that Work or Derivative Works
+thereof, that is intentionally submitted to Licensor for inclusion in the Work
+by the copyright owner or by an individual or Legal Entity authorized to submit
+on behalf of the copyright owner. For the purposes of this definition,
+"submitted" means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems, and
+issue tracking systems that are managed by, or on behalf of, the Licensor for
+the purpose of discussing and improving the Work, but excluding communication
+that is conspicuously marked or otherwise designated in writing by the copyright
+owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of Copyright License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the Work and such
+Derivative Works in Source or Object form.
+
+3. Grant of Patent License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable (except as stated in this section) patent license to make, have
+made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+such license applies only to those patent claims licensable by such Contributor
+that are necessarily infringed by their Contribution(s) alone or by combination
+of their Contribution(s) with the Work to which such Contribution(s) was
+submitted. If You institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+Contribution incorporated within the Work constitutes direct or contributory
+patent infringement, then any patent licenses granted to You under this License
+for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution.
+
+You may reproduce and distribute copies of the Work or Derivative Works thereof
+in any medium, with or without modifications, and in Source or Object form,
+provided that You meet the following conditions:
+
+You must give any other recipients of the Work or Derivative Works a copy of
+this License; and
+You must cause any modified files to carry prominent notices stating that You
+changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute,
+all copyright, patent, trademark, and attribution notices from the Source form
+of the Work, excluding those notices that do not pertain to any part of the
+Derivative Works; and
+If the Work includes a "NOTICE" text file as part of its distribution, then any
+Derivative Works that You distribute must include a readable copy of the
+attribution notices contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the Derivative Works, in at least one of the
+following places: within a NOTICE text file distributed as part of the
+Derivative Works; within the Source form or documentation, if provided along
+with the Derivative Works; or, within a display generated by the Derivative
+Works, if and wherever such third-party notices normally appear. The contents of
+the NOTICE file are for informational purposes only and do not modify the
+License. You may add Your own attribution notices within Derivative Works that
+You distribute, alongside or as an addendum to the NOTICE text from the Work,
+provided that such additional attribution notices cannot be construed as
+modifying the License.
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a whole,
+provided Your use, reproduction, and distribution of the Work otherwise complies
+with the conditions stated in this License.
+
+5. Submission of Contributions.
+
+Unless You explicitly state otherwise, any Contribution intentionally submitted
+for inclusion in the Work by You to the Licensor shall be under the terms and
+conditions of this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify the terms of
+any separate license agreement you may have executed with Licensor regarding
+such Contributions.
+
+6. Trademarks.
+
+This License does not grant permission to use the trade names, trademarks,
+service marks, or product names of the Licensor, except as required for
+reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty.
+
+Unless required by applicable law or agreed to in writing, Licensor provides the
+Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+including, without limitation, any warranties or conditions of TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+solely responsible for determining the appropriateness of using or
+redistributing the Work and assume any risks associated with Your exercise of
+permissions under this License.
+
+8. Limitation of Liability.
+
+In no event and under no legal theory, whether in tort (including negligence),
+contract, or otherwise, unless required by applicable law (such as deliberate
+and grossly negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special, incidental,
+or consequential damages of any character arising as a result of this License or
+out of the use or inability to use the Work (including but not limited to
+damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+any and all other commercial damages or losses), even if such Contributor has
+been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability.
+
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work
+
+To apply the Apache License to your work, attach the following boilerplate
+notice, with the fields enclosed by brackets "[]" replaced with your own
+identifying information. (Don't include the brackets!) The text should be
+enclosed in the appropriate comment syntax for the file format. We also
+recommend that a file or class name and description of purpose be included on
+the same "printed page" as the copyright notice for easier identification within
+third-party archives.
+
+ Copyright 2013-2016 the RoaringBitmap authors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/server/licenses/shims-NOTICE.txt b/server/licenses/shims-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/server/licenses/slf4j-api-LICENSE.txt b/server/licenses/slf4j-api-LICENSE.txt
index 2be7689435062..f687729a0b452 100644
--- a/server/licenses/slf4j-api-LICENSE.txt
+++ b/server/licenses/slf4j-api-LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2004-2022 QOS.ch
+Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland)
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
@@ -18,4 +18,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheDiskTierIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheDiskTierIT.java
new file mode 100644
index 0000000000000..18a37d4d43b42
--- /dev/null
+++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheDiskTierIT.java
@@ -0,0 +1,150 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.indices;
+
+import org.opensearch.action.search.SearchResponse;
+import org.opensearch.client.Client;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.common.cache.tier.DiskTierTookTimePolicy;
+import org.opensearch.common.cache.tier.TierType;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.core.common.unit.ByteSizeValue;
+import org.opensearch.index.cache.request.RequestCacheStats;
+import org.opensearch.index.cache.request.ShardRequestCache;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.test.OpenSearchIntegTestCase;
+
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse;
+
+// This is a separate file from IndicesRequestCacheIT because we only want to run our test
+// on a node with a maximum request cache size that we set.
+
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class IndicesRequestCacheDiskTierIT extends OpenSearchIntegTestCase {
+ public void testDiskTierStats() throws Exception {
+ int heapSizeBytes = 9876;
+ String node = internalCluster().startNode(
+ Settings.builder()
+ .put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), new ByteSizeValue(heapSizeBytes))
+ .put(DiskTierTookTimePolicy.DISK_TOOKTIME_THRESHOLD_SETTING.getKey(), TimeValue.ZERO) // allow into disk cache regardless of
+ // took time
+ );
+ Client client = client(node);
+
+ Settings.Builder indicesSettingBuilder = Settings.builder()
+ .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true)
+ .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+ .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0);
+
+ assertAcked(
+ client.admin().indices().prepareCreate("index").setMapping("k", "type=keyword").setSettings(indicesSettingBuilder).get()
+ );
+ indexRandom(true, client.prepareIndex("index").setSource("k", "hello"));
+ ensureSearchable("index");
+ SearchResponse resp;
+
+ resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello" + 0)).get();
+ int requestSize = (int) getCacheSizeBytes(client, "index", TierType.ON_HEAP);
+ assertTrue(heapSizeBytes > requestSize);
+ // If this fails, increase heapSizeBytes! We can't adjust it after getting the size of one query
+ // as the cache size setting is not dynamic
+
+ int numOnDisk = 5;
+ int numRequests = heapSizeBytes / requestSize + numOnDisk;
+        logger.info("Size {} numRequests {}", requestSize, numRequests);
+ for (int i = 1; i < numRequests; i++) {
+ resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello" + i)).get();
+ assertSearchResponse(resp);
+ IndicesRequestCacheIT.assertCacheState(client, "index", 0, i + 1, TierType.ON_HEAP, false);
+ IndicesRequestCacheIT.assertCacheState(client, "index", 0, i + 1, TierType.DISK, false);
+ }
+
+ // So far, disk-specific stats should be 0, as keystore has prevented any actual disk reaches
+ long tookTimeSoFar = assertDiskTierSpecificStats(client, "index", 0, -1, 0);
+
+ // the first request, for "hello0", should have been evicted to the disk tier
+ resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello0")).get();
+ IndicesRequestCacheIT.assertCacheState(client, "index", 0, numRequests + 1, TierType.ON_HEAP, false);
+ IndicesRequestCacheIT.assertCacheState(client, "index", 1, numRequests, TierType.DISK, false);
+ tookTimeSoFar = assertDiskTierSpecificStats(client, "index", 1, 0, -1);
+
+ // We make another actual request that should be in the disk tier. Disk specific stats should keep incrementing
+ resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello1")).get();
+ IndicesRequestCacheIT.assertCacheState(client, "index", 0, numRequests + 2, TierType.ON_HEAP, false);
+ IndicesRequestCacheIT.assertCacheState(client, "index", 2, numRequests, TierType.DISK, false);
+ tookTimeSoFar = assertDiskTierSpecificStats(client, "index", 2, tookTimeSoFar, -1);
+
+ // A final request for something in neither tier shouldn't increment disk specific stats
+ resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello" + numRequests)).get();
+ IndicesRequestCacheIT.assertCacheState(client, "index", 0, numRequests + 3, TierType.ON_HEAP, false);
+ IndicesRequestCacheIT.assertCacheState(client, "index", 2, numRequests + 1, TierType.DISK, false);
+ assertDiskTierSpecificStats(client, "index", 2, tookTimeSoFar, tookTimeSoFar);
+
+ }
+
+ private long getCacheSizeBytes(Client client, String index, TierType tierType) {
+ RequestCacheStats requestCacheStats = client.admin()
+ .indices()
+ .prepareStats(index)
+ .setRequestCache(true)
+ .get()
+ .getTotal()
+ .getRequestCache();
+ return requestCacheStats.getMemorySizeInBytes(tierType);
+ }
+
+ private long assertDiskTierSpecificStats(
+ Client client,
+ String index,
+ long totalDiskReaches,
+ long totalGetTimeLowerBound,
+ long totalGetTimeUpperBound
+ ) {
+ // set bounds to -1 to ignore them
+ RequestCacheStats requestCacheStats = client.admin()
+ .indices()
+ .prepareStats(index)
+ .setRequestCache(true)
+ .get()
+ .getTotal()
+ .getRequestCache();
+ ShardRequestCache.DiskStatsAccumulator specStats = requestCacheStats.getDiskSpecificStats();
+ assertEquals(totalDiskReaches, specStats.getTotalDiskReaches());
+ long tookTime = specStats.getTotalGetTime();
+ assertTrue(tookTime >= totalGetTimeLowerBound || totalGetTimeLowerBound < 0);
+ assertTrue(tookTime <= totalGetTimeUpperBound || totalGetTimeUpperBound < 0);
+ return tookTime; // Return for use in next check
+ }
+}
diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java
index a1815d9be2daf..e85cac7cf157c 100644
--- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java
@@ -40,6 +40,7 @@
import org.opensearch.action.search.SearchType;
import org.opensearch.client.Client;
import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.common.cache.tier.TierType;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.util.FeatureFlags;
@@ -662,7 +663,9 @@ public void testCacheWithInvalidation() throws Exception {
resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get();
assertSearchResponse(resp);
// Should expect hit as here as refresh didn't happen
- assertCacheState(client, "index", 1, 1);
+ assertCacheState(client, "index", 1, 1, TierType.ON_HEAP, false);
+ assertCacheState(client, "index", 0, 1, TierType.DISK, false);
+ assertNumCacheEntries(client, "index", 1, TierType.ON_HEAP);
// Explicit refresh would invalidate cache
refresh();
@@ -670,10 +673,20 @@ public void testCacheWithInvalidation() throws Exception {
resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get();
assertSearchResponse(resp);
// Should expect miss as key has changed due to change in IndexReader.CacheKey (due to refresh)
- assertCacheState(client, "index", 1, 2);
+ assertCacheState(client, "index", 1, 2, TierType.ON_HEAP, false);
+ assertCacheState(client, "index", 0, 2, TierType.DISK, false);
+
+ // assertNumCacheEntries(client, "index", 1, TierType.ON_HEAP); // Evictions won't be 1 until the cache cleaner runs every minute
}
- private static void assertCacheState(Client client, String index, long expectedHits, long expectedMisses) {
+ protected static void assertCacheState(
+ Client client,
+ String index,
+ long expectedHits,
+ long expectedMisses,
+ TierType tierType,
+ boolean enforceZeroEvictions
+ ) {
RequestCacheStats requestCacheStats = client.admin()
.indices()
.prepareStats(index)
@@ -683,11 +696,36 @@ private static void assertCacheState(Client client, String index, long expectedH
.getRequestCache();
// Check the hit count and miss count together so if they are not
// correct we can see both values
- assertEquals(
- Arrays.asList(expectedHits, expectedMisses, 0L),
- Arrays.asList(requestCacheStats.getHitCount(), requestCacheStats.getMissCount(), requestCacheStats.getEvictions())
- );
+ if (enforceZeroEvictions) {
+ assertEquals(
+ Arrays.asList(expectedHits, expectedMisses, 0L),
+ Arrays.asList(
+ requestCacheStats.getHitCount(tierType),
+ requestCacheStats.getMissCount(tierType),
+ requestCacheStats.getEvictions(tierType)
+ )
+ );
+ } else {
+ assertEquals(
+ Arrays.asList(expectedHits, expectedMisses),
+ Arrays.asList(requestCacheStats.getHitCount(tierType), requestCacheStats.getMissCount(tierType))
+ );
+ }
+ }
+
+ protected static void assertCacheState(Client client, String index, long expectedHits, long expectedMisses) {
+ assertCacheState(client, index, expectedHits, expectedMisses, TierType.ON_HEAP, true);
+ }
+ protected static void assertNumCacheEntries(Client client, String index, long expectedEntries, TierType tierType) {
+ RequestCacheStats requestCacheStats = client.admin()
+ .indices()
+ .prepareStats(index)
+ .setRequestCache(true)
+ .get()
+ .getTotal()
+ .getRequestCache();
+ assertEquals(expectedEntries, requestCacheStats.getEntries(tierType));
}
}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java b/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java
new file mode 100644
index 0000000000000..3ac30b09bddca
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java
@@ -0,0 +1,42 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.core.common.bytes.BytesReference;
+
+import java.util.Arrays;
+
+/**
+ * A serializer which transforms BytesReference to byte[].
+ * The type of BytesReference is NOT preserved after deserialization, but nothing in opensearch should care.
+ */
+public class BytesReferenceSerializer implements Serializer<BytesReference, byte[]> {
+ // This class does not get passed to ehcache itself, so it's not required that classes match after deserialization.
+
+ public BytesReferenceSerializer() {}
+
+ @Override
+ public byte[] serialize(BytesReference object) {
+ return BytesReference.toBytes(object);
+ }
+
+ @Override
+ public BytesReference deserialize(byte[] bytes) {
+ if (bytes == null) {
+ return null;
+ }
+ return new BytesArray(bytes);
+ }
+
+ @Override
+ public boolean equals(BytesReference object, byte[] bytes) {
+ return Arrays.equals(serialize(object), bytes);
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/CachePolicyInfoWrapper.java b/server/src/main/java/org/opensearch/common/cache/tier/CachePolicyInfoWrapper.java
new file mode 100644
index 0000000000000..ae7854850fed6
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/CachePolicyInfoWrapper.java
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier;
+
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+
+import java.io.IOException;
+
+/**
+ * A class containing information needed for all CacheTierPolicy objects to decide whether to admit
+ * a given BytesReference. This spares us from having to create an entire short-lived QuerySearchResult object
+ * just to read a few values.
+ */
+public class CachePolicyInfoWrapper implements Writeable {
+ private final Long tookTimeNanos;
+
+ public CachePolicyInfoWrapper(Long tookTimeNanos) {
+ this.tookTimeNanos = tookTimeNanos;
+ // Add more values here as they are needed for future cache tier policies
+ }
+
+ public CachePolicyInfoWrapper(StreamInput in) throws IOException {
+ this.tookTimeNanos = in.readOptionalLong();
+ }
+
+ public Long getTookTimeNanos() {
+ return tookTimeNanos;
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeOptionalLong(tookTimeNanos);
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/CacheTierPolicy.java b/server/src/main/java/org/opensearch/common/cache/tier/CacheTierPolicy.java
new file mode 100644
index 0000000000000..ac5fbb230d173
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/CacheTierPolicy.java
@@ -0,0 +1,45 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier;
+
+/**
+ * An interface for policies that inspect data of type T to decide whether they are admitted into a cache tier.
+ */
+public interface CacheTierPolicy<T> {
+ /**
+ * Determines whether this policy allows the data into its cache tier.
+ * @param data The data to check
+ * @return true if accepted, otherwise false
+ */
+ boolean checkData(T data);
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/CacheValue.java b/server/src/main/java/org/opensearch/common/cache/tier/CacheValue.java
new file mode 100644
index 0000000000000..bb3735e48b181
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/CacheValue.java
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+/**
+ * Represents a cache value, with its associated tier type where it is stored,
+ * and tier-specific stats for an individual request stored in a map.
+ * @param <V> Type of value.
+ */
+public class CacheValue<V> {
+ V value;
+ TierType source;
+ TierRequestStats stats;
+
+ CacheValue(V value, TierType source, TierRequestStats stats) {
+ this.value = value;
+ this.source = source;
+ this.stats = stats;
+ }
+
+ public V getValue() {
+ return value;
+ }
+
+ public TierType getSource() {
+ return source;
+ }
+
+ public TierRequestStats getStats() {
+ return stats;
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/CachingTier.java b/server/src/main/java/org/opensearch/common/cache/tier/CachingTier.java
index 48fd5deadc111..2e94cacd0f40a 100644
--- a/server/src/main/java/org/opensearch/common/cache/tier/CachingTier.java
+++ b/server/src/main/java/org/opensearch/common/cache/tier/CachingTier.java
@@ -18,7 +18,7 @@
*/
public interface CachingTier<K, V> {
- V get(K key);
+ CacheValue<V> get(K key);
void put(K key, V value);
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/DiskTierRequestStats.java b/server/src/main/java/org/opensearch/common/cache/tier/DiskTierRequestStats.java
new file mode 100644
index 0000000000000..9c2d1c5d1a706
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/DiskTierRequestStats.java
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+/**
+ * A class created by disk tier implementations containing disk-specific stats for a single request.
+ */
+public class DiskTierRequestStats implements TierRequestStats {
+
+ private final long requestGetTimeNanos;
+ private final boolean requestReachedDisk;
+
+ public DiskTierRequestStats(long requestGetTimeNanos, boolean requestReachedDisk) {
+ this.requestReachedDisk = requestReachedDisk;
+ this.requestGetTimeNanos = requestGetTimeNanos;
+ }
+
+ @Override
+ public TierType getTierType() {
+ return TierType.DISK;
+ }
+
+ public long getRequestGetTimeNanos() {
+ return requestGetTimeNanos;
+ }
+
+ public boolean getRequestReachedDisk() {
+ return requestReachedDisk;
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/DiskTierTookTimePolicy.java b/server/src/main/java/org/opensearch/common/cache/tier/DiskTierTookTimePolicy.java
new file mode 100644
index 0000000000000..d6c3b894a974d
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/DiskTierTookTimePolicy.java
@@ -0,0 +1,77 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier;
+
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.core.common.bytes.BytesReference;
+
+import java.util.function.Function;
+
+/**
+ * A cache tier policy which accepts queries whose took time is greater than some threshold,
+ * which is specified as a dynamic cluster-level setting. The threshold should be set to approximately
+ * the time it takes to get a result from the cache tier.
+ * The policy expects to be able to read a CachePolicyInfoWrapper from the start of the BytesReference.
+ */
+public class DiskTierTookTimePolicy implements CacheTierPolicy<BytesReference> {
+ public static final Setting<TimeValue> DISK_TOOKTIME_THRESHOLD_SETTING = Setting.positiveTimeSetting(
+ "index.requests.cache.disk.tooktime.threshold",
+ new TimeValue(10),
+ Setting.Property.Dynamic,
+ Setting.Property.NodeScope
+ ); // Set this to TimeValue.ZERO to let all data through
+
+ private TimeValue threshold;
+ private final Function<BytesReference, CachePolicyInfoWrapper> getPolicyInfoFn;
+
+ public DiskTierTookTimePolicy(
+ Settings settings,
+ ClusterSettings clusterSettings,
+ Function<BytesReference, CachePolicyInfoWrapper> getPolicyInfoFn
+ ) {
+ this.threshold = DISK_TOOKTIME_THRESHOLD_SETTING.get(settings);
+ clusterSettings.addSettingsUpdateConsumer(DISK_TOOKTIME_THRESHOLD_SETTING, this::setThreshold);
+ this.getPolicyInfoFn = getPolicyInfoFn;
+ }
+
+ protected void setThreshold(TimeValue threshold) { // protected so that we can manually set value in unit test
+ this.threshold = threshold;
+ }
+
+ @Override
+ public boolean checkData(BytesReference data) {
+ if (threshold.equals(TimeValue.ZERO)) {
+ return true;
+ }
+ Long tookTimeNanos;
+ try {
+ tookTimeNanos = getPolicyInfoFn.apply(data).getTookTimeNanos();
+ } catch (Exception e) {
+ // If we can't retrieve the took time for whatever reason, admit the data to be safe
+ return true;
+ }
+ if (tookTimeNanos == null) {
+ // Received a null took time -> this QSR is from an old version which does not have took time, we should accept it
+ return true;
+ }
+ TimeValue tookTime = TimeValue.timeValueNanos(tookTimeNanos);
+ if (tookTime.compareTo(threshold) < 0) { // negative -> tookTime is shorter than threshold
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java b/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java
index e7e52c7fbccbf..639936b62fc7b 100644
--- a/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java
+++ b/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java
@@ -12,12 +12,14 @@
import org.opensearch.common.cache.RemovalListener;
import org.opensearch.common.cache.RemovalNotification;
import org.opensearch.common.cache.RemovalReason;
+import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore;
import org.opensearch.common.metrics.CounterMetric;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import java.io.File;
+import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.Iterator;
import java.util.NoSuchElementException;
@@ -34,19 +36,26 @@
import org.ehcache.config.builders.PooledExecutionServiceConfigurationBuilder;
import org.ehcache.config.builders.ResourcePoolsBuilder;
import org.ehcache.config.units.MemoryUnit;
+import org.ehcache.core.spi.service.FileBasedPersistenceContext;
import org.ehcache.event.CacheEvent;
import org.ehcache.event.CacheEventListener;
import org.ehcache.event.EventType;
import org.ehcache.expiry.ExpiryPolicy;
import org.ehcache.impl.config.store.disk.OffHeapDiskStoreConfiguration;
+import org.ehcache.spi.serialization.SerializerException;
+/**
+ * An ehcache-based disk tier implementation.
+ * @param <K> The key type of cache entries
+ * @param <V> The value type of cache entries
+ */
public class EhCacheDiskCachingTier<K, V> implements DiskCachingTier<K, V> {
// A Cache manager can create many caches.
- private final PersistentCacheManager cacheManager;
+ private static PersistentCacheManager cacheManager = null;
// Disk cache
- private Cache<K, V> cache;
+ private Cache<K, byte[]> cache;
private final long maxWeightInBytes;
private final String storagePath;
@@ -85,12 +94,18 @@ public class EhCacheDiskCachingTier implements DiskCachingTier {
// Defines how many segments the disk cache is separated into. Higher number achieves greater concurrency but
// will hold that many file pointers.
public final Setting<Integer> DISK_SEGMENTS;
+ private final RBMIntKeyLookupStore keystore;
+
+ private final Serializer<K, byte[]> keySerializer;
+ private final Serializer<V, byte[]> valueSerializer;
private EhCacheDiskCachingTier(Builder builder) {
this.keyType = Objects.requireNonNull(builder.keyType, "Key type shouldn't be null");
this.valueType = Objects.requireNonNull(builder.valueType, "Value type shouldn't be null");
this.expireAfterAccess = Objects.requireNonNull(builder.expireAfterAcess, "ExpireAfterAccess value shouldn't " + "be null");
- this.ehCacheEventListener = new EhCacheEventListener();
+ this.keySerializer = Objects.requireNonNull(builder.keySerializer, "Key serializer shouldn't be null");
+ this.valueSerializer = Objects.requireNonNull(builder.valueSerializer, "Value serializer shouldn't be null");
+ this.ehCacheEventListener = new EhCacheEventListener(this.valueSerializer);
this.maxWeightInBytes = builder.maxWeightInBytes;
this.storagePath = Objects.requireNonNull(builder.storagePath, "Storage path shouldn't be null");
if (builder.threadPoolAlias == null || builder.threadPoolAlias.isBlank()) {
@@ -106,8 +121,15 @@ private EhCacheDiskCachingTier(Builder builder) {
this.DISK_WRITE_CONCURRENCY = Setting.intSetting(builder.settingPrefix + ".tiered.disk.ehcache.concurrency", 2, 1, 3);
// Default value is 16 within Ehcache.
this.DISK_SEGMENTS = Setting.intSetting(builder.settingPrefix + ".ehcache.disk.segments", 16, 1, 32);
- this.cacheManager = buildCacheManager();
+
+ // In test cases, there might be leftover cache managers and caches hanging around, from nodes created in the test case setup
+ // Destroy them before recreating them
+ close();
+ cacheManager = buildCacheManager();
this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder);
+
+ long keystoreMaxWeight = builder.keystoreMaxWeightInBytes;
+ this.keystore = new RBMIntKeyLookupStore(keystoreMaxWeight);
}
private PersistentCacheManager buildCacheManager() {
@@ -124,26 +146,26 @@ private PersistentCacheManager buildCacheManager() {
.build(true);
}
- private Cache<K, V> buildCache(Duration expireAfterAccess, Builder<K, V> builder) {
- return this.cacheManager.createCache(
+ private Cache<K, byte[]> buildCache(Duration expireAfterAccess, Builder<K, V> builder) {
+ return cacheManager.createCache(
DISK_CACHE_ALIAS,
CacheConfigurationBuilder.newCacheConfigurationBuilder(
- this.keyType,
- this.valueType,
+ keyType,
+ byte[].class,
ResourcePoolsBuilder.newResourcePoolsBuilder().disk(maxWeightInBytes, MemoryUnit.B)
- ).withExpiry(new ExpiryPolicy<K, V>() {
+ ).withExpiry(new ExpiryPolicy<K, byte[]>() {
@Override
- public Duration getExpiryForCreation(K key, V value) {
+ public Duration getExpiryForCreation(K key, byte[] value) {
return INFINITE;
}
@Override
- public Duration getExpiryForAccess(K key, Supplier<? extends V> value) {
+ public Duration getExpiryForAccess(K key, Supplier<? extends byte[]> value) {
return expireAfterAccess;
}
@Override
- public Duration getExpiryForUpdate(K key, Supplier<? extends V> oldValue, V newValue) {
+ public Duration getExpiryForUpdate(K key, Supplier<? extends byte[]> oldValue, byte[] newValue) {
return INFINITE;
}
})
@@ -155,6 +177,7 @@ public Duration getExpiryForUpdate(K key, Supplier extends V> oldValue, V newV
DISK_SEGMENTS.get(settings)
)
)
+ .withKeySerializer(new KeySerializerWrapper(keySerializer))
);
}
@@ -175,14 +198,28 @@ private CacheEventListenerConfigurationBuilder getListenerConfiguration(Builder<
}
@Override
- public V get(K key) {
- // Optimize it by adding key store.
- return cache.get(key);
+ public CacheValue<V> get(K key) {
+ boolean reachedDisk = false;
+ long now = System.nanoTime();
+
+ V value = null;
+ if (keystore.contains(key.hashCode()) || keystore.isFull()) { // Check in-memory store of key hashes to avoid unnecessary disk seek
+ value = valueSerializer.deserialize(cache.get(key));
+ reachedDisk = true;
+ }
+
+ long tookTime = -1L; // This value will be ignored by stats accumulator if reachedDisk is false anyway
+ if (reachedDisk) {
+ tookTime = System.nanoTime() - now;
+ }
+ DiskTierRequestStats stats = new DiskTierRequestStats(tookTime, reachedDisk);
+ return new CacheValue<>(value, TierType.DISK, stats);
}
@Override
public void put(K key, V value) {
- cache.put(key, value);
+ cache.put(key, valueSerializer.serialize(value));
+ keystore.add(key.hashCode());
}
@Override
@@ -193,8 +230,9 @@ public V computeIfAbsent(K key, TieredCacheLoader loader) throws Exception
@Override
public void invalidate(K key) {
- // There seems to be an thread leak issue while calling this and then closing cache.
+ // There seems to be a thread leak issue while calling this and then closing cache.
cache.remove(key);
+ keystore.remove(key.hashCode());
}
@Override
@@ -211,6 +249,7 @@ public void setRemovalListener(RemovalListener removalListener) {
@Override
public void invalidateAll() {
// Clear up files.
+ keystore.clear();
}
@Override
@@ -230,13 +269,13 @@ public TierType getTierType() {
@Override
public void close() {
- cacheManager.removeCache(DISK_CACHE_ALIAS);
- cacheManager.close();
try {
cacheManager.destroyCache(DISK_CACHE_ALIAS);
+ cacheManager.close();
+ cacheManager = null;
} catch (CachePersistenceException e) {
throw new OpenSearchException("Exception occurred while destroying ehcache and associated data", e);
- }
+ } catch (NullPointerException ignored) {} // Another test node has already destroyed the cache manager
}
/**
@@ -244,18 +283,23 @@ public void close() {
* @param Type of key
* @param Type of value
*/
- class EhCacheEventListener implements CacheEventListener<K, V> {
+ class EhCacheEventListener implements CacheEventListener<K, byte[]> {
private Optional<RemovalListener<K, V>> removalListener;
+ // We need to pass the value serializer to this listener, as the removal listener is expecting
+ // values of type K, V, not K, byte[]
+ private Serializer<V, byte[]> valueSerializer;
- EhCacheEventListener() {}
+ EhCacheEventListener(Serializer<V, byte[]> valueSerializer) {
+ this.valueSerializer = valueSerializer;
+ }
public void setRemovalListener(RemovalListener<K, V> removalListener) {
this.removalListener = Optional.ofNullable(removalListener);
}
@Override
- public void onEvent(CacheEvent<? extends K, ? extends V> event) {
+ public void onEvent(CacheEvent<? extends K, ? extends byte[]> event) {
switch (event.getType()) {
case CREATED:
count.inc();
@@ -264,20 +308,40 @@ public void onEvent(CacheEvent extends K, ? extends V> event) {
case EVICTED:
this.removalListener.ifPresent(
listener -> listener.onRemoval(
- new RemovalNotification<>(event.getKey(), event.getOldValue(), RemovalReason.EVICTED)
+ new RemovalNotification<>(
+ event.getKey(),
+ valueSerializer.deserialize(event.getOldValue()),
+ RemovalReason.EVICTED,
+ TierType.DISK
+ )
)
);
count.dec();
assert event.getNewValue() == null;
break;
case REMOVED:
+ this.removalListener.ifPresent(
+ listener -> listener.onRemoval(
+ new RemovalNotification<>(
+ event.getKey(),
+ valueSerializer.deserialize(event.getOldValue()),
+ RemovalReason.INVALIDATED,
+ TierType.DISK
+ )
+ )
+ );
count.dec();
assert event.getNewValue() == null;
break;
case EXPIRED:
this.removalListener.ifPresent(
listener -> listener.onRemoval(
- new RemovalNotification<>(event.getKey(), event.getOldValue(), RemovalReason.INVALIDATED)
+ new RemovalNotification<>(
+ event.getKey(),
+ valueSerializer.deserialize(event.getOldValue()),
+ RemovalReason.INVALIDATED,
+ TierType.DISK
+ )
)
);
count.dec();
@@ -297,9 +361,9 @@ public void onEvent(CacheEvent extends K, ? extends V> event) {
*/
class EhCacheKeyIterator<K> implements Iterator<K> {
- Iterator<Cache.Entry<K, V>> iterator;
+ Iterator<Cache.Entry<K, byte[]>> iterator;
- EhCacheKeyIterator(Iterator<Cache.Entry<K, V>> iterator) {
+ EhCacheKeyIterator(Iterator<Cache.Entry<K, byte[]>> iterator) {
this.iterator = iterator;
}
@@ -317,6 +381,42 @@ public K next() {
}
}
+ /**
+ * The wrapper for the key serializer which is passed directly to Ehcache.
+ * Required because we cannot directly use a byte[] as an ehcache key, possibly due to an ehcache bug.
+ */
+ private class KeySerializerWrapper implements org.ehcache.spi.serialization.Serializer<K> {
+ public Serializer<K, byte[]> serializer;
+
+ public KeySerializerWrapper(Serializer<K, byte[]> serializer) {
+ this.serializer = serializer;
+ }
+
+ // This constructor must be present, but does not have to work as we are not actually persisting the disk
+ // cache after a restart.
+ // See https://www.ehcache.org/documentation/3.0/serializers-copiers.html#persistent-vs-transient-caches
+ public KeySerializerWrapper(ClassLoader classLoader, FileBasedPersistenceContext persistenceContext) {}
+
+ @Override
+ public ByteBuffer serialize(K object) throws SerializerException {
+ return ByteBuffer.wrap(serializer.serialize(object));
+ }
+
+ @Override
+ public K read(ByteBuffer binary) throws ClassNotFoundException, SerializerException {
+ byte[] arr = new byte[binary.remaining()];
+ binary.get(arr);
+ return serializer.deserialize(arr);
+ }
+
+ @Override
+ public boolean equals(K object, ByteBuffer binary) throws ClassNotFoundException, SerializerException {
+ byte[] arr = new byte[binary.remaining()];
+ binary.get(arr);
+ return serializer.equals(object, arr);
+ }
+ }
+
/**
* Builder object to build Ehcache disk tier.
* @param Type of key
@@ -342,6 +442,9 @@ public static class Builder {
// Provides capability to make ehCache event listener to run in sync mode. Used for testing too.
private boolean isEventListenerModeSync;
+ private Serializer<K, byte[]> keySerializer;
+ private Serializer<V, byte[]> valueSerializer;
+ private long keystoreMaxWeightInBytes = 0;
public Builder() {}
@@ -399,6 +502,21 @@ public EhCacheDiskCachingTier.Builder setIsEventListenerModeSync(boolean i
return this;
}
+ public EhCacheDiskCachingTier.Builder<K, V> setKeySerializer(Serializer<K, byte[]> keySerializer) {
+ this.keySerializer = keySerializer;
+ return this;
+ }
+
+ public EhCacheDiskCachingTier.Builder<K, V> setValueSerializer(Serializer<V, byte[]> valueSerializer) {
+ this.valueSerializer = valueSerializer;
+ return this;
+ }
+
+ public EhCacheDiskCachingTier.Builder<K, V> setKeyStoreMaxWeightInBytes(long weight) {
+ this.keystoreMaxWeightInBytes = weight;
+ return this;
+ }
+
public EhCacheDiskCachingTier build() {
return new EhCacheDiskCachingTier<>(this);
}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/OnHeapTierRequestStats.java b/server/src/main/java/org/opensearch/common/cache/tier/OnHeapTierRequestStats.java
new file mode 100644
index 0000000000000..3527dae885fde
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/OnHeapTierRequestStats.java
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+/**
+ * A class created by on-heap tier implementations containing on-heap-specific stats for a single request.
+ */
+public class OnHeapTierRequestStats implements TierRequestStats {
+ @Override
+ public TierType getTierType() {
+ return TierType.ON_HEAP;
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/OpenSearchOnHeapCache.java b/server/src/main/java/org/opensearch/common/cache/tier/OpenSearchOnHeapCache.java
index 22d2f769507cf..c7cd065674de7 100644
--- a/server/src/main/java/org/opensearch/common/cache/tier/OpenSearchOnHeapCache.java
+++ b/server/src/main/java/org/opensearch/common/cache/tier/OpenSearchOnHeapCache.java
@@ -66,8 +66,8 @@ public TierType getTierType() {
}
@Override
- public V get(K key) {
- return cache.get(key);
+ public CacheValue<V> get(K key) {
+ return new CacheValue<>(cache.get(key), TierType.ON_HEAP, new OnHeapTierRequestStats());
}
@Override
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/Serializer.java b/server/src/main/java/org/opensearch/common/cache/tier/Serializer.java
new file mode 100644
index 0000000000000..04413fce4b8b0
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/Serializer.java
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+/**
+ * An interface for serializers, to be used in disk caching tier and elsewhere.
+ * T is the class of the original object, and U is the serialized class.
+ */
+public interface Serializer<T, U> {
+ /**
+ * Serializes an object.
+ * @param object A non-serialized object.
+ * @return The serialized representation of the object.
+ */
+ U serialize(T object);
+
+ /**
+ * Deserializes bytes into an object.
+ * @param bytes The serialized representation.
+ * @return The original object.
+ */
+ T deserialize(U bytes);
+
+ /**
+ * Compares an object to a serialized representation of an object.
+ * @param object A non-serialized object
+ * @param bytes Serialized representation of an object
+ * @return true if representing the same object, false if not
+ */
+ boolean equals(T object, U bytes);
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/TierRequestStats.java b/server/src/main/java/org/opensearch/common/cache/tier/TierRequestStats.java
new file mode 100644
index 0000000000000..d156be7b3f028
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/TierRequestStats.java
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+/**
+ * An interface for single-request tier-specific stats, which are produced on each request from a cache tier
+ * and then added to the correct shard's overall StatsHolder for the tier.
+ */
+public interface TierRequestStats {
+ TierType getTierType();
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/TierType.java b/server/src/main/java/org/opensearch/common/cache/tier/TierType.java
index ca61b636c1dda..3ef4338030899 100644
--- a/server/src/main/java/org/opensearch/common/cache/tier/TierType.java
+++ b/server/src/main/java/org/opensearch/common/cache/tier/TierType.java
@@ -13,6 +13,17 @@
*/
public enum TierType {
- ON_HEAP,
- DISK;
+ ON_HEAP("on_heap"),
+ DISK("disk");
+
+ private final String stringValue;
+
+ TierType(String stringValue) {
+ // Associate each TierType with a string representation, for use in API responses and elsewhere
+ this.stringValue = stringValue;
+ }
+
+ public String getStringValue() {
+ return this.stringValue;
+ }
}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheEventListener.java b/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheEventListener.java
index 05b59bf16b282..d1c333e21818e 100644
--- a/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheEventListener.java
+++ b/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheEventListener.java
@@ -17,11 +17,12 @@
*/
public interface TieredCacheEventListener<K, V> {
- void onMiss(K key, TierType tierType);
+ void onMiss(K key, CacheValue<V> cacheValue);
void onRemoval(RemovalNotification<K, V> notification);
- void onHit(K key, V value, TierType tierType);
+ void onHit(K key, CacheValue<V> cacheValue);
void onCached(K key, V value, TierType tierType);
+ // Since only get() produces a CacheValue with stats, no need to modify onCached or onRemoval to have the CacheValue.
}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyService.java b/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyService.java
index 153dbf9b330f5..a0a24ef28764b 100644
--- a/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyService.java
+++ b/server/src/main/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyService.java
@@ -12,6 +12,7 @@
import org.opensearch.common.cache.RemovalNotification;
import org.opensearch.common.cache.RemovalReason;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
@@ -38,6 +39,7 @@ public class TieredCacheSpilloverStrategyService implements TieredCacheSer
* Maintains caching tiers in order of get calls.
*/
private final List> cachingTierList;
+ private final List> policies;
private TieredCacheSpilloverStrategyService(Builder builder) {
this.onHeapCachingTier = Objects.requireNonNull(builder.onHeapCachingTier);
@@ -45,6 +47,7 @@ private TieredCacheSpilloverStrategyService(Builder builder) {
this.tieredCacheEventListener = Objects.requireNonNull(builder.tieredCacheEventListener);
this.cachingTierList = this.diskCachingTier.map(diskTier -> Arrays.asList(onHeapCachingTier, diskTier))
.orElse(List.of(onHeapCachingTier));
+ this.policies = Objects.requireNonNull(builder.policies);
setRemovalListeners();
}
@@ -130,10 +133,12 @@ public void onRemoval(RemovalNotification notification) {
if (RemovalReason.EVICTED.equals(notification.getRemovalReason())) {
switch (notification.getTierType()) {
case ON_HEAP:
- diskCachingTier.ifPresent(diskTier -> {
- diskTier.put(notification.getKey(), notification.getValue());
- tieredCacheEventListener.onCached(notification.getKey(), notification.getValue(), TierType.DISK);
- });
+ if (checkPolicies(notification.getValue())) {
+ diskCachingTier.ifPresent(diskTier -> {
+ diskTier.put(notification.getKey(), notification.getValue());
+ tieredCacheEventListener.onCached(notification.getKey(), notification.getValue(), TierType.DISK);
+ });
+ }
break;
default:
break;
@@ -152,6 +157,15 @@ public Optional> getDiskCachingTier() {
return this.diskCachingTier;
}
+ boolean checkPolicies(V value) {
+ for (CacheTierPolicy policy : policies) {
+ if (!policy.checkData(value)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
/**
* Register this service as a listener to removal events from different caching tiers.
*/
@@ -164,35 +178,21 @@ private void setRemovalListeners() {
private Function> getValueFromTierCache(boolean trackStats) {
return key -> {
for (CachingTier cachingTier : cachingTierList) {
- V value = cachingTier.get(key);
- if (value != null) {
+ CacheValue cacheValue = cachingTier.get(key);
+ if (cacheValue.value != null) {
if (trackStats) {
- tieredCacheEventListener.onHit(key, value, cachingTier.getTierType());
+ tieredCacheEventListener.onHit(key, cacheValue);
}
- return new CacheValue<>(value, cachingTier.getTierType());
+ return cacheValue;
}
if (trackStats) {
- tieredCacheEventListener.onMiss(key, cachingTier.getTierType());
+ tieredCacheEventListener.onMiss(key, cacheValue);
}
}
return null;
};
}
- /**
- * Represents a cache value along with its associated tier type where it is stored.
- * @param Type of value.
- */
- public static class CacheValue {
- V value;
- TierType source;
-
- CacheValue(V value, TierType source) {
- this.value = value;
- this.source = source;
- }
- }
-
/**
* Builder object
* @param Type of key
@@ -202,6 +202,7 @@ public static class Builder {
private OnHeapCachingTier onHeapCachingTier;
private DiskCachingTier diskCachingTier;
private TieredCacheEventListener tieredCacheEventListener;
+ private ArrayList> policies = new ArrayList<>();
public Builder() {}
@@ -220,6 +221,17 @@ public Builder setTieredCacheEventListener(TieredCacheEventListener
return this;
}
+ public Builder withPolicy(CacheTierPolicy policy) {
+ this.policies.add(policy);
+ return this;
+ }
+
+ // Add multiple policies at once
+ public Builder withPolicies(List> policiesList) {
+ this.policies.addAll(policiesList);
+ return this;
+ }
+
public TieredCacheSpilloverStrategyService build() {
return new TieredCacheSpilloverStrategyService(this);
}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java
new file mode 100644
index 0000000000000..dc2b7a4ba1234
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java
@@ -0,0 +1,132 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier.keystore;
+
+/**
+ * An interface for objects that hold an in-memory record of hashes of keys in the disk cache.
+ * These objects have some internal data structure which stores some transformation of added
+ * int values. The internal representations may have collisions. Example transformations include a modulo
+ * or -abs(value), or some combination.
+ */
+public interface KeyLookupStore {
+
+ /**
+ * Transforms the input value into the internal representation for this keystore
+ * and adds it to the internal data structure.
+ * @param value The value to add.
+ * @return true if the value was added, false if it wasn't added because of a
+ * collision or if it was already present.
+ */
+ boolean add(T value);
+
+ /**
+ * Checks if the transformation of the value is in the keystore.
+ * @param value The value to check.
+ * @return true if the value was found, false otherwise. Due to collisions, false positives are
+ * possible, but there should be no false negatives unless remove() is misused by removing values that were never added.
+ */
+ boolean contains(T value);
+
+ /**
+ * Returns the transformed version of the input value, that would be used to store it in the keystore.
+ * This transformation should always be the same for a given instance.
+ * @param value The value to transform.
+ * @return The transformed value.
+ */
+ T getInternalRepresentation(T value);
+
+ /**
+ * Attempts to safely remove a value from the internal structure, maintaining the property that contains(value)
+ * will never return a false negative. If removing would lead to a false negative, the value won't be removed.
+ * Classes may not implement safe removal.
+ * @param value The value to attempt to remove.
+ * @return true if the value was removed, false if it wasn't.
+ */
+ boolean remove(T value);
+
+ /**
+ * Returns the number of distinct values stored in the internal data structure.
+ * Does not count values which weren't successfully added due to collisions.
+ * @return The number of values
+ */
+ int getSize();
+
+ /**
+ * Returns the number of times add() has been run, including unsuccessful attempts.
+ * @return The number of adding attempts.
+ */
+ int getAddAttempts();
+
+ /**
+ * Returns the number of times add() has returned false due to a collision.
+ * @return The number of collisions.
+ */
+ int getCollisions();
+
+ /**
+ * Checks if two values would collide after being transformed by this store's transformation.
+ * @param value1 The first value to compare.
+ * @param value2 The second value to compare.
+ * @return true if the transformations are equal, false otherwise.
+ */
+ boolean isCollision(T value1, T value2);
+
+ /**
+ * Returns an estimate of the store's memory usage.
+ * @return The memory usage
+ */
+ long getMemorySizeInBytes();
+
+ /**
+ * Returns the cap for the store's memory usage.
+ * @return The cap, in bytes
+ */
+ long getMemorySizeCapInBytes();
+
+ /**
+ * Returns whether the store is at memory capacity and can't accept more entries.
+ */
+ boolean isFull();
+
+ /**
+ * Deletes the internal data structure and regenerates it from the values passed in.
+ * Also resets all stats related to adding.
+ * @param newValues The keys that should be in the reset structure.
+ */
+ void regenerateStore(T[] newValues);
+
+ /**
+ * Deletes all keys and resets all stats related to adding.
+ */
+ void clear();
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java
new file mode 100644
index 0000000000000..ab3055a81d4c9
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier.keystore;
+
+import org.opensearch.common.metrics.CounterMetric;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * A stats holder for use in KeyLookupStore implementations.
+ * Getters should be exposed by the KeyLookupStore which uses it.
+ */
+public class KeyStoreStats {
+ protected CounterMetric size;
+ protected long memSizeCapInBytes;
+ protected CounterMetric numAddAttempts;
+ protected CounterMetric numCollisions;
+ protected boolean guaranteesNoFalseNegatives;
+ protected AtomicBoolean atCapacity;
+ protected CounterMetric numRemovalAttempts;
+ protected CounterMetric numSuccessfulRemovals;
+
+ protected KeyStoreStats(long memSizeCapInBytes) {
+ this.size = new CounterMetric();
+ this.numAddAttempts = new CounterMetric();
+ this.numCollisions = new CounterMetric();
+ this.memSizeCapInBytes = memSizeCapInBytes;
+ this.atCapacity = new AtomicBoolean(false);
+ this.numRemovalAttempts = new CounterMetric();
+ this.numSuccessfulRemovals = new CounterMetric();
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java
new file mode 100644
index 0000000000000..bb258439de83e
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java
@@ -0,0 +1,366 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier.keystore;
+
+import org.opensearch.common.metrics.CounterMetric;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.roaringbitmap.RoaringBitmap;
+
+/**
+ * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values.
+ * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28.
+ * It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be
+ * safely removed from the store. The fraction of collided values should be low,
+ * about 0.5% for a store with 10^7 values and a modulo of 2^28.
+ * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap.
+ */
+public class RBMIntKeyLookupStore implements KeyLookupStore {
+ /**
+ * An enum representing modulo values for use in the keystore
+ */
+ public enum KeystoreModuloValue {
+ NONE(0), // No modulo applied
+ TWO_TO_THIRTY_ONE((int) Math.pow(2, 31)),
+ TWO_TO_TWENTY_NINE((int) Math.pow(2, 29)),
+ TWO_TO_TWENTY_EIGHT((int) Math.pow(2, 28)), // recommended value (matches the class javadoc and the default constructor)
+ TWO_TO_TWENTY_SIX((int) Math.pow(2, 26));
+
+ private final int value;
+
+ private KeystoreModuloValue(int value) {
+ this.value = value;
+ }
+
+ public int getValue() {
+ return this.value;
+ }
+ }
+
+ protected final int modulo;
+ protected final int modulo_bitmask;
+ // Since our modulo is always a power of two we can optimize it by ANDing with a particular bitmask
+ KeyStoreStats stats;
+ protected RoaringBitmap rbm;
+ private HashMap collidedIntCounters;
+ private HashMap> removalSets;
+ protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+ protected final Lock readLock = lock.readLock();
+ protected final Lock writeLock = lock.writeLock();
+ private long mostRecentByteEstimate;
+ protected final int REFRESH_SIZE_EST_INTERVAL = 10000;
+ // Refresh size estimate every X new elements. Refreshes use the RBM's internal size estimator, which takes ~0.01 ms,
+ // so we don't want to do it on every get(), and it doesn't matter much if there are +- 10000 keys in this store
+ // in terms of storage impact
+
+ // Default constructor sets modulo = 2^28
+ public RBMIntKeyLookupStore(long memSizeCapInBytes) {
+ this(KeystoreModuloValue.TWO_TO_TWENTY_EIGHT, memSizeCapInBytes);
+ }
+
+ public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) {
+ this.modulo = moduloValue.getValue();
+ if (modulo > 0) {
+ this.modulo_bitmask = modulo - 1; // keep last log_2(modulo) bits
+ } else {
+ this.modulo_bitmask = -1; // -1 in twos complement is all ones -> includes all bits -> same as no modulo
+ }
+ this.stats = new KeyStoreStats(memSizeCapInBytes);
+ this.rbm = new RoaringBitmap();
+ this.collidedIntCounters = new HashMap<>();
+ this.removalSets = new HashMap<>();
+ this.mostRecentByteEstimate = 0L;
+ }
+
+ private int transform(int value) {
+ return value & modulo_bitmask;
+ }
+
+ private void handleCollisions(int transformedValue) {
+ stats.numCollisions.inc();
+ CounterMetric numCollisions = collidedIntCounters.get(transformedValue);
+ if (numCollisions == null) { // First time the transformedValue has had a collision
+ numCollisions = new CounterMetric();
+ numCollisions.inc(2);
+ collidedIntCounters.put(transformedValue, numCollisions); // Initialize the number of colliding keys to 2
+ } else {
+ numCollisions.inc();
+ }
+ }
+
+ @Override
+ public boolean add(Integer value) {
+ if (value == null) {
+ return false;
+ }
+ stats.numAddAttempts.inc();
+
+ if (getSize() % REFRESH_SIZE_EST_INTERVAL == 0) {
+ mostRecentByteEstimate = getMemorySizeInBytes();
+ }
+ if (getMemorySizeCapInBytes() > 0 && mostRecentByteEstimate > getMemorySizeCapInBytes()) {
+ stats.atCapacity.set(true);
+ return false;
+ }
+ int transformedValue = transform(value);
+
+ writeLock.lock();
+ try {
+ boolean alreadyContained;
+ // saves calling transform() an additional time
+ alreadyContained = rbm.contains(transformedValue);
+ if (!alreadyContained) {
+ rbm.add(transformedValue);
+ stats.size.inc();
+ return true;
+ }
+ // If the value is already pending removal, take it out of the removalList
+ HashSet removalSet = removalSets.get(transformedValue);
+ if (removalSet != null) {
+ removalSet.remove(value);
+ // Don't increment the counter - this is handled by handleCollisions() later
+ if (removalSet.isEmpty()) {
+ removalSets.remove(transformedValue);
+ }
+ }
+
+ handleCollisions(transformedValue);
+ return false;
+ } finally {
+ writeLock.unlock();
+ }
+ }
+
+ @Override
+ public boolean contains(Integer value) {
+ if (value == null) {
+ return false;
+ }
+ int transformedValue = transform(value);
+ readLock.lock();
+ try {
+ return rbm.contains(transformedValue);
+ } finally {
+ readLock.unlock();
+ }
+ }
+
+ @Override
+ public Integer getInternalRepresentation(Integer value) {
+ if (value == null) {
+ return 0;
+ }
+ return Integer.valueOf(transform(value));
+ }
+
+ /**
+ * Attempts to remove a value from the keystore. WARNING: Removing keys which have not been added to the keystore
+ * may cause undefined behavior, including future false negatives!!
+ * @param value The value to attempt to remove.
+ * @return true if the value was removed, false otherwise
+ */
+ @Override
+ public boolean remove(Integer value) {
+ if (value == null) {
+ return false;
+ }
+ int transformedValue = transform(value);
+ readLock.lock();
+ try {
+ if (!rbm.contains(transformedValue)) { // saves additional transform() call
+ return false;
+ }
+ // move below into write lock
+ stats.numRemovalAttempts.inc();
+ } finally {
+ readLock.unlock();
+ }
+ writeLock.lock();
+ try {
+ CounterMetric numCollisions = collidedIntCounters.get(transformedValue);
+ if (numCollisions != null) {
+ // This transformed value has had a collision before
+ HashSet removalSet = removalSets.get(transformedValue);
+ if (removalSet == null) {
+ // First time a removal has been attempted for this transformed value
+ HashSet newRemovalSet = new HashSet<>();
+ newRemovalSet.add(value); // Add the key value, not the transformed value, to the list of attempted removals for this
+ // transformedValue
+ removalSets.put(transformedValue, newRemovalSet);
+ numCollisions.dec();
+ } else {
+ if (removalSet.contains(value)) {
+ return false; // We have already attempted to remove this value. Do nothing
+ }
+ removalSet.add(value);
+ numCollisions.dec();
+ // If numCollisions has reached zero, we can safely remove all values in removalList
+ if (numCollisions.count() == 0) {
+ removeFromRBM(transformedValue);
+ collidedIntCounters.remove(transformedValue);
+ removalSets.remove(transformedValue);
+ return true;
+ }
+ }
+ return false;
+ }
+ // Otherwise, there's not been a collision for this transformedValue, so we can safely remove
+ removeFromRBM(transformedValue);
+ return true;
+ } finally {
+ writeLock.unlock();
+ }
+ }
+
+ // Helper fn for remove()
+ private void removeFromRBM(int transformedValue) {
+ rbm.remove(transformedValue);
+ stats.size.dec();
+ stats.numSuccessfulRemovals.inc();
+ }
+
+ @Override
+ public int getSize() {
+ readLock.lock();
+ try {
+ return (int) stats.size.count();
+ } finally {
+ readLock.unlock();
+ }
+ }
+
+ @Override
+ public int getAddAttempts() {
+ return (int) stats.numAddAttempts.count();
+ }
+
+ @Override
+ public int getCollisions() {
+ return (int) stats.numCollisions.count();
+ }
+
+ @Override
+ public boolean isCollision(Integer value1, Integer value2) {
+ if (value1 == null || value2 == null) {
+ return false;
+ }
+ return transform(value1) == transform(value2);
+ }
+
+ static double getRBMSizeMultiplier(int numEntries, int modulo) {
+ double effectiveModulo = (double) modulo / 2;
+ /* This model was created when we used % operator to calculate modulo. This has range (-modulo, modulo).
+ Now we have optimized to use a bitmask, which has range [0, modulo). So the number of possible values stored
+ is halved. */
+ if (modulo == 0) {
+ effectiveModulo = Math.pow(2, 32);
+ }
+ double x = Math.log10((double) numEntries / effectiveModulo);
+ if (x < -5) {
+ return 7.0;
+ }
+ if (x < -2.75) {
+ return -2.5 * x - 5.5;
+ }
+ if (x <= 0) {
+ return -3.0 / 22.0 * x + 1;
+ }
+ return 1;
+ }
+
+ @Override
+ public long getMemorySizeInBytes() {
+ double multiplier = getRBMSizeMultiplier((int) stats.size.count(), modulo);
+ return (long) (rbm.getSizeInBytes() * multiplier);
+ }
+
+ @Override
+ public long getMemorySizeCapInBytes() {
+ return stats.memSizeCapInBytes;
+ }
+
+ @Override
+ public boolean isFull() {
+ return stats.atCapacity.get();
+ }
+
+ @Override
+ public void regenerateStore(Integer[] newValues) {
+ rbm.clear();
+ collidedIntCounters = new HashMap<>();
+ removalSets = new HashMap<>();
+ stats.size = new CounterMetric();
+ stats.numAddAttempts = new CounterMetric();
+ stats.numCollisions = new CounterMetric();
+ stats.guaranteesNoFalseNegatives = true;
+ stats.numRemovalAttempts = new CounterMetric();
+ stats.numSuccessfulRemovals = new CounterMetric();
+ for (int i = 0; i < newValues.length; i++) {
+ if (newValues[i] != null) {
+ add(newValues[i]);
+ }
+ }
+ }
+
+ @Override
+ public void clear() {
+ regenerateStore(new Integer[] {});
+ }
+
+ public int getNumRemovalAttempts() {
+ return (int) stats.numRemovalAttempts.count();
+ }
+
+ public int getNumSuccessfulRemovals() {
+ return (int) stats.numSuccessfulRemovals.count();
+ }
+
+ public boolean valueHasHadCollision(Integer value) {
+ if (value == null) {
+ return false;
+ }
+ return collidedIntCounters.containsKey(transform(value));
+ }
+
+ CounterMetric getNumCollisionsForValue(int value) { // package private for testing
+ return collidedIntCounters.get(transform(value));
+ }
+
+ HashSet getRemovalSetForValue(int value) {
+ return removalSets.get(transform(value));
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/package-info.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/package-info.java
new file mode 100644
index 0000000000000..f3be16dfa3d32
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/package-info.java
@@ -0,0 +1,9 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+/** Package for disk tier keystore. */
+package org.opensearch.common.cache.tier.keystore;
diff --git a/server/src/main/java/org/opensearch/common/metrics/CounterMetric.java b/server/src/main/java/org/opensearch/common/metrics/CounterMetric.java
index cb181840406a5..5c48c1f772ff0 100644
--- a/server/src/main/java/org/opensearch/common/metrics/CounterMetric.java
+++ b/server/src/main/java/org/opensearch/common/metrics/CounterMetric.java
@@ -62,5 +62,4 @@ public void dec(long n) {
public long count() {
return counter.sum();
}
-
}
diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
index c2c6effc3336f..5e3f4e959648e 100644
--- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
@@ -79,6 +79,7 @@
import org.opensearch.cluster.service.ClusterManagerTaskThrottler;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.annotation.PublicApi;
+import org.opensearch.common.cache.tier.DiskTierTookTimePolicy;
import org.opensearch.common.logging.Loggers;
import org.opensearch.common.network.NetworkModule;
import org.opensearch.common.network.NetworkService;
@@ -691,7 +692,8 @@ public void apply(Settings value, Settings current, Settings previous) {
CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE,
CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT,
CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT,
- IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING
+ IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING,
+ DiskTierTookTimePolicy.DISK_TOOKTIME_THRESHOLD_SETTING
)
)
);
diff --git a/server/src/main/java/org/opensearch/index/cache/request/RequestCacheStats.java b/server/src/main/java/org/opensearch/index/cache/request/RequestCacheStats.java
index 24f68899c2ac7..4836479133b67 100644
--- a/server/src/main/java/org/opensearch/index/cache/request/RequestCacheStats.java
+++ b/server/src/main/java/org/opensearch/index/cache/request/RequestCacheStats.java
@@ -32,6 +32,9 @@
package org.opensearch.index.cache.request;
+import org.opensearch.OpenSearchException;
+import org.opensearch.Version;
+import org.opensearch.common.cache.tier.TierType;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
@@ -40,6 +43,8 @@
import org.opensearch.core.xcontent.XContentBuilder;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
/**
* Request for the query cache statistics
@@ -48,73 +53,169 @@
*/
public class RequestCacheStats implements Writeable, ToXContentFragment {
- private long memorySize;
- private long evictions;
- private long hitCount;
- private long missCount;
+ private Map defaultStatsMap = new HashMap<>() {
+ {
+ for (TierType tierType : TierType.values()) {
+ put(tierType.getStringValue(), new StatsHolder());
+ // Every possible tier type must have counters, even if they are disabled. Then the counters report 0
+ }
+ }
+ };
+
+ private Map tierSpecificStatsMap = new HashMap<>() {
+ {
+ put(TierType.ON_HEAP.getStringValue(), new ShardRequestCache.OnHeapStatsAccumulator());
+ put(TierType.DISK.getStringValue(), new ShardRequestCache.DiskStatsAccumulator());
+ }
+ };
public RequestCacheStats() {}
public RequestCacheStats(StreamInput in) throws IOException {
- memorySize = in.readVLong();
- evictions = in.readVLong();
- hitCount = in.readVLong();
- missCount = in.readVLong();
+ if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
+ this.defaultStatsMap = in.readMap(StreamInput::readString, StatsHolder::new);
+ // Manually fill the tier-specific stats map
+ this.tierSpecificStatsMap = new HashMap<>();
+ tierSpecificStatsMap.put(TierType.ON_HEAP.getStringValue(), new ShardRequestCache.OnHeapStatsAccumulator(in));
+ tierSpecificStatsMap.put(TierType.DISK.getStringValue(), new ShardRequestCache.DiskStatsAccumulator(in));
+ } else {
+ // objects from earlier versions only contain on-heap info, and do not have entries info
+ long memorySize = in.readVLong();
+ long evictions = in.readVLong();
+ long hitCount = in.readVLong();
+ long missCount = in.readVLong();
+ this.defaultStatsMap.put(TierType.ON_HEAP.getStringValue(), new StatsHolder(memorySize, evictions, hitCount, missCount, 0));
+ }
+ checkTierSpecificMap();
}
- public RequestCacheStats(long memorySize, long evictions, long hitCount, long missCount) {
- this.memorySize = memorySize;
- this.evictions = evictions;
- this.hitCount = hitCount;
- this.missCount = missCount;
+ public RequestCacheStats(Map defaultMap, Map tierSpecificMap) {
+ // Create a RequestCacheStats with multiple tiers' statistics
+ // The input maps don't have to have all tiers statistics available
+ for (TierType tierType : defaultMap.keySet()) {
+ defaultStatsMap.put(tierType.getStringValue(), defaultMap.get(tierType));
+ }
+ for (TierType tierType : tierSpecificMap.keySet()) {
+ tierSpecificStatsMap.put(tierType.getStringValue(), tierSpecificMap.get(tierType));
+ }
+ checkTierSpecificMap();
}
public void add(RequestCacheStats stats) {
- this.memorySize += stats.memorySize;
- this.evictions += stats.evictions;
- this.hitCount += stats.hitCount;
- this.missCount += stats.missCount;
+ for (TierType tierType : TierType.values()) {
+ defaultStatsMap.get(tierType.getStringValue()).add(stats.defaultStatsMap.get(tierType.getStringValue()));
+ tierSpecificStatsMap.get(tierType.getStringValue()).add(stats.tierSpecificStatsMap.get(tierType.getStringValue()));
+ }
}
+ private StatsHolder getTierStats(TierType tierType) {
+ return defaultStatsMap.get(tierType.getStringValue());
+ }
+
+ ShardRequestCache.TierStatsAccumulator getTierSpecificStats(TierType tierType) { // pkg-private for testing
+ return tierSpecificStatsMap.get(tierType.getStringValue());
+ }
+
+ public ShardRequestCache.DiskStatsAccumulator getDiskSpecificStats() {
+ return (ShardRequestCache.DiskStatsAccumulator) tierSpecificStatsMap.get(TierType.DISK.getStringValue());
+ }
+
+ public long getMemorySizeInBytes(TierType tierType) {
+ return getTierStats(tierType).totalMetric.count();
+ }
+
+ public ByteSizeValue getMemorySize(TierType tierType) {
+ return new ByteSizeValue(getMemorySizeInBytes(tierType));
+ }
+
+ public long getEvictions(TierType tierType) {
+ return getTierStats(tierType).evictionsMetric.count();
+ }
+
+ public long getHitCount(TierType tierType) {
+ return getTierStats(tierType).hitCount.count();
+ }
+
+ public long getMissCount(TierType tierType) {
+ return getTierStats(tierType).missCount.count();
+ }
+
+ public long getEntries(TierType tierType) {
+ return getTierStats(tierType).entries.count();
+ }
+
+ // By default, return on-heap stats if no tier is specified
+
public long getMemorySizeInBytes() {
- return this.memorySize;
+ return getMemorySizeInBytes(TierType.ON_HEAP);
}
public ByteSizeValue getMemorySize() {
- return new ByteSizeValue(memorySize);
+ return getMemorySize(TierType.ON_HEAP);
}
public long getEvictions() {
- return this.evictions;
+ return getEvictions(TierType.ON_HEAP);
}
public long getHitCount() {
- return this.hitCount;
+ return getHitCount(TierType.ON_HEAP);
}
public long getMissCount() {
- return this.missCount;
+ return getMissCount(TierType.ON_HEAP);
+ }
+
+ public long getEntries() {
+ return getEntries(TierType.ON_HEAP);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
- out.writeVLong(memorySize);
- out.writeVLong(evictions);
- out.writeVLong(hitCount);
- out.writeVLong(missCount);
+ if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
+ out.writeMap(this.defaultStatsMap, StreamOutput::writeString, (o, v) -> v.writeTo(o)); // each StatsHolder serializes itself
+ // Manually write tier-specific stats map
+ tierSpecificStatsMap.get(TierType.ON_HEAP.getStringValue()).writeTo(out);
+ tierSpecificStatsMap.get(TierType.DISK.getStringValue()).writeTo(out);
+ } else {
+ // Write only on-heap values, and don't write entries metric
+ StatsHolder heapStats = defaultStatsMap.get(TierType.ON_HEAP.getStringValue());
+ out.writeVLong(heapStats.getMemorySize());
+ out.writeVLong(heapStats.getEvictions());
+ out.writeVLong(heapStats.getHitCount());
+ out.writeVLong(heapStats.getMissCount());
+ }
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(Fields.REQUEST_CACHE_STATS);
- builder.humanReadableField(Fields.MEMORY_SIZE_IN_BYTES, Fields.MEMORY_SIZE, getMemorySize());
- builder.field(Fields.EVICTIONS, getEvictions());
- builder.field(Fields.HIT_COUNT, getHitCount());
- builder.field(Fields.MISS_COUNT, getMissCount());
+ // write on-heap stats outside of tiers object
+ getTierStats(TierType.ON_HEAP).toXContent(builder, params);
+ getTierSpecificStats(TierType.ON_HEAP).toXContent(builder, params);
+ builder.startObject(Fields.TIERS);
+ for (TierType tierType : TierType.values()) { // fixed order
+ if (tierType != TierType.ON_HEAP) {
+ String tier = tierType.getStringValue();
+ builder.startObject(tier);
+ defaultStatsMap.get(tier).toXContent(builder, params);
+ tierSpecificStatsMap.get(tier).toXContent(builder, params);
+ builder.endObject();
+ }
+ }
+ builder.endObject();
builder.endObject();
return builder;
}
+ private void checkTierSpecificMap() {
+ for (TierType tierType : TierType.values()) {
+ if (tierSpecificStatsMap.get(tierType.getStringValue()) == null) {
+ throw new OpenSearchException("Missing TierStatsAccumulator for TierType " + tierType.getStringValue());
+ }
+ }
+ }
+
/**
* Fields used for parsing and toXContent
*
@@ -122,10 +223,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
*/
static final class Fields {
static final String REQUEST_CACHE_STATS = "request_cache";
+ static final String TIERS = "tiers";
static final String MEMORY_SIZE = "memory_size";
static final String MEMORY_SIZE_IN_BYTES = "memory_size_in_bytes";
static final String EVICTIONS = "evictions";
static final String HIT_COUNT = "hit_count";
static final String MISS_COUNT = "miss_count";
+ static final String ENTRIES = "entries";
}
}
diff --git a/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java b/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java
index efad437804bef..67f920850e949 100644
--- a/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java
+++ b/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java
@@ -33,10 +33,21 @@
package org.opensearch.index.cache.request;
import org.apache.lucene.util.Accountable;
+import org.opensearch.OpenSearchException;
+import org.opensearch.common.cache.tier.CacheValue;
+import org.opensearch.common.cache.tier.DiskTierRequestStats;
+import org.opensearch.common.cache.tier.OnHeapTierRequestStats;
+import org.opensearch.common.cache.tier.TierRequestStats;
import org.opensearch.common.cache.tier.TierType;
import org.opensearch.common.metrics.CounterMetric;
import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+import org.opensearch.core.xcontent.ToXContentFragment;
+import org.opensearch.core.xcontent.XContentBuilder;
+import java.io.IOException;
import java.util.EnumMap;
/**
@@ -46,39 +57,47 @@
*/
public final class ShardRequestCache {
- private EnumMap statsHolder = new EnumMap<>(TierType.class);
+ // Holds stats common to all tiers
+ private final EnumMap defaultStatsHolder = new EnumMap<>(TierType.class);
+
+ // Holds tier-specific stats
+ private final EnumMap tierSpecificStatsHolder = new EnumMap<>(TierType.class);
public ShardRequestCache() {
+ tierSpecificStatsHolder.put(TierType.ON_HEAP, new OnHeapStatsAccumulator());
+ tierSpecificStatsHolder.put(TierType.DISK, new DiskStatsAccumulator());
for (TierType tierType : TierType.values()) {
- statsHolder.put(tierType, new StatsHolder());
+ defaultStatsHolder.put(tierType, new StatsHolder());
+ if (tierSpecificStatsHolder.get(tierType) == null) {
+ throw new OpenSearchException("Missing TierStatsAccumulator for TierType " + tierType.getStringValue());
+ }
}
}
public RequestCacheStats stats() {
- // TODO: Change RequestCacheStats to support disk tier stats.
- return new RequestCacheStats(
- statsHolder.get(TierType.ON_HEAP).totalMetric.count(),
- statsHolder.get(TierType.ON_HEAP).evictionsMetric.count(),
- statsHolder.get(TierType.ON_HEAP).hitCount.count(),
- statsHolder.get(TierType.ON_HEAP).missCount.count()
- );
+ return new RequestCacheStats(defaultStatsHolder, tierSpecificStatsHolder);
}
- public void onHit(TierType tierType) {
- statsHolder.get(tierType).hitCount.inc();
+ public void onHit(CacheValue cacheValue) {
+ TierType source = cacheValue.getSource();
+ defaultStatsHolder.get(source).hitCount.inc();
+ tierSpecificStatsHolder.get(source).addRequestStats(cacheValue.getStats());
}
- public void onMiss(TierType tierType) {
- statsHolder.get(tierType).missCount.inc();
+ public void onMiss(CacheValue cacheValue) {
+ TierType source = cacheValue.getSource();
+ defaultStatsHolder.get(source).missCount.inc();
+ tierSpecificStatsHolder.get(source).addRequestStats(cacheValue.getStats());
}
public void onCached(Accountable key, BytesReference value, TierType tierType) {
- statsHolder.get(tierType).totalMetric.inc(key.ramBytesUsed() + value.ramBytesUsed());
+ defaultStatsHolder.get(tierType).totalMetric.inc(key.ramBytesUsed() + value.ramBytesUsed());
+ defaultStatsHolder.get(tierType).entries.inc();
}
public void onRemoval(Accountable key, BytesReference value, boolean evicted, TierType tierType) {
if (evicted) {
- statsHolder.get(tierType).evictionsMetric.inc();
+ defaultStatsHolder.get(tierType).evictionsMetric.inc();
}
long dec = 0;
if (key != null) {
@@ -87,14 +106,118 @@ public void onRemoval(Accountable key, BytesReference value, boolean evicted, Ti
if (value != null) {
dec += value.ramBytesUsed();
}
- statsHolder.get(tierType).totalMetric.dec(dec);
+ defaultStatsHolder.get(tierType).totalMetric.dec(dec);
+ defaultStatsHolder.get(tierType).entries.dec();
+ }
+
+ /**
+ * An abstract class whose extending classes accumulate tier-specific stats.
+ * All extending classes should provide a constructor like TierStatsAccumulator(StreamInput in)
+ * as well as a no-argument constructor
+ * @param The tier-specific implementation of TierRequestStats to use
+ */
+ static abstract class TierStatsAccumulator implements Writeable, ToXContentFragment {
+ /**
+ * Add new stats from a single request to this holder.
+ * @param stats The stats from a single request to add
+ */
+ public abstract void addRequestStats(S stats);
+
+ /**
+ * Add the stats from another TierStatsAccumulator to this TierStatsAccumulator.
+ * Used when combining stats across multiple shards.
+ * @param other The other TierStatsAccumulator.
+ */
+ public abstract void add(TierStatsAccumulator other);
+ }
+
+ /**
+ * This class accumulates on-heap-tier-specific stats for a single shard.
+ * For now, on-heap tier has no unique stats, but future stats would be added here.
+ */
+ public static class OnHeapStatsAccumulator extends TierStatsAccumulator {
+ OnHeapStatsAccumulator() {}
+
+ OnHeapStatsAccumulator(StreamInput in) {}
+
+ @Override
+ public void addRequestStats(OnHeapTierRequestStats stats) {}
+
+ @Override
+ public void add(TierStatsAccumulator other) {}
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {}
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ return builder;
+ }
}
- static class StatsHolder {
+ /**
+ * This class accumulates disk-tier-specific stats for a single shard.
+ */
+ public static class DiskStatsAccumulator extends TierStatsAccumulator {
+ final CounterMetric totalGetTime;
+ final CounterMetric totalDiskReaches; // Number of times a get() has actually reached the disk
+
+ public DiskStatsAccumulator() {
+ this.totalGetTime = new CounterMetric();
+ this.totalDiskReaches = new CounterMetric();
+ }
+
+ public DiskStatsAccumulator(long totalGetTime, long totalDiskReaches) {
+ this.totalGetTime = new CounterMetric();
+ this.totalGetTime.inc(totalGetTime);
+ this.totalDiskReaches = new CounterMetric();
+ this.totalDiskReaches.inc(totalDiskReaches);
+ }
- final CounterMetric evictionsMetric = new CounterMetric();
- final CounterMetric totalMetric = new CounterMetric();
- final CounterMetric hitCount = new CounterMetric();
- final CounterMetric missCount = new CounterMetric();
+ public DiskStatsAccumulator(StreamInput in) throws IOException {
+ this(in.readVLong(), in.readVLong());
+ }
+
+ public long getTotalGetTime() {
+ return totalGetTime.count();
+ }
+
+ public long getTotalDiskReaches() {
+ return totalDiskReaches.count();
+ }
+
+ @Override
+ public void addRequestStats(DiskTierRequestStats stats) {
+ if (stats.getRequestReachedDisk()) {
+ this.totalDiskReaches.inc();
+ this.totalGetTime.inc(stats.getRequestGetTimeNanos());
+ }
+ }
+
+ @Override
+ public void add(TierStatsAccumulator other) {
+ assert other.getClass() == DiskStatsAccumulator.class;
+ DiskStatsAccumulator castOther = (DiskStatsAccumulator) other;
+ this.totalDiskReaches.inc(castOther.totalDiskReaches.count());
+ this.totalGetTime.inc(castOther.totalGetTime.count());
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeVLong(totalGetTime.count());
+ out.writeVLong(totalDiskReaches.count());
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.field(Fields.TOTAL_GET_TIME, getTotalGetTime());
+ builder.field(Fields.TOTAL_DISK_REACHES, getTotalDiskReaches());
+ return builder;
+ }
+
+ static final class Fields { // Used for field names in API response
+ static final String TOTAL_GET_TIME = "total_get_time_nanos";
+ static final String TOTAL_DISK_REACHES = "total_disk_reaches";
+ }
}
}
diff --git a/server/src/main/java/org/opensearch/index/cache/request/StatsHolder.java b/server/src/main/java/org/opensearch/index/cache/request/StatsHolder.java
new file mode 100644
index 0000000000000..74e610cf6d9cc
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/cache/request/StatsHolder.java
@@ -0,0 +1,114 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.cache.request;
+
+import org.opensearch.common.metrics.CounterMetric;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+import org.opensearch.core.common.unit.ByteSizeValue;
+import org.opensearch.core.xcontent.ToXContentFragment;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+/**
+ * The basic StatsHolder class, which accumulates shard-level stats that are common across all tiers.
+ * Used in ShardRequestCache. Extending classes also handle tier-specific stats for each tier.
+ */
+public class StatsHolder implements Serializable, Writeable, ToXContentFragment {
+ final CounterMetric totalMetric;
+ final CounterMetric evictionsMetric;
+ final CounterMetric hitCount;
+ final CounterMetric missCount;
+ final CounterMetric entries;
+
+ public StatsHolder() {
+ this.totalMetric = new CounterMetric();
+ this.evictionsMetric = new CounterMetric();
+ this.hitCount = new CounterMetric();
+ this.missCount = new CounterMetric();
+ this.entries = new CounterMetric();
+ }
+
+ public StatsHolder(long memorySize, long evictions, long hitCount, long missCount, long entries) {
+ // Switched argument order to match RequestCacheStats
+ this.totalMetric = new CounterMetric();
+ this.totalMetric.inc(memorySize);
+ this.evictionsMetric = new CounterMetric();
+ this.evictionsMetric.inc(evictions);
+ this.hitCount = new CounterMetric();
+ this.hitCount.inc(hitCount);
+ this.missCount = new CounterMetric();
+ this.missCount.inc(missCount);
+ this.entries = new CounterMetric();
+ this.entries.inc(entries);
+ }
+
+ public StatsHolder(StreamInput in) throws IOException {
+ // Read the values of the counter metrics from the stream. They should always be non-negative
+ // This object is new, so we shouldn't need version checks for different behavior
+ this(in.readVLong(), in.readVLong(), in.readVLong(), in.readVLong(), in.readVLong());
+ // java forces us to do this in one line
+ // guaranteed to be evaluated in correct order (https://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.7.4)
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeVLong(totalMetric.count());
+ out.writeVLong(evictionsMetric.count());
+ out.writeVLong(hitCount.count());
+ out.writeVLong(missCount.count());
+ out.writeVLong(entries.count());
+ }
+
+ public void add(StatsHolder otherStats) {
+ // Add the argument's metrics to this object's metrics.
+ totalMetric.inc(otherStats.totalMetric.count());
+ evictionsMetric.inc(otherStats.evictionsMetric.count());
+ hitCount.inc(otherStats.hitCount.count());
+ missCount.inc(otherStats.missCount.count());
+ entries.inc(otherStats.entries.count());
+ }
+
+ public long getEvictions() {
+ return evictionsMetric.count();
+ }
+
+ public long getMemorySize() {
+ return totalMetric.count();
+ }
+
+ public long getHitCount() {
+ return hitCount.count();
+ }
+
+ public long getMissCount() {
+ return missCount.count();
+ }
+
+ public long getEntries() {
+ return entries.count();
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.humanReadableField(
+ RequestCacheStats.Fields.MEMORY_SIZE_IN_BYTES,
+ RequestCacheStats.Fields.MEMORY_SIZE,
+ new ByteSizeValue(getMemorySize())
+ );
+ builder.field(RequestCacheStats.Fields.EVICTIONS, getEvictions());
+ builder.field(RequestCacheStats.Fields.HIT_COUNT, getHitCount());
+ builder.field(RequestCacheStats.Fields.MISS_COUNT, getMissCount());
+ builder.field(RequestCacheStats.Fields.ENTRIES, getEntries());
+ return builder;
+ }
+}
diff --git a/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java b/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java
index d9c256b4b4a94..ac12e6bdf389a 100644
--- a/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java
+++ b/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java
@@ -34,6 +34,7 @@
import org.opensearch.common.cache.RemovalNotification;
import org.opensearch.common.cache.RemovalReason;
+import org.opensearch.common.cache.tier.CacheValue;
import org.opensearch.common.cache.tier.TierType;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.index.cache.request.ShardRequestCache;
@@ -57,13 +58,13 @@ public final void onCached(IndicesRequestCache.Key key, BytesReference value, Ti
}
@Override
- public final void onHit(TierType tierType) {
- stats().onHit(tierType);
+ public final void onHit(CacheValue cacheValue) {
+ stats().onHit(cacheValue);
}
@Override
- public final void onMiss(TierType tierType) {
- stats().onMiss(tierType);
+ public final void onMiss(CacheValue cacheValue) {
+ stats().onMiss(cacheValue);
}
@Override
diff --git a/server/src/main/java/org/opensearch/indices/IRCKeyWriteableSerializer.java b/server/src/main/java/org/opensearch/indices/IRCKeyWriteableSerializer.java
new file mode 100644
index 0000000000000..92e874518df54
--- /dev/null
+++ b/server/src/main/java/org/opensearch/indices/IRCKeyWriteableSerializer.java
@@ -0,0 +1,62 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.indices;
+
+import org.opensearch.OpenSearchException;
+import org.opensearch.common.cache.tier.Serializer;
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.common.io.stream.BytesStreamInput;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * This class serializes the IndicesRequestCache.Key using its writeTo method.
+ */
+public class IRCKeyWriteableSerializer implements Serializer {
+
+ IndicesRequestCache irc;
+
+ public IRCKeyWriteableSerializer(IndicesRequestCache irc) {
+ this.irc = irc;
+ }
+
+ @Override
+ public byte[] serialize(IndicesRequestCache.Key object) {
+ try {
+ BytesStreamOutput os = new BytesStreamOutput();
+ object.writeTo(os);
+ return BytesReference.toBytes(os.bytes());
+ } catch (IOException e) {
+ throw new OpenSearchException(e);
+ }
+ }
+
+ @Override
+ public IndicesRequestCache.Key deserialize(byte[] bytes) {
+ try {
+ BytesStreamInput is = new BytesStreamInput(bytes, 0, bytes.length);
+ return irc.new Key(is);
+ } catch (IOException e) {
+ throw new OpenSearchException(e);
+ }
+ }
+
+ @Override
+ public boolean equals(IndicesRequestCache.Key object, byte[] bytes) {
+ // Deserialization is much slower than serialization for keys of order 1 KB,
+ // while time to serialize is fairly constant (per byte)
+ if (bytes.length < 5000) {
+ return Arrays.equals(serialize(object), bytes);
+ } else {
+ return object.equals(deserialize(bytes));
+ }
+ }
+}
diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java
index 2f06394ab7120..cb253ca6a1ed6 100644
--- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java
+++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java
@@ -40,6 +40,11 @@
import org.apache.lucene.util.RamUsageEstimator;
import org.opensearch.common.CheckedSupplier;
import org.opensearch.common.cache.RemovalNotification;
+import org.opensearch.common.cache.tier.BytesReferenceSerializer;
+import org.opensearch.common.cache.tier.CachePolicyInfoWrapper;
+import org.opensearch.common.cache.tier.CacheValue;
+import org.opensearch.common.cache.tier.DiskTierTookTimePolicy;
+import org.opensearch.common.cache.tier.EhCacheDiskCachingTier;
import org.opensearch.common.cache.tier.OnHeapCachingTier;
import org.opensearch.common.cache.tier.OpenSearchOnHeapCache;
import org.opensearch.common.cache.tier.TierType;
@@ -48,6 +53,7 @@
import org.opensearch.common.cache.tier.TieredCacheService;
import org.opensearch.common.cache.tier.TieredCacheSpilloverStrategyService;
import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
+import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
import org.opensearch.common.settings.Settings;
@@ -68,6 +74,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
+import java.util.function.Function;
/**
* The indices request cache allows to cache a shard level request stage responses, helping with improving
@@ -114,10 +121,9 @@ public final class IndicesRequestCache implements TieredCacheEventListener tieredCacheService;
-
private final IndicesService indicesService;
- IndicesRequestCache(Settings settings, IndicesService indicesService) {
+ IndicesRequestCache(Settings settings, IndicesService indicesService, ClusterSettings clusterSettings) {
this.size = INDICES_CACHE_QUERY_SIZE.get(settings);
this.expire = INDICES_CACHE_QUERY_EXPIRE.exists(settings) ? INDICES_CACHE_QUERY_EXPIRE.get(settings) : null;
long sizeInBytes = size.getBytes();
@@ -127,16 +133,53 @@ public final class IndicesRequestCache implements TieredCacheEventListener k.ramBytesUsed() + v.ramBytesUsed()
).setMaximumWeight(sizeInBytes).setExpireAfterAccess(expire).build();
+ Function transformationFunction = (data) -> {
+ try {
+ return getPolicyInfo(data);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ };
+ // enabling this for testing purposes. Remove/tweak!!
+ long CACHE_SIZE_IN_BYTES = 1000000L;
+ String SETTING_PREFIX = "indices.request.cache";
+ String STORAGE_PATH = indicesService.getNodePaths()[0].indicesPath.toString() + "/request_cache";
+
+ double diskTierKeystoreWeightFraction = 0.05; // Allocate 5% of the on-heap weight to the disk tier's keystore
+ long keystoreMaxWeight = (long) (diskTierKeystoreWeightFraction * INDICES_CACHE_QUERY_SIZE.get(settings).getBytes());
+
+ EhCacheDiskCachingTier ehcacheDiskTier = new EhCacheDiskCachingTier.Builder().setKeyType(
+ Key.class
+ )
+ .setValueType(BytesReference.class)
+ .setExpireAfterAccess(TimeValue.MAX_VALUE)
+ .setSettings(settings)
+ .setThreadPoolAlias("ehcacheTest")
+ .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES)
+ .setStoragePath(STORAGE_PATH)
+ .setSettingPrefix(SETTING_PREFIX)
+ .setKeySerializer(new IRCKeyWriteableSerializer(this))
+ .setValueSerializer(new BytesReferenceSerializer())
+ .setKeyStoreMaxWeightInBytes(keystoreMaxWeight)
+ .build();
+
// Initialize tiered cache service. TODO: Enable Disk tier when tiered support is turned on.
tieredCacheService = new TieredCacheSpilloverStrategyService.Builder().setOnHeapCachingTier(
openSearchOnHeapCache
- ).setTieredCacheEventListener(this).build();
+ )
+ .setOnDiskCachingTier(ehcacheDiskTier)
+ .setTieredCacheEventListener(this)
+ .withPolicy(new DiskTierTookTimePolicy(settings, clusterSettings, transformationFunction))
+ .build();
this.indicesService = indicesService;
}
@Override
public void close() {
tieredCacheService.invalidateAll();
+ if (tieredCacheService.getDiskCachingTier().isPresent()) {
+ tieredCacheService.getDiskCachingTier().get().close();
+ }
}
void clear(CacheEntity entity) {
@@ -145,8 +188,8 @@ void clear(CacheEntity entity) {
}
@Override
- public void onMiss(Key key, TierType tierType) {
- key.entity.onMiss(tierType);
+ public void onMiss(Key key, CacheValue cacheValue) {
+ key.entity.onMiss(cacheValue);
}
@Override
@@ -155,8 +198,8 @@ public void onRemoval(RemovalNotification notification) {
}
@Override
- public void onHit(Key key, BytesReference value, TierType tierType) {
- key.entity.onHit(tierType);
+ public void onHit(Key key, CacheValue cacheValue) {
+ key.entity.onHit(cacheValue);
}
@Override
@@ -209,13 +252,18 @@ void invalidate(CacheEntity cacheEntity, DirectoryReader reader, BytesReference
tieredCacheService.invalidate(new Key(cacheEntity, cacheKey, readerCacheKeyId));
}
+ public static CachePolicyInfoWrapper getPolicyInfo(BytesReference data) throws IOException {
+ // Reads the policy info corresponding to this QSR, written in IndicesService$loadIntoContext,
+ // without having to create a potentially large short-lived QSR object just for this purpose
+ return new CachePolicyInfoWrapper(data.streamInput());
+ }
+
/**
* Loader for the request cache
*
* @opensearch.internal
*/
private static class Loader implements TieredCacheLoader {
-
private final CacheEntity entity;
private final CheckedSupplier loader;
private boolean loaded;
@@ -262,12 +310,12 @@ interface CacheEntity extends Accountable, Writeable {
/**
* Called each time this entity has a cache hit.
*/
- void onHit(TierType tierType);
+ void onHit(CacheValue cacheValue);
/**
* Called each time this entity has a cache miss.
*/
- void onMiss(TierType tierType);
+ void onMiss(CacheValue cacheValue);
/**
* Called when this entity instance is removed
diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java
index 34e0153dbe730..c8150103b29b5 100644
--- a/server/src/main/java/org/opensearch/indices/IndicesService.java
+++ b/server/src/main/java/org/opensearch/indices/IndicesService.java
@@ -61,6 +61,7 @@
import org.opensearch.common.CheckedFunction;
import org.opensearch.common.CheckedSupplier;
import org.opensearch.common.Nullable;
+import org.opensearch.common.cache.tier.CachePolicyInfoWrapper;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
@@ -405,7 +406,7 @@ public IndicesService(
this.shardsClosedTimeout = settings.getAsTime(INDICES_SHARDS_CLOSED_TIMEOUT, new TimeValue(1, TimeUnit.DAYS));
this.analysisRegistry = analysisRegistry;
this.indexNameExpressionResolver = indexNameExpressionResolver;
- this.indicesRequestCache = new IndicesRequestCache(settings, this);
+ this.indicesRequestCache = new IndicesRequestCache(settings, this, clusterService.getClusterSettings());
this.indicesQueryCache = new IndicesQueryCache(settings);
this.mapperRegistry = mapperRegistry;
this.namedWriteableRegistry = namedWriteableRegistry;
@@ -1690,6 +1691,10 @@ public void loadIntoContext(ShardSearchRequest request, SearchContext context, Q
boolean[] loadedFromCache = new boolean[] { true };
BytesReference bytesReference = cacheShardLevelResult(context.indexShard(), directoryReader, request.cacheKey(), out -> {
queryPhase.execute(context);
+ CachePolicyInfoWrapper policyInfo = new CachePolicyInfoWrapper(context.queryResult().getTookTimeNanos());
+ policyInfo.writeTo(out);
+ // Write relevant info for cache tier policies before the whole QuerySearchResult, so we don't have to read
+ // the whole QSR into memory when we decide whether to allow it into a particular cache tier based on took time/other info
context.queryResult().writeToNoId(out);
loadedFromCache[0] = false;
});
@@ -1698,6 +1703,7 @@ public void loadIntoContext(ShardSearchRequest request, SearchContext context, Q
// restore the cached query result into the context
final QuerySearchResult result = context.queryResult();
StreamInput in = new NamedWriteableAwareStreamInput(bytesReference.streamInput(), namedWriteableRegistry);
+ CachePolicyInfoWrapper policyInfo = new CachePolicyInfoWrapper(in); // This wrapper is not needed outside the cache
result.readFromWithId(context.id(), in);
result.setSearchShardTarget(context.shardTarget());
} else if (context.queryResult().searchTimedOut()) {
@@ -1955,6 +1961,10 @@ public boolean allPendingDanglingIndicesWritten() {
|| (danglingIndicesToWrite.isEmpty() && danglingIndicesThreadPoolExecutor.getActiveCount() == 0);
}
+ public NodeEnvironment.NodePath[] getNodePaths() {
+ return nodeEnv.nodePaths();
+ }
+
/**
* Validates the cluster default index refresh interval.
*
diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhase.java b/server/src/main/java/org/opensearch/search/query/QueryPhase.java
index f3cf2c13ecdef..e6c281fdf74f4 100644
--- a/server/src/main/java/org/opensearch/search/query/QueryPhase.java
+++ b/server/src/main/java/org/opensearch/search/query/QueryPhase.java
@@ -131,6 +131,7 @@ public void preProcess(SearchContext context) {
}
public void execute(SearchContext searchContext) throws QueryPhaseExecutionException {
+ final long startTime = System.nanoTime();
if (searchContext.hasOnlySuggest()) {
suggestProcessor.process(searchContext);
searchContext.queryResult()
@@ -138,6 +139,7 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
new TopDocsAndMaxScore(new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), Lucene.EMPTY_SCORE_DOCS), Float.NaN),
new DocValueFormat[0]
);
+ searchContext.queryResult().setTookTimeNanos(System.nanoTime() - startTime);
return;
}
@@ -165,6 +167,7 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
);
searchContext.queryResult().profileResults(shardResults);
}
+ searchContext.queryResult().setTookTimeNanos(System.nanoTime() - startTime);
}
// making public for testing
@@ -292,7 +295,6 @@ static boolean executeInternal(SearchContext searchContext, QueryPhaseSearcher q
queryResult.nodeQueueSize(rExecutor.getCurrentQueueSize());
queryResult.serviceTimeEWMA((long) rExecutor.getTaskExecutionEWMA());
}
-
return shouldRescore;
} finally {
// Search phase has finished, no longer need to check for timeout
diff --git a/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java b/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java
index f8a1e99ff585f..35775e924629e 100644
--- a/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java
+++ b/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java
@@ -34,6 +34,7 @@
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.TotalHits;
+import org.opensearch.Version;
import org.opensearch.common.io.stream.DelayableWriteable;
import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
import org.opensearch.core.common.io.stream.StreamInput;
@@ -87,6 +88,7 @@ public final class QuerySearchResult extends SearchPhaseResult {
private int nodeQueueSize = -1;
private final boolean isNull;
+ private Long tookTimeNanos = null;
public QuerySearchResult() {
this(false);
@@ -364,6 +366,11 @@ public void readFromWithId(ShardSearchContextId id, StreamInput in) throws IOExc
nodeQueueSize = in.readInt();
setShardSearchRequest(in.readOptionalWriteable(ShardSearchRequest::new));
setRescoreDocIds(new RescoreDocIds(in));
+ if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
+ tookTimeNanos = in.readOptionalLong();
+ } else {
+ tookTimeNanos = null;
+ }
}
@Override
@@ -406,6 +413,9 @@ public void writeToNoId(StreamOutput out) throws IOException {
out.writeInt(nodeQueueSize);
out.writeOptionalWriteable(getShardSearchRequest());
getRescoreDocIds().writeTo(out);
+ if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
+ out.writeOptionalLong(tookTimeNanos);
+ }
}
public TotalHits getTotalHits() {
@@ -415,4 +425,12 @@ public TotalHits getTotalHits() {
public float getMaxScore() {
return maxScore;
}
+
+ public Long getTookTimeNanos() {
+ return tookTimeNanos;
+ }
+
+ public void setTookTimeNanos(long tookTime) { // Had to make this public again for use in DiskTierPolicyTests.java
+ tookTimeNanos = tookTime;
+ }
}
diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy
index db5ee445f413a..64decc7da12ca 100644
--- a/server/src/main/resources/org/opensearch/bootstrap/security.policy
+++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy
@@ -191,5 +191,6 @@ grant {
// For ehcache
permission java.lang.RuntimePermission "createClassLoader";
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
-
+ permission java.lang.RuntimePermission "accessDeclaredMembers";
+ permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};
diff --git a/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java b/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java
new file mode 100644
index 0000000000000..af81f04149ae6
--- /dev/null
+++ b/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.cache.tier;
+
+import org.opensearch.common.Randomness;
+import org.opensearch.common.bytes.ReleasableBytesReference;
+import org.opensearch.common.util.BigArrays;
+import org.opensearch.common.util.PageCacheRecycler;
+import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.common.bytes.CompositeBytesReference;
+import org.opensearch.core.common.util.ByteArray;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.Random;
+
+public class BytesReferenceSerializerTests extends OpenSearchTestCase {
+ public void testEquality() throws Exception {
+ BytesReferenceSerializer ser = new BytesReferenceSerializer();
+ // Test that values are equal before and after serialization, for each implementation of BytesReference.
+ byte[] bytesValue = new byte[1000];
+ Random rand = Randomness.get();
+ rand.nextBytes(bytesValue);
+
+ BytesReference ba = new BytesArray(bytesValue);
+ byte[] serialized = ser.serialize(ba);
+ assertTrue(ser.equals(ba, serialized));
+ BytesReference deserialized = ser.deserialize(serialized);
+ assertEquals(ba, deserialized);
+
+ ba = new BytesArray(new byte[] {});
+ serialized = ser.serialize(ba);
+ assertTrue(ser.equals(ba, serialized));
+ deserialized = ser.deserialize(serialized);
+ assertEquals(ba, deserialized);
+
+ BytesReference cbr = CompositeBytesReference.of(new BytesArray(bytesValue), new BytesArray(bytesValue));
+ serialized = ser.serialize(cbr);
+ assertTrue(ser.equals(cbr, serialized));
+ deserialized = ser.deserialize(serialized);
+ assertEquals(cbr, deserialized);
+
+ // We need the PagedBytesReference to be larger than the page size (16 KB) in order to actually create it
+ byte[] pbrValue = new byte[PageCacheRecycler.PAGE_SIZE_IN_BYTES * 2];
+ rand.nextBytes(pbrValue);
+ ByteArray arr = BigArrays.NON_RECYCLING_INSTANCE.newByteArray(pbrValue.length);
+ arr.set(0L, pbrValue, 0, pbrValue.length);
+ assert !arr.hasArray();
+ BytesReference pbr = BytesReference.fromByteArray(arr, pbrValue.length);
+ serialized = ser.serialize(pbr);
+ assertTrue(ser.equals(pbr, serialized));
+ deserialized = ser.deserialize(serialized);
+ assertEquals(pbr, deserialized);
+
+ BytesReference rbr = new ReleasableBytesReference(new BytesArray(bytesValue), ReleasableBytesReference.NO_OP);
+ serialized = ser.serialize(rbr);
+ assertTrue(ser.equals(rbr, serialized));
+ deserialized = ser.deserialize(serialized);
+ assertEquals(rbr, deserialized);
+ }
+}
diff --git a/server/src/test/java/org/opensearch/common/cache/tier/DiskTierTookTimePolicyTests.java b/server/src/test/java/org/opensearch/common/cache/tier/DiskTierTookTimePolicyTests.java
new file mode 100644
index 0000000000000..f7df259ff44d2
--- /dev/null
+++ b/server/src/test/java/org/opensearch/common/cache/tier/DiskTierTookTimePolicyTests.java
@@ -0,0 +1,146 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier;
+
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHits;
+import org.opensearch.action.OriginalIndices;
+import org.opensearch.action.OriginalIndicesTests;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.common.UUIDs;
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.core.common.Strings;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.indices.IndicesRequestCache;
+import org.opensearch.search.DocValueFormat;
+import org.opensearch.search.SearchShardTarget;
+import org.opensearch.search.internal.AliasFilter;
+import org.opensearch.search.internal.ShardSearchContextId;
+import org.opensearch.search.internal.ShardSearchRequest;
+import org.opensearch.search.query.QuerySearchResult;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+import java.util.function.Function;
+
+public class DiskTierTookTimePolicyTests extends OpenSearchTestCase {
+ private final Function transformationFunction = (data) -> {
+ try {
+ return IndicesRequestCache.getPolicyInfo(data);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ };
+
+ private DiskTierTookTimePolicy getTookTimePolicy() {
+ // dummy settings
+ Settings dummySettings = Settings.EMPTY;
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ return new DiskTierTookTimePolicy(dummySettings, dummyClusterSettings, transformationFunction);
+ }
+
+ public void testQSRSetupFunction() throws IOException {
+ Long ttn = 100000000000L;
+ QuerySearchResult qsr = getQSR(ttn);
+ assertEquals(ttn, qsr.getTookTimeNanos());
+ }
+
+ public void testTookTimePolicy() throws Exception {
+ DiskTierTookTimePolicy tookTimePolicy = getTookTimePolicy();
+
+ // manually set threshold for test
+ double threshMillis = 10;
+ long shortMillis = (long) (0.9 * threshMillis);
+ long longMillis = (long) (1.5 * threshMillis);
+ tookTimePolicy.setThreshold(new TimeValue((long) threshMillis));
+ BytesReference shortTime = getValidPolicyInput(getQSR(shortMillis * 1000000));
+ BytesReference longTime = getValidPolicyInput(getQSR(longMillis * 1000000));
+
+ boolean shortResult = tookTimePolicy.checkData(shortTime);
+ assertFalse(shortResult);
+ boolean longResult = tookTimePolicy.checkData(longTime);
+ assertTrue(longResult);
+
+ DiskTierTookTimePolicy disabledPolicy = getTookTimePolicy();
+ disabledPolicy.setThreshold(TimeValue.ZERO);
+ shortResult = disabledPolicy.checkData(shortTime);
+ assertTrue(shortResult);
+ longResult = disabledPolicy.checkData(longTime);
+ assertTrue(longResult);
+ }
+
+ public static QuerySearchResult getQSR(long tookTimeNanos) {
+ // package-private, also used by IndicesRequestCacheTests.java
+ // setup from QuerySearchResultTests.java
+ ShardId shardId = new ShardId("index", "uuid", randomInt());
+ SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(randomBoolean());
+ ShardSearchRequest shardSearchRequest = new ShardSearchRequest(
+ OriginalIndicesTests.randomOriginalIndices(),
+ searchRequest,
+ shardId,
+ 1,
+ new AliasFilter(null, Strings.EMPTY_ARRAY),
+ 1.0f,
+ randomNonNegativeLong(),
+ null,
+ new String[0]
+ );
+ ShardSearchContextId id = new ShardSearchContextId(UUIDs.base64UUID(), randomLong());
+ QuerySearchResult result = new QuerySearchResult(
+ id,
+ new SearchShardTarget("node", shardId, null, OriginalIndices.NONE),
+ shardSearchRequest
+ );
+ TopDocs topDocs = new TopDocs(new TotalHits(randomLongBetween(0, Long.MAX_VALUE), TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
+ result.topDocs(new TopDocsAndMaxScore(topDocs, randomBoolean() ? Float.NaN : randomFloat()), new DocValueFormat[0]);
+
+ result.setTookTimeNanos(tookTimeNanos);
+ return result;
+ }
+
+ private BytesReference getValidPolicyInput(QuerySearchResult qsr) throws IOException {
+ // When it's used in the cache, the policy will receive BytesReferences which have a CachePolicyInfoWrapper
+ // at the beginning of them, followed by the actual QSR.
+ CachePolicyInfoWrapper policyInfo = new CachePolicyInfoWrapper(qsr.getTookTimeNanos());
+ BytesStreamOutput out = new BytesStreamOutput();
+ policyInfo.writeTo(out);
+ qsr.writeTo(out);
+ return out.bytes();
+ }
+}
diff --git a/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java b/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java
index 804f236264daa..139e82b274d34 100644
--- a/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java
+++ b/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java
@@ -8,16 +8,22 @@
package org.opensearch.common.cache.tier;
+import org.opensearch.common.Randomness;
import org.opensearch.common.cache.RemovalListener;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
+import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.env.NodeEnvironment;
import org.opensearch.test.OpenSearchSingleNodeTestCase;
import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.Random;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Phaser;
@@ -40,6 +46,8 @@ public void testBasicGetAndPut() throws IOException {
.setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES)
.setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
.setSettingPrefix(SETTING_PREFIX)
+ .setKeySerializer(new StringSerializer())
+ .setValueSerializer(new StringSerializer())
.build();
int randomKeys = randomIntBetween(10, 100);
Map keyValueMap = new HashMap<>();
@@ -50,13 +58,52 @@ public void testBasicGetAndPut() throws IOException {
ehCacheDiskCachingTierNew.put(entry.getKey(), entry.getValue());
}
for (Map.Entry entry : keyValueMap.entrySet()) {
- String value = ehCacheDiskCachingTierNew.get(entry.getKey());
- assertEquals(entry.getValue(), value);
+ CacheValue value = ehCacheDiskCachingTierNew.get(entry.getKey());
+ assertEquals(entry.getValue(), value.value);
+ assertEquals(TierType.DISK, value.getSource());
+ assertTrue(((DiskTierRequestStats) value.getStats()).getRequestReachedDisk());
+ assertTrue(((DiskTierRequestStats) value.getStats()).getRequestGetTimeNanos() > 0);
}
ehCacheDiskCachingTierNew.close();
}
}
+ public void testBasicGetAndPutBytesReference() throws Exception {
+ Settings settings = Settings.builder().build();
+ try (NodeEnvironment env = newNodeEnvironment(settings)) {
+ EhCacheDiskCachingTier ehCacheDiskCachingTier = new EhCacheDiskCachingTier.Builder<
+ String,
+ BytesReference>().setKeyType(String.class)
+ .setValueType(BytesReference.class)
+ .setExpireAfterAccess(TimeValue.MAX_VALUE)
+ .setSettings(settings)
+ .setThreadPoolAlias("ehcacheTest")
+ .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES * 2) // bigger so no evictions happen
+ .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
+ .setSettingPrefix(SETTING_PREFIX)
+ .setKeySerializer(new StringSerializer())
+ .setValueSerializer(new BytesReferenceSerializer())
+ .build();
+ int randomKeys = randomIntBetween(10, 100);
+ int valueLength = 1000;
+ Random rand = Randomness.get();
+ Map keyValueMap = new HashMap<>();
+ for (int i = 0; i < randomKeys; i++) {
+ byte[] valueBytes = new byte[valueLength];
+ rand.nextBytes(valueBytes);
+ keyValueMap.put(UUID.randomUUID().toString(), new BytesArray(valueBytes));
+ }
+ for (Map.Entry entry : keyValueMap.entrySet()) {
+ ehCacheDiskCachingTier.put(entry.getKey(), entry.getValue());
+ }
+ for (Map.Entry entry : keyValueMap.entrySet()) {
+ BytesReference value = ehCacheDiskCachingTier.get(entry.getKey()).value;
+ assertEquals(entry.getValue(), value);
+ }
+ ehCacheDiskCachingTier.close();
+ }
+ }
+
public void testConcurrentPut() throws Exception {
Settings settings = Settings.builder().build();
try (NodeEnvironment env = newNodeEnvironment(settings)) {
@@ -69,6 +116,8 @@ public void testConcurrentPut() throws Exception {
.setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES)
.setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
.setSettingPrefix(SETTING_PREFIX)
+ .setKeySerializer(new StringSerializer())
+ .setValueSerializer(new StringSerializer())
.build();
int randomKeys = randomIntBetween(20, 100);
Thread[] threads = new Thread[randomKeys];
@@ -91,8 +140,8 @@ public void testConcurrentPut() throws Exception {
phaser.arriveAndAwaitAdvance(); // Will trigger parallel puts above.
countDownLatch.await(); // Wait for all threads to finish
for (Map.Entry entry : keyValueMap.entrySet()) {
- String value = ehCacheDiskCachingTierNew.get(entry.getKey());
- assertEquals(entry.getValue(), value);
+ CacheValue value = ehCacheDiskCachingTierNew.get(entry.getKey());
+ assertEquals(entry.getValue(), value.value);
}
ehCacheDiskCachingTierNew.close();
}
@@ -111,6 +160,8 @@ public void testEhcacheParallelGets() throws Exception {
.setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
.setSettingPrefix(SETTING_PREFIX)
.setIsEventListenerModeSync(true) // For accurate count
+ .setKeySerializer(new StringSerializer())
+ .setValueSerializer(new StringSerializer())
.build();
ehCacheDiskCachingTierNew.setRemovalListener(removalListener(new AtomicInteger()));
int randomKeys = randomIntBetween(20, 100);
@@ -129,7 +180,7 @@ public void testEhcacheParallelGets() throws Exception {
for (Map.Entry entry : keyValueMap.entrySet()) {
threads[j] = new Thread(() -> {
phaser.arriveAndAwaitAdvance();
- assertEquals(entry.getValue(), ehCacheDiskCachingTierNew.get(entry.getKey()));
+ assertEquals(entry.getValue(), ehCacheDiskCachingTierNew.get(entry.getKey()).value);
countDownLatch.countDown();
});
threads[j].start();
@@ -153,6 +204,8 @@ public void testEhcacheKeyIterator() throws Exception {
.setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES)
.setSettingPrefix(SETTING_PREFIX)
.setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
+ .setKeySerializer(new StringSerializer())
+ .setValueSerializer(new StringSerializer())
.build();
int randomKeys = randomIntBetween(2, 2);
@@ -187,6 +240,8 @@ public void testCompute() throws Exception {
.setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES)
.setSettingPrefix(SETTING_PREFIX)
.setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
+ .setKeySerializer(new StringSerializer())
+ .setValueSerializer(new StringSerializer())
.build();
// For now it is unsupported.
assertThrows(
@@ -220,7 +275,62 @@ public boolean isLoaded() {
}
}
+ /*public void testThresholdPolicy() throws Exception {
+ long slowTookTimeNanos = 10000000000L; // 10 seconds
+ BytesReference slowResult = DiskTierTookTimePolicyTests.getQSRBytesReference(slowTookTimeNanos);
+
+ long fastTookTimeNanos = 100000L; // 100 microseconds
+ BytesReference fastResult = DiskTierTookTimePolicyTests.getQSRBytesReference(fastTookTimeNanos);
+
+ long thresholdMillis = 10;
+
+ // For this unit test, set the policy's threshold directly rather than from cluster settings
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ DiskTierTookTimePolicy policy = new DiskTierTookTimePolicy(Settings.EMPTY, dummyClusterSettings);
+ policy.setThreshold(new TimeValue(thresholdMillis));
+
+ Settings settings = Settings.builder().build();
+ try (NodeEnvironment env = newNodeEnvironment(settings)) {
+ EhCacheDiskCachingTier tier = new EhCacheDiskCachingTier.Builder()
+ .setKeyType(String.class)
+ .setValueType(BytesReference.class)
+ .setExpireAfterAccess(TimeValue.MAX_VALUE)
+ .setSettings(settings)
+ .setThreadPoolAlias("ehcacheTest")
+ .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES)
+ .setSettingPrefix(SETTING_PREFIX)
+ .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache")
+ .withPolicy(policy)
+ .build();
+ tier.put("slow", slowResult);
+ assertEquals(slowResult, tier.get("slow")); // key "slow" is found because the policy accepted it
+ tier.put("fast", fastResult);
+ assertNull(tier.get("fast")); // key "fast" -> null because the policy rejected it
+ tier.close();
+ }
+ }*/
+
private RemovalListener removalListener(AtomicInteger counter) {
return notification -> counter.incrementAndGet();
}
+
+ private static class StringSerializer implements Serializer {
+
+ private final Charset charset = StandardCharsets.UTF_8;
+
+ @Override
+ public byte[] serialize(String object) {
+ return object.getBytes(charset);
+ }
+
+ @Override
+ public String deserialize(byte[] bytes) {
+ return new String(bytes, charset);
+ }
+
+ @Override
+ public boolean equals(String object, byte[] bytes) {
+ return object.equals(deserialize(bytes));
+ }
+ }
}
diff --git a/server/src/test/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyServiceTests.java b/server/src/test/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyServiceTests.java
index bb7a22cc26037..855ab1be6b1e5 100644
--- a/server/src/test/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyServiceTests.java
+++ b/server/src/test/java/org/opensearch/common/cache/tier/TieredCacheSpilloverStrategyServiceTests.java
@@ -16,10 +16,12 @@
import java.util.ArrayList;
import java.util.EnumMap;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Function;
public class TieredCacheSpilloverStrategyServiceTests extends OpenSearchTestCase {
@@ -29,7 +31,8 @@ public void testComputeAndAbsentWithoutAnyOnHeapCacheEviction() throws Exception
TieredCacheSpilloverStrategyService spilloverStrategyService = intializeTieredCacheService(
onHeapCacheSize,
randomIntBetween(1, 4),
- eventListener
+ eventListener,
+ null
);
int numOfItems1 = randomIntBetween(1, onHeapCacheSize / 2 - 1);
List keys = new ArrayList<>();
@@ -73,7 +76,8 @@ public void testComputeAndAbsentWithEvictionsFromOnHeapCache() throws Exception
TieredCacheSpilloverStrategyService spilloverStrategyService = intializeTieredCacheService(
onHeapCacheSize,
diskCacheSize,
- eventListener
+ eventListener,
+ null
);
// Put values in cache more than it's size and cause evictions from onHeap.
@@ -140,7 +144,8 @@ public void testComputeAndAbsentWithEvictionsFromBothTier() throws Exception {
TieredCacheSpilloverStrategyService spilloverStrategyService = intializeTieredCacheService(
onHeapCacheSize,
diskCacheSize,
- eventListener
+ eventListener,
+ null
);
int numOfItems = randomIntBetween(totalSize + 1, totalSize * 3);
@@ -161,7 +166,8 @@ public void testGetAndCount() throws Exception {
TieredCacheSpilloverStrategyService spilloverStrategyService = intializeTieredCacheService(
onHeapCacheSize,
diskCacheSize,
- eventListener
+ eventListener,
+ null
);
int numOfItems1 = randomIntBetween(onHeapCacheSize + 1, totalSize);
@@ -198,6 +204,7 @@ public void testGetAndCount() throws Exception {
public void testWithDiskTierNull() throws Exception {
int onHeapCacheSize = randomIntBetween(10, 30);
MockTieredCacheEventListener eventListener = new MockTieredCacheEventListener();
+ Function identityFunction = (String value) -> { return value; };
TieredCacheSpilloverStrategyService spilloverStrategyService = new TieredCacheSpilloverStrategyService.Builder<
String,
String>().setOnHeapCachingTier(new MockOnHeapCacheTier<>(onHeapCacheSize)).setTieredCacheEventListener(eventListener).build();
@@ -212,6 +219,70 @@ public void testWithDiskTierNull() throws Exception {
assertEquals(0, eventListener.enumMap.get(TierType.DISK).missCount.count());
}
+ public void testDiskTierPolicies() throws Exception {
+ // For policy function, allow if what it receives starts with "a" and string is even length
+ ArrayList> policies = new ArrayList<>();
+ policies.add(new AllowFirstLetterA());
+ policies.add(new AllowEvenLengths());
+
+ int onHeapCacheSize = 0;
+ int diskCacheSize = 10000;
+ MockTieredCacheEventListener eventListener = new MockTieredCacheEventListener();
+ TieredCacheSpilloverStrategyService spilloverStrategyService = intializeTieredCacheService(
+ onHeapCacheSize,
+ diskCacheSize,
+ eventListener,
+ policies
+ );
+
+ Map keyValuePairs = new HashMap<>();
+ Map expectedOutputs = new HashMap<>();
+ keyValuePairs.put("key1", "abcd");
+ expectedOutputs.put("key1", true);
+ keyValuePairs.put("key2", "abcde");
+ expectedOutputs.put("key2", false);
+ keyValuePairs.put("key3", "bbc");
+ expectedOutputs.put("key3", false);
+ keyValuePairs.put("key4", "ab");
+ expectedOutputs.put("key4", true);
+ keyValuePairs.put("key5", "");
+ expectedOutputs.put("key5", false);
+
+ TieredCacheLoader loader = getTieredCacheLoaderWithKeyValueMap(keyValuePairs);
+
+ for (String key : keyValuePairs.keySet()) {
+ Boolean expectedOutput = expectedOutputs.get(key);
+ String value = spilloverStrategyService.computeIfAbsent(key, loader);
+ assertEquals(keyValuePairs.get(key), value);
+ String result = spilloverStrategyService.get(key);
+ if (expectedOutput) {
+ // Should retrieve from disk tier if it was accepted
+ assertEquals(keyValuePairs.get(key), result);
+ } else {
+ // Should miss as heap tier size = 0 and the policy rejected it
+ assertNull(result);
+ }
+ }
+ }
+
+ private static class AllowFirstLetterA implements CacheTierPolicy {
+ @Override
+ public boolean checkData(String data) {
+ try {
+ return (data.charAt(0) == 'a');
+ } catch (StringIndexOutOfBoundsException e) {
+ return false;
+ }
+ }
+ }
+
+ private static class AllowEvenLengths implements CacheTierPolicy {
+ @Override
+ public boolean checkData(String data) {
+ return data.length() % 2 == 0;
+ }
+ }
+
private TieredCacheLoader getTieredCacheLoader() {
return new TieredCacheLoader() {
boolean isLoaded = false;
@@ -229,16 +300,41 @@ public boolean isLoaded() {
};
}
+ private TieredCacheLoader getTieredCacheLoaderWithKeyValueMap(Map map) {
+ return new TieredCacheLoader() {
+ boolean isLoaded;
+
+ @Override
+ public String load(String key) throws Exception {
+ isLoaded = true;
+ return map.get(key);
+ }
+
+ @Override
+ public boolean isLoaded() {
+ return isLoaded;
+ }
+ };
+ }
+
private TieredCacheSpilloverStrategyService intializeTieredCacheService(
int onHeapCacheSize,
- int diksCacheSize,
- TieredCacheEventListener cacheEventListener
+ int diskCacheSize,
+ TieredCacheEventListener cacheEventListener,
+ List> policies // If passed null, default to no policies (empty list)
) {
- DiskCachingTier diskCache = new MockDiskCachingTier<>(diksCacheSize);
+ DiskCachingTier diskCache = new MockDiskCachingTier<>(diskCacheSize);
OnHeapCachingTier openSearchOnHeapCache = new MockOnHeapCacheTier<>(onHeapCacheSize);
+
+ List> policiesToUse = new ArrayList<>();
+ if (policies != null) {
+ policiesToUse = policies;
+ }
+
return new TieredCacheSpilloverStrategyService.Builder().setOnHeapCachingTier(openSearchOnHeapCache)
.setOnDiskCachingTier(diskCache)
.setTieredCacheEventListener(cacheEventListener)
+ .withPolicies(policiesToUse)
.build();
}
@@ -254,8 +350,8 @@ class MockOnHeapCacheTier implements OnHeapCachingTier, RemovalListe
}
@Override
- public V get(K key) {
- return this.onHeapCacheTier.get(key);
+ public CacheValue get(K key) {
+ return new CacheValue(this.onHeapCacheTier.get(key), TierType.ON_HEAP, new OnHeapTierRequestStats());
}
@Override
@@ -340,8 +436,8 @@ class MockTieredCacheEventListener implements TieredCacheEventListener cacheValue) {
+ enumMap.get(cacheValue.getSource()).missCount.inc();
}
@Override
@@ -352,8 +448,8 @@ public void onRemoval(RemovalNotification notification) {
}
@Override
- public void onHit(K key, V value, TierType tierType) {
- enumMap.get(tierType).hitCount.inc();
+ public void onHit(K key, CacheValue cacheValue) {
+ enumMap.get(cacheValue.getSource()).hitCount.inc();
}
@Override
@@ -381,8 +477,8 @@ class MockDiskCachingTier implements DiskCachingTier, RemovalListene
}
@Override
- public V get(K key) {
- return this.diskTier.get(key);
+ public CacheValue get(K key) {
+ return new CacheValue<>(this.diskTier.get(key), TierType.DISK, new DiskTierRequestStats(0L, true));
}
@Override
diff --git a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java
new file mode 100644
index 0000000000000..b1ec815980c23
--- /dev/null
+++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java
@@ -0,0 +1,416 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.cache.tier.keystore;
+
+import org.opensearch.common.Randomness;
+import org.opensearch.common.metrics.CounterMetric;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadPoolExecutor;
+
+import org.roaringbitmap.RoaringBitmap;
+
+public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase {
+
+ final int BYTES_IN_MB = 1048576;
+
+ public void testInit() {
+ long memCap = 100 * BYTES_IN_MB;
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(memCap);
+ assertEquals(0, kls.getSize());
+ assertEquals(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_EIGHT.getValue(), kls.modulo);
+ assertEquals(memCap, kls.getMemorySizeCapInBytes());
+ }
+
+ public void testTransformationLogic() throws Exception {
+ int modulo = (int) Math.pow(2, 29);
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L);
+ int offset = 3;
+ for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design
+ int posValue = i * modulo + offset;
+ kls.add(posValue);
+ assertEquals(offset, (int) kls.getInternalRepresentation(posValue));
+ int negValue = -(i * modulo + offset);
+ kls.add(negValue);
+ assertEquals(modulo - offset, (int) kls.getInternalRepresentation(negValue));
+ }
+ assertEquals(2, kls.getSize());
+ int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE };
+ for (int value : testVals) {
+ assertTrue(kls.getInternalRepresentation(value) < modulo);
+ assertTrue(kls.getInternalRepresentation(value) >= 0);
+ }
+ RBMIntKeyLookupStore no_modulo_kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L);
+ Random rand = Randomness.get();
+ for (int i = 0; i < 100; i++) {
+ int val = rand.nextInt();
+ assertEquals(val, (int) no_modulo_kls.getInternalRepresentation(val));
+ }
+ }
+
+ public void testContains() throws Exception {
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L);
+ RBMIntKeyLookupStore noModuloKls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L);
+ for (int i = 0; i < kls.REFRESH_SIZE_EST_INTERVAL + 1000; i++) {
+ // set upper bound > number of elements to trigger a size check, ensuring we test that too
+ kls.add(i);
+ assertTrue(kls.contains(i));
+ noModuloKls.add(i);
+ assertTrue(noModuloKls.contains(i));
+ }
+ }
+
+ public void testAddingStatsGetters() throws Exception {
+ RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX;
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L);
+ kls.add(15);
+ kls.add(-15);
+ assertEquals(2, kls.getAddAttempts());
+ assertEquals(0, kls.getCollisions());
+
+ int offset = 1;
+ for (int i = 0; i < 10; i++) {
+ kls.add(i * moduloValue.getValue() + offset);
+ }
+ assertEquals(12, kls.getAddAttempts());
+ assertEquals(9, kls.getCollisions());
+ }
+
+ public void testRegenerateStore() throws Exception {
+ int numToAdd = 10000000;
+ Random rand = Randomness.get();
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L);
+ for (int i = 0; i < numToAdd; i++) {
+ kls.add(i);
+ }
+ assertEquals(numToAdd, kls.getSize());
+ Integer[] newVals = new Integer[1000]; // margin accounts for collisions
+ for (int j = 0; j < newVals.length; j++) {
+ newVals[j] = rand.nextInt();
+ }
+ kls.regenerateStore(newVals);
+ assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions
+
+ // test clear()
+ kls.clear();
+ assertEquals(0, kls.getSize());
+ }
+
+ public void testAddingDuplicates() throws Exception {
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0L);
+ int numToAdd = 4820411;
+ for (int i = 0; i < numToAdd; i++) {
+ kls.add(i);
+ kls.add(i);
+ }
+ for (int j = 0; j < 1000; j++) {
+ kls.add(577);
+ }
+ assertEquals(numToAdd, kls.getSize());
+ }
+
+ public void testMemoryCapBlocksAdd() throws Exception {
+ // Now that we're using a modified version of rbm.getSizeInBytes(), which doesn't provide an inverse function,
+ // we have to test filling just an RBM with random test values first so that we can get the resulting memory cap limit
+ // to use with our modified size estimate.
+ // This is much noisier so the precision is lower.
+
+ // It is necessary to use randomly distributed integers for both parts of this test, as we would do with hashes in the cache,
+ // as that's what our size estimator is designed for.
+ // If we add a run of integers, our size estimator is not valid, especially for small RBMs.
+
+ int[] maxEntriesArr = new int[] { 1342000, 100000, 3000000 };
+ long[] rbmReportedSizes = new long[4];
+ Random rand = Randomness.get();
+ for (int j = 0; j < maxEntriesArr.length; j++) {
+ RoaringBitmap rbm = new RoaringBitmap();
+ for (int i = 0; i < maxEntriesArr[j]; i++) {
+ rbm.add(rand.nextInt());
+ }
+ rbmReportedSizes[j] = rbm.getSizeInBytes();
+ }
+ RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE;
+ for (int i = 0; i < maxEntriesArr.length; i++) {
+ double multiplier = RBMIntKeyLookupStore.getRBMSizeMultiplier(maxEntriesArr[i], moduloValue.getValue());
+ long memSizeCapInBytes = (long) (rbmReportedSizes[i] * multiplier);
+ // long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue);
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes);
+ for (int j = 0; j < maxEntriesArr[i] + 5000; j++) {
+ kls.add(rand.nextInt());
+ }
+ assertTrue(Math.abs(maxEntriesArr[i] - kls.getSize()) < (double) maxEntriesArr[i] / 10);
+ }
+ }
+
+ public void testConcurrency() throws Exception {
+ Random rand = Randomness.get();
+ for (int j = 0; j < 5; j++) { // test with different numbers of threads
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L);
+ int numThreads = rand.nextInt(50) + 1;
+ ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads);
+ // In this test we want to add the first 200K numbers and check they're all correctly there.
+ // We do some duplicates too to ensure those aren't incorrectly added.
+ int amountToAdd = 200000;
+ ArrayList<Future<Boolean>> wasAdded = new ArrayList<>(amountToAdd);
+ ArrayList<Future<Boolean>> duplicatesWasAdded = new ArrayList<>();
+ for (int i = 0; i < amountToAdd; i++) {
+ wasAdded.add(null);
+ }
+ for (int i = 0; i < amountToAdd; i++) {
+ final int val = i;
+ Future<Boolean> fut = executor.submit(() -> {
+ boolean didAdd;
+ try {
+ didAdd = kls.add(val);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ return didAdd;
+ });
+ wasAdded.set(val, fut);
+ if (val % 1000 == 0) {
+ // do a duplicate add
+ Future<Boolean> duplicateFut = executor.submit(() -> {
+ boolean didAdd;
+ try {
+ didAdd = kls.add(val);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ return didAdd;
+ });
+ duplicatesWasAdded.add(duplicateFut);
+ }
+ }
+ int originalAdds = 0;
+ int duplicateAdds = 0;
+ for (Future<Boolean> fut : wasAdded) {
+ if (fut.get()) {
+ originalAdds++;
+ }
+ }
+ for (Future<Boolean> duplicateFut : duplicatesWasAdded) {
+ if (duplicateFut.get()) {
+ duplicateAdds++;
+ }
+ }
+ for (int i = 0; i < amountToAdd; i++) {
+ assertTrue(kls.contains(i));
+ }
+ assertEquals(amountToAdd, originalAdds + duplicateAdds);
+ assertEquals(amountToAdd, kls.getSize());
+ assertEquals(amountToAdd / 1000, kls.getCollisions());
+ executor.shutdown();
+ }
+ }
+
+ public void testRemoveNoCollisions() throws Exception {
+ long memCap = 100L * BYTES_IN_MB;
+ int numToAdd = 195000;
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, memCap);
+ // there should be no collisions for sequential positive numbers up to modulo
+ for (int i = 0; i < numToAdd; i++) {
+ kls.add(i);
+ }
+ for (int i = 0; i < 1000; i++) {
+ assertTrue(kls.remove(i));
+ assertFalse(kls.contains(i));
+ assertFalse(kls.valueHasHadCollision(i));
+ }
+ assertEquals(numToAdd - 1000, kls.getSize());
+ }
+
+ public void testRemoveWithCollisions() throws Exception {
+ int modulo = (int) Math.pow(2, 26);
+ long memCap = 100L * BYTES_IN_MB;
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX, memCap);
+ for (int i = 0; i < 10; i++) {
+ kls.add(i);
+ if (i % 2 == 1) {
+ kls.add(-i);
+ assertFalse(kls.valueHasHadCollision(i));
+ kls.add(i + modulo);
+ assertTrue(kls.valueHasHadCollision(i));
+ } else {
+ assertFalse(kls.valueHasHadCollision(i));
+ }
+ }
+ assertEquals(15, kls.getSize());
+ for (int i = 0; i < 10; i++) {
+ boolean didRemove = kls.remove(i);
+ if (i % 2 == 1) {
+ // we expect a collision with i + modulo, so we can't remove
+ assertFalse(didRemove);
+ assertTrue(kls.contains(i));
+ // but we should be able to remove -i
+ boolean didRemoveNegative = kls.remove(-i);
+ assertTrue(didRemoveNegative);
+ assertFalse(kls.contains(-i));
+ } else {
+ // we expect no collision
+ assertTrue(didRemove);
+ assertFalse(kls.contains(i));
+ assertFalse(kls.valueHasHadCollision(i));
+ }
+ }
+ assertEquals(5, kls.getSize());
+ int offset = 12;
+ kls.add(offset);
+ for (int j = 1; j < 5; j++) {
+ kls.add(offset + j * modulo);
+ }
+ assertEquals(6, kls.getSize());
+ assertFalse(kls.remove(offset + modulo));
+ assertTrue(kls.valueHasHadCollision(offset + 15 * modulo));
+ assertTrue(kls.contains(offset + 17 * modulo));
+ }
+
+ public void testNullInputs() throws Exception {
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L);
+ assertFalse(kls.add(null));
+ assertFalse(kls.contains(null));
+ assertEquals(0, (int) kls.getInternalRepresentation(null));
+ assertFalse(kls.remove(null));
+ assertFalse(kls.isCollision(null, null));
+ assertEquals(0, kls.getAddAttempts());
+ Integer[] newVals = new Integer[] { 1, 17, -2, null, -4, null };
+ kls.regenerateStore(newVals);
+ assertEquals(4, kls.getSize());
+ }
+
+ public void testRemovalLogic() throws Exception {
+ RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX;
+ int modulo = moduloValue.getValue();
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L);
+
+ // Test standard sequence: add K1, K2, K3 which all transform to C, then:
+ // Remove K3
+ // Remove K2, re-add it, re-remove it twice (duplicate should do nothing)
+ // Remove K1, which should finally actually remove everything
+ int c = -42;
+ int k1 = c + modulo;
+ int k2 = c + 2 * modulo;
+ int k3 = c + 3 * modulo;
+ kls.add(k1);
+ assertTrue(kls.contains(k1));
+ assertTrue(kls.contains(k3));
+ kls.add(k2);
+ CounterMetric numCollisions = kls.getNumCollisionsForValue(k2);
+ assertNotNull(numCollisions);
+ assertEquals(2, numCollisions.count());
+ kls.add(k3);
+ assertEquals(3, numCollisions.count());
+ assertEquals(1, kls.getSize());
+
+ boolean removed = kls.remove(k3);
+ assertFalse(removed);
+ HashSet<Integer> removalSet = kls.getRemovalSetForValue(k3);
+ assertEquals(1, removalSet.size());
+ assertTrue(removalSet.contains(k3));
+ assertEquals(2, numCollisions.count());
+ assertEquals(1, kls.getSize());
+
+ removed = kls.remove(k2);
+ assertFalse(removed);
+ assertEquals(2, removalSet.size());
+ assertTrue(removalSet.contains(k2));
+ assertEquals(1, numCollisions.count());
+ assertEquals(1, kls.getSize());
+
+ kls.add(k2);
+ assertEquals(1, removalSet.size());
+ assertFalse(removalSet.contains(k2));
+ assertEquals(2, numCollisions.count());
+ assertEquals(1, kls.getSize());
+
+ removed = kls.remove(k2);
+ assertFalse(removed);
+ assertEquals(2, removalSet.size());
+ assertTrue(removalSet.contains(k2));
+ assertEquals(1, numCollisions.count());
+ assertEquals(1, kls.getSize());
+
+ removed = kls.remove(k2);
+ assertFalse(removed);
+ assertEquals(2, removalSet.size());
+ assertTrue(removalSet.contains(k2));
+ assertEquals(1, numCollisions.count());
+ assertEquals(1, kls.getSize());
+
+ removed = kls.remove(k1);
+ assertTrue(removed);
+ assertNull(kls.getRemovalSetForValue(k1));
+ assertNull(kls.getNumCollisionsForValue(k1));
+ assertEquals(0, kls.getSize());
+ }
+
+ public void testRemovalLogicWithHashCollision() throws Exception {
+ RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX;
+ int modulo = moduloValue.getValue();
+ RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L);
+
+ // Test adding K1 twice (maybe two keys hash to K1), then removing it twice.
+ // We expect it to be unable to remove the last one, but there should be no false negatives.
+ int c = 77;
+ int k1 = c + modulo;
+ int k2 = c + 2 * modulo;
+ kls.add(k1);
+ kls.add(k2);
+ CounterMetric numCollisions = kls.getNumCollisionsForValue(k1);
+ assertEquals(2, numCollisions.count());
+ kls.add(k1);
+ assertEquals(3, numCollisions.count());
+
+ boolean removed = kls.remove(k1);
+ assertFalse(removed);
+ HashSet<Integer> removalSet = kls.getRemovalSetForValue(k1);
+ assertTrue(removalSet.contains(k1));
+ assertEquals(2, numCollisions.count());
+
+ removed = kls.remove(k2);
+ assertFalse(removed);
+ assertTrue(removalSet.contains(k2));
+ assertEquals(1, numCollisions.count());
+
+ removed = kls.remove(k1);
+ assertFalse(removed);
+ assertTrue(removalSet.contains(k1));
+ assertEquals(1, numCollisions.count());
+ assertTrue(kls.contains(k1));
+ assertTrue(kls.contains(k2));
+ }
+}
diff --git a/server/src/test/java/org/opensearch/index/cache/request/RequestCacheStatsTests.java b/server/src/test/java/org/opensearch/index/cache/request/RequestCacheStatsTests.java
new file mode 100644
index 0000000000000..54268d368f2e9
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/cache/request/RequestCacheStatsTests.java
@@ -0,0 +1,100 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.cache.request;
+
+import org.opensearch.common.cache.tier.TierType;
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.common.io.stream.BytesStreamInput;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class RequestCacheStatsTests extends OpenSearchTestCase {
+ public void testConstructorsAndAdd() throws Exception {
+ RequestCacheStats emptyStats = new RequestCacheStats();
+ for (TierType tierType : TierType.values()) {
+ assertTierState(emptyStats, tierType, 0, 0, 0, 0, 0);
+ }
+ assertDiskStatsState(emptyStats, 0, 0);
+ Map<TierType, StatsHolder> testHeapMap = new HashMap<>();
+ testHeapMap.put(TierType.ON_HEAP, new StatsHolder(1, 2, 3, 4, 5));
+ Map tierSpecificMap = new HashMap<>();
+ tierSpecificMap.put(TierType.DISK, new ShardRequestCache.DiskStatsAccumulator(6, 7));
+ RequestCacheStats heapAndSpecificOnlyStats = new RequestCacheStats(testHeapMap, tierSpecificMap);
+ for (TierType tierType : TierType.values()) {
+ if (tierType == TierType.ON_HEAP) {
+ assertTierState(heapAndSpecificOnlyStats, tierType, 1, 2, 3, 4, 5);
+ } else {
+ assertTierState(heapAndSpecificOnlyStats, tierType, 0, 0, 0, 0, 0);
+ }
+ }
+ assertDiskStatsState(heapAndSpecificOnlyStats, 6, 7);
+
+ Map<TierType, StatsHolder> testBothTiersMap = new HashMap<>();
+ testBothTiersMap.put(TierType.ON_HEAP, new StatsHolder(11, 12, 13, 14, 15));
+ testBothTiersMap.put(TierType.DISK, new StatsHolder(6, 7, 8, 9, 10));
+ Map newTierSpecificMap = new HashMap<>();
+ newTierSpecificMap.put(TierType.ON_HEAP, new ShardRequestCache.OnHeapStatsAccumulator());
+ newTierSpecificMap.put(TierType.DISK, new ShardRequestCache.DiskStatsAccumulator(8, 9));
+ RequestCacheStats bothTiersStats = new RequestCacheStats(testBothTiersMap, newTierSpecificMap);
+ assertTierState(bothTiersStats, TierType.ON_HEAP, 11, 12, 13, 14, 15);
+ assertTierState(bothTiersStats, TierType.DISK, 6, 7, 8, 9, 10);
+
+ bothTiersStats.add(heapAndSpecificOnlyStats);
+ assertTierState(bothTiersStats, TierType.ON_HEAP, 12, 14, 16, 18, 20);
+ assertTierState(bothTiersStats, TierType.DISK, 6, 7, 8, 9, 10);
+ assertDiskStatsState(bothTiersStats, 14, 16);
+ }
+
+ public void testSerialization() throws Exception {
+ // This test also implicitly tests StatsHolder serialization
+ BytesStreamOutput os = new BytesStreamOutput();
+
+ Map<TierType, StatsHolder> testMap = new HashMap<>();
+ testMap.put(TierType.ON_HEAP, new StatsHolder(11, 12, 13, 14, 15));
+ testMap.put(TierType.DISK, new StatsHolder(6, 7, 8, 9, 10));
+ Map tierSpecificMap = new HashMap<>();
+ tierSpecificMap.put(TierType.ON_HEAP, new ShardRequestCache.OnHeapStatsAccumulator());
+ tierSpecificMap.put(TierType.DISK, new ShardRequestCache.DiskStatsAccumulator(20, 21));
+ RequestCacheStats stats = new RequestCacheStats(testMap, tierSpecificMap);
+ stats.writeTo(os);
+ BytesStreamInput is = new BytesStreamInput(BytesReference.toBytes(os.bytes()));
+ RequestCacheStats deserialized = new RequestCacheStats(is);
+
+ assertTierState(deserialized, TierType.ON_HEAP, 11, 12, 13, 14, 15);
+ assertTierState(deserialized, TierType.DISK, 6, 7, 8, 9, 10);
+ assertDiskStatsState(deserialized, 20, 21);
+ }
+
+ private void assertTierState(
+ RequestCacheStats stats,
+ TierType tierType,
+ long memSize,
+ long evictions,
+ long hitCount,
+ long missCount,
+ long entries
+ ) {
+ assertEquals(memSize, stats.getMemorySizeInBytes(tierType));
+ assertEquals(evictions, stats.getEvictions(tierType));
+ assertEquals(hitCount, stats.getHitCount(tierType));
+ assertEquals(missCount, stats.getMissCount(tierType));
+ assertEquals(entries, stats.getEntries(tierType));
+ }
+
+ private void assertDiskStatsState(RequestCacheStats stats, long totalGetTime, long totalDiskReaches) {
+ assertEquals(totalGetTime, ((ShardRequestCache.DiskStatsAccumulator) stats.getTierSpecificStats(TierType.DISK)).getTotalGetTime());
+ assertEquals(
+ totalDiskReaches,
+ ((ShardRequestCache.DiskStatsAccumulator) stats.getTierSpecificStats(TierType.DISK)).getTotalDiskReaches()
+ );
+ }
+}
diff --git a/server/src/test/java/org/opensearch/index/cache/request/ShardRequestCacheTests.java b/server/src/test/java/org/opensearch/index/cache/request/ShardRequestCacheTests.java
new file mode 100644
index 0000000000000..9d55ff3a5c5f2
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/cache/request/ShardRequestCacheTests.java
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.cache.request;
+
+import org.opensearch.common.cache.tier.DiskTierRequestStats;
+import org.opensearch.test.OpenSearchTestCase;
+
+public class ShardRequestCacheTests extends OpenSearchTestCase {
+ // Serialization and getter logic is implicitly tested in RequestCacheStatsTests.java,
+ // in this file, check logic for StatsHolder.TierStatsAccumulator implementations
+
+ public void testInit() throws Exception {
+ ShardRequestCache src = new ShardRequestCache();
+ RequestCacheStats rcs = src.stats();
+ }
+
+ public void testDiskStatsAccumulator() throws Exception {
+ ShardRequestCache.DiskStatsAccumulator acc = new ShardRequestCache.DiskStatsAccumulator();
+ DiskTierRequestStats reachedDiskReqStats = new DiskTierRequestStats(145L, true);
+ acc.addRequestStats(reachedDiskReqStats);
+ assertEquals(1, acc.getTotalDiskReaches());
+ assertEquals(145, acc.getTotalGetTime());
+ DiskTierRequestStats noDiskReqStats = new DiskTierRequestStats(391392L, false);
+ acc.addRequestStats(noDiskReqStats);
+ assertEquals(1, acc.getTotalDiskReaches());
+ assertEquals(145, acc.getTotalGetTime());
+
+ ShardRequestCache.DiskStatsAccumulator other = new ShardRequestCache.DiskStatsAccumulator();
+ other.addRequestStats(new DiskTierRequestStats(1L, true));
+ acc.add(other);
+ assertEquals(146, acc.getTotalGetTime());
+ assertEquals(2, acc.getTotalDiskReaches());
+ }
+}
diff --git a/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java b/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java
new file mode 100644
index 0000000000000..723f8927d076d
--- /dev/null
+++ b/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java
@@ -0,0 +1,60 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+package org.opensearch.indices;
+
+import org.opensearch.common.Randomness;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.test.OpenSearchSingleNodeTestCase;
+
+import java.util.Random;
+import java.util.UUID;
+
+public class IRCKeyWriteableSerializerTests extends OpenSearchSingleNodeTestCase {
+
+ public void testSerializer() throws Exception {
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ IndicesService indicesService = getInstanceFromNode(IndicesService.class);
+ IndicesRequestCache irc = new IndicesRequestCache(Settings.EMPTY, indicesService, dummyClusterSettings);
+ IndexService indexService = createIndex("test");
+ IndexShard indexShard = indexService.getShardOrNull(0);
+ IndicesService.IndexShardCacheEntity entity = indicesService.new IndexShardCacheEntity(indexShard);
+ IRCKeyWriteableSerializer ser = new IRCKeyWriteableSerializer(irc);
+
+ int NUM_KEYS = 1000;
+ int[] valueLengths = new int[] { 1000, 6000 }; // test both branches in equals()
+ Random rand = Randomness.get();
+ for (int valueLength : valueLengths) {
+ for (int i = 0; i < NUM_KEYS; i++) {
+ IndicesRequestCache.Key key = getRandomIRCKey(valueLength, rand, irc, entity);
+ byte[] serialized = ser.serialize(key);
+ assertTrue(ser.equals(key, serialized));
+ IndicesRequestCache.Key deserialized = ser.deserialize(serialized);
+ assertTrue(key.equals(deserialized));
+ }
+ }
+ }
+
+ private IndicesRequestCache.Key getRandomIRCKey(
+ int valueLength,
+ Random random,
+ IndicesRequestCache irc,
+ IndicesService.IndexShardCacheEntity entity
+ ) {
+ byte[] value = new byte[valueLength];
+ for (int i = 0; i < valueLength; i++) {
+ value[i] = (byte) (random.nextInt(126 - 32) + 32);
+ }
+ BytesReference keyValue = new BytesArray(value);
+ return irc.new Key(entity, keyValue, UUID.randomUUID().toString()); // same UUID source as used in real key
+ }
+}
diff --git a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java
index 18ec013711f22..8c49fc36076e2 100644
--- a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java
+++ b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java
@@ -41,15 +41,24 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
+import org.opensearch.action.OriginalIndices;
+import org.opensearch.action.OriginalIndicesTests;
+import org.opensearch.action.search.SearchRequest;
import org.opensearch.common.CheckedSupplier;
+import org.opensearch.common.UUIDs;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
+import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
+import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.io.IOUtils;
+import org.opensearch.core.common.Strings;
import org.opensearch.core.common.bytes.AbstractBytesReference;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.common.io.stream.StreamInput;
@@ -62,6 +71,12 @@
import org.opensearch.index.cache.request.ShardRequestCache;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.index.shard.IndexShard;
+import org.opensearch.search.DocValueFormat;
+import org.opensearch.search.SearchShardTarget;
+import org.opensearch.search.internal.AliasFilter;
+import org.opensearch.search.internal.ShardSearchContextId;
+import org.opensearch.search.internal.ShardSearchRequest;
+import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.test.OpenSearchSingleNodeTestCase;
import java.io.IOException;
@@ -73,7 +88,12 @@ public class IndicesRequestCacheTests extends OpenSearchSingleNodeTestCase {
public void testBasicOperationsCache() throws Exception {
ShardRequestCache requestCacheStats = new ShardRequestCache();
- IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, getInstanceFromNode(IndicesService.class));
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ IndicesRequestCache cache = new IndicesRequestCache(
+ Settings.EMPTY,
+ getInstanceFromNode(IndicesService.class),
+ dummyClusterSettings
+ );
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
@@ -127,7 +147,12 @@ public void testBasicOperationsCache() throws Exception {
}
public void testCacheDifferentReaders() throws Exception {
- IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, getInstanceFromNode(IndicesService.class));
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ IndicesRequestCache cache = new IndicesRequestCache(
+ Settings.EMPTY,
+ getInstanceFromNode(IndicesService.class),
+ dummyClusterSettings
+ );
AtomicBoolean indexShard = new AtomicBoolean(true);
ShardRequestCache requestCacheStats = new ShardRequestCache();
Directory dir = newDirectory();
@@ -222,8 +247,13 @@ public void testCacheDifferentReaders() throws Exception {
public void testEviction() throws Exception {
final ByteSizeValue size;
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
{
- IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, getInstanceFromNode(IndicesService.class));
+ IndicesRequestCache cache = new IndicesRequestCache(
+ Settings.EMPTY,
+ getInstanceFromNode(IndicesService.class),
+ dummyClusterSettings
+ );
AtomicBoolean indexShard = new AtomicBoolean(true);
ShardRequestCache requestCacheStats = new ShardRequestCache();
Directory dir = newDirectory();
@@ -250,7 +280,8 @@ public void testEviction() throws Exception {
}
IndicesRequestCache cache = new IndicesRequestCache(
Settings.builder().put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), size.getBytes() + 1 + "b").build(),
- null
+ getInstanceFromNode(IndicesService.class),
+ dummyClusterSettings
);
AtomicBoolean indexShard = new AtomicBoolean(true);
ShardRequestCache requestCacheStats = new ShardRequestCache();
@@ -287,7 +318,12 @@ public void testEviction() throws Exception {
}
public void testClearAllEntityIdentity() throws Exception {
- IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, getInstanceFromNode(IndicesService.class));
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ IndicesRequestCache cache = new IndicesRequestCache(
+ Settings.EMPTY,
+ getInstanceFromNode(IndicesService.class),
+ dummyClusterSettings
+ );
AtomicBoolean indexShard = new AtomicBoolean(true);
ShardRequestCache requestCacheStats = new ShardRequestCache();
@@ -372,7 +408,12 @@ public BytesReference get() {
public void testInvalidate() throws Exception {
ShardRequestCache requestCacheStats = new ShardRequestCache();
- IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, getInstanceFromNode(IndicesService.class));
+ ClusterSettings dummyClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ IndicesRequestCache cache = new IndicesRequestCache(
+ Settings.EMPTY,
+ getInstanceFromNode(IndicesService.class),
+ dummyClusterSettings
+ );
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
@@ -494,6 +535,40 @@ public void testSerializationDeserializationOfCacheKey() throws Exception {
}
+ private static BytesReference getQSRBytesReference(long tookTimeNanos) throws IOException {
+ // unfortunately no good way to separate this out from DiskTierTookTimePolicyTests.getQSR() :(
+ ShardId shardId = new ShardId("index", "uuid", randomInt());
+ SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(randomBoolean());
+ ShardSearchRequest shardSearchRequest = new ShardSearchRequest(
+ OriginalIndicesTests.randomOriginalIndices(),
+ searchRequest,
+ shardId,
+ 1,
+ new AliasFilter(null, Strings.EMPTY_ARRAY),
+ 1.0f,
+ randomNonNegativeLong(),
+ null,
+ new String[0]
+ );
+ ShardSearchContextId id = new ShardSearchContextId(UUIDs.base64UUID(), randomLong());
+ QuerySearchResult result = new QuerySearchResult(
+ id,
+ new SearchShardTarget("node", shardId, null, OriginalIndices.NONE),
+ shardSearchRequest
+ );
+ TopDocs topDocs = new TopDocs(new TotalHits(randomLongBetween(0, Long.MAX_VALUE), TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
+ result.topDocs(new TopDocsAndMaxScore(topDocs, randomBoolean() ? Float.NaN : randomFloat()), new DocValueFormat[0]);
+
+ result.setTookTimeNanos(tookTimeNanos);
+
+ BytesStreamOutput out = new BytesStreamOutput();
+ // it appears to need a boolean and then a ShardSearchContextId written to the stream before the QSR in order to deserialize?
+ out.writeBoolean(false);
+ id.writeTo(out);
+ result.writeToNoId(out);
+ return out.bytes();
+ }
+
private class TestBytesReference extends AbstractBytesReference {
int dummyValue;
diff --git a/server/src/test/java/org/opensearch/indices/IndicesServiceCloseTests.java b/server/src/test/java/org/opensearch/indices/IndicesServiceCloseTests.java
index 5dd4eb504ec2f..accd7a29efb43 100644
--- a/server/src/test/java/org/opensearch/indices/IndicesServiceCloseTests.java
+++ b/server/src/test/java/org/opensearch/indices/IndicesServiceCloseTests.java
@@ -37,6 +37,7 @@
import org.opensearch.cluster.ClusterName;
import org.opensearch.cluster.routing.allocation.DiskThresholdSettings;
import org.opensearch.common.cache.RemovalNotification;
+import org.opensearch.common.cache.tier.CacheValue;
import org.opensearch.common.cache.tier.TierType;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.concurrent.OpenSearchExecutors;
@@ -342,10 +343,10 @@ public Object getCacheIdentity() {
}
@Override
- public void onHit(TierType tierType) {}
+ public void onHit(CacheValue cacheValue) {}
@Override
- public void onMiss(TierType tierType) {}
+ public void onMiss(CacheValue cacheValue) {}
@Override
public void onRemoval(RemovalNotification notification) {}
diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java
index 7c84078af080e..f4657e303fbd8 100644
--- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java
+++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java
@@ -121,6 +121,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Semaphore;
@@ -823,6 +824,118 @@ public Scroll scroll() {
}
}
+ public void testQuerySearchResultTookTime() throws Exception {
+ // I wasn't able to introduce a delay in these tests as everything between creation and usage of the QuerySearchResult object
+ // happen in a single line - we would have to modify QueryPhase.execute() to take a delay parameter
+ // However this was tested manually
+ createIndex("index");
+ final SearchService service = getInstanceFromNode(SearchService.class);
+ final IndicesService indicesService = getInstanceFromNode(IndicesService.class);
+ final IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index"));
+ final IndexShard indexShard = indexService.getShard(0);
+ SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true);
+ searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()));
+
+ ShardSearchRequest request = new ShardSearchRequest(
+ OriginalIndices.NONE,
+ searchRequest,
+ indexShard.shardId(),
+ 2, // must have >1 shards for executeQueryPhase to return the QuerySearchResult
+ new AliasFilter(null, Strings.EMPTY_ARRAY),
+ 1.0f,
+ -1,
+ null,
+ null
+ );
+
+ SearchShardTask task = new SearchShardTask(123L, "", "", "", null, Collections.emptyMap());
+ service.executeQueryPhase(request, randomBoolean(), task, new ActionListener<SearchPhaseResult>() {
+ @Override
+ public void onResponse(SearchPhaseResult searchPhaseResult) {
+ assertEquals(QuerySearchResult.class, searchPhaseResult.getClass()); // 2+ shards -> QuerySearchResult returned
+ QuerySearchResult qsr = (QuerySearchResult) searchPhaseResult;
+ assertTrue(qsr.getTookTimeNanos() > 0); // Above zero means it's been set at some point
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ throw new AssertionError(e);
+ }
+ });
+ }
+
+ public void testQuerySearchResultTookTimeCacheableRequest() throws Exception {
+ // Test 2 identical cacheable requests and assert both have the same tookTime
+ // Similarly, no delay could be added
+ createIndex("index");
+ final SearchService service = getInstanceFromNode(SearchService.class);
+ final IndicesService indicesService = getInstanceFromNode(IndicesService.class);
+ final IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index"));
+ final IndexShard indexShard = indexService.getShard(0);
+ SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true);
+ SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
+
+ searchRequest.source(searchSourceBuilder);
+ searchSourceBuilder.scriptField(
+ "field" + 0,
+ new Script(ScriptType.INLINE, MockScriptEngine.NAME, CustomScriptPlugin.DUMMY_SCRIPT, Collections.emptyMap())
+ );
+ searchSourceBuilder.size(0); // from testIgnoreScriptfieldIfSizeZero
+
+ String[] dummyRoutings = new String[] {};
+ OriginalIndices dummyOriginalIndices = new OriginalIndices(new String[] { "index'" }, IndicesOptions.LENIENT_EXPAND_OPEN);
+
+ ShardSearchRequest request = new ShardSearchRequest(
+ dummyOriginalIndices,
+ searchRequest,
+ indexShard.shardId(),
+ 2, // must have >1 shards for executeQueryPhase to return the QuerySearchResult
+ new AliasFilter(null, Strings.EMPTY_ARRAY),
+ 1.0f,
+ 0L,
+ // if nowInMillis is negative, it fails when trying to write the shardSearchRequest to cache as it uses WriteVLong which only
+ // takes positive longs
+ null,
+ dummyRoutings // similar for routings
+ );
+
+ final CompletableFuture<Long> firstResult = new CompletableFuture<>();
+ final CompletableFuture<Long> secondResult = new CompletableFuture<>();
+ SearchShardTask task = new SearchShardTask(123L, "", "", "", null, Collections.emptyMap());
+ service.executeQueryPhase(request, randomBoolean(), task, new ActionListener<SearchPhaseResult>() {
+ @Override
+ public void onResponse(SearchPhaseResult searchPhaseResult) {
+ assertEquals(QuerySearchResult.class, searchPhaseResult.getClass()); // 2+ shards -> QuerySearchResult returned
+ QuerySearchResult qsr = (QuerySearchResult) searchPhaseResult;
+ firstResult.complete(qsr.getTookTimeNanos());
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ throw new AssertionError(e);
+ }
+ });
+
+ service.executeQueryPhase(request, randomBoolean(), task, new ActionListener<SearchPhaseResult>() {
+ @Override
+ public void onResponse(SearchPhaseResult searchPhaseResult) {
+ assertEquals(QuerySearchResult.class, searchPhaseResult.getClass()); // 2+ shards -> QuerySearchResult returned
+ QuerySearchResult qsr = (QuerySearchResult) searchPhaseResult;
+ secondResult.complete(qsr.getTookTimeNanos());
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ throw new AssertionError(e);
+ }
+ });
+
+ long firstResultVal = firstResult.get();
+ long secondResultVal = secondResult.get();
+ assertEquals(firstResultVal, secondResultVal);
+ assertTrue(firstResultVal > 0);
+ }
+
public void testCanMatch() throws Exception {
createIndex("index");
final SearchService service = getInstanceFromNode(SearchService.class);
@@ -1010,6 +1123,7 @@ public void onFailure(Exception e) {
}
}
});
+
latch.await();
}
diff --git a/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java
index 39126a607f968..ef30cea39be5c 100644
--- a/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java
+++ b/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java
@@ -85,9 +85,14 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
+import org.opensearch.action.OriginalIndices;
+import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchShardTask;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
+import org.opensearch.core.common.Strings;
+import org.opensearch.core.index.Index;
+import org.opensearch.core.index.shard.ShardId;
import org.opensearch.core.tasks.TaskCancelledException;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
@@ -103,9 +108,11 @@
import org.opensearch.lucene.queries.MinDocQuery;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.collapse.CollapseBuilder;
+import org.opensearch.search.internal.AliasFilter;
import org.opensearch.search.internal.ContextIndexSearcher;
import org.opensearch.search.internal.ScrollContext;
import org.opensearch.search.internal.SearchContext;
+import org.opensearch.search.internal.ShardSearchRequest;
import org.opensearch.search.sort.SortAndFormats;
import org.opensearch.test.TestSearchContext;
import org.opensearch.threadpool.ThreadPool;
@@ -115,6 +122,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -1145,6 +1153,114 @@ public void testQueryTimeoutChecker() throws Exception {
createTimeoutCheckerThenWaitThenRun(timeCacheLifespan / 4, timeCacheLifespan / 2 + timeTolerance, false, true);
}
+ public void testQuerySearchResultTookTime() throws IOException {
+ int sleepMillis = randomIntBetween(10, 100); // between 0.01 and 0.1 sec
+ DelayedQueryPhaseSearcher delayedQueryPhaseSearcher = new DelayedQueryPhaseSearcher(sleepMillis);
+
+ // we need to test queryPhase.execute(), not executeInternal(), since that's what the timer wraps around
+ // for that we must set up a searchContext with more functionality than the TestSearchContext,
+ // which requires a bit of complexity with test classes
+
+ Directory dir = newDirectory();
+ final Sort sort = new Sort(new SortField("rank", SortField.Type.INT));
+ IndexWriterConfig iwc = newIndexWriterConfig().setIndexSort(sort);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ for (int i = 0; i < 10; i++) {
+ doc.add(new StringField("foo", Integer.toString(i), Store.NO));
+ }
+ w.addDocument(doc);
+ w.close();
+ IndexReader reader = DirectoryReader.open(dir);
+
+ QueryShardContext queryShardContext = mock(QueryShardContext.class);
+ when(queryShardContext.fieldMapper("user")).thenReturn(
+ new NumberFieldType("user", NumberType.INTEGER, true, false, true, false, null, Collections.emptyMap())
+ );
+
+ Index index = new Index("IndexName", "UUID");
+ ShardId shardId = new ShardId(index, 0);
+ long nowInMillis = System.currentTimeMillis();
+ String clusterAlias = randomBoolean() ? null : randomAlphaOfLengthBetween(3, 10);
+ SearchRequest searchRequest = new SearchRequest();
+ searchRequest.allowPartialSearchResults(randomBoolean());
+ ShardSearchRequest request = new ShardSearchRequest(
+ OriginalIndices.NONE,
+ searchRequest,
+ shardId,
+ 1,
+ AliasFilter.EMPTY,
+ 1f,
+ nowInMillis,
+ clusterAlias,
+ Strings.EMPTY_ARRAY
+ );
+ TestSearchContextWithRequest searchContext = new TestSearchContextWithRequest(
+ queryShardContext,
+ indexShard,
+ newEarlyTerminationContextSearcher(reader, 0, executor),
+ request
+ );
+
+ QueryPhase queryPhase = new QueryPhase(delayedQueryPhaseSearcher);
+ queryPhase.execute(searchContext);
+ Long tookTime = searchContext.queryResult().getTookTimeNanos();
+ assertTrue(tookTime >= (long) sleepMillis * 1000000);
+ reader.close();
+ dir.close();
+ }
+
+ private class TestSearchContextWithRequest extends TestSearchContext {
+ ShardSearchRequest request;
+ Query query;
+
+ public TestSearchContextWithRequest(
+ QueryShardContext queryShardContext,
+ IndexShard indexShard,
+ ContextIndexSearcher searcher,
+ ShardSearchRequest request
+ ) {
+ super(queryShardContext, indexShard, searcher);
+ this.request = request;
+ this.query = new TermQuery(new Term("foo", "bar"));
+ }
+
+ @Override
+ public ShardSearchRequest request() {
+ return request;
+ }
+
+ @Override
+ public Query query() {
+ return this.query;
+ }
+ }
+
+ private class DelayedQueryPhaseSearcher extends QueryPhase.DefaultQueryPhaseSearcher implements QueryPhaseSearcher {
+ // add delay into searchWith
+ private final int sleepMillis;
+
+ public DelayedQueryPhaseSearcher(int sleepMillis) {
+ super();
+ this.sleepMillis = sleepMillis;
+ }
+
+ @Override
+ public boolean searchWith(
+ SearchContext searchContext,
+ ContextIndexSearcher searcher,
+ Query query,
+ LinkedList<QueryCollectorContext> collectors,
+ boolean hasFilterCollector,
+ boolean hasTimeout
+ ) throws IOException {
+ try {
+ Thread.sleep(sleepMillis);
+ } catch (Exception ignored) {}
+ return super.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout);
+ }
+ }
+
private void createTimeoutCheckerThenWaitThenRun(
long timeout,
long sleepAfterCreation,
diff --git a/server/src/test/java/org/opensearch/search/query/QuerySearchResultTests.java b/server/src/test/java/org/opensearch/search/query/QuerySearchResultTests.java
index 41e4e1ae45a73..1b8fc9d7dbc5c 100644
--- a/server/src/test/java/org/opensearch/search/query/QuerySearchResultTests.java
+++ b/server/src/test/java/org/opensearch/search/query/QuerySearchResultTests.java
@@ -56,6 +56,8 @@
import org.opensearch.search.suggest.SuggestTests;
import org.opensearch.test.OpenSearchTestCase;
+import java.util.HashMap;
+
import static java.util.Collections.emptyList;
public class QuerySearchResultTests extends OpenSearchTestCase {
@@ -99,25 +101,36 @@ private static QuerySearchResult createTestInstance() throws Exception {
if (randomBoolean()) {
result.aggregations(InternalAggregationsTests.createTestInstance());
}
+ assertNull(result.getTookTimeNanos());
return result;
}
public void testSerialization() throws Exception {
- QuerySearchResult querySearchResult = createTestInstance();
- QuerySearchResult deserialized = copyWriteable(querySearchResult, namedWriteableRegistry, QuerySearchResult::new);
- assertEquals(querySearchResult.getContextId().getId(), deserialized.getContextId().getId());
- assertNull(deserialized.getSearchShardTarget());
- assertEquals(querySearchResult.topDocs().maxScore, deserialized.topDocs().maxScore, 0f);
- assertEquals(querySearchResult.topDocs().topDocs.totalHits, deserialized.topDocs().topDocs.totalHits);
- assertEquals(querySearchResult.from(), deserialized.from());
- assertEquals(querySearchResult.size(), deserialized.size());
- assertEquals(querySearchResult.hasAggs(), deserialized.hasAggs());
- if (deserialized.hasAggs()) {
- Aggregations aggs = querySearchResult.consumeAggs().expand();
- Aggregations deserializedAggs = deserialized.consumeAggs().expand();
- assertEquals(aggs.asList(), deserializedAggs.asList());
+ HashMap<Boolean, Long> expectedValues = new HashMap<>(); // map contains whether to set took time, and if so, to what value
+ expectedValues.put(false, null);
+ expectedValues.put(true, 1000L);
+ for (Boolean doSetTookTime : expectedValues.keySet()) {
+ QuerySearchResult querySearchResult = createTestInstance();
+ if (doSetTookTime) {
+ querySearchResult.setTookTimeNanos(expectedValues.get(doSetTookTime));
+ }
+ QuerySearchResult deserialized = copyWriteable(querySearchResult, namedWriteableRegistry, QuerySearchResult::new);
+ assertEquals(querySearchResult.getContextId().getId(), deserialized.getContextId().getId());
+ assertNull(deserialized.getSearchShardTarget());
+ assertEquals(querySearchResult.topDocs().maxScore, deserialized.topDocs().maxScore, 0f);
+ assertEquals(querySearchResult.topDocs().topDocs.totalHits, deserialized.topDocs().topDocs.totalHits);
+ assertEquals(querySearchResult.from(), deserialized.from());
+ assertEquals(querySearchResult.size(), deserialized.size());
+ assertEquals(querySearchResult.hasAggs(), deserialized.hasAggs());
+ if (deserialized.hasAggs()) {
+ Aggregations aggs = querySearchResult.consumeAggs().expand();
+ Aggregations deserializedAggs = deserialized.consumeAggs().expand();
+ assertEquals(aggs.asList(), deserializedAggs.asList());
+ }
+ assertEquals(querySearchResult.terminatedEarly(), deserialized.terminatedEarly());
+ assertEquals(querySearchResult.getTookTimeNanos(), deserialized.getTookTimeNanos());
+ assertEquals(expectedValues.get(doSetTookTime), querySearchResult.getTookTimeNanos());
}
- assertEquals(querySearchResult.terminatedEarly(), deserialized.terminatedEarly());
}
public void testNullResponse() throws Exception {