diff --git a/.idea/caches/deviceStreaming.xml b/.idea/caches/deviceStreaming.xml new file mode 100644 index 0000000..33049c4 --- /dev/null +++ b/.idea/caches/deviceStreaming.xml @@ -0,0 +1,1313 @@ + + + + + + \ No newline at end of file diff --git a/.idea/deviceManager.xml b/.idea/deviceManager.xml new file mode 100644 index 0000000..91f9558 --- /dev/null +++ b/.idea/deviceManager.xml @@ -0,0 +1,13 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..6e86672 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..f8ab653 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + { + "associatedIndex": 0 +} + + + + { + "keyToString": { + "ASKED_SHARE_PROJECT_CONFIGURATION_FILES": "true", + "ModuleVcsDetector.initialDetectionPerformed": "true", + "RunOnceActivity.ShowReadmeOnStart": "true", + "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252": "true", + "RunOnceActivity.cidr.known.project.marker": "true", + "RunOnceActivity.git.unshallow": "true", + "RunOnceActivity.readMode.enableVisualFormatting": "true", + "cf.first.check.clang-format": "false", + "cidr.known.project.marker": "true", + "dart.analysis.tool.window.visible": "false", + "git-widget-placeholder": "start-flutter-2", + "kotlin-language-version-configured": "true", + "last_opened_file_path": "/Users/emmanuel/Code/pp/acho/mobile_app/lib", + "project.structure.last.edited": "Project", + "project.structure.proportion": "0.0", + "project.structure.side.proportion": "0.2", + "settings.editor.selected.configurable": "project.propDebugger", + "show.migrate.to.gradle.popup": "false" + }, + "keyToStringList": { + "com.intellij.ide.scratch.ScratchImplUtil$2/New Scratch File": [ + "Dart" + ] + } +} + + + + + + + + + + + + + + 1768432606666 + + + + + + + + + + + + file://$PROJECT_DIR$/mobile_app/lib/utils.dart + + + + + \ No newline at end of file diff --git a/mobile_app/android/app/src/main/AndroidManifest.xml b/mobile_app/android/app/src/main/AndroidManifest.xml index ff78bf3..bb3e8cc 100644 --- a/mobile_app/android/app/src/main/AndroidManifest.xml +++ b/mobile_app/android/app/src/main/AndroidManifest.xml @@ -1,4 +1,5 @@ + files = []; + FileApp({super.key, required this.files}); + + @override + Widget build(BuildContext context) { + //TODO:switch to class, so we can access more attributes outside of title + + return SizedBox.shrink( + child: Column( + children: [ + const ListTile( + leading: Text( + "files", + style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold), + )), + Column( + children: List.generate(files.length, (int index) { + String fileName = files[index].path.split("/").last; + String fileType = files[index].path.split("/").last.split(".").last; + + Map fileIcon = {"pdf": Icon(Icons.picture_as_pdf)}; + + return ListTile( + leading: fileIcon[fileType] ?? 
Icon(Icons.book), + trailing: Icon(Icons.chevron_right), + //TODO: style to make borders visible + onTap: () { + PdfScanner().openFile(files[index]); + //TODO: Handle click, popular search bar with text controller + }, + title: Text(fileName), // Display results from search + ); + })), + ], + )); + } +} diff --git a/mobile_app/lib/home.dart b/mobile_app/lib/home.dart index d048149..7509d9f 100644 --- a/mobile_app/lib/home.dart +++ b/mobile_app/lib/home.dart @@ -1,88 +1,181 @@ import 'dart:math'; +import 'dart:io'; import 'package:flutter/material.dart'; +import 'package:flutter/foundation.dart'; import 'package:gap/gap.dart'; +import 'package:mobile_app/utils.dart'; +import 'package:flutter_tantivy/flutter_tantivy.dart'; -class HomeApp extends StatelessWidget { - const HomeApp({super.key}); +class HomeApp extends StatefulWidget { + List files = []; + HomeApp({super.key, required this.files}); + + @override + State createState() => _HomeAppState(); +} + +class _HomeAppState extends State { + List matchedDocuments = []; + List searchedItems = []; @override Widget build(BuildContext context) { double width = MediaQuery.sizeOf(context).width; - //TODO: cap at 5 - late List searchedItems = ['A', 'B', 'C']; - late List recentDocuments = [ - 'A', - 'B', - 'C' - ]; //TODO:switch to class, so we can access more attributes outside of title + void _showDocumentDetails(SearchResult result) { + showDialog( + context: context, + builder: (context) => AlertDialog( + title: const Text("Document Preview"), + content: SingleChildScrollView( + child: Column( + mainAxisSize: MainAxisSize.min, + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + const Text( + "Matching Chunk:", + style: TextStyle( + fontWeight: FontWeight.bold, color: Colors.blue), + ), + const Gap(10), + Text( + result.doc.text, // The full text chunk + style: const TextStyle(fontSize: 15, height: 1.5), + ), + ], + ), + ), + actions: [ + TextButton( + onPressed: () => Navigator.pop(context), + child: const Text("Close"), + ), + ElevatedButton( + onPressed: () { + ///TODO: Integrate with PDF viewer to jump to specific page + Navigator.pop(context); + }, + child: const Text("Open Full PDF"), + ), + ], + ), + ); + } return SizedBox.shrink( - child: Column( - children: [ - SizedBox( - height: 70, - width: width - 40, - child: SearchBar( - leading: const Icon(Icons.search), - hintText: "Search...", - - shape: WidgetStateProperty.all( - const RoundedRectangleBorder( - borderRadius: BorderRadius.all( - Radius.circular(3)), // This makes the corners square - ), + child: Column(children: [ + SizedBox( + height: 70, + width: width - 40, + child: SearchBar( + leading: const Icon(Icons.search), + hintText: "Search...", + shape: WidgetStateProperty.all( + const RoundedRectangleBorder( + borderRadius: BorderRadius.all( + Radius.circular(3)), // This makes the corners square ), + ), - backgroundColor: WidgetStateProperty.all(Colors.grey[200]), - elevation: WidgetStateProperty.all(0), // Flat style + backgroundColor: WidgetStateProperty.all(Colors.grey[200]), + elevation: WidgetStateProperty.all(0), // Flat style - onChanged: (text) { - //TODO: Handle search logic here - //spawn process into a new isolate to prevent u.i jank - }, - onSubmitted: (text) { - //TODO: Handle enter key press, - //TODO: similar to above depending on latency we may just use this - }, - ), + onChanged: (text) {}, + onSubmitted: (text) async { + final List docs = await compute(findMatch, text); + //TODO: Handle enter key press, + //TODO: similar to above depending on 
latency we may just use this + saveSearchHistory(text); + setState(() { + matchedDocuments = docs; + }); + + List _searchedItems = await getSearchHistory(); + setState(() { + searchedItems = _searchedItems; + }); + }, ), - const ListTile( - leading: Text( - "Recent Searches", - style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold), - )), - Row( - children: List.generate(searchedItems.length, (int index) { - return SizedBox( - width: 70, - child: TextButton( - //TODO: style to make borders visible - onPressed: () { - //TODO: Handle click, popular search bar with text controller - }, - child: - Text(searchedItems[index]), // Display results from search - )); - })), - const ListTile( - leading: Text( - "Recent Documents", - style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold), - )), - Column( - children: List.generate(recentDocuments.length, (int index) { - return ListTile( - leading: const Icon(Icons.picture_as_pdf), - onTap: () { - // Handle click - }, - trailing: const Icon(Icons.chevron_right), - title: Text(recentDocuments[index]), // Display results from search - ); - })) - ], - )); + ), + const ListTile( + leading: Text( + "Recent Searches", + style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold), + )), + Row( + children: List.generate(min(searchedItems.length, 3), (int index) { + return Expanded( + child: OutlinedButton( + //TODO: style to make borders visible + style: OutlinedButton.styleFrom( + shape: const StadiumBorder(), // Makes it look like a pill/chip + side: BorderSide(color: Colors.grey[300]!), + ), + onPressed: () async { + final List docs = + await compute(findMatch, searchedItems[index]); + + setState(() { + matchedDocuments = docs; + }); + }, + child: Text(searchedItems[index]), // Display results from search + )); + })), + matchedDocuments.length >= 1 + ? const ListTile( + leading: Text( + "Matching Documents", + style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold), + )) + : Expanded( + child: Column( + children: [ + const ListTile( + leading: Text( + "Files", + style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold), + )), + Column( + children: List.generate(widget.files.length, (int index) { + String fileName = widget.files[index].path.split("/").last; + String fileType = + widget.files[index].path.split("/").last.split(".").last; + + Map fileIcon = { + "pdf": Icon(Icons.picture_as_pdf) + }; + + return ListTile( + leading: fileIcon[fileType] ?? 
Icon(Icons.book), + trailing: Icon(Icons.chevron_right), + //TODO: style to make borders visible + onTap: () { + PdfScanner().openFile(widget.files[index]); + //TODO: Handle click, popular search bar with text controller + }, + title: Text(fileName), // Display results from search + ); + })), + ], + )), + Expanded( + child: ListView.builder( + itemCount: matchedDocuments.length, + itemBuilder: (context, index) { + final result = matchedDocuments[index]; + + return ListTile( + leading: const Icon(Icons.picture_as_pdf), + onTap: () { + _showDocumentDetails(result); + }, + trailing: const Icon(Icons.chevron_right), + title: Text(result.doc.text + .substring(0, 50)), // Display results from search + ); + })), + ])); } } diff --git a/mobile_app/lib/main.dart b/mobile_app/lib/main.dart index e62081b..abcdf67 100644 --- a/mobile_app/lib/main.dart +++ b/mobile_app/lib/main.dart @@ -1,10 +1,40 @@ +import 'dart:io'; + import 'package:flutter/material.dart'; -import 'package:mobile_app/src/rust/frb_generated.dart'; +import 'package:flutter/services.dart'; import 'package:mobile_app/settings.dart'; +import 'package:flutter/foundation.dart'; import 'package:mobile_app/home.dart'; +import 'package:mobile_app/file.dart'; +import 'package:flutter_tantivy/flutter_tantivy.dart'; +import 'package:path_provider/path_provider.dart'; +import 'package:mobile_app/storage.dart'; +import 'package:permission_handler/permission_handler.dart'; +import 'package:mobile_app/utils.dart'; Future main() async { await RustLib.init(); + WidgetsFlutterBinding + .ensureInitialized(); // Ensure plugin services are initialized + + RootIsolateToken rootIsolateToken = RootIsolateToken.instance!; + BackgroundIsolateBinaryMessenger.ensureInitialized(rootIsolateToken); + await Log.init(); + + var status = await Permission.manageExternalStorage.status; + + if (!status.isGranted) { + status = await Permission.manageExternalStorage.request(); + } + + status = await Permission.manageExternalStorage.status; + Log.logger.i("Permission for external storage: $status"); + + final directory = await getApplicationDocumentsDirectory(); + final indexPath = '${directory.path}/tantivy_index'; + initTantivy(dirPath: indexPath); + Log.logger.i("Index Path $indexPath"); + runApp(const MyApp()); } @@ -17,6 +47,7 @@ class MyApp extends StatefulWidget { class MyAppState extends State { PageController pageController = PageController(); + List folders = []; int selectIndex = 0; void onPageChanged(int index) { @@ -25,6 +56,27 @@ class MyAppState extends State { }); } + @override + void initState() { + super.initState(); + _loadPdfs(); + _indexDocuments(); + } + + void _loadPdfs() async { + PdfScanner scanner = PdfScanner(); + List files = + await scanner.getAllPdfs(); // This might block UI if not careful + setState(() { + folders = files; + }); + } + + void _indexDocuments() async { + PdfScanner scanner = PdfScanner(); + scanner.indexPdfFiles(); + } + void onItemTap(int selectedItems) { pageController.jumpToPage(selectedItems); } @@ -35,21 +87,21 @@ class MyAppState extends State { home: Scaffold( appBar: AppBar(title: const Text('Acho')), body: PageView( - children: [HomeApp(), SettingsApp()], + children: [HomeApp(files: folders), SettingsApp()], controller: pageController, onPageChanged: onPageChanged, ), bottomNavigationBar: BottomNavigationBar( onTap: onItemTap, selectedItemColor: Colors.brown, - items: const [ + items: [ BottomNavigationBarItem( backgroundColor: Colors.red, label: 'Home', icon: Icon(Icons.home_filled), - activeIcon: HomeApp(), + 
activeIcon: HomeApp(files: []), ), - BottomNavigationBarItem( + const BottomNavigationBarItem( label: 'Settings', icon: Icon(Icons.settings), activeIcon: SettingsApp(), diff --git a/mobile_app/lib/src/rust/api/keyword_search.dart b/mobile_app/lib/src/rust/api/keyword_search.dart new file mode 100644 index 0000000..441be26 --- /dev/null +++ b/mobile_app/lib/src/rust/api/keyword_search.dart @@ -0,0 +1,26 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// Rust type: RustOpaqueMoi> +abstract class PathBuf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class SearchFn implements RustOpaqueInterface { + PathBuf get pathToIndex; + + set pathToIndex(PathBuf pathToIndex); + + Future deleteIndex(); + + Future ingestPdfDir({required PathBuf filePath}); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates a new SearchFn instance and initializes the index on disk + static Future newInstance({required PathBuf path}) => + RustLib.instance.api.crateApiKeywordSearchSearchFnNew(path: path); +} diff --git a/mobile_app/lib/src/rust/lib.dart b/mobile_app/lib/src/rust/lib.dart new file mode 100644 index 0000000..5890449 --- /dev/null +++ b/mobile_app/lib/src/rust/lib.dart @@ -0,0 +1,25 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import 'frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// Rust type: RustOpaqueMoi>> +abstract class AHashMapStringFacet implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi >>> +abstract class ArcIndex implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi , usize) >>> +abstract class IndexMapStringVecStringUsize implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Path implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Result implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Value implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/commit.dart b/mobile_app/lib/src/rust/third_party/seekstorm/commit.dart new file mode 100644 index 0000000..59093da --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/commit.dart @@ -0,0 +1,31 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored (category: IgnoreBecauseNotAllowedOwner): `commit` + +abstract class Commit { + /// Commit moves indexed documents from the intermediate uncompressed data structure (array lists/HashMap, queryable by realtime search) in RAM + /// to the final compressed data structure (roaring bitmap) on Mmap or disk - + /// which is persistent, more compact, with lower query latency and allows search with realtime=false. 
+ /// Commit is invoked automatically each time 64K documents are newly indexed as well as on close_index (e.g. server quit). + /// There is no way to prevent this automatic commit by not manually invoking it. + /// But commit can also be invoked manually at any time at any number of newly indexed documents. + /// commit is a **hard commit** for persistence on disk. A **soft commit** for searchability + /// is invoked implicitly with every index_doc, + /// i.e. the document can immediately searched and included in the search results + /// if it matches the query AND the query paramter realtime=true is enabled. + /// **Use commit with caution, as it is an expensive operation**. + /// **Usually, there is no need to invoke it manually**, as it is invoked automatically every 64k documents and when the index is closed with close_index. + /// Before terminating the program, always call close_index (commit), otherwise all documents indexed since last (manual or automatic) commit are lost. + /// There are only 2 reasons that justify a manual commit: + /// 1. if you want to search newly indexed documents without using realtime=true for search performance reasons or + /// 2. if after indexing new documents there won't be more documents indexed (for some time), + /// so there won't be (soon) a commit invoked automatically at the next 64k threshold or close_index, + /// but you still need immediate persistence guarantees on disk to protect against data loss in the event of a crash. + Future commit(); +} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/geo_search.dart b/mobile_app/lib/src/rust/third_party/seekstorm/geo_search.dart new file mode 100644 index 0000000..9447d2b --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/geo_search.dart @@ -0,0 +1,52 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'index.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'search.dart'; + +/// encode 2D-coordinate (lat/lon) into 64-bit Morton code +/// This method is lossy/quantized as two f64 coordinate values are mapped to a single u64 Morton code! +/// The z-value of a point in multidimensions is simply calculated by interleaving the binary representations of its coordinate values. +Future encodeMorton2D({required List point}) => + RustLib.instance.api.seekstormGeoSearchEncodeMorton2D(point: point); + +/// decode 64-bit Morton code into 2D-coordinate (lat/lon) +/// This method is lossy/quantized as a single u64 Morton code is converted to two f64 coordinate values! +Future decodeMorton2D({required BigInt code}) => + RustLib.instance.api.seekstormGeoSearchDecodeMorton2D(code: code); + +/// Comparison of the distances between two morton encoded positions and a base position +Future mortonOrdering( + {required BigInt morton1, + required BigInt morton2, + required List basePoint, + required SortOrder order}) => + RustLib.instance.api.seekstormGeoSearchMortonOrdering( + morton1: morton1, morton2: morton2, basePoint: basePoint, order: order); + +/// calculates distance in kilometers or miles between two 2D-coordinates using Euclidian distance (Pythagoras theorem) with Equirectangular approximation. 
+Future euclidianDistance( + {required List point1, + required List point2, + required DistanceUnit unit}) => + RustLib.instance.api.seekstormGeoSearchEuclidianDistance( + point1: point1, point2: point2, unit: unit); + +/// Converts a Point and a distance radius into a range of morton_codes for geo search range filtering. +/// The conversion is lossy due to coordinate to Morton code rounding errors and Equirectangular approximation of Euclidian distance. +Future pointDistanceToMortonRange( + {required List point, + required double distance, + required DistanceUnit unit}) => + RustLib.instance.api.seekstormGeoSearchPointDistanceToMortonRange( + point: point, distance: distance, unit: unit); + +// Rust type: RustOpaqueMoi> +abstract class Ordering implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class RangeU64 implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/highlighter.dart b/mobile_app/lib/src/rust/third_party/seekstorm/highlighter.dart new file mode 100644 index 0000000..cb10c4a --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/highlighter.dart @@ -0,0 +1,89 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `Fragment` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `clone`, `compose`, `fmt`, `fmt`, `name`, `schemas` + +/// Returns the Highlighter object used as get_document parameter for highlighting fields in documents +Future highlighter( + {required ArcIndex indexArc, + required List highlights, + required List queryTermsVec}) => + RustLib.instance.api.seekstormHighlighterHighlighter( + indexArc: indexArc, + highlights: highlights, + queryTermsVec: queryTermsVec); + +// Rust type: RustOpaqueMoi> +abstract class Highlighter implements RustOpaqueInterface {} + +/// Specifies the number and size of fragments (snippets, summaries) to generate from each specified field to provide a "keyword in context" (KWIC) functionality. +/// With highlight_markup the matching query terms within the fragments can be highlighted with HTML markup. +class Highlight { + /// Specifies the field from which the fragments (snippets, summaries) are created. + final String field; + + /// Allows to specifiy multiple highlight result fields from the same source field, leaving the original field intact, + /// Default: if name is empty then field is used instead, i.e the original field is overwritten with the highlight. + final String name; + + /// If 0/default then return the full original text without fragmenting. + final BigInt fragmentNumber; + + /// Specifies the length of a highlight fragment. + /// The default 0 returns the full original text without truncating, but still with highlighting if highlight_markup is enabled. + final BigInt fragmentSize; + + /// if true, the matching query terms within the fragments are highlighted with HTML markup **\term\<\/b\>**. + final bool highlightMarkup; + + /// Specifies the markup tags to insert **before** each highlighted term (e.g. \"\\" or \"\\"). 
This can be any string, but is most often an HTML or XML tag. + /// Only used when **highlight_markup** is set to true. + final String preTags; + + /// Specifies the markup tags to insert **after** each highlighted term. (e.g. \"\<\/b\>\" or \"\<\/em\>\"). This can be any string, but is most often an HTML or XML tag. + /// Only used when **highlight_markup** is set to true. + final String postTags; + + const Highlight({ + required this.field, + required this.name, + required this.fragmentNumber, + required this.fragmentSize, + required this.highlightMarkup, + required this.preTags, + required this.postTags, + }); + + static Future default_() => + RustLib.instance.api.seekstormHighlighterHighlightDefault(); + + @override + int get hashCode => + field.hashCode ^ + name.hashCode ^ + fragmentNumber.hashCode ^ + fragmentSize.hashCode ^ + highlightMarkup.hashCode ^ + preTags.hashCode ^ + postTags.hashCode; + + @override + bool operator ==(Object other) => + identical(this, other) || + other is Highlight && + runtimeType == other.runtimeType && + field == other.field && + name == other.name && + fragmentNumber == other.fragmentNumber && + fragmentSize == other.fragmentSize && + highlightMarkup == other.highlightMarkup && + preTags == other.preTags && + postTags == other.postTags; +} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/index.dart b/mobile_app/lib/src/rust/third_party/seekstorm/index.dart new file mode 100644 index 0000000..91550b6 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/index.dart @@ -0,0 +1,857 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'highlighter.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'search.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `BlockObjectIndex`, `IndexedField`, `LevelIndex`, `NgramSet`, `NonUniquePostingListObjectQuery`, `NonUniqueTermObject`, `PostingListObject0`, `PostingListObjectIndex`, `PostingListObjectQuery`, `QueueObject`, `ResultFacet`, `SegmentIndex`, `SegmentLevel0`, `Shard`, `TermObject` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `schemas`, `schemas`, `schemas`, `schemas`, 
`schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas` +// These functions are ignored (category: IgnoreBecauseNotAllowedOwner): `close`, `delete_document`, `delete_documents_by_query`, `delete_documents`, `index_document_2`, `index_document_shard`, `index_document`, `index_documents`, `update_document`, `update_documents` +// These functions are ignored (category: IgnoreBecauseOwnerTyShouldIgnore): `default`, `default`, `default`, `default`, `default`, `default`, `default`, `default` + +/// Get the version of the SeekStorm search library +Future version() => RustLib.instance.api.seekstormIndexVersion(); + +/// Create index in RAM. +/// Inner data structures for create index and open_index +/// * `index_path` - index path. +/// * `meta` - index meta object. +/// * `schema` - schema. +/// * `synonyms` - vector of synonyms. +/// * `segment_number_bits1` - number of index segments: e.g. 11 bits for 2048 segments. +/// * `mute` - prevent emitting status messages (e.g. when using pipes for data interprocess communication). +/// * `force_shard_number` - set number of shards manually or automatically. +/// - none: number of shards is set automatically = number of physical processor cores (default) +/// - small: slower indexing, higher latency, slightly higher throughput, faster realtime search, lower RAM consumption +/// - large: faster indexing, lower latency, slightly lower throughput, slower realtime search, higher RAM consumption +Future createIndex( + {required Path indexPath, + required IndexMetaObject meta, + required List schema, + required List synonyms, + required BigInt segmentNumberBits1, + required bool mute, + BigInt? forceShardNumber}) => + RustLib.instance.api.seekstormIndexCreateIndex( + indexPath: indexPath, + meta: meta, + schema: schema, + synonyms: synonyms, + segmentNumberBits1: segmentNumberBits1, + mute: mute, + forceShardNumber: forceShardNumber); + +/// Loads the index from disk into RAM or MMAP. +/// * `index_path` - index path. +/// * `mute` - prevent emitting status messages (e.g. when using pipes for data interprocess communication). 
+Future openIndex({required Path indexPath, required bool mute}) => + RustLib.instance.api + .seekstormIndexOpenIndex(indexPath: indexPath, mute: mute); + +// Rust type: RustOpaqueMoi> +abstract class DocumentItem implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class FacetField implements RustOpaqueInterface { + ValueType get max; + + ValueType get min; + + String get name; + + IndexMapStringVecStringUsize get values; + + set max(ValueType max); + + set min(ValueType min); + + set name(String name); + + set values(IndexMapStringVecStringUsize values); + + static Future default_() => + RustLib.instance.api.seekstormIndexFacetFieldDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class FileType implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class FrequentwordType implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.seekstormIndexFrequentwordTypeDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class Index implements RustOpaqueInterface { + /// Add/append/update/merge synonyms in index + /// Affects only subsequently indexed documents + Future addSynonyms({required List synonyms}); + + int get indexFormatVersionMajor; + + int get indexFormatVersionMinor; + + IndexMetaObject get meta; + + Map get schemaMap; + + List get storedFieldNames; + + set indexFormatVersionMajor(int indexFormatVersionMajor); + + set indexFormatVersionMinor(int indexFormatVersionMinor); + + set meta(IndexMetaObject meta); + + set schemaMap(Map schemaMap); + + set storedFieldNames(List storedFieldNames); + + /// Reset index to empty, while maintaining schema + Future clearIndex(); + + /// Get number of indexed documents. + Future committedDocCount(); + + /// Current document count: indexed document count - deleted document count + Future currentDocCount(); + + /// Delete index from disc and ram + Future deleteIndex(); + + /// Get number of facets defined in the index schema. + Future facetsCount(); + + /// Get document for document id + /// Arguments: + /// * `doc_id`: Specifies which document to load from the document store of the index. + /// * `include_uncommited`: Return also documents which have not yet been committed. + /// * `highlighter_option`: Specifies the extraction of keyword-in-context (KWIC) fragments from fields in documents, and the highlighting of the query terms within. + /// * `fields`: Specifies which of the stored fields to return with each document. Default: If empty return all stored fields + /// * `distance_fields`: insert distance fields into result documents, calculating the distance between a specified facet field of type Point and a base Point, in kilometers or miles. + /// using Euclidian distance (Pythagoras theorem) with Equirectangular approximation. + Future> getDocument( + {required BigInt docId, + required bool includeUncommited, + Highlighter? highlighterOption, + required Set fields, + required List distanceFields}); + + /// get_facet_value: Returns value from facet field for a doc_id even if schema stored=false (field not stored in document JSON). + /// Facet fields are more compact than fields stored in document JSON. + /// Strings are stored more compact as indices to a unique term dictionary. Numbers are stored binary, not as strings. + /// Facet fields are faster because no document loading, decompression and JSON decoding is required. + /// Facet fields are always memory mapped, internally always stored with fixed byte length layout, regardless of string size. 
+ Future getFacetValue( + {required String field, required BigInt docId}); + + /// Get file for document id + /// Arguments: + /// * `doc_id`: Specifies which document to load from the document store of the index. + /// + /// Returns: + /// * `Vec`: The file content as a byte vector. + /// + Future getFile({required BigInt docId}); + + /// get_index_string_facets: list of string facet fields, each with field name and a map of unique values and their count (number of times the specific value appears in the whole index). + /// values are sorted by their occurrence count within all indexed documents in descending order + /// * `query_facets`: Must be set if facet fields should be returned in get_index_facets. If set to Vec::new() then no facet fields are returned. + /// The prefix property of a QueryFacet allows to filter the returned facet values to those matching a given prefix, if there are too many distinct values per facet field. + /// The length property of a QueryFacet allows limiting the number of returned distinct values per facet field, if there are too many distinct values. The QueryFacet can be used to improve the usability in an UI. + /// If the length property of a QueryFacet is set to 0 then no facet values for that facet are returned. + /// The facet values are sorted by the frequency of the appearance of the value within the indexed documents matching the query in descending order. + /// Example: query_facets = vec![QueryFacet::String16 {field: "language".to_string(),prefix: "ger".to_string(),length: 5},QueryFacet::String16 {field: "brand".to_string(),prefix: "a".to_string(),length: 5}]; + Future getIndexStringFacets( + {required List queryFacets}); + + /// Get synonyms from index + Future> getSynonyms(); + + /// get_index_facets_minmax: return map of numeric facet fields, each with field name and min/max values. + Future> indexFacetsMinmax(); + + /// Get number of indexed documents. + Future indexedDocCount(); + + /// Get number of index levels. One index level comprises 64K documents. + Future levelCount(); + + /// Set/replace/overwrite synonyms in index + /// Affects only subsequently indexed documents + Future setSynonyms({required List synonyms}); + + /// Get number of index shards. + Future shardCount(); + + /// are there uncommited documents? + Future uncommittedDocCount(); +} + +// Rust type: RustOpaqueMoi> +abstract class IndexMetaObject implements RustOpaqueInterface { + AccessType get accessType; + + FrequentwordType get frequentWords; + + BigInt get id; + + String get name; + + int get ngramIndexing; + + QueryCompletion? get queryCompletion; + + SimilarityType get similarity; + + SpellingCorrection? get spellingCorrection; + + StemmerType get stemmer; + + StopwordType get stopWords; + + TokenizerType get tokenizer; + + set accessType(AccessType accessType); + + set frequentWords(FrequentwordType frequentWords); + + set id(BigInt id); + + set name(String name); + + set ngramIndexing(int ngramIndexing); + + set queryCompletion(QueryCompletion? queryCompletion); + + set similarity(SimilarityType similarity); + + set spellingCorrection(SpellingCorrection? 
spellingCorrection); + + set stemmer(StemmerType stemmer); + + set stopWords(StopwordType stopWords); + + set tokenizer(TokenizerType tokenizer); +} + +// Rust type: RustOpaqueMoi> +abstract class MinMaxField implements RustOpaqueInterface { + ValueType get max; + + ValueType get min; + + set max(ValueType max); + + set min(ValueType min); + + static Future default_() => + RustLib.instance.api.seekstormIndexMinMaxFieldDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class MinMaxFieldJson implements RustOpaqueInterface { + Value get max; + + Value get min; + + set max(Value max); + + set min(Value min); + + static Future default_() => + RustLib.instance.api.seekstormIndexMinMaxFieldJsonDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class SchemaField implements RustOpaqueInterface { + double get boost; + + bool get completionSource; + + bool get dictionarySource; + + bool get facet; + + String get field; + + FieldType get fieldType; + + bool get indexed; + + bool get longest; + + bool get stored; + + set boost(double boost); + + set completionSource(bool completionSource); + + set dictionarySource(bool dictionarySource); + + set facet(bool facet); + + set field(String field); + + set fieldType(FieldType fieldType); + + set indexed(bool indexed); + + set longest(bool longest); + + set stored(bool stored); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates a new SchemaField. + /// Defines a field in index schema: field, stored, indexed , field_type, facet, boost. + /// # Parameters + /// - field: unique name of a field + /// - stored: only stored fields are returned in the search results + /// - indexed: only indexed fields can be searched + /// - field_type: type of a field: u8, u16, u32, u64, i8, i16, i32, i64, f32, f64, point + /// - facet: enable faceting for a field: for sorting results by field values, for range filtering, for result count per field value or range + /// - `longest`: This allows to annotate (manually set) the longest field in schema. + /// Otherwise the longest field will be automatically detected in first index_document. + /// Setting/detecting the longest field ensures efficient index encoding. 
+ /// - boost: optional custom weight factor for Bm25 ranking + /// # Returns + /// - SchemaField + /// # Example + /// ```rust + /// use seekstorm::index::{SchemaField, FieldType}; + /// let schema_field = SchemaField::new("title".to_string(), true, true, FieldType::String16, false, false, 1.0, false, false); + /// ``` + static Future newInstance( + {required String field, + required bool stored, + required bool indexed, + required FieldType fieldType, + required bool facet, + required bool longest, + required double boost, + required bool dictionarySource, + required bool completionSource}) => + RustLib.instance.api.seekstormIndexSchemaFieldNew( + field: field, + stored: stored, + indexed: indexed, + fieldType: fieldType, + facet: facet, + longest: longest, + boost: boost, + dictionarySource: dictionarySource, + completionSource: completionSource); +} + +// Rust type: RustOpaqueMoi> +abstract class StopwordType implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.seekstormIndexStopwordTypeDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class ValueType implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.seekstormIndexValueTypeDefault(); +} + +abstract class Close { + /// Remove index from RAM (Reverse of open_index) + Future close(); +} + +abstract class DeleteDocument { + /// Delete document from index by document id + Future deleteDocument({required BigInt docid}); +} + +abstract class DeleteDocuments { + /// Delete documents from index by document id + Future deleteDocuments({required Uint64List docidVec}); +} + +abstract class DeleteDocumentsByQuery { + /// Delete documents from index by query + /// Delete and search have identical parameters. + /// It is recommended to test with search prior to delete to verify that only those documents are returned that you really want to delete. + Future deleteDocumentsByQuery( + {required String queryString, + required QueryType queryTypeDefault, + required BigInt offset, + required BigInt length, + required bool includeUncommited, + required List fieldFilter, + required List facetFilter, + required List resultSort}); +} + +abstract class IndexDocument { + /// Indexes a single document + /// May block, if the threshold of documents indexed in parallel is exceeded. + Future indexDocument( + {required Map document, required FileType file}); +} + +abstract class IndexDocument2 { + Future indexDocument2( + {required DocumentItem documentItem, required FileType file}); +} + +abstract class IndexDocumentShard { + /// Indexes a single document + /// May block, if the threshold of documents indexed in parallel is exceeded. + Future indexDocumentShard( + {required Map document, required FileType file}); +} + +abstract class IndexDocuments { + /// Indexes a list of documents + /// May block, if the threshold of documents indexed in parallel is exceeded. + Future indexDocuments({required List> documentVec}); +} + +abstract class UpdateDocument { + /// Update document in index + /// Update_document is a combination of delete_document and index_document. + /// All current limitations of delete_document apply. + Future updateDocument( + {required (BigInt, Map) idDocument}); +} + +abstract class UpdateDocuments { + /// Update documents in index + /// Update_document is a combination of delete_document and index_document. + /// All current limitations of delete_document apply. 
+ Future updateDocuments( + {required List<(BigInt, Map)> idDocumentVec}); +} + +/// Defines where the index resides during search: +/// - Ram (the complete index is preloaded to Ram when opening the index) +/// - Mmap (the index is accessed via memory-mapped files). See architecture.md for details. +/// - At commit the data is serialized to disk for persistence both in Ram and Mmap mode. +/// - The serialization format is identical for Ram and Mmap mode, allowing to change it retrospectively. +enum AccessType { + /// Ram (the complete index is preloaded to Ram when opening the index). + /// - Index size is limited by available RAM size. + /// - Slightly fastesr search speed. + /// - Higher index loading time. + /// - Higher RAM usage. + ram, + + /// Mmap (the index is accessed via memory-mapped files). See architecture.md for details. + /// - Enables index size scaling beyond RAM size. + /// - Slightly slower search speed compared to Ram. + /// - Faster index loading time compared to Ram. + /// - Lower RAM usage. + mmap, + ; +} + +/// Type of posting list compression. +enum CompressionType { + delta, + array, + bitmap, + rle, + error, + ; + + static Future default_() => + RustLib.instance.api.seekstormIndexCompressionTypeDefault(); +} + +/// DistanceField defines a field for proximity search. +class DistanceField { + /// field name of a numeric facet field (currently onyl Point field type supported) + final String field; + + /// field name of the distance field we are deriving from the numeric facet field (Point type) and the base (Point type) + final String distance; + + /// base point (lat,lon) for distance calculation + final Float64List base; + + /// distance unit for the distance field: kilometers or miles + final DistanceUnit unit; + + const DistanceField({ + required this.field, + required this.distance, + required this.base, + required this.unit, + }); + + static Future default_() => + RustLib.instance.api.seekstormIndexDistanceFieldDefault(); + + @override + int get hashCode => + field.hashCode ^ distance.hashCode ^ base.hashCode ^ unit.hashCode; + + @override + bool operator ==(Object other) => + identical(this, other) || + other is DistanceField && + runtimeType == other.runtimeType && + field == other.field && + distance == other.distance && + base == other.base && + unit == other.unit; +} + +/// DistanceUnit defines the unit for distance calculation: kilometers or miles. +enum DistanceUnit { + /// Kilometers + kilometers, + + /// Miles + miles, + ; +} + +/// FieldType defines the type of a field in the document: u8, u16, u32, u64, i8, i16, i32, i64, f32, f64, point, string, stringset, text. +enum FieldType { + /// Unsigned 8-bit integer + u8, + + /// Unsigned 16-bit integer + u16, + + /// Unsigned 32-bit integer + u32, + + /// Unsigned 64-bit integer + u64, + + /// Signed 8-bit integer + i8, + + /// Signed 16-bit integer + i16, + + /// Signed 32-bit integer + i32, + + /// Signed 64-bit integer + i64, + + /// Timestamp is identical to I64, but to be used for Unix timestamps . + /// The reason for a separate FieldType is to enable the UI to interpret I64 as timestamp without using the field name as indicator. + /// For date facets and filtering. + timestamp, + + /// Floating point 32-bit + f32, + + /// Floating point 64-bit + f64, + + /// Boolean + bool, + + /// String16 + /// allows a maximum cardinality of 65_535 (16 bit) distinct values, is space-saving. 
+ string16, + + /// String32 + /// allows a maximum cardinality of 4_294_967_295 (32 bit) distinct values + string32, + + /// StringSet16 is a set of strings, e.g. tags, categories, keywords, authors, genres, etc. + /// allows a maximum cardinality of 65_535 (16 bit) distinct values, is space-saving. + stringSet16, + + /// StringSet32 is a set of strings, e.g. tags, categories, keywords, authors, genres, etc. + /// allows a maximum cardinality of 4_294_967_295 (32 bit) distinct values + stringSet32, + + /// Point is a geographic field type: A `Vec` with two coordinate values (latitude and longitude) are internally encoded into a single u64 value (Morton code). + /// Morton codes enable efficient range queries. + /// Latitude and longitude are a pair of numbers (coordinates) used to describe a position on the plane of a geographic coordinate system. + /// The numbers are in decimal degrees format and range from -90 to 90 for latitude and -180 to 180 for longitude. + /// Coordinates are internally stored as u64 morton code: both f64 values are multiplied by 10_000_000, converted to i32 and bitwise interleaved into a single u64 morton code + /// The conversion between longitude/latitude coordinates and Morton code is lossy due to rounding errors. + point, + + /// Text is a text field, that will be tokenized by the selected Tokenizer into string tokens. + text, + ; + + static Future default_() => + RustLib.instance.api.seekstormIndexFieldTypeDefault(); +} + +enum NgramType { + /// no n-grams, only single terms are indexed + singleTerm, + + /// Ngram frequent frequent + ngramFf, + + /// Ngram frequent rare + ngramFr, + + /// Ngram rare frequent + ngramRf, + + /// Ngram frequent frequent frequent + ngramFff, + + /// Ngram rare frequent frequent + ngramRff, + + /// Ngram frequent frequent rare + ngramFfr, + + /// Ngram frequent rare frequent + ngramFrf, + ; + + static Future default_() => + RustLib.instance.api.seekstormIndexNgramTypeDefault(); +} + +/// Defines spelling correction (fuzzy search) settings for an index. +class QueryCompletion { + /// Maximum number of completions to generate during indexing + /// disabled if == 0 + final BigInt maxCompletionEntries; + + const QueryCompletion({ + required this.maxCompletionEntries, + }); + + @override + int get hashCode => maxCompletionEntries.hashCode; + + @override + bool operator ==(Object other) => + identical(this, other) || + other is QueryCompletion && + runtimeType == other.runtimeType && + maxCompletionEntries == other.maxCompletionEntries; +} + +/// Similarity type defines the scoring and ranking of the search results: +/// - Bm25f: considers documents composed from several fields, with different field lengths and importance +/// - Bm25fProximity: considers term proximity, e.g. for implicit phrase search with improved relevancy +enum SimilarityType { + /// Bm25f considers documents composed from several fields, with different field lengths and importance + bm25F, + + /// Bm25fProximity considers term proximity, e.g. for implicit phrase search with improved relevancy + bm25FProximity, + ; + + static Future default_() => + RustLib.instance.api.seekstormIndexSimilarityTypeDefault(); +} + +/// Defines spelling correction (fuzzy search) settings for an index. +class SpellingCorrection { + /// The edit distance thresholds for suggestions: 1..2 recommended; higher values increase latency and memory consumption. + final BigInt maxDictionaryEditDistance; + + /// Term length thresholds for each edit distance. 
+ /// None: max_dictionary_edit_distance for all terms lengths + /// Some(\[4\]): max_dictionary_edit_distance for all terms lengths >= 4, + /// Some(\[2,8\]): max_dictionary_edit_distance for all terms lengths >=2, max_dictionary_edit_distance +1 for all terms for lengths>=8 + final Uint64List? termLengthThreshold; + + /// The minimum frequency count for dictionary words to be considered eligible for spelling correction. + /// Depends on the corpus size, 1..20 recommended. + /// If count_threshold is too high, some correct words might be missed from the dictionary and deemed misspelled, + /// if count_threshold is too low, some misspelled words from the corpus might be considered correct and added to the dictionary. + /// Dictionary terms eligible for spelling correction (frequency count >= count_threshold) consume much more RAM, than the candidates (frequency count < count_threshold), + /// but the terms below count_threshold will be included in dictionary.csv too. + final BigInt countThreshold; + + /// Limits the maximum number of dictionary entries (terms >= count_threshold) to generate during indexing, preventing excessive RAM consumption. + /// The number of terms in dictionary.csv will be higher, because it contains also the terms < count_threshold, to become eligible in the future during incremental dictionary updates. + /// Dictionary terms eligible for spelling correction (frequency count >= count_threshold) consume much more RAM, than the candidates (frequency count < count_threshold). + /// ⚠️ Above this threshold no new terms are added to the dictionary, causing them to be deemed incorrect during spelling correction and possibly changed to similar terms that are in the dictionary. + final BigInt maxDictionaryEntries; + + const SpellingCorrection({ + required this.maxDictionaryEditDistance, + this.termLengthThreshold, + required this.countThreshold, + required this.maxDictionaryEntries, + }); + + @override + int get hashCode => + maxDictionaryEditDistance.hashCode ^ + termLengthThreshold.hashCode ^ + countThreshold.hashCode ^ + maxDictionaryEntries.hashCode; + + @override + bool operator ==(Object other) => + identical(this, other) || + other is SpellingCorrection && + runtimeType == other.runtimeType && + maxDictionaryEditDistance == other.maxDictionaryEditDistance && + termLengthThreshold == other.termLengthThreshold && + countThreshold == other.countThreshold && + maxDictionaryEntries == other.maxDictionaryEntries; +} + +/// Defines stemming behavior, reducing inflected words to their word stem, base or root form. +/// Stemming increases recall, but decreases precision. It can introduce false positive results. +enum StemmerType { + /// No stemming + none, + + /// Arabic stemmer + arabic, + + /// Danish stemmer + danish, + + /// Dutch stemmer + dutch, + + /// English stemmer + english, + + /// Finnish stemmer + finnish, + + /// French stemmer + french, + + /// German stemmer + german, + + /// Hungarian stemmer + greek, + + /// Hungarian stemmer + hungarian, + + /// Italian stemmer + italian, + + /// Norwegian stemmer + norwegian, + + /// Portuguese stemmer + portuguese, + + /// Romanian stemmer + romanian, + + /// Russian stemmer + russian, + + /// Spanish stemmer + spanish, + + /// Swedish stemmer + swedish, + + /// Tamil stemmer + tamil, + + /// Turkish stemmer + turkish, + ; + + static Future default_() => + RustLib.instance.api.seekstormIndexStemmerTypeDefault(); +} + +/// Defines synonyms for terms per index. +class Synonym { + /// List of terms that are synonyms. 
+ final List terms; + + /// Creates alternative versions of documents where in each copy a term is replaced with one of its synonyms. + /// Doesn't impact the query latency, but does increase the index size. + /// Multi-way synonyms (default): all terms are synonyms of each other. + /// One-way synonyms: only the first term is a synonym of the following terms, but not vice versa. + /// E.g. [street, avenue, road] will result in searches for street to return documents containing any of the terms street, avenue or road, + /// but searches for avenue will only return documents containing avenue, but not documents containing street or road. + /// Currently only single terms without spaces are supported. + /// Synonyms are supported in result highlighting. + /// The synonyms that were created with the synonyms parameter in create_index are stored in synonyms.json in the index directory contains + /// Can be manually modified, but becomes effective only after restart and only for newly indexed documents. + final bool multiway; + + const Synonym({ + required this.terms, + required this.multiway, + }); + + @override + int get hashCode => terms.hashCode ^ multiway.hashCode; + + @override + bool operator ==(Object other) => + identical(this, other) || + other is Synonym && + runtimeType == other.runtimeType && + terms == other.terms && + multiway == other.multiway; +} + +/// Defines tokenizer behavior: +/// AsciiAlphabetic +/// - Mainly for for benchmark compatibility +/// - Only ASCII alphabetic chars are recognized as token. +/// +/// UnicodeAlphanumeric +/// - All Unicode alphanumeric chars are recognized as token. +/// - Allows '+' '-' '#' in middle or end of a token: c++, c#, block-max. +/// +/// UnicodeAlphanumericFolded +/// - All Unicode alphanumeric chars are recognized as token. +/// - Allows '+' '-' '#' in middle or end of a token: c++, c#, block-max. +/// - Diacritics, accents, zalgo text, umlaut, bold, italic, full-width UTF-8 characters are converted into its basic representation. +/// - Apostroph handling prevents that short term parts preceding or following the apostroph get indexed (e.g. "s" in "someone's"). +/// - Tokenizing might be slower due to folding and apostroph processing. +/// +/// UnicodeAlphanumericZH +/// - Implements Chinese word segmentation to segment continuous Chinese text into tokens for indexing and search. +/// - Supports mixed Latin and Chinese texts +/// - Supports Chinese sentence boundary chars for KWIC snippets ahd highlighting. +/// - Requires feature #[cfg(feature = "zh")] +enum TokenizerType { + /// Only ASCII alphabetic chars are recognized as token. Mainly for benchmark compatibility. + asciiAlphabetic, + + /// All Unicode alphanumeric chars are recognized as token. + /// Allow '+' '-' '#' in middle or end of a token: c++, c#, block-max. + unicodeAlphanumeric, + + /// All Unicode alphanumeric chars are recognized as token. + /// Allows '+' '-' '#' in middle or end of a token: c++, c#, block-max. + /// Diacritics, accents, zalgo text, umlaut, bold, italic, full-width UTF-8 characters are converted into its basic representation. + /// Apostroph handling prevents that short term parts preceding or following the apostroph get indexed (e.g. "s" in "someone's"). + /// Tokenizing might be slower due to folding and apostroph processing. + unicodeAlphanumericFolded, + + /// Tokens are separated by whitespace. Mainly for benchmark compatibility. + whitespace, + + /// Tokens are separated by whitespace. Token are converted to lowercase. 
Mainly for benchmark compatibility. + whitespaceLowercase, + + /// Implements Chinese word segmentation to segment continuous Chinese text into tokens for indexing and search. + /// Supports mixed Latin and Chinese texts + /// Supports Chinese sentence boundary chars for KWIC snippets ahd highlighting. + /// Requires feature #[cfg(feature = "zh")] + unicodeAlphanumericZh, + ; + + static Future default_() => + RustLib.instance.api.seekstormIndexTokenizerTypeDefault(); +} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/ingest.dart b/mobile_app/lib/src/rust/third_party/seekstorm/ingest.dart new file mode 100644 index 0000000..ddfead1 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/ingest.dart @@ -0,0 +1,83 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'index.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `pdfium_option` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `deref`, `initialize` +// These functions are ignored (category: IgnoreBecauseNotAllowedOwner): `index_pdf_bytes`, `index_pdf_file`, `index_pdf`, `ingest_csv`, `ingest_json`, `ingest_pdf` + +// Rust type: RustOpaqueMoi>> +abstract class PdfDocument implements RustOpaqueInterface {} + +abstract class IndexPdf { + Future indexPdf( + {required Path filePath, + required BigInt fileSize, + required PlatformInt64 fileDate, + required FileType file, + required PdfDocument pdf}); +} + +abstract class IndexPdfBytes { + /// Index PDF file from byte array. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - Path to the file (fallback, if title and date can't be extracted) + /// * `file_date` - File creation date (Unix timestamp: the number of seconds since 1 January 1970) (fallback, if date can't be extracted) + /// * `file_bytes` - Byte array of the file + Future indexPdfBytes( + {required Path filePath, + required PlatformInt64 fileDate, + required List fileBytes}); +} + +abstract class IndexPdfFile { + /// Index PDF file from local disk. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - Path to the file + /// # Returns + /// * `Result<(), String>` - Ok(()) or Err(String) + Future indexPdfFile({required Path filePath}); +} + +abstract class IngestCsv { + /// Ingest local data files in [CSV](https://en.wikipedia.org/wiki/Comma-separated_values). 
+ /// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. + Future ingestCsv( + {required Path dataPath, + required bool hasHeader, + required bool quoting, + required int delimiter, + BigInt? skipDocs, + BigInt? numDocs}); +} + +abstract class IngestJson { + /// Ingest local data files in [JSON](https://en.wikipedia.org/wiki/JSON), [Newline-delimited JSON](https://github.com/ndjson/ndjson-spec) (ndjson), and [Concatenated JSON](https://en.wikipedia.org/wiki/JSON_streaming) formats via console command. + /// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. + Future ingestJson({required Path dataPath}); +} + +abstract class IngestPdf { + /// Index PDF files from local disk directory and sub-directories or from file. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - Path to the file + Future ingestPdf({required Path filePath}); +} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/search.dart b/mobile_app/lib/src/rust/third_party/seekstorm/search.dart new file mode 100644 index 0000000..5374173 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/search.dart @@ -0,0 +1,291 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `FilterSparse`, `RangeF32`, `RangeF64`, `RangeI16`, `RangeI32`, `RangeI64`, `RangeI8`, `RangeU16`, `RangeU32`, `RangeU64`, `RangeU8`, `ResultSortIndex`, `SearchResult` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `clone`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `compose`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `eq`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `name`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas`, `schemas` +// These functions are ignored (category: IgnoreBecauseNotAllowedOwner): `search_shard`, `search` +// These functions are ignored (category: IgnoreBecauseOwnerTyShouldIgnore): `default` + +// Rust type: RustOpaqueMoi> +abstract class FacetFilter implements 
RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class FacetValue implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class QueryFacet implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.seekstormSearchQueryFacetDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class QueryRewriting implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.seekstormSearchQueryRewritingDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class Ranges implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.seekstormSearchRangesDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class ResultObject implements RustOpaqueInterface { + AHashMapStringFacet get facets; + + String get originalQuery; + + String get query; + + List get queryTerms; + + BigInt get resultCount; + + BigInt get resultCountTotal; + + List get results; + + set facets(AHashMapStringFacet facets); + + set originalQuery(String originalQuery); + + set query(String query); + + set queryTerms(List queryTerms); + + set resultCount(BigInt resultCount); + + set resultCountTotal(BigInt resultCountTotal); + + set results(List results); + + static Future default_() => + RustLib.instance.api.seekstormSearchResultObjectDefault(); +} + +// Rust type: RustOpaqueMoi> +abstract class ResultSort implements RustOpaqueInterface { + FacetValue get base; + + String get field; + + SortOrder get order; + + set base(FacetValue base); + + set field(String field); + + set order(SortOrder order); +} + +abstract class Search { + /// Search the index for all indexed documents, both for committed and uncommitted documents. + /// The latter enables true realtime search: documents are available for search in exact the same millisecond they are indexed. + /// Arguments: + /// * `query_string`: query string `+` `-` `""` search operators are recognized. + /// * `query_type_default`: Specifiy default QueryType: + /// * **Union**, disjunction, OR, + /// * **Intersection**, conjunction, AND, `+`, + /// * **Phrase** `""`, + /// * **Not**, except, minus `-`. + /// + /// The default QueryType is superseded if the query parser detects that a different query type is specified within the query string (`+` `-` `""`). + /// + /// Boolean queries are specified in the search method either via the query_type parameter or via operator chars within the query parameter. + /// The interpretation of operator chars within the query string (set `query_type=QueryType::Union`) allows to specify advanced search operations via a simple search box. 
+ /// + /// Intersection, AND `+` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="+red +apple".to_string(); + /// ``` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Intersection; + /// let query_string="red apple".to_string(); + /// ``` + /// Union, OR + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="red apple".to_string(); + /// ``` + /// Phrase `""` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="\"red apple\"".to_string(); + /// ``` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Phrase; + /// let query_string="red apple".to_string(); + /// ``` + /// Except, minus, NOT `-` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="apple -red".to_string(); + /// ``` + /// Mixed phrase and intersection + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="+\"the who\" +uk".to_string(); + /// ``` + /// * `offset`: offset of search results to return. + /// * `length`: number of search results to return. + /// With length=0, resultType::TopkCount will be automatically downgraded to resultType::Count, returning the number of results only, without returning the results itself. + /// * `result_type`: type of search results to return: Count, Topk, TopkCount. + /// * `include_uncommited`: true realtime search: include indexed documents which where not yet committed into search results. + /// * `field_filter`: Specify field names where to search at querytime, whereas SchemaField.indexed is set at indextime. If set to Vec::new() then all indexed fields are searched. + /// * `query_facets`: Must be set if facets should be returned in ResultObject. If set to Vec::new() then no facet fields are returned. + /// Facet fields are only collected, counted and returned for ResultType::Count and ResultType::TopkCount, but not for ResultType::Topk. + /// The prefix property of a QueryFacet allows at query time to filter the returned facet values to those matching a given prefix, if there are too many distinct values per facet field. + /// The length property of a QueryFacet allows at query time limiting the number of returned distinct values per facet field, if there are too many distinct values. The QueryFacet can be used to improve the usability in an UI. + /// If the length property of a QueryFacet is set to 0 then no facet values for that facet are collected, counted and returned at query time. That decreases the query latency significantly. + /// The facet values are sorted by the frequency of the appearance of the value within the indexed documents matching the query in descending order. 
+ /// Examples: + /// query_facets = vec![QueryFacet::String16 {field: "language".into(),prefix: "ger".into(),length: 5},QueryFacet::String16 {field: "brand".into(),prefix: "a".into(),length: 5}]; + /// query_facets = vec![QueryFacet::U8 {field: "age".into(), range_type: RangeType::CountWithinRange, ranges: vec![("0-20".into(), 0),("20-40".into(), 20), ("40-60".into(), 40),("60-80".into(), 60), ("80-100".into(), 80)]}]; + /// query_facets = vec![QueryFacet::Point {field: "location".into(),base:vec![38.8951, -77.0364],unit:DistanceUnit::Kilometers,range_type: RangeType::CountWithinRange,ranges: vec![ ("0-200".into(), 0.0),("200-400".into(), 200.0), ("400-600".into(), 400.0), ("600-800".into(), 600.0), ("800-1000".into(), 800.0)]}]; + /// * `facet_filter`: Search results are filtered to documents matching specific string values or numerical ranges in the facet fields. If set to Vec::new() then result are not facet filtered. + /// The filter parameter filters the returned results to those documents both matching the query AND matching for all (boolean AND) stated facet filter fields at least one (boolean OR) of the stated values. + /// If the query is changed then both facet counts and search results are changed. If the facet filter is changed then only the search results are changed, while facet counts remain unchanged. + /// The facet counts depend only from the query and not which facet filters are selected. + /// Examples: + /// facet_filter=vec![FacetFilter::String{field:"language".into(),filter:vec!["german".into()]},FacetFilter::String{field:"brand".into(),filter:vec!["apple".into(),"google".into()]}]; + /// facet_filter=vec![FacetFilter::U8{field:"age".into(),filter: 21..65}]; + /// facet_filter = vec![FacetFilter::Point {field: "location".into(),filter: (vec![38.8951, -77.0364], 0.0..1000.0, DistanceUnit::Kilometers)}]; + /// * `result_sort`: Sort field and order: Search results are sorted by the specified facet field, either in ascending or descending order. + /// If no sort field is specified, then the search results are sorted by rank in descending order per default. + /// Multiple sort fields are combined by a "sort by, then sort by"-method ("tie-breaking"-algorithm). + /// The results are sorted by the first field, and only for those results where the first field value is identical (tie) the results are sub-sorted by the second field, + /// until the n-th field value is either not equal or the last field is reached. + /// A special _score field (BM25x), reflecting how relevant the result is for a given search query (phrase match, match in title etc.) can be combined with any of the other sort fields as primary, secondary or n-th search criterium. + /// Sort is only enabled on facet fields that are defined in schema at create_index! + /// Examples: + /// result_sort = vec![ResultSort {field: "price".into(), order: SortOrder::Descending, base: FacetValue::None},ResultSort {field: "language".into(), order: SortOrder::Ascending, base: FacetValue::None}]; + /// result_sort = vec![ResultSort {field: "location".into(),order: SortOrder::Ascending, base: FacetValue::Point(vec![38.8951, -77.0364])}]; + /// * `query_rewriting`: Enables query rewriting features such as spelling correction and query auto-completion (QAC). + /// The spelling correction of multi-term query strings handles three cases: + /// 1. mistakenly inserted space into a correct term led to two incorrect terms: `hels inki` -> `helsinki` + /// 2. 
mistakenly omitted space between two correct terms led to one incorrect combined term: `modernart` -> `modern art` + /// 3. multiple independent input terms with/without spelling errors: `cinese indastrialication` -> `chinese industrialization` + /// + /// Query correction/completion supports phrases "", but is disabled, if +- operators are used, or if a opening quote is used after the first term, or if a closing quote is used before the last term. + /// See QueryRewriting enum for details. + /// ⚠️ In addition to setting the query_rewriting parameter per query, the incremental creation of the Symspell dictionary during the indexing of documents has to be enabled via the create_index parameter `meta.spelling_correction`. + /// + /// Facets: + /// If query_string is empty, then index facets (collected at index time) are returned, otherwise query facets (collected at query time) are returned. + /// Facets are defined in 3 different places: + /// the facet fields are defined in schema at create_index, + /// the facet field values are set in index_document at index time, + /// the query_facets/facet_filter search parameters are specified at query time. + /// Facets are then returned in the search result object. + Future search( + {required String queryString, + required QueryType queryTypeDefault, + required BigInt offset, + required BigInt length, + required ResultType resultType, + required bool includeUncommited, + required List fieldFilter, + required List queryFacets, + required List facetFilter, + required List resultSort, + required QueryRewriting queryRewriting}); +} + +abstract class SearchShard { + Future searchShard( + {required String queryString, + required QueryType queryTypeDefault, + required BigInt offset, + required BigInt length, + required ResultType resultType, + required bool includeUncommited, + required List fieldFilter, + required List queryFacets, + required List facetFilter, + required List resultSort}); +} + +/// Specifies the default QueryType: The following query types are supported: +/// - **Union** (OR, disjunction), +/// - **Intersection** (AND, conjunction), +/// - **Phrase** (""), +/// - **Not** (-). +/// +/// The default QueryType is superseded if the query parser detects that a different query type is specified within the query string (+ - ""). 
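+/// A rough usage sketch from Dart (hypothetical: `index` is assumed to be an
+/// object implementing [Search]; parameter names follow the generated binding
+/// below, and the facet/sort/rewriting arguments are simply left empty or default):
+/// ```dart
+/// // `index` is a hypothetical handle implementing Search (not part of this file).
+/// final result = await index.search(
+///   queryString: '+red +apple',          // '+' operators force Intersection per term
+///   queryTypeDefault: QueryType.union,   // superseded by the operators in the query string
+///   offset: BigInt.zero,
+///   length: BigInt.from(10),
+///   resultType: ResultType.topkCount,
+///   includeUncommited: true,             // realtime search over uncommitted documents
+///   fieldFilter: [],                     // empty = search all indexed fields
+///   queryFacets: [],
+///   facetFilter: [],
+///   resultSort: [],
+///   queryRewriting: await QueryRewriting.default_(),
+/// );
+/// // result.resultCount, result.results etc. are then available on the returned ResultObject.
+/// ```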
+enum QueryType { + /// Union (OR, disjunction) + union, + + /// Intersection (AND, conjunction) + intersection, + + /// Phrase ("") + phrase, + + /// Not (-) + not, + ; + + static Future default_() => + RustLib.instance.api.seekstormSearchQueryTypeDefault(); +} + +/// specifies how to count the frequency of numerical facet field values +enum RangeType { + /// within the specified range + countWithinRange, + + /// within the range and all ranges above + countAboveRange, + + /// within the range and all ranges below + countBelowRange, + ; + + static Future default_() => + RustLib.instance.api.seekstormSearchRangeTypeDefault(); +} + +/// The following result types are supported: +/// - **Count** (count all results that match the query, but returning top-k results is not required) +/// - **Topk** (returns the top-k results per query, but counting all results that match the query is not required) +/// - **TopkCount** (returns the top-k results per query + count all results that match the query) +enum ResultType { + /// Count all results that match the query, without returning top-k results + count, + + /// Return the top-k results per query, without counting all results that match the query + topk, + + /// Return the top-k results per query and count all results that match the query + topkCount, + ; + + static Future default_() => + RustLib.instance.api.seekstormSearchResultTypeDefault(); +} + +/// Specifies the sort order for the search results. +enum SortOrder { + /// Ascending sort order + ascending, + + /// Descending sort order + descending, + ; +} diff --git a/mobile_app/lib/src/rust/third_party/seekstorm/utils.dart b/mobile_app/lib/src/rust/third_party/seekstorm/utils.dart new file mode 100644 index 0000000..91066ad --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/seekstorm/utils.dart @@ -0,0 +1,20 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +/// Truncates a string to a maximum number of characters. +Future truncate({required String source, required BigInt maxChars}) => + RustLib.instance.api + .seekstormUtilsTruncate(source: source, maxChars: maxChars); + +/// Returns a substring of the given string, starting at the specified index and with the specified length. +Future substring( + {required String source, + required BigInt start, + required BigInt length}) => + RustLib.instance.api + .seekstormUtilsSubstring(source: source, start: start, length: length); diff --git a/mobile_app/lib/src/rust/third_party/tokio/io.dart b/mobile_app/lib/src/rust/third_party/tokio/io.dart new file mode 100644 index 0000000..7328f7f --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/io.dart @@ -0,0 +1,2712 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `chain`, `chain`, `copy_bidirectional_with_sizes`, `copy_bidirectional`, `copy_buf`, `copy`, `join`, `read_buf`, `read_buf`, `read_exact`, `read_exact`, `read_line`, `read_line`, `read_to_end`, `read_to_end`, `read_to_string`, `read_to_string`, `read_until`, `read_until`, `read`, `read`, `split`, `write_all_buf`, `write_all_buf`, `write_all`, `write_all`, `write_buf`, `write_buf`, `write_vectored`, `write_vectored`, `write`, `write` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `BufReader`, `BufStream`, `BufWriter`, `Chain`, `Join` +// These functions are ignored (category: IgnoreBecauseNotAllowedOwner): `consume`, `fill_buf`, `flush`, `lines`, `read_f32_le`, `read_f32`, `read_f64_le`, `read_f64`, `read_i128_le`, `read_i128`, `read_i16_le`, `read_i16`, `read_i32_le`, `read_i32`, `read_i64_le`, `read_i64`, `read_i8`, `read_u128_le`, `read_u128`, `read_u16_le`, `read_u16`, `read_u32_le`, `read_u32`, `read_u64_le`, `read_u64`, `read_u8`, `rewind`, `seek`, `shutdown`, `split`, `stream_position`, `take`, `write_f32_le`, `write_f32`, `write_f64_le`, `write_f64`, `write_i128_le`, `write_i128`, `write_i16_le`, `write_i16`, `write_i32_le`, `write_i32`, `write_i64_le`, `write_i64`, `write_i8`, `write_u128_le`, `write_u128`, `write_u16_le`, `write_u16`, `write_u32_le`, `write_u32`, `write_u64_le`, `write_u64`, `write_u8` + +/// Constructs a new handle to the standard error of the current process. +/// +/// The returned handle allows writing to standard error from the within the +/// Tokio runtime. +/// +/// Concurrent writes to stderr must be executed with care: Only individual +/// writes to this [`AsyncWrite`] are guaranteed to be intact. In particular +/// you should be aware that writes using [`write_all`] are not guaranteed +/// to occur as a single write, so multiple threads writing data with +/// [`write_all`] may result in interleaved output. +/// +/// [`AsyncWrite`]: AsyncWrite +/// [`write_all`]: crate::io::AsyncWriteExt::write_all() +/// +/// # Examples +/// +/// ``` +/// use tokio::io::{self, AsyncWriteExt}; +/// +/// #[tokio::main] +/// async fn main() -> io::Result<()> { +/// let mut stderr = io::stderr(); +/// stderr.write_all(b"Print some error here.").await?; +/// Ok(()) +/// } +/// ``` +Future stderr() => RustLib.instance.api.tokioIoStderr(); + +/// Constructs a new handle to the standard input of the current process. +/// +/// This handle is best used for non-interactive uses, such as when a file +/// is piped into the application. For technical reasons, `stdin` is +/// implemented by using an ordinary blocking read on a separate thread, and +/// it is impossible to cancel that read. This can make shutdown of the +/// runtime hang until the user presses enter. +/// +/// For interactive uses, it is recommended to spawn a thread dedicated to +/// user input and use blocking IO directly in that thread. +Future stdin() => RustLib.instance.api.tokioIoStdin(); + +/// Constructs a new handle to the standard output of the current process. +/// +/// The returned handle allows writing to standard out from the within the +/// Tokio runtime. 
+/// +/// Concurrent writes to stdout must be executed with care: Only individual +/// writes to this [`AsyncWrite`] are guaranteed to be intact. In particular +/// you should be aware that writes using [`write_all`] are not guaranteed +/// to occur as a single write, so multiple threads writing data with +/// [`write_all`] may result in interleaved output. +/// +/// [`AsyncWrite`]: AsyncWrite +/// [`write_all`]: crate::io::AsyncWriteExt::write_all() +/// +/// # Examples +/// +/// ``` +/// use tokio::io::{self, AsyncWriteExt}; +/// +/// #[tokio::main] +/// async fn main() -> io::Result<()> { +/// let mut stdout = io::stdout(); +/// stdout.write_all(b"Hello world!").await?; +/// Ok(()) +/// } +/// ``` +/// +/// The following is an example of using `stdio` with loop. +/// +/// ``` +/// use tokio::io::{self, AsyncWriteExt}; +/// +/// #[tokio::main] +/// async fn main() { +/// let messages = vec!["hello", " world\n"]; +/// +/// // When you use `stdio` in a loop, it is recommended to create +/// // a single `stdio` instance outside the loop and call a write +/// // operation against that instance on each loop. +/// // +/// // Repeatedly creating `stdout` instances inside the loop and +/// // writing to that handle could result in mangled output since +/// // each write operation is handled by a different blocking thread. +/// let mut stdout = io::stdout(); +/// +/// for message in &messages { +/// stdout.write_all(message.as_bytes()).await.unwrap(); +/// stdout.flush().await.unwrap(); +/// } +/// } +/// ``` +Future stdout() => RustLib.instance.api.tokioIoStdout(); + +/// Creates a value that is always at EOF for reads, and ignores all data written. +/// +/// All writes on the returned instance will return `Poll::Ready(Ok(buf.len()))` +/// and the contents of the buffer will not be inspected. +/// +/// All reads from the returned instance will return `Poll::Ready(Ok(0))`. +/// +/// This is an asynchronous version of [`std::io::empty`][std]. +/// +/// [std]: std::io::empty +/// +/// # Examples +/// +/// A slightly sad example of not reading anything into a buffer: +/// +/// ``` +/// use tokio::io::{self, AsyncReadExt}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// let mut buffer = String::new(); +/// io::empty().read_to_string(&mut buffer).await.unwrap(); +/// assert!(buffer.is_empty()); +/// # } +/// ``` +/// +/// A convoluted way of getting the length of a buffer: +/// +/// ``` +/// use tokio::io::{self, AsyncWriteExt}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// let buffer = vec![1, 2, 3, 5, 8]; +/// let num_bytes = io::empty().write(&buffer).await.unwrap(); +/// assert_eq!(num_bytes, 5); +/// # } +/// ``` +Future empty() => RustLib.instance.api.tokioIoEmpty(); + +/// Create a new pair of `DuplexStream`s that act like a pair of connected sockets. +/// +/// The `max_buf_size` argument is the maximum amount of bytes that can be +/// written to a side before the write returns `Poll::Pending`. +Future<(DuplexStream, DuplexStream)> duplex({required BigInt maxBufSize}) => + RustLib.instance.api.tokioIoDuplex(maxBufSize: maxBufSize); + +/// Creates unidirectional buffer that acts like in memory pipe. +/// +/// The `max_buf_size` argument is the maximum amount of bytes that can be +/// written to a buffer before the it returns `Poll::Pending`. 
+/// +/// # Unify reader and writer +/// +/// The reader and writer half can be unified into a single structure +/// of `SimplexStream` that supports both reading and writing or +/// the `SimplexStream` can be already created as unified structure +/// using [`SimplexStream::new_unsplit()`]. +/// +/// ``` +/// # async fn ex() -> std::io::Result<()> { +/// # use tokio::io::{AsyncReadExt, AsyncWriteExt}; +/// let (reader, writer) = tokio::io::simplex(64); +/// let mut simplex_stream = reader.unsplit(writer); +/// simplex_stream.write_all(b"hello").await?; +/// +/// let mut buf = [0u8; 5]; +/// simplex_stream.read_exact(&mut buf).await?; +/// assert_eq!(&buf, b"hello"); +/// # Ok(()) +/// # } +/// ``` +Future<(ReadHalfSimplexStream, WriteHalfSimplexStream)> simplex( + {required BigInt maxBufSize}) => + RustLib.instance.api.tokioIoSimplex(maxBufSize: maxBufSize); + +/// Creates an instance of an async reader that infinitely repeats one byte. +/// +/// All reads from this reader will succeed by filling the specified buffer with +/// the given byte. +/// +/// This is an asynchronous version of [`std::io::repeat`][std]. +/// +/// [std]: std::io::repeat +/// +/// # Examples +/// +/// ``` +/// use tokio::io::{self, AsyncReadExt}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// let mut buffer = [0; 3]; +/// io::repeat(0b101).read_exact(&mut buffer).await.unwrap(); +/// assert_eq!(buffer, [0b101, 0b101, 0b101]); +/// # } +/// ``` +Future repeat({required int byte}) => + RustLib.instance.api.tokioIoRepeat(byte: byte); + +/// Creates an instance of an async writer which will successfully consume all +/// data. +/// +/// All calls to [`poll_write`] on the returned instance will return +/// `Poll::Ready(Ok(buf.len()))` and the contents of the buffer will not be +/// inspected. +/// +/// This is an asynchronous version of [`std::io::sink`][std]. +/// +/// [`poll_write`]: crate::io::AsyncWrite::poll_write() +/// [std]: std::io::sink +/// +/// # Examples +/// +/// ``` +/// use tokio::io::{self, AsyncWriteExt}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> io::Result<()> { +/// let buffer = vec![1, 2, 3, 5, 8]; +/// let num_bytes = io::sink().write(&buffer).await?; +/// assert_eq!(num_bytes, 5); +/// Ok(()) +/// # } +/// ``` +Future sink() => RustLib.instance.api.tokioIoSink(); + +// Rust type: RustOpaqueMoi> +abstract class DuplexStream implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Empty implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class FillBufSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class FlushSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Interest implements RustOpaqueInterface { + /// Add together two `Interest` values. + /// + /// This function works from a `const` context. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Interest; + /// + /// const BOTH: Interest = Interest::READABLE.add(Interest::WRITABLE); + /// + /// assert!(BOTH.is_readable()); + /// assert!(BOTH.is_writable()); + Future add({required Interest other}); + + /// Returns true if the value includes error interest. 
+ /// + /// # Examples + /// + /// ``` + /// use tokio::io::Interest; + /// + /// assert!(Interest::ERROR.is_error()); + /// assert!(!Interest::WRITABLE.is_error()); + /// + /// let combined = Interest::READABLE | Interest::ERROR; + /// assert!(combined.is_error()); + /// ``` + Future isError(); + + /// Returns true if the value includes readable interest. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Interest; + /// + /// assert!(Interest::READABLE.is_readable()); + /// assert!(!Interest::WRITABLE.is_readable()); + /// + /// let both = Interest::READABLE | Interest::WRITABLE; + /// assert!(both.is_readable()); + /// ``` + Future isReadable(); + + /// Returns true if the value includes writable interest. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Interest; + /// + /// assert!(!Interest::READABLE.is_writable()); + /// assert!(Interest::WRITABLE.is_writable()); + /// + /// let both = Interest::READABLE | Interest::WRITABLE; + /// assert!(both.is_writable()); + /// ``` + Future isWritable(); + + /// Remove `Interest` from `self`. + /// + /// Interests present in `other` but *not* in `self` are ignored. + /// + /// Returns `None` if the set would be empty after removing `Interest`. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Interest; + /// + /// const RW_INTEREST: Interest = Interest::READABLE.add(Interest::WRITABLE); + /// + /// let w_interest = RW_INTEREST.remove(Interest::READABLE).unwrap(); + /// assert!(!w_interest.is_readable()); + /// assert!(w_interest.is_writable()); + /// + /// // Removing all interests from the set returns `None`. + /// assert_eq!(w_interest.remove(Interest::WRITABLE), None); + /// + /// // Remove all interests at once. + /// assert_eq!(RW_INTEREST.remove(RW_INTEREST), None); + /// ``` + Future remove({required Interest other}); +} + +// Rust type: RustOpaqueMoi>> +abstract class LinesSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadF32LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadF32MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadF64LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadF64MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadHalfSimplexStream implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI128LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI128MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI16LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI16MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI32LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI32MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI64LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI64MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadI8MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU128LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU128MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> 
+abstract class ReadU16LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU16MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU32LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU32MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU64LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU64MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ReadU8MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Ready implements RustOpaqueInterface { + /// Returns true if `Ready` is the empty set. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Ready; + /// + /// assert!(Ready::EMPTY.is_empty()); + /// assert!(!Ready::READABLE.is_empty()); + /// ``` + Future isEmpty(); + + /// Returns `true` if the value includes error `readiness`. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Ready; + /// + /// assert!(!Ready::EMPTY.is_error()); + /// assert!(!Ready::WRITABLE.is_error()); + /// assert!(Ready::ERROR.is_error()); + /// ``` + Future isError(); + + /// Returns `true` if the value includes read-closed `readiness`. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Ready; + /// + /// assert!(!Ready::EMPTY.is_read_closed()); + /// assert!(!Ready::READABLE.is_read_closed()); + /// assert!(Ready::READ_CLOSED.is_read_closed()); + /// ``` + Future isReadClosed(); + + /// Returns `true` if the value includes `readable`. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Ready; + /// + /// assert!(!Ready::EMPTY.is_readable()); + /// assert!(Ready::READABLE.is_readable()); + /// assert!(Ready::READ_CLOSED.is_readable()); + /// assert!(!Ready::WRITABLE.is_readable()); + /// ``` + Future isReadable(); + + /// Returns `true` if the value includes writable `readiness`. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Ready; + /// + /// assert!(!Ready::EMPTY.is_writable()); + /// assert!(!Ready::READABLE.is_writable()); + /// assert!(Ready::WRITABLE.is_writable()); + /// assert!(Ready::WRITE_CLOSED.is_writable()); + /// ``` + Future isWritable(); + + /// Returns `true` if the value includes write-closed `readiness`. + /// + /// # Examples + /// + /// ``` + /// use tokio::io::Ready; + /// + /// assert!(!Ready::EMPTY.is_write_closed()); + /// assert!(!Ready::WRITABLE.is_write_closed()); + /// assert!(Ready::WRITE_CLOSED.is_write_closed()); + /// ``` + Future isWriteClosed(); +} + +// Rust type: RustOpaqueMoi> +abstract class Repeat implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class SeekFrom implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class SeekSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class ShutdownSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class SimplexStream implements RustOpaqueInterface { + /// Creates unidirectional buffer that acts like in memory pipe. To create split + /// version with separate reader and writer you can use [`simplex`] function. + /// + /// The `max_buf_size` argument is the maximum amount of bytes that can be + /// written to a buffer before the it returns `Poll::Pending`. 
+ static Future newUnsplit({required BigInt maxBufSize}) => + RustLib.instance.api + .tokioIoSimplexStreamNewUnsplit(maxBufSize: maxBufSize); +} + +// Rust type: RustOpaqueMoi> +abstract class Sink implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class SplitSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Stderr implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Stdin implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Stdout implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class TakeSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteF32LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteF32MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteF64LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteF64MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteHalfSimplexStream implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI128LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI128MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI16LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI16MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI32LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI32MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI64LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI64MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteI8MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU128LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU128MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU16LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU16MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU32LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU32MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU64LeMutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU64MutSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class WriteU8MutSelf implements RustOpaqueInterface {} + +abstract class AsyncBufReadExt { + /// Tells this buffer that `amt` bytes have been consumed from the + /// buffer, so they should no longer be returned in calls to [`read`]. + /// + /// This function is a lower-level call. It needs to be paired with the + /// [`fill_buf`] method to function properly. This function does not + /// perform any I/O, it simply informs this object that some amount of + /// its buffer, returned from [`fill_buf`], has been consumed and should + /// no longer be returned. 
As such, this function may do odd things if + /// [`fill_buf`] isn't called before calling it. + /// + /// The `amt` must be less than the number of bytes in the buffer + /// returned by [`fill_buf`]. + /// + /// [`read`]: crate::io::AsyncReadExt::read + /// [`fill_buf`]: crate::io::AsyncBufReadExt::fill_buf + Future consume({required BigInt amt}); + + /// Returns the contents of the internal buffer, filling it with more + /// data from the inner reader if it is empty. + /// + /// This function is a lower-level call. It needs to be paired with the + /// [`consume`] method to function properly. When calling this method, + /// none of the contents will be "read" in the sense that later calling + /// `read` may return the same contents. As such, [`consume`] must be + /// called with the number of bytes that are consumed from this buffer + /// to ensure that the bytes are never returned twice. + /// + /// An empty buffer returned indicates that the stream has reached EOF. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn fill_buf(&mut self) -> io::Result<&[u8]>; + /// ``` + /// + /// # Errors + /// + /// This function will return an I/O error if the underlying reader was + /// read, but returned an error. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If you use it as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that no data was read. + /// + /// [`consume`]: crate::io::AsyncBufReadExt::consume + Future fillBuf(); + + /// Returns a stream over the lines of this reader. + /// This method is the async equivalent to [`BufRead::lines`](std::io::BufRead::lines). + /// + /// The stream returned from this function will yield instances of + /// [`io::Result`]`<`[`Option`]`<`[`String`]`>>`. Each string returned will *not* have a newline + /// byte (the 0xA byte) or `CRLF` (0xD, 0xA bytes) at the end. + /// + /// [`io::Result`]: std::io::Result + /// [`Option`]: core::option::Option + /// [`String`]: String + /// + /// # Errors + /// + /// Each line of the stream has the same error semantics as [`AsyncBufReadExt::read_line`]. + /// + /// # Examples + /// + /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In + /// this example, we use [`Cursor`] to iterate over all the lines in a byte + /// slice. + /// + /// [`Cursor`]: std::io::Cursor + /// + /// ``` + /// use tokio::io::AsyncBufReadExt; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let cursor = Cursor::new(b"lorem\nipsum\r\ndolor"); + /// + /// let mut lines = cursor.lines(); + /// + /// assert_eq!(lines.next_line().await.unwrap(), Some(String::from("lorem"))); + /// assert_eq!(lines.next_line().await.unwrap(), Some(String::from("ipsum"))); + /// assert_eq!(lines.next_line().await.unwrap(), Some(String::from("dolor"))); + /// assert_eq!(lines.next_line().await.unwrap(), None); + /// # } + /// ``` + /// + /// [`AsyncBufReadExt::read_line`]: AsyncBufReadExt::read_line + Future lines(); + + /// Returns a stream of the contents of this reader split on the byte + /// `byte`. + /// + /// This method is the asynchronous equivalent to + /// [`BufRead::split`](std::io::BufRead::split). + /// + /// The stream returned from this function will yield instances of + /// [`io::Result`]`<`[`Option`]`<`[`Vec`]`>>`. Each vector returned will *not* have + /// the delimiter byte at the end. 
+ /// + /// [`io::Result`]: std::io::Result + /// [`Option`]: core::option::Option + /// [`Vec`]: std::vec::Vec + /// + /// # Errors + /// + /// Each item of the stream has the same error semantics as + /// [`AsyncBufReadExt::read_until`](AsyncBufReadExt::read_until). + /// + /// # Examples + /// + /// ``` + /// # use tokio::io::AsyncBufRead; + /// use tokio::io::AsyncBufReadExt; + /// + /// # async fn dox(my_buf_read: impl AsyncBufRead + Unpin) -> std::io::Result<()> { + /// let mut segments = my_buf_read.split(b'f'); + /// + /// while let Some(segment) = segments.next_segment().await? { + /// println!("length = {}", segment.len()) + /// } + /// # Ok(()) + /// # } + /// ``` + Future split({required int byte}); +} + +abstract class AsyncReadExt { + /// Reads an 32-bit floating point type in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_f32(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read 32-bit floating point type from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0xff, 0x7f, 0xff, 0xff]); + /// + /// assert_eq!(f32::MIN, reader.read_f32().await?); + /// Ok(()) + /// # } + /// ``` + Future readF32(); + + /// Reads an 32-bit floating point type in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_f32_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read 32-bit floating point type from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0xff, 0xff, 0x7f, 0xff]); + /// + /// assert_eq!(f32::MIN, reader.read_f32_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readF32Le(); + + /// Reads an 64-bit floating point type in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_f64(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. 
+ /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read 64-bit floating point type from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0xff, 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + /// ]); + /// + /// assert_eq!(f64::MIN, reader.read_f64().await?); + /// Ok(()) + /// # } + /// ``` + Future readF64(); + + /// Reads an 64-bit floating point type in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_f64_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read 64-bit floating point type from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff + /// ]); + /// + /// assert_eq!(f64::MIN, reader.read_f64_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readF64Le(); + + /// Reads an signed 128-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i128(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 128-bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0x80, 0, 0, 0, 0, 0, 0, 0, + /// 0, 0, 0, 0, 0, 0, 0, 0 + /// ]); + /// + /// assert_eq!(i128::MIN, reader.read_i128().await?); + /// Ok(()) + /// # } + /// ``` + Future readI128(); + + /// Reads an signed 128-bit integer in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i128_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. 
+ /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 128-bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0x80, 0, 0, 0, 0, 0, 0, 0, + /// 0, 0, 0, 0, 0, 0, 0, 0 + /// ]); + /// + /// assert_eq!(128, reader.read_i128_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readI128Le(); + + /// Reads a signed 16-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i16(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 16 bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x00, 0xc1, 0xff, 0x7c]); + /// + /// assert_eq!(193, reader.read_i16().await?); + /// assert_eq!(-132, reader.read_i16().await?); + /// Ok(()) + /// # } + /// ``` + Future readI16(); + + /// Reads a signed 16-bit integer in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i16_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 16 bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x00, 0xc1, 0xff, 0x7c]); + /// + /// assert_eq!(-16128, reader.read_i16_le().await?); + /// assert_eq!(31999, reader.read_i16_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readI16Le(); + + /// Reads a signed 32-bit integer in big-endian order from the + /// underlying reader. 
+ /// + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i32(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 32-bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0xff, 0xff, 0x7a, 0x33]); + /// + /// assert_eq!(-34253, reader.read_i32().await?); + /// Ok(()) + /// # } + /// ``` + Future readI32(); + + /// Reads a signed 32-bit integer in little-endian order from the + /// underlying reader. + /// + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i32_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 32-bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0xff, 0xff, 0x7a, 0x33]); + /// + /// assert_eq!(863698943, reader.read_i32_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readI32Le(); + + /// Reads an signed 64-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i64(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 64-bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x80, 0, 0, 0, 0, 0, 0, 0]); + /// + /// assert_eq!(i64::MIN, reader.read_i64().await?); + /// Ok(()) + /// # } + /// ``` + Future readI64(); + + /// Reads an signed 64-bit integer in little-endian order from the + /// underlying reader. 
+ /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i64_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read signed 64-bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x80, 0, 0, 0, 0, 0, 0, 0]); + /// + /// assert_eq!(128, reader.read_i64_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readI64Le(); + + /// Reads a signed 8 bit integer from the underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_i8(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is cancel safe. If this method is used as an event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no data were read. + /// + /// # Examples + /// + /// Read unsigned 8 bit integers from an `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x02, 0xfb]); + /// + /// assert_eq!(2, reader.read_i8().await?); + /// assert_eq!(-5, reader.read_i8().await?); + /// + /// Ok(()) + /// # } + /// ``` + Future readI8(); + + /// Reads an unsigned 128-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u128(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. 
+ /// + /// # Examples + /// + /// Read unsigned 128-bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83, + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83 + /// ]); + /// + /// assert_eq!(16947640962301618749969007319746179, reader.read_u128().await?); + /// Ok(()) + /// # } + /// ``` + Future readU128(); + + /// Reads an unsigned 128-bit integer in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u128_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 128-bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83, + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83 + /// ]); + /// + /// assert_eq!(174826588484952389081207917399662330624, reader.read_u128_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readU128Le(); + + /// Reads an unsigned 16-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u16(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 16 bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![2, 5, 3, 0]); + /// + /// assert_eq!(517, reader.read_u16().await?); + /// assert_eq!(768, reader.read_u16().await?); + /// Ok(()) + /// # } + /// ``` + Future readU16(); + + /// Reads an unsigned 16-bit integer in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u16_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. 
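128-bit values do not fit in Dart's 64-bit `int`, so the 128-bit readers presumably surface as `BigInt` on the Dart side; that is an assumption here, mirroring the `required BigInt n` parameter of `writeU128` further down in this file. A hedged sketch:

```dart
import 'package:mobile_app/src/rust/third_party/tokio/io.dart';

/// Reads a 128-bit unsigned value and checks whether it fits in 64 bits.
/// The comparison goes through BigInt because a u128 can exceed Dart's int
/// range (the BigInt return type is assumed, not shown in this diff).
Future<bool> needsMoreThan64Bits(AsyncReadExt reader) async {
  final BigInt id = await reader.readU128();
  return id >= BigInt.two.pow(64);
}
```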
+ /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 16 bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![2, 5, 3, 0]); + /// + /// assert_eq!(1282, reader.read_u16_le().await?); + /// assert_eq!(3, reader.read_u16_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readU16Le(); + + /// Reads an unsigned 32-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u32(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 32-bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x00, 0x00, 0x01, 0x0b]); + /// + /// assert_eq!(267, reader.read_u32().await?); + /// Ok(()) + /// # } + /// ``` + Future readU32(); + + /// Reads an unsigned 32-bit integer in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u32_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 32-bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![0x00, 0x00, 0x01, 0x0b]); + /// + /// assert_eq!(184614912, reader.read_u32_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readU32Le(); + + /// Reads an unsigned 64-bit integer in big-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u64(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. 
+ /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 64-bit big-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83 + /// ]); + /// + /// assert_eq!(918733457491587, reader.read_u64().await?); + /// Ok(()) + /// # } + /// ``` + Future readU64(); + + /// Reads an unsigned 64-bit integer in little-endian order from the + /// underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u64_le(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is not cancellation safe. If the method is used as the + /// event in a [`tokio::select!`](crate::select) statement and some + /// other branch completes first, then some data may be lost. + /// + /// # Examples + /// + /// Read unsigned 64-bit little-endian integers from a `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![ + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83 + /// ]); + /// + /// assert_eq!(9477368352180732672, reader.read_u64_le().await?); + /// Ok(()) + /// # } + /// ``` + Future readU64Le(); + + /// Reads an unsigned 8 bit integer from the underlying reader. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn read_u8(&mut self) -> io::Result; + /// ``` + /// + /// It is recommended to use a buffered reader to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncReadExt::read_exact`]. + /// + /// [`AsyncReadExt::read_exact`]: AsyncReadExt::read_exact + /// + /// # Cancel safety + /// + /// This method is cancel safe. If this method is used as an event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no data were read. + /// + /// # Examples + /// + /// Read unsigned 8 bit integers from an `AsyncRead`: + /// + /// ```rust + /// use tokio::io::{self, AsyncReadExt}; + /// + /// use std::io::Cursor; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut reader = Cursor::new(vec![2, 5]); + /// + /// assert_eq!(2, reader.read_u8().await?); + /// assert_eq!(5, reader.read_u8().await?); + /// + /// Ok(()) + /// # } + /// ``` + Future readU8(); + + /// Creates an adaptor which reads at most `limit` bytes from it. 
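Taken together, these fixed-width readers are enough to decode simple tag- and length-prefixed binary framing from Dart. A sketch using a hypothetical wire format (1-byte tag, 2-byte big-endian length, then payload bytes); only methods shown in this file are used, and per the notes above a buffered reader should back this in practice:

```dart
import 'dart:typed_data';
import 'package:mobile_app/src/rust/third_party/tokio/io.dart';

/// Decodes one hypothetical frame: u8 tag, u16 big-endian length, payload.
/// Reading the payload byte-by-byte keeps the sketch limited to the readers
/// declared above; it is not an efficient way to move bulk data.
Future<(int, Uint8List)> readFrame(AsyncReadExt reader) async {
  final tag = await reader.readU8();
  final length = await reader.readU16(); // big-endian length prefix
  final payload = Uint8List(length);
  for (var i = 0; i < length; i++) {
    payload[i] = await reader.readU8();
  }
  return (tag, payload);
}
```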
+ /// + /// This function returns a new instance of `AsyncRead` which will read + /// at most `limit` bytes, after which it will always return EOF + /// (`Ok(0)`). Any read errors will not count towards the number of + /// bytes read and future calls to [`read()`] may succeed. + /// + /// [`read()`]: fn@crate::io::AsyncReadExt::read + /// + /// [read]: AsyncReadExt::read + /// + /// # Examples + /// + /// [`File`][crate::fs::File]s implement `Read`: + /// + /// ```no_run + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::io::{self, AsyncReadExt}; + /// use tokio::fs::File; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let f = File::open("foo.txt").await?; + /// let mut buffer = [0; 5]; + /// + /// // read at most five bytes + /// let mut handle = f.take(5); + /// + /// handle.read(&mut buffer).await?; + /// Ok(()) + /// } + /// # } + /// ``` + Future take({required BigInt limit}); +} + +abstract class AsyncSeekExt { + /// Creates a future which will rewind to the beginning of the stream. + /// + /// This is convenience method, equivalent to `self.seek(SeekFrom::Start(0))`. + Future rewind(); + + /// Creates a future which will seek an IO object, and then yield the + /// new position in the object and the object itself. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn seek(&mut self, pos: SeekFrom) -> io::Result; + /// ``` + /// + /// In the case of an error the buffer and the object will be discarded, with + /// the error yielded. + /// + /// # Examples + /// + /// ```no_run + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::fs::File; + /// use tokio::io::{AsyncSeekExt, AsyncReadExt}; + /// + /// use std::io::SeekFrom; + /// + /// # async fn dox() -> std::io::Result<()> { + /// let mut file = File::open("foo.txt").await?; + /// file.seek(SeekFrom::Start(6)).await?; + /// + /// let mut contents = vec![0u8; 10]; + /// file.read_exact(&mut contents).await?; + /// # Ok(()) + /// # } + /// # } + /// ``` + Future seek({required SeekFrom pos}); + + /// Creates a future which will return the current seek position from the + /// start of the stream. + /// + /// This is equivalent to `self.seek(SeekFrom::Current(0))`. + Future streamPosition(); +} + +abstract class AsyncWriteExt { + /// Flushes this output stream, ensuring that all intermediately buffered + /// contents reach their destination. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn flush(&mut self) -> io::Result<()>; + /// ``` + /// + /// # Errors + /// + /// It is considered an error if not all bytes could be written due to + /// I/O errors or EOF being reached. + /// + /// # Cancel safety + /// + /// This method is cancel safe. + /// + /// If `flush` is used as the event in a [`tokio::select!`](crate::select) + /// statement and some other branch completes first, then the data in the + /// buffered data in this `AsyncWrite` may have been partially flushed. + /// However, it is guaranteed that the buffer is advanced by the amount of + /// bytes that have been partially flushed. 
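The seek helpers translate directly to Dart. `rewind` and `streamPosition` take no arguments; `seek` needs a `SeekFrom` value whose Dart-side construction is defined elsewhere in the generated bindings and is not shown in this diff, so this sketch sticks to the argument-free calls:

```dart
import 'package:mobile_app/src/rust/third_party/tokio/io.dart';

/// Rewinds to the start of the stream and reports the resulting offset.
/// rewind() is documented above as equivalent to seek(SeekFrom::Start(0)).
Future<void> logPosition(AsyncSeekExt seekable) async {
  await seekable.rewind();                     // back to offset 0
  final pos = await seekable.streamPosition(); // expected to report 0 here
  print('position after rewind: $pos');
}
```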
+ /// + /// # Examples + /// + /// ```no_run + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::io::{self, BufWriter, AsyncWriteExt}; + /// use tokio::fs::File; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let f = File::create("foo.txt").await?; + /// let mut buffer = BufWriter::new(f); + /// + /// buffer.write_all(b"some bytes").await?; + /// buffer.flush().await?; + /// Ok(()) + /// } + /// # } + /// ``` + Future flush(); + + /// Shuts down the output stream, ensuring that the value can be dropped + /// cleanly. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn shutdown(&mut self) -> io::Result<()>; + /// ``` + /// + /// Similar to [`flush`], all intermediately buffered is written to the + /// underlying stream. Once the operation completes, the caller should + /// no longer attempt to write to the stream. For example, the + /// `TcpStream` implementation will issue a `shutdown(Write)` sys call. + /// + /// [`flush`]: fn@crate::io::AsyncWriteExt::flush + /// + /// # Examples + /// + /// ```no_run + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::io::{self, BufWriter, AsyncWriteExt}; + /// use tokio::fs::File; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let f = File::create("foo.txt").await?; + /// let mut buffer = BufWriter::new(f); + /// + /// buffer.write_all(b"some bytes").await?; + /// buffer.shutdown().await?; + /// Ok(()) + /// } + /// # } + /// ``` + Future shutdown(); + + /// Writes an 32-bit floating point type in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_f32(&mut self, n: f32) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write 32-bit floating point type to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_f32(f32::MIN).await?; + /// + /// assert_eq!(writer, vec![0xff, 0x7f, 0xff, 0xff]); + /// Ok(()) + /// # } + /// ``` + Future writeF32({required double n}); + + /// Writes an 32-bit floating point type in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_f32_le(&mut self, n: f32) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write 32-bit floating point type to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_f32_le(f32::MIN).await?; + /// + /// assert_eq!(writer, vec![0xff, 0xff, 0x7f, 0xff]); + /// Ok(()) + /// # } + /// ``` + Future writeF32Le({required double n}); + + /// Writes an 64-bit floating point type in big-endian order to the + /// underlying writer. 
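On the Dart side the same flush-before-done discipline applies: buffered bytes are only guaranteed to reach their destination once `flush` (or `shutdown`) has completed. A hedged sketch, assuming `writer` implements the generated `AsyncWriteExt` interface:

```dart
import 'package:mobile_app/src/rust/third_party/tokio/io.dart';

/// Writes one 32-bit float and makes sure it actually leaves the buffer.
/// shutdown() additionally signals that no further writes will follow,
/// mirroring the Rust docs above.
Future<void> writeSampleAndClose(AsyncWriteExt writer, double sample) async {
  await writer.writeF32(n: sample); // big-endian, per the write_f32 docs
  await writer.flush();             // push any buffered bytes downstream
  await writer.shutdown();          // then mark the write side as finished
}
```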
+ /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_f64(&mut self, n: f64) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write 64-bit floating point type to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_f64(f64::MIN).await?; + /// + /// assert_eq!(writer, vec![ + /// 0xff, 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + /// ]); + /// Ok(()) + /// # } + /// ``` + Future writeF64({required double n}); + + /// Writes an 64-bit floating point type in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_f64_le(&mut self, n: f64) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write 64-bit floating point type to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_f64_le(f64::MIN).await?; + /// + /// assert_eq!(writer, vec![ + /// 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff + /// ]); + /// Ok(()) + /// # } + /// ``` + Future writeF64Le({required double n}); + + /// Writes an signed 128-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i128(&mut self, n: i128) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 128-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i128(i128::MIN).await?; + /// + /// assert_eq!(writer, vec![ + /// 0x80, 0, 0, 0, 0, 0, 0, 0, + /// 0, 0, 0, 0, 0, 0, 0, 0 + /// ]); + /// Ok(()) + /// # } + /// ``` + Future writeI128({required BigInt n}); + + /// Writes an signed 128-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i128_le(&mut self, n: i128) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. 
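As with the 128-bit readers, values outside Dart's `int` range go through `BigInt`; `writeI128` takes a `required BigInt n`. A small sketch writing `i128::MIN`, the same value used in the Rust example above:

```dart
import 'package:mobile_app/src/rust/third_party/tokio/io.dart';

/// Writes i128::MIN (-2^127) in big-endian order, matching the
/// 0x80 00 ... 00 byte pattern shown in the write_i128 example above.
Future<void> writeI128Min(AsyncWriteExt writer) async {
  final i128Min = -BigInt.two.pow(127);
  await writer.writeI128(n: i128Min);
  await writer.flush();
}
```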
+ /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 128-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i128_le(i128::MIN).await?; + /// + /// assert_eq!(writer, vec![ + /// 0, 0, 0, 0, 0, 0, 0, + /// 0, 0, 0, 0, 0, 0, 0, 0, 0x80 + /// ]); + /// Ok(()) + /// # } + /// ``` + Future writeI128Le({required BigInt n}); + + /// Writes a signed 16-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i16(&mut self, n: i16) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 16-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i16(193).await?; + /// writer.write_i16(-132).await?; + /// + /// assert_eq!(writer, b"\x00\xc1\xff\x7c"); + /// Ok(()) + /// # } + /// ``` + Future writeI16({required int n}); + + /// Writes a signed 16-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i16_le(&mut self, n: i16) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 16-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i16_le(193).await?; + /// writer.write_i16_le(-132).await?; + /// + /// assert_eq!(writer, b"\xc1\x00\x7c\xff"); + /// Ok(()) + /// # } + /// ``` + Future writeI16Le({required int n}); + + /// Writes a signed 32-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i32(&mut self, n: i32) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. 
+ /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 32-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i32(267).await?; + /// writer.write_i32(1205419366).await?; + /// + /// assert_eq!(writer, b"\x00\x00\x01\x0b\x47\xd9\x3d\x66"); + /// Ok(()) + /// # } + /// ``` + Future writeI32({required int n}); + + /// Writes a signed 32-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i32_le(&mut self, n: i32) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 32-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i32_le(267).await?; + /// writer.write_i32_le(1205419366).await?; + /// + /// assert_eq!(writer, b"\x0b\x01\x00\x00\x66\x3d\xd9\x47"); + /// Ok(()) + /// # } + /// ``` + Future writeI32Le({required int n}); + + /// Writes an signed 64-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i64(&mut self, n: i64) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 64-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i64(i64::MIN).await?; + /// writer.write_i64(i64::MAX).await?; + /// + /// assert_eq!(writer, b"\x80\x00\x00\x00\x00\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff"); + /// Ok(()) + /// # } + /// ``` + Future writeI64({required PlatformInt64 n}); + + /// Writes an signed 64-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i64_le(&mut self, n: i64) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. 
+ /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 64-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i64_le(i64::MIN).await?; + /// writer.write_i64_le(i64::MAX).await?; + /// + /// assert_eq!(writer, b"\x00\x00\x00\x00\x00\x00\x00\x80\xff\xff\xff\xff\xff\xff\xff\x7f"); + /// Ok(()) + /// # } + /// ``` + Future writeI64Le({required PlatformInt64 n}); + + /// Writes a signed 8-bit integer to the underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_i8(&mut self, n: i8) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write signed 8 bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_i8(-2).await?; + /// writer.write_i8(126).await?; + /// + /// assert_eq!(writer, b"\xFE\x7E"); + /// Ok(()) + /// # } + /// ``` + Future writeI8({required int n}); + + /// Writes an unsigned 128-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u128(&mut self, n: u128) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 128-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u128(16947640962301618749969007319746179).await?; + /// + /// assert_eq!(writer, vec![ + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83, + /// 0x00, 0x03, 0x43, 0x95, 0x4d, 0x60, 0x86, 0x83 + /// ]); + /// Ok(()) + /// # } + /// ``` + Future writeU128({required BigInt n}); + + /// Writes an unsigned 128-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u128_le(&mut self, n: u128) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. 
+ /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 128-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u128_le(16947640962301618749969007319746179).await?; + /// + /// assert_eq!(writer, vec![ + /// 0x83, 0x86, 0x60, 0x4d, 0x95, 0x43, 0x03, 0x00, + /// 0x83, 0x86, 0x60, 0x4d, 0x95, 0x43, 0x03, 0x00, + /// ]); + /// Ok(()) + /// # } + /// ``` + Future writeU128Le({required BigInt n}); + + /// Writes an unsigned 16-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u16(&mut self, n: u16) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 16-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u16(517).await?; + /// writer.write_u16(768).await?; + /// + /// assert_eq!(writer, b"\x02\x05\x03\x00"); + /// Ok(()) + /// # } + /// ``` + Future writeU16({required int n}); + + /// Writes an unsigned 16-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u16_le(&mut self, n: u16) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 16-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u16_le(517).await?; + /// writer.write_u16_le(768).await?; + /// + /// assert_eq!(writer, b"\x05\x02\x00\x03"); + /// Ok(()) + /// # } + /// ``` + Future writeU16Le({required int n}); + + /// Writes an unsigned 32-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u32(&mut self, n: u32) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. 
+ /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 32-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u32(267).await?; + /// writer.write_u32(1205419366).await?; + /// + /// assert_eq!(writer, b"\x00\x00\x01\x0b\x47\xd9\x3d\x66"); + /// Ok(()) + /// # } + /// ``` + Future writeU32({required int n}); + + /// Writes an unsigned 32-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u32_le(&mut self, n: u32) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 32-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u32_le(267).await?; + /// writer.write_u32_le(1205419366).await?; + /// + /// assert_eq!(writer, b"\x0b\x01\x00\x00\x66\x3d\xd9\x47"); + /// Ok(()) + /// # } + /// ``` + Future writeU32Le({required int n}); + + /// Writes an unsigned 64-bit integer in big-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u64(&mut self, n: u64) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 64-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u64(918733457491587).await?; + /// writer.write_u64(143).await?; + /// + /// assert_eq!(writer, b"\x00\x03\x43\x95\x4d\x60\x86\x83\x00\x00\x00\x00\x00\x00\x00\x8f"); + /// Ok(()) + /// # } + /// ``` + Future writeU64({required BigInt n}); + + /// Writes an unsigned 64-bit integer in little-endian order to the + /// underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u64_le(&mut self, n: u64) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. 
+ /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 64-bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u64_le(918733457491587).await?; + /// writer.write_u64_le(143).await?; + /// + /// assert_eq!(writer, b"\x83\x86\x60\x4d\x95\x43\x03\x00\x8f\x00\x00\x00\x00\x00\x00\x00"); + /// Ok(()) + /// # } + /// ``` + Future writeU64Le({required BigInt n}); + + /// Writes an unsigned 8-bit integer to the underlying writer. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn write_u8(&mut self, n: u8) -> io::Result<()>; + /// ``` + /// + /// It is recommended to use a buffered writer to avoid excessive + /// syscalls. + /// + /// # Errors + /// + /// This method returns the same errors as [`AsyncWriteExt::write_all`]. + /// + /// [`AsyncWriteExt::write_all`]: AsyncWriteExt::write_all + /// + /// # Examples + /// + /// Write unsigned 8 bit integers to a `AsyncWrite`: + /// + /// ```rust + /// use tokio::io::{self, AsyncWriteExt}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> io::Result<()> { + /// let mut writer = Vec::new(); + /// + /// writer.write_u8(2).await?; + /// writer.write_u8(5).await?; + /// + /// assert_eq!(writer, b"\x02\x05"); + /// Ok(()) + /// # } + /// ``` + Future writeU8({required int n}); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/macros/support.dart b/mobile_app/lib/src/rust/third_party/tokio/macros/support.dart new file mode 100644 index 0000000..ccfc3ee --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/macros/support.dart @@ -0,0 +1,15 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import '../signal/unix.dart'; +import '../sync/oneshot.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +Future threadRngN({required int n}) => + RustLib.instance.api.tokioMacrosSupportThreadRngN(n: n); + +Future pollBudgetAvailable({required Context cx}) => + RustLib.instance.api.tokioMacrosSupportPollBudgetAvailable(cx: cx); diff --git a/mobile_app/lib/src/rust/third_party/tokio/net.dart b/mobile_app/lib/src/rust/third_party/tokio/net.dart new file mode 100644 index 0000000..8e24b87 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net.dart @@ -0,0 +1,4504 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
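The small macros/support.dart wrapper above exposes two of tokio's macro-support helpers as plain top-level Dart functions. In tokio, `thread_rng_n(n)` is understood to return a fast pseudo-random value in `[0, n)` (that semantics comes from the upstream crate, not from this diff); `pollBudgetAvailable` needs a `Context` handle whose construction is not shown here, so it is omitted from the sketch:

```dart
import 'package:mobile_app/src/rust/third_party/tokio/macros/support.dart';

/// Picks a pseudo-random branch index the way tokio's select! macro does.
/// (Assumption: threadRngN(n: n) yields an integer in [0, n).)
Future<void> pickBranch() async {
  final index = await threadRngN(n: 4);
  print('randomized start branch: $index');
}
```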
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'io.dart'; +import 'net/tcp.dart'; +import 'net/tcp/listener.dart'; +import 'net/tcp/split_owned.dart'; +import 'net/udp.dart'; +import 'net/unix.dart'; +import 'net/unix/datagram/socket.dart'; +import 'net/unix/listener.dart'; +import 'net/unix/pipe.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'signal/unix.dart'; +import 'time/error.dart'; + +// These functions are ignored because they have generic arguments: `lookup_host` + +// Rust type: RustOpaqueMoi> +abstract class TcpListener implements RustOpaqueInterface { + /// Accepts a new incoming connection from this listener. + /// + /// This function will yield once a new TCP connection is established. When + /// established, the corresponding [`TcpStream`] and the remote peer's + /// address will be returned. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If the method is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that no new connections were + /// accepted by this method. + /// + /// [`TcpStream`]: struct@crate::net::TcpStream + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpListener; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let listener = TcpListener::bind("127.0.0.1:8080").await?; + /// + /// match listener.accept().await { + /// Ok((_socket, addr)) => println!("new client: {:?}", addr), + /// Err(e) => println!("couldn't get client: {:?}", e), + /// } + /// + /// Ok(()) + /// } + /// ``` + Future<(TcpStream, SocketAddr)> accept(); + + /// Creates new `TcpListener` from a `std::net::TcpListener`. + /// + /// This function is intended to be used to wrap a TCP listener from the + /// standard library in the Tokio equivalent. + /// + /// This API is typically paired with the `socket2` crate and the `Socket` + /// type to build up and customize a listener before it's shipped off to the + /// backing event loop. This allows configuration of options like + /// `SO_REUSEPORT`, binding to multiple addresses, etc. + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the listener is in + /// non-blocking mode. Otherwise all I/O operations on the listener + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. + /// + /// Passing a listener in blocking mode is always erroneous, + /// and the behavior in that case may change in the future. + /// For example, it could panic. + /// + /// [`set_nonblocking`]: std::net::TcpListener::set_nonblocking + /// + /// # Examples + /// + /// ```rust,no_run + /// use std::error::Error; + /// use tokio::net::TcpListener; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let std_listener = std::net::TcpListener::bind("127.0.0.1:0")?; + /// std_listener.set_nonblocking(true)?; + /// let listener = TcpListener::from_std(std_listener)?; + /// Ok(()) + /// } + /// ``` + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. 
+ /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromStd({required TcpListener listener}) => + RustLib.instance.api.tokioNetTcpListenerFromStd(listener: listener); + + /// Turns a [`tokio::net::TcpListener`] into a [`std::net::TcpListener`]. + /// + /// The returned [`std::net::TcpListener`] will have nonblocking mode set as + /// `true`. Use [`set_nonblocking`] to change the blocking mode if needed. + /// + /// # Examples + /// + /// ```rust,no_run + /// use std::error::Error; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let tokio_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await?; + /// let std_listener = tokio_listener.into_std()?; + /// std_listener.set_nonblocking(false)?; + /// Ok(()) + /// } + /// ``` + /// + /// [`tokio::net::TcpListener`]: TcpListener + /// [`std::net::TcpListener`]: std::net::TcpListener + /// [`set_nonblocking`]: fn@std::net::TcpListener::set_nonblocking + Future intoStd(); + + /// Returns the local address that this listener is bound to. + /// + /// This can be useful, for example, when binding to port 0 to figure out + /// which port was actually bound. + /// + /// # Examples + /// + /// ```rust,no_run + /// use tokio::net::TcpListener; + /// + /// use std::io; + /// use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let listener = TcpListener::bind("127.0.0.1:8080").await?; + /// + /// assert_eq!(listener.local_addr()?, + /// SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 8080))); + /// + /// Ok(()) + /// } + /// ``` + Future localAddr(); + + /// Polls to accept a new incoming connection to this listener. + /// + /// If there is no connection to accept, `Poll::Pending` is returned and the + /// current task will be notified by a waker. Note that on multiple calls + /// to `poll_accept`, only the `Waker` from the `Context` passed to the most + /// recent call is scheduled to receive a wakeup. + Future pollAccept({required Context cx}); + + /// Sets the value for the `IP_TTL` option on this socket. + /// + /// This value sets the time-to-live field that is used in every packet sent + /// from this socket. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpListener; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let listener = TcpListener::bind("127.0.0.1:0").await?; + /// + /// listener.set_ttl(100).expect("could not set TTL"); + /// + /// Ok(()) + /// } + /// ``` + Future setTtl({required int ttl}); + + /// Gets the value of the `IP_TTL` option for this socket. + /// + /// For more information about this option, see [`set_ttl`]. + /// + /// [`set_ttl`]: method@Self::set_ttl + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpListener; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let listener = TcpListener::bind("127.0.0.1:0").await?; + /// + /// listener.set_ttl(100).expect("could not set TTL"); + /// assert_eq!(listener.ttl()?, 100); + /// + /// Ok(()) + /// } + /// ``` + Future ttl(); +} + +// Rust type: RustOpaqueMoi> +abstract class TcpSocket implements RustOpaqueInterface { + /// Binds the socket to the given address. + /// + /// This calls the `bind(2)` operating-system function. 
Behavior is + /// platform specific. Refer to the target platform's documentation for more + /// details. + /// + /// # Examples + /// + /// Bind a socket before listening. + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(1024)?; + /// # drop(listener); + /// + /// Ok(()) + /// } + /// ``` + Future bind({required SocketAddr addr}); + + /// Establishes a TCP connection with a peer at the specified socket address. + /// + /// The `TcpSocket` is consumed. Once the connection is established, a + /// connected [`TcpStream`] is returned. If the connection fails, the + /// encountered error is returned. + /// + /// [`TcpStream`]: TcpStream + /// + /// This calls the `connect(2)` operating-system function. Behavior is + /// platform specific. Refer to the target platform's documentation for more + /// details. + /// + /// # Examples + /// + /// Connecting to a peer. + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// let stream = socket.connect(addr).await?; + /// # drop(stream); + /// + /// Ok(()) + /// } + /// ``` + Future connect({required SocketAddr addr}); + + /// Converts a [`std::net::TcpStream`] into a `TcpSocket`. The provided + /// socket must not have been connected prior to calling this function. This + /// function is typically used together with crates such as [`socket2`] to + /// configure socket options that are not available on `TcpSocket`. + /// + /// [`std::net::TcpStream`]: struct@std::net::TcpStream + /// [`socket2`]: https://docs.rs/socket2/ + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the socket is in + /// non-blocking mode. Otherwise all I/O operations on the socket + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. + /// + /// [`set_nonblocking`]: std::net::TcpStream::set_nonblocking + /// + /// # Examples + /// + /// ``` + /// use tokio::net::TcpSocket; + /// use socket2::{Domain, Socket, Type}; + /// + /// #[tokio::main] + /// async fn main() -> std::io::Result<()> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// let socket2_socket = Socket::new(Domain::IPV4, Type::STREAM, None)?; + /// socket2_socket.set_nonblocking(true)?; + /// + /// let socket = TcpSocket::from_std_stream(socket2_socket.into()); + /// + /// Ok(()) + /// } + /// ``` + static Future fromStdStream({required TcpStream stdStream}) => + RustLib.instance.api.tokioNetTcpSocketFromStdStream(stdStream: stdStream); + + /// Gets the value of the `SO_KEEPALIVE` option on this socket. + Future keepalive(); + + /// Reads the linger duration for this socket by getting the `SO_LINGER` + /// option. + /// + /// For more information about this option, see [`set_linger`]. + /// + /// [`set_linger`]: TcpSocket::set_linger + Future linger(); + + /// Converts the socket into a `TcpListener`. + /// + /// `backlog` defines the maximum number of pending connections are queued + /// by the operating system at any given time. Connection are removed from + /// the queue with [`TcpListener::accept`]. 
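From Dart, the connect path mirrors the Rust example: create a socket with `newV4` and hand it a `SocketAddr`. How a `SocketAddr` value is constructed on the Dart side is defined elsewhere in the generated bindings (it is an opaque type pulled in via the `lib.dart` import at the top of this file), so this sketch takes one as a parameter; the `TcpStream` return type is inferred from the connect docs above:

```dart
import 'package:mobile_app/src/rust/lib.dart';
import 'package:mobile_app/src/rust/third_party/tokio/net.dart';

/// Creates an IPv4 socket and connects it to `addr`; the socket is consumed
/// by connect() and the connected stream is returned, as documented above.
Future<TcpStream> dialV4(SocketAddr addr) async {
  final socket = await TcpSocket.newV4();
  return await socket.connect(addr: addr);
}
```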
When the queue is full, the + /// operating-system will start rejecting connections. + /// + /// [`TcpListener::accept`]: TcpListener::accept + /// + /// This calls the `listen(2)` operating-system function, marking the socket + /// as a passive socket. Behavior is platform specific. Refer to the target + /// platform's documentation for more details. + /// + /// # Examples + /// + /// Create a `TcpListener`. + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(1024)?; + /// # drop(listener); + /// + /// Ok(()) + /// } + /// ``` + Future listen({required int backlog}); + + /// Gets the local address of this socket. + /// + /// Will fail on windows if called before `bind`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.bind(addr)?; + /// assert_eq!(socket.local_addr().unwrap().to_string(), "127.0.0.1:8080"); + /// let listener = socket.listen(1024)?; + /// Ok(()) + /// } + /// ``` + Future localAddr(); + + /// Creates a new socket configured for IPv4. + /// + /// Calls `socket(2)` with `AF_INET` and `SOCK_STREAM`. + /// + /// # Returns + /// + /// On success, the newly created `TcpSocket` is returned. If an error is + /// encountered, it is returned instead. + /// + /// # Examples + /// + /// Create a new IPv4 socket and start listening. + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// let socket = TcpSocket::new_v4()?; + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(128)?; + /// # drop(listener); + /// Ok(()) + /// } + /// ``` + static Future newV4() => + RustLib.instance.api.tokioNetTcpSocketNewV4(); + + /// Creates a new socket configured for IPv6. + /// + /// Calls `socket(2)` with `AF_INET6` and `SOCK_STREAM`. + /// + /// # Returns + /// + /// On success, the newly created `TcpSocket` is returned. If an error is + /// encountered, it is returned instead. + /// + /// # Examples + /// + /// Create a new IPv6 socket and start listening. + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "[::1]:8080".parse().unwrap(); + /// let socket = TcpSocket::new_v6()?; + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(128)?; + /// # drop(listener); + /// Ok(()) + /// } + /// ``` + static Future newV6() => + RustLib.instance.api.tokioNetTcpSocketNewV6(); + + /// Gets the value of the `TCP_NODELAY` option on this socket. + /// + /// For more information about this option, see [`set_nodelay`]. + /// + /// [`set_nodelay`]: TcpSocket::set_nodelay + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// # async fn dox() -> Result<(), Box> { + /// let socket = TcpSocket::new_v4()?; + /// + /// println!("{:?}", socket.nodelay()?); + /// # Ok(()) + /// # } + /// ``` + Future nodelay(); + + /// Returns the size of the TCP receive buffer for this socket. 
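No static `bind` appears on the generated `TcpListener` surface in this diff, so one way to get a listener from Dart is the `TcpSocket` route shown in these docs: `newV4`, `bind`, `listen`, then an accept loop. A sketch, again taking an opaque `SocketAddr` as input:

```dart
import 'package:mobile_app/src/rust/lib.dart';
import 'package:mobile_app/src/rust/third_party/tokio/net.dart';

/// Binds an IPv4 socket, starts listening, and accepts connections forever,
/// logging each peer address. The backlog of 1024 matches the Rust examples.
Future<void> serve(SocketAddr addr) async {
  final socket = await TcpSocket.newV4();
  await socket.bind(addr: addr);
  final listener = await socket.listen(backlog: 1024);
  while (true) {
    final (stream, peer) = await listener.accept(); // (TcpStream, SocketAddr)
    print('new client: $peer');
    // hand `stream` off to a connection handler here
  }
}
```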
+ /// + /// On most operating systems, this is the value of the `SO_RCVBUF` socket + /// option. + /// + /// Note that if [`set_recv_buffer_size`] has been called on this socket + /// previously, the value returned by this function may not be the same as + /// the argument provided to `set_recv_buffer_size`. This is for the + /// following reasons: + /// + /// * Most operating systems have minimum and maximum allowed sizes for the + /// receive buffer, and will clamp the provided value if it is below the + /// minimum or above the maximum. The minimum and maximum buffer sizes are + /// OS-dependent. + /// * Linux will double the buffer size to account for internal bookkeeping + /// data, and returns the doubled value from `getsockopt(2)`. As per `man + /// 7 socket`: + /// > Sets or gets the maximum socket send buffer in bytes. The + /// > kernel doubles this value (to allow space for bookkeeping + /// > overhead) when it is set using `setsockopt(2)`, and this doubled + /// > value is returned by `getsockopt(2)`. + /// + /// [`set_recv_buffer_size`]: #method.set_recv_buffer_size + Future recvBufferSize(); + + /// Retrieves the value set for `SO_REUSEADDR` on this socket. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.set_reuseaddr(true)?; + /// assert!(socket.reuseaddr().unwrap()); + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(1024)?; + /// Ok(()) + /// } + /// ``` + Future reuseaddr(); + + /// Allows the socket to bind to an in-use port. Only available for unix systems + /// (excluding Solaris, Illumos, and Cygwin). + /// + /// Behavior is platform specific. Refer to the target platform's + /// documentation for more details. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.set_reuseport(true)?; + /// assert!(socket.reuseport().unwrap()); + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(1024)?; + /// Ok(()) + /// } + /// ``` + Future reuseport(); + + /// Returns the size of the TCP send buffer for this socket. + /// + /// On most operating systems, this is the value of the `SO_SNDBUF` socket + /// option. + /// + /// Note that if [`set_send_buffer_size`] has been called on this socket + /// previously, the value returned by this function may not be the same as + /// the argument provided to `set_send_buffer_size`. This is for the + /// following reasons: + /// + /// * Most operating systems have minimum and maximum allowed sizes for the + /// send buffer, and will clamp the provided value if it is below the + /// minimum or above the maximum. The minimum and maximum buffer sizes are + /// OS-dependent. + /// * Linux will double the buffer size to account for internal bookkeeping + /// data, and returns the doubled value from `getsockopt(2)`. As per `man + /// 7 socket`: + /// > Sets or gets the maximum socket send buffer in bytes. The + /// > kernel doubles this value (to allow space for bookkeeping + /// > overhead) when it is set using `setsockopt(2)`, and this doubled + /// > value is returned by `getsockopt(2)`. 
+ /// + /// [`set_send_buffer_size`]: #method.set_send_buffer_size + Future sendBufferSize(); + + /// Sets value for the `SO_KEEPALIVE` option on this socket. + Future setKeepalive({required bool keepalive}); + + /// Sets the linger duration of this socket by setting the `SO_LINGER` option. + /// + /// This option controls the action taken when a stream has unsent messages and the stream is + /// closed. If `SO_LINGER` is set, the system shall block the process until it can transmit the + /// data or until the time expires. + /// + /// If `SO_LINGER` is not specified, and the socket is closed, the system handles the call in a + /// way that allows the process to continue as quickly as possible. + /// + /// This option is deprecated because setting `SO_LINGER` on a socket used with Tokio is always + /// incorrect as it leads to blocking the thread when the socket is closed. For more details, + /// please see: + /// + /// > Volumes of communications have been devoted to the intricacies of `SO_LINGER` versus + /// > non-blocking (`O_NONBLOCK`) sockets. From what I can tell, the final word is: don't do + /// > it. Rely on the `shutdown()`-followed-by-`read()`-eof technique instead. + /// > + /// > From [The ultimate `SO_LINGER` page, or: why is my tcp not reliable](https://blog.netherlabs.nl/articles/2009/01/18/the-ultimate-so_linger-page-or-why-is-my-tcp-not-reliable) + Future setLinger({Duration? dur}); + + /// Sets the value of the `TCP_NODELAY` option on this socket. + /// + /// If set, this option disables the Nagle algorithm. This means that segments are always + /// sent as soon as possible, even if there is only a small amount of data. When not set, + /// data is buffered until there is a sufficient amount to send out, thereby avoiding + /// the frequent sending of small packets. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// # async fn dox() -> Result<(), Box> { + /// let socket = TcpSocket::new_v4()?; + /// + /// socket.set_nodelay(true)?; + /// # Ok(()) + /// # } + /// ``` + Future setNodelay({required bool nodelay}); + + /// Sets the size of the TCP receive buffer on this socket. + /// + /// On most operating systems, this sets the `SO_RCVBUF` socket option. + Future setRecvBufferSize({required int size}); + + /// Allows the socket to bind to an in-use address. + /// + /// Behavior is platform specific. Refer to the target platform's + /// documentation for more details. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.set_reuseaddr(true)?; + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(1024)?; + /// # drop(listener); + /// + /// Ok(()) + /// } + /// ``` + Future setReuseaddr({required bool reuseaddr}); + + /// Allows the socket to bind to an in-use port. Only available for unix systems + /// (excluding Solaris, Illumos, and Cygwin). + /// + /// Behavior is platform specific. Refer to the target platform's + /// documentation for more details. 
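The option setters are all plain awaitable calls, and the matching getters let you observe what the OS actually applied; the buffer-size docs above warn that requested values may be clamped and, on Linux, doubled. A sketch of configuring a socket before binding it:

```dart
import 'package:mobile_app/src/rust/third_party/tokio/net.dart';

/// Applies a few common options to a freshly created socket and reports the
/// receive-buffer size the kernel actually granted.
Future<void> tuneSocket(TcpSocket socket) async {
  await socket.setReuseaddr(reuseaddr: true);
  await socket.setNodelay(nodelay: true);          // disable Nagle's algorithm
  await socket.setRecvBufferSize(size: 64 * 1024);
  final granted = await socket.recvBufferSize();   // may differ from 64 KiB
  print('SO_RCVBUF in effect: $granted');
}
```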
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpSocket; + /// + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let addr = "127.0.0.1:8080".parse().unwrap(); + /// + /// let socket = TcpSocket::new_v4()?; + /// socket.set_reuseport(true)?; + /// socket.bind(addr)?; + /// + /// let listener = socket.listen(1024)?; + /// Ok(()) + /// } + /// ``` + Future setReuseport({required bool reuseport}); + + /// Sets the size of the TCP send buffer on this socket. + /// + /// On most operating systems, this sets the `SO_SNDBUF` socket option. + Future setSendBufferSize({required int size}); + + /// Sets the value for the `IPV6_TCLASS` option on this socket. + /// + /// Specifies the traffic class field that is used in every packet + /// sent from this socket. + /// + /// # Note + /// + /// This may not have any effect on IPv4 sockets. + Future setTclassV6({required int tclass}); + + /// Deprecated. Use [`set_tos_v4()`] instead. + /// + /// [`set_tos_v4()`]: Self::set_tos_v4 + Future setTos({required int tos}); + + /// Sets the value for the `IP_TOS` option on this socket. + /// + /// This value sets the type-of-service field that is used in every packet + /// sent from this socket. + /// + /// # Note + /// + /// - This may not have any effect on IPv6 sockets. + /// - On Windows, `IP_TOS` is only supported on [Windows 8+ or + /// Windows Server 2012+.](https://docs.microsoft.com/en-us/windows/win32/winsock/ipproto-ip-socket-options) + Future setTosV4({required int tos}); + + /// Returns the value of the `SO_ERROR` option. + Future takeError(); + + /// Gets the value of the `IPV6_TCLASS` option for this socket. + /// + /// For more information about this option, see [`set_tclass_v6`]. + /// + /// [`set_tclass_v6`]: Self::set_tclass_v6 + Future tclassV6(); + + /// Deprecated. Use [`tos_v4()`] instead. + /// + /// [`tos_v4()`]: Self::tos_v4 + Future tos(); + + /// Gets the value of the `IP_TOS` option for this socket. + /// + /// For more information about this option, see [`set_tos_v4`]. + /// + /// [`set_tos_v4`]: Self::set_tos_v4 + Future tosV4(); +} + +// Rust type: RustOpaqueMoi> +abstract class TcpStream implements RustOpaqueInterface { + /// Creates new `TcpStream` from a `std::net::TcpStream`. + /// + /// This function is intended to be used to wrap a TCP stream from the + /// standard library in the Tokio equivalent. + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the stream is in + /// non-blocking mode. Otherwise all I/O operations on the stream + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. + /// + /// Passing a listener in blocking mode is always erroneous, + /// and the behavior in that case may change in the future. + /// For example, it could panic. + /// + /// [`set_nonblocking`]: std::net::TcpStream::set_nonblocking + /// + /// # Examples + /// + /// ```rust,no_run + /// use std::error::Error; + /// use tokio::net::TcpStream; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let std_stream = std::net::TcpStream::connect("127.0.0.1:34254")?; + /// std_stream.set_nonblocking(true)?; + /// let stream = TcpStream::from_std(std_stream)?; + /// Ok(()) + /// } + /// ``` + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. 
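The TOS/TCLASS accessors declared above come in a deprecated spelling (`setTos`/`tos`) and the preferred IPv4-specific one (`setTosV4`/`tosV4`). A hedged round-trip sketch, again assuming the generated `TcpSocket` class; the DSCP value is purely illustrative:

```dart
// Sketch only: set and read back the IPv4 type-of-service byte, then drain
// any pending SO_ERROR via takeError().
Future<void> tagTraffic(TcpSocket socket) async {
  const dscpEf = 0xB8; // DSCP EF (46) shifted into the TOS byte; illustrative
  await socket.setTosV4(tos: dscpEf);
  print('IP_TOS is now ${await socket.tosV4()}');

  final err = await socket.takeError();
  if (err != null) print('pending socket error: $err');
}
```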
+ /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromStd({required TcpStream stream}) => + RustLib.instance.api.tokioNetTcpStreamFromStd(stream: stream); + + /// Splits a `TcpStream` into a read half and a write half, which can be used + /// to read and write the stream concurrently. + /// + /// Unlike [`split`], the owned halves can be moved to separate tasks, however + /// this comes at the cost of a heap allocation. + /// + /// **Note:** Dropping the write half will shut down the write half of the TCP + /// stream. This is equivalent to calling [`shutdown()`] on the `TcpStream`. + /// + /// [`split`]: TcpStream::split() + /// [`shutdown()`]: fn@crate::io::AsyncWriteExt::shutdown + Future<(OwnedReadHalf, OwnedWriteHalf)> intoSplit(); + + /// Turns a [`tokio::net::TcpStream`] into a [`std::net::TcpStream`]. + /// + /// The returned [`std::net::TcpStream`] will have nonblocking mode set as `true`. + /// Use [`set_nonblocking`] to change the blocking mode if needed. + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::io::Read; + /// use tokio::net::TcpListener; + /// # use tokio::net::TcpStream; + /// # use tokio::io::AsyncWriteExt; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// let mut data = [0u8; 12]; + /// # if false { + /// let listener = TcpListener::bind("127.0.0.1:34254").await?; + /// # } + /// # let listener = TcpListener::bind("127.0.0.1:0").await?; + /// # let addr = listener.local_addr().unwrap(); + /// # let handle = tokio::spawn(async move { + /// # let mut stream: TcpStream = TcpStream::connect(addr).await.unwrap(); + /// # stream.write_all(b"Hello world!").await.unwrap(); + /// # }); + /// let (tokio_tcp_stream, _) = listener.accept().await?; + /// let mut std_tcp_stream = tokio_tcp_stream.into_std()?; + /// # handle.await.expect("The task being joined has panicked"); + /// std_tcp_stream.set_nonblocking(false)?; + /// std_tcp_stream.read_exact(&mut data)?; + /// # assert_eq!(b"Hello world!", &data); + /// Ok(()) + /// } + /// ``` + /// [`tokio::net::TcpStream`]: TcpStream + /// [`std::net::TcpStream`]: std::net::TcpStream + /// [`set_nonblocking`]: fn@std::net::TcpStream::set_nonblocking + Future intoStd(); + + /// Reads the linger duration for this socket by getting the `SO_LINGER` + /// option. + /// + /// For more information about this option, see [`set_linger`]. + /// + /// [`set_linger`]: TcpStream::set_linger + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// println!("{:?}", stream.linger()?); + /// # Ok(()) + /// # } + /// ``` + Future linger(); + + /// Returns the local address that this stream is bound to. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// println!("{:?}", stream.local_addr()?); + /// # Ok(()) + /// # } + /// ``` + Future localAddr(); + + /// Gets the value of the `TCP_NODELAY` option on this socket. + /// + /// For more information about this option, see [`set_nodelay`]. 
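`intoSplit` is one of the few methods here whose full Dart signature survives in the diff (`Future<(OwnedReadHalf, OwnedWriteHalf)>`), so it can be shown directly with record destructuring. A minimal sketch; what the halves are subsequently used for is left out:

```dart
// Sketch only: split a connected stream into independently owned halves.
Future<void> splitStream(TcpStream stream) async {
  print('serving ${await stream.localAddr()}');

  final (readHalf, writeHalf) = await stream.intoSplit();
  // Each half can now be moved into its own async task; dropping `writeHalf`
  // shuts down the write side of the connection, as the doc comment notes.
  print('read half: $readHalf, write half: $writeHalf');
}
```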
+ /// + /// [`set_nodelay`]: TcpStream::set_nodelay + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// println!("{:?}", stream.nodelay()?); + /// # Ok(()) + /// # } + /// ``` + Future nodelay(); + + /// Receives data on the socket from the remote address to which it is + /// connected, without removing that data from the queue. On success, + /// returns the number of bytes peeked. + /// + /// Successive calls return the same data. This is accomplished by passing + /// `MSG_PEEK` as a flag to the underlying `recv` system call. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If the method is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that no peek was performed, and + /// that `buf` has not been modified. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use tokio::io::AsyncReadExt; + /// use std::error::Error; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let mut stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// let mut b1 = [0; 10]; + /// let mut b2 = [0; 10]; + /// + /// // Peek at the data + /// let n = stream.peek(&mut b1).await?; + /// + /// // Read the data + /// assert_eq!(n, stream.read(&mut b2[..n]).await?); + /// assert_eq!(&b1[..n], &b2[..n]); + /// + /// Ok(()) + /// } + /// ``` + /// + /// The [`read`] method is defined on the [`AsyncReadExt`] trait. + /// + /// [`read`]: fn@crate::io::AsyncReadExt::read + /// [`AsyncReadExt`]: trait@crate::io::AsyncReadExt + Future peek({required U8 buf}); + + /// Returns the remote address that this stream is connected to. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// println!("{:?}", stream.peer_addr()?); + /// # Ok(()) + /// # } + /// ``` + Future peerAddr(); + + /// Attempts to receive data on the socket, without removing that data from + /// the queue, registering the current task for wakeup if data is not yet + /// available. + /// + /// Note that on multiple calls to `poll_peek`, `poll_read` or + /// `poll_read_ready`, only the `Waker` from the `Context` passed to the + /// most recent call is scheduled to receive a wakeup. (However, + /// `poll_write` retains a second, independent waker.) + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if data is not yet available. + /// * `Poll::Ready(Ok(n))` if data is available. `n` is the number of bytes peeked. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. 
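A small diagnostic sketch tying together the address and `TCP_NODELAY` getters declared above. It assumes `nodelay()` resolves to a bool and that the address getters return the bridged `SocketAddr` type, which is simply interpolated here:

```dart
// Sketch only: describe an established connection.
Future<void> describeStream(TcpStream stream) async {
  final local = await stream.localAddr();
  final peer = await stream.peerAddr();
  final nodelay = await stream.nodelay();
  print('connection $local -> $peer (TCP_NODELAY=$nodelay)');
}
```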
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::io::{self, ReadBuf}; + /// use tokio::net::TcpStream; + /// + /// use std::future::poll_fn; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let stream = TcpStream::connect("127.0.0.1:8000").await?; + /// let mut buf = [0; 10]; + /// let mut buf = ReadBuf::new(&mut buf); + /// + /// poll_fn(|cx| { + /// stream.poll_peek(cx, &mut buf) + /// }).await?; + /// + /// Ok(()) + /// } + /// ``` + Future pollPeek({required Context cx, required ReadBuf buf}); + + /// Polls for read readiness. + /// + /// If the tcp stream is not currently ready for reading, this method will + /// store a clone of the `Waker` from the provided `Context`. When the tcp + /// stream becomes ready for reading, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_read_ready`, `poll_read` or + /// `poll_peek`, only the `Waker` from the `Context` passed to the most + /// recent call is scheduled to receive a wakeup. (However, + /// `poll_write_ready` retains a second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`readable`] is not feasible. Where possible, using [`readable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the tcp stream is not ready for reading. + /// * `Poll::Ready(Ok(()))` if the tcp stream is ready for reading. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`readable`]: method@Self::readable + Future pollReadReady({required Context cx}); + + /// Polls for write readiness. + /// + /// If the tcp stream is not currently ready for writing, this method will + /// store a clone of the `Waker` from the provided `Context`. When the tcp + /// stream becomes ready for writing, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_write_ready` or `poll_write`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. (However, `poll_read_ready` retains a + /// second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`writable`] is not feasible. Where possible, using [`writable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the tcp stream is not ready for writing. + /// * `Poll::Ready(Ok(()))` if the tcp stream is ready for writing. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`writable`]: method@Self::writable + Future pollWriteReady({required Context cx}); + + /// Waits for the socket to become readable. + /// + /// This function is equivalent to `ready(Interest::READABLE)` and is usually + /// paired with `try_read()`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read that fails with `WouldBlock` or + /// `Poll::Pending`. 
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// let mut msg = vec![0; 1024]; + /// + /// loop { + /// // Wait for the socket to be readable + /// stream.readable().await?; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read(&mut msg) { + /// Ok(n) => { + /// msg.truncate(n); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// println!("GOT = {:?}", msg); + /// Ok(()) + /// } + /// ``` + Future readable(); + + /// Waits for any of the requested ready states. + /// + /// This function is usually paired with `try_read()` or `try_write()`. It + /// can be used to concurrently read / write to the same socket on a single + /// task without splitting the socket. + /// + /// The function may complete without the socket being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read or write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// Concurrently read and write to the stream on the same task without + /// splitting. + /// + /// ```no_run + /// use tokio::io::Interest; + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// loop { + /// let ready = stream.ready(Interest::READABLE | Interest::WRITABLE).await?; + /// + /// if ready.is_readable() { + /// let mut data = vec![0; 1024]; + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read(&mut data) { + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// + /// } + /// + /// if ready.is_writable() { + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write(b"hello world") { + /// Ok(n) => { + /// println!("write {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// } + /// } + /// ``` + Future ready({required Interest interest}); + + /// Sets the linger duration of this socket by setting the `SO_LINGER` option. + /// + /// This option controls the action taken when a stream has unsent messages and the stream is + /// closed. If `SO_LINGER` is set, the system shall block the process until it can transmit the + /// data or until the time expires. 
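`ready` takes the bridged `Interest` type, and this diff does not show how `Interest` values are constructed on the Dart side, so the sketch below takes one as a parameter rather than inventing a constructor:

```dart
// Sketch only: wait for a caller-supplied readiness set.
Future<void> waitFor(TcpStream stream, Interest interest) async {
  final readiness = await stream.ready(interest: interest);
  // Per the doc comment, the returned set can be empty or a false positive,
  // so callers should re-check with tryRead/tryWrite before acting on it.
  print('readiness: $readiness');
}
```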
+ /// + /// If `SO_LINGER` is not specified, and the stream is closed, the system handles the call in a + /// way that allows the process to continue as quickly as possible. + /// + /// This option is deprecated because setting `SO_LINGER` on a socket used with Tokio is + /// always incorrect as it leads to blocking the thread when the socket is closed. For more + /// details, please see: + /// + /// > Volumes of communications have been devoted to the intricacies of `SO_LINGER` versus + /// > non-blocking (`O_NONBLOCK`) sockets. From what I can tell, the final word is: don't + /// > do it. Rely on the `shutdown()`-followed-by-`read()`-eof technique instead. + /// > + /// > From [The ultimate `SO_LINGER` page, or: why is my tcp not reliable](https://blog.netherlabs.nl/articles/2009/01/18/the-ultimate-so_linger-page-or-why-is-my-tcp-not-reliable) + /// + /// # Examples + /// + /// ```no_run + /// # #![allow(deprecated)] + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// stream.set_linger(None)?; + /// # Ok(()) + /// # } + /// ``` + Future setLinger({Duration? dur}); + + /// Sets the value of the `TCP_NODELAY` option on this socket. + /// + /// If set, this option disables the Nagle algorithm. This means that + /// segments are always sent as soon as possible, even if there is only a + /// small amount of data. When not set, data is buffered until there is a + /// sufficient amount to send out, thereby avoiding the frequent sending of + /// small packets. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// stream.set_nodelay(true)?; + /// # Ok(()) + /// # } + /// ``` + Future setNodelay({required bool nodelay}); + + /// Sets the value for the `IP_TTL` option on this socket. + /// + /// This value sets the time-to-live field that is used in every packet sent + /// from this socket. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// stream.set_ttl(123)?; + /// # Ok(()) + /// # } + /// ``` + Future setTtl({required int ttl}); + + /// Returns the value of the `SO_ERROR` option. + Future takeError(); + + /// Tries to read data from the stream into the provided buffer, returning how + /// many bytes were read. + /// + /// Receives any pending data from the socket but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read()` is non-blocking, the buffer does not have to be stored by + /// the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] or [`ready()`] is used with this function. + /// + /// [`readable()`]: TcpStream::readable() + /// [`ready()`]: TcpStream::ready() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. If `n` is `0`, then it can indicate one of two scenarios: + /// + /// 1. The stream's read half is closed and will no longer yield data. + /// 2. The specified buffer was 0 bytes in length. + /// + /// If the stream is not ready to read data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. 
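A per-stream tuning sketch using the setters declared above. Passing `dur: null` leaves `SO_LINGER` unset, which is what the deprecation note recommends for Tokio-managed sockets; the TTL value is just a common default:

```dart
// Sketch only: per-stream socket options.
Future<void> tuneStream(TcpStream stream) async {
  await stream.setLinger(dur: null);      // keep SO_LINGER unset
  await stream.setTtl(ttl: 64);           // typical default IP_TTL
  await stream.setNodelay(nodelay: true); // flush small writes immediately
  print('linger now: ${await stream.linger()}');
}
```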
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// stream.readable().await?; + /// + /// // Creating the buffer **after** the `await` prevents it from + /// // being stored in the async task. + /// let mut buf = [0; 4096]; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read(&mut buf) { + /// Ok(0) => break, + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryRead({required U8 buf}); + + /// Tries to read data from the stream into the provided buffers, returning + /// how many bytes were read. + /// + /// Data is copied to fill each buffer in order, with the final buffer + /// written to possibly being only partially filled. This method behaves + /// equivalently to a single call to [`try_read()`] with concatenated + /// buffers. + /// + /// Receives any pending data from the socket but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read_vectored()` is non-blocking, the buffer does not have to be + /// stored by the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] or [`ready()`] is used with this function. + /// + /// [`try_read()`]: TcpStream::try_read() + /// [`readable()`]: TcpStream::readable() + /// [`ready()`]: TcpStream::ready() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. `Ok(0)` indicates the stream's read half is closed + /// and will no longer yield data. If the stream is not ready to read data + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io::{self, IoSliceMut}; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// stream.readable().await?; + /// + /// // Creating the buffer **after** the `await` prevents it from + /// // being stored in the async task. + /// let mut buf_a = [0; 512]; + /// let mut buf_b = [0; 1024]; + /// let mut bufs = [ + /// IoSliceMut::new(&mut buf_a), + /// IoSliceMut::new(&mut buf_b), + /// ]; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read_vectored(&mut bufs) { + /// Ok(0) => break, + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryReadVectored({required IoSliceMut bufs}); + + /// Try to write a buffer to the stream, returning how many bytes were + /// written. 
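The `tryWrite` being described here is declared just below, together with `writable()`. A minimal sketch of the pairing, assuming the stripped generic on `buf` is `List<int>` and that a WouldBlock-style failure surfaces as a Dart exception the caller would retry on:

```dart
import 'dart:convert' show utf8;

// Sketch only: wait for write readiness, then attempt one non-blocking write.
Future<void> sendGreeting(TcpStream stream) async {
  await stream.writable(); // readiness may still be a false positive
  final n = await stream.tryWrite(buf: utf8.encode('hello world'));
  print('wrote $n bytes');
}
```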
+ /// + /// The function will attempt to write the entire contents of `buf`, but + /// only part of the buffer may be written. + /// + /// This function is usually paired with `writable()`. + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the stream is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// loop { + /// // Wait for the socket to be writable + /// stream.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryWrite({required List buf}); + + /// Tries to write several buffers to the stream, returning how many bytes + /// were written. + /// + /// Data is written from each buffer in order, with the final buffer read + /// from possibly being only partially consumed. This method behaves + /// equivalently to a single call to [`try_write()`] with concatenated + /// buffers. + /// + /// This function is usually paired with `writable()`. + /// + /// [`try_write()`]: TcpStream::try_write() + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the stream is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// let bufs = [io::IoSlice::new(b"hello "), io::IoSlice::new(b"world")]; + /// + /// loop { + /// // Wait for the socket to be writable + /// stream.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write_vectored(&bufs) { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryWriteVectored({required List bufs}); + + /// Gets the value of the `IP_TTL` option for this socket. + /// + /// For more information about this option, see [`set_ttl`]. + /// + /// [`set_ttl`]: TcpStream::set_ttl + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// println!("{:?}", stream.ttl()?); + /// # Ok(()) + /// # } + /// ``` + Future ttl(); + + /// Waits for the socket to become writable. + /// + /// This function is equivalent to `ready(Interest::WRITABLE)` and is usually + /// paired with `try_write()`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. 
Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// + /// loop { + /// // Wait for the socket to be writable + /// stream.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future writable(); +} + +// Rust type: RustOpaqueMoi> +abstract class UdpSocket implements RustOpaqueInterface { + /// Gets the value of the `SO_BROADCAST` option for this socket. + /// + /// For more information about this option, see [`set_broadcast`]. + /// + /// [`set_broadcast`]: method@Self::set_broadcast + Future broadcast(); + + /// Creates new `UdpSocket` from a previously bound `std::net::UdpSocket`. + /// + /// This function is intended to be used to wrap a UDP socket from the + /// standard library in the Tokio equivalent. + /// + /// This can be used in conjunction with `socket2`'s `Socket` interface to + /// configure a socket before it's handed off, such as setting options like + /// `reuse_address` or binding to multiple addresses. + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the socket is in + /// non-blocking mode. Otherwise all I/O operations on the socket + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. + /// + /// Passing a listener in blocking mode is always erroneous, + /// and the behavior in that case may change in the future. + /// For example, it could panic. + /// + /// [`set_nonblocking`]: std::net::UdpSocket::set_nonblocking + /// + /// # Panics + /// + /// This function panics if thread-local runtime is not set. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + /// + /// # Example + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// # use std::{io, net::SocketAddr}; + /// + /// # #[tokio::main] + /// # async fn main() -> io::Result<()> { + /// let addr = "0.0.0.0:8080".parse::().unwrap(); + /// let std_sock = std::net::UdpSocket::bind(addr)?; + /// std_sock.set_nonblocking(true)?; + /// let sock = UdpSocket::from_std(std_sock)?; + /// // use `sock` + /// # Ok(()) + /// # } + /// ``` + static Future fromStd({required UdpSocket socket}) => + RustLib.instance.api.tokioNetUdpSocketFromStd(socket: socket); + + /// Turns a [`tokio::net::UdpSocket`] into a [`std::net::UdpSocket`]. + /// + /// The returned [`std::net::UdpSocket`] will have nonblocking mode set as + /// `true`. Use [`set_nonblocking`] to change the blocking mode if needed. 
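For `UdpSocket`, the `broadcast` getter above pairs with `setBroadcast`, which is declared further down in the same class. A minimal sketch, assuming the getter resolves to a bool:

```dart
// Sketch only: toggle and read back SO_BROADCAST.
Future<void> enableBroadcast(UdpSocket socket) async {
  await socket.setBroadcast(on_: true);
  final enabled = await socket.broadcast();
  print('SO_BROADCAST enabled: $enabled');
}
```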
+ /// + /// # Examples + /// + /// ```rust,no_run + /// use std::error::Error; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let tokio_socket = tokio::net::UdpSocket::bind("127.0.0.1:0").await?; + /// let std_socket = tokio_socket.into_std()?; + /// std_socket.set_nonblocking(false)?; + /// Ok(()) + /// } + /// ``` + /// + /// [`tokio::net::UdpSocket`]: UdpSocket + /// [`std::net::UdpSocket`]: std::net::UdpSocket + /// [`set_nonblocking`]: fn@std::net::UdpSocket::set_nonblocking + Future intoStd(); + + /// Executes an operation of the `IP_ADD_MEMBERSHIP` type. + /// + /// This function specifies a new multicast group for this socket to join. + /// The address must be a valid multicast address, and `interface` is the + /// address of the local interface with which the system should join the + /// multicast group. If it's equal to `INADDR_ANY` then an appropriate + /// interface is chosen by the system. + Future joinMulticastV4( + {required Ipv4Addr multiaddr, required Ipv4Addr interface_}); + + /// Executes an operation of the `IPV6_ADD_MEMBERSHIP` type. + /// + /// This function specifies a new multicast group for this socket to join. + /// The address must be a valid multicast address, and `interface` is the + /// index of the interface to join/leave (or 0 to indicate any interface). + Future joinMulticastV6( + {required Ipv6Addr multiaddr, required int interface_}); + + /// Executes an operation of the `IP_DROP_MEMBERSHIP` type. + /// + /// For more information about this option, see [`join_multicast_v4`]. + /// + /// [`join_multicast_v4`]: method@Self::join_multicast_v4 + Future leaveMulticastV4( + {required Ipv4Addr multiaddr, required Ipv4Addr interface_}); + + /// Executes an operation of the `IPV6_DROP_MEMBERSHIP` type. + /// + /// For more information about this option, see [`join_multicast_v6`]. + /// + /// [`join_multicast_v6`]: method@Self::join_multicast_v6 + Future leaveMulticastV6( + {required Ipv6Addr multiaddr, required int interface_}); + + /// Returns the local address that this socket is bound to. + /// + /// # Example + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// # use std::{io, net::SocketAddr}; + /// + /// # #[tokio::main] + /// # async fn main() -> io::Result<()> { + /// let addr = "0.0.0.0:8080".parse::().unwrap(); + /// let sock = UdpSocket::bind(addr).await?; + /// // the address the socket is bound to + /// let local_addr = sock.local_addr()?; + /// # Ok(()) + /// # } + /// ``` + Future localAddr(); + + /// Gets the value of the `IP_MULTICAST_LOOP` option for this socket. + /// + /// For more information about this option, see [`set_multicast_loop_v4`]. + /// + /// [`set_multicast_loop_v4`]: method@Self::set_multicast_loop_v4 + Future multicastLoopV4(); + + /// Gets the value of the `IPV6_MULTICAST_LOOP` option for this socket. + /// + /// For more information about this option, see [`set_multicast_loop_v6`]. + /// + /// [`set_multicast_loop_v6`]: method@Self::set_multicast_loop_v6 + Future multicastLoopV6(); + + /// Gets the value of the `IP_MULTICAST_TTL` option for this socket. + /// + /// For more information about this option, see [`set_multicast_ttl_v4`]. + /// + /// [`set_multicast_ttl_v4`]: method@Self::set_multicast_ttl_v4 + Future multicastTtlV4(); + + /// Receives a single datagram from the connected address without removing it from the queue. + /// On success, returns the number of bytes read from whence the data came. 
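A multicast sketch built from the join/loopback/TTL methods above (the TTL setter is declared further down). How `Ipv4Addr` values are constructed depends on the generated bindings, so the group and interface addresses are taken as parameters rather than invented:

```dart
// Sketch only: join an IPv4 multicast group and keep traffic on the LAN.
Future<void> joinGroup(
    UdpSocket socket, Ipv4Addr group, Ipv4Addr localInterface) async {
  await socket.joinMulticastV4(multiaddr: group, interface_: localInterface);
  await socket.setMulticastTtlV4(ttl: 1); // default TTL: stay on the local network
  print('loopback of own multicast packets: ${await socket.multicastLoopV4()}');
}
```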
+ /// + /// # Notes + /// + /// On Windows, if the data is larger than the buffer specified, the buffer + /// is filled with the first part of the data, and peek returns the error + /// `WSAEMSGSIZE(10040)`. The excess data is lost. + /// Make sure to always use a sufficiently large buffer to hold the + /// maximum UDP packet size, which can be up to 65536 bytes in size. + /// + /// MacOS will return an error if you pass a zero-sized buffer. + /// + /// If you're merely interested in learning the sender of the data at the head of the queue, + /// try [`peek_sender`]. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// let mut buf = vec![0u8; 32]; + /// let len = socket.peek(&mut buf).await?; + /// + /// println!("peeked {:?} bytes", len); + /// + /// Ok(()) + /// } + /// ``` + /// + /// [`peek_sender`]: method@Self::peek_sender + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future peek({required U8 buf}); + + /// Receives data from the socket, without removing it from the input queue. + /// On success, returns the number of bytes read and the address from whence + /// the data came. + /// + /// # Notes + /// + /// On Windows, if the data is larger than the buffer specified, the buffer + /// is filled with the first part of the data, and `peek_from` returns the error + /// `WSAEMSGSIZE(10040)`. The excess data is lost. + /// Make sure to always use a sufficiently large buffer to hold the + /// maximum UDP packet size, which can be up to 65536 bytes in size. + /// + /// MacOS will return an error if you pass a zero-sized buffer. + /// + /// If you're merely interested in learning the sender of the data at the head of the queue, + /// try [`peek_sender`]. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// let mut buf = vec![0u8; 32]; + /// let (len, addr) = socket.peek_from(&mut buf).await?; + /// + /// println!("peeked {:?} bytes from {:?}", len, addr); + /// + /// Ok(()) + /// } + /// ``` + /// + /// [`peek_sender`]: method@Self::peek_sender + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future<(BigInt, SocketAddr)> peekFrom({required U8 buf}); + + /// Retrieve the sender of the data at the head of the input queue, waiting if empty. 
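`peekFrom` keeps its record return type `(BigInt, SocketAddr)` in the diff, so its result can be destructured directly. The `U8` buffer type is bridge-generated and its construction is not shown here, so the buffer is supplied by the caller in this sketch:

```dart
// Sketch only: look at the next datagram without dequeueing it.
Future<void> peekOnce(UdpSocket socket, U8 buf) async {
  final (len, addr) = await socket.peekFrom(buf: buf);
  // The datagram is still queued; a later recvFrom will see the same data.
  print('peeked $len bytes from $addr');
}
```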
+ /// + /// This is equivalent to calling [`peek_from`] with a zero-sized buffer, + /// but suppresses the `WSAEMSGSIZE` error on Windows and the "invalid argument" error on macOS. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// [`peek_from`]: method@Self::peek_from + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future peekSender(); + + /// Returns the socket address of the remote peer this socket was connected to. + /// + /// # Example + /// + /// ``` + /// use tokio::net::UdpSocket; + /// + /// # use std::{io, net::SocketAddr}; + /// # #[tokio::main] + /// # async fn main() -> io::Result<()> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// let addr = "0.0.0.0:8080".parse::().unwrap(); + /// let peer = "127.0.0.1:11100".parse::().unwrap(); + /// let sock = UdpSocket::bind(addr).await?; + /// sock.connect(peer).await?; + /// assert_eq!(peer, sock.peer_addr()?); + /// # Ok(()) + /// # } + /// ``` + Future peerAddr(); + + /// Receives data from the connected address, without removing it from the input queue. + /// + /// # Notes + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup + /// + /// On Windows, if the data is larger than the buffer specified, the buffer + /// is filled with the first part of the data, and peek returns the error + /// `WSAEMSGSIZE(10040)`. The excess data is lost. + /// Make sure to always use a sufficiently large buffer to hold the + /// maximum UDP packet size, which can be up to 65536 bytes in size. + /// + /// MacOS will return an error if you pass a zero-sized buffer. + /// + /// If you're merely interested in learning the sender of the data at the head of the queue, + /// try [`poll_peek_sender`]. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to read + /// * `Poll::Ready(Ok(()))` reads data into `ReadBuf` if the socket is ready + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`poll_peek_sender`]: method@Self::poll_peek_sender + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future pollPeek({required Context cx, required ReadBuf buf}); + + /// Receives data from the socket, without removing it from the input queue. + /// On success, returns the sending address of the datagram. 
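`peekSender`, declared above, answers the common case of only wanting the origin of the queued datagram. A minimal sketch; as the doc comment stresses, the reported address is spoofable and is a hint, not authentication:

```dart
// Sketch only: check the claimed sender of the next queued datagram.
Future<void> inspectNextSender(UdpSocket socket) async {
  final claimed = await socket.peekSender();
  print('next datagram claims to come from $claimed');
}
```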
+ /// + /// # Notes + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup + /// + /// On Windows, if the data is larger than the buffer specified, the buffer + /// is filled with the first part of the data, and peek returns the error + /// `WSAEMSGSIZE(10040)`. The excess data is lost. + /// Make sure to always use a sufficiently large buffer to hold the + /// maximum UDP packet size, which can be up to 65536 bytes in size. + /// + /// MacOS will return an error if you pass a zero-sized buffer. + /// + /// If you're merely interested in learning the sender of the data at the head of the queue, + /// try [`poll_peek_sender`]. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to read + /// * `Poll::Ready(Ok(addr))` reads data from `addr` into `ReadBuf` if the socket is ready + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`poll_peek_sender`]: method@Self::poll_peek_sender + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future pollPeekFrom( + {required Context cx, required ReadBuf buf}); + + /// Retrieve the sender of the data at the head of the input queue, + /// scheduling a wakeup if empty. + /// + /// This is equivalent to calling [`poll_peek_from`] with a zero-sized buffer, + /// but suppresses the `WSAEMSGSIZE` error on Windows and the "invalid argument" error on macOS. + /// + /// # Notes + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// [`poll_peek_from`]: method@Self::poll_peek_from + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future pollPeekSender({required Context cx}); + + /// Attempts to receive a single datagram message on the socket from the remote + /// address to which it is `connect`ed. + /// + /// The [`connect`] method will connect this socket to a remote address. This method + /// resolves to an error if the socket is not connected. + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup. 
+ /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to read + /// * `Poll::Ready(Ok(()))` reads data `ReadBuf` if the socket is ready + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`connect`]: method@Self::connect + Future pollRecv({required Context cx, required ReadBuf buf}); + + /// Attempts to receive a single datagram on the socket. + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to read + /// * `Poll::Ready(Ok(addr))` reads data from `addr` into `ReadBuf` if the socket is ready + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// # Notes + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future pollRecvFrom( + {required Context cx, required ReadBuf buf}); + + /// Polls for read/receive readiness. + /// + /// If the udp stream is not currently ready for receiving, this method will + /// store a clone of the `Waker` from the provided `Context`. When the udp + /// socket becomes ready for reading, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_recv_ready`, `poll_recv` or + /// `poll_peek`, only the `Waker` from the `Context` passed to the most + /// recent call is scheduled to receive a wakeup. (However, + /// `poll_send_ready` retains a second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`readable`] is not feasible. Where possible, using [`readable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the udp stream is not ready for reading. + /// * `Poll::Ready(Ok(()))` if the udp stream is ready for reading. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`readable`]: method@Self::readable + Future pollRecvReady({required Context cx}); + + /// Attempts to send data on the socket to the remote address to which it + /// was previously `connect`ed. + /// + /// The [`connect`] method will connect this socket to a remote address. + /// This method will fail if the socket is not connected. + /// + /// Note that on multiple calls to a `poll_*` method in the send direction, + /// only the `Waker` from the `Context` passed to the most recent call will + /// be scheduled to receive a wakeup. 
+ /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not available to write + /// * `Poll::Ready(Ok(n))` `n` is the number of bytes sent + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`connect`]: method@Self::connect + Future pollSend( + {required Context cx, required List buf}); + + /// Polls for write/send readiness. + /// + /// If the udp stream is not currently ready for sending, this method will + /// store a clone of the `Waker` from the provided `Context`. When the udp + /// stream becomes ready for sending, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_send_ready` or `poll_send`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. (However, `poll_recv_ready` retains a + /// second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`writable`] is not feasible. Where possible, using [`writable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the udp stream is not ready for writing. + /// * `Poll::Ready(Ok(()))` if the udp stream is ready for writing. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`writable`]: method@Self::writable + Future pollSendReady({required Context cx}); + + /// Attempts to send data on the socket to a given address. + /// + /// Note that on multiple calls to a `poll_*` method in the send direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to write + /// * `Poll::Ready(Ok(n))` `n` is the number of bytes sent. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + Future pollSendTo( + {required Context cx, + required List buf, + required SocketAddr target}); + + /// Waits for the socket to become readable. + /// + /// This function is equivalent to `ready(Interest::READABLE)` and is usually + /// paired with `try_recv()`. + /// + /// The function may complete without the socket being readable. This is a + /// false-positive and attempting a `try_recv()` will return with + /// `io::ErrorKind::WouldBlock`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read that fails with `WouldBlock` or + /// `Poll::Pending`. 
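A Dart-side sketch around the `readable()` wait described here. Note that Dart's `Future.timeout` does not cancel the underlying readiness wait; it only stops this caller from waiting any longer:

```dart
import 'dart:async';

// Sketch only: bound how long we wait for the socket to report readability.
Future<bool> becomesReadable(UdpSocket socket, Duration limit) async {
  try {
    await socket.readable().timeout(limit);
    return true; // readiness reported (may still be a false positive)
  } on TimeoutException {
    return false; // nothing became readable within the limit
  }
}
```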
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Connect to a peer + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// socket.connect("127.0.0.1:8081").await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// socket.readable().await?; + /// + /// // The buffer is **not** included in the async task and will + /// // only exist on the stack. + /// let mut buf = [0; 1024]; + /// + /// // Try to recv data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_recv(&mut buf) { + /// Ok(n) => { + /// println!("GOT {:?}", &buf[..n]); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future readable(); + + /// Waits for any of the requested ready states. + /// + /// This function is usually paired with `try_recv()` or `try_send()`. It + /// can be used to concurrently `recv` / `send` to the same socket on a single + /// task without splitting the socket. + /// + /// The function may complete without the socket being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read or write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// Concurrently receive from and send to the socket on the same task + /// without splitting. + /// + /// ```no_run + /// use tokio::io::{self, Interest}; + /// use tokio::net::UdpSocket; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// socket.connect("127.0.0.1:8081").await?; + /// + /// loop { + /// let ready = socket.ready(Interest::READABLE | Interest::WRITABLE).await?; + /// + /// if ready.is_readable() { + /// // The buffer is **not** included in the async task and will only exist + /// // on the stack. + /// let mut data = [0; 1024]; + /// match socket.try_recv(&mut data[..]) { + /// Ok(n) => { + /// println!("received {:?}", &data[..n]); + /// } + /// // False-positive, continue + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {} + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// if ready.is_writable() { + /// // Write some data + /// match socket.try_send(b"hello world") { + /// Ok(n) => { + /// println!("sent {} bytes", n); + /// } + /// // False-positive, continue + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {} + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// } + /// } + /// ``` + Future ready({required Interest interest}); + + /// Receives a single datagram message on the socket from the remote address + /// to which it is connected. On success, returns the number of bytes read. + /// + /// The function must be called with valid byte array `buf` of sufficient + /// size to hold the message bytes. 
If a message is too long to fit in the + /// supplied buffer, excess bytes may be discarded. + /// + /// The [`connect`] method will connect this socket to a remote address. + /// This method will fail if the socket is not connected. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `recv` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no messages were received on this + /// socket. + /// + /// [`connect`]: method@Self::connect + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Bind socket + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// socket.connect("127.0.0.1:8081").await?; + /// + /// let mut buf = vec![0; 10]; + /// let n = socket.recv(&mut buf).await?; + /// + /// println!("received {} bytes {:?}", n, &buf[..n]); + /// + /// Ok(()) + /// } + /// ``` + Future recv({required U8 buf}); + + /// Receives a single datagram message on the socket. On success, returns + /// the number of bytes read and the origin. + /// + /// The function must be called with valid byte array `buf` of sufficient + /// size to hold the message bytes. If a message is too long to fit in the + /// supplied buffer, excess bytes may be discarded. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `recv_from` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no messages were received on this + /// socket. + /// + /// # Example + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// let mut buf = vec![0u8; 32]; + /// let (len, addr) = socket.recv_from(&mut buf).await?; + /// + /// println!("received {:?} bytes from {:?}", len, addr); + /// + /// Ok(()) + /// } + /// ``` + /// + /// # Notes + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future<(BigInt, SocketAddr)> recvFrom({required U8 buf}); + + /// Sends data on the socket to the remote address that the socket is + /// connected to. + /// + /// The [`connect`] method will connect this socket to a remote address. + /// This method will fail if the socket is not connected. + /// + /// [`connect`]: method@Self::connect + /// + /// # Return + /// + /// On success, the number of bytes sent is returned, otherwise, the + /// encountered error is returned. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `send` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that the message was not sent. 
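The `send` described here (declared just below) operates on a socket that has already been connected to its peer. A minimal sketch, assuming the stripped generic on `buf` is `List<int>`:

```dart
import 'dart:convert' show utf8;

// Sketch only: send one datagram on a connected UDP socket.
Future<void> sendPing(UdpSocket socket) async {
  final n = await socket.send(buf: utf8.encode('ping'));
  print('sent $n bytes');
}
```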
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::io; + /// use tokio::net::UdpSocket; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Bind socket + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// socket.connect("127.0.0.1:8081").await?; + /// + /// // Send a message + /// socket.send(b"hello world").await?; + /// + /// Ok(()) + /// } + /// ``` + Future send({required List buf}); + + /// Sets the value of the `SO_BROADCAST` option for this socket. + /// + /// When enabled, this socket is allowed to send packets to a broadcast + /// address. + Future setBroadcast({required bool on_}); + + /// Sets the value of the `IP_MULTICAST_LOOP` option for this socket. + /// + /// If enabled, multicast packets will be looped back to the local socket. + /// + /// # Note + /// + /// This may not have any effect on IPv6 sockets. + Future setMulticastLoopV4({required bool on_}); + + /// Sets the value of the `IPV6_MULTICAST_LOOP` option for this socket. + /// + /// Controls whether this socket sees the multicast packets it sends itself. + /// + /// # Note + /// + /// This may not have any effect on IPv4 sockets. + Future setMulticastLoopV6({required bool on_}); + + /// Sets the value of the `IP_MULTICAST_TTL` option for this socket. + /// + /// Indicates the time-to-live value of outgoing multicast packets for + /// this socket. The default value is 1 which means that multicast packets + /// don't leave the local network unless explicitly requested. + /// + /// # Note + /// + /// This may not have any effect on IPv6 sockets. + Future setMulticastTtlV4({required int ttl}); + + /// Sets the value for the `IPV6_TCLASS` option on this socket. + /// + /// Specifies the traffic class field that is used in every packet + /// sent from this socket. + /// + /// # Note + /// + /// This may not have any effect on IPv4 sockets. + Future setTclassV6({required int tclass}); + + /// Deprecated. Use [`set_tos_v4()`] instead. + /// + /// [`set_tos_v4()`]: Self::set_tos_v4 + Future setTos({required int tos}); + + /// Sets the value for the `IP_TOS` option on this socket. + /// + /// This value sets the type-of-service field that is used in every packet + /// sent from this socket. + /// + /// # Note + /// + /// - This may not have any effect on IPv6 sockets. + /// - On Windows, `IP_TOS` is only supported on [Windows 8+ or + /// Windows Server 2012+.](https://docs.microsoft.com/en-us/windows/win32/winsock/ipproto-ip-socket-options) + Future setTosV4({required int tos}); + + /// Sets the value for the `IP_TTL` option on this socket. + /// + /// This value sets the time-to-live field that is used in every packet sent + /// from this socket. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// # use std::io; + /// + /// # async fn dox() -> io::Result<()> { + /// let sock = UdpSocket::bind("127.0.0.1:8080").await?; + /// sock.set_ttl(60)?; + /// + /// # Ok(()) + /// # } + /// ``` + Future setTtl({required int ttl}); + + /// Returns the value of the `SO_ERROR` option. + /// + /// # Examples + /// ``` + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. 
+ /// // Create a socket + /// let socket = UdpSocket::bind("0.0.0.0:8080").await?; + /// + /// if let Ok(Some(err)) = socket.take_error() { + /// println!("Got error: {:?}", err); + /// } + /// + /// Ok(()) + /// } + /// ``` + Future takeError(); + + /// Gets the value of the `IPV6_TCLASS` option for this socket. + /// + /// For more information about this option, see [`set_tclass_v6`]. + /// + /// [`set_tclass_v6`]: Self::set_tclass_v6 + Future tclassV6(); + + /// Deprecated. Use [`tos_v4()`] instead. + /// + /// [`tos_v4()`]: Self::tos_v4 + Future tos(); + + /// Gets the value of the `IP_TOS` option for this socket. + /// + /// For more information about this option, see [`set_tos_v4`]. + /// + /// [`set_tos_v4`]: Self::set_tos_v4 + Future tosV4(); + + /// Tries to receive data on the connected address without removing it from the input queue. + /// On success, returns the number of bytes read. + /// + /// When there is no pending data, `Err(io::ErrorKind::WouldBlock)` is + /// returned. This function is usually paired with `readable()`. + /// + /// # Notes + /// + /// On Windows, if the data is larger than the buffer specified, the buffer + /// is filled with the first part of the data, and peek returns the error + /// `WSAEMSGSIZE(10040)`. The excess data is lost. + /// Make sure to always use a sufficiently large buffer to hold the + /// maximum UDP packet size, which can be up to 65536 bytes in size. + /// + /// MacOS will return an error if you pass a zero-sized buffer. + /// + /// If you're merely interested in learning the sender of the data at the head of the queue, + /// try [`try_peek_sender`]. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// [`try_peek_sender`]: method@Self::try_peek_sender + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future tryPeek({required U8 buf}); + + /// Tries to receive data on the socket without removing it from the input queue. + /// On success, returns the number of bytes read and the sending address of the + /// datagram. + /// + /// When there is no pending data, `Err(io::ErrorKind::WouldBlock)` is + /// returned. This function is usually paired with `readable()`. + /// + /// # Notes + /// + /// On Windows, if the data is larger than the buffer specified, the buffer + /// is filled with the first part of the data, and peek returns the error + /// `WSAEMSGSIZE(10040)`. The excess data is lost. + /// Make sure to always use a sufficiently large buffer to hold the + /// maximum UDP packet size, which can be up to 65536 bytes in size. + /// + /// MacOS will return an error if you pass a zero-sized buffer. + /// + /// If you're merely interested in learning the sender of the data at the head of the queue, + /// try [`try_peek_sender`]. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. 
+ /// It is important to be aware of this when designing your application-level protocol. + /// + /// [`try_peek_sender`]: method@Self::try_peek_sender + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future<(BigInt, SocketAddr)> tryPeekFrom({required U8 buf}); + + /// Try to retrieve the sender of the data at the head of the input queue. + /// + /// When there is no pending data, `Err(io::ErrorKind::WouldBlock)` is + /// returned. This function is usually paired with `readable()`. + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. + /// It is important to be aware of this when designing your application-level protocol. + /// + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + Future tryPeekSender(); + + /// Tries to receive a single datagram message on the socket from the remote + /// address to which it is connected. On success, returns the number of + /// bytes read. + /// + /// This method must be called with valid byte array `buf` of sufficient size + /// to hold the message bytes. If a message is too long to fit in the + /// supplied buffer, excess bytes may be discarded. + /// + /// When there is no pending data, `Err(io::ErrorKind::WouldBlock)` is + /// returned. This function is usually paired with `readable()`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Connect to a peer + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// socket.connect("127.0.0.1:8081").await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// socket.readable().await?; + /// + /// // The buffer is **not** included in the async task and will + /// // only exist on the stack. + /// let mut buf = [0; 1024]; + /// + /// // Try to recv data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_recv(&mut buf) { + /// Ok(n) => { + /// println!("GOT {:?}", &buf[..n]); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryRecv({required U8 buf}); + + /// Tries to receive a single datagram message on the socket. On success, + /// returns the number of bytes read and the origin. + /// + /// This method must be called with valid byte array `buf` of sufficient size + /// to hold the message bytes. If a message is too long to fit in the + /// supplied buffer, excess bytes may be discarded. + /// + /// When there is no pending data, `Err(io::ErrorKind::WouldBlock)` is + /// returned. This function is usually paired with `readable()`. + /// + /// # Notes + /// + /// Note that the socket address **cannot** be implicitly trusted, because it is relatively + /// trivial to send a UDP datagram with a spoofed origin in a [packet injection attack]. + /// Because UDP is stateless and does not validate the origin of a packet, + /// the attacker does not need to be able to intercept traffic in order to interfere. 
+ /// It is important to be aware of this when designing your application-level protocol. + /// + /// [packet injection attack]: https://en.wikipedia.org/wiki/Packet_injection + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Connect to a peer + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// socket.readable().await?; + /// + /// // The buffer is **not** included in the async task and will + /// // only exist on the stack. + /// let mut buf = [0; 1024]; + /// + /// // Try to recv data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_recv_from(&mut buf) { + /// Ok((n, _addr)) => { + /// println!("GOT {:?}", &buf[..n]); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future<(BigInt, SocketAddr)> tryRecvFrom({required U8 buf}); + + /// Tries to send data on the socket to the remote address to which it is + /// connected. + /// + /// When the socket buffer is full, `Err(io::ErrorKind::WouldBlock)` is + /// returned. This function is usually paired with `writable()`. + /// + /// # Returns + /// + /// If successful, `Ok(n)` is returned, where `n` is the number of bytes + /// sent. If the socket is not ready to send data, + /// `Err(ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Bind a UDP socket + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// // Connect to a peer + /// socket.connect("127.0.0.1:8081").await?; + /// + /// loop { + /// // Wait for the socket to be writable + /// socket.writable().await?; + /// + /// // Try to send data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_send(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future trySend({required List buf}); + + /// Tries to send data on the socket to the given address, but if the send is + /// blocked this will return right away. + /// + /// This function is usually paired with `writable()`. + /// + /// # Returns + /// + /// If successful, returns the number of bytes sent + /// + /// Users should ensure that when the remote cannot receive, the + /// [`ErrorKind::WouldBlock`] is properly handled. An error can also occur + /// if the IP version of the socket does not match that of `target`. 
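+ ///
+ /// On the Dart side this maps to `trySendTo(buf: ..., target: ...)`. A
+ /// hedged sketch, assuming `socket` is an instance of this class and a
+ /// `SocketAddr` value `dst` is already in hand (its construction is not
+ /// shown in this excerpt):
+ ///
+ /// ```dart
+ /// // Wait for writability, then attempt the non-blocking send; a
+ /// // WouldBlock-style failure would need to be caught and retried.
+ /// await socket.writable();
+ /// final sent = await socket.trySendTo(buf: [1, 2, 3], target: dst);
+ /// print('sent $sent bytes');
+ /// ```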
+ /// + /// [`ErrorKind::WouldBlock`]: std::io::ErrorKind::WouldBlock + /// + /// # Example + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// let dst = "127.0.0.1:8081".parse()?; + /// + /// loop { + /// socket.writable().await?; + /// + /// match socket.try_send_to(&b"hello world"[..], dst) { + /// Ok(sent) => { + /// println!("sent {} bytes", sent); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// // Writable false positive. + /// continue; + /// } + /// Err(e) => return Err(e.into()), + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future trySendTo( + {required List buf, required SocketAddr target}); + + /// Gets the value of the `IP_TTL` option for this socket. + /// + /// For more information about this option, see [`set_ttl`]. + /// + /// [`set_ttl`]: method@Self::set_ttl + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// # use std::io; + /// + /// # async fn dox() -> io::Result<()> { + /// let sock = UdpSocket::bind("127.0.0.1:8080").await?; + /// + /// println!("{:?}", sock.ttl()?); + /// # Ok(()) + /// # } + /// ``` + Future ttl(); + + /// Waits for the socket to become writable. + /// + /// This function is equivalent to `ready(Interest::WRITABLE)` and is + /// usually paired with `try_send()` or `try_send_to()`. + /// + /// The function may complete without the socket being writable. This is a + /// false-positive and attempting a `try_send()` will return with + /// `io::ErrorKind::WouldBlock`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UdpSocket; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Bind socket + /// let socket = UdpSocket::bind("127.0.0.1:8080").await?; + /// socket.connect("127.0.0.1:8081").await?; + /// + /// loop { + /// // Wait for the socket to be writable + /// socket.writable().await?; + /// + /// // Try to send data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_send(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future writable(); +} + +// Rust type: RustOpaqueMoi> +abstract class UnixDatagram implements RustOpaqueInterface { + /// Creates new [`UnixDatagram`] from a [`std::os::unix::net::UnixDatagram`]. + /// + /// This function is intended to be used to wrap a `UnixDatagram` from the + /// standard library in the Tokio equivalent. + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the socket is in + /// non-blocking mode. Otherwise all I/O operations on the socket + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. + /// + /// Passing a listener in blocking mode is always erroneous, + /// and the behavior in that case may change in the future. + /// For example, it could panic. 
+ /// + /// [`set_nonblocking`]: std::os::unix::net::UnixDatagram::set_nonblocking + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a Tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// use std::os::unix::net::UnixDatagram as StdUDS; + /// use tempfile::tempdir; + /// + /// // We use a temporary directory so that the socket + /// // files left by the bound sockets will get cleaned up. + /// let tmp = tempdir()?; + /// + /// // Bind the socket to a filesystem path + /// let socket_path = tmp.path().join("socket"); + /// let std_socket = StdUDS::bind(&socket_path)?; + /// std_socket.set_nonblocking(true)?; + /// let tokio_socket = UnixDatagram::from_std(std_socket)?; + /// + /// # Ok(()) + /// # } + /// ``` + static Future fromStd({required UnixDatagram datagram}) => + RustLib.instance.api.tokioNetUnixDatagramFromStd(datagram: datagram); + + /// Turns a [`tokio::net::UnixDatagram`] into a [`std::os::unix::net::UnixDatagram`]. + /// + /// The returned [`std::os::unix::net::UnixDatagram`] will have nonblocking + /// mode set as `true`. Use [`set_nonblocking`] to change the blocking mode + /// if needed. + /// + /// # Examples + /// + /// ```rust,no_run + /// # use std::error::Error; + /// # async fn dox() -> Result<(), Box> { + /// let tokio_socket = tokio::net::UnixDatagram::bind("/path/to/the/socket")?; + /// let std_socket = tokio_socket.into_std()?; + /// std_socket.set_nonblocking(false)?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [`tokio::net::UnixDatagram`]: UnixDatagram + /// [`std::os::unix::net::UnixDatagram`]: std::os::unix::net::UnixDatagram + /// [`set_nonblocking`]: fn@std::os::unix::net::UnixDatagram::set_nonblocking + Future intoStd(); + + /// Returns the local address that this socket is bound to. + /// + /// # Examples + /// For a socket bound to a local path + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// use tempfile::tempdir; + /// + /// // We use a temporary directory so that the socket + /// // files left by the bound sockets will get cleaned up. + /// let tmp = tempdir()?; + /// + /// // Bind socket to a filesystem path + /// let socket_path = tmp.path().join("socket"); + /// let socket = UnixDatagram::bind(&socket_path)?; + /// + /// assert_eq!(socket.local_addr()?.as_pathname().unwrap(), &socket_path); + /// + /// # Ok(()) + /// # } + /// ``` + /// + /// For an unbound socket + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// + /// // Create an unbound socket + /// let socket = UnixDatagram::unbound()?; + /// + /// assert!(socket.local_addr()?.is_unnamed()); + /// + /// # Ok(()) + /// # } + /// ``` + Future localAddr(); + + /// Creates an unnamed pair of connected sockets. 
+ /// + /// This function will create a pair of interconnected Unix sockets for + /// communicating back and forth between one another. + /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No SOCK_DGRAM for `socketpair` in miri. + /// use tokio::net::UnixDatagram; + /// + /// // Create the pair of sockets + /// let (sock1, sock2) = UnixDatagram::pair()?; + /// + /// // Since the sockets are paired, the paired send/recv + /// // functions can be used + /// let bytes = b"hail eris"; + /// sock1.send(bytes).await?; + /// + /// let mut buff = vec![0u8; 24]; + /// let size = sock2.recv(&mut buff).await?; + /// + /// let dgram = &buff[..size]; + /// assert_eq!(dgram, bytes); + /// + /// # Ok(()) + /// # } + /// ``` + static Future<(UnixDatagram, UnixDatagram)> pair() => + RustLib.instance.api.tokioNetUnixDatagramPair(); + + /// Returns the address of this socket's peer. + /// + /// The `connect` method will connect the socket to a peer. + /// + /// # Examples + /// For a peer with a local path + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// use tempfile::tempdir; + /// + /// // Create an unbound socket + /// let tx = UnixDatagram::unbound()?; + /// + /// // Create another, bound socket + /// let tmp = tempdir()?; + /// let rx_path = tmp.path().join("rx"); + /// let rx = UnixDatagram::bind(&rx_path)?; + /// + /// // Connect to the bound socket + /// tx.connect(&rx_path)?; + /// + /// assert_eq!(tx.peer_addr()?.as_pathname().unwrap(), &rx_path); + /// + /// # Ok(()) + /// # } + /// ``` + /// + /// For an unbound peer + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No SOCK_DGRAM for `socketpair` in miri. + /// use tokio::net::UnixDatagram; + /// + /// // Create the pair of sockets + /// let (sock1, sock2) = UnixDatagram::pair()?; + /// + /// assert!(sock1.peer_addr()?.is_unnamed()); + /// + /// # Ok(()) + /// # } + /// ``` + Future peerAddr(); + + /// Attempts to receive a single datagram message on the socket from the remote + /// address to which it is `connect`ed. + /// + /// The [`connect`] method will connect this socket to a remote address. This method + /// resolves to an error if the socket is not connected. + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to read + /// * `Poll::Ready(Ok(()))` reads data `ReadBuf` if the socket is ready + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`connect`]: method@Self::connect + Future pollRecv({required Context cx, required ReadBuf buf}); + + /// Attempts to receive a single datagram on the specified address. + /// + /// Note that on multiple calls to a `poll_*` method in the `recv` direction, only the + /// `Waker` from the `Context` passed to the most recent call will be scheduled to + /// receive a wakeup. 
+ /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready to read + /// * `Poll::Ready(Ok(addr))` reads data from `addr` into `ReadBuf` if the socket is ready + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + Future pollRecvFrom( + {required Context cx, required ReadBuf buf}); + + /// Polls for read/receive readiness. + /// + /// If the socket is not currently ready for receiving, this method will + /// store a clone of the `Waker` from the provided `Context`. When the + /// socket becomes ready for reading, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_recv_ready`, `poll_recv` or + /// `poll_peek`, only the `Waker` from the `Context` passed to the most + /// recent call is scheduled to receive a wakeup. (However, + /// `poll_send_ready` retains a second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`readable`] is not feasible. Where possible, using [`readable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready for reading. + /// * `Poll::Ready(Ok(()))` if the socket is ready for reading. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`readable`]: method@Self::readable + Future pollRecvReady({required Context cx}); + + /// Attempts to send data on the socket to the remote address to which it + /// was previously `connect`ed. + /// + /// The [`connect`] method will connect this socket to a remote address. + /// This method will fail if the socket is not connected. + /// + /// Note that on multiple calls to a `poll_*` method in the send direction, + /// only the `Waker` from the `Context` passed to the most recent call will + /// be scheduled to receive a wakeup. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not available to write + /// * `Poll::Ready(Ok(n))` `n` is the number of bytes sent + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`connect`]: method@Self::connect + Future pollSend( + {required Context cx, required List buf}); + + /// Polls for write/send readiness. + /// + /// If the socket is not currently ready for sending, this method will + /// store a clone of the `Waker` from the provided `Context`. When the socket + /// becomes ready for sending, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_send_ready` or `poll_send`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. (However, `poll_recv_ready` retains a + /// second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`writable`] is not feasible. Where possible, using [`writable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the socket is not ready for writing. 
+ /// * `Poll::Ready(Ok(()))` if the socket is ready for writing. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`writable`]: method@Self::writable + Future pollSendReady({required Context cx}); + + /// Waits for the socket to become readable. + /// + /// This function is equivalent to `ready(Interest::READABLE)` and is usually + /// paired with `try_recv()`. + /// + /// The function may complete without the socket being readable. This is a + /// false-positive and attempting a `try_recv()` will return with + /// `io::ErrorKind::WouldBlock`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixDatagram; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let client_path = dir.path().join("client.sock"); + /// let server_path = dir.path().join("server.sock"); + /// let socket = UnixDatagram::bind(&client_path)?; + /// socket.connect(&server_path)?; + /// + /// loop { + /// // Wait for the socket to be readable + /// socket.readable().await?; + /// + /// // The buffer is **not** included in the async task and will + /// // only exist on the stack. + /// let mut buf = [0; 1024]; + /// + /// // Try to recv data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_recv(&mut buf) { + /// Ok(n) => { + /// println!("GOT {:?}", &buf[..n]); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future readable(); + + /// Waits for any of the requested ready states. + /// + /// This function is usually paired with `try_recv()` or `try_send()`. It + /// can be used to concurrently `recv` / `send` to the same socket on a single + /// task without splitting the socket. + /// + /// The function may complete without the socket being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read or write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// Concurrently receive from and send to the socket on the same task + /// without splitting. 
+ /// + /// ```no_run + /// use tokio::io::Interest; + /// use tokio::net::UnixDatagram; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let dir = tempfile::tempdir().unwrap(); + /// let client_path = dir.path().join("client.sock"); + /// let server_path = dir.path().join("server.sock"); + /// let socket = UnixDatagram::bind(&client_path)?; + /// socket.connect(&server_path)?; + /// + /// loop { + /// let ready = socket.ready(Interest::READABLE | Interest::WRITABLE).await?; + /// + /// if ready.is_readable() { + /// let mut data = [0; 1024]; + /// match socket.try_recv(&mut data[..]) { + /// Ok(n) => { + /// println!("received {:?}", &data[..n]); + /// } + /// // False-positive, continue + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {} + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// if ready.is_writable() { + /// // Write some data + /// match socket.try_send(b"hello world") { + /// Ok(n) => { + /// println!("sent {} bytes", n); + /// } + /// // False-positive, continue + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {} + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// } + /// } + /// ``` + Future ready({required Interest interest}); + + /// Receives data from the socket. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `recv` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no messages were received on this + /// socket. + /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No SOCK_DGRAM for `socketpair` in miri. + /// use tokio::net::UnixDatagram; + /// + /// // Create the pair of sockets + /// let (sock1, sock2) = UnixDatagram::pair()?; + /// + /// // Since the sockets are paired, the paired send/recv + /// // functions can be used + /// let bytes = b"hello world"; + /// sock1.send(bytes).await?; + /// + /// let mut buff = vec![0u8; 24]; + /// let size = sock2.recv(&mut buff).await?; + /// + /// let dgram = &buff[..size]; + /// assert_eq!(dgram, bytes); + /// + /// # Ok(()) + /// # } + /// ``` + Future recv({required U8 buf}); + + /// Receives data from the socket. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `recv_from` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no messages were received on this + /// socket. + /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// use tempfile::tempdir; + /// + /// // We use a temporary directory so that the socket + /// // files left by the bound sockets will get cleaned up. 
+ /// let tmp = tempdir()?; + /// + /// // Bind each socket to a filesystem path + /// let tx_path = tmp.path().join("tx"); + /// let tx = UnixDatagram::bind(&tx_path)?; + /// let rx_path = tmp.path().join("rx"); + /// let rx = UnixDatagram::bind(&rx_path)?; + /// + /// let bytes = b"hello world"; + /// tx.send_to(bytes, &rx_path).await?; + /// + /// let mut buf = vec![0u8; 24]; + /// let (size, addr) = rx.recv_from(&mut buf).await?; + /// + /// let dgram = &buf[..size]; + /// assert_eq!(dgram, bytes); + /// assert_eq!(addr.as_pathname().unwrap(), &tx_path); + /// + /// # Ok(()) + /// # } + /// ``` + Future<(BigInt, SocketAddr)> recvFrom({required U8 buf}); + + /// Sends data on the socket to the socket's peer. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `send` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that the message was not sent. + /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No SOCK_DGRAM for `socketpair` in miri. + /// use tokio::net::UnixDatagram; + /// + /// // Create the pair of sockets + /// let (sock1, sock2) = UnixDatagram::pair()?; + /// + /// // Since the sockets are paired, the paired send/recv + /// // functions can be used + /// let bytes = b"hello world"; + /// sock1.send(bytes).await?; + /// + /// let mut buff = vec![0u8; 24]; + /// let size = sock2.recv(&mut buff).await?; + /// + /// let dgram = &buff[..size]; + /// assert_eq!(dgram, bytes); + /// + /// # Ok(()) + /// # } + /// ``` + Future send({required List buf}); + + /// Shuts down the read, write, or both halves of this connection. + /// + /// This function will cause all pending and future I/O calls on the + /// specified portions to immediately return with an appropriate value + /// (see the documentation of `Shutdown`). + /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No SOCK_DGRAM for `socketpair` in miri. + /// use tokio::net::UnixDatagram; + /// use std::net::Shutdown; + /// + /// // Create an unbound socket + /// let (socket, other) = UnixDatagram::pair()?; + /// + /// socket.shutdown(Shutdown::Both)?; + /// + /// // NOTE: the following commented out code does NOT work as expected. + /// // Due to an underlying issue, the recv call will block indefinitely. + /// // See: https://github.com/tokio-rs/tokio/issues/1679 + /// //let mut buff = vec![0u8; 24]; + /// //let size = socket.recv(&mut buff).await?; + /// //assert_eq!(size, 0); + /// + /// let send_result = socket.send(b"hello world").await; + /// assert!(send_result.is_err()); + /// + /// # Ok(()) + /// # } + /// ``` + Future shutdown({required Shutdown how}); + + /// Returns the value of the `SO_ERROR` option. + /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// + /// // Create an unbound socket + /// let socket = UnixDatagram::unbound()?; + /// + /// if let Ok(Some(err)) = socket.take_error() { + /// println!("Got error: {:?}", err); + /// } + /// + /// # Ok(()) + /// # } + /// ``` + Future takeError(); + + /// Tries to receive a datagram from the peer without waiting. 
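+ ///
+ /// A Dart-side sketch of the readable/try_recv pairing (hypothetical: how
+ /// the mutable byte buffer of the generated `U8` type is constructed is
+ /// not shown in this excerpt, so `buf` is assumed to exist already):
+ ///
+ /// ```dart
+ /// // Wait for readability, then try a non-blocking receive; a
+ /// // WouldBlock-style error means the readiness was a false positive.
+ /// await socket.readable();
+ /// final n = await socket.tryRecv(buf: buf);
+ /// print('received $n bytes');
+ /// ```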
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixDatagram; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let client_path = dir.path().join("client.sock"); + /// let server_path = dir.path().join("server.sock"); + /// let socket = UnixDatagram::bind(&client_path)?; + /// socket.connect(&server_path)?; + /// + /// loop { + /// // Wait for the socket to be readable + /// socket.readable().await?; + /// + /// // The buffer is **not** included in the async task and will + /// // only exist on the stack. + /// let mut buf = [0; 1024]; + /// + /// // Try to recv data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_recv(&mut buf) { + /// Ok(n) => { + /// println!("GOT {:?}", &buf[..n]); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryRecv({required U8 buf}); + + /// Tries to receive data from the socket without waiting. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixDatagram; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let client_path = dir.path().join("client.sock"); + /// let server_path = dir.path().join("server.sock"); + /// let socket = UnixDatagram::bind(&client_path)?; + /// + /// loop { + /// // Wait for the socket to be readable + /// socket.readable().await?; + /// + /// // The buffer is **not** included in the async task and will + /// // only exist on the stack. + /// let mut buf = [0; 1024]; + /// + /// // Try to recv data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_recv_from(&mut buf) { + /// Ok((n, _addr)) => { + /// println!("GOT {:?}", &buf[..n]); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future<(BigInt, SocketAddr)> tryRecvFrom({required U8 buf}); + + /// Tries to send a datagram to the peer without waiting. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixDatagram; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let dir = tempfile::tempdir().unwrap(); + /// let client_path = dir.path().join("client.sock"); + /// let server_path = dir.path().join("server.sock"); + /// let socket = UnixDatagram::bind(&client_path)?; + /// socket.connect(&server_path)?; + /// + /// loop { + /// // Wait for the socket to be writable + /// socket.writable().await?; + /// + /// // Try to send data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_send(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future trySend({required List buf}); + + /// Creates a new `UnixDatagram` which is not bound to any address. 
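+ ///
+ /// From Dart this is a plain static call on the binding (sketch only; the
+ /// resulting socket has no filesystem address until it is given one):
+ ///
+ /// ```dart
+ /// // Create an address-less datagram socket; it can still send once a
+ /// // destination is supplied, as the Rust example below demonstrates.
+ /// final tx = await UnixDatagram.unbound();
+ /// ```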
+ /// + /// # Examples + /// ``` + /// # use std::error::Error; + /// # #[tokio::main] + /// # async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// use tokio::net::UnixDatagram; + /// use tempfile::tempdir; + /// + /// // Create an unbound socket + /// let tx = UnixDatagram::unbound()?; + /// + /// // Create another, bound socket + /// let tmp = tempdir()?; + /// let rx_path = tmp.path().join("rx"); + /// let rx = UnixDatagram::bind(&rx_path)?; + /// + /// // Send to the bound socket + /// let bytes = b"hello world"; + /// tx.send_to(bytes, &rx_path).await?; + /// + /// let mut buf = vec![0u8; 24]; + /// let (size, addr) = rx.recv_from(&mut buf).await?; + /// + /// let dgram = &buf[..size]; + /// assert_eq!(dgram, bytes); + /// + /// # Ok(()) + /// # } + /// ``` + static Future unbound() => + RustLib.instance.api.tokioNetUnixDatagramUnbound(); + + /// Waits for the socket to become writable. + /// + /// This function is equivalent to `ready(Interest::WRITABLE)` and is + /// usually paired with `try_send()` or `try_send_to()`. + /// + /// The function may complete without the socket being writable. This is a + /// false-positive and attempting a `try_send()` will return with + /// `io::ErrorKind::WouldBlock`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixDatagram; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let dir = tempfile::tempdir().unwrap(); + /// let client_path = dir.path().join("client.sock"); + /// let server_path = dir.path().join("server.sock"); + /// let socket = UnixDatagram::bind(&client_path)?; + /// socket.connect(&server_path)?; + /// + /// loop { + /// // Wait for the socket to be writable + /// socket.writable().await?; + /// + /// // Try to send data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match socket.try_send(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future writable(); +} + +// Rust type: RustOpaqueMoi> +abstract class UnixListener implements RustOpaqueInterface { + /// Accepts a new incoming connection to this listener. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If the method is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that no new connections were + /// accepted by this method. + Future<(UnixStream, SocketAddr)> accept(); + + /// Creates new [`UnixListener`] from a [`std::os::unix::net::UnixListener`]. + /// + /// This function is intended to be used to wrap a `UnixListener` from the + /// standard library in the Tokio equivalent. + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the listener is in + /// non-blocking mode. Otherwise all I/O operations on the listener + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. 
+ /// + /// Passing a listener in blocking mode is always erroneous, + /// and the behavior in that case may change in the future. + /// For example, it could panic. + /// + /// [`set_nonblocking`]: std::os::unix::net::UnixListener::set_nonblocking + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixListener; + /// use std::os::unix::net::UnixListener as StdUnixListener; + /// # use std::error::Error; + /// + /// # async fn dox() -> Result<(), Box> { + /// let std_listener = StdUnixListener::bind("/path/to/the/socket")?; + /// std_listener.set_nonblocking(true)?; + /// let listener = UnixListener::from_std(std_listener)?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromStd({required UnixListener listener}) => + RustLib.instance.api.tokioNetUnixListenerFromStd(listener: listener); + + /// Turns a [`tokio::net::UnixListener`] into a [`std::os::unix::net::UnixListener`]. + /// + /// The returned [`std::os::unix::net::UnixListener`] will have nonblocking mode + /// set as `true`. Use [`set_nonblocking`] to change the blocking mode if needed. + /// + /// # Examples + /// + /// ```rust,no_run + /// # use std::error::Error; + /// # async fn dox() -> Result<(), Box> { + /// let tokio_listener = tokio::net::UnixListener::bind("/path/to/the/socket")?; + /// let std_listener = tokio_listener.into_std()?; + /// std_listener.set_nonblocking(false)?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [`tokio::net::UnixListener`]: UnixListener + /// [`std::os::unix::net::UnixListener`]: std::os::unix::net::UnixListener + /// [`set_nonblocking`]: fn@std::os::unix::net::UnixListener::set_nonblocking + Future intoStd(); + + /// Returns the local socket address of this listener. + Future localAddr(); + + /// Polls to accept a new incoming connection to this listener. + /// + /// If there is no connection to accept, `Poll::Pending` is returned and the + /// current task will be notified by a waker. Note that on multiple calls + /// to `poll_accept`, only the `Waker` from the `Context` passed to the most + /// recent call is scheduled to receive a wakeup. + Future pollAccept({required Context cx}); + + /// Returns the value of the `SO_ERROR` option. + Future takeError(); +} + +// Rust type: RustOpaqueMoi> +abstract class UnixSocket implements RustOpaqueInterface { + /// Converts the socket into a [`UnixDatagram`]. + /// + /// Calling this function on a socket created by [`new_stream`] will return an error. + /// + /// [`new_stream`]: `UnixSocket::new_stream` + Future datagram(); + + /// Converts the socket into a `UnixListener`. + /// + /// `backlog` defines the maximum number of pending connections are queued + /// by the operating system at any given time. Connection are removed from + /// the queue with [`UnixListener::accept`]. When the queue is full, the + /// operating-system will start rejecting connections. + /// + /// Calling this function on a socket created by [`new_datagram`] will return an error. + /// + /// This calls the `listen(2)` operating-system function, marking the socket + /// as a passive socket. 
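+ ///
+ /// A Dart-side sketch chaining the socket builder into a listener
+ /// (hypothetical flow: binding the socket to a path before `listen` is
+ /// omitted because the corresponding call is outside this excerpt):
+ ///
+ /// ```dart
+ /// // Create a stream-mode Unix socket, then mark it passive with a
+ /// // backlog of 128 pending connections.
+ /// final sock = await UnixSocket.newStream();
+ /// final listener = await sock.listen(backlog: 128);
+ /// ```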
+ /// + /// [`new_datagram`]: `UnixSocket::new_datagram` + Future listen({required int backlog}); + + /// Creates a new Unix datagram socket. + /// + /// Calls `socket(2)` with `AF_UNIX` and `SOCK_DGRAM`. + /// + /// # Returns + /// + /// On success, the newly created [`UnixSocket`] is returned. If an error is + /// encountered, it is returned instead. + static Future newDatagram() => + RustLib.instance.api.tokioNetUnixSocketNewDatagram(); + + /// Creates a new Unix stream socket. + /// + /// Calls `socket(2)` with `AF_UNIX` and `SOCK_STREAM`. + /// + /// # Returns + /// + /// On success, the newly created [`UnixSocket`] is returned. If an error is + /// encountered, it is returned instead. + static Future newStream() => + RustLib.instance.api.tokioNetUnixSocketNewStream(); +} + +// Rust type: RustOpaqueMoi> +abstract class UnixStream implements RustOpaqueInterface { + /// Creates new [`UnixStream`] from a [`std::os::unix::net::UnixStream`]. + /// + /// This function is intended to be used to wrap a `UnixStream` from the + /// standard library in the Tokio equivalent. + /// + /// # Notes + /// + /// The caller is responsible for ensuring that the stream is in + /// non-blocking mode. Otherwise all I/O operations on the stream + /// will block the thread, which will cause unexpected behavior. + /// Non-blocking mode can be set using [`set_nonblocking`]. + /// + /// Passing a listener in blocking mode is always erroneous, + /// and the behavior in that case may change in the future. + /// For example, it could panic. + /// + /// [`set_nonblocking`]: std::os::unix::net::UnixStream::set_nonblocking + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::os::unix::net::UnixStream as StdUnixStream; + /// # use std::error::Error; + /// + /// # async fn dox() -> Result<(), Box> { + /// let std_stream = StdUnixStream::connect("/path/to/the/socket")?; + /// std_stream.set_nonblocking(true)?; + /// let stream = UnixStream::from_std(std_stream)?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromStd({required UnixStream stream}) => + RustLib.instance.api.tokioNetUnixStreamFromStd(stream: stream); + + /// Splits a `UnixStream` into a read half and a write half, which can be used + /// to read and write the stream concurrently. + /// + /// Unlike [`split`], the owned halves can be moved to separate tasks, however + /// this comes at the cost of a heap allocation. + /// + /// **Note:** Dropping the write half will only shut down the write half of the + /// stream. This is equivalent to calling [`shutdown()`] on the `UnixStream`. + /// + /// [`split`]: Self::split() + /// [`shutdown()`]: fn@crate::io::AsyncWriteExt::shutdown + Future<(OwnedReadHalf, OwnedWriteHalf)> intoSplit(); + + /// Turns a [`tokio::net::UnixStream`] into a [`std::os::unix::net::UnixStream`]. + /// + /// The returned [`std::os::unix::net::UnixStream`] will have nonblocking + /// mode set as `true`. Use [`set_nonblocking`] to change the blocking + /// mode if needed. 
+ /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::io::Read; + /// use tokio::net::UnixListener; + /// # use tokio::net::UnixStream; + /// # use tokio::io::AsyncWriteExt; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// # if cfg!(miri) { return Ok(()); } // No `socket` in miri. + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// + /// let mut data = [0u8; 12]; + /// let listener = UnixListener::bind(&bind_path)?; + /// # let handle = tokio::spawn(async { + /// # let mut stream = UnixStream::connect(bind_path).await.unwrap(); + /// # stream.write(b"Hello world!").await.unwrap(); + /// # }); + /// let (tokio_unix_stream, _) = listener.accept().await?; + /// let mut std_unix_stream = tokio_unix_stream.into_std()?; + /// # handle.await.expect("The task being joined has panicked"); + /// std_unix_stream.set_nonblocking(false)?; + /// std_unix_stream.read_exact(&mut data)?; + /// # assert_eq!(b"Hello world!", &data); + /// Ok(()) + /// } + /// ``` + /// [`tokio::net::UnixStream`]: UnixStream + /// [`std::os::unix::net::UnixStream`]: std::os::unix::net::UnixStream + /// [`set_nonblocking`]: fn@std::os::unix::net::UnixStream::set_nonblocking + Future intoStd(); + + /// Returns the socket address of the local half of this connection. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// println!("{:?}", stream.local_addr()?); + /// # Ok(()) + /// # } + /// ``` + Future localAddr(); + + /// Creates an unnamed pair of connected sockets. + /// + /// This function will create a pair of interconnected Unix sockets for + /// communicating back and forth between one another. Each socket will + /// be associated with the default event loop's handle. + static Future<(UnixStream, UnixStream)> pair() => + RustLib.instance.api.tokioNetUnixStreamPair(); + + /// Returns the socket address of the remote half of this connection. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// + /// # async fn dox() -> Result<(), Box> { + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// println!("{:?}", stream.peer_addr()?); + /// # Ok(()) + /// # } + /// ``` + Future peerAddr(); + + /// Returns effective credentials of the process which called `connect` or `pair`. + Future peerCred(); + + /// Polls for read readiness. + /// + /// If the unix stream is not currently ready for reading, this method will + /// store a clone of the `Waker` from the provided `Context`. When the unix + /// stream becomes ready for reading, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_read_ready` or `poll_read`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. (However, `poll_write_ready` retains a + /// second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`readable`] is not feasible. Where possible, using [`readable`] is + /// preferred, as this supports polling from multiple tasks at once. 
+ /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the unix stream is not ready for reading. + /// * `Poll::Ready(Ok(()))` if the unix stream is ready for reading. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`readable`]: method@Self::readable + Future pollReadReady({required Context cx}); + + /// Polls for write readiness. + /// + /// If the unix stream is not currently ready for writing, this method will + /// store a clone of the `Waker` from the provided `Context`. When the unix + /// stream becomes ready for writing, `Waker::wake` will be called on the + /// waker. + /// + /// Note that on multiple calls to `poll_write_ready` or `poll_write`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. (However, `poll_read_ready` retains a + /// second, independent waker.) + /// + /// This function is intended for cases where creating and pinning a future + /// via [`writable`] is not feasible. Where possible, using [`writable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the unix stream is not ready for writing. + /// * `Poll::Ready(Ok(()))` if the unix stream is ready for writing. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + /// + /// [`writable`]: method@Self::writable + Future pollWriteReady({required Context cx}); + + /// Waits for the socket to become readable. + /// + /// This function is equivalent to `ready(Interest::READABLE)` and is usually + /// paired with `try_read()`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// let mut msg = vec![0; 1024]; + /// + /// loop { + /// // Wait for the socket to be readable + /// stream.readable().await?; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read(&mut msg) { + /// Ok(n) => { + /// msg.truncate(n); + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// println!("GOT = {:?}", msg); + /// Ok(()) + /// } + /// ``` + Future readable(); + + /// Waits for any of the requested ready states. + /// + /// This function is usually paired with `try_read()` or `try_write()`. It + /// can be used to concurrently read / write to the same socket on a single + /// task without splitting the socket. + /// + /// The function may complete without the socket being ready. 
This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read or write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// Concurrently read and write to the stream on the same task without + /// splitting. + /// + /// ```no_run + /// use tokio::io::Interest; + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// loop { + /// let ready = stream.ready(Interest::READABLE | Interest::WRITABLE).await?; + /// + /// if ready.is_readable() { + /// let mut data = vec![0; 1024]; + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read(&mut data) { + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// + /// } + /// + /// if ready.is_writable() { + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write(b"hello world") { + /// Ok(n) => { + /// println!("write {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// } + /// } + /// ``` + Future ready({required Interest interest}); + + /// Returns the value of the `SO_ERROR` option. + Future takeError(); + + /// Try to read data from the stream into the provided buffer, returning how + /// many bytes were read. + /// + /// Receives any pending data from the socket but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read()` is non-blocking, the buffer does not have to be stored by + /// the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] or [`ready()`] is used with this function. + /// + /// [`readable()`]: UnixStream::readable() + /// [`ready()`]: UnixStream::ready() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. If `n` is `0`, then it can indicate one of two scenarios: + /// + /// 1. The stream's read half is closed and will no longer yield data. + /// 2. The specified buffer was 0 bytes in length. + /// + /// If the stream is not ready to read data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. 
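+  ///
+  /// A minimal Dart-side sketch of the same pattern through this binding,
+  /// assuming a connected `UnixStream` named `stream` and a buffer `buf` of
+  /// the generated `U8` buffer type:
+  ///
+  /// ```dart
+  /// // Wait until the socket reports readable, then attempt a
+  /// // non-blocking read; `n` is the number of bytes read.
+  /// await stream.readable();
+  /// final n = await stream.tryRead(buf: buf);
+  /// ```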
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// stream.readable().await?; + /// + /// // Creating the buffer **after** the `await` prevents it from + /// // being stored in the async task. + /// let mut buf = [0; 4096]; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_read(&mut buf) { + /// Ok(0) => break, + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryRead({required U8 buf}); + + /// Tries to read data from the stream into the provided buffers, returning + /// how many bytes were read. + /// + /// Data is copied to fill each buffer in order, with the final buffer + /// written to possibly being only partially filled. This method behaves + /// equivalently to a single call to [`try_read()`] with concatenated + /// buffers. + /// + /// Receives any pending data from the socket but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read_vectored()` is non-blocking, the buffer does not have to be + /// stored by the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] or [`ready()`] is used with this function. + /// + /// [`try_read()`]: UnixStream::try_read() + /// [`readable()`]: UnixStream::readable() + /// [`ready()`]: UnixStream::ready() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. `Ok(0)` indicates the stream's read half is closed + /// and will no longer yield data. If the stream is not ready to read data + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io::{self, IoSliceMut}; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// loop { + /// // Wait for the socket to be readable + /// stream.readable().await?; + /// + /// // Creating the buffer **after** the `await` prevents it from + /// // being stored in the async task. + /// let mut buf_a = [0; 512]; + /// let mut buf_b = [0; 1024]; + /// let mut bufs = [ + /// IoSliceMut::new(&mut buf_a), + /// IoSliceMut::new(&mut buf_b), + /// ]; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. 
+ /// match stream.try_read_vectored(&mut bufs) { + /// Ok(0) => break, + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryReadVectored({required IoSliceMut bufs}); + + /// Tries to write a buffer to the stream, returning how many bytes were + /// written. + /// + /// The function will attempt to write the entire contents of `buf`, but + /// only part of the buffer may be written. + /// + /// This function is usually paired with `writable()`. + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the stream is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// loop { + /// // Wait for the socket to be writable + /// stream.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryWrite({required List buf}); + + /// Tries to write several buffers to the stream, returning how many bytes + /// were written. + /// + /// Data is written from each buffer in order, with the final buffer read + /// from possible being only partially consumed. This method behaves + /// equivalently to a single call to [`try_write()`] with concatenated + /// buffers. + /// + /// This function is usually paired with `writable()`. + /// + /// [`try_write()`]: UnixStream::try_write() + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the stream is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// let bufs = [io::IoSlice::new(b"hello "), io::IoSlice::new(b"world")]; + /// + /// loop { + /// // Wait for the socket to be writable + /// stream.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write_vectored(&bufs) { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryWriteVectored({required List buf}); + + /// Waits for the socket to become writable. 
+ /// + /// This function is equivalent to `ready(Interest::WRITABLE)` and is usually + /// paired with `try_write()`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to write that fails with `WouldBlock` or + /// `Poll::Pending`. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::UnixStream; + /// use std::error::Error; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let dir = tempfile::tempdir().unwrap(); + /// let bind_path = dir.path().join("bind_path"); + /// let stream = UnixStream::connect(bind_path).await?; + /// + /// loop { + /// // Wait for the socket to be writable + /// stream.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match stream.try_write(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future writable(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/tcp.dart b/mobile_app/lib/src/rust/third_party/tokio/net/tcp.dart new file mode 100644 index 0000000..785aecf --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/tcp.dart @@ -0,0 +1,297 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import '../../../lib.dart'; +import '../io.dart'; +import '../net.dart'; +import '../signal/unix.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'tcp/split_owned.dart'; +import 'unix.dart'; + +// Rust type: RustOpaqueMoi> +abstract class OwnedReadHalf implements RustOpaqueInterface { + /// Returns the local address that this stream is bound to. + Future localAddr(); + + /// Receives data on the socket from the remote address to which it is + /// connected, without removing that data from the queue. On success, + /// returns the number of bytes peeked. + /// + /// See the [`TcpStream::peek`] level documentation for more details. + /// + /// [`TcpStream::peek`]: TcpStream::peek + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::TcpStream; + /// use tokio::io::AsyncReadExt; + /// use std::error::Error; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // Connect to a peer + /// let stream = TcpStream::connect("127.0.0.1:8080").await?; + /// let (mut read_half, _) = stream.into_split(); + /// + /// let mut b1 = [0; 10]; + /// let mut b2 = [0; 10]; + /// + /// // Peek at the data + /// let n = read_half.peek(&mut b1).await?; + /// + /// // Read the data + /// assert_eq!(n, read_half.read(&mut b2[..n]).await?); + /// assert_eq!(&b1[..n], &b2[..n]); + /// + /// Ok(()) + /// } + /// ``` + /// + /// The [`read`] method is defined on the [`AsyncReadExt`] trait. + /// + /// [`read`]: fn@crate::io::AsyncReadExt::read + /// [`AsyncReadExt`]: trait@crate::io::AsyncReadExt + Future peek({required U8 buf}); + + /// Returns the remote address that this stream is connected to. 
+ Future peerAddr(); + + /// Attempt to receive data on the socket, without removing that data from + /// the queue, registering the current task for wakeup if data is not yet + /// available. + /// + /// Note that on multiple calls to `poll_peek` or `poll_read`, only the + /// `Waker` from the `Context` passed to the most recent call is scheduled + /// to receive a wakeup. + /// + /// See the [`TcpStream::poll_peek`] level documentation for more details. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::io::{self, ReadBuf}; + /// use tokio::net::TcpStream; + /// + /// use std::future::poll_fn; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let stream = TcpStream::connect("127.0.0.1:8000").await?; + /// let (mut read_half, _) = stream.into_split(); + /// let mut buf = [0; 10]; + /// let mut buf = ReadBuf::new(&mut buf); + /// + /// poll_fn(|cx| { + /// read_half.poll_peek(cx, &mut buf) + /// }).await?; + /// + /// Ok(()) + /// } + /// ``` + /// + /// [`TcpStream::poll_peek`]: TcpStream::poll_peek + Future pollPeek({required Context cx, required ReadBuf buf}); + + /// Waits for the socket to become readable. + /// + /// This function is equivalent to `ready(Interest::READABLE)` and is usually + /// paired with `try_read()`. + /// + /// This function is also equivalent to [`TcpStream::ready`]. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read that fails with `WouldBlock` or + /// `Poll::Pending`. + Future readable(); + + /// Waits for any of the requested ready states. + /// + /// This function is usually paired with [`try_read()`]. It can be used instead + /// of [`readable()`] to check the returned ready set for [`Ready::READABLE`] + /// and [`Ready::READ_CLOSED`] events. + /// + /// The function may complete without the socket being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// This function is equivalent to [`TcpStream::ready`]. + /// + /// [`try_read()`]: Self::try_read + /// [`readable()`]: Self::readable + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read or write that fails with `WouldBlock` or + /// `Poll::Pending`. + Future ready({required Interest interest}); + + /// Attempts to put the two halves of a `TcpStream` back together and + /// recover the original socket. Succeeds only if the two halves + /// originated from the same call to [`into_split`]. + /// + /// [`into_split`]: TcpStream::into_split() + Future reunite({required OwnedWriteHalf other}); + + /// Tries to read data from the stream into the provided buffer, returning how + /// many bytes were read. + /// + /// Receives any pending data from the socket but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read()` is non-blocking, the buffer does not have to be stored by + /// the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] or [`ready()`] is used with this function. 
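+  ///
+  /// A minimal Dart-side sketch, assuming an `OwnedReadHalf` named `readHalf`
+  /// (obtained elsewhere, e.g. by splitting a connected `TcpStream`) and a
+  /// buffer `buf` of the generated `U8` buffer type:
+  ///
+  /// ```dart
+  /// // Wait for readiness, then attempt a non-blocking read.
+  /// await readHalf.readable();
+  /// final n = await readHalf.tryRead(buf: buf);
+  /// ```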
+ /// + /// [`readable()`]: Self::readable() + /// [`ready()`]: Self::ready() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. If `n` is `0`, then it can indicate one of two scenarios: + /// + /// 1. The stream's read half is closed and will no longer yield data. + /// 2. The specified buffer was 0 bytes in length. + /// + /// If the stream is not ready to read data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + Future tryRead({required U8 buf}); + + /// Tries to read data from the stream into the provided buffers, returning + /// how many bytes were read. + /// + /// Data is copied to fill each buffer in order, with the final buffer + /// written to possibly being only partially filled. This method behaves + /// equivalently to a single call to [`try_read()`] with concatenated + /// buffers. + /// + /// Receives any pending data from the socket but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read_vectored()` is non-blocking, the buffer does not have to be + /// stored by the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] or [`ready()`] is used with this function. + /// + /// [`try_read()`]: Self::try_read() + /// [`readable()`]: Self::readable() + /// [`ready()`]: Self::ready() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. `Ok(0)` indicates the stream's read half is closed + /// and will no longer yield data. If the stream is not ready to read data + /// `Err(io::ErrorKind::WouldBlock)` is returned. + Future tryReadVectored({required IoSliceMut bufs}); +} + +// Rust type: RustOpaqueMoi> +abstract class OwnedWriteHalf implements RustOpaqueInterface { + /// Returns the local address that this stream is bound to. + Future localAddr(); + + /// Returns the remote address that this stream is connected to. + Future peerAddr(); + + /// Waits for any of the requested ready states. + /// + /// This function is usually paired with [`try_write()`]. It can be used instead + /// of [`writable()`] to check the returned ready set for [`Ready::WRITABLE`] + /// and [`Ready::WRITE_CLOSED`] events. + /// + /// The function may complete without the socket being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// This function is equivalent to [`TcpStream::ready`]. + /// + /// [`try_write()`]: Self::try_write + /// [`writable()`]: Self::writable + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read or write that fails with `WouldBlock` or + /// `Poll::Pending`. + Future ready({required Interest interest}); + + /// Attempts to put the two halves of a `TcpStream` back together and + /// recover the original socket. Succeeds only if the two halves + /// originated from the same call to [`into_split`]. + /// + /// [`into_split`]: TcpStream::into_split() + Future reunite({required OwnedReadHalf other}); + + /// Tries to write a buffer to the stream, returning how many bytes were + /// written. 
+ /// + /// The function will attempt to write the entire contents of `buf`, but + /// only part of the buffer may be written. + /// + /// This function is usually paired with `writable()`. + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the stream is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + Future tryWrite({required List buf}); + + /// Tries to write several buffers to the stream, returning how many bytes + /// were written. + /// + /// Data is written from each buffer in order, with the final buffer read + /// from possible being only partially consumed. This method behaves + /// equivalently to a single call to [`try_write()`] with concatenated + /// buffers. + /// + /// This function is usually paired with `writable()`. + /// + /// [`try_write()`]: Self::try_write() + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the stream is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + Future tryWriteVectored({required List bufs}); + + /// Waits for the socket to become writable. + /// + /// This function is equivalent to `ready(Interest::WRITABLE)` and is usually + /// paired with `try_write()`. + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to write that fails with `WouldBlock` or + /// `Poll::Pending`. + Future writable(); +} + +// Rust type: RustOpaqueMoi> +abstract class ReuniteError implements RustOpaqueInterface { + OwnedReadHalf get field0; + + OwnedWriteHalf get field1; + + set field0(OwnedReadHalf field0); + + set field1(OwnedWriteHalf field1); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/tcp/listener.dart b/mobile_app/lib/src/rust/third_party/tokio/net/tcp/listener.dart new file mode 100644 index 0000000..899f6ce --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/tcp/listener.dart @@ -0,0 +1,13 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `bind` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `fmt`, `try_from` + +// Rust type: RustOpaqueMoi >>> +abstract class PollResultTcpStreamSocketAddr implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/tcp/split_owned.dart b/mobile_app/lib/src/rust/third_party/tokio/net/tcp/split_owned.dart new file mode 100644 index 0000000..931e92b --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/tcp/split_owned.dart @@ -0,0 +1,20 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `try_read_buf` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `as_ref`, `as_ref`, `drop`, `fmt`, `fmt`, `fmt`, `fmt`, `is_write_vectored`, `poll_flush`, `poll_read`, `poll_shutdown`, `poll_write_vectored`, `poll_write` +// These functions have error during generation (see debug logs or enable `stop_on_error: true` for more details): `forget` + +// Rust type: RustOpaqueMoi]>> +abstract class IoSliceMut implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class IoSlice implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi >>> +abstract class PollResultUsize implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/udp.dart b/mobile_app/lib/src/rust/third_party/tokio/net/udp.dart new file mode 100644 index 0000000..32fe7b8 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/udp.dart @@ -0,0 +1,13 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `async_io`, `bind`, `connect`, `recv_buf_from`, `recv_buf`, `send_to`, `try_io`, `try_recv_buf_from`, `try_recv_buf` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `fmt`, `try_from` + +// Rust type: RustOpaqueMoi >>> +abstract class PollResultSocketAddr implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/unix.dart b/mobile_app/lib/src/rust/third_party/tokio/net/unix.dart new file mode 100644 index 0000000..dd3efc5 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/unix.dart @@ -0,0 +1,41 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import '../../../lib.dart'; +import '../net.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// Rust type: RustOpaqueMoi> +abstract class SocketAddr implements RustOpaqueInterface, ToSocketAddrs { + /// Returns the contents of this address if it is a `pathname` address. + /// + /// Documentation reflected in [`SocketAddr`]. + /// + /// [`SocketAddr`]: std::os::unix::net::SocketAddr + Future asPathname(); + + /// Returns `true` if the address is unnamed. + /// + /// Documentation reflected in [`SocketAddr`]. + /// + /// [`SocketAddr`]: std::os::unix::net::SocketAddr + Future isUnnamed(); +} + +// Rust type: RustOpaqueMoi> +abstract class UCred implements RustOpaqueInterface { + /// Gets GID (group ID) of the process. + Future gid(); + + /// Gets PID (process ID) of the process. + /// + /// This is only implemented under Linux, Android, iOS, macOS, Solaris, + /// Illumos and Cygwin. On other platforms this will always return `None`. 
+ Future pid(); + + /// Gets UID (user ID) of the process. + Future uid(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/unix/datagram/socket.dart b/mobile_app/lib/src/rust/third_party/tokio/net/unix/datagram/socket.dart new file mode 100644 index 0000000..3a90df1 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/unix/datagram/socket.dart @@ -0,0 +1,13 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `async_io`, `bind`, `connect`, `poll_send_to`, `recv_buf_from`, `recv_buf`, `send_to`, `try_io`, `try_recv_buf_from`, `try_recv_buf`, `try_send_to` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `as_fd`, `as_raw_fd`, `fmt`, `try_from` + +// Rust type: RustOpaqueMoi> +abstract class Shutdown implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/unix/listener.dart b/mobile_app/lib/src/rust/third_party/tokio/net/unix/listener.dart new file mode 100644 index 0000000..0441883 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/unix/listener.dart @@ -0,0 +1,13 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `bind` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `as_fd`, `as_raw_fd`, `fmt`, `try_from` + +// Rust type: RustOpaqueMoi >>> +abstract class PollResultUnixStreamSocketAddr implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/net/unix/pipe.dart b/mobile_app/lib/src/rust/third_party/tokio/net/unix/pipe.dart new file mode 100644 index 0000000..ac63416 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/net/unix/pipe.dart @@ -0,0 +1,2871 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import '../../../../lib.dart'; +import '../../io.dart'; +import '../../process/sys.dart'; +import '../../signal/unix.dart'; +import '../../sync/broadcast.dart'; +import '../../sync/broadcast/error.dart'; +import '../../sync/mpsc/bounded.dart'; +import '../../sync/mpsc/error.dart'; +import '../../sync/oneshot.dart'; +import '../../sync/watch.dart'; +import '../tcp/split_owned.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `open_receiver`, `open_sender`, `try_read_buf` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `PipeEnd` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `as_fd`, `as_fd`, `as_raw_fd`, `as_raw_fd`, `assert_receiver_is_total_eq`, `clone`, `clone`, `eq`, `fmt`, `fmt`, `fmt`, `fmt`, `is_write_vectored`, `poll_flush`, `poll_read`, `poll_shutdown`, `poll_write_vectored`, `poll_write` + +/// Creates a new anonymous Unix pipe. +/// +/// This function will open a new pipe and associate both pipe ends with the default +/// event loop. +/// +/// If you need to create a pipe for communication with a spawned process, you can +/// use [`Stdio::piped()`] instead. +/// +/// [`Stdio::piped()`]: std::process::Stdio::piped +/// +/// # Errors +/// +/// If creating a pipe fails, this function will return with the related OS error. +/// +/// # Examples +/// +/// Create a pipe and pass the writing end to a spawned process. +/// +/// ```no_run +/// use tokio::net::unix::pipe; +/// use tokio::process::Command; +/// # use tokio::io::AsyncReadExt; +/// # use std::error::Error; +/// +/// # async fn dox() -> Result<(), Box> { +/// let (tx, mut rx) = pipe::pipe()?; +/// let mut buffer = String::new(); +/// +/// let status = Command::new("echo") +/// .arg("Hello, world!") +/// .stdout(tx.into_blocking_fd()?) +/// .status(); +/// rx.read_to_string(&mut buffer).await?; +/// +/// assert!(status.await?.success()); +/// assert_eq!(buffer, "Hello, world!\n"); +/// # Ok(()) +/// # } +/// ``` +/// +/// # Panics +/// +/// This function panics if it is not called from within a runtime with +/// IO enabled. +/// +/// The runtime is usually set implicitly when this function is called +/// from a future driven by a tokio runtime, otherwise runtime can be set +/// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. +Future<(Sender, Receiver)> pipe() => + RustLib.instance.api.tokioNetUnixPipePipe(); + +// Rust type: RustOpaqueMoi> +abstract class OpenOptions implements RustOpaqueInterface { + /// Sets the option for the append mode. + /// + /// This option, when true, means that writes will append to a file instead + /// of overwriting previous contents. Note that setting + /// `.write(true).append(true)` has the same effect as setting only + /// `.append(true)`. + /// + /// For most filesystems, the operating system guarantees that all writes are + /// atomic: no writes get mangled because another process writes at the same + /// time. + /// + /// One maybe obvious note when using append-mode: make sure that all data + /// that belongs together is written to the file in one operation. 
This + /// can be done by concatenating strings before passing them to [`write()`], + /// or using a buffered writer (with a buffer of adequate size), + /// and calling [`flush()`] when the message is complete. + /// + /// If a file is opened with both read and append access, beware that after + /// opening, and after every write, the position for reading may be set at the + /// end of the file. So, before writing, save the current position (using + /// [`seek`]`(`[`SeekFrom`]`::`[`Current`]`(0))`), and restore it before the next read. + /// + /// This is an async version of [`std::fs::OpenOptions::append`][std] + /// + /// [std]: std::fs::OpenOptions::append + /// + /// ## Note + /// + /// This function doesn't create the file if it doesn't exist. Use the [`create`] + /// method to do so. + /// + /// [`write()`]: crate::io::AsyncWriteExt::write + /// [`flush()`]: crate::io::AsyncWriteExt::flush + /// [`seek`]: crate::io::AsyncSeekExt::seek + /// [`SeekFrom`]: std::io::SeekFrom + /// [`Current`]: std::io::SeekFrom::Current + /// [`create`]: OpenOptions::create + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let file = OpenOptions::new() + /// .append(true) + /// .open("foo.txt") + /// .await?; + /// + /// Ok(()) + /// } + /// ``` + Future append({required bool append}); + + /// Sets the option for creating a new file. + /// + /// This option indicates whether a new file will be created if the file + /// does not yet already exist. + /// + /// In order for the file to be created, [`write`] or [`append`] access must + /// be used. + /// + /// This is an async version of [`std::fs::OpenOptions::create`][std] + /// + /// [std]: std::fs::OpenOptions::create + /// [`write`]: OpenOptions::write + /// [`append`]: OpenOptions::append + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let file = OpenOptions::new() + /// .write(true) + /// .create(true) + /// .open("foo.txt") + /// .await?; + /// + /// Ok(()) + /// } + /// ``` + Future create({required bool create}); + + /// Sets the option to always create a new file. + /// + /// This option indicates whether a new file will be created. No file is + /// allowed to exist at the target location, also no (dangling) symlink. + /// + /// This option is useful because it is atomic. Otherwise between checking + /// whether a file exists and creating a new one, the file may have been + /// created by another process (a TOCTOU race condition / attack). + /// + /// If `.create_new(true)` is set, [`.create()`] and [`.truncate()`] are + /// ignored. + /// + /// The file must be opened with write or append access in order to create a + /// new file. + /// + /// This is an async version of [`std::fs::OpenOptions::create_new`][std] + /// + /// [std]: std::fs::OpenOptions::create_new + /// [`.create()`]: OpenOptions::create + /// [`.truncate()`]: OpenOptions::truncate + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let file = OpenOptions::new() + /// .write(true) + /// .create_new(true) + /// .open("foo.txt") + /// .await?; + /// + /// Ok(()) + /// } + /// ``` + Future createNew({required bool createNew}); + + /// Passes custom flags to the `flags` argument of `open`. 
+ /// + /// The bits that define the access mode are masked out with `O_ACCMODE`, to + /// ensure they do not interfere with the access mode set by Rusts options. + /// + /// Custom flags can only set flags, not remove flags set by Rusts options. + /// This options overwrites any previously set custom flags. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let mut options = OpenOptions::new(); + /// options.write(true); + /// if cfg!(unix) { + /// options.custom_flags(libc::O_NOFOLLOW); + /// } + /// let file = options.open("foo.txt").await?; + /// + /// Ok(()) + /// } + /// ``` + Future customFlags({required int flags}); + + static Future default_() => + RustLib.instance.api.tokioNetUnixPipeOpenOptionsDefault(); + + /// Sets the mode bits that a new file will be created with. + /// + /// If a new file is created as part of an `OpenOptions::open` call then this + /// specified `mode` will be used as the permission bits for the new file. + /// If no `mode` is set, the default of `0o666` will be used. + /// The operating system masks out bits with the system's `umask`, to produce + /// the final permissions. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let mut options = OpenOptions::new(); + /// options.mode(0o644); // Give read/write for owner and read for others. + /// let file = options.open("foo.txt").await?; + /// + /// Ok(()) + /// } + /// ``` + Future mode({required int mode}); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates a blank new set of options ready for configuration. + /// + /// All options are initially set to `false`. + /// + /// This is an async version of [`std::fs::OpenOptions::new`][std] + /// + /// [std]: std::fs::OpenOptions::new + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// + /// let mut options = OpenOptions::new(); + /// let future = options.read(true).open("foo.txt"); + /// ``` + static Future newInstance() => + RustLib.instance.api.tokioNetUnixPipeOpenOptionsNew(); + + /// Sets the option for read access. + /// + /// This option, when true, will indicate that the file should be + /// `read`-able if opened. + /// + /// This is an async version of [`std::fs::OpenOptions::read`][std] + /// + /// [std]: std::fs::OpenOptions::read + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let file = OpenOptions::new() + /// .read(true) + /// .open("foo.txt") + /// .await?; + /// + /// Ok(()) + /// } + /// ``` + Future read({required bool read}); + + /// Sets the option for truncating a previous file. + /// + /// If a file is successfully opened with this option set it will truncate + /// the file to 0 length if it already exists. + /// + /// The file must be opened with write access for truncate to work. 
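+  ///
+  /// A minimal Dart-side sketch through this binding, assuming the generated
+  /// builder methods declared in this class (note that each one returns a
+  /// `Future` here):
+  ///
+  /// ```dart
+  /// // Configure write access and truncation on a fresh options value.
+  /// final options = await OpenOptions.newInstance();
+  /// await options.write(write: true);
+  /// await options.truncate(truncate: true);
+  /// ```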
+ /// + /// This is an async version of [`std::fs::OpenOptions::truncate`][std] + /// + /// [std]: std::fs::OpenOptions::truncate + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let file = OpenOptions::new() + /// .write(true) + /// .truncate(true) + /// .open("foo.txt") + /// .await?; + /// + /// Ok(()) + /// } + /// ``` + Future truncate({required bool truncate}); + + /// Sets the option to skip the check for FIFO file type. + /// + /// By default, [`open_receiver`] and [`open_sender`] functions will check + /// if the opened file is a FIFO file. Set this option to `true` if you are + /// sure the file is a FIFO file. + /// + /// [`open_receiver`]: OpenOptions::open_receiver + /// [`open_sender`]: OpenOptions::open_sender + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use nix::{unistd::mkfifo, sys::stat::Mode}; + /// # use std::error::Error; + /// + /// // Our program has exclusive access to this path. + /// const FIFO_NAME: &str = "path/to/a/new/fifo"; + /// + /// # async fn dox() -> Result<(), Box> { + /// mkfifo(FIFO_NAME, Mode::S_IRWXU)?; + /// let rx = pipe::OpenOptions::new() + /// .unchecked(true) + /// .open_receiver(FIFO_NAME)?; + /// # Ok(()) + /// # } + /// ``` + Future unchecked({required bool value}); + + /// Sets the option for write access. + /// + /// This option, when true, will indicate that the file should be + /// `write`-able if opened. + /// + /// This is an async version of [`std::fs::OpenOptions::write`][std] + /// + /// [std]: std::fs::OpenOptions::write + /// + /// # Examples + /// + /// ```no_run + /// use tokio::fs::OpenOptions; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let file = OpenOptions::new() + /// .write(true) + /// .open("foo.txt") + /// .await?; + /// + /// Ok(()) + /// } + /// ``` + Future write({required bool write}); +} + +// Rust type: RustOpaqueMoi >>> +abstract class PollResult implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Receiver implements RustOpaqueInterface { + /// Creates a new `Receiver` from a [`File`]. + /// + /// This function is intended to construct a pipe from a [`File`] representing + /// a special FIFO file. It will check if the file is a pipe and has read access, + /// set it in non-blocking mode and perform the conversion. + /// + /// # Errors + /// + /// Fails with `io::ErrorKind::InvalidInput` if the file is not a pipe or it + /// does not have read access. Also fails with any standard OS error if it occurs. + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromFile({required File file}) => + RustLib.instance.api.tokioNetUnixPipeReceiverFromFile(file: file); + + /// Creates a new `Receiver` from a [`File`] without checking pipe properties. + /// + /// This function is intended to construct a pipe from a File representing + /// a special FIFO file. The conversion assumes nothing about the underlying + /// file; it is left up to the user to make sure it is opened with read access, + /// represents a pipe and is set in non-blocking mode. 
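+  ///
+  /// A minimal Dart-side sketch, assuming `file` is an already-opened FIFO
+  /// handle of the `File` type expected by this binding:
+  ///
+  /// ```dart
+  /// // Wrap an existing FIFO handle without any property checks.
+  /// final rx = await Receiver.fromFileUnchecked(file: file);
+  /// ```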
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::fs::OpenOptions; + /// use std::os::unix::fs::{FileTypeExt, OpenOptionsExt}; + /// # use std::error::Error; + /// + /// const FIFO_NAME: &str = "path/to/a/fifo"; + /// + /// # async fn dox() -> Result<(), Box> { + /// let file = OpenOptions::new() + /// .read(true) + /// .custom_flags(libc::O_NONBLOCK) + /// .open(FIFO_NAME)?; + /// if file.metadata()?.file_type().is_fifo() { + /// let rx = pipe::Receiver::from_file_unchecked(file)?; + /// /* use the Receiver */ + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromFileUnchecked({required File file}) => + RustLib.instance.api + .tokioNetUnixPipeReceiverFromFileUnchecked(file: file); + + /// Creates a new `Receiver` from an [`OwnedFd`]. + /// + /// This function is intended to construct a pipe from an [`OwnedFd`] representing + /// an anonymous pipe or a special FIFO file. It will check if the file descriptor + /// is a pipe and has read access, set it in non-blocking mode and perform the + /// conversion. + /// + /// # Errors + /// + /// Fails with `io::ErrorKind::InvalidInput` if the file descriptor is not a pipe + /// or it does not have read access. Also fails with any standard OS error if it + /// occurs. + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromOwnedFd({required OwnedFd ownedFd}) => + RustLib.instance.api + .tokioNetUnixPipeReceiverFromOwnedFd(ownedFd: ownedFd); + + /// Creates a new `Receiver` from an [`OwnedFd`] without checking pipe properties. + /// + /// This function is intended to construct a pipe from an [`OwnedFd`] representing + /// an anonymous pipe or a special FIFO file. The conversion assumes nothing about + /// the underlying pipe; it is left up to the user to make sure that the file + /// descriptor represents the reading end of a pipe and the pipe is set in + /// non-blocking mode. + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromOwnedFdUnchecked({required OwnedFd ownedFd}) => + RustLib.instance.api + .tokioNetUnixPipeReceiverFromOwnedFdUnchecked(ownedFd: ownedFd); + + /// Converts the pipe into an [`OwnedFd`] in blocking mode. + /// + /// This function will deregister this pipe end from the event loop, set + /// it in blocking mode and perform the conversion. + Future intoBlockingFd(); + + /// Converts the pipe into an [`OwnedFd`] in nonblocking mode. + /// + /// This function will deregister this pipe end from the event loop and + /// perform the conversion. 
Returned file descriptor will be in nonblocking + /// mode. + Future intoNonblockingFd(); + + /// Polls for read readiness. + /// + /// If the pipe is not currently ready for reading, this method will + /// store a clone of the `Waker` from the provided `Context`. When the pipe + /// becomes ready for reading, `Waker::wake` will be called on the waker. + /// + /// Note that on multiple calls to `poll_read_ready` or `poll_read`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. + /// + /// This function is intended for cases where creating and pinning a future + /// via [`readable`] is not feasible. Where possible, using [`readable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// [`readable`]: Self::readable + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the pipe is not ready for reading. + /// * `Poll::Ready(Ok(()))` if the pipe is ready for reading. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + Future pollReadReady({required Context cx}); + + /// Waits for the pipe to become readable. + /// + /// This function is equivalent to `ready(Interest::READABLE)` and is usually + /// paired with [`try_read()`]. + /// + /// [`try_read()`]: Self::try_read() + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Open a reading end of a fifo + /// let rx = pipe::OpenOptions::new().open_receiver("path/to/a/fifo")?; + /// + /// let mut msg = vec![0; 1024]; + /// + /// loop { + /// // Wait for the pipe to be readable + /// rx.readable().await?; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match rx.try_read(&mut msg) { + /// Ok(n) => { + /// msg.truncate(n); + /// break; + /// } + /// Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// println!("GOT = {:?}", msg); + /// Ok(()) + /// } + /// ``` + Future readable(); + + /// Waits for any of the requested ready states. + /// + /// This function can be used instead of [`readable()`] to check the returned + /// ready set for [`Ready::READABLE`] and [`Ready::READ_CLOSED`] events. + /// + /// The function may complete without the pipe being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// [`readable()`]: Self::readable + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to read that fails with `WouldBlock` or + /// `Poll::Pending`. + Future ready({required Interest interest}); + + /// Tries to read data from the pipe into the provided buffer, returning how + /// many bytes were read. + /// + /// Reads any pending data from the pipe but does not wait for new data + /// to arrive. On success, returns the number of bytes read. 
Because + /// `try_read()` is non-blocking, the buffer does not have to be stored by + /// the async task and can exist entirely on the stack. + /// + /// Usually [`readable()`] is used with this function. + /// + /// [`readable()`]: Self::readable() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. If `n` is `0`, then it can indicate one of two scenarios: + /// + /// 1. The pipe's writing end is closed and will no longer write data. + /// 2. The specified buffer was 0 bytes in length. + /// + /// If the pipe is not ready to read data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Open a reading end of a fifo + /// let rx = pipe::OpenOptions::new().open_receiver("path/to/a/fifo")?; + /// + /// let mut msg = vec![0; 1024]; + /// + /// loop { + /// // Wait for the pipe to be readable + /// rx.readable().await?; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match rx.try_read(&mut msg) { + /// Ok(n) => { + /// msg.truncate(n); + /// break; + /// } + /// Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// println!("GOT = {:?}", msg); + /// Ok(()) + /// } + /// ``` + Future tryRead({required U8 buf}); + + /// Tries to read data from the pipe into the provided buffers, returning + /// how many bytes were read. + /// + /// Data is copied to fill each buffer in order, with the final buffer + /// written to possibly being only partially filled. This method behaves + /// equivalently to a single call to [`try_read()`] with concatenated + /// buffers. + /// + /// Reads any pending data from the pipe but does not wait for new data + /// to arrive. On success, returns the number of bytes read. Because + /// `try_read_vectored()` is non-blocking, the buffer does not have to be + /// stored by the async task and can exist entirely on the stack. + /// + /// Usually, [`readable()`] is used with this function. + /// + /// [`try_read()`]: Self::try_read() + /// [`readable()`]: Self::readable() + /// + /// # Return + /// + /// If data is successfully read, `Ok(n)` is returned, where `n` is the + /// number of bytes read. `Ok(0)` indicates the pipe's writing end is + /// closed and will no longer write data. If the pipe is not ready to read + /// data `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Open a reading end of a fifo + /// let rx = pipe::OpenOptions::new().open_receiver("path/to/a/fifo")?; + /// + /// loop { + /// // Wait for the pipe to be readable + /// rx.readable().await?; + /// + /// // Creating the buffer **after** the `await` prevents it from + /// // being stored in the async task. + /// let mut buf_a = [0; 512]; + /// let mut buf_b = [0; 1024]; + /// let mut bufs = [ + /// io::IoSliceMut::new(&mut buf_a), + /// io::IoSliceMut::new(&mut buf_b), + /// ]; + /// + /// // Try to read data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. 
+ /// match rx.try_read_vectored(&mut bufs) { + /// Ok(0) => break, + /// Ok(n) => { + /// println!("read {} bytes", n); + /// } + /// Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryReadVectored({required IoSliceMut bufs}); +} + +// Rust type: RustOpaqueMoi>> +abstract class ReceiverT implements RustOpaqueInterface { + /// Blocking receive to call outside of asynchronous contexts. + /// + /// # Panics + /// + /// This function panics if called within an asynchronous execution + /// context. + /// + /// # Examples + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use std::thread; + /// use tokio::sync::broadcast; + /// + /// #[tokio::main] + /// async fn main() { + /// let (tx, mut rx) = broadcast::channel(16); + /// + /// let sync_code = thread::spawn(move || { + /// assert_eq!(rx.blocking_recv(), Ok(10)); + /// }); + /// + /// let _ = tx.send(10); + /// sync_code.join().unwrap(); + /// } + /// # } + /// ``` + Future blockingRecv(); + + /// Variant of [`Self::recv_many`] for blocking contexts. + /// + /// The same conditions as in [`Self::blocking_recv`] apply. + Future blockingRecvMany( + {required VecT buffer, required BigInt limit}); + + /// Returns a reference to the most recently sent value. + /// + /// This method does not mark the returned value as seen, so future calls to + /// [`changed`] may return immediately even if you have already seen the + /// value with a call to `borrow`. + /// + /// Outstanding borrows hold a read lock on the inner value. This means that + /// long-lived borrows could cause the producer half to block. It is recommended + /// to keep the borrow as short-lived as possible. Additionally, if you are + /// running in an environment that allows `!Send` futures, you must ensure that + /// the returned `Ref` type is never held alive across an `.await` point, + /// otherwise, it can lead to a deadlock. + /// + /// The priority policy of the lock is dependent on the underlying lock + /// implementation, and this type does not guarantee that any particular policy + /// will be used. In particular, a producer which is waiting to acquire the lock + /// in `send` might or might not block concurrent calls to `borrow`, e.g.: + /// + ///
Potential deadlock example + /// + /// ```text + /// // Task 1 (on thread A) | // Task 2 (on thread B) + /// let _ref1 = rx.borrow(); | + /// | // will block + /// | let _ = tx.send(()); + /// // may deadlock | + /// let _ref2 = rx.borrow(); | + /// ``` + ///
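+  ///
+  /// A minimal Dart-side sketch through this binding, assuming a watch-style
+  /// receiver named `rx`:
+  ///
+  /// ```dart
+  /// // Read the most recently sent value without marking it as seen.
+  /// final current = await rx.borrow();
+  /// ```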
+ /// + /// For more information on when to use this method versus + /// [`borrow_and_update`], see [here](self#borrow_and_update-versus-borrow). + /// + /// [`changed`]: Receiver::changed + /// [`borrow_and_update`]: Receiver::borrow_and_update + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::watch; + /// + /// let (_, rx) = watch::channel("hello"); + /// assert_eq!(*rx.borrow(), "hello"); + /// ``` + Future borrow(); + + /// Returns a reference to the most recently sent value and marks that value + /// as seen. + /// + /// This method marks the current value as seen. Subsequent calls to [`changed`] + /// will not return immediately until the [`Sender`] has modified the shared + /// value again. + /// + /// Outstanding borrows hold a read lock on the inner value. This means that + /// long-lived borrows could cause the producer half to block. It is recommended + /// to keep the borrow as short-lived as possible. Additionally, if you are + /// running in an environment that allows `!Send` futures, you must ensure that + /// the returned `Ref` type is never held alive across an `.await` point, + /// otherwise, it can lead to a deadlock. + /// + /// The priority policy of the lock is dependent on the underlying lock + /// implementation, and this type does not guarantee that any particular policy + /// will be used. In particular, a producer which is waiting to acquire the lock + /// in `send` might or might not block concurrent calls to `borrow`, e.g.: + /// + ///
Potential deadlock example + /// + /// ```text + /// // Task 1 (on thread A) | // Task 2 (on thread B) + /// let _ref1 = rx1.borrow_and_update(); | + /// | // will block + /// | let _ = tx.send(()); + /// // may deadlock | + /// let _ref2 = rx2.borrow_and_update(); | + /// ``` + ///
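The difference in "seen" tracking between `borrow` and `borrow_and_update` can be summed up in a short Rust sketch against plain Tokio (illustrative only):

```rust
use tokio::sync::watch;

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let (tx, mut rx) = watch::channel(0);

    tx.send(1).unwrap();

    // `borrow` reads the value but does not mark it as seen...
    assert_eq!(*rx.borrow(), 1);
    assert!(rx.has_changed().unwrap());

    // ...whereas `borrow_and_update` marks it as seen.
    assert_eq!(*rx.borrow_and_update(), 1);
    assert!(!rx.has_changed().unwrap());
}
```

Only `borrow_and_update` clears the pending-change state, which is why it pairs naturally with `changed`.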
+ /// + /// For more information on when to use this method versus [`borrow`], see + /// [here](self#borrow_and_update-versus-borrow). + /// + /// [`changed`]: Receiver::changed + /// [`borrow`]: Receiver::borrow + Future borrowAndUpdate(); + + /// Returns the current capacity of the channel. + /// + /// The capacity goes down when the sender sends a value by calling [`Sender::send`] or by reserving + /// capacity with [`Sender::reserve`]. The capacity goes up when values are received. + /// This is distinct from [`max_capacity`], which always returns buffer capacity initially + /// specified when calling [`channel`]. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel::<()>(5); + /// + /// assert_eq!(rx.capacity(), 5); + /// + /// // Making a reservation drops the capacity by one. + /// let permit = tx.reserve().await.unwrap(); + /// assert_eq!(rx.capacity(), 4); + /// assert_eq!(rx.len(), 0); + /// + /// // Sending and receiving a value increases the capacity by one. + /// permit.send(()); + /// assert_eq!(rx.len(), 1); + /// rx.recv().await.unwrap(); + /// assert_eq!(rx.capacity(), 5); + /// + /// // Directly sending a message drops the capacity by one. + /// tx.send(()).await.unwrap(); + /// assert_eq!(rx.capacity(), 4); + /// assert_eq!(rx.len(), 1); + /// + /// // Receiving the message increases the capacity by one. + /// rx.recv().await.unwrap(); + /// assert_eq!(rx.capacity(), 5); + /// assert_eq!(rx.len(), 0); + /// # } + /// ``` + /// [`capacity`]: Receiver::capacity + /// [`max_capacity`]: Receiver::max_capacity + Future capacity(); + + /// Waits for a change notification, then marks the current value as seen. + /// + /// If the current value in the channel has not yet been marked seen when + /// this method is called, the method marks that value seen and returns + /// immediately. If the newest value has already been marked seen, then the + /// method sleeps until a new message is sent by a [`Sender`] connected to + /// this `Receiver`, or until all [`Sender`]s are dropped. + /// + /// For more information, see + /// [*Change notifications*](self#change-notifications) in the module-level documentation. + /// + /// # Errors + /// + /// Returns a [`RecvError`](error::RecvError) if the channel has been closed __AND__ + /// the current value is seen. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If you use it as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that no values have been marked + /// seen by this call to `changed`. + /// + /// [`Sender`]: struct@Sender + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::watch; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = watch::channel("hello"); + /// + /// tokio::spawn(async move { + /// tx.send("goodbye").unwrap(); + /// }); + /// + /// assert!(rx.changed().await.is_ok()); + /// assert_eq!(*rx.borrow_and_update(), "goodbye"); + /// + /// // The `tx` handle has been dropped + /// assert!(rx.changed().await.is_err()); + /// # } + /// ``` + Future changed(); + + /// Closes the receiving half of a channel without dropping it. + /// + /// This prevents any further messages from being sent on the channel while + /// still enabling the receiver to drain messages that are buffered. 
Any + /// outstanding [`Permit`] values will still be able to send messages. + /// + /// To guarantee that no messages are dropped, after calling `close()`, + /// `recv()` must be called until `None` is returned. If there are + /// outstanding [`Permit`] or [`OwnedPermit`] values, the `recv` method will + /// not return `None` until those are released. + /// + /// [`Permit`]: Permit + /// [`OwnedPermit`]: OwnedPermit + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(20); + /// + /// tokio::spawn(async move { + /// let mut i = 0; + /// while let Ok(permit) = tx.reserve().await { + /// permit.send(i); + /// i += 1; + /// } + /// }); + /// + /// rx.close(); + /// + /// while let Some(msg) = rx.recv().await { + /// println!("got {}", msg); + /// } + /// + /// // Channel closed and no messages are lost. + /// # } + /// ``` + Future close(); + + /// Checks if this channel contains a message that this receiver has not yet + /// seen. The current value will not be marked as seen. + /// + /// Although this method is called `has_changed`, it does not check + /// messages for equality, so this call will return true even if the current + /// message is equal to the previous message. + /// + /// # Errors + /// + /// Returns a [`RecvError`](error::RecvError) if and only if the channel has been closed. + /// + /// # Examples + /// + /// ## Basic usage + /// + /// ``` + /// use tokio::sync::watch; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = watch::channel("hello"); + /// + /// tx.send("goodbye").unwrap(); + /// + /// assert!(rx.has_changed().unwrap()); + /// assert_eq!(*rx.borrow_and_update(), "goodbye"); + /// + /// // The value has been marked as seen + /// assert!(!rx.has_changed().unwrap()); + /// # } + /// ``` + /// + /// ## Closed channel example + /// + /// ``` + /// use tokio::sync::watch; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, rx) = watch::channel("hello"); + /// tx.send("goodbye").unwrap(); + /// + /// drop(tx); + /// + /// // The channel is closed + /// assert!(rx.has_changed().is_err()); + /// # } + /// ``` + Future hasChanged(); + + /// Checks if a channel is closed. + /// + /// This method returns `true` if the channel has been closed. The channel is closed + /// when all [`Sender`] have been dropped. + /// + /// [`Sender`]: crate::sync::broadcast::Sender + /// + /// # Examples + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, rx) = broadcast::channel::<()>(10); + /// assert!(!rx.is_closed()); + /// + /// drop(tx); + /// + /// assert!(rx.is_closed()); + /// # } + /// ``` + Future isClosed(); + + /// Returns true if there aren't any messages in the channel that the [`Receiver`] + /// has yet to receive. 
+ /// + /// [`Receiver`]: crate::sync::broadcast::Receiver + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel(16); + /// + /// assert!(rx1.is_empty()); + /// + /// tx.send(10).unwrap(); + /// tx.send(20).unwrap(); + /// + /// assert!(!rx1.is_empty()); + /// assert_eq!(rx1.recv().await.unwrap(), 10); + /// assert_eq!(rx1.recv().await.unwrap(), 20); + /// assert!(rx1.is_empty()); + /// # } + /// ``` + Future isEmpty(); + + /// Checks if this receiver is terminated. + /// + /// This function returns true if this receiver has already yielded a [`Poll::Ready`] result. + /// If so, this receiver should no longer be polled. + /// + /// # Examples + /// + /// Sending a value and polling it. + /// + /// ``` + /// use tokio::sync::oneshot; + /// + /// use std::task::Poll; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = oneshot::channel(); + /// + /// // A receiver is not terminated when it is initialized. + /// assert!(!rx.is_terminated()); + /// + /// // A receiver is not terminated it is polled and is still pending. + /// let poll = futures::poll!(&mut rx); + /// assert_eq!(poll, Poll::Pending); + /// assert!(!rx.is_terminated()); + /// + /// // A receiver is not terminated if a value has been sent, but not yet read. + /// tx.send(0).unwrap(); + /// assert!(!rx.is_terminated()); + /// + /// // A receiver *is* terminated after it has been polled and yielded a value. + /// assert_eq!((&mut rx).await, Ok(0)); + /// assert!(rx.is_terminated()); + /// # } + /// ``` + /// + /// Dropping the sender. + /// + /// ``` + /// use tokio::sync::oneshot; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = oneshot::channel::<()>(); + /// + /// // A receiver is not immediately terminated when the sender is dropped. + /// drop(tx); + /// assert!(!rx.is_terminated()); + /// + /// // A receiver *is* terminated after it has been polled and yielded an error. + /// let _ = (&mut rx).await.unwrap_err(); + /// assert!(rx.is_terminated()); + /// # } + /// ``` + Future isTerminated(); + + /// Returns the number of messages that were sent into the channel and that + /// this [`Receiver`] has yet to receive. + /// + /// If the returned value from `len` is larger than the next largest power of 2 + /// of the capacity of the channel any call to [`recv`] will return an + /// `Err(RecvError::Lagged)` and any call to [`try_recv`] will return an + /// `Err(TryRecvError::Lagged)`, e.g. if the capacity of the channel is 10, + /// [`recv`] will start to return `Err(RecvError::Lagged)` once `len` returns + /// values larger than 16. + /// + /// [`Receiver`]: crate::sync::broadcast::Receiver + /// [`recv`]: crate::sync::broadcast::Receiver::recv + /// [`try_recv`]: crate::sync::broadcast::Receiver::try_recv + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel(16); + /// + /// tx.send(10).unwrap(); + /// tx.send(20).unwrap(); + /// + /// assert_eq!(rx1.len(), 2); + /// assert_eq!(rx1.recv().await.unwrap(), 10); + /// assert_eq!(rx1.len(), 1); + /// assert_eq!(rx1.recv().await.unwrap(), 20); + /// assert_eq!(rx1.len(), 0); + /// # } + /// ``` + Future len(); + + /// Marks the state as changed. 
+ /// + /// After invoking this method [`has_changed()`](Self::has_changed) + /// returns `true` and [`changed()`](Self::changed) returns + /// immediately, regardless of whether a new value has been sent. + /// + /// This is useful for triggering an initial change notification after + /// subscribing to synchronize new receivers. + Future markChanged(); + + /// Marks the state as unchanged. + /// + /// The current value will be considered seen by the receiver. + /// + /// This is useful if you are not interested in the current value + /// visible in the receiver. + Future markUnchanged(); + + /// Returns the maximum buffer capacity of the channel. + /// + /// The maximum capacity is the buffer capacity initially specified when calling + /// [`channel`]. This is distinct from [`capacity`], which returns the *current* + /// available buffer capacity: as messages are sent and received, the value + /// returned by [`capacity`] will go up or down, whereas the value + /// returned by [`max_capacity`] will remain constant. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, rx) = mpsc::channel::<()>(5); + /// + /// // both max capacity and capacity are the same at first + /// assert_eq!(rx.max_capacity(), 5); + /// assert_eq!(rx.capacity(), 5); + /// + /// // Making a reservation doesn't change the max capacity. + /// let permit = tx.reserve().await.unwrap(); + /// assert_eq!(rx.max_capacity(), 5); + /// // but drops the capacity by one + /// assert_eq!(rx.capacity(), 4); + /// # } + /// ``` + /// [`capacity`]: Receiver::capacity + /// [`max_capacity`]: Receiver::max_capacity + Future maxCapacity(); + + /// Polls to receive the next message on this channel. + /// + /// This method returns: + /// + /// * `Poll::Pending` if no messages are available but the channel is not + /// closed, or if a spurious failure happens. + /// * `Poll::Ready(Some(message))` if a message is available. + /// * `Poll::Ready(None)` if the channel has been closed and all messages + /// sent before it was closed have been received. + /// + /// When the method returns `Poll::Pending`, the `Waker` in the provided + /// `Context` is scheduled to receive a wakeup when a message is sent on any + /// receiver, or when the channel is closed. Note that on multiple calls to + /// `poll_recv` or `poll_recv_many`, only the `Waker` from the `Context` + /// passed to the most recent call is scheduled to receive a wakeup. + /// + /// If this method returns `Poll::Pending` due to a spurious failure, then + /// the `Waker` will be notified when the situation causing the spurious + /// failure has been resolved. Note that receiving such a wakeup does not + /// guarantee that the next call will succeed — it could fail with another + /// spurious failure. + Future pollRecv({required Context cx}); + + /// Polls to receive multiple messages on this channel, extending the provided buffer. + /// + /// This method returns: + /// * `Poll::Pending` if no messages are available but the channel is not closed, or if a + /// spurious failure happens. + /// * `Poll::Ready(count)` where `count` is the number of messages successfully received and + /// stored in `buffer`. This can be less than, or equal to, `limit`. + /// * `Poll::Ready(0)` if `limit` is set to zero or when the channel is closed. 
+ /// + /// When the method returns `Poll::Pending`, the `Waker` in the provided + /// `Context` is scheduled to receive a wakeup when a message is sent on any + /// receiver, or when the channel is closed. Note that on multiple calls to + /// `poll_recv` or `poll_recv_many`, only the `Waker` from the `Context` + /// passed to the most recent call is scheduled to receive a wakeup. + /// + /// Note that this method does not guarantee that exactly `limit` messages + /// are received. Rather, if at least one message is available, it returns + /// as many messages as it can up to the given limit. This method returns + /// zero only if the channel is closed (or if `limit` is zero). + /// + /// # Examples + /// + /// ``` + /// use std::task::{Context, Poll}; + /// use std::pin::Pin; + /// use tokio::sync::mpsc; + /// use futures::Future; + /// + /// struct MyReceiverFuture<'a> { + /// receiver: mpsc::Receiver, + /// buffer: &'a mut Vec, + /// limit: usize, + /// } + /// + /// impl<'a> Future for MyReceiverFuture<'a> { + /// type Output = usize; // Number of messages received + /// + /// fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + /// let MyReceiverFuture { receiver, buffer, limit } = &mut *self; + /// + /// // Now `receiver` and `buffer` are mutable references, and `limit` is copied + /// match receiver.poll_recv_many(cx, *buffer, *limit) { + /// Poll::Pending => Poll::Pending, + /// Poll::Ready(count) => Poll::Ready(count), + /// } + /// } + /// } + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, rx) = mpsc::channel(32); + /// let mut buffer = Vec::new(); + /// + /// let my_receiver_future = MyReceiverFuture { + /// receiver: rx, + /// buffer: &mut buffer, + /// limit: 3, + /// }; + /// + /// for i in 0..10 { + /// tx.send(i).await.unwrap(); + /// } + /// + /// let count = my_receiver_future.await; + /// assert_eq!(count, 3); + /// assert_eq!(buffer, vec![0,1,2]) + /// # } + /// ``` + Future pollRecvMany( + {required Context cx, required VecT buffer, required BigInt limit}); + + /// Receives the next value for this receiver. + /// + /// Each [`Receiver`] handle will receive a clone of all values sent + /// **after** it has subscribed. + /// + /// `Err(RecvError::Closed)` is returned when all `Sender` halves have + /// dropped, indicating that no further values can be sent on the channel. + /// + /// If the [`Receiver`] handle falls behind, once the channel is full, newly + /// sent values will overwrite old values. At this point, a call to [`recv`] + /// will return with `Err(RecvError::Lagged)` and the [`Receiver`]'s + /// internal cursor is updated to point to the oldest value still held by + /// the channel. A subsequent call to [`recv`] will return this value + /// **unless** it has been since overwritten. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `recv` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no messages were received on this + /// channel. 
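As a rough illustration of that cancel-safety guarantee, the following Rust sketch (plain Tokio; the 1 ms polling interval is an arbitrary choice for the example) races `recv` against a timer inside `tokio::select!`. Losing the race on any iteration never drops a message:

```rust
use tokio::sync::broadcast;
use tokio::time::{sleep, Duration};

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let (tx, mut rx) = broadcast::channel::<u32>(16);

    tokio::spawn(async move {
        sleep(Duration::from_millis(10)).await;
        tx.send(7).unwrap();
    });

    loop {
        tokio::select! {
            // `recv` is cancel safe: if the timer branch wins, no message is lost.
            msg = rx.recv() => {
                assert_eq!(msg.unwrap(), 7);
                break;
            }
            _ = sleep(Duration::from_millis(1)) => {
                // Timed out this round; loop and try again.
            }
        }
    }
}
```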
+ /// + /// [`Receiver`]: crate::sync::broadcast::Receiver + /// [`recv`]: crate::sync::broadcast::Receiver::recv + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel(16); + /// let mut rx2 = tx.subscribe(); + /// + /// tokio::spawn(async move { + /// assert_eq!(rx1.recv().await.unwrap(), 10); + /// assert_eq!(rx1.recv().await.unwrap(), 20); + /// }); + /// + /// tokio::spawn(async move { + /// assert_eq!(rx2.recv().await.unwrap(), 10); + /// assert_eq!(rx2.recv().await.unwrap(), 20); + /// }); + /// + /// tx.send(10).unwrap(); + /// tx.send(20).unwrap(); + /// # } + /// ``` + /// + /// Handling lag + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = broadcast::channel(2); + /// + /// tx.send(10).unwrap(); + /// tx.send(20).unwrap(); + /// tx.send(30).unwrap(); + /// + /// // The receiver lagged behind + /// assert!(rx.recv().await.is_err()); + /// + /// // At this point, we can abort or continue with lost messages + /// + /// assert_eq!(20, rx.recv().await.unwrap()); + /// assert_eq!(30, rx.recv().await.unwrap()); + /// # } + /// ``` + Future recv(); + + /// Receives the next values for this receiver and extends `buffer`. + /// + /// This method extends `buffer` by no more than a fixed number of values + /// as specified by `limit`. If `limit` is zero, the function immediately + /// returns `0`. The return value is the number of values added to `buffer`. + /// + /// For `limit > 0`, if there are no messages in the channel's queue, but + /// the channel has not yet been closed, this method will sleep until a + /// message is sent or the channel is closed. Note that if [`close`] is + /// called, but there are still outstanding [`Permits`] from before it was + /// closed, the channel is not considered closed by `recv_many` until the + /// permits are released. + /// + /// For non-zero values of `limit`, this method will never return `0` unless + /// the channel has been closed and there are no remaining messages in the + /// channel's queue. This indicates that no further values can ever be + /// received from this `Receiver`. The channel is closed when all senders + /// have been dropped, or when [`close`] is called. + /// + /// The capacity of `buffer` is increased as needed. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If `recv_many` is used as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, it is guaranteed that no messages were received on this + /// channel. + /// + /// [`close`]: Self::close + /// [`Permits`]: struct@crate::sync::mpsc::Permit + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut buffer: Vec<&str> = Vec::with_capacity(2); + /// let limit = 2; + /// let (tx, mut rx) = mpsc::channel(100); + /// let tx2 = tx.clone(); + /// tx2.send("first").await.unwrap(); + /// tx2.send("second").await.unwrap(); + /// tx2.send("third").await.unwrap(); + /// + /// // Call `recv_many` to receive up to `limit` (2) values. + /// assert_eq!(2, rx.recv_many(&mut buffer, limit).await); + /// assert_eq!(vec!["first", "second"], buffer); + /// + /// // If the buffer is full, the next call to `recv_many` + /// // reserves additional capacity. 
+ /// assert_eq!(1, rx.recv_many(&mut buffer, 1).await); + /// + /// tokio::spawn(async move { + /// tx.send("fourth").await.unwrap(); + /// }); + /// + /// // 'tx' is dropped, but `recv_many` + /// // is guaranteed not to return 0 as the channel + /// // is not yet closed. + /// assert_eq!(1, rx.recv_many(&mut buffer, 1).await); + /// assert_eq!(vec!["first", "second", "third", "fourth"], buffer); + /// + /// // Once the last sender is dropped, the channel is + /// // closed and `recv_many` returns 0, capacity unchanged. + /// drop(tx2); + /// assert_eq!(0, rx.recv_many(&mut buffer, limit).await); + /// assert_eq!(vec!["first", "second", "third", "fourth"], buffer); + /// # } + /// ``` + Future recvMany({required VecT buffer, required BigInt limit}); + + /// Re-subscribes to the channel starting from the current tail element. + /// + /// This [`Receiver`] handle will receive a clone of all values sent + /// **after** it has resubscribed. This will not include elements that are + /// in the queue of the current receiver. Consider the following example. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = broadcast::channel(2); + /// + /// tx.send(1).unwrap(); + /// let mut rx2 = rx.resubscribe(); + /// tx.send(2).unwrap(); + /// + /// assert_eq!(rx2.recv().await.unwrap(), 2); + /// assert_eq!(rx.recv().await.unwrap(), 1); + /// # } + /// ``` + Future resubscribe(); + + /// Returns `true` if receivers belong to the same channel. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, rx) = broadcast::channel::<()>(16); + /// let rx2 = tx.subscribe(); + /// + /// assert!(rx.same_channel(&rx2)); + /// + /// let (_tx3, rx3) = broadcast::channel::<()>(16); + /// + /// assert!(!rx3.same_channel(&rx2)); + /// # } + /// ``` + Future sameChannel({required ReceiverT other}); + + /// Returns the number of [`Sender`] handles. + Future senderStrongCount(); + + /// Returns the number of [`WeakSender`] handles. + Future senderWeakCount(); + + /// Attempts to return a pending value on this receiver without awaiting. + /// + /// This is useful for a flavor of "optimistic check" before deciding to + /// await on a receiver. + /// + /// Compared with [`recv`], this function has three failure cases instead of two + /// (one for closed, one for an empty buffer, one for a lagging receiver). + /// + /// `Err(TryRecvError::Closed)` is returned when all `Sender` halves have + /// dropped, indicating that no further values can be sent on the channel. + /// + /// If the [`Receiver`] handle falls behind, once the channel is full, newly + /// sent values will overwrite old values. At this point, a call to [`recv`] + /// will return with `Err(TryRecvError::Lagged)` and the [`Receiver`]'s + /// internal cursor is updated to point to the oldest value still held by + /// the channel. A subsequent call to [`try_recv`] will return this value + /// **unless** it has been since overwritten. If there are no values to + /// receive, `Err(TryRecvError::Empty)` is returned. 
+ /// + /// [`recv`]: crate::sync::broadcast::Receiver::recv + /// [`try_recv`]: crate::sync::broadcast::Receiver::try_recv + /// [`Receiver`]: crate::sync::broadcast::Receiver + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = broadcast::channel(16); + /// + /// assert!(rx.try_recv().is_err()); + /// + /// tx.send(10).unwrap(); + /// + /// let value = rx.try_recv().unwrap(); + /// assert_eq!(10, value); + /// # } + /// ``` + Future tryRecv(); +} + +// Rust type: RustOpaqueMoi> +abstract class Sender implements RustOpaqueInterface { + /// Creates a new `Sender` from a [`File`]. + /// + /// This function is intended to construct a pipe from a [`File`] representing + /// a special FIFO file. It will check if the file is a pipe and has write access, + /// set it in non-blocking mode and perform the conversion. + /// + /// # Errors + /// + /// Fails with `io::ErrorKind::InvalidInput` if the file is not a pipe or it + /// does not have write access. Also fails with any standard OS error if it occurs. + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromFile({required File file}) => + RustLib.instance.api.tokioNetUnixPipeSenderFromFile(file: file); + + /// Creates a new `Sender` from a [`File`] without checking pipe properties. + /// + /// This function is intended to construct a pipe from a File representing + /// a special FIFO file. The conversion assumes nothing about the underlying + /// file; it is left up to the user to make sure it is opened with write access, + /// represents a pipe and is set in non-blocking mode. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::fs::OpenOptions; + /// use std::os::unix::fs::{FileTypeExt, OpenOptionsExt}; + /// # use std::error::Error; + /// + /// const FIFO_NAME: &str = "path/to/a/fifo"; + /// + /// # async fn dox() -> Result<(), Box> { + /// let file = OpenOptions::new() + /// .write(true) + /// .custom_flags(libc::O_NONBLOCK) + /// .open(FIFO_NAME)?; + /// if file.metadata()?.file_type().is_fifo() { + /// let tx = pipe::Sender::from_file_unchecked(file)?; + /// /* use the Sender */ + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromFileUnchecked({required File file}) => + RustLib.instance.api.tokioNetUnixPipeSenderFromFileUnchecked(file: file); + + /// Creates a new `Sender` from an [`OwnedFd`]. + /// + /// This function is intended to construct a pipe from an [`OwnedFd`] representing + /// an anonymous pipe or a special FIFO file. It will check if the file descriptor + /// is a pipe and has write access, set it in non-blocking mode and perform the + /// conversion. 
+ /// + /// # Errors + /// + /// Fails with `io::ErrorKind::InvalidInput` if the file descriptor is not a pipe + /// or it does not have write access. Also fails with any standard OS error if it + /// occurs. + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromOwnedFd({required OwnedFd ownedFd}) => + RustLib.instance.api.tokioNetUnixPipeSenderFromOwnedFd(ownedFd: ownedFd); + + /// Creates a new `Sender` from an [`OwnedFd`] without checking pipe properties. + /// + /// This function is intended to construct a pipe from an [`OwnedFd`] representing + /// an anonymous pipe or a special FIFO file. The conversion assumes nothing about + /// the underlying pipe; it is left up to the user to make sure that the file + /// descriptor represents the writing end of a pipe and the pipe is set in + /// non-blocking mode. + /// + /// # Panics + /// + /// This function panics if it is not called from within a runtime with + /// IO enabled. + /// + /// The runtime is usually set implicitly when this function is called + /// from a future driven by a tokio runtime, otherwise runtime can be set + /// explicitly with [`Runtime::enter`](crate::runtime::Runtime::enter) function. + static Future fromOwnedFdUnchecked({required OwnedFd ownedFd}) => + RustLib.instance.api + .tokioNetUnixPipeSenderFromOwnedFdUnchecked(ownedFd: ownedFd); + + /// Converts the pipe into an [`OwnedFd`] in blocking mode. + /// + /// This function will deregister this pipe end from the event loop, set + /// it in blocking mode and perform the conversion. + Future intoBlockingFd(); + + /// Converts the pipe into an [`OwnedFd`] in nonblocking mode. + /// + /// This function will deregister this pipe end from the event loop and + /// perform the conversion. The returned file descriptor will be in nonblocking + /// mode. + Future intoNonblockingFd(); + + /// Polls for write readiness. + /// + /// If the pipe is not currently ready for writing, this method will + /// store a clone of the `Waker` from the provided `Context`. When the pipe + /// becomes ready for writing, `Waker::wake` will be called on the waker. + /// + /// Note that on multiple calls to `poll_write_ready` or `poll_write`, only + /// the `Waker` from the `Context` passed to the most recent call is + /// scheduled to receive a wakeup. + /// + /// This function is intended for cases where creating and pinning a future + /// via [`writable`] is not feasible. Where possible, using [`writable`] is + /// preferred, as this supports polling from multiple tasks at once. + /// + /// [`writable`]: Self::writable + /// + /// # Return value + /// + /// The function returns: + /// + /// * `Poll::Pending` if the pipe is not ready for writing. + /// * `Poll::Ready(Ok(()))` if the pipe is ready for writing. + /// * `Poll::Ready(Err(e))` if an error is encountered. + /// + /// # Errors + /// + /// This function may encounter any standard I/O error except `WouldBlock`. + Future pollWriteReady({required Context cx}); + + /// Waits for any of the requested ready states. + /// + /// This function can be used instead of [`writable()`] to check the returned + /// ready set for [`Ready::WRITABLE`] and [`Ready::WRITE_CLOSED`] events. 
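For instance, a writer can inspect the returned ready set to stop early once the read side is gone. A rough sketch against plain Tokio follows; as in the surrounding examples, the FIFO path is a placeholder.

```rust
use std::io;
use tokio::io::Interest;
use tokio::net::unix::pipe;

#[tokio::main]
async fn main() -> io::Result<()> {
    let tx = pipe::OpenOptions::new().open_sender("path/to/a/fifo")?;

    loop {
        // Wait for writability; closed events are reported as well.
        let ready = tx.ready(Interest::WRITABLE).await?;

        if ready.is_write_closed() {
            // The reading end was dropped; stop writing.
            break;
        }

        if ready.is_writable() {
            match tx.try_write(b"hello world") {
                Ok(_) => break,
                // A false-positive readiness event; wait again.
                Err(e) if e.kind() == io::ErrorKind::WouldBlock => continue,
                Err(e) => return Err(e),
            }
        }
    }

    Ok(())
}
```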
+ /// + /// The function may complete without the pipe being ready. This is a + /// false-positive and attempting an operation will return with + /// `io::ErrorKind::WouldBlock`. The function can also return with an empty + /// [`Ready`] set, so you should always check the returned value and possibly + /// wait again if the requested states are not set. + /// + /// [`writable()`]: Self::writable + /// + /// # Cancel safety + /// + /// This method is cancel safe. Once a readiness event occurs, the method + /// will continue to return immediately until the readiness event is + /// consumed by an attempt to write that fails with `WouldBlock` or + /// `Poll::Pending`. + Future ready({required Interest interest}); + + /// Tries to write a buffer to the pipe, returning how many bytes were + /// written. + /// + /// The function will attempt to write the entire contents of `buf`, but + /// only part of the buffer may be written. If the length of `buf` is not + /// greater than `PIPE_BUF` (an OS constant, 4096 under Linux), then the + /// write is guaranteed to be atomic, i.e. either the entire content of + /// `buf` will be written or this method will fail with `WouldBlock`. There + /// is no such guarantee if `buf` is larger than `PIPE_BUF`. + /// + /// This function is usually paired with [`writable`]. + /// + /// [`writable`]: Self::writable + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the pipe is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Open a writing end of a fifo + /// let tx = pipe::OpenOptions::new().open_sender("path/to/a/fifo")?; + /// + /// loop { + /// // Wait for the pipe to be writable + /// tx.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match tx.try_write(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryWrite({required List buf}); + + /// Tries to write several buffers to the pipe, returning how many bytes + /// were written. + /// + /// Data is written from each buffer in order, with the final buffer read + /// from possible being only partially consumed. This method behaves + /// equivalently to a single call to [`try_write()`] with concatenated + /// buffers. + /// + /// If the total length of buffers is not greater than `PIPE_BUF` (an OS + /// constant, 4096 under Linux), then the write is guaranteed to be atomic, + /// i.e. either the entire contents of buffers will be written or this + /// method will fail with `WouldBlock`. There is no such guarantee if the + /// total length of buffers is greater than `PIPE_BUF`. + /// + /// This function is usually paired with [`writable`]. + /// + /// [`try_write()`]: Self::try_write() + /// [`writable`]: Self::writable + /// + /// # Return + /// + /// If data is successfully written, `Ok(n)` is returned, where `n` is the + /// number of bytes written. If the pipe is not ready to write data, + /// `Err(io::ErrorKind::WouldBlock)` is returned. 
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Open a writing end of a fifo + /// let tx = pipe::OpenOptions::new().open_sender("path/to/a/fifo")?; + /// + /// let bufs = [io::IoSlice::new(b"hello "), io::IoSlice::new(b"world")]; + /// + /// loop { + /// // Wait for the pipe to be writable + /// tx.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match tx.try_write_vectored(&bufs) { + /// Ok(n) => { + /// break; + /// } + /// Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future tryWriteVectored({required List buf}); + + /// Waits for the pipe to become writable. + /// + /// This function is equivalent to `ready(Interest::WRITABLE)` and is usually + /// paired with [`try_write()`]. + /// + /// [`try_write()`]: Self::try_write + /// + /// # Examples + /// + /// ```no_run + /// use tokio::net::unix::pipe; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// // Open a writing end of a fifo + /// let tx = pipe::OpenOptions::new().open_sender("path/to/a/fifo")?; + /// + /// loop { + /// // Wait for the pipe to be writable + /// tx.writable().await?; + /// + /// // Try to write data, this may still fail with `WouldBlock` + /// // if the readiness event is a false positive. + /// match tx.try_write(b"hello world") { + /// Ok(n) => { + /// break; + /// } + /// Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + /// continue; + /// } + /// Err(e) => { + /// return Err(e.into()); + /// } + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + Future writable(); +} + +// Rust type: RustOpaqueMoi>> +abstract class SenderT implements RustOpaqueInterface { + /// Blocking send to call outside of asynchronous contexts. + /// + /// This method is intended for use cases where you are sending from + /// synchronous code to asynchronous code, and will work even if the + /// receiver is not using [`blocking_recv`] to receive the message. + /// + /// [`blocking_recv`]: fn@crate::sync::mpsc::Receiver::blocking_recv + /// + /// # Panics + /// + /// This function panics if called within an asynchronous execution + /// context. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use std::thread; + /// use tokio::runtime::Runtime; + /// use tokio::sync::mpsc; + /// + /// fn main() { + /// let (tx, mut rx) = mpsc::channel::(1); + /// + /// let sync_code = thread::spawn(move || { + /// tx.blocking_send(10).unwrap(); + /// }); + /// + /// Runtime::new().unwrap().block_on(async move { + /// assert_eq!(Some(10), rx.recv().await); + /// }); + /// sync_code.join().unwrap() + /// } + /// # } + /// ``` + Future blockingSend({required T value}); + + /// Returns a reference to the most recently sent value + /// + /// Outstanding borrows hold a read lock on the inner value. This means that + /// long-lived borrows could cause the producer half to block. It is recommended + /// to keep the borrow as short-lived as possible. Additionally, if you are + /// running in an environment that allows `!Send` futures, you must ensure that + /// the returned `Ref` type is never held alive across an `.await` point, + /// otherwise, it can lead to a deadlock. 
+ /// + /// # Examples + /// + /// ``` + /// use tokio::sync::watch; + /// + /// let (tx, _) = watch::channel("hello"); + /// assert_eq!(*tx.borrow(), "hello"); + /// ``` + Future borrow(); + + /// Returns the current capacity of the channel. + /// + /// The capacity goes down when sending a value by calling [`send`] or by reserving capacity + /// with [`reserve`]. The capacity goes up when values are received by the [`Receiver`]. + /// This is distinct from [`max_capacity`], which always returns buffer capacity initially + /// specified when calling [`channel`] + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel::<()>(5); + /// + /// assert_eq!(tx.capacity(), 5); + /// + /// // Making a reservation drops the capacity by one. + /// let permit = tx.reserve().await.unwrap(); + /// assert_eq!(tx.capacity(), 4); + /// + /// // Sending and receiving a value increases the capacity by one. + /// permit.send(()); + /// rx.recv().await.unwrap(); + /// assert_eq!(tx.capacity(), 5); + /// # } + /// ``` + /// + /// [`send`]: Sender::send + /// [`reserve`]: Sender::reserve + /// [`channel`]: channel + /// [`max_capacity`]: Sender::max_capacity + Future capacity(); + + /// A future which completes when the number of [Receiver]s subscribed to this `Sender` reaches + /// zero. + /// + /// # Examples + /// + /// ``` + /// use futures::FutureExt; + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel::(16); + /// let mut rx2 = tx.subscribe(); + /// + /// let _ = tx.send(10); + /// + /// assert_eq!(rx1.recv().await.unwrap(), 10); + /// drop(rx1); + /// assert!(tx.closed().now_or_never().is_none()); + /// + /// assert_eq!(rx2.recv().await.unwrap(), 10); + /// drop(rx2); + /// assert!(tx.closed().now_or_never().is_some()); + /// # } + /// ``` + Future closed(); + + static Future default_() => + RustLib.instance.api.tokioNetUnixPipeSenderTDefault(); + + /// Converts the `Sender` to a [`WeakSender`] that does not count + /// towards RAII semantics, i.e. if all `Sender` instances of the + /// channel were dropped and only `WeakSender` instances remain, + /// the channel is closed. + Future downgrade(); + + /// Checks if the channel has been closed. This happens when the + /// [`Receiver`] is dropped, or when the [`Receiver::close`] method is + /// called. + /// + /// [`Receiver`]: crate::sync::mpsc::Receiver + /// [`Receiver::close`]: crate::sync::mpsc::Receiver::close + /// + /// ``` + /// let (tx, rx) = tokio::sync::mpsc::channel::<()>(42); + /// assert!(!tx.is_closed()); + /// + /// let tx2 = tx.clone(); + /// assert!(!tx2.is_closed()); + /// + /// drop(rx); + /// assert!(tx.is_closed()); + /// assert!(tx2.is_closed()); + /// ``` + Future isClosed(); + + /// Returns true if there are no queued values. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel(16); + /// let mut rx2 = tx.subscribe(); + /// + /// assert!(tx.is_empty()); + /// + /// tx.send(10).unwrap(); + /// + /// assert!(!tx.is_empty()); + /// + /// rx1.recv().await.unwrap(); + /// + /// // The queue is still not empty since rx2 hasn't seen the value. 
+ /// assert!(!tx.is_empty()); + /// + /// rx2.recv().await.unwrap(); + /// + /// assert!(tx.is_empty()); + /// # } + /// ``` + Future isEmpty(); + + /// Returns the number of queued values. + /// + /// A value is queued until it has either been seen by all receivers that were alive at the time + /// it was sent, or has been evicted from the queue by subsequent sends that exceeded the + /// queue's capacity. + /// + /// # Note + /// + /// In contrast to [`Receiver::len`], this method only reports queued values and not values that + /// have been evicted from the queue before being seen by all receivers. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel(16); + /// let mut rx2 = tx.subscribe(); + /// + /// tx.send(10).unwrap(); + /// tx.send(20).unwrap(); + /// tx.send(30).unwrap(); + /// + /// assert_eq!(tx.len(), 3); + /// + /// rx1.recv().await.unwrap(); + /// + /// // The len is still 3 since rx2 hasn't seen the first value yet. + /// assert_eq!(tx.len(), 3); + /// + /// rx2.recv().await.unwrap(); + /// + /// assert_eq!(tx.len(), 2); + /// # } + /// ``` + Future len(); + + /// Returns the maximum buffer capacity of the channel. + /// + /// The maximum capacity is the buffer capacity initially specified when calling + /// [`channel`]. This is distinct from [`capacity`], which returns the *current* + /// available buffer capacity: as messages are sent and received, the + /// value returned by [`capacity`] will go up or down, whereas the value + /// returned by [`max_capacity`] will remain constant. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, _rx) = mpsc::channel::<()>(5); + /// + /// // both max capacity and capacity are the same at first + /// assert_eq!(tx.max_capacity(), 5); + /// assert_eq!(tx.capacity(), 5); + /// + /// // Making a reservation doesn't change the max capacity. + /// let permit = tx.reserve().await.unwrap(); + /// assert_eq!(tx.max_capacity(), 5); + /// // but drops the capacity by one + /// assert_eq!(tx.capacity(), 4); + /// # } + /// ``` + /// + /// [`channel`]: channel + /// [`max_capacity`]: Sender::max_capacity + /// [`capacity`]: Sender::capacity + Future maxCapacity(); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates the sending-half of the [`broadcast`] channel. + /// + /// See the documentation of [`broadcast::channel`] for more information on this method. + /// + /// [`broadcast`]: crate::sync::broadcast + /// [`broadcast::channel`]: crate::sync::broadcast::channel + static Future newInstance({required BigInt capacity}) => + RustLib.instance.api.tokioNetUnixPipeSenderTNew(capacity: capacity); + + /// Checks whether the `oneshot` channel has been closed, and if not, schedules the + /// `Waker` in the provided `Context` to receive a notification when the channel is + /// closed. + /// + /// A [`Receiver`] is closed by either calling [`close`] explicitly, or when the + /// [`Receiver`] value is dropped. + /// + /// Note that on multiple calls to poll, only the `Waker` from the `Context` passed + /// to the most recent call will be scheduled to receive a wakeup. 
+ /// + /// [`Receiver`]: struct@crate::sync::oneshot::Receiver + /// [`close`]: fn@crate::sync::oneshot::Receiver::close + /// + /// # Return value + /// + /// This function returns: + /// + /// * `Poll::Pending` if the channel is still open. + /// * `Poll::Ready(())` if the channel is closed. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::oneshot; + /// + /// use std::future::poll_fn; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (mut tx, mut rx) = oneshot::channel::<()>(); + /// + /// tokio::spawn(async move { + /// rx.close(); + /// }); + /// + /// poll_fn(|cx| tx.poll_closed(cx)).await; + /// + /// println!("the receiver dropped"); + /// # } + /// ``` + Future pollClosed({required Context cx}); + + /// Returns the number of active receivers. + /// + /// An active receiver is a [`Receiver`] handle returned from [`channel`] or + /// [`subscribe`]. These are the handles that will receive values sent on + /// this [`Sender`]. + /// + /// # Note + /// + /// It is not guaranteed that a sent message will reach this number of + /// receivers. Active receivers may never call [`recv`] again before + /// dropping. + /// + /// [`recv`]: crate::sync::broadcast::Receiver::recv + /// [`Receiver`]: crate::sync::broadcast::Receiver + /// [`Sender`]: crate::sync::broadcast::Sender + /// [`subscribe`]: crate::sync::broadcast::Sender::subscribe + /// [`channel`]: crate::sync::broadcast::channel + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, _rx1) = broadcast::channel(16); + /// + /// assert_eq!(1, tx.receiver_count()); + /// + /// let mut _rx2 = tx.subscribe(); + /// + /// assert_eq!(2, tx.receiver_count()); + /// + /// tx.send(10).unwrap(); + /// # } + /// ``` + Future receiverCount(); + + /// Waits for channel capacity. Once capacity to send one message is + /// available, it is reserved for the caller. + /// + /// If the channel is full, the function waits for the number of unreceived + /// messages to become less than the channel capacity. Capacity to send one + /// message is reserved for the caller. A [`Permit`] is returned to track + /// the reserved capacity. The [`send`] function on [`Permit`] consumes the + /// reserved capacity. + /// + /// Dropping [`Permit`] without sending a message releases the capacity back + /// to the channel. + /// + /// [`Permit`]: Permit + /// [`send`]: Permit::send + /// + /// # Cancel safety + /// + /// This channel uses a queue to ensure that calls to `send` and `reserve` + /// complete in the order they were requested. Cancelling a call to + /// `reserve` makes you lose your place in the queue. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(1); + /// + /// // Reserve capacity + /// let permit = tx.reserve().await.unwrap(); + /// + /// // Trying to send directly on the `tx` will fail due to no + /// // available capacity. + /// assert!(tx.try_send(123).is_err()); + /// + /// // Sending on the permit succeeds + /// permit.send(456); + /// + /// // The value sent on the permit is received + /// assert_eq!(rx.recv().await.unwrap(), 456); + /// # } + /// ``` + Future reserve(); + + /// Waits for channel capacity. Once capacity to send `n` messages is + /// available, it is reserved for the caller. 
+ /// + /// If the channel is full or if there are fewer than `n` permits available, the function waits + /// for the number of unreceived messages to become `n` less than the channel capacity. + /// Capacity to send `n` message is then reserved for the caller. + /// + /// A [`PermitIterator`] is returned to track the reserved capacity. + /// You can call this [`Iterator`] until it is exhausted to + /// get a [`Permit`] and then call [`Permit::send`]. This function is similar to + /// [`try_reserve_many`] except it waits for the slots to become available. + /// + /// If the channel is closed, the function returns a [`SendError`]. + /// + /// Dropping [`PermitIterator`] without consuming it entirely releases the remaining + /// permits back to the channel. + /// + /// [`PermitIterator`]: PermitIterator + /// [`Permit`]: Permit + /// [`send`]: Permit::send + /// [`try_reserve_many`]: Sender::try_reserve_many + /// + /// # Cancel safety + /// + /// This channel uses a queue to ensure that calls to `send` and `reserve_many` + /// complete in the order they were requested. Cancelling a call to + /// `reserve_many` makes you lose your place in the queue. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(2); + /// + /// // Reserve capacity + /// let mut permit = tx.reserve_many(2).await.unwrap(); + /// + /// // Trying to send directly on the `tx` will fail due to no + /// // available capacity. + /// assert!(tx.try_send(123).is_err()); + /// + /// // Sending with the permit iterator succeeds + /// permit.next().unwrap().send(456); + /// permit.next().unwrap().send(457); + /// + /// // The iterator should now be exhausted + /// assert!(permit.next().is_none()); + /// + /// // The value sent on the permit is received + /// assert_eq!(rx.recv().await.unwrap(), 456); + /// assert_eq!(rx.recv().await.unwrap(), 457); + /// # } + /// ``` + Future reserveMany({required BigInt n}); + + /// Waits for channel capacity, moving the `Sender` and returning an owned + /// permit. Once capacity to send one message is available, it is reserved + /// for the caller. + /// + /// This moves the sender _by value_, and returns an owned permit that can + /// be used to send a message into the channel. Unlike [`Sender::reserve`], + /// this method may be used in cases where the permit must be valid for the + /// `'static` lifetime. `Sender`s may be cloned cheaply (`Sender::clone` is + /// essentially a reference count increment, comparable to [`Arc::clone`]), + /// so when multiple [`OwnedPermit`]s are needed or the `Sender` cannot be + /// moved, it can be cloned prior to calling `reserve_owned`. + /// + /// If the channel is full, the function waits for the number of unreceived + /// messages to become less than the channel capacity. Capacity to send one + /// message is reserved for the caller. An [`OwnedPermit`] is returned to + /// track the reserved capacity. The [`send`] function on [`OwnedPermit`] + /// consumes the reserved capacity. + /// + /// Dropping the [`OwnedPermit`] without sending a message releases the + /// capacity back to the channel. + /// + /// # Cancel safety + /// + /// This channel uses a queue to ensure that calls to `send` and `reserve` + /// complete in the order they were requested. Cancelling a call to + /// `reserve_owned` makes you lose your place in the queue. 
+ /// + /// # Examples + /// Sending a message using an [`OwnedPermit`]: + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(1); + /// + /// // Reserve capacity, moving the sender. + /// let permit = tx.reserve_owned().await.unwrap(); + /// + /// // Send a message, consuming the permit and returning + /// // the moved sender. + /// let tx = permit.send(123); + /// + /// // The value sent on the permit is received. + /// assert_eq!(rx.recv().await.unwrap(), 123); + /// + /// // The sender can now be used again. + /// tx.send(456).await.unwrap(); + /// # } + /// ``` + /// + /// When multiple [`OwnedPermit`]s are needed, or the sender cannot be moved + /// by value, it can be inexpensively cloned before calling `reserve_owned`: + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(1); + /// + /// // Clone the sender and reserve capacity. + /// let permit = tx.clone().reserve_owned().await.unwrap(); + /// + /// // Trying to send directly on the `tx` will fail due to no + /// // available capacity. + /// assert!(tx.try_send(123).is_err()); + /// + /// // Sending on the permit succeeds. + /// permit.send(456); + /// + /// // The value sent on the permit is received + /// assert_eq!(rx.recv().await.unwrap(), 456); + /// # } + /// ``` + /// + /// [`Sender::reserve`]: Sender::reserve + /// [`OwnedPermit`]: OwnedPermit + /// [`send`]: OwnedPermit::send + /// [`Arc::clone`]: std::sync::Arc::clone + Future reserveOwned(); + + /// Returns `true` if senders belong to the same channel. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, _rx) = broadcast::channel::<()>(16); + /// let tx2 = tx.clone(); + /// + /// assert!(tx.same_channel(&tx2)); + /// + /// let (tx3, _rx3) = broadcast::channel::<()>(16); + /// + /// assert!(!tx3.same_channel(&tx2)); + /// # } + /// ``` + Future sameChannel({required SenderT other}); + + /// Attempts to send a value to all active [`Receiver`] handles, returning + /// it back if it could not be sent. + /// + /// A successful send occurs when there is at least one active [`Receiver`] + /// handle. An unsuccessful send would be one where all associated + /// [`Receiver`] handles have already been dropped. + /// + /// # Return + /// + /// On success, the number of subscribed [`Receiver`] handles is returned. + /// This does not mean that this number of receivers will see the message as + /// a receiver may drop or lag ([see lagging](self#lagging)) before receiving + /// the message. + /// + /// # Note + /// + /// A return value of `Ok` **does not** mean that the sent value will be + /// observed by all or any of the active [`Receiver`] handles. [`Receiver`] + /// handles may be dropped before receiving the sent message. + /// + /// A return value of `Err` **does not** mean that future calls to `send` + /// will fail. New [`Receiver`] handles may be created by calling + /// [`subscribe`]. 
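That note can be seen directly in a small Rust sketch (illustrative only): with no subscribers, `send` hands the value back as an error, and it succeeds again as soon as a new receiver subscribes.

```rust
use tokio::sync::broadcast;

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let (tx, rx) = broadcast::channel(16);

    // With no active receivers, `send` returns the value back as an error.
    drop(rx);
    assert!(tx.send(10).is_err());

    // Subscribing again makes subsequent sends succeed.
    let mut rx = tx.subscribe();
    assert_eq!(tx.send(20).unwrap(), 1); // one active receiver
    assert_eq!(rx.recv().await.unwrap(), 20);
}
```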
+ /// + /// [`Receiver`]: crate::sync::broadcast::Receiver + /// [`subscribe`]: crate::sync::broadcast::Sender::subscribe + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx1) = broadcast::channel(16); + /// let mut rx2 = tx.subscribe(); + /// + /// tokio::spawn(async move { + /// assert_eq!(rx1.recv().await.unwrap(), 10); + /// assert_eq!(rx1.recv().await.unwrap(), 20); + /// }); + /// + /// tokio::spawn(async move { + /// assert_eq!(rx2.recv().await.unwrap(), 10); + /// assert_eq!(rx2.recv().await.unwrap(), 20); + /// }); + /// + /// tx.send(10).unwrap(); + /// tx.send(20).unwrap(); + /// # } + /// ``` + Future send({required T value}); + + /// Sends a new value via the channel, notifying all receivers and returning + /// the previous value in the channel. + /// + /// This can be useful for reusing the buffers inside a watched value. + /// Additionally, this method permits sending values even when there are no + /// receivers. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::watch; + /// + /// let (tx, _rx) = watch::channel(1); + /// assert_eq!(tx.send_replace(2), 1); + /// assert_eq!(tx.send_replace(3), 2); + /// ``` + Future sendReplace({required T value}); + + /// Sends a value, waiting until there is capacity, but only for a limited time. + /// + /// Shares the same success and error conditions as [`send`], adding one more + /// condition for an unsuccessful send, which is when the provided timeout has + /// elapsed, and there is no capacity available. + /// + /// [`send`]: Sender::send + /// + /// # Errors + /// + /// If the receive half of the channel is closed, either due to [`close`] + /// being called or the [`Receiver`] having been dropped, + /// the function returns an error. The error includes the value passed to `send`. + /// + /// [`close`]: Receiver::close + /// [`Receiver`]: Receiver + /// + /// # Panics + /// + /// This function panics if it is called outside the context of a Tokio + /// runtime [with time enabled](crate::runtime::Builder::enable_time). + /// + /// # Examples + /// + /// In the following example, each call to `send_timeout` will block until the + /// previously sent value was received, unless the timeout has elapsed. + /// + /// ```rust + /// use tokio::sync::mpsc; + /// use tokio::time::{sleep, Duration}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(1); + /// + /// tokio::spawn(async move { + /// for i in 0..10 { + /// if let Err(e) = tx.send_timeout(i, Duration::from_millis(100)).await { + /// println!("send error: #{:?}", e); + /// return; + /// } + /// } + /// }); + /// + /// while let Some(i) = rx.recv().await { + /// println!("got = {}", i); + /// sleep(Duration::from_millis(200)).await; + /// } + /// # } + /// ``` + Future sendTimeout({required T value, required Duration timeout}); + + /// Returns the number of senders that currently exist. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::watch; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx1, rx) = watch::channel("hello"); + /// + /// assert_eq!(1, tx1.sender_count()); + /// + /// let tx2 = tx1.clone(); + /// + /// assert_eq!(2, tx1.sender_count()); + /// assert_eq!(2, tx2.sender_count()); + /// # } + /// ``` + Future senderCount(); + + /// Returns the number of [`Sender`] handles. 
+ Future strongCount(); + + /// Creates a new [`Receiver`] handle that will receive values sent **after** + /// this call to `subscribe`. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::broadcast; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, _rx) = broadcast::channel(16); + /// + /// // Will not be seen + /// tx.send(10).unwrap(); + /// + /// let mut rx = tx.subscribe(); + /// + /// tx.send(20).unwrap(); + /// + /// let value = rx.recv().await.unwrap(); + /// assert_eq!(20, value); + /// # } + /// ``` + Future subscribe(); + + /// Tries to acquire a slot in the channel without waiting for the slot to become + /// available. + /// + /// If the channel is full this function will return [`TrySendError`], otherwise + /// if there is a slot available it will return a [`Permit`] that will then allow you + /// to [`send`] on the channel with a guaranteed slot. This function is similar to + /// [`reserve`] except it does not await for the slot to become available. + /// + /// Dropping [`Permit`] without sending a message releases the capacity back + /// to the channel. + /// + /// [`Permit`]: Permit + /// [`send`]: Permit::send + /// [`reserve`]: Sender::reserve + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(1); + /// + /// // Reserve capacity + /// let permit = tx.try_reserve().unwrap(); + /// + /// // Trying to send directly on the `tx` will fail due to no + /// // available capacity. + /// assert!(tx.try_send(123).is_err()); + /// + /// // Trying to reserve an additional slot on the `tx` will + /// // fail because there is no capacity. + /// assert!(tx.try_reserve().is_err()); + /// + /// // Sending on the permit succeeds + /// permit.send(456); + /// + /// // The value sent on the permit is received + /// assert_eq!(rx.recv().await.unwrap(), 456); + /// + /// # } + /// ``` + Future tryReserve(); + + /// Tries to acquire `n` slots in the channel without waiting for the slot to become + /// available. + /// + /// A [`PermitIterator`] is returned to track the reserved capacity. + /// You can call this [`Iterator`] until it is exhausted to + /// get a [`Permit`] and then call [`Permit::send`]. This function is similar to + /// [`reserve_many`] except it does not await for the slots to become available. + /// + /// If there are fewer than `n` permits available on the channel, then + /// this function will return a [`TrySendError::Full`]. If the channel is closed + /// this function will return a [`TrySendError::Closed`]. + /// + /// Dropping [`PermitIterator`] without consuming it entirely releases the remaining + /// permits back to the channel. + /// + /// [`PermitIterator`]: PermitIterator + /// [`send`]: Permit::send + /// [`reserve_many`]: Sender::reserve_many + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(2); + /// + /// // Reserve capacity + /// let mut permit = tx.try_reserve_many(2).unwrap(); + /// + /// // Trying to send directly on the `tx` will fail due to no + /// // available capacity. + /// assert!(tx.try_send(123).is_err()); + /// + /// // Trying to reserve an additional slot on the `tx` will + /// // fail because there is no capacity. 
+ /// assert!(tx.try_reserve().is_err()); + /// + /// // Sending with the permit iterator succeeds + /// permit.next().unwrap().send(456); + /// permit.next().unwrap().send(457); + /// + /// // The iterator should now be exhausted + /// assert!(permit.next().is_none()); + /// + /// // The value sent on the permit is received + /// assert_eq!(rx.recv().await.unwrap(), 456); + /// assert_eq!(rx.recv().await.unwrap(), 457); + /// + /// // Trying to call try_reserve_many with 0 will return an empty iterator + /// let mut permit = tx.try_reserve_many(0).unwrap(); + /// assert!(permit.next().is_none()); + /// + /// // Trying to call try_reserve_many with a number greater than the channel + /// // capacity will return an error + /// let permit = tx.try_reserve_many(3); + /// assert!(permit.is_err()); + /// + /// // Trying to call try_reserve_many on a closed channel will return an error + /// drop(rx); + /// let permit = tx.try_reserve_many(1); + /// assert!(permit.is_err()); + /// + /// let permit = tx.try_reserve_many(0); + /// assert!(permit.is_err()); + /// # } + /// ``` + Future tryReserveMany({required BigInt n}); + + /// Tries to acquire a slot in the channel without waiting for the slot to become + /// available, returning an owned permit. + /// + /// This moves the sender _by value_, and returns an owned permit that can + /// be used to send a message into the channel. Unlike [`Sender::try_reserve`], + /// this method may be used in cases where the permit must be valid for the + /// `'static` lifetime. `Sender`s may be cloned cheaply (`Sender::clone` is + /// essentially a reference count increment, comparable to [`Arc::clone`]), + /// so when multiple [`OwnedPermit`]s are needed or the `Sender` cannot be + /// moved, it can be cloned prior to calling `try_reserve_owned`. + /// + /// If the channel is full this function will return a [`TrySendError`]. + /// Since the sender is taken by value, the `TrySendError` returned in this + /// case contains the sender, so that it may be used again. Otherwise, if + /// there is a slot available, this method will return an [`OwnedPermit`] + /// that can then be used to [`send`] on the channel with a guaranteed slot. + /// This function is similar to [`reserve_owned`] except it does not await + /// for the slot to become available. + /// + /// Dropping the [`OwnedPermit`] without sending a message releases the capacity back + /// to the channel. + /// + /// [`OwnedPermit`]: OwnedPermit + /// [`send`]: OwnedPermit::send + /// [`reserve_owned`]: Sender::reserve_owned + /// [`Arc::clone`]: std::sync::Arc::clone + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let (tx, mut rx) = mpsc::channel(1); + /// + /// // Reserve capacity + /// let permit = tx.clone().try_reserve_owned().unwrap(); + /// + /// // Trying to send directly on the `tx` will fail due to no + /// // available capacity. + /// assert!(tx.try_send(123).is_err()); + /// + /// // Trying to reserve an additional slot on the `tx` will + /// // fail because there is no capacity. + /// assert!(tx.try_reserve().is_err()); + /// + /// // Sending on the permit succeeds + /// permit.send(456); + /// + /// // The value sent on the permit is received + /// assert_eq!(rx.recv().await.unwrap(), 456); + /// + /// # } + /// ``` + Future tryReserveOwned(); + + /// Attempts to immediately send a message on this `Sender`. 
+ /// + /// This method differs from [`send`] by returning immediately if the channel's + /// buffer is full or no receiver is waiting to acquire some data. Compared + /// with [`send`], this function has two failure cases instead of one (one for + /// disconnection, one for a full buffer). + /// + /// # Errors + /// + /// If the channel capacity has been reached, i.e., the channel has `n` + /// buffered values where `n` is the argument passed to [`channel`], then an + /// error is returned. + /// + /// If the receive half of the channel is closed, either due to [`close`] + /// being called or the [`Receiver`] handle dropping, the function returns + /// an error. The error includes the value passed to `send`. + /// + /// [`send`]: Sender::send + /// [`channel`]: channel + /// [`close`]: Receiver::close + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::mpsc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// // Create a channel with buffer size 1 + /// let (tx1, mut rx) = mpsc::channel(1); + /// let tx2 = tx1.clone(); + /// + /// tokio::spawn(async move { + /// tx1.send(1).await.unwrap(); + /// tx1.send(2).await.unwrap(); + /// // task waits until the receiver receives a value. + /// }); + /// + /// tokio::spawn(async move { + /// // This will return an error and send + /// // no message if the buffer is full + /// let _ = tx2.try_send(3); + /// }); + /// + /// let mut msg; + /// msg = rx.recv().await.unwrap(); + /// println!("message {} received", msg); + /// + /// msg = rx.recv().await.unwrap(); + /// println!("message {} received", msg); + /// + /// // Third message may have never been sent + /// match rx.recv().await { + /// Some(msg) => println!("message {} received", msg), + /// None => println!("the third message was never sent"), + /// } + /// # } + /// ``` + Future trySend({required T message}); + + /// Returns the number of [`WeakSender`] handles. + Future weakCount(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/process.dart b/mobile_app/lib/src/rust/third_party/tokio/process.dart new file mode 100644 index 0000000..d861c23 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/process.dart @@ -0,0 +1,426 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'process/sys.dart'; + +// These functions are ignored because they have generic arguments: `arg0`, `arg`, `args`, `current_dir`, `env_remove`, `env`, `envs`, `new`, `pre_exec`, `stderr`, `stdin`, `stdout` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `ChildDropGuard`, `FusedChild`, `SpawnedChild` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `drop`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `from`, `is_write_vectored`, `kill`, `poll_flush`, `poll_read`, `poll_read`, `poll_shutdown`, `poll_write_vectored`, `poll_write`, `poll`, `try_into`, `try_into`, `try_into` +// These functions have error during generation (see debug logs or enable `stop_on_error: true` for more details): `output`, `status`, `wait_with_output` + +// Rust type: RustOpaqueMoi> +abstract class Child implements RustOpaqueInterface { + ChildStderr? get stderr; + + ChildStdin? get stdin; + + ChildStdout? get stdout; + + set stderr(ChildStderr? stderr); + + set stdin(ChildStdin? stdin); + + set stdout(ChildStdout? stdout); + + /// Returns the OS-assigned process identifier associated with this child + /// while it is still running. + /// + /// Once the child has been polled to completion this will return `None`. + /// This is done to avoid confusion on platforms like Unix where the OS + /// identifier could be reused once the process has completed. + Future id(); + + /// Forces the child to exit. + /// + /// This is equivalent to sending a `SIGKILL` on unix platforms + /// followed by [`wait`](Child::wait). + /// + /// Note: std version of [`Child::kill`](std::process::Child::kill) does not `wait`. + /// For an equivalent of `Child::kill` in the standard library, + /// use [`start_kill`](Child::start_kill). + /// + /// # Examples + /// + /// If the child has to be killed remotely, it is possible to do it using + /// a combination of the select! macro and a `oneshot` channel. In the following + /// example, the child will run until completion unless a message is sent on + /// the `oneshot` channel. If that happens, the child is killed immediately + /// using the `.kill()` method. + /// + /// ```no_run + /// use tokio::process::Command; + /// use tokio::sync::oneshot::channel; + /// + /// #[tokio::main] + /// async fn main() { + /// let (send, recv) = channel::<()>(); + /// let mut child = Command::new("sleep").arg("1").spawn().unwrap(); + /// tokio::spawn(async move { send.send(()) }); + /// tokio::select! { + /// _ = child.wait() => {} + /// _ = recv => child.kill().await.expect("kill failed"), + /// } + /// } + /// ``` + /// + /// You can also interact with the child's standard I/O. For example, you can + /// read its stdout while waiting for it to exit. 
+ /// + /// ```no_run + /// # use std::process::Stdio; + /// # + /// # use tokio::io::AsyncReadExt; + /// # use tokio::process::Command; + /// # use tokio::sync::oneshot::channel; + /// + /// #[tokio::main] + /// async fn main() { + /// let (_tx, rx) = channel::<()>(); + /// + /// let mut child = Command::new("echo") + /// .arg("Hello World!") + /// .stdout(Stdio::piped()) + /// .spawn() + /// .unwrap(); + /// + /// let mut stdout = child.stdout.take().expect("stdout is not captured"); + /// + /// let read_stdout = tokio::spawn(async move { + /// let mut buff = Vec::new(); + /// let _ = stdout.read_to_end(&mut buff).await; + /// + /// buff + /// }); + /// + /// tokio::select! { + /// _ = child.wait() => {} + /// _ = rx => { child.kill().await.expect("kill failed") }, + /// } + /// + /// let buff = read_stdout.await.unwrap(); + /// + /// assert_eq!(buff, b"Hello World!\n"); + /// } + /// ``` + Future kill(); + + /// Attempts to force the child to exit, but does not wait for the request + /// to take effect. + /// + /// On Unix platforms, this is the equivalent to sending a `SIGKILL`. Note + /// that on Unix platforms it is possible for a zombie process to remain + /// after a kill is sent; to avoid this, the caller should ensure that either + /// `child.wait().await` or `child.try_wait()` is invoked successfully. + Future startKill(); + + /// Attempts to collect the exit status of the child if it has already + /// exited. + /// + /// This function will not block the calling thread and will only + /// check to see if the child process has exited or not. If the child has + /// exited then on Unix the process ID is reaped. This function is + /// guaranteed to repeatedly return a successful exit status so long as the + /// child has already exited. + /// + /// If the child has exited, then `Ok(Some(status))` is returned. If the + /// exit status is not available at this time then `Ok(None)` is returned. + /// If an error occurs, then that error is returned. + /// + /// Note that unlike `wait`, this function will not attempt to drop stdin, + /// nor will it wake the current task if the child exits. + Future tryWait(); + + /// Waits for the child to exit completely, returning the status that it + /// exited with. This function will continue to have the same return value + /// after it has been called at least once. + /// + /// The stdin handle to the child process, if any, will be closed + /// before waiting. This helps avoid deadlock: it ensures that the + /// child does not block waiting for input from the parent, while + /// the parent waits for the child to exit. + /// + /// If the caller wishes to explicitly control when the child's stdin + /// handle is closed, they may `.take()` it before calling `.wait()`: + /// + /// # Cancel safety + /// + /// This function is cancel safe. + /// + /// ``` + /// # #[cfg(not(unix))]fn main(){} + /// # #[cfg(unix)] + /// use tokio::io::AsyncWriteExt; + /// # #[cfg(unix)] + /// use tokio::process::Command; + /// # #[cfg(unix)] + /// use std::process::Stdio; + /// + /// # #[cfg(unix)] + /// #[tokio::main] + /// async fn main() { + /// # if cfg!(miri) { return; } // No `pidfd_spawnp` in miri. + /// let mut child = Command::new("cat") + /// .stdin(Stdio::piped()) + /// .spawn() + /// .unwrap(); + /// + /// let mut stdin = child.stdin.take().unwrap(); + /// tokio::spawn(async move { + /// // do something with stdin here... 
+ /// stdin.write_all(b"hello world\n").await.unwrap(); + /// + /// // then drop when finished + /// drop(stdin); + /// }); + /// + /// // wait for the process to complete + /// let _ = child.wait().await; + /// } + /// ``` + Future wait(); +} + +// Rust type: RustOpaqueMoi> +abstract class ChildStderr implements RustOpaqueInterface { + /// Creates an asynchronous `ChildStderr` from a synchronous one. + /// + /// # Errors + /// + /// This method may fail if an error is encountered when setting the pipe to + /// non-blocking mode, or when registering the pipe with the runtime's IO + /// driver. + static Future fromStd({required ChildStderr inner}) => + RustLib.instance.api.tokioProcessChildStderrFromStd(inner: inner); + + /// Convert into [`OwnedFd`]. + Future intoOwnedFd(); +} + +// Rust type: RustOpaqueMoi> +abstract class ChildStdin implements RustOpaqueInterface { + /// Creates an asynchronous `ChildStdin` from a synchronous one. + /// + /// # Errors + /// + /// This method may fail if an error is encountered when setting the pipe to + /// non-blocking mode, or when registering the pipe with the runtime's IO + /// driver. + static Future fromStd({required ChildStdin inner}) => + RustLib.instance.api.tokioProcessChildStdinFromStd(inner: inner); + + /// Convert into [`OwnedFd`]. + Future intoOwnedFd(); +} + +// Rust type: RustOpaqueMoi> +abstract class ChildStdout implements RustOpaqueInterface { + /// Creates an asynchronous `ChildStdout` from a synchronous one. + /// + /// # Errors + /// + /// This method may fail if an error is encountered when setting the pipe to + /// non-blocking mode, or when registering the pipe with the runtime's IO + /// driver. + static Future fromStd({required ChildStdout inner}) => + RustLib.instance.api.tokioProcessChildStdoutFromStd(inner: inner); + + /// Convert into [`OwnedFd`]. + Future intoOwnedFd(); +} + +// Rust type: RustOpaqueMoi> +abstract class Command implements RustOpaqueInterface { + /// Cheaply convert to a `&std::process::Command` for places where the type from the standard + /// library is expected. + Future asStd(); + + /// Cheaply convert to a `&mut std::process::Command` for places where the type from the + /// standard library is expected. + Future asStdMut(); + + /// Clears the entire environment map for the child process. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ```no_run + /// # async fn test() { // allow using await + /// use tokio::process::Command; + /// + /// let output = Command::new("ls") + /// .env_clear() + /// .output().await.unwrap(); + /// # } + /// ``` + Future envClear(); + + /// Returns the boolean value that was previously set by [`Command::kill_on_drop`]. + /// + /// Note that if you have not previously called [`Command::kill_on_drop`], the + /// default value of `false` will be returned here. + /// + /// # Examples + /// + /// ``` + /// use tokio::process::Command; + /// + /// let mut cmd = Command::new("echo"); + /// assert!(!cmd.get_kill_on_drop()); + /// + /// cmd.kill_on_drop(true); + /// assert!(cmd.get_kill_on_drop()); + /// ``` + Future getKillOnDrop(); + + /// Similar to `uid` but sets the group ID of the child process. This has + /// the same semantics as the `uid` field. + Future gid({required int id}); + + /// Cheaply convert into a `std::process::Command`. + /// + /// Note that Tokio specific options will be lost. Currently, this only applies to [`kill_on_drop`]. 
+ /// + /// [`kill_on_drop`]: Command::kill_on_drop + Future intoStd(); + + /// Controls whether a `kill` operation should be invoked on a spawned child + /// process when its corresponding `Child` handle is dropped. + /// + /// By default, this value is assumed to be `false`, meaning the next spawned + /// process will not be killed on drop, similar to the behavior of the standard + /// library. + /// + /// # Caveats + /// + /// On Unix platforms processes must be "reaped" by their parent process after + /// they have exited in order to release all OS resources. A child process which + /// has exited, but has not yet been reaped by its parent is considered a "zombie" + /// process. Such processes continue to count against limits imposed by the system, + /// and having too many zombie processes present can prevent additional processes + /// from being spawned. + /// + /// Although issuing a `kill` signal to the child process is a synchronous + /// operation, the resulting zombie process cannot be `.await`ed inside of the + /// destructor to avoid blocking other tasks. The tokio runtime will, on a + /// best-effort basis, attempt to reap and clean up such processes in the + /// background, but no additional guarantees are made with regard to + /// how quickly or how often this procedure will take place. + /// + /// If stronger guarantees are required, it is recommended to avoid dropping + /// a [`Child`] handle where possible, and instead utilize `child.wait().await` + /// or `child.kill().await` where possible. + Future killOnDrop({required bool killOnDrop}); + + /// Sets the process group ID (PGID) of the child process. Equivalent to a + /// `setpgid` call in the child process, but may be more efficient. + /// + /// Process groups determine which processes receive signals. + /// + /// # Examples + /// + /// Pressing Ctrl-C in a terminal will send `SIGINT` to all processes + /// in the current foreground process group. By spawning the `sleep` + /// subprocess in a new process group, it will not receive `SIGINT` + /// from the terminal. + /// + /// The parent process could install a [signal handler] and manage the + /// process on its own terms. + /// + /// A process group ID of 0 will use the process ID as the PGID. + /// + /// ```no_run + /// # async fn test() { // allow using await + /// use tokio::process::Command; + /// + /// let output = Command::new("sleep") + /// .arg("10") + /// .process_group(0) + /// .output() + /// .await + /// .unwrap(); + /// # } + /// ``` + /// + /// [signal handler]: crate::signal + Future processGroup({required int pgroup}); + + /// Executes the command as a child process, returning a handle to it. + /// + /// By default, stdin, stdout and stderr are inherited from the parent. + /// + /// This method will spawn the child process synchronously and return a + /// handle to a future-aware child process. The `Child` returned implements + /// `Future` itself to acquire the `ExitStatus` of the child, and otherwise + /// the `Child` has methods to acquire handles to the stdin, stdout, and + /// stderr streams. + /// + /// All I/O this child does will be associated with the current default + /// event loop. 
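+  ///
+  /// From the Dart side of these bindings, spawning and waiting might look
+  /// roughly like this (a sketch only; `cmd` is assumed to be a `Command`
+  /// obtained elsewhere, since the constructors that take generic arguments
+  /// are not exposed in this generated file):
+  ///
+  /// ```dart
+  /// final child = await cmd.spawn();
+  /// final status = await child.wait(); // exit status of the child process
+  /// ```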
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ```no_run + /// use tokio::process::Command; + /// + /// async fn run_ls() -> std::process::ExitStatus { + /// Command::new("ls") + /// .spawn() + /// .expect("ls command failed to start") + /// .wait() + /// .await + /// .expect("ls command failed to run") + /// } + /// ``` + /// + /// # Caveats + /// + /// ## Dropping/Cancellation + /// + /// Similar to the behavior to the standard library, and unlike the futures + /// paradigm of dropping-implies-cancellation, a spawned process will, by + /// default, continue to execute even after the `Child` handle has been dropped. + /// + /// The [`Command::kill_on_drop`] method can be used to modify this behavior + /// and kill the child process if the `Child` wrapper is dropped before it + /// has exited. + /// + /// ## Unix Processes + /// + /// On Unix platforms processes must be "reaped" by their parent process after + /// they have exited in order to release all OS resources. A child process which + /// has exited, but has not yet been reaped by its parent is considered a "zombie" + /// process. Such processes continue to count against limits imposed by the system, + /// and having too many zombie processes present can prevent additional processes + /// from being spawned. + /// + /// The tokio runtime will, on a best-effort basis, attempt to reap and clean up + /// any process which it has spawned. No additional guarantees are made with regard to + /// how quickly or how often this procedure will take place. + /// + /// It is recommended to avoid dropping a [`Child`] process handle before it has been + /// fully `await`ed if stricter cleanup guarantees are required. + /// + /// [`Command`]: crate::process::Command + /// [`Command::kill_on_drop`]: crate::process::Command::kill_on_drop + /// [`Child`]: crate::process::Child + /// + /// # Errors + /// + /// On Unix platforms this method will fail with `std::io::ErrorKind::WouldBlock` + /// if the system process limit is reached (which includes other applications + /// running on the system). + Future spawn(); + + /// Sets the child process's user ID. This translates to a + /// `setuid` call in the child process. Failure in the `setuid` + /// call will cause the spawn to fail. + Future uid({required int id}); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/process/sys.dart b/mobile_app/lib/src/rust/third_party/tokio/process/sys.dart new file mode 100644 index 0000000..d35d754 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/process/sys.dart @@ -0,0 +1,12 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `as_fd`, `as_fd`, `as_fd`, `as_raw_fd`, `as_raw_fd`, `as_raw_fd` + +// Rust type: RustOpaqueMoi> +abstract class OwnedFd implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/runtime.dart b/mobile_app/lib/src/rust/third_party/tokio/runtime.dart new file mode 100644 index 0000000..2cd6b3b --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/runtime.dart @@ -0,0 +1,531 @@ +// This file is automatically generated, so please do not edit it. 
+// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `TimerFlavor`, `Timer` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `clone`, `eq`, `fmt`, `fmt` + +// Rust type: RustOpaqueMoi>> +abstract class EnterGuard implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Handle implements RustOpaqueInterface { + /// Returns a `Handle` view over the currently running `Runtime`. + /// + /// # Panics + /// + /// This will panic if called outside the context of a Tokio runtime. That means that you must + /// call this on one of the threads **being run by the runtime**, or from a thread with an active + /// `EnterGuard`. Calling this from within a thread created by `std::thread::spawn` (for example) + /// will cause a panic unless that thread has an active `EnterGuard`. + /// + /// # Examples + /// + /// This can be used to obtain the handle of the surrounding runtime from an async + /// block or function running on that runtime. + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// # use std::thread; + /// # use tokio::runtime::Runtime; + /// # fn dox() { + /// # let rt = Runtime::new().unwrap(); + /// # rt.spawn(async { + /// use tokio::runtime::Handle; + /// + /// // Inside an async block or function. + /// let handle = Handle::current(); + /// handle.spawn(async { + /// println!("now running in the existing Runtime"); + /// }); + /// + /// # let handle = + /// thread::spawn(move || { + /// // Notice that the handle is created outside of this thread and then moved in + /// handle.spawn(async { /* ... */ }); + /// // This next line would cause a panic because we haven't entered the runtime + /// // and created an EnterGuard + /// // let handle2 = Handle::current(); // panic + /// // So we create a guard here with Handle::enter(); + /// let _guard = handle.enter(); + /// // Now we can call Handle::current(); + /// let handle2 = Handle::current(); + /// }); + /// # handle.join().unwrap(); + /// # }); + /// # } + /// # } + /// ``` + static Future current() => + RustLib.instance.api.tokioRuntimeHandleCurrent(); + + static Future default_() => + RustLib.instance.api.tokioRuntimeHandleDefault(); + + /// Enters the runtime context. This allows you to construct types that must + /// have an executor available on creation such as [`Sleep`] or + /// [`TcpStream`]. It will also allow you to call methods such as + /// [`tokio::spawn`] and [`Handle::current`] without panicking. + /// + /// # Panics + /// + /// When calling `Handle::enter` multiple times, the returned guards + /// **must** be dropped in the reverse order that they were acquired. + /// Failure to do so will result in a panic and possible memory leaks. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::runtime::Runtime; + /// + /// let rt = Runtime::new().unwrap(); + /// + /// let _guard = rt.enter(); + /// tokio::spawn(async { + /// println!("Hello world!"); + /// }); + /// # } + /// ``` + /// + /// Do **not** do the following, this shows a scenario that will result in a + /// panic and possible memory leak. 
+ /// + /// ```should_panic,ignore-wasm + /// use tokio::runtime::Runtime; + /// + /// let rt1 = Runtime::new().unwrap(); + /// let rt2 = Runtime::new().unwrap(); + /// + /// let enter1 = rt1.enter(); + /// let enter2 = rt2.enter(); + /// + /// drop(enter1); + /// drop(enter2); + /// ``` + /// + /// [`Sleep`]: struct@crate::time::Sleep + /// [`TcpStream`]: struct@crate::net::TcpStream + /// [`tokio::spawn`]: fn@crate::spawn + Future enter(); + + /// Returns the [`Id`] of the current `Runtime`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main(flavor = "current_thread")] + /// async fn main() { + /// println!("Current runtime id: {}", Handle::current().id()); + /// } + /// ``` + /// + /// [`Id`]: struct@crate::runtime::Id + Future id(); + + /// Returns a view that lets you get information about how the runtime + /// is performing. + Future metrics(); + + /// Returns the flavor of the current `Runtime`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{Handle, RuntimeFlavor}; + /// + /// #[tokio::main(flavor = "current_thread")] + /// async fn main() { + /// assert_eq!(RuntimeFlavor::CurrentThread, Handle::current().runtime_flavor()); + /// } + /// ``` + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::runtime::{Handle, RuntimeFlavor}; + /// + /// #[tokio::main(flavor = "multi_thread", worker_threads = 4)] + /// async fn main() { + /// assert_eq!(RuntimeFlavor::MultiThread, Handle::current().runtime_flavor()); + /// } + /// # } + /// ``` + Future runtimeFlavor(); + + /// Returns a Handle view over the currently running Runtime + /// + /// Returns an error if no Runtime has been started + /// + /// Contrary to `current`, this never panics + static Future tryCurrent() => + RustLib.instance.api.tokioRuntimeHandleTryCurrent(); +} + +// Rust type: RustOpaqueMoi> +abstract class Id implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Runtime implements RustOpaqueInterface { + /// Enters the runtime context. + /// + /// This allows you to construct types that must have an executor + /// available on creation such as [`Sleep`] or [`TcpStream`]. It will + /// also allow you to call methods such as [`tokio::spawn`]. + /// + /// [`Sleep`]: struct@crate::time::Sleep + /// [`TcpStream`]: struct@crate::net::TcpStream + /// [`tokio::spawn`]: fn@crate::spawn + /// + /// # Example + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::runtime::Runtime; + /// use tokio::task::JoinHandle; + /// + /// fn function_that_spawns(msg: String) -> JoinHandle<()> { + /// // Had we not used `rt.enter` below, this would panic. + /// tokio::spawn(async move { + /// println!("{}", msg); + /// }) + /// } + /// + /// fn main() { + /// let rt = Runtime::new().unwrap(); + /// + /// let s = "Hello World!".to_string(); + /// + /// // By entering the context, we tie `tokio::spawn` to this executor. + /// let _guard = rt.enter(); + /// let handle = function_that_spawns(s); + /// + /// // Wait for the task before we end the test. + /// rt.block_on(handle).unwrap(); + /// } + /// # } + /// ``` + Future enter(); + + /// Returns a handle to the runtime's spawner. + /// + /// The returned handle can be used to spawn tasks that run on this runtime, and can + /// be cloned to allow moving the `Handle` to other threads. + /// + /// Calling [`Handle::block_on`] on a handle to a `current_thread` runtime is error-prone. 
+ /// Refer to the documentation of [`Handle::block_on`] for more. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::runtime::Runtime; + /// + /// let rt = Runtime::new() + /// .unwrap(); + /// + /// let handle = rt.handle(); + /// + /// // Use the handle... + /// # } + /// ``` + Future handle(); + + /// Returns a view that lets you get information about how the runtime + /// is performing. + Future metrics(); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates a new runtime instance with default configuration values. + /// + /// This results in the multi threaded scheduler, I/O driver, and time driver being + /// initialized. + /// + /// Most applications will not need to call this function directly. Instead, + /// they will use the [`#[tokio::main]` attribute][main]. When a more complex + /// configuration is necessary, the [runtime builder] may be used. + /// + /// See [module level][mod] documentation for more details. + /// + /// # Examples + /// + /// Creating a new `Runtime` with default configuration values. + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// let rt = Runtime::new() + /// .unwrap(); + /// + /// // Use the runtime... + /// ``` + /// + /// [mod]: index.html + /// [main]: ../attr.main.html + /// [threaded scheduler]: index.html#threaded-scheduler + /// [runtime builder]: crate::runtime::Builder + static Future newInstance() => + RustLib.instance.api.tokioRuntimeRuntimeNew(); + + /// Shuts down the runtime, without waiting for any spawned work to stop. + /// + /// This can be useful if you want to drop a runtime from within another runtime. + /// Normally, dropping a runtime will block indefinitely for spawned blocking tasks + /// to complete, which would normally not be permitted within an asynchronous context. + /// By calling `shutdown_background()`, you can drop the runtime from such a context. + /// + /// Note however, that because we do not wait for any blocking tasks to complete, this + /// may result in a resource leak (in that any blocking tasks are still running until they + /// return. + /// + /// See the [struct level documentation](Runtime#shutdown) for more details. + /// + /// This function is equivalent to calling `shutdown_timeout(Duration::from_nanos(0))`. + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::runtime::Runtime; + /// + /// fn main() { + /// let runtime = Runtime::new().unwrap(); + /// + /// runtime.block_on(async move { + /// let inner_runtime = Runtime::new().unwrap(); + /// // ... + /// inner_runtime.shutdown_background(); + /// }); + /// } + /// # } + /// ``` + Future shutdownBackground(); +} + +// Rust type: RustOpaqueMoi> +abstract class RuntimeMetrics implements RustOpaqueInterface { + /// Returns the number of tasks currently scheduled in the runtime's + /// global queue. + /// + /// Tasks that are spawned or notified from a non-runtime thread are + /// scheduled using the runtime's global queue. This metric returns the + /// **current** number of tasks pending in the global queue. As such, the + /// returned value may increase or decrease as new tasks are scheduled and + /// processed. 
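+  ///
+  /// Through these bindings the same metric can be read from Dart; a rough
+  /// sketch (every call in the generated API is asynchronous):
+  ///
+  /// ```dart
+  /// final handle = await Handle.current();
+  /// final metrics = await handle.metrics();
+  /// final depth = await metrics.globalQueueDepth();
+  /// ```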
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.global_queue_depth(); + /// println!("{} tasks currently pending in the runtime's global queue", n); + /// # } + /// ``` + Future globalQueueDepth(); + + /// Returns the current number of alive tasks in the runtime. + /// + /// This counter increases when a task is spawned and decreases when a + /// task exits. + /// + /// Note: When using the multi-threaded runtime this number may not + /// not have strong consistency i.e. no tasks may be running but the metric + /// reports otherwise. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.num_alive_tasks(); + /// println!("Runtime has {} alive tasks", n); + /// # } + /// ``` + Future numAliveTasks(); + + /// Returns the number of worker threads used by the runtime. + /// + /// The number of workers is set by configuring `worker_threads` on + /// `runtime::Builder`. When using the `current_thread` runtime, the return + /// value is always `1`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.num_workers(); + /// println!("Runtime is using {} workers", n); + /// # } + /// ``` + Future numWorkers(); + + /// Returns the total number of times the given worker thread has parked. + /// + /// The worker park count starts at zero when the runtime is created and + /// increases by one each time the worker parks the thread waiting for new + /// inbound events to process. This usually means the worker has processed + /// all pending work and is currently idle. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_park_count(0); + /// println!("worker 0 parked {} times", n); + /// # } + /// ``` + Future workerParkCount({required BigInt worker}); + + /// Returns the total number of times the given worker thread has parked + /// and unparked. + /// + /// The worker park/unpark count starts at zero when the runtime is created + /// and increases by one each time the worker parks the thread waiting for + /// new inbound events to process. This usually means the worker has processed + /// all pending work and is currently idle. When new work becomes available, + /// the worker is unparked and the park/unpark count is again increased by one. + /// + /// An odd count means that the worker is currently parked. 
+ /// An even count means that the worker is currently active. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let metrics = Handle::current().metrics(); + /// let n = metrics.worker_park_unpark_count(0); + /// + /// println!("worker 0 parked and unparked {} times", n); + /// + /// if n % 2 == 0 { + /// println!("worker 0 is active"); + /// } else { + /// println!("worker 0 is parked"); + /// } + /// # } + /// ``` + Future workerParkUnparkCount({required BigInt worker}); + + /// Returns the amount of time the given worker thread has been busy. + /// + /// The worker busy duration starts at zero when the runtime is created and + /// increases whenever the worker is spending time processing work. Using + /// this value can indicate the load of the given worker. If a lot of time + /// is spent busy, then the worker is under load and will check for inbound + /// events less often. + /// + /// The timer is monotonically increasing. It is never decremented or reset + /// to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_total_busy_duration(0); + /// println!("worker 0 was busy for a total of {:?}", n); + /// # } + /// ``` + Future workerTotalBusyDuration({required BigInt worker}); +} + +// Rust type: RustOpaqueMoi> +abstract class TryCurrentError implements RustOpaqueInterface { + /// Returns true if the call failed because there is currently no runtime in + /// the Tokio context. + Future isMissingContext(); + + /// Returns true if the call failed because the Tokio context thread-local + /// had been destroyed. This can usually only happen if in the destructor of + /// other thread-locals. + Future isThreadLocalDestroyed(); +} + +/// The flavor of a `Runtime`. +/// +/// This is the return type for [`Handle::runtime_flavor`](crate::runtime::Handle::runtime_flavor()). +enum RuntimeFlavor { + /// The flavor that executes all tasks on the current thread. + currentThread, + + /// The flavor that executes tasks across multiple threads. 
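+  ///
+  /// From Dart, the flavor reported by `Handle.runtimeFlavor()` can be
+  /// compared against this enum; a rough sketch:
+  ///
+  /// ```dart
+  /// final handle = await Handle.current();
+  /// final isMultiThread =
+  ///     (await handle.runtimeFlavor()) == RuntimeFlavor.multiThread;
+  /// ```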
+ multiThread, + ; +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/signal.dart b/mobile_app/lib/src/rust/third_party/tokio/signal.dart new file mode 100644 index 0000000..fb9884b --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/signal.dart @@ -0,0 +1,63 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `RxFuture` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `fmt` + +/// Completes when a "ctrl-c" notification is sent to the process. +/// +/// While signals are handled very differently between Unix and Windows, both +/// platforms support receiving a signal on "ctrl-c". This function provides a +/// portable API for receiving this notification. +/// +/// Once the returned future is polled, a listener is registered. The future +/// will complete on the first received `ctrl-c` **after** the initial call to +/// either `Future::poll` or `.await`. +/// +/// # Caveats +/// +/// On Unix platforms, the first time that a `Signal` instance is registered for a +/// particular signal kind, an OS signal-handler is installed which replaces the +/// default platform behavior when that signal is received, **for the duration of +/// the entire process**. +/// +/// For example, Unix systems will terminate a process by default when it +/// receives a signal generated by `"CTRL+C"` on the terminal. But, when a +/// `ctrl_c` stream is created to listen for this signal, the time it arrives, +/// it will be translated to a stream event, and the process will continue to +/// execute. **Even if this `Signal` instance is dropped, subsequent `SIGINT` +/// deliveries will end up captured by Tokio, and the default platform behavior +/// will NOT be reset**. +/// +/// Thus, applications should take care to ensure the expected signal behavior +/// occurs as expected after listening for specific signals. +/// +/// # Examples +/// +/// ```rust,no_run +/// use tokio::signal; +/// +/// #[tokio::main] +/// async fn main() { +/// println!("waiting for ctrl-c"); +/// +/// signal::ctrl_c().await.expect("failed to listen for event"); +/// +/// println!("received ctrl-c event"); +/// } +/// ``` +/// +/// Listen in the background: +/// +/// ```rust,no_run +/// tokio::spawn(async move { +/// tokio::signal::ctrl_c().await.unwrap(); +/// // Your handler here +/// }); +/// ``` +Future ctrlC() => RustLib.instance.api.tokioSignalCtrlC(); diff --git a/mobile_app/lib/src/rust/third_party/tokio/signal/unix.dart b/mobile_app/lib/src/rust/third_party/tokio/signal/unix.dart new file mode 100644 index 0000000..19ad12b --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/signal/unix.dart @@ -0,0 +1,200 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import '../../../lib.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `OsExtraData`, `OsStorage`, `SignalInfo` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `clone`, `eq`, `event_info`, `fmt`, `fmt`, `fmt`, `for_each`, `from`, `from`, `hash` +// These functions are ignored (category: IgnoreBecauseOwnerTyShouldIgnore): `default`, `default`, `default` + +/// Creates a new listener which will receive notifications when the current +/// process receives the specified signal `kind`. +/// +/// This function will create a new stream which binds to the default reactor. +/// The `Signal` stream is an infinite stream which will receive +/// notifications whenever a signal is received. More documentation can be +/// found on `Signal` itself, but to reiterate: +/// +/// * Signals may be coalesced beyond what the kernel already does. +/// * Once a signal handler is registered with the process the underlying +/// libc signal handler is never unregistered. +/// +/// A `Signal` stream can be created for a particular signal number +/// multiple times. When a signal is received then all the associated +/// channels will receive the signal notification. +/// +/// # Errors +/// +/// * If the lower-level C functions fail for some reason. +/// * If the previous initialization of this specific signal failed. +/// * If the signal is one of +/// [`signal_hook::FORBIDDEN`](fn@signal_hook_registry::register#panics) +/// +/// # Panics +/// +/// This function panics if there is no current reactor set, or if the `rt` +/// feature flag is not enabled. +Future signal({required SignalKind kind}) => + RustLib.instance.api.tokioSignalUnixSignal(kind: kind); + +// Rust type: RustOpaqueMoi>> +abstract class Context implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi >>> +abstract class PollOption implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Signal implements RustOpaqueInterface, InternalStream { + @override + Future pollRecv({required Context cx}); + + /// Receives the next signal notification event. + /// + /// `None` is returned if no more events can be received by this stream. + /// + /// # Cancel safety + /// + /// This method is cancel safe. If you use it as the event in a + /// [`tokio::select!`](crate::select) statement and some other branch + /// completes first, then it is guaranteed that no signal is lost. + /// + /// # Examples + /// + /// Wait for `SIGHUP` + /// + /// ```rust,no_run + /// use tokio::signal::unix::{signal, SignalKind}; + /// + /// #[tokio::main] + /// async fn main() -> Result<(), Box> { + /// // An infinite stream of hangup signals. + /// let mut stream = signal(SignalKind::hangup())?; + /// + /// // Print whenever a HUP signal is received + /// loop { + /// stream.recv().await; + /// println!("got signal HUP"); + /// } + /// } + /// ``` + Future recv(); +} + +// Rust type: RustOpaqueMoi> +abstract class SignalKind implements RustOpaqueInterface { + /// Represents the `SIGALRM` signal. + /// + /// On Unix systems this signal is sent when a real-time timer has expired. + /// By default, the process is terminated by this signal. 
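+  ///
+  /// A rough Dart-side sketch using the bindings in this file (a Tokio
+  /// runtime/reactor is assumed to be running already):
+  ///
+  /// ```dart
+  /// final kind = await SignalKind.alarm();
+  /// final sig = await signal(kind: kind);
+  /// await sig.recv(); // resolves when a SIGALRM is delivered
+  /// ```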
+ static Future alarm() => + RustLib.instance.api.tokioSignalUnixSignalKindAlarm(); + + /// Get the signal's numeric value. + /// + /// ```rust + /// # use tokio::signal::unix::SignalKind; + /// let kind = SignalKind::interrupt(); + /// assert_eq!(kind.as_raw_value(), libc::SIGINT); + /// ``` + Future asRawValue(); + + /// Represents the `SIGCHLD` signal. + /// + /// On Unix systems this signal is sent when the status of a child process + /// has changed. By default, this signal is ignored. + static Future child() => + RustLib.instance.api.tokioSignalUnixSignalKindChild(); + + /// Allows for listening to any valid OS signal. + /// + /// For example, this can be used for listening for platform-specific + /// signals. + /// ```rust,no_run + /// # use tokio::signal::unix::SignalKind; + /// # let signum = -1; + /// // let signum = libc::OS_SPECIFIC_SIGNAL; + /// let kind = SignalKind::from_raw(signum); + /// ``` + static Future fromRaw({required CInt signum}) => + RustLib.instance.api.tokioSignalUnixSignalKindFromRaw(signum: signum); + + /// Represents the `SIGHUP` signal. + /// + /// On Unix systems this signal is sent when the terminal is disconnected. + /// By default, the process is terminated by this signal. + static Future hangup() => + RustLib.instance.api.tokioSignalUnixSignalKindHangup(); + + /// Represents the `SIGINFO` signal. + /// + /// On Unix systems this signal is sent to request a status update from the + /// process. By default, this signal is ignored. + static Future info() => + RustLib.instance.api.tokioSignalUnixSignalKindInfo(); + + /// Represents the `SIGINT` signal. + /// + /// On Unix systems this signal is sent to interrupt a program. + /// By default, the process is terminated by this signal. + static Future interrupt() => + RustLib.instance.api.tokioSignalUnixSignalKindInterrupt(); + + /// Represents the `SIGIO` signal. + /// + /// On Unix systems this signal is sent when I/O operations are possible + /// on some file descriptor. By default, this signal is ignored. + static Future io() => + RustLib.instance.api.tokioSignalUnixSignalKindIo(); + + /// Represents the `SIGPIPE` signal. + /// + /// On Unix systems this signal is sent when the process attempts to write + /// to a pipe which has no reader. By default, the process is terminated by + /// this signal. + static Future pipe() => + RustLib.instance.api.tokioSignalUnixSignalKindPipe(); + + /// Represents the `SIGQUIT` signal. + /// + /// On Unix systems this signal is sent to issue a shutdown of the + /// process, after which the OS will dump the process core. + /// By default, the process is terminated by this signal. + static Future quit() => + RustLib.instance.api.tokioSignalUnixSignalKindQuit(); + + /// Represents the `SIGTERM` signal. + /// + /// On Unix systems this signal is sent to issue a shutdown of the + /// process. By default, the process is terminated by this signal. + static Future terminate() => + RustLib.instance.api.tokioSignalUnixSignalKindTerminate(); + + /// Represents the `SIGUSR1` signal. + /// + /// On Unix systems this is a user defined signal. + /// By default, the process is terminated by this signal. + static Future userDefined1() => + RustLib.instance.api.tokioSignalUnixSignalKindUserDefined1(); + + /// Represents the `SIGUSR2` signal. + /// + /// On Unix systems this is a user defined signal. + /// By default, the process is terminated by this signal. 
+ static Future userDefined2() => + RustLib.instance.api.tokioSignalUnixSignalKindUserDefined2(); + + /// Represents the `SIGWINCH` signal. + /// + /// On Unix systems this signal is sent when the terminal window is resized. + /// By default, this signal is ignored. + static Future windowChange() => + RustLib.instance.api.tokioSignalUnixSignalKindWindowChange(); +} + +abstract class InternalStream { + Future pollRecv({required Context cx}); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync.dart b/mobile_app/lib/src/rust/third_party/tokio/sync.dart new file mode 100644 index 0000000..17eec1d --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync.dart @@ -0,0 +1,695 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `MappedMutexGuard`, `MutexGuard`, `Mutex`, `OwnedMappedMutexGuard`, `OwnedMutexGuard`, `OwnedRwLockMappedWriteGuard`, `OwnedRwLockReadGuard`, `OwnedRwLockWriteGuard`, `RwLockMappedWriteGuard`, `RwLockReadGuard`, `RwLockWriteGuard`, `TryLockError` + +// Rust type: RustOpaqueMoi> +abstract class AcquireError implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class Barrier implements RustOpaqueInterface { + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates a new barrier that can block a given number of tasks. + /// + /// A barrier will block `n`-1 tasks which call [`Barrier::wait`] and then wake up all + /// tasks at once when the `n`th task calls `wait`. + static Future newInstance({required BigInt n}) => + RustLib.instance.api.tokioSyncBarrierNew(n: n); + + /// Does not resolve until all tasks have rendezvoused here. + /// + /// Barriers are re-usable after all tasks have rendezvoused once, and can + /// be used continuously. + /// + /// A single (arbitrary) future will receive a [`BarrierWaitResult`] that returns `true` from + /// [`BarrierWaitResult::is_leader`] when returning from this function, and all other tasks + /// will receive a result that will return `false` from `is_leader`. + /// + /// # Cancel safety + /// + /// This method is not cancel safe. + Future wait(); +} + +// Rust type: RustOpaqueMoi> +abstract class BarrierWaitResult implements RustOpaqueInterface { + /// Returns `true` if this task from wait is the "leader task". + /// + /// Only one task will have `true` returned from their result, all other tasks will have + /// `false` returned. + Future isLeader(); +} + +// Rust type: RustOpaqueMoi> +abstract class Notify implements RustOpaqueInterface { + /// Create a new `Notify`, initialized without a permit. + /// + /// When using the `tracing` [unstable feature], a `Notify` created with + /// `const_new` will not be instrumented. As such, it will not be visible + /// in [`tokio-console`]. Instead, [`Notify::new`] should be used to create + /// an instrumented object if that is needed. 
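+  ///
+  /// A rough Dart-side sketch of the wait/notify pattern with these bindings
+  /// (a sketch only, using the `newInstance`, `notified`, and `notifyOne`
+  /// members declared later in this class):
+  ///
+  /// ```dart
+  /// final notify = await Notify.newInstance();
+  /// final pending = notify.notified(); // start waiting
+  /// await notify.notifyOne();          // wake the waiter (or store a permit)
+  /// await pending;                     // completes once the notification arrives
+  /// ```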
+ /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Notify; + /// + /// static NOTIFY: Notify = Notify::const_new(); + /// ``` + /// + /// [`tokio-console`]: https://github.com/tokio-rs/console + /// [unstable feature]: crate#unstable-features + static Future constNew() => + RustLib.instance.api.tokioSyncNotifyConstNew(); + + static Future default_() => + RustLib.instance.api.tokioSyncNotifyDefault(); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Create a new `Notify`, initialized without a permit. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Notify; + /// + /// let notify = Notify::new(); + /// ``` + static Future newInstance() => + RustLib.instance.api.tokioSyncNotifyNew(); + + /// Wait for a notification. + /// + /// Equivalent to: + /// + /// ```ignore + /// async fn notified(&self); + /// ``` + /// + /// Each `Notify` value holds a single permit. If a permit is available from + /// an earlier call to [`notify_one()`], then `notified().await` will complete + /// immediately, consuming that permit. Otherwise, `notified().await` waits + /// for a permit to be made available by the next call to `notify_one()`. + /// + /// The `Notified` future is not guaranteed to receive wakeups from calls to + /// `notify_one()` if it has not yet been polled. See the documentation for + /// [`Notified::enable()`] for more details. + /// + /// The `Notified` future is guaranteed to receive wakeups from + /// `notify_waiters()` as soon as it has been created, even if it has not + /// yet been polled. + /// + /// [`notify_one()`]: Notify::notify_one + /// [`Notified::enable()`]: Notified::enable + /// + /// # Cancel safety + /// + /// This method uses a queue to fairly distribute notifications in the order + /// they were requested. Cancelling a call to `notified` makes you lose your + /// place in the queue. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Notify; + /// use std::sync::Arc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let notify = Arc::new(Notify::new()); + /// let notify2 = notify.clone(); + /// + /// tokio::spawn(async move { + /// notify2.notified().await; + /// println!("received notification"); + /// }); + /// + /// println!("sending notification"); + /// notify.notify_one(); + /// # } + /// ``` + Future notified(); + + /// Wait for a notification with an owned `Future`. + /// + /// Unlike [`Self::notified`] which returns a future tied to the `Notify`'s + /// lifetime, `notified_owned` creates a self-contained future that owns its + /// notification state, making it safe to move between threads. + /// + /// See [`Self::notified`] for more details. + /// + /// # Cancel safety + /// + /// This method uses a queue to fairly distribute notifications in the order + /// they were requested. Cancelling a call to `notified_owned` makes you lose your + /// place in the queue. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::Notify; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let notify = Arc::new(Notify::new()); + /// + /// for _ in 0..10 { + /// let notified = notify.clone().notified_owned(); + /// tokio::spawn(async move { + /// notified.await; + /// println!("received notification"); + /// }); + /// } + /// + /// println!("sending notification"); + /// notify.notify_waiters(); + /// # } + /// ``` + Future notifiedOwned(); + + /// Notifies the last waiting task. 
+ /// + /// This function behaves similar to `notify_one`. The only difference is that it wakes + /// the most recently added waiter instead of the oldest waiter. + /// + /// Check the [`notify_one()`] documentation for more info and + /// examples. + /// + /// [`notify_one()`]: Notify::notify_one + Future notifyLast(); + + /// Notifies the first waiting task. + /// + /// If a task is currently waiting, that task is notified. Otherwise, a + /// permit is stored in this `Notify` value and the **next** call to + /// [`notified().await`] will complete immediately consuming the permit made + /// available by this call to `notify_one()`. + /// + /// At most one permit may be stored by `Notify`. Many sequential calls to + /// `notify_one` will result in a single permit being stored. The next call to + /// `notified().await` will complete immediately, but the one after that + /// will wait. + /// + /// [`notified().await`]: Notify::notified() + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Notify; + /// use std::sync::Arc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let notify = Arc::new(Notify::new()); + /// let notify2 = notify.clone(); + /// + /// tokio::spawn(async move { + /// notify2.notified().await; + /// println!("received notification"); + /// }); + /// + /// println!("sending notification"); + /// notify.notify_one(); + /// # } + /// ``` + Future notifyOne(); + + /// Notifies all waiting tasks. + /// + /// If a task is currently waiting, that task is notified. Unlike with + /// `notify_one()`, no permit is stored to be used by the next call to + /// `notified().await`. The purpose of this method is to notify all + /// already registered waiters. Registering for notification is done by + /// acquiring an instance of the `Notified` future via calling `notified()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Notify; + /// use std::sync::Arc; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let notify = Arc::new(Notify::new()); + /// let notify2 = notify.clone(); + /// + /// let notified1 = notify.notified(); + /// let notified2 = notify.notified(); + /// + /// let handle = tokio::spawn(async move { + /// println!("sending notifications"); + /// notify2.notify_waiters(); + /// }); + /// + /// notified1.await; + /// notified2.await; + /// println!("received notifications"); + /// # } + /// ``` + Future notifyWaiters(); +} + +// Rust type: RustOpaqueMoi> +abstract class OwnedSemaphorePermit implements RustOpaqueInterface { + /// Merge two [`OwnedSemaphorePermit`] instances together, consuming `other` + /// without releasing the permits it holds. + /// + /// Permits held by both `self` and `other` are released when `self` drops. + /// + /// # Panics + /// + /// This function panics if permits from different [`Semaphore`] instances + /// are merged. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::Semaphore; + /// + /// let sem = Arc::new(Semaphore::new(10)); + /// let mut permit = sem.clone().try_acquire_owned().unwrap(); + /// + /// for _ in 0..9 { + /// let _permit = sem.clone().try_acquire_owned().unwrap(); + /// // Merge individual permits into a single one. + /// permit.merge(_permit) + /// } + /// + /// assert_eq!(sem.available_permits(), 0); + /// + /// // Release all permits in a single batch. 
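// A minimal, hypothetical sketch of driving the `Notify` binding declared
// above from Dart; it is an illustration, not part of the generated diff.
// Assumptions: the bridge has been initialised (typically `await RustLib.init()`
// in `main()`), the generated sync bindings are imported, and the return types
// whose generics were stripped in this diff are `Future<Notify>` for
// `newInstance()` and `Future<void>` for `notified()` / `notifyOne()`.
Future<void> signalWhenIndexReady() async {
  final notify = await Notify.newInstance();

  // Register interest first; the permit stored by `notifyOne()` ensures the
  // waiter still completes even if it is polled later.
  final waiter = notify.notified().then((_) {
    // e.g. enable the search bar once background indexing finishes
  });

  await notify.notifyOne();
  await waiter;
}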
+ /// drop(permit); + /// + /// assert_eq!(sem.available_permits(), 10); + /// ``` + Future merge({required OwnedSemaphorePermit other}); + + /// Returns the number of permits held by `self`. + Future numPermits(); + + /// Returns the [`Semaphore`] from which this permit was acquired. + Future semaphore(); + + /// Splits `n` permits from `self` and returns a new [`OwnedSemaphorePermit`] instance that holds `n` permits. + /// + /// If there are insufficient permits and it's not possible to reduce by `n`, returns `None`. + /// + /// # Note + /// + /// It will clone the owned `Arc` to construct the new instance. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::Semaphore; + /// + /// let sem = Arc::new(Semaphore::new(3)); + /// + /// let mut p1 = sem.try_acquire_many_owned(3).unwrap(); + /// let p2 = p1.split(1).unwrap(); + /// + /// assert_eq!(p1.num_permits(), 2); + /// assert_eq!(p2.num_permits(), 1); + /// ``` + Future split({required BigInt n}); +} + +// Rust type: RustOpaqueMoi> +abstract class Semaphore implements RustOpaqueInterface { + /// Acquires a permit from the semaphore. + /// + /// If the semaphore has been closed, this returns an [`AcquireError`]. + /// Otherwise, this returns a [`SemaphorePermit`] representing the + /// acquired permit. + /// + /// # Cancel safety + /// + /// This method uses a queue to fairly distribute permits in the order they + /// were requested. Cancelling a call to `acquire` makes you lose your place + /// in the queue. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Semaphore; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let semaphore = Semaphore::new(2); + /// + /// let permit_1 = semaphore.acquire().await.unwrap(); + /// assert_eq!(semaphore.available_permits(), 1); + /// + /// let permit_2 = semaphore.acquire().await.unwrap(); + /// assert_eq!(semaphore.available_permits(), 0); + /// + /// drop(permit_1); + /// assert_eq!(semaphore.available_permits(), 1); + /// # } + /// ``` + /// + /// [`AcquireError`]: crate::sync::AcquireError + /// [`SemaphorePermit`]: crate::sync::SemaphorePermit + Future acquire(); + + /// Acquires `n` permits from the semaphore. + /// + /// If the semaphore has been closed, this returns an [`AcquireError`]. + /// Otherwise, this returns a [`SemaphorePermit`] representing the + /// acquired permits. + /// + /// # Cancel safety + /// + /// This method uses a queue to fairly distribute permits in the order they + /// were requested. Cancelling a call to `acquire_many` makes you lose your + /// place in the queue. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Semaphore; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let semaphore = Semaphore::new(5); + /// + /// let permit = semaphore.acquire_many(3).await.unwrap(); + /// assert_eq!(semaphore.available_permits(), 2); + /// # } + /// ``` + /// + /// [`AcquireError`]: crate::sync::AcquireError + /// [`SemaphorePermit`]: crate::sync::SemaphorePermit + Future acquireMany({required int n}); + + /// Acquires `n` permits from the semaphore. + /// + /// The semaphore must be wrapped in an [`Arc`] to call this method. + /// If the semaphore has been closed, this returns an [`AcquireError`]. + /// Otherwise, this returns a [`OwnedSemaphorePermit`] representing the + /// acquired permit. + /// + /// # Cancel safety + /// + /// This method uses a queue to fairly distribute permits in the order they + /// were requested. 
Cancelling a call to `acquire_many_owned` makes you lose + /// your place in the queue. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::Semaphore; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let semaphore = Arc::new(Semaphore::new(10)); + /// let mut join_handles = Vec::new(); + /// + /// for _ in 0..5 { + /// let permit = semaphore.clone().acquire_many_owned(2).await.unwrap(); + /// join_handles.push(tokio::spawn(async move { + /// // perform task... + /// // explicitly own `permit` in the task + /// drop(permit); + /// })); + /// } + /// + /// for handle in join_handles { + /// handle.await.unwrap(); + /// } + /// # } + /// ``` + /// + /// [`Arc`]: std::sync::Arc + /// [`AcquireError`]: crate::sync::AcquireError + /// [`OwnedSemaphorePermit`]: crate::sync::OwnedSemaphorePermit + Future acquireManyOwned({required int n}); + + /// Acquires a permit from the semaphore. + /// + /// The semaphore must be wrapped in an [`Arc`] to call this method. + /// If the semaphore has been closed, this returns an [`AcquireError`]. + /// Otherwise, this returns a [`OwnedSemaphorePermit`] representing the + /// acquired permit. + /// + /// # Cancel safety + /// + /// This method uses a queue to fairly distribute permits in the order they + /// were requested. Cancelling a call to `acquire_owned` makes you lose your + /// place in the queue. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::Semaphore; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let semaphore = Arc::new(Semaphore::new(3)); + /// let mut join_handles = Vec::new(); + /// + /// for _ in 0..5 { + /// let permit = semaphore.clone().acquire_owned().await.unwrap(); + /// join_handles.push(tokio::spawn(async move { + /// // perform task... + /// // explicitly own `permit` in the task + /// drop(permit); + /// })); + /// } + /// + /// for handle in join_handles { + /// handle.await.unwrap(); + /// } + /// # } + /// ``` + /// + /// [`Arc`]: std::sync::Arc + /// [`AcquireError`]: crate::sync::AcquireError + /// [`OwnedSemaphorePermit`]: crate::sync::OwnedSemaphorePermit + Future acquireOwned(); + + /// Adds `n` new permits to the semaphore. + /// + /// The maximum number of permits is [`Semaphore::MAX_PERMITS`], and this function will panic if the limit is exceeded. + Future addPermits({required BigInt n}); + + /// Returns the current number of available permits. + Future availablePermits(); + + /// Closes the semaphore. + /// + /// This prevents the semaphore from issuing new permits and notifies all pending waiters. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Semaphore; + /// use std::sync::Arc; + /// use tokio::sync::TryAcquireError; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let semaphore = Arc::new(Semaphore::new(1)); + /// let semaphore2 = semaphore.clone(); + /// + /// tokio::spawn(async move { + /// let permit = semaphore.acquire_many(2).await; + /// assert!(permit.is_err()); + /// println!("waiter received error"); + /// }); + /// + /// println!("closing semaphore"); + /// semaphore2.close(); + /// + /// // Cannot obtain more permits + /// assert_eq!(semaphore2.try_acquire().err(), Some(TryAcquireError::Closed)) + /// # } + /// ``` + Future close(); + + /// Creates a new semaphore with the initial number of permits. 
+ /// + /// When using the `tracing` [unstable feature], a `Semaphore` created with + /// `const_new` will not be instrumented. As such, it will not be visible + /// in [`tokio-console`]. Instead, [`Semaphore::new`] should be used to + /// create an instrumented object if that is needed. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::Semaphore; + /// + /// static SEM: Semaphore = Semaphore::const_new(10); + /// ``` + /// + /// [`tokio-console`]: https://github.com/tokio-rs/console + /// [unstable feature]: crate#unstable-features + static Future constNew({required BigInt permits}) => + RustLib.instance.api.tokioSyncSemaphoreConstNew(permits: permits); + + /// Decrease a semaphore's permits by a maximum of `n`. + /// + /// If there are insufficient permits and it's not possible to reduce by `n`, + /// return the number of permits that were actually reduced. + Future forgetPermits({required BigInt n}); + + /// Returns true if the semaphore is closed + Future isClosed(); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Creates a new semaphore with the initial number of permits. + /// + /// Panics if `permits` exceeds [`Semaphore::MAX_PERMITS`]. + static Future newInstance({required BigInt permits}) => + RustLib.instance.api.tokioSyncSemaphoreNew(permits: permits); + + /// Tries to acquire a permit from the semaphore. + /// + /// If the semaphore has been closed, this returns a [`TryAcquireError::Closed`] + /// and a [`TryAcquireError::NoPermits`] if there are no permits left. Otherwise, + /// this returns a [`SemaphorePermit`] representing the acquired permits. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::{Semaphore, TryAcquireError}; + /// + /// # fn main() { + /// let semaphore = Semaphore::new(2); + /// + /// let permit_1 = semaphore.try_acquire().unwrap(); + /// assert_eq!(semaphore.available_permits(), 1); + /// + /// let permit_2 = semaphore.try_acquire().unwrap(); + /// assert_eq!(semaphore.available_permits(), 0); + /// + /// let permit_3 = semaphore.try_acquire(); + /// assert_eq!(permit_3.err(), Some(TryAcquireError::NoPermits)); + /// # } + /// ``` + /// + /// [`TryAcquireError::Closed`]: crate::sync::TryAcquireError::Closed + /// [`TryAcquireError::NoPermits`]: crate::sync::TryAcquireError::NoPermits + /// [`SemaphorePermit`]: crate::sync::SemaphorePermit + Future tryAcquire(); + + /// Tries to acquire `n` permits from the semaphore. + /// + /// If the semaphore has been closed, this returns a [`TryAcquireError::Closed`] + /// and a [`TryAcquireError::NoPermits`] if there are not enough permits left. + /// Otherwise, this returns a [`SemaphorePermit`] representing the acquired permits. + /// + /// # Examples + /// + /// ``` + /// use tokio::sync::{Semaphore, TryAcquireError}; + /// + /// # fn main() { + /// let semaphore = Semaphore::new(4); + /// + /// let permit_1 = semaphore.try_acquire_many(3).unwrap(); + /// assert_eq!(semaphore.available_permits(), 1); + /// + /// let permit_2 = semaphore.try_acquire_many(2); + /// assert_eq!(permit_2.err(), Some(TryAcquireError::NoPermits)); + /// # } + /// ``` + /// + /// [`TryAcquireError::Closed`]: crate::sync::TryAcquireError::Closed + /// [`TryAcquireError::NoPermits`]: crate::sync::TryAcquireError::NoPermits + /// [`SemaphorePermit`]: crate::sync::SemaphorePermit + Future tryAcquireMany({required int n}); + + /// Tries to acquire `n` permits from the semaphore. + /// + /// The semaphore must be wrapped in an [`Arc`] to call this method. 
If + /// the semaphore has been closed, this returns a [`TryAcquireError::Closed`] + /// and a [`TryAcquireError::NoPermits`] if there are no permits left. + /// Otherwise, this returns a [`OwnedSemaphorePermit`] representing the + /// acquired permit. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::{Semaphore, TryAcquireError}; + /// + /// # fn main() { + /// let semaphore = Arc::new(Semaphore::new(4)); + /// + /// let permit_1 = Arc::clone(&semaphore).try_acquire_many_owned(3).unwrap(); + /// assert_eq!(semaphore.available_permits(), 1); + /// + /// let permit_2 = semaphore.try_acquire_many_owned(2); + /// assert_eq!(permit_2.err(), Some(TryAcquireError::NoPermits)); + /// # } + /// ``` + /// + /// [`Arc`]: std::sync::Arc + /// [`TryAcquireError::Closed`]: crate::sync::TryAcquireError::Closed + /// [`TryAcquireError::NoPermits`]: crate::sync::TryAcquireError::NoPermits + /// [`OwnedSemaphorePermit`]: crate::sync::OwnedSemaphorePermit + Future tryAcquireManyOwned({required int n}); + + /// Tries to acquire a permit from the semaphore. + /// + /// The semaphore must be wrapped in an [`Arc`] to call this method. If + /// the semaphore has been closed, this returns a [`TryAcquireError::Closed`] + /// and a [`TryAcquireError::NoPermits`] if there are no permits left. + /// Otherwise, this returns a [`OwnedSemaphorePermit`] representing the + /// acquired permit. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use tokio::sync::{Semaphore, TryAcquireError}; + /// + /// # fn main() { + /// let semaphore = Arc::new(Semaphore::new(2)); + /// + /// let permit_1 = Arc::clone(&semaphore).try_acquire_owned().unwrap(); + /// assert_eq!(semaphore.available_permits(), 1); + /// + /// let permit_2 = Arc::clone(&semaphore).try_acquire_owned().unwrap(); + /// assert_eq!(semaphore.available_permits(), 0); + /// + /// let permit_3 = semaphore.try_acquire_owned(); + /// assert_eq!(permit_3.err(), Some(TryAcquireError::NoPermits)); + /// # } + /// ``` + /// + /// [`Arc`]: std::sync::Arc + /// [`TryAcquireError::Closed`]: crate::sync::TryAcquireError::Closed + /// [`TryAcquireError::NoPermits`]: crate::sync::TryAcquireError::NoPermits + /// [`OwnedSemaphorePermit`]: crate::sync::OwnedSemaphorePermit + Future tryAcquireOwned(); +} + +// Rust type: RustOpaqueMoi>> +abstract class SemaphorePermit implements RustOpaqueInterface {} + +/// Error returned from the [`Semaphore::try_acquire`] function. +/// +/// [`Semaphore::try_acquire`]: crate::sync::Semaphore::try_acquire +enum TryAcquireError { + /// The semaphore has been [closed] and cannot issue new permits. + /// + /// [closed]: crate::sync::Semaphore::close + closed, + + /// The semaphore has no available permits. + noPermits, + ; +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync/broadcast.dart b/mobile_app/lib/src/rust/third_party/tokio/sync/broadcast.dart new file mode 100644 index 0000000..0130be4 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync/broadcast.dart @@ -0,0 +1,15 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
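// A minimal, hypothetical sketch of the `Semaphore` binding declared in
// sync.dart above, used to bound concurrency from Dart; not part of the
// generated diff. Assumptions: the bridge is initialised (`await RustLib.init()`),
// the generated file is imported, the stripped return types are
// `Future<Semaphore>` / `Future<SemaphorePermit>`, and the opaque permit
// exposes `dispose()` (as flutter_rust_bridge opaque types usually do) so it
// can be released eagerly.
Future<void> runWithAtMostTwo(List<String> paths) async {
  final semaphore = await Semaphore.newInstance(permits: BigInt.from(2));

  Future<void> runOne(String path) async {
    final permit = await semaphore.acquire(); // suspends until a permit is free
    try {
      // ... do the bounded piece of work for `path` here ...
    } finally {
      permit.dispose(); // drop the Rust-side permit so another task can proceed
    }
  }

  await Future.wait(paths.map(runOne));
}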
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `channel` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `RecvGuard`, `Recv`, `Shared`, `Slot`, `Tail`, `WaiterCell`, `Waiter`, `WaitersList` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `as_raw`, `clone`, `clone`, `drop`, `drop`, `drop`, `drop`, `drop`, `drop`, `fmt`, `fmt`, `fmt`, `from_raw`, `pointers`, `poll` +// These functions are ignored (category: IgnoreBecauseOwnerTyShouldIgnore): `strong_count`, `upgrade`, `weak_count` + +// Rust type: RustOpaqueMoi>> +abstract class WeakSenderT implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync/broadcast/error.dart b/mobile_app/lib/src/rust/third_party/tokio/sync/broadcast/error.dart new file mode 100644 index 0000000..41c7d22 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync/broadcast/error.dart @@ -0,0 +1,18 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `clone`, `clone`, `eq`, `eq`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt` + +// Rust type: RustOpaqueMoi>> +abstract class SendError implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class SendErrorT implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class TryRecvError implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync/mpsc/bounded.dart b/mobile_app/lib/src/rust/third_party/tokio/sync/mpsc/bounded.dart new file mode 100644 index 0000000..8fb2ae8 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync/mpsc/bounded.dart @@ -0,0 +1,19 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `clone`, `clone`, `drop`, `drop`, `drop`, `drop`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `next`, `size_hint` +// These functions are ignored (category: IgnoreBecauseNotAllowedOwner): `release`, `same_channel_as_sender`, `same_channel`, `send`, `send` + +// Rust type: RustOpaqueMoi >>> +abstract class PollOptionT implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class PollUsize implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class VecT implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync/mpsc/error.dart b/mobile_app/lib/src/rust/third_party/tokio/sync/mpsc/error.dart new file mode 100644 index 0000000..a3659f2 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync/mpsc/error.dart @@ -0,0 +1,25 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `clone`, `clone`, `clone`, `clone`, `clone`, `eq`, `eq`, `eq`, `eq`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `from` +// These functions are ignored (category: IgnoreBecauseOwnerTyShouldIgnore): `into_inner`, `into_inner` + +// Rust type: RustOpaqueMoi> +abstract class RecvError implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class SendTimeoutErrorT implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class TrySendError implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class TrySendErrorSelf implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi>> +abstract class TrySendErrorT implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync/oneshot.dart b/mobile_app/lib/src/rust/third_party/tokio/sync/oneshot.dart new file mode 100644 index 0000000..8b8640c --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync/oneshot.dart @@ -0,0 +1,14 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. 
+ +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `channel` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `Inner`, `State`, `Task` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `clone`, `drop`, `drop`, `drop`, `fmt`, `fmt`, `fmt`, `fmt`, `poll` + +// Rust type: RustOpaqueMoi>> +abstract class Poll implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/sync/watch.dart b/mobile_app/lib/src/rust/third_party/tokio/sync/watch.dart new file mode 100644 index 0000000..9fcb3e6 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/sync/watch.dart @@ -0,0 +1,15 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `channel`, `send_if_modified`, `send_modify` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `clone`, `clone`, `deref`, `drop`, `drop`, `fmt`, `fmt`, `fmt`, `fmt` +// These functions are ignored (category: IgnoreBecauseOwnerTyShouldIgnore): `has_changed` +// These functions have error during generation (see debug logs or enable `stop_on_error: true` for more details): `wait_for` + +// Rust type: RustOpaqueMoi>> +abstract class RefT implements RustOpaqueInterface {} diff --git a/mobile_app/lib/src/rust/third_party/tokio/task.dart b/mobile_app/lib/src/rust/third_party/tokio/task.dart new file mode 100644 index 0000000..409a418 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/task.dart @@ -0,0 +1,83 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'runtime.dart'; + +// These functions are ignored because they have generic arguments: `block_in_place`, `spawn_blocking`, `spawn_local`, `spawn` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `JoinSet`, `LocalKey` + +/// Yields execution back to the Tokio runtime. +/// +/// A task yields by awaiting on `yield_now()`, and may resume when that future +/// completes (with no output.) The current task will be re-added as a pending +/// task at the _back_ of the pending queue. Any other pending tasks will be +/// scheduled. No other waking is required for the task to continue. +/// +/// See also the usage example in the [task module](index.html#yield_now). +/// +/// ## Non-guarantees +/// +/// This function may not yield all the way up to the executor if there are any +/// special combinators above it in the call stack. 
For example, if a +/// [`tokio::select!`] has another branch complete during the same poll as the +/// `yield_now()`, then the yield is not propagated all the way up to the +/// runtime. +/// +/// It is generally not guaranteed that the runtime behaves like you expect it +/// to when deciding which task to schedule next after a call to `yield_now()`. +/// In particular, the runtime may choose to poll the task that just ran +/// `yield_now()` again immediately without polling any other tasks first. For +/// example, the runtime will not drive the IO driver between every poll of a +/// task, and this could result in the runtime polling the current task again +/// immediately even if there is another task that could make progress if that +/// other task is waiting for a notification from the IO driver. +/// +/// In general, changes to the order in which the runtime polls tasks is not +/// considered a breaking change, and your program should be correct no matter +/// which order the runtime polls your tasks in. +/// +/// [`tokio::select!`]: macro@crate::select +Future yieldNow() => RustLib.instance.api.tokioTaskYieldNow(); + +// Rust type: RustOpaqueMoi> +abstract class LocalEnterGuard implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class LocalSet implements RustOpaqueInterface { + static Future default_() => + RustLib.instance.api.tokioTaskLocalSetDefault(); + + /// Enters the context of this `LocalSet`. + /// + /// The [`spawn_local`] method will spawn tasks on the `LocalSet` whose + /// context you are inside. + /// + /// [`spawn_local`]: fn@crate::task::spawn_local + Future enter(); + + /// Returns the [`Id`] of the current [`LocalSet`] runtime. + /// + /// # Examples + /// + /// ```rust + /// use tokio::task; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let local_set = task::LocalSet::new(); + /// println!("Local set id: {}", local_set.id()); + /// # } + /// ``` + /// + /// [`Id`]: struct@crate::runtime::Id + Future id(); + + // HINT: Make it `#[frb(sync)]` to let it become the default constructor of Dart class. + /// Returns a new local task set. + static Future newInstance() => + RustLib.instance.api.tokioTaskLocalSetNew(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/task/coop.dart b/mobile_app/lib/src/rust/third_party/tokio/task/coop.dart new file mode 100644 index 0000000..8725f16 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/task/coop.dart @@ -0,0 +1,157 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import '../signal/unix.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These functions are ignored because they have generic arguments: `cooperative`, `unconstrained` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `BudgetDecrement`, `Budget`, `Coop`, `Unconstrained` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `clone`, `drop`, `fmt`, `fmt`, `poll` + +/// Consumes a unit of budget and returns the execution back to the Tokio +/// runtime *if* the task's coop budget was exhausted. +/// +/// The task will only yield if its entire coop budget has been exhausted. 
+/// This function can be used in order to insert optional yield points into long +/// computations that do not use Tokio resources like sockets or semaphores, +/// without redundantly yielding to the runtime each time. +/// +/// # Examples +/// +/// Make sure that a function which returns a sum of (potentially lots of) +/// iterated values is cooperative. +/// +/// ``` +/// async fn sum_iterator(input: &mut impl std::iter::Iterator) -> i64 { +/// let mut sum: i64 = 0; +/// while let Some(i) = input.next() { +/// sum += i; +/// tokio::task::consume_budget().await +/// } +/// sum +/// } +/// ``` +Future consumeBudget() => + RustLib.instance.api.tokioTaskCoopConsumeBudget(); + +/// Returns `true` if there is still budget left on the task. +/// +/// # Examples +/// +/// This example defines a `Timeout` future that requires a given `future` to complete before the +/// specified duration elapses. If it does, its result is returned; otherwise, an error is returned +/// and the future is canceled. +/// +/// Note that the future could exhaust the budget before we evaluate the timeout. Using `has_budget_remaining`, +/// we can detect this scenario and ensure the timeout is always checked. +/// +/// ``` +/// # use std::future::Future; +/// # use std::pin::{pin, Pin}; +/// # use std::task::{ready, Context, Poll}; +/// # use tokio::task::coop; +/// # use tokio::time::Sleep; +/// pub struct Timeout { +/// future: T, +/// delay: Pin>, +/// } +/// +/// impl Future for Timeout +/// where +/// T: Future + Unpin, +/// { +/// type Output = Result; +/// +/// fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { +/// let this = Pin::into_inner(self); +/// let future = Pin::new(&mut this.future); +/// let delay = Pin::new(&mut this.delay); +/// +/// // check if the future is ready +/// let had_budget_before = coop::has_budget_remaining(); +/// if let Poll::Ready(v) = future.poll(cx) { +/// return Poll::Ready(Ok(v)); +/// } +/// let has_budget_now = coop::has_budget_remaining(); +/// +/// // evaluate the timeout +/// if let (true, false) = (had_budget_before, has_budget_now) { +/// // it is the underlying future that exhausted the budget +/// ready!(pin!(coop::unconstrained(delay)).poll(cx)); +/// } else { +/// ready!(delay.poll(cx)); +/// } +/// return Poll::Ready(Err(())); +/// } +/// } +///``` +Future hasBudgetRemaining() => + RustLib.instance.api.tokioTaskCoopHasBudgetRemaining(); + +/// Decrements the task budget and returns [`Poll::Pending`] if the budget is depleted. +/// This indicates that the task should yield to the scheduler. Otherwise, returns +/// [`RestoreOnPending`] which can be used to commit the budget consumption. +/// +/// The returned [`RestoreOnPending`] will revert the budget to its former +/// value when dropped unless [`RestoreOnPending::made_progress`] +/// is called. It is the caller's responsibility to do so when it _was_ able to +/// make progress after the call to [`poll_proceed`]. +/// Restoring the budget automatically ensures the task can try to make progress in some other +/// way. +/// +/// Note that [`RestoreOnPending`] restores the budget **as it was before [`poll_proceed`]**. +/// Therefore, if the budget is _further_ adjusted between when [`poll_proceed`] returns and +/// [`RestoreOnPending`] is dropped, those adjustments are erased unless the caller indicates +/// that progress was made. +/// +/// # Examples +/// +/// This example wraps the `futures::channel::mpsc::UnboundedReceiver` to +/// cooperate with the Tokio scheduler. 
Each time a value is received, task budget +/// is consumed. If no budget is available, the task yields to the scheduler. +/// +/// ``` +/// use std::pin::Pin; +/// use std::task::{ready, Context, Poll}; +/// use tokio::task::coop; +/// use futures::stream::{Stream, StreamExt}; +/// use futures::channel::mpsc::UnboundedReceiver; +/// +/// struct CoopUnboundedReceiver { +/// receiver: UnboundedReceiver, +/// } +/// +/// impl Stream for CoopUnboundedReceiver { +/// type Item = T; +/// fn poll_next( +/// mut self: Pin<&mut Self>, +/// cx: &mut Context<'_> +/// ) -> Poll> { +/// let coop = ready!(coop::poll_proceed(cx)); +/// match self.receiver.poll_next_unpin(cx) { +/// Poll::Ready(v) => { +/// // We received a value, so consume budget. +/// coop.made_progress(); +/// Poll::Ready(v) +/// } +/// Poll::Pending => Poll::Pending, +/// } +/// } +/// } +/// ``` +Future pollProceed({required Context cx}) => + RustLib.instance.api.tokioTaskCoopPollProceed(cx: cx); + +// Rust type: RustOpaqueMoi>> +abstract class PollRestoreOnPending implements RustOpaqueInterface {} + +// Rust type: RustOpaqueMoi> +abstract class RestoreOnPending implements RustOpaqueInterface { + /// Signals that the task that obtained this `RestoreOnPending` was able to make + /// progress. This prevents the task budget from being restored to the value + /// it had prior to obtaining this instance when it is dropped. + Future madeProgress(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/time.dart b/mobile_app/lib/src/rust/third_party/tokio/time.dart new file mode 100644 index 0000000..1e2af9b --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/time.dart @@ -0,0 +1,734 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../frb_generated.dart'; +import '../../lib.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; +import 'signal/unix.dart'; +import 'time/interval.dart'; + +// These functions are ignored because they have generic arguments: `timeout_at`, `timeout` +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `Timeout` + +/// Pauses time. +/// +/// The current value of `Instant::now()` is saved and all subsequent calls +/// to `Instant::now()` will return the saved value. The saved value can be +/// changed by [`advance`] or by the time auto-advancing once the runtime +/// has no work to do. This only affects the `Instant` type in Tokio, and +/// the `Instant` in std continues to work as normal. +/// +/// Pausing time requires the `current_thread` Tokio runtime. This is the +/// default runtime used by `#[tokio::test]`. The runtime can be initialized +/// with time in a paused state using the `Builder::start_paused` method. +/// +/// For cases where time is immediately paused, it is better to pause +/// the time using the `main` or `test` macro: +/// ``` +/// #[tokio::main(flavor = "current_thread", start_paused = true)] +/// async fn main() { +/// println!("Hello world"); +/// } +/// ``` +/// +/// # Panics +/// +/// Panics if time is already frozen or if called from outside of a +/// `current_thread` Tokio runtime. +/// +/// # Auto-advance +/// +/// If time is paused and the runtime has no work to do, the clock is +/// auto-advanced to the next pending timer. 
This means that [`Sleep`] or +/// other timer-backed primitives can cause the runtime to advance the +/// current time when awaited. +/// +/// [`Sleep`]: crate::time::Sleep +/// [`advance`]: crate::time::advance +Future pause() => RustLib.instance.api.tokioTimePause(); + +/// Resumes time. +/// +/// Clears the saved `Instant::now()` value. Subsequent calls to +/// `Instant::now()` will return the value returned by the system call. +/// +/// # Panics +/// +/// Panics if time is not frozen or if called from outside of the Tokio +/// runtime. +Future resume() => RustLib.instance.api.tokioTimeResume(); + +/// Advances time. +/// +/// Increments the saved `Instant::now()` value by `duration`. Subsequent +/// calls to `Instant::now()` will return the result of the increment. +/// +/// This function will make the current time jump forward by the given +/// duration in one jump. This means that all `sleep` calls with a deadline +/// before the new time will immediately complete "at the same time", and +/// the runtime is free to poll them in any order. Additionally, this +/// method will not wait for the `sleep` calls it advanced past to complete. +/// If you want to do that, you should instead call [`sleep`] and rely on +/// the runtime's auto-advance feature. +/// +/// Note that calls to `sleep` are not guaranteed to complete the first time +/// they are polled after a call to `advance`. For example, this can happen +/// if the runtime has not yet touched the timer driver after the call to +/// `advance`. However if they don't, the runtime will poll the task again +/// shortly. +/// +/// # Panics +/// +/// Panics if any of the following conditions are met: +/// +/// - The clock is not frozen, which means that you must +/// call [`pause`] before calling this method. +/// - If called outside of the Tokio runtime. +/// - If the input `duration` is too large (such as [`Duration::MAX`]) +/// to be safely added to the current time without causing an overflow. +/// +/// # Caveats +/// +/// Using a very large `duration` is not recommended, +/// as it may cause panicking due to overflow. +/// +/// # Auto-advance +/// +/// If the time is paused and there is no work to do, the runtime advances +/// time to the next timer. See [`pause`](pause#auto-advance) for more +/// details. +/// +/// [`sleep`]: fn@crate::time::sleep +Future advance({required Duration duration}) => + RustLib.instance.api.tokioTimeAdvance(duration: duration); + +/// Creates new [`Interval`] that yields with interval of `period`. The first +/// tick completes immediately. The default [`MissedTickBehavior`] is +/// [`Burst`](MissedTickBehavior::Burst), but this can be configured +/// by calling [`set_missed_tick_behavior`](Interval::set_missed_tick_behavior). +/// +/// An interval will tick indefinitely. At any time, the [`Interval`] value can +/// be dropped. This cancels the interval. +/// +/// This function is equivalent to +/// [`interval_at(Instant::now(), period)`](interval_at). +/// +/// # Panics +/// +/// This function panics if `period` is zero. +/// +/// # Examples +/// +/// ``` +/// use tokio::time::{self, Duration}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// let mut interval = time::interval(Duration::from_millis(10)); +/// +/// interval.tick().await; // ticks immediately +/// interval.tick().await; // ticks after 10ms +/// interval.tick().await; // ticks after 10ms +/// +/// // approximately 20ms have elapsed. 
+/// # } +/// ``` +/// +/// A simple example using `interval` to execute a task every two seconds. +/// +/// The difference between `interval` and [`sleep`] is that an [`Interval`] +/// measures the time since the last tick, which means that [`.tick().await`] +/// may wait for a shorter time than the duration specified for the interval +/// if some time has passed between calls to [`.tick().await`]. +/// +/// If the tick in the example below was replaced with [`sleep`], the task +/// would only be executed once every three seconds, and not every two +/// seconds. +/// +/// ``` +/// use tokio::time; +/// +/// async fn task_that_takes_a_second() { +/// println!("hello"); +/// time::sleep(time::Duration::from_secs(1)).await +/// } +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// let mut interval = time::interval(time::Duration::from_secs(2)); +/// for _i in 0..5 { +/// interval.tick().await; +/// task_that_takes_a_second().await; +/// } +/// # } +/// ``` +/// +/// [`sleep`]: crate::time::sleep() +/// [`.tick().await`]: Interval::tick +Future interval({required Duration period}) => + RustLib.instance.api.tokioTimeInterval(period: period); + +/// Creates new [`Interval`] that yields with interval of `period` with the +/// first tick completing at `start`. The default [`MissedTickBehavior`] is +/// [`Burst`](MissedTickBehavior::Burst), but this can be configured +/// by calling [`set_missed_tick_behavior`](Interval::set_missed_tick_behavior). +/// +/// An interval will tick indefinitely. At any time, the [`Interval`] value can +/// be dropped. This cancels the interval. +/// +/// # Panics +/// +/// This function panics if `period` is zero. +/// +/// # Examples +/// +/// ``` +/// use tokio::time::{interval_at, Duration, Instant}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// let start = Instant::now() + Duration::from_millis(50); +/// let mut interval = interval_at(start, Duration::from_millis(10)); +/// +/// interval.tick().await; // ticks after 50ms +/// interval.tick().await; // ticks after 10ms +/// interval.tick().await; // ticks after 10ms +/// +/// // approximately 70ms have elapsed. +/// # } +/// ``` +Future intervalAt( + {required Instant start, required Duration period}) => + RustLib.instance.api.tokioTimeIntervalAt(start: start, period: period); + +/// Waits until `deadline` is reached. +/// +/// No work is performed while awaiting on the sleep future to complete. `Sleep` +/// operates at millisecond granularity and should not be used for tasks that +/// require high-resolution timers. +/// +/// To run something regularly on a schedule, see [`interval`]. +/// +/// # Cancellation +/// +/// Canceling a sleep instance is done by dropping the returned future. No additional +/// cleanup work is required. +/// +/// # Examples +/// +/// Wait 100ms and print "100 ms have elapsed". +/// +/// ``` +/// use tokio::time::{sleep_until, Instant, Duration}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// sleep_until(Instant::now() + Duration::from_millis(100)).await; +/// println!("100 ms have elapsed"); +/// # } +/// ``` +/// +/// See the documentation for the [`Sleep`] type for more examples. +/// +/// # Panics +/// +/// This function panics if there is no current timer set. +/// +/// It can be triggered when [`Builder::enable_time`] or +/// [`Builder::enable_all`] are not included in the builder. +/// +/// It can also panic whenever a timer is created outside of a +/// Tokio runtime. 
That is why `rt.block_on(sleep(...))` will panic, +/// since the function is executed outside of the runtime. +/// Whereas `rt.block_on(async {sleep(...).await})` doesn't panic. +/// And this is because wrapping the function on an async makes it lazy, +/// and so gets executed inside the runtime successfully without +/// panicking. +/// +/// [`Sleep`]: struct@crate::time::Sleep +/// [`interval`]: crate::time::interval() +/// [`Builder::enable_time`]: crate::runtime::Builder::enable_time +/// [`Builder::enable_all`]: crate::runtime::Builder::enable_all +Future sleepUntil({required Instant deadline}) => + RustLib.instance.api.tokioTimeSleepUntil(deadline: deadline); + +/// Waits until `duration` has elapsed. +/// +/// Equivalent to `sleep_until(Instant::now() + duration)`. An asynchronous +/// analog to `std::thread::sleep`. +/// +/// No work is performed while awaiting on the sleep future to complete. `Sleep` +/// operates at millisecond granularity and should not be used for tasks that +/// require high-resolution timers. The implementation is platform specific, +/// and some platforms (specifically Windows) will provide timers with a +/// larger resolution than 1 ms. +/// +/// To run something regularly on a schedule, see [`interval`]. +/// +/// # Cancellation +/// +/// Canceling a sleep instance is done by dropping the returned future. No additional +/// cleanup work is required. +/// +/// # Examples +/// +/// Wait 100ms and print "100 ms have elapsed". +/// +/// ``` +/// use tokio::time::{sleep, Duration}; +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// sleep(Duration::from_millis(100)).await; +/// println!("100 ms have elapsed"); +/// # } +/// ``` +/// +/// See the documentation for the [`Sleep`] type for more examples. +/// +/// # Panics +/// +/// This function panics if there is no current timer set. +/// +/// It can be triggered when [`Builder::enable_time`] or +/// [`Builder::enable_all`] are not included in the builder. +/// +/// It can also panic whenever a timer is created outside of a +/// Tokio runtime. That is why `rt.block_on(sleep(...))` will panic, +/// since the function is executed outside of the runtime. +/// Whereas `rt.block_on(async {sleep(...).await})` doesn't panic. +/// And this is because wrapping the function on an async makes it lazy, +/// and so gets executed inside the runtime successfully without +/// panicking. +/// +/// [`Sleep`]: struct@crate::time::Sleep +/// [`interval`]: crate::time::interval() +/// [`Builder::enable_time`]: crate::runtime::Builder::enable_time +/// [`Builder::enable_all`]: crate::runtime::Builder::enable_all +Future sleep({required Duration duration}) => + RustLib.instance.api.tokioTimeSleep(duration: duration); + +// Rust type: RustOpaqueMoi> +abstract class Interval implements RustOpaqueInterface { + /// Returns the [`MissedTickBehavior`] strategy currently being used. + Future missedTickBehavior(); + + /// Returns the period of the interval. + Future period(); + + /// Polls for the next instant in the interval to be reached. + /// + /// This method can return the following values: + /// + /// * `Poll::Pending` if the next instant has not yet been reached. + /// * `Poll::Ready(instant)` if the next instant has been reached. + /// + /// When this method returns `Poll::Pending`, the current task is scheduled + /// to receive a wakeup when the instant has elapsed. 
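// A minimal, hypothetical sketch of the `interval` and `sleep` bindings
// declared above, driven from Dart; not part of the generated diff.
// Assumptions: the bridge is initialised, Rust's `Duration` maps onto Dart's
// `Duration` in these generated signatures, and the stripped return types are
// `Future<Interval>` for `interval()` and `Future<void>` for `sleep()`.
Future<void> tickFiveTimes() async {
  final ticker = await interval(period: const Duration(seconds: 2));
  for (var i = 0; i < 5; i++) {
    await ticker.tick(); // the first tick completes immediately, then every 2s
    // ... refresh results, re-scan a folder, etc. ...
  }

  // A one-off delay scheduled on the Tokio runtime rather than via
  // Dart's own `Future.delayed`.
  await sleep(duration: const Duration(milliseconds: 100));
}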
Note that on multiple + /// calls to `poll_tick`, only the [`Waker`](std::task::Waker) from the + /// [`Context`] passed to the most recent call is scheduled to receive a + /// wakeup. + Future pollTick({required Context cx}); + + /// Resets the interval to complete one period after the current time. + /// + /// This method ignores [`MissedTickBehavior`] strategy. + /// + /// This is equivalent to calling `reset_at(Instant::now() + period)`. + /// + /// # Examples + /// + /// ``` + /// use tokio::time; + /// + /// use std::time::Duration; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = time::interval(Duration::from_millis(100)); + /// + /// interval.tick().await; + /// + /// time::sleep(Duration::from_millis(50)).await; + /// interval.reset(); + /// + /// interval.tick().await; + /// interval.tick().await; + /// + /// // approximately 250ms have elapsed. + /// # } + /// ``` + Future reset(); + + /// Resets the interval after the specified [`std::time::Duration`]. + /// + /// This method ignores [`MissedTickBehavior`] strategy. + /// + /// This is equivalent to calling `reset_at(Instant::now() + after)`. + /// + /// # Examples + /// + /// ``` + /// use tokio::time; + /// + /// use std::time::Duration; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = time::interval(Duration::from_millis(100)); + /// interval.tick().await; + /// + /// time::sleep(Duration::from_millis(50)).await; + /// + /// let after = Duration::from_millis(20); + /// interval.reset_after(after); + /// + /// interval.tick().await; + /// interval.tick().await; + /// + /// // approximately 170ms have elapsed. + /// # } + /// ``` + Future resetAfter({required Duration after}); + + /// Resets the interval to a [`crate::time::Instant`] deadline. + /// + /// Sets the next tick to expire at the given instant. If the instant is in + /// the past, then the [`MissedTickBehavior`] strategy will be used to + /// catch up. If the instant is in the future, then the next tick will + /// complete at the given instant, even if that means that it will sleep for + /// longer than the duration of this [`Interval`]. If the [`Interval`] had + /// any missed ticks before calling this method, then those are discarded. + /// + /// # Examples + /// + /// ``` + /// use tokio::time::{self, Instant}; + /// + /// use std::time::Duration; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = time::interval(Duration::from_millis(100)); + /// interval.tick().await; + /// + /// time::sleep(Duration::from_millis(50)).await; + /// + /// let deadline = Instant::now() + Duration::from_millis(30); + /// interval.reset_at(deadline); + /// + /// interval.tick().await; + /// interval.tick().await; + /// + /// // approximately 180ms have elapsed. + /// # } + /// ``` + Future resetAt({required Instant deadline}); + + /// Resets the interval immediately. + /// + /// This method ignores [`MissedTickBehavior`] strategy. + /// + /// This is equivalent to calling `reset_at(Instant::now())`. 
+ /// + /// # Examples + /// + /// ``` + /// use tokio::time; + /// + /// use std::time::Duration; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = time::interval(Duration::from_millis(100)); + /// + /// interval.tick().await; + /// + /// time::sleep(Duration::from_millis(50)).await; + /// interval.reset_immediately(); + /// + /// interval.tick().await; + /// interval.tick().await; + /// + /// // approximately 150ms have elapsed. + /// # } + /// ``` + Future resetImmediately(); + + /// Sets the [`MissedTickBehavior`] strategy that should be used. + Future setMissedTickBehavior({required MissedTickBehavior behavior}); + + /// Completes when the next instant in the interval has been reached. + /// + /// # Cancel safety + /// + /// This method is cancellation safe. If `tick` is used as the branch in a `tokio::select!` and + /// another branch completes first, then no tick has been consumed. + /// + /// # Examples + /// + /// ``` + /// use tokio::time; + /// + /// use std::time::Duration; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = time::interval(Duration::from_millis(10)); + /// + /// interval.tick().await; + /// // approximately 0ms have elapsed. The first tick completes immediately. + /// interval.tick().await; + /// interval.tick().await; + /// + /// // approximately 20ms have elapsed. + /// # } + /// ``` + Future tick(); +} + +// Rust type: RustOpaqueMoi> +abstract class Sleep implements RustOpaqueInterface { + /// Returns the instant at which the future will complete. + Future deadline(); + + /// Returns `true` if `Sleep` has elapsed. + /// + /// A `Sleep` instance is elapsed when the requested duration has elapsed. + Future isElapsed(); + + /// Resets the `Sleep` instance to a new deadline. + /// + /// Calling this function allows changing the instant at which the `Sleep` + /// future completes without having to create new associated state. + /// + /// This function can be called both before and after the future has + /// completed. + /// + /// To call this method, you will usually combine the call with + /// [`Pin::as_mut`], which lets you call the method without consuming the + /// `Sleep` itself. + /// + /// # Example + /// + /// ``` + /// use tokio::time::{Duration, Instant}; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let sleep = tokio::time::sleep(Duration::from_millis(10)); + /// tokio::pin!(sleep); + /// + /// sleep.as_mut().reset(Instant::now() + Duration::from_millis(20)); + /// # } + /// ``` + /// + /// See also the top-level examples. + /// + /// [`Pin::as_mut`]: fn@std::pin::Pin::as_mut + Future reset({required Instant deadline}); +} + +/// Defines the behavior of an [`Interval`] when it misses a tick. +/// +/// Sometimes, an [`Interval`]'s tick is missed. For example, consider the +/// following: +/// +/// ``` +/// use tokio::time::{self, Duration}; +/// # async fn task_that_takes_one_to_three_millis() {} +/// +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() { +/// // ticks every 2 milliseconds +/// let mut interval = time::interval(Duration::from_millis(2)); +/// for _ in 0..5 { +/// interval.tick().await; +/// // if this takes more than 2 milliseconds, a tick will be delayed +/// task_that_takes_one_to_three_millis().await; +/// } +/// # } +/// ``` +/// +/// Generally, a tick is missed if too much time is spent without calling +/// [`Interval::tick()`]. 
+/// +/// By default, when a tick is missed, [`Interval`] fires ticks as quickly as it +/// can until it is "caught up" in time to where it should be. +/// `MissedTickBehavior` can be used to specify a different behavior for +/// [`Interval`] to exhibit. Each variant represents a different strategy. +/// +/// Note that because the executor cannot guarantee exact precision with timers, +/// these strategies will only apply when the delay is greater than 5 +/// milliseconds. +enum MissedTickBehavior { + /// Ticks as fast as possible until caught up. + /// + /// When this strategy is used, [`Interval`] schedules ticks "normally" (the + /// same as it would have if the ticks hadn't been delayed), which results + /// in it firing ticks as fast as possible until it is caught up in time to + /// where it should be. Unlike [`Delay`] and [`Skip`], the ticks yielded + /// when `Burst` is used (the [`Instant`]s that [`tick`](Interval::tick) + /// yields) aren't different than they would have been if a tick had not + /// been missed. Like [`Skip`], and unlike [`Delay`], the ticks may be + /// shortened. + /// + /// This looks something like this: + /// ```text + /// Expected ticks: | 1 | 2 | 3 | 4 | 5 | 6 | + /// Actual ticks: | work -----| delay | work | work | work -| work -----| + /// ``` + /// + /// In code: + /// + /// ``` + /// use tokio::time::{interval, Duration}; + /// # async fn task_that_takes_200_millis() {} + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = interval(Duration::from_millis(50)); + /// + /// // First tick resolves immediately after creation + /// interval.tick().await; + /// + /// task_that_takes_200_millis().await; + /// // The `Interval` has missed a tick + /// + /// // Since we have exceeded our timeout, this will resolve immediately + /// interval.tick().await; + /// + /// // Since we are more than 100ms after the start of `interval`, this will + /// // also resolve immediately. + /// interval.tick().await; + /// + /// // Also resolves immediately, because it was supposed to resolve at + /// // 150ms after the start of `interval` + /// interval.tick().await; + /// + /// // Resolves immediately + /// interval.tick().await; + /// + /// // Since we have gotten to 200ms after the start of `interval`, this + /// // will resolve after 50ms + /// interval.tick().await; + /// # } + /// ``` + /// + /// This is the default behavior when [`Interval`] is created with + /// [`interval`] and [`interval_at`]. + /// + /// [`Delay`]: MissedTickBehavior::Delay + /// [`Skip`]: MissedTickBehavior::Skip + burst, + + /// Tick at multiples of `period` from when [`tick`] was called, rather than + /// from `start`. + /// + /// When this strategy is used and [`Interval`] has missed a tick, instead + /// of scheduling ticks to fire at multiples of `period` from `start` (the + /// time when the first tick was fired), it schedules all future ticks to + /// happen at a regular `period` from the point when [`tick`] was called. + /// Unlike [`Burst`] and [`Skip`], ticks are not shortened, and they aren't + /// guaranteed to happen at a multiple of `period` from `start` any longer. 
+ /// + /// This looks something like this: + /// ```text + /// Expected ticks: | 1 | 2 | 3 | 4 | 5 | 6 | + /// Actual ticks: | work -----| delay | work -----| work -----| work -----| + /// ``` + /// + /// In code: + /// + /// ``` + /// use tokio::time::{interval, Duration, MissedTickBehavior}; + /// # async fn task_that_takes_more_than_50_millis() {} + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = interval(Duration::from_millis(50)); + /// interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + /// + /// task_that_takes_more_than_50_millis().await; + /// // The `Interval` has missed a tick + /// + /// // Since we have exceeded our timeout, this will resolve immediately + /// interval.tick().await; + /// + /// // But this one, rather than also resolving immediately, as might happen + /// // with the `Burst` or `Skip` behaviors, will not resolve until + /// // 50ms after the call to `tick` up above. That is, in `tick`, when we + /// // recognize that we missed a tick, we schedule the next tick to happen + /// // 50ms (or whatever the `period` is) from right then, not from when + /// // were *supposed* to tick + /// interval.tick().await; + /// # } + /// ``` + /// + /// [`Burst`]: MissedTickBehavior::Burst + /// [`Skip`]: MissedTickBehavior::Skip + /// [`tick`]: Interval::tick + delay, + + /// Skips missed ticks and tick on the next multiple of `period` from + /// `start`. + /// + /// When this strategy is used, [`Interval`] schedules the next tick to fire + /// at the next-closest tick that is a multiple of `period` away from + /// `start` (the point where [`Interval`] first ticked). Like [`Burst`], all + /// ticks remain multiples of `period` away from `start`, but unlike + /// [`Burst`], the ticks may not be *one* multiple of `period` away from the + /// last tick. Like [`Delay`], the ticks are no longer the same as they + /// would have been if ticks had not been missed, but unlike [`Delay`], and + /// like [`Burst`], the ticks may be shortened to be less than one `period` + /// away from each other. + /// + /// This looks something like this: + /// ```text + /// Expected ticks: | 1 | 2 | 3 | 4 | 5 | 6 | + /// Actual ticks: | work -----| delay | work ---| work -----| work -----| + /// ``` + /// + /// In code: + /// + /// ``` + /// use tokio::time::{interval, Duration, MissedTickBehavior}; + /// # async fn task_that_takes_75_millis() {} + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() { + /// let mut interval = interval(Duration::from_millis(50)); + /// interval.set_missed_tick_behavior(MissedTickBehavior::Skip); + /// + /// task_that_takes_75_millis().await; + /// // The `Interval` has missed a tick + /// + /// // Since we have exceeded our timeout, this will resolve immediately + /// interval.tick().await; + /// + /// // This one will resolve after 25ms, 100ms after the start of + /// // `interval`, which is the closest multiple of `period` from the start + /// // of `interval` after the call to `tick` up above. + /// interval.tick().await; + /// # } + /// ``` + /// + /// [`Burst`]: MissedTickBehavior::Burst + /// [`Delay`]: MissedTickBehavior::Delay + skip, + ; + + /// Returns [`MissedTickBehavior::Burst`]. + /// + /// For most usecases, the [`Burst`] strategy is what is desired. + /// Additionally, to preserve backwards compatibility, the [`Burst`] + /// strategy must be the default. For these reasons, + /// [`MissedTickBehavior::Burst`] is the default for [`MissedTickBehavior`]. 
+ /// See [`Burst`] for more details. + /// + /// [`Burst`]: MissedTickBehavior::Burst + static Future default_() => + RustLib.instance.api.tokioTimeMissedTickBehaviorDefault(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/time/error.dart b/mobile_app/lib/src/rust/third_party/tokio/time/error.dart new file mode 100644 index 0000000..f3196c3 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/time/error.dart @@ -0,0 +1,34 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These types are ignored because they are neither used by any `pub` functions nor (for structs and enums) marked `#[frb(unignore)]`: `Elapsed`, `InsertError`, `Kind` +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `assert_receiver_is_total_eq`, `clone`, `clone`, `eq`, `eq`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `fmt`, `from`, `from` + +// Rust type: RustOpaqueMoi> +abstract class Error implements RustOpaqueInterface { + /// Creates an error representing a timer at capacity. + static Future atCapacity() => + RustLib.instance.api.tokioTimeErrorErrorAtCapacity(); + + /// Creates an error representing a misconfigured timer. + static Future invalid() => + RustLib.instance.api.tokioTimeErrorErrorInvalid(); + + /// Returns `true` if the error was caused by the timer being at capacity. + Future isAtCapacity(); + + /// Returns `true` if the error was caused by the timer being misconfigured. + Future isInvalid(); + + /// Returns `true` if the error was caused by the timer being shutdown. + Future isShutdown(); + + /// Creates an error representing a shutdown timer. + static Future shutdown() => + RustLib.instance.api.tokioTimeErrorErrorShutdown(); +} diff --git a/mobile_app/lib/src/rust/third_party/tokio/time/interval.dart b/mobile_app/lib/src/rust/third_party/tokio/time/interval.dart new file mode 100644 index 0000000..2eba311 --- /dev/null +++ b/mobile_app/lib/src/rust/third_party/tokio/time/interval.dart @@ -0,0 +1,12 @@ +// This file is automatically generated, so please do not edit it. +// @generated by `flutter_rust_bridge`@ 2.11.1. + +// ignore_for_file: invalid_use_of_internal_member, unused_import, unnecessary_import + +import '../../../frb_generated.dart'; +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart'; + +// These function are ignored because they are on traits that is not defined in current crate (put an empty `#[frb]` on it to unignore): `assert_receiver_is_total_eq`, `clone`, `eq`, `fmt`, `fmt` + +// Rust type: RustOpaqueMoi>> +abstract class PollInstant implements RustOpaqueInterface {} diff --git a/mobile_app/lib/storage.dart b/mobile_app/lib/storage.dart new file mode 100644 index 0000000..b61277d --- /dev/null +++ b/mobile_app/lib/storage.dart @@ -0,0 +1,22 @@ +// logging.dart +import 'dart:io'; + +import 'package:logger/logger.dart'; + +class Log { + static Logger? _logger; + + static Future init() async { + _logger = Logger( + printer: PrettyPrinter(), + output: MultiOutput([ConsoleOutput()]), + ); + } + + static Logger get logger { + if (_logger == null) { + throw Exception('Logger is not initialized. 
Call Log.init() first.');
+    }
+    return _logger!;
+  }
+}
diff --git a/mobile_app/lib/utils.dart b/mobile_app/lib/utils.dart
new file mode 100644
index 0000000..3eb0365
--- /dev/null
+++ b/mobile_app/lib/utils.dart
@@ -0,0 +1,136 @@
+import 'dart:io';
+import 'package:permission_handler/permission_handler.dart';
+import 'package:mobile_app/src/rust/api/simple.dart';
+import 'package:path_provider/path_provider.dart';
+import 'package:flutter_tantivy/flutter_tantivy.dart';
+import 'dart:convert';
+import 'package:syncfusion_flutter_pdf/pdf.dart';
+
+class PdfScanner {
+  /// Scans external storage for PDF files (requires MANAGE_EXTERNAL_STORAGE).
+  Future<List<FileSystemEntity>> getAllPdfs() async {
+    List<FileSystemEntity> pdfs = [];
+
+    var status = await Permission.manageExternalStorage.status;
+    if (!status.isGranted) {
+      status = await Permission.manageExternalStorage.request();
+    }
+
+    Directory rootDir = Directory('/storage/emulated/0/');
+
+    try {
+      await _searchForPdfs(rootDir, pdfs);
+    } catch (e) {
+      print("Error scanning: $e");
+    }
+
+    return pdfs;
+  }
+
+  // Recursive function to walk through folders
+  Future<void> _searchForPdfs(
+      Directory dir, List<FileSystemEntity> pdfs) async {
+    try {
+      List<FileSystemEntity> entities =
+          dir.listSync(recursive: false, followLinks: false);
+
+      for (FileSystemEntity entity in entities) {
+        // Skip hidden folders (start with .) and the Android data folders (restricted)
+        if (entity.path.split('/').last.startsWith('.')) continue;
+        if (entity.path.contains('/Android/obb'))
+          continue; // Avoid Access Denied errors
+        if (entity.path.contains('/Android/data'))
+          continue; // Avoid Access Denied errors
+
+        if (entity is File) {
+          if (entity.path.toLowerCase().endsWith(".pdf")) {
+            pdfs.add(entity);
+            print("Found PDF: ${entity.path}");
+          }
+        } else if (entity is Directory) {
+          await _searchForPdfs(entity, pdfs);
+        }
+      }
+    } catch (e) {
+      // Skip directories we are not allowed to read (permission denied).
+    }
+  }
+
+  Future<String> openFile(FileSystemEntity file) async {
+    final buffer = StringBuffer();
+    final stream = File(file.path).openRead().transform(utf8.decoder);
+
+    try {
+      await for (final chunk in stream) {
+        buffer.write(chunk);
+      }
+    } catch (e) {
+      print("Error reading file: $e");
+    }
+    return buffer.toString();
+  }
+
+  /// Indexes every PDF page as its own document (index by chunks).
+  Future<void> indexPdfFiles() async {
+    List<FileSystemEntity> allPdfs = await getAllPdfs();
+    for (FileSystemEntity i in allPdfs) {
+      final PdfDocument document =
+          PdfDocument(inputBytes: File(i.path).readAsBytesSync());
+      String fileName = i.path.split("/").last;
+      final PdfTextExtractor extractor = PdfTextExtractor(document);
+      for (int j = 0; j < document.pages.count; j++) {
+        String pageText = extractor.extractText(startPageIndex: j);
+        final doc = Document(
+            id: "$fileName-$j",
+            text: pageText.replaceAll(j.toString(), " "));
+        await addDocument(doc: doc);
+      }
+      document.dispose();
+    }
+  }
+}
+
+Future<List<SearchResult>> findMatch(String query) async {
+  await RustLib.init();
+  final results = await searchDocuments(
+    query: query,
+    topK: BigInt.from(10),
+  );
+  return results;
+}
+
+Future<void> saveSearchHistory(String text) async {
+  final query = text.trim();
+  if (query.isEmpty) return;
+
+  final directory = await getApplicationDocumentsDirectory();
+  final file = File('${directory.path}/search_history.txt');
+
+  // 1. 
Read existing lines into a Set to ensure uniqueness + Set history = {}; + if (await file.exists()) { + final lines = await file.readAsLines(); + history = lines.toSet(); + } + + history.remove(query); + history.add(query); + + await file.writeAsString(history.join('\n') + '\n'); +} + +Future> getSearchHistory() async { + try { + final directory = await getApplicationDocumentsDirectory(); + final file = File('${directory.path}/search_history.txt'); + + if (await file.exists()) { + String contents = await file.readAsString(); + return contents.trim().split('\n').reversed.toList(); + } + } catch (e) { + print("Error reading history: $e"); + } + return []; +} + +///delete document when a document is deleted diff --git a/mobile_app/linux/flutter/generated_plugins.cmake b/mobile_app/linux/flutter/generated_plugins.cmake index a6ac924..f585e33 100644 --- a/mobile_app/linux/flutter/generated_plugins.cmake +++ b/mobile_app/linux/flutter/generated_plugins.cmake @@ -6,6 +6,7 @@ list(APPEND FLUTTER_PLUGIN_LIST ) list(APPEND FLUTTER_FFI_PLUGIN_LIST + flutter_tantivy rust_lib_mobile_app ) diff --git a/mobile_app/macos/Flutter/GeneratedPluginRegistrant.swift b/mobile_app/macos/Flutter/GeneratedPluginRegistrant.swift index cccf817..774a6b8 100644 --- a/mobile_app/macos/Flutter/GeneratedPluginRegistrant.swift +++ b/mobile_app/macos/Flutter/GeneratedPluginRegistrant.swift @@ -5,6 +5,8 @@ import FlutterMacOS import Foundation +import file_picker func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) { + FilePickerPlugin.register(with: registry.registrar(forPlugin: "FilePickerPlugin")) } diff --git a/mobile_app/pubspec.lock b/mobile_app/pubspec.lock index 95ec294..c641df9 100644 --- a/mobile_app/pubspec.lock +++ b/mobile_app/pubspec.lock @@ -1,6 +1,22 @@ # Generated by pub # See https://dart.dev/tools/pub/glossary#lockfile packages: + _fe_analyzer_shared: + dependency: transitive + description: + name: _fe_analyzer_shared + sha256: "5b7468c326d2f8a4f630056404ca0d291ade42918f4a3c6233618e724f39da8e" + url: "https://pub.dev" + source: hosted + version: "92.0.0" + analyzer: + dependency: transitive + description: + name: analyzer + sha256: "70e4b1ef8003c64793a9e268a551a82869a8a96f39deb73dea28084b0e8bf75e" + url: "https://pub.dev" + source: hosted + version: "9.0.0" args: dependency: transitive description: @@ -25,6 +41,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.1" + build: + dependency: transitive + description: + name: build + sha256: "275bf6bb2a00a9852c28d4e0b410da1d833a734d57d39d44f94bfc895a484ec3" + url: "https://pub.dev" + source: hosted + version: "4.0.4" build_cli_annotations: dependency: transitive description: @@ -33,6 +57,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.0" + build_config: + dependency: transitive + description: + name: build_config + sha256: "4f64382b97504dc2fcdf487d5aae33418e08b4703fc21249e4db6d804a4d0187" + url: "https://pub.dev" + source: hosted + version: "1.2.0" characters: dependency: transitive description: @@ -41,6 +73,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.4.0" + checked_yaml: + dependency: transitive + description: + name: checked_yaml + sha256: "959525d3162f249993882720d52b7e0c833978df229be20702b33d48d91de70f" + url: "https://pub.dev" + source: hosted + version: "2.0.4" clock: dependency: transitive description: @@ -49,6 +89,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.2" + code_assets: + dependency: transitive + description: + name: code_assets + sha256: 
ae0db647e668cbb295a3527f0938e4039e004c80099dce2f964102373f5ce0b5 + url: "https://pub.dev" + source: hosted + version: "0.19.10" collection: dependency: transitive description: @@ -57,6 +105,30 @@ packages: url: "https://pub.dev" source: hosted version: "1.19.1" + convert: + dependency: transitive + description: + name: convert + sha256: b30acd5944035672bc15c6b7a8b47d773e41e2f17de064350988c5d02adb1c68 + url: "https://pub.dev" + source: hosted + version: "3.1.2" + cross_file: + dependency: transitive + description: + name: cross_file + sha256: "701dcfc06da0882883a2657c445103380e53e647060ad8d9dfb710c100996608" + url: "https://pub.dev" + source: hosted + version: "0.3.5+1" + crypto: + dependency: transitive + description: + name: crypto + sha256: c8ea0233063ba03258fbcf2ca4d6dadfefe14f02fab57702265467a19f27fadf + url: "https://pub.dev" + source: hosted + version: "3.0.7" cupertino_icons: dependency: "direct main" description: @@ -65,6 +137,22 @@ packages: url: "https://pub.dev" source: hosted version: "1.0.8" + dart_style: + dependency: transitive + description: + name: dart_style + sha256: a9c30492da18ff84efe2422ba2d319a89942d93e58eb0b73d32abe822ef54b7b + url: "https://pub.dev" + source: hosted + version: "3.1.3" + dbus: + dependency: transitive + description: + name: dbus + sha256: "79e0c23480ff85dc68de79e2cd6334add97e48f7f4865d17686dd6ea81a47e8c" + url: "https://pub.dev" + source: hosted + version: "0.7.11" fake_async: dependency: transitive description: @@ -73,6 +161,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.3.3" + ffi: + dependency: transitive + description: + name: ffi + sha256: d07d37192dbf97461359c1518788f203b0c9102cfd2c35a716b823741219542c + url: "https://pub.dev" + source: hosted + version: "2.1.5" file: dependency: transitive description: @@ -81,6 +177,14 @@ packages: url: "https://pub.dev" source: hosted version: "7.0.0" + file_picker: + dependency: "direct main" + description: + name: file_picker + sha256: d974b6ba2606371ac71dd94254beefb6fa81185bde0b59bdc1df09885da85fde + url: "https://pub.dev" + source: hosted + version: "10.3.8" flutter: dependency: "direct main" description: flutter @@ -99,6 +203,14 @@ packages: url: "https://pub.dev" source: hosted version: "3.0.2" + flutter_plugin_android_lifecycle: + dependency: transitive + description: + name: flutter_plugin_android_lifecycle + sha256: ee8068e0e1cd16c4a82714119918efdeed33b3ba7772c54b5d094ab53f9b7fd1 + url: "https://pub.dev" + source: hosted + version: "2.0.33" flutter_rust_bridge: dependency: "direct main" description: @@ -107,11 +219,40 @@ packages: url: "https://pub.dev" source: hosted version: "2.11.1" + flutter_tantivy: + dependency: "direct main" + description: + name: flutter_tantivy + sha256: "5b024e4d68bff5aecd7397e20c9ae9e13d19288b707eb9b71233134a861935b5" + url: "https://pub.dev" + source: hosted + version: "0.0.1" flutter_test: dependency: "direct dev" description: flutter source: sdk version: "0.0.0" + flutter_web_plugins: + dependency: transitive + description: flutter + source: sdk + version: "0.0.0" + freezed: + dependency: "direct main" + description: + name: freezed + sha256: "03dd9b7423ff0e31b7e01b2204593e5e1ac5ee553b6ea9d8184dff4a26b9fb07" + url: "https://pub.dev" + source: hosted + version: "3.2.4" + freezed_annotation: + dependency: transitive + description: + name: freezed_annotation + sha256: "7294967ff0a6d98638e7acb774aac3af2550777accd8149c90af5b014e6d44d8" + url: "https://pub.dev" + source: hosted + version: "3.1.0" fuchsia_remote_debug_protocol: dependency: transitive 
description: flutter @@ -125,11 +266,59 @@ packages: url: "https://pub.dev" source: hosted version: "3.0.1" + glob: + dependency: transitive + description: + name: glob + sha256: c3f1ee72c96f8f78935e18aa8cecced9ab132419e8625dc187e1c2408efc20de + url: "https://pub.dev" + source: hosted + version: "2.1.3" + hooks: + dependency: transitive + description: + name: hooks + sha256: "5410b9f4f6c9f01e8ff0eb81c9801ea13a3c3d39f8f0b1613cda08e27eab3c18" + url: "https://pub.dev" + source: hosted + version: "0.20.5" + http: + dependency: transitive + description: + name: http + sha256: "87721a4a50b19c7f1d49001e51409bddc46303966ce89a65af4f4e6004896412" + url: "https://pub.dev" + source: hosted + version: "1.6.0" + http_parser: + dependency: transitive + description: + name: http_parser + sha256: "178d74305e7866013777bab2c3d8726205dc5a4dd935297175b19a23a2e66571" + url: "https://pub.dev" + source: hosted + version: "4.1.2" integration_test: dependency: "direct dev" description: flutter source: sdk version: "0.0.0" + intl: + dependency: transitive + description: + name: intl + sha256: "3df61194eb431efc39c4ceba583b95633a403f46c9fd341e550ce0bfa50e9aa5" + url: "https://pub.dev" + source: hosted + version: "0.20.2" + json_annotation: + dependency: transitive + description: + name: json_annotation + sha256: "1ce844379ca14835a50d2f019a3099f419082cfdd231cd86a142af94dd5c6bb1" + url: "https://pub.dev" + source: hosted + version: "4.9.0" leak_tracker: dependency: transitive description: @@ -162,6 +351,22 @@ packages: url: "https://pub.dev" source: hosted version: "3.0.0" + logger: + dependency: "direct main" + description: + name: logger + sha256: a7967e31b703831a893bbc3c3dd11db08126fe5f369b5c648a36f821979f5be3 + url: "https://pub.dev" + source: hosted + version: "2.6.2" + logging: + dependency: transitive + description: + name: logging + sha256: c8245ada5f1717ed44271ed1c26b8ce85ca3228fd2ffdb75468ab01979309d61 + url: "https://pub.dev" + source: hosted + version: "1.3.0" matcher: dependency: transitive description: @@ -186,6 +391,30 @@ packages: url: "https://pub.dev" source: hosted version: "1.17.0" + native_toolchain_c: + dependency: transitive + description: + name: native_toolchain_c + sha256: f8872ea6c7a50ce08db9ae280ca2b8efdd973157ce462826c82f3c3051d154ce + url: "https://pub.dev" + source: hosted + version: "0.17.2" + objective_c: + dependency: transitive + description: + name: objective_c + sha256: "55eb67ede1002d9771b3f9264d2c9d30bc364f0267bc1c6cc0883280d5f0c7cb" + url: "https://pub.dev" + source: hosted + version: "9.2.2" + package_config: + dependency: transitive + description: + name: package_config + sha256: f096c55ebb7deb7e384101542bfba8c52696c1b56fca2eb62827989ef2353bbc + url: "https://pub.dev" + source: hosted + version: "2.2.0" path: dependency: transitive description: @@ -194,6 +423,110 @@ packages: url: "https://pub.dev" source: hosted version: "1.9.1" + path_provider: + dependency: "direct main" + description: + name: path_provider + sha256: "50c5dd5b6e1aaf6fb3a78b33f6aa3afca52bf903a8a5298f53101fdaee55bbcd" + url: "https://pub.dev" + source: hosted + version: "2.1.5" + path_provider_android: + dependency: transitive + description: + name: path_provider_android + sha256: f2c65e21139ce2c3dad46922be8272bb5963516045659e71bb16e151c93b580e + url: "https://pub.dev" + source: hosted + version: "2.2.22" + path_provider_foundation: + dependency: transitive + description: + name: path_provider_foundation + sha256: "2a376b7d6392d80cd3705782d2caa734ca4727776db0b6ec36ef3f1855197699" + url: "https://pub.dev" + 
source: hosted + version: "2.6.0" + path_provider_linux: + dependency: transitive + description: + name: path_provider_linux + sha256: f7a1fe3a634fe7734c8d3f2766ad746ae2a2884abe22e241a8b301bf5cac3279 + url: "https://pub.dev" + source: hosted + version: "2.2.1" + path_provider_platform_interface: + dependency: transitive + description: + name: path_provider_platform_interface + sha256: "88f5779f72ba699763fa3a3b06aa4bf6de76c8e5de842cf6f29e2e06476c2334" + url: "https://pub.dev" + source: hosted + version: "2.1.2" + path_provider_windows: + dependency: transitive + description: + name: path_provider_windows + sha256: bd6f00dbd873bfb70d0761682da2b3a2c2fccc2b9e84c495821639601d81afe7 + url: "https://pub.dev" + source: hosted + version: "2.3.0" + permission_handler: + dependency: "direct main" + description: + name: permission_handler + sha256: bc917da36261b00137bbc8896bf1482169cd76f866282368948f032c8c1caae1 + url: "https://pub.dev" + source: hosted + version: "12.0.1" + permission_handler_android: + dependency: transitive + description: + name: permission_handler_android + sha256: "1e3bc410ca1bf84662104b100eb126e066cb55791b7451307f9708d4007350e6" + url: "https://pub.dev" + source: hosted + version: "13.0.1" + permission_handler_apple: + dependency: transitive + description: + name: permission_handler_apple + sha256: f000131e755c54cf4d84a5d8bd6e4149e262cc31c5a8b1d698de1ac85fa41023 + url: "https://pub.dev" + source: hosted + version: "9.4.7" + permission_handler_html: + dependency: transitive + description: + name: permission_handler_html + sha256: "38f000e83355abb3392140f6bc3030660cfaef189e1f87824facb76300b4ff24" + url: "https://pub.dev" + source: hosted + version: "0.1.3+5" + permission_handler_platform_interface: + dependency: transitive + description: + name: permission_handler_platform_interface + sha256: eb99b295153abce5d683cac8c02e22faab63e50679b937fa1bf67d58bb282878 + url: "https://pub.dev" + source: hosted + version: "4.3.0" + permission_handler_windows: + dependency: transitive + description: + name: permission_handler_windows + sha256: "1a790728016f79a41216d88672dbc5df30e686e811ad4e698bfc51f76ad91f1e" + url: "https://pub.dev" + source: hosted + version: "0.2.1" + petitparser: + dependency: transitive + description: + name: petitparser + sha256: "1a97266a94f7350d30ae522c0af07890c70b8e62c71e8e3920d1db4d23c057d1" + url: "https://pub.dev" + source: hosted + version: "7.0.1" platform: dependency: transitive description: @@ -218,6 +551,22 @@ packages: url: "https://pub.dev" source: hosted version: "5.0.2" + pub_semver: + dependency: transitive + description: + name: pub_semver + sha256: "5bfcf68ca79ef689f8990d1160781b4bad40a3bd5e5218ad4076ddb7f4081585" + url: "https://pub.dev" + source: hosted + version: "2.2.0" + pubspec_parse: + dependency: transitive + description: + name: pubspec_parse + sha256: "0560ba233314abbed0a48a2956f7f022cce7c3e1e73df540277da7544cad4082" + url: "https://pub.dev" + source: hosted + version: "1.5.0" rust_lib_mobile_app: dependency: "direct main" description: @@ -230,6 +579,14 @@ packages: description: flutter source: sdk version: "0.0.0" + source_gen: + dependency: transitive + description: + name: source_gen + sha256: "1d562a3c1f713904ebbed50d2760217fd8a51ca170ac4b05b0db490699dbac17" + url: "https://pub.dev" + source: hosted + version: "4.2.0" source_span: dependency: transitive description: @@ -270,6 +627,22 @@ packages: url: "https://pub.dev" source: hosted version: "0.3.1" + syncfusion_flutter_core: + dependency: transitive + description: + name: 
syncfusion_flutter_core + sha256: "44ec7f2bc3257d4d5ecae9ad2ed73ec1ca123323e0b1662441bc7f756530844f" + url: "https://pub.dev" + source: hosted + version: "32.1.25" + syncfusion_flutter_pdf: + dependency: "direct main" + description: + name: syncfusion_flutter_pdf + sha256: "3cb4d617cb7c98850a9a19ab21d4d7140b1f753316b23c416c5f598671446fdc" + url: "https://pub.dev" + source: hosted + version: "32.1.25" term_glyph: dependency: transitive description: @@ -286,6 +659,14 @@ packages: url: "https://pub.dev" source: hosted version: "0.7.7" + typed_data: + dependency: transitive + description: + name: typed_data + sha256: f9049c039ebfeb4cf7a7104a675823cd72dba8297f264b6637062516699fa006 + url: "https://pub.dev" + source: hosted + version: "1.4.0" vector_math: dependency: transitive description: @@ -302,6 +683,14 @@ packages: url: "https://pub.dev" source: hosted version: "14.2.1" + watcher: + dependency: transitive + description: + name: watcher + sha256: "1398c9f081a753f9226febe8900fce8f7d0a67163334e1c94a2438339d79d635" + url: "https://pub.dev" + source: hosted + version: "1.2.1" web: dependency: transitive description: @@ -318,6 +707,38 @@ packages: url: "https://pub.dev" source: hosted version: "3.0.3" + win32: + dependency: transitive + description: + name: win32 + sha256: d7cb55e04cd34096cd3a79b3330245f54cb96a370a1c27adb3c84b917de8b08e + url: "https://pub.dev" + source: hosted + version: "5.15.0" + xdg_directories: + dependency: transitive + description: + name: xdg_directories + sha256: "7a3f37b05d989967cdddcbb571f1ea834867ae2faa29725fd085180e0883aa15" + url: "https://pub.dev" + source: hosted + version: "1.1.0" + xml: + dependency: transitive + description: + name: xml + sha256: "971043b3a0d3da28727e40ed3e0b5d18b742fa5a68665cca88e74b7876d5e025" + url: "https://pub.dev" + source: hosted + version: "6.6.1" + yaml: + dependency: transitive + description: + name: yaml + sha256: b9da305ac7c39faa3f030eccd175340f968459dae4af175130b3fc47e40d76ce + url: "https://pub.dev" + source: hosted + version: "3.1.3" sdks: - dart: ">=3.8.0-0 <4.0.0" - flutter: ">=3.18.0-18.0.pre.54" + dart: ">=3.10.3 <4.0.0" + flutter: ">=3.38.4" diff --git a/mobile_app/pubspec.yaml b/mobile_app/pubspec.yaml index 3ae2a7f..29e67b7 100644 --- a/mobile_app/pubspec.yaml +++ b/mobile_app/pubspec.yaml @@ -39,6 +39,13 @@ dependencies: path: rust_builder flutter_rust_bridge: 2.11.1 gap: ^3.0.1 + permission_handler: ^12.0.1 + file_picker: ^10.3.8 + logger: ^2.6.2 + path_provider: ^2.1.5 + freezed: ^3.2.4 + flutter_tantivy: ^0.0.1 + syncfusion_flutter_pdf: ^32.1.25 dev_dependencies: flutter_test: diff --git a/mobile_app/rust/Cargo.lock b/mobile_app/rust/Cargo.lock index 75291b4..5944227 100644 --- a/mobile_app/rust/Cargo.lock +++ b/mobile_app/rust/Cargo.lock @@ -4,24 +4,24 @@ version = 4 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -37,6 +37,12 @@ dependencies = [ "backtrace", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_log-sys" version = "0.3.2" @@ -56,9 +62,29 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "arc-swap" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +dependencies = [ + "rustversion", +] + +[[package]] +name = "async-trait" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "atomic" @@ -68,31 +94,46 @@ checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" [[package]] name = "autocfg" -version = "1.1.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-link", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "bitpacking" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" +dependencies = [ + "crunchy", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -102,6 +143,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bon" +version = "3.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "build-target" version = "0.4.0" @@ -110,15 +176,15 @@ checksum = 
"832133bbabbbaa9fbdba793456a2827627a7d2b8fb96032fa1e7666d7895832b" [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" [[package]] name = "byteorder" @@ -128,18 +194,27 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cc" -version = "1.0.83" +version = "1.2.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" dependencies = [ + "find-msvc-tools", + "jobserver", "libc", + "shlex", ] +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "console_error_panic_hook" @@ -151,16 +226,99 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", ] +[[package]] +name = "darling" +version = "0.23.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "dart-sys" version = "4.1.5" @@ -177,7 +335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -194,6 +352,16 @@ dependencies = [ "syn", ] +[[package]] +name = "deranged" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +dependencies = [ + "powerfmt", + "serde_core", +] + [[package]] name = "digest" version = "0.10.7" @@ -204,6 +372,18 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "downcast-rs" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "env_filter" version = "0.1.4" @@ -214,6 +394,40 @@ dependencies = [ "regex", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + [[package]] name = "flutter_rust_bridge" version = "2.11.1" @@ -256,11 +470,33 @@ dependencies = [ "syn", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "fs4" +version = 
"0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8640e34b88f7652208ce9e88b1a37a2ae95227d84abec377ccd3c5cfeb141ed4" +dependencies = [ + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "futures" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -273,9 +509,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -283,15 +519,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -300,15 +536,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -317,21 +553,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -355,11 +591,34 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies 
= [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "gimli" -version = "0.28.1" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" [[package]] name = "hashbrown" @@ -367,11 +626,22 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "hex" @@ -379,26 +649,91 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ + "once_cell", "wasm-bindgen", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "levenshtein_automata" +version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libc" -version = "0.2.150" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libm" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "lock_api" @@ -411,9 +746,24 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lz4_flex" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" [[package]] name = "md-5" @@ -425,26 +775,82 @@ dependencies = [ "digest", ] +[[package]] +name = "measure_time" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" +dependencies = [ + "log", +] + [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "memmap2" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "adler", + "autocfg", + "libm", ] [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ "hermit-abi", "libc", @@ -452,18 +858,24 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "oneshot" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3ce66197e99546da6c6d991285f605192e794ceae69686c17163844a7bf8fcc2" [[package]] name = "oslog" @@ -476,6 +888,15 @@ dependencies = [ "log", ] +[[package]] +name = "ownedbytes" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "parking_lot_core" version = "0.9.12" @@ -491,9 +912,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -501,6 +922,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "portable-atomic" version = "1.13.0" @@ -508,26 +935,117 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" [[package]] -name = "proc-macro2" -version = "1.0.70" +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "unicode-ident", + "zerocopy", ] [[package]] -name = "quote" -version = "1.0.33" 
+name = "prettyplease" +version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", + "syn", ] [[package]] -name = "redox_syscall" -version = "0.5.18" +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ @@ -536,9 +1054,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -548,9 +1066,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -559,22 +1077,58 @@ dependencies = [ [[package]] 
name = "regex-syntax" -version = "0.8.2" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rust-stemmers" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] [[package]] name = "rust_lib_mobile_app" version = "0.1.0" dependencies = [ "flutter_rust_bridge", + "tantivy", ] [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "scopeguard" @@ -583,31 +1137,277 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "slab" -version = "0.4.9" +name = "serde" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ - "autocfg", + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", ] +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sketches-ddsketch" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +dependencies = [ + "serde", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" -version = "2.0.39" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "tantivy" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "502915c7381c5cb2d2781503962610cb880ad8f1a0ca95df1bae645d5ebf2545" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64", + "bitpacking", + "bon", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "hyperloglogplus", + "itertools", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b04eed5108d8283607da6710fe17a7663523440eaf7ea5a1a440d19a1448b6" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b628488ae936c83e92b5c4056833054ca56f76c0e616aee8339e24ac89119cd" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f880aa7cab0c063a47b62596d10991cdd0b6e0e0575d9c5eeb298b307a25de55" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "768fccdc84d60d86235d42d7e4c33acf43c418258ff5952abf07bd7837fcd26b" +dependencies = [ + "nom", + "serde", + "serde_json", +] + +[[package]] +name = "tantivy-sstable" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f8292095d1a8a2c2b36380ec455f910ab52dde516af36321af332c93f20ab7d5" +dependencies = [ + "futures-util", + "itertools", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d38a379411169f0b3002c9cba61cdfe315f757e9d4f239c00c282497a0749d" +dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23024f6aeb25ceb1a0e27740c84bdb0fae52626737b7e9a9de6ad5aa25c7b038" +dependencies = [ + "serde", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "threadpool" version = "1.8.1" @@ -617,77 +1417,129 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "time" +version = "0.3.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9da98b7d9b7dad93488a84b8248efc35352b0b2657397d4167e7ad67e5d535e5" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78cc610bac2dcee56805c99642447d4c5dbde4d01f752ffea0199aee1f601dc4" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tokio" -version = "1.34.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ - "backtrace", - "num_cpus", "pin-project-lite", ] [[package]] name = "typenum" -version = "1.17.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8-ranges" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + 
+[[package]] +name = "uuid" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "serde_core", + "wasm-bindgen", +] [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] -name = "wasm-bindgen" -version = "0.2.92" +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ - "cfg-if", - "wasm-bindgen-macro", + "wit-bindgen", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.92" +name = "wasm-bindgen" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ - "bumpalo", - "log", + "cfg-if", "once_cell", - "proc-macro2", - "quote", - "syn", + "rustversion", + "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", + "futures-util", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -695,35 +1547,202 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] [[package]] name = "web-sys" -version = "0.3.66" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" 
dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 
+ +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zerocopy" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71ddd76bcebeed25db614f82bf31a9f4222d3fbba300e6fb6c00afa26cbd4d9d" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8187381b52e32220d50b255276aa16a084ec0a9017a0ca2152a1f55c539758d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/mobile_app/rust/Cargo.toml b/mobile_app/rust/Cargo.toml index 2a03225..371c110 100644 --- a/mobile_app/rust/Cargo.toml +++ b/mobile_app/rust/Cargo.toml @@ -8,6 +8,8 @@ crate-type = ["cdylib", "staticlib"] [dependencies] flutter_rust_bridge = "=2.11.1" +tantivy = "0.25.0" +#seekstorm = { git = "https://github.com/Ok3ks/seekstorm", branch = "compile-for-mobile", version = "1.2.5" } [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(frb_expand)'] } diff --git a/mobile_app/rust/src/api/mod.rs b/mobile_app/rust/src/api/mod.rs index b252f36..299aafc 100644 --- a/mobile_app/rust/src/api/mod.rs +++ b/mobile_app/rust/src/api/mod.rs @@ -1 +1 @@ -pub mod simple; +pub mod simple; \ No newline at end of file diff --git a/mobile_app/rust/src/api/simple.rs b/mobile_app/rust/src/api/simple.rs index 4360c82..7f96e5d 100644 --- a/mobile_app/rust/src/api/simple.rs +++ b/mobile_app/rust/src/api/simple.rs @@ -7,4 +7,4 @@ pub fn greet(name: String) -> String { pub fn init_app() { // Default utilities - feel free to customize flutter_rust_bridge::setup_default_user_utils(); -} +} \ No newline at end of file diff --git a/mobile_app/rust/src/seekstorm/add_result.rs b/mobile_app/rust/src/seekstorm/add_result.rs new file mode 100644 index 0000000..baa7426 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/add_result.rs @@ -0,0 +1,3682 @@ +use ahash::AHashSet; +use smallvec::{SmallVec, smallvec}; +use std::cmp::Ordering; + +use crate::{ + geo_search::{decode_morton_2_d, euclidian_distance}, + index::{ + AccessType, CompressionType, FIELD_STOP_BIT_1, FIELD_STOP_BIT_2, FieldType, NgramType, + NonUniquePostingListObjectQuery, PostingListObjectQuery, SPEEDUP_FLAG, STOP_BIT, Shard, + SimilarityType, get_document_length_compressed_mmap, + }, + min_heap, + search::{FilterSparse, Ranges, ResultType, SearchResult}, + utils::{ + read_f32, read_f64, read_i8, read_i16, read_i32, read_i64, read_u8, read_u16, 
read_u32, + read_u64, + }, +}; + +pub(crate) const K: f32 = 1.2; +pub(crate) const B: f32 = 0.75; +pub(crate) const SIGMA: f32 = 0.0; + +pub(crate) struct PostingListObjectSingle<'a> { + pub rank_position_pointer_range: u32, + pub pointer_pivot_p_docid: u16, + pub byte_array: &'a [u8], + pub p_docid: i32, + pub idf: f32, + + pub idf_ngram1: f32, + pub idf_ngram2: f32, + pub idf_ngram3: f32, + pub ngram_type: NgramType, +} + +#[inline(always)] +pub(crate) fn get_next_position_singlefield(plo: &mut NonUniquePostingListObjectQuery) -> u32 { + if plo.is_embedded { + return plo.embedded_positions[plo.p_pos as usize]; + } + + if (plo.byte_array[plo.positions_pointer] & STOP_BIT) != 0 { + let position = (plo.byte_array[plo.positions_pointer] & 0b0111_1111) as u32; + plo.positions_pointer += 1; + position + } else if (plo.byte_array[plo.positions_pointer + 1] & STOP_BIT) != 0 { + let position = ((plo.byte_array[plo.positions_pointer] as u32) << 7) + | (plo.byte_array[plo.positions_pointer + 1] & 0b0111_1111) as u32; + plo.positions_pointer += 2; + position + } else { + let position = ((plo.byte_array[plo.positions_pointer] as u32) << 13) + | ((plo.byte_array[plo.positions_pointer + 1] as u32) << 7) + | (plo.byte_array[plo.positions_pointer + 2] & 0b0111_1111) as u32; + plo.positions_pointer += 3; + position + } +} + +#[inline(always)] +pub(crate) fn get_next_position_multifield(plo: &mut NonUniquePostingListObjectQuery) -> u32 { + if plo.is_embedded { + return plo.embedded_positions[if plo.p_field == 0 { + plo.p_pos as usize + } else { + plo.field_vec[plo.p_field - 1].1 + plo.p_pos as usize + }]; + } + + if (plo.byte_array[plo.positions_pointer] & STOP_BIT) != 0 { + let position = (plo.byte_array[plo.positions_pointer] & 0b0111_1111) as u32; + plo.positions_pointer += 1; + position + } else if (plo.byte_array[plo.positions_pointer + 1] & STOP_BIT) != 0 { + let position = ((plo.byte_array[plo.positions_pointer] as u32) << 7) + | (plo.byte_array[plo.positions_pointer + 1] & 0b0111_1111) as u32; + plo.positions_pointer += 2; + position + } else { + let position = ((plo.byte_array[plo.positions_pointer] as u32) << 13) + | ((plo.byte_array[plo.positions_pointer + 1] as u32) << 7) + | (plo.byte_array[plo.positions_pointer + 2] & 0b0111_1111) as u32; + plo.positions_pointer += 3; + position + } +} + +/// Post processing after AND intersection candidates have been found +/// Phrase intersection +/// BM25 ranking vs. 
seekstorm ranking (implicit phrase search, term proximity, field type boost, source reputation) +/// BM25 is default baseline in IR academics, but exhibits inferior relevance for practical use +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub(crate) fn add_result_singleterm_multifield( + shard: &Shard, + docid: usize, + result_count: &mut i32, + search_result: &mut SearchResult, + + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + + plo_single: &PostingListObjectSingle, + not_query_list: &mut [PostingListObjectQuery], + block_score: f32, +) { + if shard.indexed_field_vec.len() == 1 { + add_result_singleterm_singlefield( + shard, + docid, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + plo_single, + not_query_list, + block_score, + ); + return; + } + + if !shard.delete_hashset.is_empty() && shard.delete_hashset.contains(&docid) { + return; + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + let local_docid = docid & 0b11111111_11111111; + + match &plo.compression_type { + CompressionType::Array => { + while plo.p_docid < plo.p_docid_count + && (plo.p_docid == 0 || (plo.docid as usize) < local_docid) + { + plo.docid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + (plo.p_docid << 1), + ) as i32; + plo.p_docid += 1; + } + if (plo.docid as usize) == local_docid { + return; + } + } + CompressionType::Bitmap => { + if (plo.byte_array[plo.compressed_doc_id_range + (local_docid >> 3)] + & (1 << (local_docid & 7))) + > 0 + { + return; + } + } + CompressionType::Rle => { + if local_docid >= plo.docid as usize && local_docid <= plo.run_end as usize { + return; + } else { + while (plo.p_run_sum as usize) + ((plo.p_run as usize - 2) >> 2) + < plo.p_docid_count + && local_docid > plo.run_end as usize + { + let startdocid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize, + ); + let runlength = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize + 2, + ); + plo.docid = startdocid as i32; + plo.run_end = (startdocid + runlength) as i32; + plo.p_run_sum += runlength as i32; + plo.p_run += 4; + + if local_docid >= startdocid as usize && local_docid <= plo.run_end as usize + { + return; + } + } + } + } + _ => {} + } + } + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + return; + }; + + let mut field_vec: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + let mut field_vec_ngram1: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + let mut field_vec_ngram2: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + let mut field_vec_ngram3: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + + match *result_type { + ResultType::Count => { + if !field_filter_set.is_empty() { + decode_positions_singleterm_multifield( + shard, + plo_single, + &mut field_vec, + &mut field_vec_ngram1, + &mut field_vec_ngram2, + &mut field_vec_ngram3, + ); + + if field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() { + let mut match_flag = false; + for field in field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + } + + facet_count(shard, search_result, docid); + + *result_count += 1; + + return; + } + ResultType::Topk => { + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= 
search_result.topk_candidates._elements[0].score + { + return; + } + + if !field_filter_set.is_empty() { + decode_positions_singleterm_multifield( + shard, + plo_single, + &mut field_vec, + &mut field_vec_ngram1, + &mut field_vec_ngram2, + &mut field_vec_ngram3, + ); + + if field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() { + let mut match_flag = false; + for field in field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + } + } + ResultType::TopkCount => { + if !field_filter_set.is_empty() { + decode_positions_singleterm_multifield( + shard, + plo_single, + &mut field_vec, + &mut field_vec_ngram1, + &mut field_vec_ngram2, + &mut field_vec_ngram3, + ); + + if field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() { + let mut match_flag = false; + for field in field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + } + + facet_count(shard, search_result, docid); + + *result_count += 1; + + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + return; + } + } + } + + if field_filter_set.is_empty() { + decode_positions_singleterm_multifield( + shard, + plo_single, + &mut field_vec, + &mut field_vec_ngram1, + &mut field_vec_ngram2, + &mut field_vec_ngram3, + ); + } + + let bm25f = get_bm25f_singleterm_multifield( + shard, + docid, + plo_single, + field_vec, + field_vec_ngram1, + field_vec_ngram2, + field_vec_ngram3, + ); + + search_result.topk_candidates.add_topk( + min_heap::Result { + doc_id: docid, + score: bm25f, + }, + top_k, + ); +} + +#[inline] +pub(crate) fn is_facet_filter(index: &Shard, facet_filter: &[FilterSparse], docid: usize) -> bool { + for (i, facet) in index.facets.iter().enumerate() { + match &facet_filter[i] { + FilterSparse::U8(range) => { + let facet_value_id = read_u8( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::U16(range) => { + let facet_value_id = read_u16( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::U32(range) => { + let facet_value_id = read_u32( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::U64(range) => { + let facet_value_id = read_u64( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::I8(range) => { + let facet_value_id = read_i8( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::I16(range) => { + let facet_value_id = read_i16( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::I32(range) => { + let facet_value_id = read_i32( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::I64(range) => { + let facet_value_id = read_i64( + 
&index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::Timestamp(range) => { + let facet_value_id = read_i64( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::F32(range) => { + let facet_value_id = read_f32( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::F64(range) => { + let facet_value_id = read_f64( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !range.contains(&facet_value_id) { + return true; + } + } + FilterSparse::String16(values) => { + let facet_value_id = read_u16( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !values.contains(&facet_value_id) { + return true; + } + } + FilterSparse::String32(values) => { + let facet_value_id = read_u32( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if !values.contains(&facet_value_id) { + return true; + } + } + + FilterSparse::Point(point, distance_range, unit, range) => { + let morton_code = read_u64( + &index.facets_file_mmap, + (index.facets_size_sum * docid) + facet.offset, + ); + if range.contains(&morton_code) { + if !distance_range.contains(&euclidian_distance( + point, + &decode_morton_2_d(morton_code), + unit, + )) { + return true; + } + } else { + return true; + } + } + + FilterSparse::None => {} + } + } + false +} + +#[inline] +pub(crate) fn facet_count(shard: &Shard, search_result: &mut SearchResult, docid: usize) { + if !search_result.query_facets.is_empty() && !search_result.skip_facet_count { + for (i, facet) in shard.facets.iter().enumerate() { + if search_result.query_facets[i].length == 0 { + continue; + } + + let facet_value_id = match &search_result.query_facets[i].ranges { + Ranges::U8(_range_type, ranges) => { + let facet_value = + shard.facets_file_mmap[(shard.facets_size_sum * docid) + facet.offset]; + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::U16(_range_type, ranges) => { + let facet_value = read_u16( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::U32(_range_type, ranges) => { + let facet_value = read_u32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::U64(_range_type, ranges) => { + let facet_value = read_u64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I8(_range_type, ranges) => { + let facet_value = read_i8( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I16(_range_type, ranges) => { + let facet_value = read_i16( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + 
facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I32(_range_type, ranges) => { + let facet_value = read_i32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + + Ranges::I64(_range_type, ranges) => { + let facet_value = read_i64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::Timestamp(_range_type, ranges) => { + let facet_value = read_i64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::F32(_range_type, ranges) => { + let facet_value = read_f32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by(|range| range.1.partial_cmp(&facet_value).unwrap()) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::F64(_range_type, ranges) => { + let facet_value = read_f64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by(|range| range.1.partial_cmp(&facet_value).unwrap()) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + + Ranges::Point(_range_type, ranges, base, unit) => { + let facet_value = read_u64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + let facet_value_distance = + euclidian_distance(base, &decode_morton_2_d(facet_value), unit); + ranges + .binary_search_by(|range| { + range.1.partial_cmp(&facet_value_distance).unwrap() + }) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + + _ => { + if facet.field_type == FieldType::String16 + || facet.field_type == FieldType::StringSet16 + { + read_u16( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ) as u32 + } else { + read_u32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ) + } + } + }; + + *search_result.query_facets[i] + .values + .entry(facet_value_id) + .or_insert(0) += 1; + } + } +} + +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub(crate) fn add_result_singleterm_singlefield( + shard: &Shard, + docid: usize, + result_count: &mut i32, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + + plo_single: &PostingListObjectSingle, + not_query_list: &mut [PostingListObjectQuery], + block_score: f32, +) { + if !shard.delete_hashset.is_empty() && shard.delete_hashset.contains(&docid) { + return; + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + let local_docid = docid & 0b11111111_11111111; + + match &plo.compression_type { + CompressionType::Array => { + while plo.p_docid < plo.p_docid_count + && (plo.p_docid == 0 || (plo.docid as usize) < local_docid) + { + plo.docid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + (plo.p_docid << 1), + ) as i32; + plo.p_docid += 1; + } + if (plo.docid as usize) == local_docid { + return; + } + } + CompressionType::Bitmap => { + if 
(plo.byte_array[plo.compressed_doc_id_range + (local_docid >> 3)] + & (1 << (local_docid & 7))) + > 0 + { + return; + } + } + CompressionType::Rle => { + if local_docid >= plo.docid as usize && local_docid <= plo.run_end as usize { + return; + } else { + while (plo.p_run_sum as usize) + ((plo.p_run as usize - 2) >> 2) + < plo.p_docid_count + && local_docid > plo.run_end as usize + { + let startdocid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize, + ); + let runlength = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize + 2, + ); + plo.docid = startdocid as i32; + plo.run_end = (startdocid + runlength) as i32; + plo.p_run_sum += runlength as i32; + plo.p_run += 4; + + if local_docid >= startdocid as usize && local_docid <= plo.run_end as usize + { + return; + } + } + } + } + _ => {} + } + } + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + return; + }; + + let mut tf_ngram1 = 0; + let mut tf_ngram2 = 0; + let mut tf_ngram3 = 0; + let mut positions_count = 0; + let field_id = 0u16; + + match *result_type { + ResultType::Count => { + if !field_filter_set.is_empty() { + decode_positions_singleterm_singlefield( + plo_single, + &mut tf_ngram1, + &mut tf_ngram2, + &mut tf_ngram3, + &mut positions_count, + ); + + if field_filter_set.len() < shard.indexed_field_vec.len() { + let mut match_flag = false; + + if field_filter_set.contains(&field_id) { + match_flag = true; + } + + if !match_flag { + return; + } + } + } + facet_count(shard, search_result, docid); + + *result_count += 1; + + return; + } + ResultType::Topk => { + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + return; + } + + if !field_filter_set.is_empty() { + decode_positions_singleterm_singlefield( + plo_single, + &mut tf_ngram1, + &mut tf_ngram2, + &mut tf_ngram3, + &mut positions_count, + ); + + if field_filter_set.len() < shard.indexed_field_vec.len() { + let mut match_flag = false; + if field_filter_set.contains(&field_id) { + match_flag = true; + } + + if !match_flag { + return; + } + } + } + } + ResultType::TopkCount => { + if !field_filter_set.is_empty() { + decode_positions_singleterm_singlefield( + plo_single, + &mut tf_ngram1, + &mut tf_ngram2, + &mut tf_ngram3, + &mut positions_count, + ); + + if field_filter_set.len() < shard.indexed_field_vec.len() { + let mut match_flag = false; + if field_filter_set.contains(&field_id) { + match_flag = true; + } + if !match_flag { + return; + } + } + } + + facet_count(shard, search_result, docid); + + *result_count += 1; + + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + return; + } + } + } + + if field_filter_set.is_empty() { + decode_positions_singleterm_singlefield( + plo_single, + &mut tf_ngram1, + &mut tf_ngram2, + &mut tf_ngram3, + &mut positions_count, + ); + } + + let bm25f = get_bm25f_singleterm_singlefield( + shard, + docid, + plo_single, + tf_ngram1, + tf_ngram2, + tf_ngram3, + positions_count, + ); + + search_result.topk_candidates.add_topk( + min_heap::Result { + doc_id: docid, + score: bm25f, + }, + top_k, + ); +} + +#[inline(always)] +pub(crate) fn get_bm25f_singleterm_multifield( + shard: &Shard, + docid: usize, + plo_single: &PostingListObjectSingle, + 
field_vec: SmallVec<[(u16, usize); 2]>, + field_vec_ngram1: SmallVec<[(u16, usize); 2]>, + field_vec_ngram2: SmallVec<[(u16, usize); 2]>, + field_vec_ngram3: SmallVec<[(u16, usize); 2]>, +) -> f32 { + let mut bm25f = 0.0; + let block_id = docid >> 16; + + if shard.indexed_field_vec.len() == 1 { + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap(shard, 0, block_id, docid & 0b11111111_11111111) + } else { + shard.level_index[block_id].document_length_compressed_array[0] + [docid & 0b11111111_11111111] + } as usize]; + + match plo_single.ngram_type { + NgramType::SingleTerm => { + let tf = field_vec[0].1 as f32; + + bm25f = plo_single.idf * ((tf * (K + 1.0) / (tf + bm25_component)) + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + let tf_ngram1 = field_vec_ngram1[0].1 as f32; + let tf_ngram2 = field_vec_ngram2[0].1 as f32; + + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) / (tf_ngram1 + bm25_component)) + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) / (tf_ngram2 + bm25_component)) + SIGMA); + } + _ => { + let tf_ngram1 = field_vec_ngram1[0].1 as f32; + let tf_ngram2 = field_vec_ngram2[0].1 as f32; + let tf_ngram3 = field_vec_ngram3[0].1 as f32; + + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) / (tf_ngram1 + bm25_component)) + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) / (tf_ngram2 + bm25_component)) + SIGMA) + + plo_single.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) / (tf_ngram3 + bm25_component)) + SIGMA); + } + } + } else if plo_single.ngram_type == NgramType::SingleTerm + || shard.meta.similarity == SimilarityType::Bm25fProximity + { + for field in field_vec.iter() { + let field_id = field.0 as usize; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + let tf = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight * plo_single.idf * ((tf * (K + 1.0) / (tf + bm25_component)) + SIGMA); + } + } else if plo_single.ngram_type == NgramType::NgramFF + || plo_single.ngram_type == NgramType::NgramRF + || plo_single.ngram_type == NgramType::NgramFR + { + for field in field_vec_ngram1.iter() { + let field_id = field.0 as usize; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) / (tf_ngram1 + bm25_component)) + SIGMA); + } + + for field in field_vec_ngram2.iter() { + let field_id = field.0 as usize; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + 
let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) / (tf_ngram2 + bm25_component)) + SIGMA); + } + } else { + for field in field_vec_ngram1.iter() { + let field_id = field.0 as usize; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) / (tf_ngram1 + bm25_component)) + SIGMA); + } + + for field in field_vec_ngram2.iter() { + let field_id = field.0 as usize; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) / (tf_ngram2 + bm25_component)) + SIGMA); + } + + for field in field_vec_ngram3.iter() { + let field_id = field.0 as usize; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + let tf_ngram3 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo_single.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) / (tf_ngram3 + bm25_component)) + SIGMA); + } + } + + bm25f +} + +#[inline(always)] +pub(crate) fn get_bm25f_singleterm_singlefield( + shard: &Shard, + docid: usize, + plo_single: &PostingListObjectSingle, + tf_ngram1: u32, + tf_ngram2: u32, + tf_ngram3: u32, + positions_count: u32, +) -> f32 { + let bm25f; + let block_id = docid >> 16; + + if shard.indexed_field_vec.len() == 1 { + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap(shard, 0, block_id, docid & 0b11111111_11111111) + } else { + shard.level_index[block_id].document_length_compressed_array[0] + [docid & 0b11111111_11111111] + } as usize]; + + match plo_single.ngram_type { + NgramType::SingleTerm => { + let tf = positions_count as f32; + + bm25f = plo_single.idf * ((tf * (K + 1.0) / (tf + bm25_component)) + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 as f32 * (K + 1.0) / (tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 as f32 * (K + 1.0) / (tf_ngram2 as f32 + bm25_component)) + + SIGMA); + } + _ => { + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 as f32 * (K + 1.0) / (tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 as f32 * (K + 1.0) / (tf_ngram2 as f32 + bm25_component)) + + SIGMA) + + plo_single.idf_ngram3 + * 
((tf_ngram3 as f32 * (K + 1.0) / (tf_ngram3 as f32 + bm25_component)) + + SIGMA); + } + } + } else { + let field_id = 0; + + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array[field_id] + [docid & 0b11111111_11111111] + } as usize]; + + match plo_single.ngram_type { + NgramType::SingleTerm => { + let tf = positions_count as f32; + + bm25f = plo_single.idf * ((tf * (K + 1.0) / (tf + bm25_component)) + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 as f32 * (K + 1.0) / (tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 as f32 * (K + 1.0) / (tf_ngram2 as f32 + bm25_component)) + + SIGMA); + } + _ => { + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 as f32 * (K + 1.0) / (tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 as f32 * (K + 1.0) / (tf_ngram2 as f32 + bm25_component)) + + SIGMA) + + plo_single.idf_ngram3 + * ((tf_ngram3 as f32 * (K + 1.0) / (tf_ngram3 as f32 + bm25_component)) + + SIGMA); + } + } + } + + bm25f +} + +#[inline(always)] +pub(crate) fn get_bm25f_multiterm_multifield( + shard: &Shard, + docid: usize, + query_list: &mut [PostingListObjectQuery], +) -> f32 { + let mut bm25f = 0.0; + let block_id = docid >> 16; + + if shard.indexed_field_vec.len() == 1 { + let bm25_component = + shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap(shard, 0, block_id, docid & 0b11111111_11111111) + } else { + shard.level_index[block_id].document_length_compressed_array[0] + [docid & 0b11111111_11111111] + } as usize]; + + for plo in query_list.iter() { + if !plo.bm25_flag { + continue; + } + + match plo.ngram_type { + NgramType::SingleTerm => { + let tf = plo.field_vec[0].1 as f32; + + bm25f += plo.idf * ((tf * (K + 1.0) / (tf + bm25_component)) + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + bm25f += plo.idf_ngram1 + * ((plo.tf_ngram1 as f32 * (K + 1.0) + / (plo.tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo.idf_ngram2 + * ((plo.tf_ngram2 as f32 * (K + 1.0) + / (plo.tf_ngram2 as f32 + bm25_component)) + + SIGMA); + } + _ => { + bm25f += plo.idf_ngram1 + * ((plo.tf_ngram1 as f32 * (K + 1.0) + / (plo.tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo.idf_ngram2 + * ((plo.tf_ngram2 as f32 * (K + 1.0) + / (plo.tf_ngram2 as f32 + bm25_component)) + + SIGMA) + + plo.idf_ngram3 + * ((plo.tf_ngram3 as f32 * (K + 1.0) + / (plo.tf_ngram3 as f32 + bm25_component)) + + SIGMA); + } + } + } + } else { + let mut bm25_component_vec: SmallVec<[f32; 2]> = + smallvec![0.0; shard.indexed_field_vec.len()]; + for plo in query_list.iter() { + if !plo.bm25_flag { + continue; + } + + match plo.ngram_type { + NgramType::SingleTerm => { + for field in plo.field_vec.iter() { + let field_id = field.0 as usize; + if bm25_component_vec[field_id] == 0.0 { + bm25_component_vec[field_id] = + shard.bm25_component_cache[if shard.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array + [field_id][docid & 0b11111111_11111111] + } + as usize]; + } + + let tf = field.1 as f32; + + 
let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf + * ((tf * (K + 1.0) / (tf + bm25_component_vec[field_id])) + SIGMA); + } + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + for field in plo.field_vec_ngram1.iter() { + let field_id = field.0 as usize; + if bm25_component_vec[field_id] == 0.0 { + bm25_component_vec[field_id] = + shard.bm25_component_cache[if shard.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array + [field_id][docid & 0b11111111_11111111] + } + as usize]; + } + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + bm25_component_vec[field_id])) + + SIGMA); + } + + for field in plo.field_vec_ngram2.iter() { + let field_id = field.0 as usize; + if bm25_component_vec[field_id] == 0.0 { + bm25_component_vec[field_id] = + shard.bm25_component_cache[if shard.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array + [field_id][docid & 0b11111111_11111111] + } + as usize]; + } + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + bm25_component_vec[field_id])) + + SIGMA); + } + } + _ => { + for field in plo.field_vec_ngram1.iter() { + let field_id = field.0 as usize; + if bm25_component_vec[field_id] == 0.0 { + bm25_component_vec[field_id] = + shard.bm25_component_cache[if shard.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array + [field_id][docid & 0b11111111_11111111] + } + as usize]; + } + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + bm25_component_vec[field_id])) + + SIGMA); + } + + for field in plo.field_vec_ngram2.iter() { + let field_id = field.0 as usize; + if bm25_component_vec[field_id] == 0.0 { + bm25_component_vec[field_id] = + shard.bm25_component_cache[if shard.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array + [field_id][docid & 0b11111111_11111111] + } + as usize]; + } + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + bm25_component_vec[field_id])) + + SIGMA); + } + + for field in plo.field_vec_ngram3.iter() { + let field_id = field.0 as usize; + if bm25_component_vec[field_id] == 0.0 { + bm25_component_vec[field_id] = + shard.bm25_component_cache[if shard.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap( + shard, + field_id, + block_id, + docid & 0b11111111_11111111, + ) + } else { + shard.level_index[block_id].document_length_compressed_array + 
[field_id][docid & 0b11111111_11111111] + } + as usize]; + } + + let tf_ngram3 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + bm25_component_vec[field_id])) + + SIGMA); + } + } + } + } + } + + bm25f +} + +#[inline(always)] +pub(crate) fn get_bm25f_multiterm_singlefield( + shard: &Shard, + docid: usize, + query_list: &mut [PostingListObjectQuery], +) -> f32 { + let mut bm25f = 0.0; + let block_id = docid >> 16; + + let bm25_component = shard.bm25_component_cache[if shard.meta.access_type == AccessType::Mmap { + get_document_length_compressed_mmap(shard, 0, block_id, docid & 0b11111111_11111111) + } else { + shard.level_index[block_id].document_length_compressed_array[0][docid & 0b11111111_11111111] + } as usize]; + + for plo in query_list.iter() { + if !plo.bm25_flag { + continue; + } + + match plo.ngram_type { + NgramType::SingleTerm => { + let tf = plo.positions_count as f32; + + bm25f += plo.idf * ((tf * (K + 1.0) / (tf + bm25_component)) + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + bm25f += plo.idf_ngram1 + * ((plo.tf_ngram1 as f32 * (K + 1.0) + / (plo.tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo.idf_ngram2 + * ((plo.tf_ngram2 as f32 * (K + 1.0) + / (plo.tf_ngram2 as f32 + bm25_component)) + + SIGMA); + } + _ => { + bm25f += plo.idf_ngram1 + * ((plo.tf_ngram1 as f32 * (K + 1.0) + / (plo.tf_ngram1 as f32 + bm25_component)) + + SIGMA) + + plo.idf_ngram2 + * ((plo.tf_ngram2 as f32 * (K + 1.0) + / (plo.tf_ngram2 as f32 + bm25_component)) + + SIGMA) + + plo.idf_ngram3 + * ((plo.tf_ngram3 as f32 * (K + 1.0) + / (plo.tf_ngram3 as f32 + bm25_component)) + + SIGMA); + } + } + } + + bm25f +} + +#[inline(always)] +pub(crate) fn decode_positions_multiterm_multifield( + shard: &Shard, + plo: &mut PostingListObjectQuery, + facet_filtered: bool, + phrase_query: bool, + all_terms_frequent: bool, +) -> bool { + let mut field_vec: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + + let posting_pointer_size_sum = if plo.p_docid < plo.pointer_pivot_p_docid as usize { + plo.p_docid as u32 * 2 + } else { + (plo.p_docid as u32) * 3 - plo.pointer_pivot_p_docid as u32 + }; + + let mut positions_pointer = + (plo.rank_position_pointer_range + posting_pointer_size_sum) as usize; + + let rank_position_pointer = if plo.p_docid < plo.pointer_pivot_p_docid as usize { + read_u16(plo.byte_array, positions_pointer) as u32 + } else { + read_u32(plo.byte_array, positions_pointer) + }; + + if (rank_position_pointer + & (if plo.p_docid < plo.pointer_pivot_p_docid as usize { + 0b10000000_00000000 + } else { + 0b10000000_00000000_00000000 + })) + == 0 + { + plo.is_embedded = false; + + let pointer_value = if plo.p_docid < plo.pointer_pivot_p_docid as usize { + rank_position_pointer & 0b01111111_11111111 + } else { + rank_position_pointer & 0b01111111_11111111_11111111 + } as usize; + + positions_pointer = plo.rank_position_pointer_range as usize - pointer_value; + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + plo.field_vec_ngram1.clear(); + plo.field_vec_ngram2.clear(); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + &mut plo.field_vec_ngram1, + plo.byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + 
shard.indexed_field_id_mask, + shard.longest_field_id, + &mut plo.field_vec_ngram2, + plo.byte_array, + &mut positions_pointer, + ); + } + _ => { + plo.field_vec_ngram1.clear(); + plo.field_vec_ngram2.clear(); + plo.field_vec_ngram3.clear(); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + &mut plo.field_vec_ngram1, + plo.byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + &mut plo.field_vec_ngram2, + plo.byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + &mut plo.field_vec_ngram3, + plo.byte_array, + &mut positions_pointer, + ); + } + } + + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + &mut field_vec, + plo.byte_array, + &mut positions_pointer, + ); + + if SPEEDUP_FLAG + && all_terms_frequent + && !phrase_query + && !facet_filtered + && field_vec[0].1 < 10 + { + return true; + } + } else { + plo.is_embedded = true; + + if SPEEDUP_FLAG && all_terms_frequent && !phrase_query && !facet_filtered { + return true; + } + + let field_id; + + if plo.p_docid < plo.pointer_pivot_p_docid as usize { + match ( + shard.indexed_field_vec.len() == 1, + rank_position_pointer >> 12, + ) { + (true, 0b1000..=0b1011) => { + if phrase_query { + plo.embedded_positions = + [rank_position_pointer & 0b00111111_11111111, 0, 0, 0]; + }; + field_vec.push((0, 1)); + } + (true, 0b1100..=0b1111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 7) & 0b00000000_01111111, + rank_position_pointer & 0b00000000_01111111, + 0, + 0, + ]; + }; + field_vec.push((0, 2)); + } + + (false, 0b1100 | 0b1101) => { + if phrase_query { + plo.embedded_positions = + [rank_position_pointer & 0b00011111_11111111, 0, 0, 0]; + }; + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 1)); + } + (false, 0b1110 | 0b1111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 7) & 0b00000000_00111111, + rank_position_pointer & 0b00000000_01111111, + 0, + 0, + ]; + }; + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 2)); + } + + (false, 0b1000) => { + let position_bits = 12 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 1)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer & ((1 << position_bits) - 1)), + 0, + 0, + 0, + ]; + }; + } + (false, 0b1001) => { + let position_bits = 12 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 2)); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + (false, 0b1010) => { + let position_bits = 12 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + 
field_vec.push((field_id, 3)); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b1011) => { + let position_bits = + 12 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + + (_, _) => { + if phrase_query { + println!("unsupported 2 byte pointer embedded"); + plo.embedded_positions = [0, 0, 0, 0] + }; + } + } + } else { + match ( + shard.indexed_field_vec.len() == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19, + ) { + (true, 0b10000..=0b10011) => { + if phrase_query { + plo.embedded_positions = [ + rank_position_pointer & 0b00011111_11111111_11111111, + 0, + 0, + 0, + ]; + }; + field_vec.push((0, 1)); + } + (true, 0b10100..=0b10111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 11) & 0b00000000_00000011_11111111, + rank_position_pointer & 0b00000000_00000111_11111111, + 0, + 0, + ]; + }; + field_vec.push((0, 2)); + } + (true, 0b11000..=0b11011) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 14) & 0b00000000_00000000_01111111, + (rank_position_pointer >> 7) & 0b00000000_00000000_01111111, + rank_position_pointer & 0b00000000_00000000_01111111, + 0, + ]; + }; + field_vec.push((0, 3)); + } + (true, 0b11100..=0b11111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 16) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 11) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 6) & 0b00000000_00000000_00011111, + rank_position_pointer & 0b00000000_00000000_00111111, + ]; + }; + field_vec.push((0, 4)); + } + + (false, 0b11000 | 0b11001) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 1)); + if phrase_query { + plo.embedded_positions = [ + rank_position_pointer & 0b00001111_11111111_11111111, + 0, + 0, + 0, + ]; + }; + } + (false, 0b11010 | 0b11011) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 2)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 10) & 0b00000000_00000011_11111111, + rank_position_pointer & 0b00000000_00000011_11111111, + 0, + 0, + ]; + }; + } + (false, 0b11100 | 0b11101) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 3)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 14) & 0b00000000_00000000_00111111, + (rank_position_pointer >> 7) & 0b00000000_00000000_01111111, + rank_position_pointer & 0b00000000_00000000_01111111, + 0, + ]; + }; + } + 
(false, 0b11110 | 0b11111) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 4)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 15) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 10) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 5) & 0b00000000_00000000_00011111, + rank_position_pointer & 0b00000000_00000000_00011111, + ]; + }; + } + + (false, 0b10000) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 1)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer & ((1 << position_bits) - 1)), + 0, + 0, + 0, + ]; + }; + } + + (false, 0b10001) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 2)); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + (false, 0b10010) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 3)); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b10011) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 4)); + if phrase_query { + let position_bits_1 = position_bits >> 2; + let position_bits_2 = (position_bits - position_bits_1) / 3; + let position_bits_3 = + (position_bits - position_bits_1 - position_bits_2) >> 1; + let position_bits_4 = + position_bits - position_bits_1 - position_bits_2 - position_bits_3; + plo.embedded_positions = [ + ((rank_position_pointer + >> (position_bits_2 + position_bits_3 + position_bits_4)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> (position_bits_3 + position_bits_4)) + & ((1 << position_bits_2) - 1)), + ((rank_position_pointer >> position_bits_4) + & ((1 << position_bits_3) - 1)), + (rank_position_pointer & ((1 << position_bits_4) - 1)), + ]; + }; + } + (false, 0b10100) => { + let position_bits = + 19 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << 
position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + (false, 0b10101) => { + let position_bits = + 19 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 2)]); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b10110) => { + let position_bits = + 19 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 2), (field_id_2, 1)]); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b10111) => { + let position_bits = 19 + - shard.indexed_field_id_bits + - shard.indexed_field_id_bits + - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + + shard.indexed_field_id_bits + + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = + ((rank_position_pointer >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_3 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1), (field_id_3, 1)]); + + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + + (_, _) => { + if phrase_query { + println!( + "unsupported 3 byte pointer embedded {} {:032b}", + shard.indexed_field_vec.len() == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19 + ); + plo.embedded_positions = [0, 0, 0, 0] + }; + } + } + }; + } + + plo.positions_pointer = positions_pointer as u32; + plo.positions_count = field_vec[0].1 as u32; + plo.field_vec = field_vec; + + false +} + +#[inline(always)] +pub(crate) fn decode_positions_multiterm_singlefield( + plo: &mut 
PostingListObjectQuery, + facet_filtered: bool, + phrase_query: bool, + all_terms_frequent: bool, +) -> bool { + let mut positions_count = 0; + + let posting_pointer_size_sum = if plo.p_docid < plo.pointer_pivot_p_docid as usize { + plo.p_docid as u32 * 2 + } else { + (plo.p_docid as u32) * 3 - plo.pointer_pivot_p_docid as u32 + }; + + let mut positions_pointer = plo.rank_position_pointer_range + posting_pointer_size_sum; + + let rank_position_pointer = if plo.p_docid < plo.pointer_pivot_p_docid as usize { + read_u16(plo.byte_array, positions_pointer as usize) as u32 + } else { + read_u32(plo.byte_array, positions_pointer as usize) + }; + + if (rank_position_pointer + & (if plo.p_docid < plo.pointer_pivot_p_docid as usize { + 0b10000000_00000000 + } else { + 0b10000000_00000000_00000000 + })) + == 0 + { + plo.is_embedded = false; + + let pointer_value = if plo.p_docid < plo.pointer_pivot_p_docid as usize { + rank_position_pointer & 0b01111111_11111111 + } else { + rank_position_pointer & 0b01111111_11111111_11111111 + } as usize; + + positions_pointer = plo.rank_position_pointer_range - pointer_value as u32; + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + read_singlefield_value(&mut plo.tf_ngram1, plo.byte_array, &mut positions_pointer); + read_singlefield_value(&mut plo.tf_ngram2, plo.byte_array, &mut positions_pointer); + } + _ => { + read_singlefield_value(&mut plo.tf_ngram1, plo.byte_array, &mut positions_pointer); + read_singlefield_value(&mut plo.tf_ngram2, plo.byte_array, &mut positions_pointer); + read_singlefield_value(&mut plo.tf_ngram3, plo.byte_array, &mut positions_pointer); + } + } + + read_singlefield_value(&mut positions_count, plo.byte_array, &mut positions_pointer); + + if SPEEDUP_FLAG + && all_terms_frequent + && !phrase_query + && !facet_filtered + && positions_count < 10 + { + return true; + } + } else { + plo.is_embedded = true; + + if SPEEDUP_FLAG && all_terms_frequent && !phrase_query && !facet_filtered { + return true; + } + + if plo.p_docid < plo.pointer_pivot_p_docid as usize { + match rank_position_pointer >> 14 { + 0b10 => { + if phrase_query { + plo.embedded_positions = + [rank_position_pointer & 0b00111111_11111111, 0, 0, 0]; + }; + positions_count = 1; + } + 0b11 => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 7) & 0b00000000_01111111, + rank_position_pointer & 0b00000000_01111111, + 0, + 0, + ]; + }; + positions_count = 2; + } + + _ => { + if phrase_query { + println!("unsupported 2 byte pointer embedded"); + plo.embedded_positions = [0, 0, 0, 0] + }; + positions_count = 0; + } + } + } else { + match (rank_position_pointer & 0b11111111_11111111_11111111) >> 21 { + 0b100 => { + if phrase_query { + plo.embedded_positions = [ + rank_position_pointer & 0b00011111_11111111_11111111, + 0, + 0, + 0, + ]; + }; + positions_count = 1; + } + 0b101 => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 11) & 0b00000000_00000011_11111111, + rank_position_pointer & 0b00000000_00000111_11111111, + 0, + 0, + ]; + }; + positions_count = 2; + } + 0b110 => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 14) & 0b00000000_00000000_01111111, + (rank_position_pointer >> 7) & 0b00000000_00000000_01111111, + rank_position_pointer & 0b00000000_00000000_01111111, + 0, + ]; + }; + positions_count = 3; + } + 0b111 => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 16) & 
0b00000000_00000000_00011111, + (rank_position_pointer >> 11) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 6) & 0b00000000_00000000_00011111, + rank_position_pointer & 0b00000000_00000000_00111111, + ]; + }; + positions_count = 4; + } + + _ => { + if phrase_query { + println!("unsupported 3 byte pointer embedded"); + plo.embedded_positions = [0, 0, 0, 0] + }; + positions_count = 0; + } + } + }; + } + + plo.positions_pointer = positions_pointer; + plo.positions_count = positions_count; + + false +} + +#[inline(always)] +pub(crate) fn read_multifield_vec( + indexed_field_vec_len: usize, + indexed_field_id_bits: usize, + indexed_field_id_mask: usize, + longest_field_id: usize, + field_vec: &mut SmallVec<[(u16, usize); 2]>, + byte_array: &[u8], + positions_pointer: &mut usize, +) { + let mut positions_count; + if indexed_field_vec_len == 1 { + positions_count = byte_array[*positions_pointer] as u32; + *positions_pointer += 1; + if (positions_count & STOP_BIT as u32) > 0 { + positions_count &= 0b01111111 + } else { + positions_count = (positions_count & 0b01111111) << 7; + let positions_count2 = byte_array[*positions_pointer] as u32; + *positions_pointer += 1; + if (positions_count2 & STOP_BIT as u32) > 0 { + positions_count |= positions_count2 & 0b01111111 + } else { + positions_count = (positions_count << 7) + | (positions_count2 & 0b01111111) << 7 + | (byte_array[*positions_pointer] & 0b01111111) as u32; + *positions_pointer += 1; + } + }; + field_vec.push((0, positions_count as usize)); + } else if byte_array[*positions_pointer] & 0b01000000 > 0 { + positions_count = byte_array[*positions_pointer] as u32; + *positions_pointer += 1; + if (positions_count & STOP_BIT as u32) > 0 { + positions_count &= 0b00111111 + } else { + positions_count = (positions_count & 0b00111111) << 7; + let positions_count2 = byte_array[*positions_pointer] as u32; + *positions_pointer += 1; + if (positions_count2 & STOP_BIT as u32) > 0 { + positions_count |= positions_count2 & 0b01111111 + } else { + positions_count = (positions_count << 7) + | (positions_count2 & 0b01111111) << 7 + | (byte_array[*positions_pointer] & 0b01111111) as u32; + *positions_pointer += 1; + } + }; + field_vec.push((longest_field_id as u16, positions_count as usize)); + } else { + let mut first = true; + loop { + let mut byte = byte_array[*positions_pointer]; + *positions_pointer += 1; + + let field_stop = { + byte & if first { + FIELD_STOP_BIT_1 + } else { + FIELD_STOP_BIT_2 + } > 0 + }; + + let mut field_id_position_count = + byte as usize & if first { 0b0001_1111 } else { 0b0011_1111 }; + + if (byte & STOP_BIT) == 0 { + byte = byte_array[*positions_pointer]; + *positions_pointer += 1; + + field_id_position_count = + field_id_position_count << 7 | (byte as usize & 0b01111111); + + if (byte & STOP_BIT) == 0 { + byte = byte_array[*positions_pointer]; + *positions_pointer += 1; + + field_id_position_count = + field_id_position_count << 7 | (byte as usize & 0b01111111); + } + } + + let field_id = (field_id_position_count & indexed_field_id_mask) as u16; + positions_count = (field_id_position_count >> indexed_field_id_bits) as u32; + + field_vec.push((field_id, positions_count as usize)); + + first = false; + + if (byte & STOP_BIT) > 0 && field_stop { + break; + } + } + } +} + +#[inline(always)] +pub(crate) fn decode_positions_singleterm_multifield( + shard: &Shard, + plo: &PostingListObjectSingle, + field_vec: &mut SmallVec<[(u16, usize); 2]>, + field_vec_ngram1: &mut SmallVec<[(u16, usize); 2]>, + field_vec_ngram2: &mut 
SmallVec<[(u16, usize); 2]>, + field_vec_ngram3: &mut SmallVec<[(u16, usize); 2]>, +) { + let posting_pointer_size_sum = if (plo.p_docid as usize) < plo.pointer_pivot_p_docid as usize { + plo.p_docid as u32 * 2 + } else { + (plo.p_docid as u32) * 3 - plo.pointer_pivot_p_docid as u32 + }; + + let mut positions_pointer = + (plo.rank_position_pointer_range + posting_pointer_size_sum) as usize; + + let rank_position_pointer = if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + read_u16(plo.byte_array, positions_pointer) as u32 + } else { + read_u32(plo.byte_array, positions_pointer) + }; + + if (rank_position_pointer + & (if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + 0b10000000_00000000 + } else { + 0b10000000_00000000_00000000 + })) + == 0 + { + let pointer_value = if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + rank_position_pointer & 0b01111111_11111111 + } else { + rank_position_pointer & 0b01111111_11111111_11111111 + } as usize; + + positions_pointer = plo.rank_position_pointer_range as usize - pointer_value; + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + field_vec_ngram1, + plo.byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + field_vec_ngram2, + plo.byte_array, + &mut positions_pointer, + ); + } + _ => { + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + field_vec_ngram1, + plo.byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + field_vec_ngram2, + plo.byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + field_vec_ngram3, + plo.byte_array, + &mut positions_pointer, + ); + } + } + + read_multifield_vec( + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id, + field_vec, + plo.byte_array, + &mut positions_pointer, + ); + } else { + let field_id; + + if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + match ( + shard.indexed_field_vec.len() == 1, + rank_position_pointer >> 12, + ) { + (true, 0b1000..=0b1011) => { + field_vec.push((0, 1)); + } + (true, 0b1100..=0b1111) => { + field_vec.push((0, 2)); + } + + (false, 0b1100 | 0b1101) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 1)); + } + (false, 0b1110 | 0b1111) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 2)); + } + + (false, 0b1000) => { + let position_bits = 12 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 1)); + } + (false, 0b1001) => { + let position_bits = 12 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 2)); + } + (false, 0b1010) => { + let position_bits = 12 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) 
+ & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 3)); + } + (false, 0b1011) => { + let position_bits = + 12 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + } + + (_, _) => { + println!( + "unsupported single 2 byte pointer embedded {} {:032b}", + shard.indexed_field_vec.len() == 1, + rank_position_pointer >> 12 + ); + } + } + } else { + match ( + shard.indexed_field_vec.len() == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19, + ) { + (true, 0b10000..=0b10011) => { + field_vec.push((0, 1)); + } + (true, 0b10100..=0b10111) => { + field_vec.push((0, 2)); + } + (true, 0b11000..=0b11011) => { + field_vec.push((0, 3)); + } + (true, 0b11100..=0b11111) => { + field_vec.push((0, 4)); + } + + (false, 0b11000 | 0b11001) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 1)); + } + (false, 0b11010 | 0b11011) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 2)); + } + (false, 0b11100 | 0b11101) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 3)); + } + (false, 0b11110 | 0b11111) => { + field_id = shard.longest_field_id as u16; + field_vec.push((field_id, 4)); + } + + (false, 0b10000) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 1)); + } + + (false, 0b10001) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 2)); + } + (false, 0b10010) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 3)); + } + (false, 0b10011) => { + let position_bits = 19 - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 4)); + } + (false, 0b10100) => { + let position_bits = + 19 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + } + (false, 0b10101) => { + let position_bits = + 19 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 2)]); + } + (false, 0b10110) => { + let position_bits = + 19 - shard.indexed_field_id_bits - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & 
shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 2), (field_id_2, 1)]); + } + (false, 0b10111) => { + let position_bits = 19 + - shard.indexed_field_id_bits + - shard.indexed_field_id_bits + - shard.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + + shard.indexed_field_id_bits + + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_2 = + ((rank_position_pointer >> (position_bits + shard.indexed_field_id_bits)) + & shard.indexed_field_id_mask as u32) as u16; + let field_id_3 = ((rank_position_pointer >> position_bits) + & shard.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1), (field_id_3, 1)]); + } + + (_, _) => { + println!( + "unsupported single 3 byte pointer embedded {} {:032b}", + shard.indexed_field_vec.len() == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19 + ); + } + } + }; + } +} + +#[inline(always)] +pub(crate) fn read_singlefield_value( + positions_count: &mut u32, + byte_array: &[u8], + positions_pointer: &mut u32, +) { + let mut positions_count_internal = byte_array[*positions_pointer as usize] as u32; + *positions_pointer += 1; + if (positions_count_internal & STOP_BIT as u32) > 0 { + positions_count_internal &= 0b01111111 + } else { + positions_count_internal = (positions_count_internal & 0b01111111) << 7; + let positions_count2 = byte_array[*positions_pointer as usize] as u32; + *positions_pointer += 1; + if (positions_count2 & STOP_BIT as u32) > 0 { + positions_count_internal |= positions_count2 & 0b01111111 + } else { + positions_count_internal = (positions_count_internal << 7) + | (positions_count2 & 0b01111111) << 7 + | (byte_array[*positions_pointer as usize] & 0b01111111) as u32; + } + }; + *positions_count = positions_count_internal; +} + +#[inline(always)] +pub(crate) fn decode_positions_singleterm_singlefield( + plo: &PostingListObjectSingle, + tf_ngram1: &mut u32, + tf_ngram2: &mut u32, + tf_ngram3: &mut u32, + positions_count: &mut u32, +) { + let posting_pointer_size_sum = if (plo.p_docid as usize) < plo.pointer_pivot_p_docid as usize { + plo.p_docid as u32 * 2 + } else { + (plo.p_docid as u32) * 3 - plo.pointer_pivot_p_docid as u32 + }; + + let mut positions_pointer = plo.rank_position_pointer_range + posting_pointer_size_sum; + + let rank_position_pointer = if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + read_u16(plo.byte_array, positions_pointer as usize) as u32 + } else { + read_u32(plo.byte_array, positions_pointer as usize) + }; + + if (rank_position_pointer + & (if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + 0b10000000_00000000 + } else { + 0b10000000_00000000_00000000 + })) + == 0 + { + let pointer_value = if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + rank_position_pointer & 0b01111111_11111111 + } else { + rank_position_pointer & 0b01111111_11111111_11111111 + } as usize; + + positions_pointer = plo.rank_position_pointer_range - pointer_value as u32; + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + read_singlefield_value(tf_ngram1, plo.byte_array, &mut positions_pointer); + read_singlefield_value(tf_ngram2, plo.byte_array, &mut positions_pointer); + } + _ => { + read_singlefield_value(tf_ngram1, plo.byte_array, &mut positions_pointer); + read_singlefield_value(tf_ngram2, plo.byte_array, &mut positions_pointer); + read_singlefield_value(tf_ngram3, plo.byte_array, &mut positions_pointer); + } + } + 
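+        // In the non-embedded case, the (optional) per-ngram term frequencies read above are
+        // followed by the posting's position count for the single indexed field, stored with the
+        // same variable-length (stop-bit) encoding that read_singlefield_value decodes.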
+ read_singlefield_value(positions_count, plo.byte_array, &mut positions_pointer); + } else if plo.p_docid < plo.pointer_pivot_p_docid as i32 { + match rank_position_pointer >> 14 { + 0b10 => { + *positions_count = 1; + } + 0b11 => { + *positions_count = 2; + } + + _ => { + println!( + "unsupported single 2 byte pointer embedded {:032b}", + rank_position_pointer >> 14 + ); + } + } + } else { + match (rank_position_pointer & 0b11111111_11111111_11111111) >> 21 { + 0b100 => { + *positions_count = 1; + } + 0b101 => { + *positions_count = 2; + } + 0b110 => { + *positions_count = 3; + } + 0b111 => { + *positions_count = 4; + } + + _ => { + println!( + "unsupported single 3 byte pointer embedded {:032b}", + (rank_position_pointer & 0b11111111_11111111_11111111) >> 21 + ); + } + } + } +} + +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub(crate) fn decode_positions_commit( + posting_pointer_size: u8, + embed_flag: bool, + byte_array: &[u8], + pointer: usize, + ngram_type: &NgramType, + indexed_field_vec_len: usize, + indexed_field_id_bits: usize, + indexed_field_id_mask: usize, + longest_field_id: u16, + + field_vec: &mut SmallVec<[(u16, usize); 2]>, + field_vec_ngram1: &mut SmallVec<[(u16, usize); 2]>, + field_vec_ngram2: &mut SmallVec<[(u16, usize); 2]>, + field_vec_ngram3: &mut SmallVec<[(u16, usize); 2]>, +) { + let mut positions_pointer = pointer; + + if !embed_flag { + match ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + read_multifield_vec( + indexed_field_vec_len, + indexed_field_id_bits, + indexed_field_id_mask, + longest_field_id as usize, + field_vec_ngram1, + byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + indexed_field_vec_len, + indexed_field_id_bits, + indexed_field_id_mask, + longest_field_id as usize, + field_vec_ngram2, + byte_array, + &mut positions_pointer, + ); + } + _ => { + read_multifield_vec( + indexed_field_vec_len, + indexed_field_id_bits, + indexed_field_id_mask, + longest_field_id as usize, + field_vec_ngram1, + byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + indexed_field_vec_len, + indexed_field_id_bits, + indexed_field_id_mask, + longest_field_id as usize, + field_vec_ngram2, + byte_array, + &mut positions_pointer, + ); + read_multifield_vec( + indexed_field_vec_len, + indexed_field_id_bits, + indexed_field_id_mask, + longest_field_id as usize, + field_vec_ngram3, + byte_array, + &mut positions_pointer, + ); + } + } + + read_multifield_vec( + indexed_field_vec_len, + indexed_field_id_bits, + indexed_field_id_mask, + longest_field_id as usize, + field_vec, + byte_array, + &mut positions_pointer, + ); + } else { + let rank_position_pointer = if posting_pointer_size == 2 { + read_u16(byte_array, positions_pointer) as u32 + } else { + read_u32(byte_array, positions_pointer) + }; + + let field_id; + + if posting_pointer_size == 2 { + match (indexed_field_vec_len == 1, rank_position_pointer >> 12) { + (true, 0b1000..=0b1011) => { + field_vec.push((0, 1)); + } + (true, 0b1100..=0b1111) => { + field_vec.push((0, 2)); + } + + (false, 0b1100 | 0b1101) => { + field_id = longest_field_id; + field_vec.push((field_id, 1)); + } + (false, 0b1110 | 0b1111) => { + field_id = longest_field_id; + field_vec.push((field_id, 2)); + } + + (false, 0b1000) => { + let position_bits = 12 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 1)); + } + (false, 0b1001) => { + let 
position_bits = 12 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 2)); + } + (false, 0b1010) => { + let position_bits = 12 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 3)); + } + (false, 0b1011) => { + let position_bits = 12 - indexed_field_id_bits - indexed_field_id_bits; + field_id = ((rank_position_pointer >> (position_bits + indexed_field_id_bits)) + & indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + } + + (_, _) => { + println!( + "unsupported single 2 byte pointer embedded commit {} {:032b}", + indexed_field_vec_len == 1, + rank_position_pointer >> 12 + ); + } + } + } else { + match ( + indexed_field_vec_len == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19, + ) { + (true, 0b10000..=0b10011) => { + field_vec.push((0, 1)); + } + (true, 0b10100..=0b10111) => { + field_vec.push((0, 2)); + } + (true, 0b11000..=0b11011) => { + field_vec.push((0, 3)); + } + (true, 0b11100..=0b11111) => { + field_vec.push((0, 4)); + } + + (false, 0b11000 | 0b11001) => { + field_id = longest_field_id; + field_vec.push((field_id, 1)); + } + (false, 0b11010 | 0b11011) => { + field_id = longest_field_id; + field_vec.push((field_id, 2)); + } + (false, 0b11100 | 0b11101) => { + field_id = longest_field_id; + field_vec.push((field_id, 3)); + } + (false, 0b11110 | 0b11111) => { + field_id = longest_field_id; + field_vec.push((field_id, 4)); + } + + (false, 0b10000) => { + let position_bits = 19 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 1)); + } + + (false, 0b10001) => { + let position_bits = 19 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 2)); + } + (false, 0b10010) => { + let position_bits = 19 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 3)); + } + (false, 0b10011) => { + let position_bits = 19 - indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.push((field_id, 4)); + } + (false, 0b10100) => { + let position_bits = 19 - indexed_field_id_bits - indexed_field_id_bits; + field_id = ((rank_position_pointer >> (position_bits + indexed_field_id_bits)) + & indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + } + (false, 0b10101) => { + let position_bits = 19 - indexed_field_id_bits - indexed_field_id_bits; + field_id = ((rank_position_pointer >> (position_bits + indexed_field_id_bits)) + & indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.extend([(field_id, 1), (field_id_2, 2)]); + } + (false, 0b10110) => { + let position_bits = 19 - indexed_field_id_bits - indexed_field_id_bits; + field_id = ((rank_position_pointer >> (position_bits + indexed_field_id_bits)) + & indexed_field_id_mask as u32) as u16; + let field_id_2 
= ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.extend([(field_id, 2), (field_id_2, 1)]); + } + (false, 0b10111) => { + let position_bits = + 19 - indexed_field_id_bits - indexed_field_id_bits - indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + indexed_field_id_bits + indexed_field_id_bits)) + & indexed_field_id_mask as u32) as u16; + let field_id_2 = ((rank_position_pointer + >> (position_bits + indexed_field_id_bits)) + & indexed_field_id_mask as u32) as u16; + let field_id_3 = ((rank_position_pointer >> position_bits) + & indexed_field_id_mask as u32) as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1), (field_id_3, 1)]); + } + + (_, _) => { + println!( + "unsupported single 3 byte pointer embedded commit {} {:032b}", + indexed_field_vec_len == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19 + ); + } + } + }; + } +} + +/// Post processing after AND intersection candidates have been found +/// Phrase intersection +/// BM25 ranking vs. seekstorm ranking (implicit phrase search, term proximity, field type boost, source reputation) +/// BM25 is default baseline in IR academics, but exhibits inferior relevance for practical use +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub(crate) fn add_result_multiterm_multifield( + shard: &Shard, + docid: usize, + result_count: &mut i32, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + phrase_query: bool, + block_score: f32, + all_terms_frequent: bool, +) { + if shard.indexed_field_vec.len() == 1 { + add_result_multiterm_singlefield( + shard, + docid, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + return; + } + + if !shard.delete_hashset.is_empty() && shard.delete_hashset.contains(&docid) { + return; + } + + let local_docid = docid & 0b11111111_11111111; + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + match &plo.compression_type { + CompressionType::Array => { + while plo.p_docid < plo.p_docid_count + && (plo.p_docid == 0 || (plo.docid as usize) < local_docid) + { + plo.docid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + (plo.p_docid << 1), + ) as i32; + plo.p_docid += 1; + } + if (plo.docid as usize) == local_docid { + return; + } + } + CompressionType::Bitmap => { + if (plo.byte_array[plo.compressed_doc_id_range + (local_docid >> 3)] + & (1 << (local_docid & 7))) + > 0 + { + return; + } + } + CompressionType::Rle => { + if local_docid >= plo.docid as usize && local_docid <= plo.run_end as usize { + return; + } else { + while (plo.p_run_sum as usize) + ((plo.p_run as usize - 2) >> 2) + < plo.p_docid_count + && local_docid > plo.run_end as usize + { + let startdocid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize, + ); + let runlength = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize + 2, + ); + plo.docid = startdocid as i32; + plo.run_end = (startdocid + runlength) as i32; + plo.p_run_sum += runlength as i32; + plo.p_run += 4; + + if local_docid >= startdocid as usize && local_docid <= plo.run_end as usize + { 
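+                            // The docid falls inside a run of this excluded (NOT) term's
+                            // RLE-compressed posting list, so the document is skipped entirely.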
+ return; + } + } + } + } + _ => {} + } + } + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + return; + }; + + match *result_type { + ResultType::Count => { + if !phrase_query && field_filter_set.is_empty() { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + } + ResultType::Topk => { + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + return; + } + } + ResultType::TopkCount => { + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && !phrase_query + && field_filter_set.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + } + } + + let mut bm25: f32 = 0.0; + + for plo in query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + if decode_positions_multiterm_multifield( + shard, + plo, + !facet_filter.is_empty(), + phrase_query, + all_terms_frequent && field_filter_set.is_empty(), + ) { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + + if !field_filter_set.is_empty() + && plo.field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() + { + let mut match_flag = false; + for field in plo.field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + } + + if result_type == &ResultType::Topk && phrase_query { + bm25 = get_bm25f_multiterm_multifield(shard, docid, query_list); + + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && bm25 <= search_result.topk_candidates._elements[0].score + { + return; + } + } + + if phrase_query { + let len = query_list.len(); + let mut index_transpose = vec![0; len]; + for i in 0..len { + index_transpose[query_list[i].term_index_unique] = i; + } + + let mut phrasematch_count = 0; + if shard.indexed_field_vec.len() == 1 { + for plo in non_unique_query_list.iter_mut() { + plo.p_pos = 0; + let item = &query_list[index_transpose[plo.term_index_unique]]; + plo.positions_pointer = item.positions_pointer as usize; + plo.positions_count = item.positions_count; + + plo.is_embedded = item.is_embedded; + plo.embedded_positions = item.embedded_positions; + + plo.pos = get_next_position_singlefield(plo); + } + + non_unique_query_list.sort_unstable_by(|x, y| { + x.positions_count.partial_cmp(&y.positions_count).unwrap() + }); + + let t1 = 0; + let mut t2 = 1; + let mut pos1 = non_unique_query_list[t1].pos; + let mut pos2 = non_unique_query_list[t2].pos; + + loop { + match (pos1 + non_unique_query_list[t2].term_index_nonunique as u32) + .cmp(&(pos2 + non_unique_query_list[t1].term_index_nonunique as u32)) + { + Ordering::Less => { + if t2 > 1 { + t2 = 1; + pos2 = non_unique_query_list[t2].pos; + } + + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + break; + } + pos1 += get_next_position_singlefield(&mut non_unique_query_list[t1]) + 1; + } + + Ordering::Greater => { + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + break; + } + pos2 = non_unique_query_list[t2].pos + + 
get_next_position_singlefield(&mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + Ordering::Equal => { + if t2 + 1 < non_unique_query_list.len() { + t2 += 1; + pos2 = non_unique_query_list[t2].pos; + continue; + } + + phrasematch_count += 1; + if phrasematch_count >= 1 { + break; + } + + t2 = 1; + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + break; + } + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + break; + } + + pos1 += get_next_position_singlefield(&mut non_unique_query_list[t1]) + 1; + pos2 = non_unique_query_list[t2].pos + + get_next_position_singlefield(&mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + } + } + } else { + for plo in non_unique_query_list.iter_mut() { + let item = &query_list[index_transpose[plo.term_index_unique]]; + plo.positions_pointer = item.positions_pointer as usize; + plo.is_embedded = item.is_embedded; + plo.embedded_positions = item.embedded_positions; + plo.field_vec.clone_from(&item.field_vec); + plo.p_pos = 0; + plo.positions_count = item.positions_count; + plo.p_field = 0; + } + + 'main: for i in 0..shard.indexed_field_vec.len() as u16 { + for plo in non_unique_query_list.iter_mut() { + while plo.field_vec[plo.p_field].0 < i { + if !plo.is_embedded { + for _ in plo.p_pos..plo.field_vec[plo.p_field].1 as i32 { + get_next_position_multifield(plo); + } + } + if plo.p_field < plo.field_vec.len() - 1 { + plo.p_field += 1; + plo.p_pos = 0; + } else { + break 'main; + } + } + if plo.field_vec[plo.p_field].0 > i { + continue 'main; + } + } + + for plo in non_unique_query_list.iter_mut() { + plo.p_pos = 0; + plo.positions_count = plo.field_vec[plo.p_field].1 as u32; + plo.pos = get_next_position_multifield(plo); + } + + if !field_filter_set.is_empty() && !field_filter_set.contains(&i) { + continue; + } + + non_unique_query_list.sort_unstable_by(|x, y| { + x.positions_count.partial_cmp(&y.positions_count).unwrap() + }); + + let t1 = 0; + let mut t2 = 1; + let mut pos1 = non_unique_query_list[t1].pos; + let mut pos2 = non_unique_query_list[t2].pos; + + loop { + match (pos1 + non_unique_query_list[t2].term_index_nonunique as u32) + .cmp(&(pos2 + non_unique_query_list[t1].term_index_nonunique as u32)) + { + Ordering::Less => { + if t2 > 1 { + t2 = 1; + pos2 = non_unique_query_list[t2].pos; + } + + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for item in non_unique_query_list.iter_mut().skip(1) { + item.p_pos += 1 + } + } + break; + } + pos1 += + get_next_position_multifield(&mut non_unique_query_list[t1]) + 1; + } + Ordering::Greater => { + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for (j, item) in non_unique_query_list.iter_mut().enumerate() { + if j != t2 { + item.p_pos += 1 + } + } + } + break; + } + pos2 = non_unique_query_list[t2].pos + + get_next_position_multifield(&mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + Ordering::Equal => { + if t2 + 1 < non_unique_query_list.len() { + t2 += 1; + pos2 = non_unique_query_list[t2].pos; + continue; + } + + phrasematch_count += 1; + if phrasematch_count 
>= 1 { + break 'main; + } + + t2 = 1; + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for item in non_unique_query_list.iter_mut().skip(1) { + item.p_pos += 1 + } + } + break; + } + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for item in non_unique_query_list.iter_mut().skip(2) { + item.p_pos += 1 + } + } + break; + } + + pos1 += + get_next_position_multifield(&mut non_unique_query_list[t1]) + 1; + pos2 = non_unique_query_list[t2].pos + + get_next_position_multifield(&mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + } + } + } + } + + if phrase_query && (phrasematch_count == 0) { + return; + } + } + + facet_count(shard, search_result, docid); + + *result_count += 1; + if result_type == &ResultType::Count { + return; + } + + if result_type != &ResultType::Topk || !phrase_query { + bm25 = get_bm25f_multiterm_multifield(shard, docid, query_list); + } + + search_result.topk_candidates.add_topk( + min_heap::Result { + doc_id: docid, + score: bm25, + }, + top_k, + ); +} + +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub(crate) fn add_result_multiterm_singlefield( + shard: &Shard, + docid: usize, + result_count: &mut i32, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + phrase_query: bool, + + block_score: f32, + all_terms_frequent: bool, +) { + if !shard.delete_hashset.is_empty() && shard.delete_hashset.contains(&docid) { + return; + } + + let local_docid = docid & 0b11111111_11111111; + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + match &plo.compression_type { + CompressionType::Array => { + while plo.p_docid < plo.p_docid_count + && (plo.p_docid == 0 || (plo.docid as usize) < local_docid) + { + plo.docid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + (plo.p_docid << 1), + ) as i32; + plo.p_docid += 1; + } + if (plo.docid as usize) == local_docid { + return; + } + } + CompressionType::Bitmap => { + if (plo.byte_array[plo.compressed_doc_id_range + (local_docid >> 3)] + & (1 << (local_docid & 7))) + > 0 + { + return; + } + } + CompressionType::Rle => { + if local_docid >= plo.docid as usize && local_docid <= plo.run_end as usize { + return; + } else { + while (plo.p_run_sum as usize) + ((plo.p_run as usize - 2) >> 2) + < plo.p_docid_count + && local_docid > plo.run_end as usize + { + let startdocid = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize, + ); + let runlength = read_u16( + plo.byte_array, + plo.compressed_doc_id_range + plo.p_run as usize + 2, + ); + plo.docid = startdocid as i32; + plo.run_end = (startdocid + runlength) as i32; + plo.p_run_sum += runlength as i32; + plo.p_run += 4; + + if local_docid >= startdocid as usize && local_docid <= plo.run_end as usize + { + return; + } + } + } + } + _ => {} + } + } + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + return; + }; + + match *result_type { + ResultType::Count => { + if !phrase_query && field_filter_set.is_empty() { + 
facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + } + ResultType::Topk => { + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + return; + } + } + ResultType::TopkCount => { + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && !phrase_query + && field_filter_set.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + } + } + + let mut bm25: f32 = 0.0; + + for plo in query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + if decode_positions_multiterm_singlefield( + plo, + !facet_filter.is_empty(), + phrase_query, + all_terms_frequent && field_filter_set.is_empty(), + ) { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + + if !field_filter_set.is_empty() + && plo.field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() + { + let mut match_flag = false; + for field in plo.field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + } + + if result_type == &ResultType::Topk && phrase_query { + bm25 = get_bm25f_multiterm_singlefield(shard, docid, query_list); + + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && search_result.topk_candidates.current_heap_size >= top_k + && bm25 <= search_result.topk_candidates._elements[0].score + { + return; + } + } + + if phrase_query { + let len = query_list.len(); + let mut index_transpose = vec![0; len]; + for i in 0..len { + index_transpose[query_list[i].term_index_unique] = i; + } + + let mut phrasematch_count = 0; + + for plo in non_unique_query_list.iter_mut() { + plo.p_pos = 0; + let item = &query_list[index_transpose[plo.term_index_unique]]; + + plo.positions_pointer = item.positions_pointer as usize; + plo.positions_count = item.positions_count; + + plo.is_embedded = item.is_embedded; + plo.embedded_positions = item.embedded_positions; + + plo.pos = get_next_position_singlefield(plo); + } + + non_unique_query_list + .sort_unstable_by(|x, y| x.positions_count.partial_cmp(&y.positions_count).unwrap()); + + let t1 = 0; + let mut t2 = 1; + let mut pos1 = non_unique_query_list[t1].pos; + let mut pos2 = non_unique_query_list[t2].pos; + + loop { + match (pos1 + non_unique_query_list[t2].term_index_nonunique as u32) + .cmp(&(pos2 + non_unique_query_list[t1].term_index_nonunique as u32)) + { + Ordering::Less => { + if t2 > 1 { + t2 = 1; + pos2 = non_unique_query_list[t2].pos; + } + + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + break; + } + pos1 += get_next_position_singlefield(&mut non_unique_query_list[t1]) + 1; + } + Ordering::Greater => { + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + break; + } + pos2 = non_unique_query_list[t2].pos + + get_next_position_singlefield(&mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + Ordering::Equal => { + if t2 + 1 < non_unique_query_list.len() { + t2 += 1; + pos2 = non_unique_query_list[t2].pos; + continue; + } + + phrasematch_count += 1; + if phrasematch_count >= 1 
{ + break; + } + + t2 = 1; + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + break; + } + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + break; + } + + pos1 += get_next_position_singlefield(&mut non_unique_query_list[t1]) + 1; + pos2 = non_unique_query_list[t2].pos + + get_next_position_singlefield(&mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + } + } + + if phrase_query && (phrasematch_count == 0) { + return; + } + } + + facet_count(shard, search_result, docid); + + *result_count += 1; + if result_type == &ResultType::Count { + return; + } + + if result_type != &ResultType::Topk || !phrase_query { + bm25 = get_bm25f_multiterm_singlefield(shard, docid, query_list); + } + + search_result.topk_candidates.add_topk( + min_heap::Result { + doc_id: docid, + score: bm25, + }, + top_k, + ); +} diff --git a/mobile_app/rust/src/seekstorm/commit.rs b/mobile_app/rust/src/seekstorm/commit.rs new file mode 100644 index 0000000..99d1db2 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/commit.rs @@ -0,0 +1,1142 @@ +use memmap2::{Mmap, MmapMut, MmapOptions}; +use num::FromPrimitive; +use num_format::{Locale, ToFormattedString}; + +use std::{ + fs::File, + io::{Seek, SeekFrom, Write}, + path::PathBuf, +}; + +use crate::{ + add_result::{ + B, K, decode_positions_multiterm_multifield, decode_positions_multiterm_singlefield, + get_next_position_multifield, get_next_position_singlefield, + }, + compatible::{_blsr_u64, _mm_tzcnt_64}, + compress_postinglist::compress_postinglist, + index::{ + AccessType, BlockObjectIndex, CompressionType, DOCUMENT_LENGTH_COMPRESSION, + FACET_VALUES_FILENAME, IndexArc, LevelIndex, MAX_POSITIONS_PER_TERM, NgramType, + NonUniquePostingListObjectQuery, POSTING_BUFFER_SIZE, PostingListObjectIndex, + PostingListObjectQuery, ROARING_BLOCK_SIZE, Shard, TermObject, + update_list_max_impact_score, warmup, + }, + utils::{ + block_copy, block_copy_mut, read_u8, read_u16, read_u32, read_u64, write_u16, write_u32, + write_u64, + }, +}; + +/// Commit moves indexed documents from the intermediate uncompressed data structure (array lists/HashMap, queryable by realtime search) in RAM +/// to the final compressed data structure (roaring bitmap) on Mmap or disk - +/// which is persistent, more compact, with lower query latency and allows search with realtime=false. +/// Commit is invoked automatically each time 64K documents are newly indexed as well as on close_index (e.g. server quit). +/// There is no way to prevent this automatic commit by not manually invoking it. +/// But commit can also be invoked manually at any time at any number of newly indexed documents. +/// commit is a **hard commit** for persistence on disk. A **soft commit** for searchability +/// is invoked implicitly with every index_doc, +/// i.e. the document can immediately searched and included in the search results +/// if it matches the query AND the query paramter realtime=true is enabled. +/// **Use commit with caution, as it is an expensive operation**. +/// **Usually, there is no need to invoke it manually**, as it is invoked automatically every 64k documents and when the index is closed with close_index. +/// Before terminating the program, always call close_index (commit), otherwise all documents indexed since last (manual or automatic) commit are lost. 
+/// There are only 2 reasons that justify a manual commit:
+/// 1. if you want to search newly indexed documents without using realtime=true for search performance reasons or
+/// 2. if after indexing new documents there won't be more documents indexed (for some time),
+///    so there won't be (soon) a commit invoked automatically at the next 64k threshold or close_index,
+///    but you still need immediate persistence guarantees on disk to protect against data loss in the event of a crash.
+#[allow(async_fn_in_trait)]
+pub trait Commit {
+    /// Commit moves indexed documents from the intermediate uncompressed data structure (array lists/HashMap, queryable by realtime search) in RAM
+    /// to the final compressed data structure (roaring bitmap) on Mmap or disk -
+    /// which is persistent, more compact, with lower query latency and allows search with realtime=false.
+    /// Commit is invoked automatically each time 64K documents are newly indexed as well as on close_index (e.g. server quit).
+    /// This automatic commit cannot be prevented by simply not invoking commit manually.
+    /// But commit can also be invoked manually at any time, with any number of newly indexed documents.
+    /// commit is a **hard commit** for persistence on disk. A **soft commit** for searchability
+    /// is invoked implicitly with every index_doc,
+    /// i.e. the document can immediately be searched and included in the search results
+    /// if it matches the query AND the query parameter realtime=true is enabled.
+    /// **Use commit with caution, as it is an expensive operation**.
+    /// **Usually, there is no need to invoke it manually**, as it is invoked automatically every 64k documents and when the index is closed with close_index.
+    /// Before terminating the program, always call close_index (commit), otherwise all documents indexed since the last (manual or automatic) commit are lost.
+    /// There are only 2 reasons that justify a manual commit:
+    /// 1. if you want to search newly indexed documents without using realtime=true for search performance reasons or
+    /// 2. if after indexing new documents there won't be more documents indexed (for some time),
+    ///    so there won't be (soon) a commit invoked automatically at the next 64k threshold or close_index,
+    ///    but you still need immediate persistence guarantees on disk to protect against data loss in the event of a crash.
+    async fn commit(&self);
+}
+
+/// Commit moves indexed documents from the intermediate uncompressed data structure (array lists/HashMap, queryable by realtime search) in RAM
+/// to the final compressed data structure (roaring bitmap) on Mmap or disk -
+/// which is persistent, more compact, with lower query latency and allows search with realtime=false.
+/// Commit is invoked automatically each time 64K documents are newly indexed as well as on close_index (e.g. server quit).
+/// This automatic commit cannot be prevented by simply not invoking commit manually.
+/// But commit can also be invoked manually at any time, with any number of newly indexed documents.
+/// commit is a **hard commit** for persistence on disk. A **soft commit** for searchability
+/// is invoked implicitly with every index_doc,
+/// i.e. the document can immediately be searched and included in the search results
+/// if it matches the query AND the query parameter realtime=true is enabled.
+/// **Use commit with caution, as it is an expensive operation**.
+/// **Usually, there is no need to invoke it manually**, as it is invoked automatically every 64k documents and when the index is closed with close_index.
+/// Before terminating the program, always call close_index (commit), otherwise all documents indexed since the last (manual or automatic) commit are lost.
+/// There are only 2 reasons that justify a manual commit:
+/// 1. if you want to search newly indexed documents without using realtime=true for search performance reasons or
+/// 2. if after indexing new documents there won't be more documents indexed (for some time),
+/// so there won't be (soon) a commit invoked automatically at the next 64k threshold or close_index,
+/// but you still need immediate persistence guarantees on disk to protect against data loss in the event of a crash.
+impl Commit for IndexArc {
+    /// Commit moves indexed documents from the intermediate uncompressed data structure (array lists/HashMap, queryable by realtime search) in RAM
+    /// to the final compressed data structure (roaring bitmap) on Mmap or disk -
+    /// which is persistent, more compact, with lower query latency and allows search with realtime=false.
+    /// Commit is invoked automatically each time 64K documents are newly indexed as well as on close_index (e.g. server quit).
+    /// There is no way to prevent this automatic commit by not manually invoking it.
+    /// But commit can also be invoked manually at any time, with any number of newly indexed documents.
+    /// commit is a **hard commit** for persistence on disk. A **soft commit** for searchability
+    /// is invoked implicitly with every index_doc,
+    /// i.e. the document can immediately be searched and included in the search results
+    /// if it matches the query AND the query parameter realtime=true is enabled.
+    /// **Use commit with caution, as it is an expensive operation**.
+    /// **Usually, there is no need to invoke it manually**, as it is invoked automatically every 64k documents and when the index is closed with close_index.
+    /// Before terminating the program, always call close_index (commit), otherwise all documents indexed since the last (manual or automatic) commit are lost.
+    /// There are only 2 reasons that justify a manual commit:
+    /// 1. if you want to search newly indexed documents without using realtime=true for search performance reasons or
+    /// 2. if after indexing new documents there won't be more documents indexed (for some time),
+    /// so there won't be (soon) a commit invoked automatically at the next 64k threshold or close_index,
+    /// but you still need immediate persistence guarantees on disk to protect against data loss in the event of a crash.
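+    ///
+    /// A minimal usage sketch, assuming an `IndexArc` named `index` and an iterable of documents;
+    /// the exact `index_doc` call shape may differ, only the soft-commit / hard-commit pattern is
+    /// illustrated here.
+    /// ```ignore
+    /// for doc in documents {
+    ///     // every index_doc is an implicit soft commit: searchable with realtime=true
+    ///     index.index_doc(doc).await;
+    /// }
+    /// // hard commit: persist the batch to disk and make it searchable with realtime=false
+    /// index.commit().await;
+    /// ```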
+ async fn commit(&self) { + let index_ref = self.read().await; + let uncommitted_doc_count = index_ref.uncommitted_doc_count().await; + + for shard in index_ref.shard_vec.iter() { + let p = shard.read().await.permits.clone(); + let permit = p.acquire().await.unwrap(); + + let indexed_doc_count = shard.read().await.indexed_doc_count; + shard.write().await.commit(indexed_doc_count).await; + warmup(shard).await; + drop(permit); + } + + if !index_ref.mute { + println!( + "commit index {} level {} committed documents {} {}", + index_ref.meta.id, + index_ref.level_count().await, + uncommitted_doc_count, + index_ref.indexed_doc_count().await, + ); + } + + drop(index_ref); + } +} + +impl Shard { + pub(crate) async fn commit(&mut self, indexed_doc_count: usize) { + if !self.uncommitted { + return; + } + + let is_last_level_incomplete = self.is_last_level_incomplete; + if self.is_last_level_incomplete { + self.merge_incomplete_index_level_to_level0(); + + self.index_file_mmap = unsafe { + MmapOptions::new() + .len(0) + .map(&self.index_file) + .expect("Unable to create Mmap") + }; + + if let Err(e) = self + .index_file + .set_len(self.last_level_index_file_start_pos) + { + println!( + "Unable to index_file.set_len in clear_index {} {} {:?}", + self.index_path_string, self.indexed_doc_count, e + ) + }; + let _ = self + .index_file + .seek(SeekFrom::Start(self.last_level_index_file_start_pos)); + + let idx = self.level_index.len() - 1; + if self.meta.access_type == AccessType::Mmap { + self.index_file_mmap = + unsafe { Mmap::map(&self.index_file).expect("Unable to create Mmap") }; + + for segment in self.segments_index.iter_mut() { + if idx == segment.byte_array_blocks_pointer.len() - 1 { + segment.byte_array_blocks_pointer.remove(idx); + } + } + } else { + for segment in self.segments_index.iter_mut() { + if idx == segment.byte_array_blocks.len() - 1 { + segment.byte_array_blocks.remove(idx); + } + } + + for key0 in 0..self.segment_number1 { + for item in self.segments_index[key0].segment.iter_mut() { + if let Some(block) = item.1.blocks.last() + && block.block_id as usize == idx + { + item.1.posting_count -= block.posting_count as u32 + 1; + item.1.blocks.remove(idx); + } + } + self.segments_index[key0] + .segment + .retain(|_key, value| !value.blocks.is_empty()) + } + } + } else { + self.last_level_index_file_start_pos = self.index_file.stream_position().unwrap(); + self.last_level_docstore_file_start_pos = self.docstore_file.stream_position().unwrap(); + }; + + if self.committed_doc_count / ROARING_BLOCK_SIZE == 0 { + write_u16( + self.longest_field_id as u16, + &mut self.compressed_index_segment_block_buffer, + 0, + ); + + let _ = self + .index_file + .write(&self.compressed_index_segment_block_buffer[0..2]); + } + + let document_length_compressed_array_pointer = + self.index_file.stream_position().unwrap() as usize; + + for document_length_compressed_array in self.document_length_compressed_array.iter_mut() { + let _ = self.index_file.write(document_length_compressed_array); + } + + if !self.mute { + println!( + "commit index {} level {} indexed documents {}", + self.meta.id, + self.level_index.len(), + indexed_doc_count.to_formatted_string(&Locale::en), + ); + } + + write_u64( + indexed_doc_count as u64, + &mut self.compressed_index_segment_block_buffer, + 0, + ); + write_u64( + self.positions_sum_normalized, + &mut self.compressed_index_segment_block_buffer, + 8, + ); + + let _ = self + .index_file + .write(&self.compressed_index_segment_block_buffer[0..16]); + + let segment_head_position = 
self.index_file.stream_position().unwrap() as usize; + self.index_file + .seek(SeekFrom::Current((self.segment_number1 * 8) as i64)) + .unwrap(); + + self.document_length_normalized_average = + self.positions_sum_normalized as f32 / indexed_doc_count as f32; + + for (i, component) in self.bm25_component_cache.iter_mut().enumerate() { + let document_length_quotient = + DOCUMENT_LENGTH_COMPRESSION[i] as f32 / self.document_length_normalized_average; + *component = K * (1.0 - B + B * document_length_quotient); + } + + for k0 in 0..self.segment_number1 { + let strip_compressed = self.commit_segment(k0); + self.strip_compressed_sum += strip_compressed as u64; + self.key_count_sum += self.segments_level0[k0].segment.len() as u64; + } + + if !is_last_level_incomplete { + let mut document_length_compressed_array: Vec<[u8; ROARING_BLOCK_SIZE]> = Vec::new(); + if self.meta.access_type != AccessType::Mmap { + for document_length_compressed_array_item in + self.document_length_compressed_array.iter_mut() + { + document_length_compressed_array.push(*document_length_compressed_array_item); + } + } + + self.level_index.push(LevelIndex { + document_length_compressed_array, + document_length_compressed_array_pointer, + docstore_pointer_docs: Vec::new(), + docstore_pointer_docs_pointer: 0, + }); + } + + for document_length_compressed_array in self.document_length_compressed_array.iter_mut() { + *document_length_compressed_array = [0; ROARING_BLOCK_SIZE]; + } + + let segment_head_position2 = self.index_file.stream_position().unwrap() as usize; + self.index_file + .seek(SeekFrom::Start(segment_head_position as u64)) + .unwrap(); + let segment_head_position3 = + self.compressed_index_segment_block_buffer.len() - (self.segment_number1 * 8); + let _ = self + .index_file + .write(&self.compressed_index_segment_block_buffer[segment_head_position3..]); + + let _ = self.index_file.flush(); + + self.index_file + .seek(SeekFrom::Start(segment_head_position2 as u64)) + .unwrap(); + + if !self.stored_field_names.is_empty() { + self.commit_docstore(indexed_doc_count, is_last_level_incomplete); + } + + if self.meta.access_type == AccessType::Mmap { + self.index_file.flush().expect("Unable to flush Mmap"); + + self.index_file_mmap = + unsafe { Mmap::map(&self.index_file).expect("Unable to create Mmap") }; + } + + if !self.facets.is_empty() { + self.facets_file_mmap.flush().expect("Unable to flush Mmap"); + if self.facets_file.metadata().unwrap().len() + != (self.facets_size_sum * (self.level_index.len() + 1) * ROARING_BLOCK_SIZE) as u64 + { + if let Err(e) = self.facets_file.set_len( + (self.facets_size_sum * (self.level_index.len() + 1) * ROARING_BLOCK_SIZE) + as u64, + ) { + println!("Unable to facets_file.set_len in commit {:?}", e) + }; + + self.facets_file_mmap = + unsafe { MmapMut::map_mut(&self.facets_file).expect("Unable to create Mmap") }; + } + + let index_path = PathBuf::new(&self.index_path_string); + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(FACET_VALUES_FILENAME)).unwrap(), + &self.facets, + ) + .unwrap(); + } + + self.string_set_to_single_term_id(); + + update_list_max_impact_score(self); + + self.committed_doc_count = indexed_doc_count; + self.is_last_level_incomplete = + !(self.committed_doc_count).is_multiple_of(ROARING_BLOCK_SIZE); + + if let Some(root_index_arc) = &self.index_option { + let root_index = root_index_arc.read().await; + + if let Some(root_completion_option) = root_index.completion_option.as_ref() { + let mut root_completions = 
root_completion_option.write().await; + for completion in self.level_completions.read().await.iter() { + if root_completions.len() < root_index.max_completion_entries { + root_completions.add_completion(&completion.0.join(" "), *completion.1); + } + } + + self.level_completions.write().await.clear(); + } + + if let Some(symspell) = root_index.symspell_option.as_ref() { + if symspell.read().await.get_dictionary_size() < root_index.max_dictionary_entries { + for key0 in 0..self.segment_number1 { + for key in self.segments_level0[key0].segment.keys() { + let plo = self.segments_level0[key0].segment.get(key).unwrap(); + + if self.meta.spelling_correction.is_some() + && symspell.read().await.get_dictionary_size() + < root_index.max_dictionary_entries + && key & 7 == 0 + && let Some(term) = self.level_terms.get(&((key >> 32) as u32)) + { + let mut symspell = symspell.write().await; + symspell.create_dictionary_entry(term.clone(), plo.posting_count); + drop(symspell); + }; + } + } + } + self.level_terms.clear(); + }; + }; + + self.compressed_index_segment_block_buffer = vec![0; 10_000_000]; + self.postings_buffer = vec![0; POSTING_BUFFER_SIZE]; + + self.postings_buffer_pointer = 0; + self.strip_compressed_sum = 0; + + for segment in self.segments_level0.iter_mut() { + segment.segment.clear(); + } + + self.uncommitted = false; + } + + /// Flush a single segment from the key hash range partitioned level to RAM and disk + pub(crate) fn commit_segment(&mut self, key0: usize) -> usize { + let block_id = self.block_id as u32; + + let mut key_head_pointer_w: usize = 0; + let segment_head_position = self.compressed_index_segment_block_buffer.len() + - (self.segment_number1 * 8) + + (key0 * 8) + + 4; + write_u32( + self.segments_level0[key0].segment.len() as u32, + &mut self.compressed_index_segment_block_buffer, + segment_head_position, + ); + + let mut key_body_pointer_w: usize = + key_head_pointer_w + (self.segments_level0[key0].segment.len() * self.key_head_size); + let key_body_pointer_wstart: usize = key_body_pointer_w; + + let mut key_list: Vec = self.segments_level0[key0].segment.keys().cloned().collect(); + key_list.sort_unstable(); + + for key in key_list.iter() { + let plo = self.segments_level0[key0].segment.get_mut(key).unwrap(); + + let mut key_position_pointer_w: usize = key_body_pointer_w; + let key_rank_position_pointer_w: usize = + key_body_pointer_w + plo.size_compressed_positions_key; + + let posting_pointer_size_sum = plo.pointer_pivot_p_docid as usize * 2 + + if (plo.pointer_pivot_p_docid as usize) < plo.posting_count { + (plo.posting_count - plo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + + let size_compressed_positions_key = + plo.size_compressed_positions_key + posting_pointer_size_sum; + + let key_docid_pointer_w: usize = key_body_pointer_w + size_compressed_positions_key; + + let mut size_compressed_docid_key; + + let key_body_offset = + key_rank_position_pointer_w as u32 - key_body_pointer_wstart as u32; + + size_compressed_docid_key = compress_postinglist( + self, + &mut key_head_pointer_w, + &mut key_position_pointer_w, + key_body_offset, + &key0, + key, + ); + + key_body_pointer_w = key_docid_pointer_w + size_compressed_docid_key; + + size_compressed_docid_key += self.key_head_size; + + self.size_compressed_docid_index += size_compressed_docid_key as u64; + self.size_compressed_positions_index += size_compressed_positions_key as u64; + } + + let compressed_segment_block_size = key_body_pointer_w; + + let segment_head_position = 
self.compressed_index_segment_block_buffer.len() + - (self.segment_number1 * 8) + + (key0 * 8); + + write_u32( + compressed_segment_block_size as u32, + &mut self.compressed_index_segment_block_buffer, + segment_head_position, + ); + + let file_position = self.index_file.stream_position().unwrap() as usize; + + let _ = self + .index_file + .write(&self.compressed_index_segment_block_buffer[0..compressed_segment_block_size]); + + if self.meta.access_type == AccessType::Mmap { + self.segments_index[key0].byte_array_blocks_pointer.push(( + file_position + key_body_pointer_wstart, + (compressed_segment_block_size - key_body_pointer_wstart), + key_list.len() as u32, + )); + } else { + let mut byte_array_docid: Vec = + vec![0; compressed_segment_block_size - key_body_pointer_wstart]; + block_copy_mut( + &mut self.compressed_index_segment_block_buffer, + key_body_pointer_wstart, + &mut byte_array_docid, + 0, + compressed_segment_block_size - key_body_pointer_wstart, + ); + + let mut posting_count_previous = 0; + let mut pointer_pivot_p_docid_previous = 0; + let mut compression_type_pointer_previous = 0; + + for (key_index, key) in key_list.iter().enumerate() { + let plo = self.segments_level0[key0].segment.get_mut(key).unwrap(); + + let value = self.segments_index[key0].segment.entry(*key).or_insert( + PostingListObjectIndex { + ..Default::default() + }, + ); + let exists: bool = value.posting_count > 0; + + if !self.indexed_doc_count.is_multiple_of(ROARING_BLOCK_SIZE) + && self.meta.access_type == AccessType::Ram + { + let position_range_previous = if key_index == 0 { + 0 + } else { + let posting_pointer_size_sum_previous = pointer_pivot_p_docid_previous + as usize + * 2 + + if (pointer_pivot_p_docid_previous as usize) < posting_count_previous + { + (posting_count_previous - pointer_pivot_p_docid_previous as usize) + * 3 + } else { + 0 + }; + + let rank_position_pointer_range_previous = compression_type_pointer_previous + & 0b0011_1111_1111_1111_1111_1111_1111_1111; + let compression_type_previous: CompressionType = FromPrimitive::from_i32( + (compression_type_pointer_previous >> 30) as i32, + ) + .unwrap(); + + let compressed_docid_previous = match compression_type_previous { + CompressionType::Array => posting_count_previous * 2, + CompressionType::Bitmap => 8192, + CompressionType::Rle => { + 4 * read_u16( + &byte_array_docid, + rank_position_pointer_range_previous as usize + + posting_pointer_size_sum_previous, + ) as usize + + 2 + } + _ => 0, + }; + + rank_position_pointer_range_previous + + (posting_pointer_size_sum_previous + compressed_docid_previous) as u32 + }; + + value.position_range_previous = position_range_previous; + + posting_count_previous = plo.posting_count; + pointer_pivot_p_docid_previous = plo.pointer_pivot_p_docid; + compression_type_pointer_previous = plo.compression_type_pointer; + }; + + if exists { + value.posting_count += plo.posting_count as u32; + if self.meta.access_type != AccessType::Mmap { + value.blocks.push(BlockObjectIndex { + block_id, + posting_count: (plo.posting_count - 1) as u16, + max_block_score: plo.max_block_score, + max_docid: plo.max_docid, + max_p_docid: plo.max_p_docid, + pointer_pivot_p_docid: plo.pointer_pivot_p_docid, + compression_type_pointer: plo.compression_type_pointer, + }); + } + } else { + value.posting_count = plo.posting_count as u32; + value.max_list_score = 0.0; + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramRF | NgramType::NgramFR => { + value.posting_count_ngram_1_compressed = + 
plo.posting_count_ngram_1_compressed; + value.posting_count_ngram_2_compressed = + plo.posting_count_ngram_2_compressed; + } + _ => { + value.posting_count_ngram_1_compressed = + plo.posting_count_ngram_1_compressed; + value.posting_count_ngram_2_compressed = + plo.posting_count_ngram_2_compressed; + value.posting_count_ngram_3_compressed = + plo.posting_count_ngram_3_compressed; + } + } + + if self.meta.access_type != AccessType::Mmap { + value.blocks = vec![BlockObjectIndex { + block_id, + posting_count: (plo.posting_count - 1) as u16, + max_block_score: plo.max_block_score, + max_docid: plo.max_docid, + max_p_docid: plo.max_p_docid, + pointer_pivot_p_docid: plo.pointer_pivot_p_docid, + compression_type_pointer: plo.compression_type_pointer, + }] + } + } + } + + self.segments_index[key0] + .byte_array_blocks + .push(byte_array_docid); + } + + compressed_segment_block_size + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn add_docid( + self: &mut Shard, + plo: &mut PostingListObjectQuery, + docid: usize, + key_hash: u64, + key0: usize, + ngram_type: &NgramType, + posting_count_ngram_1_compressed: u8, + posting_count_ngram_2_compressed: u8, + posting_count_ngram_3_compressed: u8, + ) { + let mut field_positions_vec: Vec> = vec![Vec::new(); self.indexed_field_vec.len()]; + + if self.indexed_field_vec.len() == 1 { + decode_positions_multiterm_singlefield(plo, true, true, false); + + let mut plo2 = NonUniquePostingListObjectQuery { + positions_pointer: plo.positions_pointer as usize, + is_embedded: plo.is_embedded, + embedded_positions: plo.embedded_positions, + field_vec: plo.field_vec.clone(), + p_pos: 0, + p_field: 0, + positions_count: plo.positions_count, + key0: key0 as u32, + byte_array: plo.byte_array, + term_index_unique: 0, + term_index_nonunique: 0, + pos: 0, + }; + + let mut prev_pos = 0; + let mut one = 0; + for _ in 0..plo.positions_count { + plo2.pos = get_next_position_singlefield(&mut plo2); + let pos = prev_pos + plo2.pos as u16 + one; + field_positions_vec[0].push(pos); + prev_pos = pos; + one = 1; + plo2.p_pos += 1; + } + } else { + decode_positions_multiterm_multifield(self, plo, true, true, false); + + let mut plo2 = NonUniquePostingListObjectQuery { + positions_pointer: plo.positions_pointer as usize, + is_embedded: plo.is_embedded, + embedded_positions: plo.embedded_positions, + field_vec: plo.field_vec.clone(), + p_pos: 0, + p_field: 0, + positions_count: plo.positions_count, + key0: key0 as u32, + byte_array: plo.byte_array, + term_index_unique: 0, + term_index_nonunique: 0, + pos: 0, + }; + + for field in plo.field_vec.iter() { + let mut prev_pos = 0; + let mut one = 0; + for _ in 0..field.1 { + plo2.pos = get_next_position_multifield(&mut plo2); + let pos = prev_pos + plo2.pos as u16 + one; + field_positions_vec[field.0 as usize].push(pos); + prev_pos = pos; + one = 1; + plo2.p_pos += 1; + } + } + } + + let term = match ngram_type { + NgramType::SingleTerm => TermObject { + key_hash, + key0: key0 as u32, + ngram_type: ngram_type.clone(), + term_ngram_1: String::new(), + term_ngram_0: String::new(), + field_positions_vec, + + ..Default::default() + }, + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => TermObject { + key_hash, + key0: key0 as u32, + ngram_type: ngram_type.clone(), + term_ngram_1: String::new(), + term_ngram_0: String::new(), + field_positions_vec, + field_vec_ngram1: if self.indexed_field_vec.len() == 1 { + vec![(0, plo.tf_ngram1)] + } else { + plo.field_vec_ngram1 + .iter() + .map(|field| (field.0 as usize, field.1 as u32)) 
+ .collect() + }, + + field_vec_ngram2: if self.indexed_field_vec.len() == 1 { + vec![(0, plo.tf_ngram2)] + } else { + plo.field_vec_ngram2 + .iter() + .map(|field| (field.0 as usize, field.1 as u32)) + .collect() + }, + + ..Default::default() + }, + _ => TermObject { + key_hash, + key0: key0 as u32, + ngram_type: ngram_type.clone(), + term_ngram_1: String::new(), + term_ngram_0: String::new(), + + field_positions_vec, + field_vec_ngram1: if self.indexed_field_vec.len() == 1 { + vec![(0, plo.tf_ngram1)] + } else { + plo.field_vec_ngram1 + .iter() + .map(|field| (field.0 as usize, field.1 as u32)) + .collect() + }, + + field_vec_ngram2: if self.indexed_field_vec.len() == 1 { + vec![(0, plo.tf_ngram2)] + } else { + plo.field_vec_ngram2 + .iter() + .map(|field| (field.0 as usize, field.1 as u32)) + .collect() + }, + + field_vec_ngram3: if self.indexed_field_vec.len() == 1 { + vec![(0, plo.tf_ngram3)] + } else { + plo.field_vec_ngram3 + .iter() + .map(|field| (field.0 as usize, field.1 as u32)) + .collect() + }, + + ..Default::default() + }, + }; + + self.index_posting( + term, + docid, + true, + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ); + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn iterate_docid( + self: &mut Shard, + compression_type_pointer: u32, + pointer_pivot_p_docid: u16, + posting_count: u16, + block_id: usize, + key0: usize, + key_hash: u64, + ngram_type: NgramType, + posting_count_ngram_1_compressed: u8, + posting_count_ngram_2_compressed: u8, + posting_count_ngram_3_compressed: u8, + ) { + let compression_type: CompressionType = + FromPrimitive::from_i32((compression_type_pointer >> 30) as i32).unwrap(); + + let rank_position_pointer_range: u32 = + compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let posting_pointer_size_sum = pointer_pivot_p_docid as u32 * 2 + + if (pointer_pivot_p_docid as usize) <= posting_count as usize { + ((posting_count as u32 + 1) - pointer_pivot_p_docid as u32) * 3 + } else { + 0 + }; + let compressed_doc_id_range: u32 = rank_position_pointer_range + posting_pointer_size_sum; + + let byte_array = if self.meta.access_type == AccessType::Mmap { + let segment = &self.segments_index[key0]; + let byte_array = &self.index_file_mmap[segment.byte_array_blocks_pointer[block_id].0 + ..segment.byte_array_blocks_pointer[block_id].0 + + segment.byte_array_blocks_pointer[block_id].1]; + byte_array.to_owned() + } else { + self.segments_index[key0].byte_array_blocks[block_id].to_owned() + }; + + let mut plo = PostingListObjectQuery { + rank_position_pointer_range, + pointer_pivot_p_docid, + byte_array: &byte_array, + p_docid: 0, + ngram_type: ngram_type.clone(), + ..Default::default() + }; + + match compression_type { + CompressionType::Array => { + for i in 0..=posting_count { + plo.p_docid = i as usize; + + let docid = (block_id << 16) + | read_u16( + &byte_array[compressed_doc_id_range as usize..], + i as usize * 2, + ) as usize; + + self.add_docid( + &mut plo, + docid, + key_hash, + key0, + &ngram_type, + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ); + } + } + + CompressionType::Rle => { + let runs_count = + read_u16(&byte_array[compressed_doc_id_range as usize..], 0) as i32; + + plo.p_docid = 0; + for i in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &byte_array[compressed_doc_id_range as usize..], + i as usize * 2, + ); + let runlength = read_u16( + 
&byte_array[compressed_doc_id_range as usize..], + (i + 1) as usize * 2, + ); + + for j in 0..=runlength { + let docid = (block_id << 16) | (startdocid + j) as usize; + self.add_docid( + &mut plo, + docid, + key_hash, + key0, + &ngram_type, + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ); + + plo.p_docid += 1; + } + } + } + + CompressionType::Bitmap => { + plo.p_docid = 0; + + for ulong_pos in 0u64..1024 { + let mut intersect: u64 = read_u64( + &byte_array[compressed_doc_id_range as usize..], + ulong_pos as usize * 8, + ); + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as u64; + + intersect = unsafe { _blsr_u64(intersect) }; + + let docid = (block_id << 16) | ((ulong_pos << 6) + bit_pos) as usize; + + self.add_docid( + &mut plo, + docid, + key_hash, + key0, + &ngram_type, + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ); + + plo.p_docid += 1; + } + } + } + + _ => {} + } + } + + pub(crate) fn merge_incomplete_index_level_to_level0(self: &mut Shard) { + for strip0 in self.segments_level0.iter_mut() { + if strip0.positions_compressed.is_empty() { + strip0.positions_compressed = vec![0; MAX_POSITIONS_PER_TERM * 2]; + } + } + + let block_id = self.level_index.len() - 1; + let committed_doc_count = (self.committed_doc_count - 1 % ROARING_BLOCK_SIZE) + 1; + + for i in 0..self.indexed_field_vec.len() { + if self.meta.access_type == AccessType::Mmap { + block_copy( + &self.index_file_mmap[self.level_index[block_id] + .document_length_compressed_array_pointer + + i * ROARING_BLOCK_SIZE..], + 0, + &mut self.document_length_compressed_array[i], + 0, + committed_doc_count, + ); + } else { + block_copy( + &self.level_index[block_id].document_length_compressed_array[i], + 0, + &mut self.document_length_compressed_array[i], + 0, + committed_doc_count, + ); + } + } + + for key0 in 0..self.segment_number1 { + if self.meta.access_type == AccessType::Mmap { + let pointer = self.segments_index[key0].byte_array_blocks_pointer[block_id]; + + let key_count = pointer.2 as usize; + + for key_index in 0..key_count { + let key_address; + let key_hash; + let posting_count; + + let ngram_type; + let posting_count_ngram_1_compressed; + let posting_count_ngram_2_compressed; + let posting_count_ngram_3_compressed; + let pointer_pivot_p_docid_old; + let compression_type_pointer; + { + let byte_array = &self.index_file_mmap + [pointer.0 - (key_count * self.key_head_size)..pointer.0]; + key_address = key_index * self.key_head_size; + key_hash = read_u64(byte_array, key_address); + posting_count = read_u16(byte_array, key_address + 8); + ngram_type = FromPrimitive::from_u64(key_hash & 0b111) + .unwrap_or(NgramType::SingleTerm); + match ngram_type { + NgramType::SingleTerm => { + posting_count_ngram_1_compressed = 0; + posting_count_ngram_2_compressed = 0; + posting_count_ngram_3_compressed = 0; + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + posting_count_ngram_1_compressed = + read_u8(byte_array, key_address + 14); + posting_count_ngram_2_compressed = + read_u8(byte_array, key_address + 15); + posting_count_ngram_3_compressed = 0; + } + _ => { + posting_count_ngram_1_compressed = + read_u8(byte_array, key_address + 14); + posting_count_ngram_2_compressed = + read_u8(byte_array, key_address + 15); + posting_count_ngram_3_compressed = + read_u8(byte_array, key_address + 16); + } + } + + pointer_pivot_p_docid_old = + read_u16(byte_array, 
key_address + self.key_head_size - 6); + compression_type_pointer = + read_u32(byte_array, key_address + self.key_head_size - 4); + } + + let mut pointer_pivot_p_docid_new = 0; + let mut size_compressed_positions_key_new = 0; + let mut pointer_first_new = 0; + let mut pointer_last_new = 0; + let mut pointer_first_old = 0; + let merge = match self.segments_level0[key0].segment.get_mut(&key_hash) { + Some(plo0) => { + pointer_pivot_p_docid_new = plo0.pointer_pivot_p_docid; + size_compressed_positions_key_new = plo0.size_compressed_positions_key; + plo0.pointer_pivot_p_docid = 0; + plo0.size_compressed_positions_key = 0; + + pointer_first_new = plo0.pointer_first; + pointer_last_new = plo0.pointer_last; + pointer_first_old = self.postings_buffer_pointer; + true + } + None => false, + }; + + self.iterate_docid( + compression_type_pointer, + pointer_pivot_p_docid_old, + posting_count, + block_id, + key0, + key_hash, + ngram_type, + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ); + + if merge { + let plo0 = self.segments_level0[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + plo0.pointer_pivot_p_docid = if pointer_pivot_p_docid_new == 0 { + pointer_pivot_p_docid_old + } else { + pointer_pivot_p_docid_old + pointer_pivot_p_docid_new + }; + + plo0.size_compressed_positions_key = size_compressed_positions_key_new; + + let pointer_last_old = plo0.pointer_last; + plo0.pointer_first = pointer_first_old; + plo0.pointer_last = pointer_last_new; + + write_u32( + pointer_first_new as u32, + &mut self.postings_buffer, + pointer_last_old, + ); + } + } + } else { + let keys: Vec = self.segments_index[key0].segment.keys().cloned().collect(); + + for key_hash in keys { + let plo = &self.segments_index[key0].segment[&key_hash]; + let last_block = plo.blocks.last().unwrap(); + if last_block.block_id as usize != self.level_index.len() - 1 { + continue; + } + + let posting_count = last_block.posting_count; + + let posting_count_ngram_1_compressed = plo.posting_count_ngram_1_compressed; + let posting_count_ngram_2_compressed = plo.posting_count_ngram_2_compressed; + let posting_count_ngram_3_compressed = plo.posting_count_ngram_3_compressed; + + let pointer_pivot_p_docid = last_block.pointer_pivot_p_docid; + let compression_type_pointer = last_block.compression_type_pointer; + + let mut pointer_pivot_p_docid_new = 0; + let mut size_compressed_positions_key_new = 0; + let mut pointer_first_new = 0; + let mut pointer_last_new = 0; + let mut pointer_first_old = 0; + let merge = match self.segments_level0[key0].segment.get_mut(&key_hash) { + Some(plo0) => { + pointer_pivot_p_docid_new = plo0.pointer_pivot_p_docid; + size_compressed_positions_key_new = plo0.size_compressed_positions_key; + plo0.pointer_pivot_p_docid = 0; + plo0.size_compressed_positions_key = 0; + + pointer_first_new = plo0.pointer_first; + pointer_last_new = plo0.pointer_last; + pointer_first_old = self.postings_buffer_pointer; + true + } + None => false, + }; + + let ngram_type = + FromPrimitive::from_u64(key_hash & 0b111).unwrap_or(NgramType::SingleTerm); + + self.iterate_docid( + compression_type_pointer, + pointer_pivot_p_docid, + posting_count, + block_id, + key0, + key_hash, + ngram_type, + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ); + + if merge { + let plo0 = self.segments_level0[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + plo0.pointer_pivot_p_docid = if pointer_pivot_p_docid_new == 0 { + 
pointer_pivot_p_docid + } else { + pointer_pivot_p_docid + pointer_pivot_p_docid_new + }; + plo0.size_compressed_positions_key = size_compressed_positions_key_new; + + let pointer_last_old = plo0.pointer_last; + plo0.pointer_first = pointer_first_old; + plo0.pointer_last = pointer_last_new; + + write_u32( + pointer_first_new as u32, + &mut self.postings_buffer, + pointer_last_old, + ); + } + } + } + } + } +} diff --git a/mobile_app/rust/src/seekstorm/compatible.rs b/mobile_app/rust/src/seekstorm/compatible.rs new file mode 100644 index 0000000..466235b --- /dev/null +++ b/mobile_app/rust/src/seekstorm/compatible.rs @@ -0,0 +1,21 @@ +#[cfg(target_arch = "x86_64")] +pub use std::arch::x86_64::{_blsr_u64, _lzcnt_u32, _mm_tzcnt_64}; + +#[cfg(not(target_arch = "x86_64"))] +pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 { + x.trailing_zeros() as i64 +} + +#[cfg(not(target_arch = "x86_64"))] +pub unsafe fn _blsr_u64(x: u64) -> u64 { + if x == 0 { + x + } else { + x & (!(1 << x.trailing_zeros())) + } +} + +#[cfg(not(target_arch = "x86_64"))] +pub unsafe fn _lzcnt_u32(x: u32) -> u32 { + x.leading_zeros() +} diff --git a/mobile_app/rust/src/seekstorm/compress_postinglist.rs b/mobile_app/rust/src/seekstorm/compress_postinglist.rs new file mode 100644 index 0000000..cce6cc4 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/compress_postinglist.rs @@ -0,0 +1,978 @@ +use std::cmp; + +use smallvec::SmallVec; + +use crate::{ + add_result::{B, K, SIGMA, decode_positions_commit}, + compatible::_lzcnt_u32, + index::{ + AccessType, CompressionType, DOCUMENT_LENGTH_COMPRESSION, NgramType, STOP_BIT, Shard, + SimilarityType, hash32, hash64, int_to_byte4, + }, + search::decode_posting_list_count, + utils::{ + block_copy, read_u16_ref, read_u32_ref, write_u8_ref, write_u16, write_u16_ref, + write_u32_ref, write_u64_ref, + }, +}; + +/// Compress a single postinglist using roaring bitmaps compression for docid https://roaringbitmap.org/about/ +pub(crate) fn compress_postinglist( + shard: &mut Shard, + key_head_pointer_w: &mut usize, + roaring_offset: &mut usize, + key_body_offset: u32, + key0: &usize, + key_hash: &u64, +) -> usize { + let mut posting_count_ngram_1 = 0; + let mut posting_count_ngram_2 = 0; + let mut posting_count_ngram_3 = 0; + let mut posting_count_ngram_1_compressed = 0; + let mut posting_count_ngram_2_compressed = 0; + let mut posting_count_ngram_3_compressed = 0; + { + let plo = shard.segments_level0[*key0].segment.get(key_hash).unwrap(); + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + posting_count_ngram_1_compressed = if plo.term_ngram1.is_empty() { + plo.posting_count_ngram_1_compressed + } else { + let term_bytes_1 = plo.term_ngram1.as_bytes(); + let key0_1 = hash32(term_bytes_1) & shard.segment_number_mask1; + let key_hash_1 = hash64(term_bytes_1); + let mut posting_count_ngram1 = if shard.meta.access_type == AccessType::Mmap { + decode_posting_list_count( + &shard.segments_index[key0_1 as usize], + shard, + key_hash_1, + key0_1 < *key0 as u32, + ) + .unwrap_or_default() + } else if let Some(plo) = shard.segments_index[key0_1 as usize] + .segment + .get(&key_hash_1) + { + plo.posting_count + } else { + 0 + }; + + if let Some(x) = shard.segments_level0[key0_1 as usize] + .segment + .get(&key_hash_1) + { + posting_count_ngram1 += x.posting_count as u32; + } + int_to_byte4(posting_count_ngram1) + }; + + posting_count_ngram_2_compressed = if plo.term_ngram2.is_empty() { + plo.posting_count_ngram_2_compressed + } else { + 
let term_bytes_2 = plo.term_ngram2.as_bytes(); + let key0_2 = hash32(term_bytes_2) & shard.segment_number_mask1; + let key_hash_2 = hash64(term_bytes_2); + + let mut posting_count_ngram2 = if shard.meta.access_type == AccessType::Mmap { + decode_posting_list_count( + &shard.segments_index[key0_2 as usize], + shard, + key_hash_2, + key0_2 < *key0 as u32, + ) + .unwrap_or_default() + } else if let Some(plo) = shard.segments_index[key0_2 as usize] + .segment + .get(&key_hash_2) + { + plo.posting_count + } else { + 0 + }; + + if let Some(x) = shard.segments_level0[key0_2 as usize] + .segment + .get(&key_hash_2) + { + posting_count_ngram2 += x.posting_count as u32; + } + int_to_byte4(posting_count_ngram2) + }; + + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_1_compressed as usize]; + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_2_compressed as usize]; + } + _ => { + posting_count_ngram_1_compressed = if plo.term_ngram1.is_empty() { + plo.posting_count_ngram_1_compressed + } else { + let term_bytes_1 = plo.term_ngram1.as_bytes(); + let key0_1 = hash32(term_bytes_1) & shard.segment_number_mask1; + let key_hash_1 = hash64(term_bytes_1); + let mut posting_count_ngram1 = if shard.meta.access_type == AccessType::Mmap { + decode_posting_list_count( + &shard.segments_index[key0_1 as usize], + shard, + key_hash_1, + key0_1 < *key0 as u32, + ) + .unwrap_or_default() + } else if let Some(plo) = shard.segments_index[key0_1 as usize] + .segment + .get(&key_hash_1) + { + plo.posting_count + } else { + 0 + }; + + if let Some(x) = shard.segments_level0[key0_1 as usize] + .segment + .get(&key_hash_1) + { + posting_count_ngram1 += x.posting_count as u32; + } + int_to_byte4(posting_count_ngram1) + }; + + posting_count_ngram_2_compressed = if plo.term_ngram2.is_empty() { + plo.posting_count_ngram_2_compressed + } else { + let term_bytes_2 = plo.term_ngram2.as_bytes(); + let key0_2 = hash32(term_bytes_2) & shard.segment_number_mask1; + let key_hash_2 = hash64(term_bytes_2); + + let mut posting_count_ngram2 = if shard.meta.access_type == AccessType::Mmap { + decode_posting_list_count( + &shard.segments_index[key0_2 as usize], + shard, + key_hash_2, + key0_2 < *key0 as u32, + ) + .unwrap_or_default() + } else if let Some(plo) = shard.segments_index[key0_2 as usize] + .segment + .get(&key_hash_2) + { + plo.posting_count + } else { + 0 + }; + + if let Some(x) = shard.segments_level0[key0_2 as usize] + .segment + .get(&key_hash_2) + { + posting_count_ngram2 += x.posting_count as u32; + } + int_to_byte4(posting_count_ngram2) + }; + + posting_count_ngram_3_compressed = if plo.term_ngram3.is_empty() { + plo.posting_count_ngram_3_compressed + } else { + let term_bytes_3 = plo.term_ngram3.as_bytes(); + let key0_3 = hash32(term_bytes_3) & shard.segment_number_mask1; + let key_hash_3 = hash64(term_bytes_3); + + let mut posting_count_ngram3 = if shard.meta.access_type == AccessType::Mmap { + decode_posting_list_count( + &shard.segments_index[key0_3 as usize], + shard, + key_hash_3, + key0_3 < *key0 as u32, + ) + .unwrap_or_default() + } else if let Some(plo) = shard.segments_index[key0_3 as usize] + .segment + .get(&key_hash_3) + { + plo.posting_count + } else { + 0 + }; + + if let Some(x) = shard.segments_level0[key0_3 as usize] + .segment + .get(&key_hash_3) + { + posting_count_ngram3 += x.posting_count as u32; + } + int_to_byte4(posting_count_ngram3) + }; + + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_1_compressed as usize]; + 
posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_2_compressed as usize]; + posting_count_ngram_3 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_3_compressed as usize]; + } + } + } + + let plo = shard.segments_level0[*key0] + .segment + .get_mut(key_hash) + .unwrap(); + let plo_posting_count = plo.posting_count; + + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + plo.posting_count_ngram_1 = posting_count_ngram_1 as f32; + plo.posting_count_ngram_2 = posting_count_ngram_2 as f32; + } + _ => { + plo.posting_count_ngram_1 = posting_count_ngram_1 as f32; + plo.posting_count_ngram_2 = posting_count_ngram_2 as f32; + plo.posting_count_ngram_3 = posting_count_ngram_3 as f32; + } + } + + let mut size_compressed_docid_key: usize = 0; + + let enable_rle_compression: bool = true; + let enable_bitmap_compression: bool = true; + let enable_delta_compression: bool = false; + + shard.docid_count += plo.posting_count; + shard.postinglist_count += 1; + shard.position_count += plo.position_count; + let mut compression_type_pointer = CompressionType::Error as u32; + + let mut runs_count: u16 = 0; + + let delta_size_bits: u32 = 4; + let range_bits: u32 = 32 - unsafe { _lzcnt_u32(plo.docid_delta_max.into()) }; + let result_bits: u32 = delta_size_bits + (range_bits * plo.posting_count as u32); + let delta_compression_size_byte: u32 = result_bits.div_ceil(8); + + if (plo.posting_count < 4096) || !enable_bitmap_compression { + if enable_rle_compression { + let runs_count_threshold: u16 = cmp::min( + (plo.posting_count / 2) as u16, + if enable_delta_compression { + (delta_compression_size_byte >> 2) as u16 + } else { + u16::MAX + }, + ); + compress_postinglist_rle( + shard, + roaring_offset, + &mut size_compressed_docid_key, + *key0, + *key_hash, + runs_count_threshold, + &mut runs_count, + &key_body_offset, + &mut compression_type_pointer, + ); + } + + if runs_count == 0 { + if enable_delta_compression + && ((delta_compression_size_byte as usize) < (plo_posting_count << 1)) + { + } else { + compress_postinglist_array( + shard, + roaring_offset, + &mut size_compressed_docid_key, + *key0, + *key_hash, + &key_body_offset, + &mut compression_type_pointer, + ); + } + } + } else { + if enable_rle_compression { + let runs_count_threshold: u16 = cmp::min( + 2048, + if enable_delta_compression { + (delta_compression_size_byte >> 2) as u16 + } else { + u16::MAX + }, + ); + compress_postinglist_rle( + shard, + roaring_offset, + &mut size_compressed_docid_key, + *key0, + *key_hash, + runs_count_threshold, + &mut runs_count, + &key_body_offset, + &mut compression_type_pointer, + ); + } + + if runs_count == 0 { + if enable_delta_compression && (delta_compression_size_byte < 8192) { + } else { + compress_postinglist_bitmap( + shard, + roaring_offset, + &mut size_compressed_docid_key, + *key0, + *key_hash, + &key_body_offset, + &mut compression_type_pointer, + ); + } + } + } + + let plo = shard.segments_level0[*key0] + .segment + .get_mut(key_hash) + .unwrap(); + + write_u64_ref( + *key_hash, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u16_ref( + (plo.posting_count - 1) as u16, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u16_ref( + plo.max_docid, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u16_ref( + plo.max_p_docid, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); 
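+
+    // Key head layout as serialized by this function (fixed size, key_head_size = 20/22/23 bytes):
+    // key_hash u64, (posting_count - 1) u16, max_docid u16, max_p_docid u16 (written above),
+    // then 0, 2 or 3 compressed n-gram posting-count bytes depending on the n-gram configuration
+    // (written in the match below), followed by pointer_pivot_p_docid u16 and the u32
+    // compression_type_pointer, whose upper 2 bits encode the CompressionType.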
+ + match shard.key_head_size { + 20 => {} + 22 => { + write_u8_ref( + posting_count_ngram_1_compressed, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u8_ref( + posting_count_ngram_2_compressed, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + } + _ => { + write_u8_ref( + posting_count_ngram_1_compressed, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u8_ref( + posting_count_ngram_2_compressed, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u8_ref( + posting_count_ngram_3_compressed, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + } + } + + write_u16_ref( + plo.pointer_pivot_p_docid, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + write_u32_ref( + compression_type_pointer, + &mut shard.compressed_index_segment_block_buffer, + key_head_pointer_w, + ); + + size_compressed_docid_key +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn docid_iterator( + shard: &mut Shard, + posting_pointer_size: u8, + next_pointer: &mut usize, + key_position_pointer_w: &mut usize, + key_rank_position_pointer_w: &mut usize, + key0: usize, + key_hash: u64, + doc_id: &mut u16, + size_compressed_positions_key: &mut usize, + p_docid: usize, +) { + let mut read_pointer = *next_pointer; + *next_pointer = read_u32_ref(&shard.postings_buffer, &mut read_pointer) as usize; + + *doc_id = read_u16_ref(&shard.postings_buffer, &mut read_pointer); + + let position_size_byte_temp: u16 = read_u16_ref(&shard.postings_buffer, &mut read_pointer); + let embed_flag = position_size_byte_temp & 0b10000000_00000000 > 0; + let position_size_byte = (position_size_byte_temp & 0b01111111_11111111) as usize; + + let plo = shard.segments_level0[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + let mut field_vec: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + let mut field_vec_ngram1 = SmallVec::new(); + let mut field_vec_ngram2 = SmallVec::new(); + let mut field_vec_ngram3 = SmallVec::new(); + + decode_positions_commit( + posting_pointer_size, + embed_flag, + &shard.postings_buffer, + read_pointer, + &plo.ngram_type, + shard.indexed_field_vec.len(), + shard.indexed_field_id_bits, + shard.indexed_field_id_mask, + shard.longest_field_id as u16, + &mut field_vec, + &mut field_vec_ngram1, + &mut field_vec_ngram2, + &mut field_vec_ngram3, + ); + + if posting_pointer_size == 2 { + if embed_flag { + block_copy( + &shard.postings_buffer, + read_pointer, + &mut shard.compressed_index_segment_block_buffer, + *key_rank_position_pointer_w, + position_size_byte, + ); + + *key_rank_position_pointer_w += 2; + } else { + *size_compressed_positions_key += position_size_byte; + *key_position_pointer_w -= position_size_byte; + + shard.compressed_index_segment_block_buffer[*key_rank_position_pointer_w] = + (*size_compressed_positions_key & 255) as u8; + *key_rank_position_pointer_w += 1; + shard.compressed_index_segment_block_buffer[*key_rank_position_pointer_w] = + ((*size_compressed_positions_key >> 8) & 127) as u8; + *key_rank_position_pointer_w += 1; + + block_copy( + &shard.postings_buffer, + read_pointer, + &mut shard.compressed_index_segment_block_buffer, + *key_position_pointer_w, + position_size_byte, + ); + } + } else if posting_pointer_size == 3 { + if embed_flag { + block_copy( + &shard.postings_buffer, + read_pointer, + &mut shard.compressed_index_segment_block_buffer, + *key_rank_position_pointer_w, + 
position_size_byte, + ); + + *key_rank_position_pointer_w += 3; + } else { + *size_compressed_positions_key += position_size_byte; + *key_position_pointer_w -= position_size_byte; + + shard.compressed_index_segment_block_buffer[*key_rank_position_pointer_w] = + (*size_compressed_positions_key & 255) as u8; + *key_rank_position_pointer_w += 1; + shard.compressed_index_segment_block_buffer[*key_rank_position_pointer_w] = + ((*size_compressed_positions_key >> 8) & 255) as u8; + *key_rank_position_pointer_w += 1; + shard.compressed_index_segment_block_buffer[*key_rank_position_pointer_w] = + ((*size_compressed_positions_key >> 16) & 127) as u8; + *key_rank_position_pointer_w += 1; + + block_copy( + &shard.postings_buffer, + read_pointer, + &mut shard.compressed_index_segment_block_buffer, + *key_position_pointer_w, + position_size_byte, + ); + } + } else { + println!("postingPointerSize exceeded: {}", posting_pointer_size); + } + + if plo.ngram_type == NgramType::SingleTerm + || shard.meta.similarity == SimilarityType::Bm25fProximity + { + let mut posting_score = 0.0; + for field in field_vec.iter() { + let document_length_compressed = + shard.document_length_compressed_array[field.0 as usize][*doc_id as usize]; + + let document_length_normalized_doc = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize] as f32; + let document_length_quotient_doc = + document_length_normalized_doc / shard.document_length_normalized_average; + + let tf = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + posting_score += weight + * ((tf * (K + 1.0) / (tf + (K * (1.0 - B + (B * document_length_quotient_doc))))) + + SIGMA); + } + + if posting_score > plo.max_block_score { + plo.max_block_score = posting_score; + plo.max_docid = *doc_id; + plo.max_p_docid = p_docid as u16; + } + } else if plo.ngram_type == NgramType::NgramFF + || plo.ngram_type == NgramType::NgramRF + || plo.ngram_type == NgramType::NgramFR + { + let idf_ngram1 = (((shard.indexed_doc_count as f32 - plo.posting_count_ngram_1 + 0.5) + / (plo.posting_count_ngram_1 + 0.5)) + + 1.0) + .ln(); + let idf_ngram2 = (((shard.indexed_doc_count as f32 - plo.posting_count_ngram_2 + 0.5) + / (plo.posting_count_ngram_2 + 0.5)) + + 1.0) + .ln(); + + let mut posting_score = 0.0; + for field in field_vec_ngram1.iter() { + let document_length_compressed = + shard.document_length_compressed_array[field.0 as usize][*doc_id as usize]; + let document_length_normalized_doc = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize] as f32; + let document_length_quotient_doc = + document_length_normalized_doc / shard.document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + posting_score += weight + * idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient_doc))))) + + SIGMA); + } + + for field in field_vec_ngram2.iter() { + let document_length_compressed = + shard.document_length_compressed_array[field.0 as usize][*doc_id as usize]; + let document_length_normalized_doc = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize] as f32; + let document_length_quotient_doc = + document_length_normalized_doc / shard.document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + posting_score += weight + * idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * 
document_length_quotient_doc))))) + + SIGMA); + } + + if posting_score > plo.max_block_score { + plo.max_block_score = posting_score; + plo.max_docid = *doc_id; + plo.max_p_docid = p_docid as u16; + } + } else { + let idf_ngram1 = (((shard.indexed_doc_count as f32 - plo.posting_count_ngram_1 + 0.5) + / (plo.posting_count_ngram_1 + 0.5)) + + 1.0) + .ln(); + let idf_ngram2 = (((shard.indexed_doc_count as f32 - plo.posting_count_ngram_2 + 0.5) + / (plo.posting_count_ngram_2 + 0.5)) + + 1.0) + .ln(); + let idf_ngram3 = (((shard.indexed_doc_count as f32 - plo.posting_count_ngram_3 + 0.5) + / (plo.posting_count_ngram_3 + 0.5)) + + 1.0) + .ln(); + + let mut posting_score = 0.0; + for field in field_vec_ngram1.iter() { + let document_length_compressed = + shard.document_length_compressed_array[field.0 as usize][*doc_id as usize]; + let document_length_normalized_doc = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize] as f32; + let document_length_quotient_doc = + document_length_normalized_doc / shard.document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + posting_score += weight + * idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient_doc))))) + + SIGMA); + } + + for field in field_vec_ngram2.iter() { + let document_length_compressed = + shard.document_length_compressed_array[field.0 as usize][*doc_id as usize]; + let document_length_normalized_doc = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize] as f32; + let document_length_quotient_doc = + document_length_normalized_doc / shard.document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + posting_score += weight + * idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient_doc))))) + + SIGMA); + } + + for field in field_vec_ngram3.iter() { + let document_length_compressed = + shard.document_length_compressed_array[field.0 as usize][*doc_id as usize]; + let document_length_normalized_doc = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize] as f32; + let document_length_quotient_doc = + document_length_normalized_doc / shard.document_length_normalized_average; + + let tf_ngram3 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + posting_score += weight + * idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + (K * (1.0 - B + (B * document_length_quotient_doc))))) + + SIGMA); + } + + if posting_score > plo.max_block_score { + plo.max_block_score = posting_score; + plo.max_docid = *doc_id; + plo.max_p_docid = p_docid as u16; + } + } +} + +/// Compress postinglist to array +pub(crate) fn compress_postinglist_array( + shard: &mut Shard, + roaring_offset: &mut usize, + size_compressed_docid_key: &mut usize, + key0: usize, + key_hash: u64, + key_body_offset: &u32, + compression_type_pointer: &mut u32, +) { + let plo = shard.segments_level0[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + let key_rank_position_pointer_range = *roaring_offset + plo.size_compressed_positions_key; + let mut key_position_pointer_w = key_rank_position_pointer_range; + let mut key_rank_position_pointer_w = key_rank_position_pointer_range; + let posting_pointer_size_sum = plo.pointer_pivot_p_docid as usize * 2 + + if (plo.pointer_pivot_p_docid as usize) < plo.posting_count { + (plo.posting_count - plo.pointer_pivot_p_docid 
as usize) * 3 + } else { + 0 + }; + let key_docid_pointer_w = + *roaring_offset + plo.size_compressed_positions_key + posting_pointer_size_sum; + let mut size_compressed_positions_key = 0; + + let count_byte = plo.posting_count * 2; + + plo.compression_type_pointer = key_body_offset | ((CompressionType::Array as u32) << 30); + *compression_type_pointer = plo.compression_type_pointer; + + let pointer_pivot_p_docid = plo.pointer_pivot_p_docid; + let mut next_pointer = plo.pointer_first; + for p_docid in 0..plo.posting_count { + let plo_posting_pointer_size = if p_docid < pointer_pivot_p_docid as usize { + 2 + } else { + 3 + }; + let mut doc_id = 0; + docid_iterator( + shard, + plo_posting_pointer_size, + &mut next_pointer, + &mut key_position_pointer_w, + &mut key_rank_position_pointer_w, + key0, + key_hash, + &mut doc_id, + &mut size_compressed_positions_key, + p_docid, + ); + + write_u16( + doc_id, + &mut shard.compressed_index_segment_block_buffer, + key_docid_pointer_w + (p_docid * 2), + ); + } + + *size_compressed_docid_key = count_byte; + *roaring_offset = key_docid_pointer_w + count_byte; +} + +/// Compress postinglist to bitmap +pub(crate) fn compress_postinglist_bitmap( + shard: &mut Shard, + roaring_offset: &mut usize, + size_compressed_docid_key: &mut usize, + key0: usize, + key_hash: u64, + key_body_offset: &u32, + compression_type_pointer: &mut u32, +) { + let plo = shard.segments_level0[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + let key_rank_position_pointer_range = *roaring_offset + plo.size_compressed_positions_key; + let mut key_position_pointer_w = key_rank_position_pointer_range; + let mut key_rank_position_pointer_w = key_rank_position_pointer_range; + let posting_pointer_size_sum = plo.pointer_pivot_p_docid as usize * 2 + + if (plo.pointer_pivot_p_docid as usize) < plo.posting_count { + (plo.posting_count - plo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + let key_docid_pointer_w = + *roaring_offset + plo.size_compressed_positions_key + posting_pointer_size_sum; + let mut size_compressed_positions_key = 0; + + let count_byte = 8192; + + plo.compression_type_pointer = key_body_offset | ((CompressionType::Bitmap as u32) << 30); + *compression_type_pointer = plo.compression_type_pointer; + + shard.compressed_index_segment_block_buffer + [key_docid_pointer_w..key_docid_pointer_w + count_byte] + .fill(0); + + let pointer_pivot_p_docid = plo.pointer_pivot_p_docid; + let mut next_pointer = plo.pointer_first; + for p_docid in 0..plo.posting_count { + let plo_posting_pointer_size = if p_docid < pointer_pivot_p_docid as usize { + 2 + } else { + 3 + }; + + let mut doc_id = 0; + docid_iterator( + shard, + plo_posting_pointer_size, + &mut next_pointer, + &mut key_position_pointer_w, + &mut key_rank_position_pointer_w, + key0, + key_hash, + &mut doc_id, + &mut size_compressed_positions_key, + p_docid, + ); + + let docid_pos = doc_id; + let byte_pos = docid_pos >> 3; + let bit_pos = docid_pos & 7; + + shard.compressed_index_segment_block_buffer[key_docid_pointer_w + byte_pos as usize] |= + 1u8 << bit_pos; + } + + *size_compressed_docid_key = count_byte; + *roaring_offset = key_docid_pointer_w + count_byte; +} + +/// Compress postinglist to RLE +#[allow(clippy::too_many_arguments)] +pub(crate) fn compress_postinglist_rle( + shard: &mut Shard, + roaring_offset: &mut usize, + size_compressed_docid_key: &mut usize, + key0: usize, + key_hash: u64, + runs_count_threshold: u16, + runs_count: &mut u16, + key_body_offset: &u32, + compression_type_pointer: &mut u32, +) { 
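+    // Sketch of the RLE layout produced below: consecutive doc ids are collapsed into runs, each
+    // stored as two u16 values (start doc id, run length counted after the start, so 0 means a
+    // single doc id). The u16 number of runs is written at the front of the compressed doc id
+    // section. If runs_count reaches runs_count_threshold, runs_count is reset to 0 and the
+    // function returns early so the caller falls back to array or bitmap compression instead.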
+ let plo = shard.segments_level0[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + *runs_count = 0; + + let mut run_start = 0; + let mut run_length = 0; + + let key_rank_position_pointer_range = *roaring_offset + plo.size_compressed_positions_key; + let mut key_position_pointer_w = key_rank_position_pointer_range; + let mut key_rank_position_pointer_w = key_rank_position_pointer_range; + let posting_pointer_size_sum = plo.pointer_pivot_p_docid as usize * 2 + + if (plo.pointer_pivot_p_docid as usize) < plo.posting_count { + (plo.posting_count - plo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + let mut key_docid_pointer_w = + *roaring_offset + plo.size_compressed_positions_key + posting_pointer_size_sum; + let key_docid_pointer_w_old = key_docid_pointer_w; + let mut size_compressed_positions_key = 0; + + plo.compression_type_pointer = key_body_offset | ((CompressionType::Rle as u32) << 30); + *compression_type_pointer = plo.compression_type_pointer; + + let mut doc_id_old = 0; + let pointer_pivot_p_docid = plo.pointer_pivot_p_docid; + let mut next_pointer = plo.pointer_first; + for p_docid in 0..plo.posting_count { + let plo_posting_pointer_size = if p_docid < pointer_pivot_p_docid as usize { + 2 + } else { + 3 + }; + + let mut doc_id = 0; + docid_iterator( + shard, + plo_posting_pointer_size, + &mut next_pointer, + &mut key_position_pointer_w, + &mut key_rank_position_pointer_w, + key0, + key_hash, + &mut doc_id, + &mut size_compressed_positions_key, + p_docid, + ); + + if p_docid == 0 { + run_start = doc_id; + } else if doc_id_old + 1 == doc_id { + run_length += 1; + } else { + write_u16( + run_start, + &mut shard.compressed_index_segment_block_buffer, + key_docid_pointer_w_old + (((*runs_count << 1) as usize + 1) * 2), + ); + write_u16( + run_length, + &mut shard.compressed_index_segment_block_buffer, + key_docid_pointer_w_old + (((*runs_count << 1) as usize + 2) * 2), + ); + key_docid_pointer_w += 4; + + run_start = doc_id; + run_length = 0; + *runs_count += 1; + } + + if *runs_count >= runs_count_threshold { + *runs_count = 0; + return; + } + doc_id_old = doc_id; + } + + write_u16( + run_start, + &mut shard.compressed_index_segment_block_buffer, + key_docid_pointer_w_old + (((*runs_count << 1) as usize + 1) * 2), + ); + write_u16( + run_length, + &mut shard.compressed_index_segment_block_buffer, + key_docid_pointer_w_old + (((*runs_count << 1) as usize + 2) * 2), + ); + + *runs_count += 1; + key_docid_pointer_w += 4; + + write_u16( + *runs_count, + &mut shard.compressed_index_segment_block_buffer, + key_docid_pointer_w_old, + ); + key_docid_pointer_w += 2; + + *size_compressed_docid_key = key_docid_pointer_w - key_docid_pointer_w_old; + *roaring_offset = key_docid_pointer_w; +} + +/// Compress positions: input delta compressed vector, output VINT compressioned byte array +pub(crate) fn compress_positions( + positions: &[u16], + positions_compressed: &mut [u8], + size_compressed_positions_pointer: &mut usize, +) { + for delta in positions { + if *delta < 128 { + positions_compressed[*size_compressed_positions_pointer] = *delta as u8 | STOP_BIT; + *size_compressed_positions_pointer += 1 + } else if *delta < 16_384 { + positions_compressed[*size_compressed_positions_pointer] = + (delta >> 7) as u8 & 0b01111111; + *size_compressed_positions_pointer += 1; + positions_compressed[*size_compressed_positions_pointer] = + (delta & 0b01111111) as u8 | STOP_BIT; + *size_compressed_positions_pointer += 1; + } else { + positions_compressed[*size_compressed_positions_pointer] = + 
(delta >> 13) as u8 & 0b01111111; + *size_compressed_positions_pointer += 1; + positions_compressed[*size_compressed_positions_pointer] = + (delta >> 7) as u8 & 0b01111111; + *size_compressed_positions_pointer += 1; + positions_compressed[*size_compressed_positions_pointer] = + (delta & 0b01111111) as u8 | STOP_BIT; + *size_compressed_positions_pointer += 1; + } + } +} diff --git a/mobile_app/rust/src/seekstorm/doc_store.rs b/mobile_app/rust/src/seekstorm/doc_store.rs new file mode 100644 index 0000000..5519d66 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/doc_store.rs @@ -0,0 +1,353 @@ +use memmap2::Mmap; +use serde_json::{Value, json}; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::fs; +use std::io::{self, Seek, SeekFrom, Write}; +use std::path::PathBuf; + +use crate::geo_search::euclidian_distance; +use crate::highlighter::{Highlighter, top_fragments_from_field}; +use crate::index::{ + AccessType, DistanceField, Document, FILE_PATH, FieldType, Index, ROARING_BLOCK_SIZE, Shard, +}; +use crate::search::FacetValue; +use crate::utils::{read_u32, write_u32}; + +impl Shard { + pub(crate) fn get_file_shard(&self, doc_id: usize) -> Result, String> { + let file_path = PathBuf::new(&self.index_path_string) + .join(FILE_PATH) + .join(doc_id.to_string() + ".pdf"); + + if let Ok(data) = fs::read(file_path) { + Ok(data) + } else { + Err("not found".into()) + } + } + + pub(crate) fn get_document_shard( + &self, + doc_id: usize, + include_uncommited: bool, + highlighter_option: &Option, + fields: &HashSet, + distance_fields: &[DistanceField], + ) -> Result { + if !self.delete_hashset.is_empty() && self.delete_hashset.contains(&doc_id) { + return Err("not found".to_owned()); + } + + if doc_id >= self.indexed_doc_count { + return Err("not found".to_owned()); + } + let block_id = doc_id >> 16; + + let is_uncommitted = doc_id >= self.committed_doc_count; + if is_uncommitted && !(include_uncommited && self.uncommitted) { + return Err("not found".to_owned()); + } + + if self.stored_field_names.is_empty() { + return Err("not found".to_owned()); + } + + let doc_id_local = doc_id & 0b11111111_11111111; + + let mut doc = if self.meta.access_type == AccessType::Ram || is_uncommitted { + let docstore_pointer_docs = if is_uncommitted { + &self.compressed_docstore_segment_block_buffer + } else { + &self.level_index[block_id].docstore_pointer_docs + }; + + let position = doc_id_local * 4; + let pointer = read_u32(docstore_pointer_docs, position) as usize; + + let previous_pointer = if doc_id == self.committed_doc_count || doc_id_local == 0 { + ROARING_BLOCK_SIZE * 4 + } else { + read_u32(docstore_pointer_docs, position - 4) as usize + }; + + if previous_pointer == pointer { + return Err("not found".to_owned()); + } + + let compressed_doc = &docstore_pointer_docs[previous_pointer..pointer]; + let decompressed_doc = zstd::decode_all(compressed_doc).unwrap(); + let doc: Document = serde_json::from_slice(&decompressed_doc).unwrap(); + + doc + } else { + let level = doc_id >> 16; + + let pointer; + let previous_pointer; + let position = + self.level_index[level].docstore_pointer_docs_pointer + (doc_id_local * 4); + + if doc_id_local == 0 { + previous_pointer = ROARING_BLOCK_SIZE * 4; + pointer = read_u32(&self.docstore_file_mmap, position) as usize; + } else { + previous_pointer = read_u32(&self.docstore_file_mmap, position - 4) as usize; + pointer = read_u32(&self.docstore_file_mmap, position) as usize; + }; + + if previous_pointer == pointer { + return Err(format!("not found {} {}", previous_pointer, 
pointer)); + } + + let compressed_doc = &self.docstore_file_mmap[(self.level_index[level] + .docstore_pointer_docs_pointer + + previous_pointer) + ..(self.level_index[level].docstore_pointer_docs_pointer + pointer)]; + + let decompressed_doc = zstd::decode_all(compressed_doc).unwrap(); + let doc: Document = serde_json::from_slice(&decompressed_doc).unwrap(); + + doc + }; + + if let Some(highlighter) = highlighter_option { + let mut kwic_vec: VecDeque = VecDeque::new(); + for highlight in highlighter.highlights.iter() { + let kwic = + top_fragments_from_field(self, &doc, &highlighter.query_terms_ac, highlight) + .unwrap(); + kwic_vec.push_back(kwic); + } + + for highlight in highlighter.highlights.iter() { + let kwic = kwic_vec.pop_front().unwrap(); + doc.insert( + (if highlight.name.is_empty() { + &highlight.field + } else { + &highlight.name + }) + .to_string(), + json!(kwic), + ); + } + } + + for distance_field in distance_fields.iter() { + if let Some(idx) = self.facets_map.get(&distance_field.field) + && self.facets[*idx].field_type == FieldType::Point + && let FacetValue::Point(point) = + self.get_facet_value_shard(&distance_field.field, doc_id) + { + let distance = + euclidian_distance(&point, &distance_field.base, &distance_field.unit); + + doc.insert(distance_field.distance.clone(), json!(distance)); + } + } + + if !fields.is_empty() { + for key in self.stored_field_names.iter() { + if !fields.contains(key) { + doc.remove(key); + } + } + } + + Ok(doc) + } + + pub(crate) fn copy_file(&self, source_path: &PathBuf, doc_id: usize) -> io::Result { + let dir_path = PathBuf::new(&self.index_path_string).join(FILE_PATH); + if !dir_path.exists() { + fs::create_dir_all(&dir_path).unwrap(); + } + + let file_path = dir_path.join(doc_id.to_string() + ".pdf"); + fs::copy(source_path, file_path) + } + + pub(crate) fn write_file(&self, file_bytes: &[u8], doc_id: usize) -> io::Result { + let dir_path = PathBuf::new(&self.index_path_string).join(FILE_PATH); + if !dir_path.exists() { + fs::create_dir_all(&dir_path).unwrap(); + } + + let file_path = dir_path.join(doc_id.to_string() + ".pdf"); + + let mut file = fs::OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(file_path)?; + + let _ = file.write_all(file_bytes); + Ok(file_bytes.len() as u64) + } + + pub(crate) fn store_document(&mut self, doc_id: usize, document: HashMap) { + let mut document = document; + + let keys: Vec = document.keys().cloned().collect(); + for key in keys.into_iter() { + if !self.schema_map.contains_key(&key) || !self.schema_map.get(&key).unwrap().stored { + document.remove(&key); + } + } + + if document.is_empty() { + return; + } + + let document_string = serde_json::to_string(&document).unwrap(); + + let mut compressed = zstd::encode_all(document_string.as_bytes(), 1).unwrap(); + + self.compressed_docstore_segment_block_buffer + .append(&mut compressed); + + write_u32( + self.compressed_docstore_segment_block_buffer.len() as u32, + &mut self.compressed_docstore_segment_block_buffer, + (doc_id & 0b11111111_11111111) * 4, + ); + } + + pub(crate) fn commit_docstore( + &mut self, + indexed_doc_count: usize, + is_last_level_incomplete: bool, + ) { + let size_uncommitted = self.compressed_docstore_segment_block_buffer.len(); + let level = self.level_index.len() - 1; + + if is_last_level_incomplete { + let docstore_file_end = self.docstore_file.metadata().unwrap().len(); + + let size_committed = + docstore_file_end as usize - self.last_level_docstore_file_start_pos as usize - 4; + let size_committed_docs 
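+            // Descriptive note: the docstore block buffer reserves its first
+            // 4 * ROARING_BLOCK_SIZE bytes as a u32 end-offset table (one entry per local
+            // doc id); compressed documents are appended after that table. The subtraction
+            // below therefore strips the offset table to obtain the document bytes only.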
= size_committed - (4 * ROARING_BLOCK_SIZE); + let size_sum = size_uncommitted + size_committed_docs; + + let _ = self + .docstore_file + .seek(SeekFrom::Start(self.last_level_docstore_file_start_pos)); + + let _ = self.docstore_file.write(&(size_sum as u32).to_le_bytes()); + + let committed_doc_count = (self.committed_doc_count - 1 % ROARING_BLOCK_SIZE) + 1; + let indexed_doc_count = (indexed_doc_count - 1 % ROARING_BLOCK_SIZE) + 1; + + for i in committed_doc_count..indexed_doc_count { + let pointer = read_u32(&self.compressed_docstore_segment_block_buffer, i * 4); + + write_u32( + pointer + size_committed_docs as u32, + &mut self.compressed_docstore_segment_block_buffer, + i * 4, + ); + } + + let _ = self.docstore_file.seek(SeekFrom::Start( + self.last_level_docstore_file_start_pos + 4 + committed_doc_count as u64 * 4, + )); + + let _ = self.docstore_file.write( + &self.compressed_docstore_segment_block_buffer + [committed_doc_count * 4..ROARING_BLOCK_SIZE * 4], + ); + + let _ = self.docstore_file.seek(SeekFrom::Start(docstore_file_end)); + + let _ = self + .docstore_file + .write(&self.compressed_docstore_segment_block_buffer[4 * ROARING_BLOCK_SIZE..]); + + if self.meta.access_type == AccessType::Ram { + self.level_index[level] + .docstore_pointer_docs + .extend_from_slice( + &self.compressed_docstore_segment_block_buffer[4 * ROARING_BLOCK_SIZE..], + ); + self.level_index[level].docstore_pointer_docs + [committed_doc_count * 4..ROARING_BLOCK_SIZE * 4] + .copy_from_slice( + &self.compressed_docstore_segment_block_buffer + [committed_doc_count * 4..ROARING_BLOCK_SIZE * 4], + ); + } + } else { + let _ = self + .docstore_file + .write(&(size_uncommitted as u32).to_le_bytes()); + + self.level_index[level].docstore_pointer_docs_pointer = + self.docstore_file.stream_position().unwrap() as usize; + + let _ = self + .docstore_file + .write(&self.compressed_docstore_segment_block_buffer); + + if self.meta.access_type == AccessType::Ram { + self.level_index[level].docstore_pointer_docs.append( + &mut self + .compressed_docstore_segment_block_buffer + .drain(..) + .collect(), + ); + } + } + + let _ = self.docstore_file.flush(); + + self.compressed_docstore_segment_block_buffer = vec![0; ROARING_BLOCK_SIZE * 4]; + + if self.meta.access_type == AccessType::Mmap { + self.docstore_file_mmap = + unsafe { Mmap::map(&self.docstore_file).expect("Unable to create Mmap") }; + } + } +} + +impl Index { + /// Get file for document id + /// Arguments: + /// * `doc_id`: Specifies which document to load from the document store of the index. + /// + /// Returns: + /// * `Vec`: The file content as a byte vector. + /// + pub async fn get_file(&self, doc_id: usize) -> Result, String> { + let shard_id = doc_id & ((1 << self.shard_bits) - 1); + let doc_id = doc_id >> self.shard_bits; + self.shard_vec[shard_id].read().await.get_file_shard(doc_id) + } + + /// Get document for document id + /// Arguments: + /// * `doc_id`: Specifies which document to load from the document store of the index. + /// * `include_uncommited`: Return also documents which have not yet been committed. + /// * `highlighter_option`: Specifies the extraction of keyword-in-context (KWIC) fragments from fields in documents, and the highlighting of the query terms within. + /// * `fields`: Specifies which of the stored fields to return with each document. 
Default: If empty return all stored fields + /// * `distance_fields`: insert distance fields into result documents, calculating the distance between a specified facet field of type Point and a base Point, in kilometers or miles. + /// using Euclidian distance (Pythagoras theorem) with Equirectangular approximation. + pub async fn get_document( + &self, + doc_id: usize, + include_uncommited: bool, + highlighter_option: &Option, + fields: &HashSet, + distance_fields: &[DistanceField], + ) -> Result { + let shard_id = doc_id & ((1 << self.shard_bits) - 1); + let doc_id_shard = doc_id >> self.shard_bits; + + self.shard_vec[shard_id].read().await.get_document_shard( + doc_id_shard, + include_uncommited, + highlighter_option, + fields, + distance_fields, + ) + } +} diff --git a/mobile_app/rust/src/seekstorm/geo_search.rs b/mobile_app/rust/src/seekstorm/geo_search.rs new file mode 100644 index 0000000..4377ada --- /dev/null +++ b/mobile_app/rust/src/seekstorm/geo_search.rs @@ -0,0 +1,144 @@ +use std::cmp::Ordering; + +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::{_pdep_u64, _pext_u64}; + +use crate::{ + index::DistanceUnit, + search::{Point, SortOrder}, +}; + +#[inline] +fn encode_morton_64_bit(x: u32) -> u64 { + let mut x = x as u64; + x = (x | (x << 32)) & 0x00000000ffffffff; + x = (x | (x << 16)) & 0x0000FFFF0000FFFF; + x = (x | (x << 8)) & 0x00FF00FF00FF00FF; + x = (x | (x << 4)) & 0x0F0F0F0F0F0F0F0F; + x = (x | (x << 2)) & 0x3333333333333333; + x = (x | (x << 1)) & 0x5555555555555555; + x +} + +/// encode 2D-coordinate (lat/lon) into 64-bit Morton code +/// This method is lossy/quantized as two f64 coordinate values are mapped to a single u64 Morton code! +/// The z-value of a point in multidimensions is simply calculated by interleaving the binary representations of its coordinate values. +#[inline] +pub fn encode_morton_2_d(point: &Point) -> u64 { + let x_u32 = ((point[0] * 10_000_000.0) as i32) as u32; + let y_u32 = ((point[1] * 10_000_000.0) as i32) as u32; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("bmi2") { + return unsafe { + _pdep_u64(x_u32.into(), 0x5555555555555555) + | _pdep_u64(y_u32.into(), 0xAAAAAAAAAAAAAAAA) + }; + } + } + + (encode_morton_64_bit(y_u32) << 1) | encode_morton_64_bit(x_u32) +} + +#[inline] +fn decode_morton_64_bit(code: u64) -> u64 { + let mut x = code & 0x5555555555555555; + x = (x ^ (x >> 1)) & 0x3333333333333333; + x = (x ^ (x >> 2)) & 0x0F0F0F0F0F0F0F0F; + x = (x ^ (x >> 4)) & 0x00FF00FF00FF00FF; + x = (x ^ (x >> 8)) & 0x0000FFFF0000FFFF; + x = (x ^ (x >> 16)) & 0x00000000FFFFFFFF; + x +} + +/// decode 64-bit Morton code into 2D-coordinate (lat/lon) +/// This method is lossy/quantized as a single u64 Morton code is converted to two f64 coordinate values! 
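+/// # Example
+/// A round-trip sketch (the module path follows the crate's public layout and may differ in this
+/// vendored copy; coordinates are quantized to 1e-7 degrees, so small rounding differences remain):
+/// ```rust
+/// use seekstorm::geo_search::{decode_morton_2_d, encode_morton_2_d};
+/// let point = vec![52.5200, 13.4050];
+/// let code = encode_morton_2_d(&point);
+/// let decoded = decode_morton_2_d(code);
+/// assert!((decoded[0] - point[0]).abs() < 1e-6);
+/// assert!((decoded[1] - point[1]).abs() < 1e-6);
+/// ```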
+#[inline] +pub fn decode_morton_2_d(code: u64) -> Point { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("bmi2") { + let x_u32 = unsafe { _pext_u64(code, 0x5555555555555555) as u32 }; + let y_u32 = unsafe { _pext_u64(code, 0xAAAAAAAAAAAAAAAA) as u32 }; + + return vec![ + (x_u32 as i32) as f64 / 10_000_000.0, + (y_u32 as i32) as f64 / 10_000_000.0, + ]; + }; + } + + let x_u32 = decode_morton_64_bit(code) as u32; + let y_u32 = decode_morton_64_bit(code >> 1) as u32; + + vec![ + (x_u32 as i32) as f64 / 10_000_000.0, + (y_u32 as i32) as f64 / 10_000_000.0, + ] +} + +#[inline] +fn simplified_distance(point1: &Point, point2: &Point) -> f64 { + let x = (point2[1] - point1[1]) * f64::cos(DEG2RAD * (point1[0] + point2[0]) / 2.0); + let y = point2[0] - point1[0]; + + x * x + y * y +} + +/// Comparison of the distances between two morton encoded positions and a base position +pub fn morton_ordering( + morton1: u64, + morton2: u64, + base_point: &Point, + order: &SortOrder, +) -> Ordering { + let point1 = decode_morton_2_d(morton1); + let point2 = decode_morton_2_d(morton2); + + let distance1 = simplified_distance(&point1, base_point); + let distance2 = simplified_distance(&point2, base_point); + + if order == &SortOrder::Descending { + distance1.partial_cmp(&distance2).unwrap_or(Ordering::Equal) + } else { + distance2.partial_cmp(&distance1).unwrap_or(Ordering::Equal) + } +} + +const EARTH_RADIUS_KM: f64 = 6371.0087714; +const EARTH_RADIUS_MI: f64 = 3_958.761_315_801_475; +const DEG2RAD: f64 = 0.017_453_292_519_943_295; + +/// calculates distance in kilometers or miles between two 2D-coordinates using Euclidian distance (Pythagoras theorem) with Equirectangular approximation. +#[inline] +pub fn euclidian_distance(point1: &Point, point2: &Point, unit: &DistanceUnit) -> f64 { + let x = DEG2RAD * (point2[1] - point1[1]) * f64::cos(DEG2RAD * (point1[0] + point2[0]) / 2.0); + let y = DEG2RAD * (point2[0] - point1[0]); + + (if *unit == DistanceUnit::Kilometers { + EARTH_RADIUS_KM + } else { + EARTH_RADIUS_MI + }) * (x * x + y * y).sqrt() +} + +/// Converts a Point and a distance radius into a range of morton_codes for geo search range filtering. +/// The conversion is lossy due to coordinate to Morton code rounding errors and Equirectangular approximation of Euclidian distance. +pub fn point_distance_to_morton_range( + point: &Point, + distance: f64, + unit: &DistanceUnit, +) -> std::ops::Range { + let earth_radius = if *unit == DistanceUnit::Kilometers { + EARTH_RADIUS_KM + } else { + EARTH_RADIUS_MI + }; + let lat_delta = distance / (DEG2RAD * earth_radius); + let lon_delta = distance / (DEG2RAD * earth_radius * f64::cos(DEG2RAD * point[0])); + let morton_min = encode_morton_2_d(&vec![point[0] - lat_delta, point[1] - lon_delta]); + let morton_max = encode_morton_2_d(&vec![point[0] + lat_delta, point[1] + lon_delta]); + + morton_min..morton_max +} diff --git a/mobile_app/rust/src/seekstorm/highlighter.rs b/mobile_app/rust/src/seekstorm/highlighter.rs new file mode 100644 index 0000000..1138340 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/highlighter.rs @@ -0,0 +1,367 @@ +use crate::index::{Document, FieldType, IndexArc, Shard, hash64}; +use crate::min_heap::{self, MinHeap}; +use aho_corasick::{AhoCorasick, MatchKind}; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; + +/// Specifies the number and size of fragments (snippets, summaries) to generate from each specified field to provide a "keyword in context" (KWIC) functionality. 
+/// With highlight_markup the matching query terms within the fragments can be highlighted with HTML markup. +#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)] +pub struct Highlight { + /// Specifies the field from which the fragments (snippets, summaries) are created. + pub field: String, + /// Allows to specifiy multiple highlight result fields from the same source field, leaving the original field intact, + /// Default: if name is empty then field is used instead, i.e the original field is overwritten with the highlight. + #[serde(default)] + #[serde(skip_serializing_if = "String::is_empty")] + pub name: String, + /// If 0/default then return the full original text without fragmenting. + #[serde(default)] + pub fragment_number: usize, + /// Specifies the length of a highlight fragment. + /// The default 0 returns the full original text without truncating, but still with highlighting if highlight_markup is enabled. + #[serde(default)] + pub fragment_size: usize, + /// if true, the matching query terms within the fragments are highlighted with HTML markup **\term\<\/b\>**. + #[serde(default)] + pub highlight_markup: bool, + /// Specifies the markup tags to insert **before** each highlighted term (e.g. \"\\" or \"\\"). This can be any string, but is most often an HTML or XML tag. + /// Only used when **highlight_markup** is set to true. + #[serde(default = "default_pre_tag")] + pub pre_tags: String, + /// Specifies the markup tags to insert **after** each highlighted term. (e.g. \"\<\/b\>\" or \"\<\/em\>\"). This can be any string, but is most often an HTML or XML tag. + /// Only used when **highlight_markup** is set to true. + #[serde(default = "default_post_tag")] + pub post_tags: String, +} + +impl Default for Highlight { + fn default() -> Self { + Highlight { + field: String::new(), + name: String::new(), + fragment_number: 1, + fragment_size: usize::MAX, + highlight_markup: true, + pre_tags: default_pre_tag(), + post_tags: default_post_tag(), + } + } +} + +fn default_pre_tag() -> String { + "".into() +} + +fn default_post_tag() -> String { + "".into() +} + +/// Highlighter object used as get_document parameter for extracting keyword-in-context (KWIC) fragments from fields in documents, and highlighting the query terms within. 
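+/// # Example
+/// A minimal usage sketch; the field name "body" and the query terms are hypothetical, and
+/// `index_arc` is assumed to be an already opened `IndexArc`:
+/// ```rust
+/// use seekstorm::highlighter::{Highlight, highlighter};
+/// use std::collections::HashSet;
+/// let hl = highlighter(
+///     &index_arc,
+///     vec![Highlight { field: "body".into(), fragment_number: 2, fragment_size: 160, ..Default::default() }],
+///     vec!["morton".into(), "code".into()],
+/// )
+/// .await;
+/// // pass the highlighter to get_document to receive KWIC fragments with the query terms marked up
+/// let doc = index_arc.read().await.get_document(0, false, &Some(hl), &HashSet::new(), &[]).await;
+/// ```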
+#[derive(Debug)] +pub struct Highlighter { + pub(crate) highlights: Vec, + pub(crate) query_terms_ac: AhoCorasick, +} + +/// Returns the Highlighter object used as get_document parameter for highlighting fields in documents +pub async fn highlighter( + index_arc: &IndexArc, + highlights: Vec, + query_terms_vec: Vec, +) -> Highlighter { + let index_ref = index_arc.read().await; + let query_terms = if !index_ref.synonyms_map.is_empty() { + let mut query_terms_vec_mut = query_terms_vec.clone(); + for query_term in query_terms_vec.iter() { + let term_hash = hash64(query_term.to_lowercase().as_bytes()); + + if let Some(synonyms) = index_ref.synonyms_map.get(&term_hash) { + for synonym in synonyms.iter() { + query_terms_vec_mut.push(synonym.0.clone()); + } + } + } + query_terms_vec_mut + } else { + query_terms_vec + }; + + let query_terms_ac = AhoCorasick::builder() + .ascii_case_insensitive(true) + .match_kind(MatchKind::LeftmostLongest) + .build(query_terms) + .unwrap(); + + Highlighter { + highlights, + query_terms_ac, + } +} + +pub(crate) fn add_fragment<'a>( + no_score_no_highlight: bool, + mut fragment: Fragment<'a>, + query_terms_ac: &AhoCorasick, + fragments: &mut Vec>, + topk_candidates: &mut MinHeap, + fragment_number: usize, + fragment_size: usize, +) { + let mut score = 0.0; + let mut expected_pattern = usize::MAX; + let mut expected_index = usize::MAX; + + let mut first_end = 0; + let mut set = vec![0; query_terms_ac.patterns_len()]; + let mut sequence_length = 1; + + if no_score_no_highlight { + score = 1.0; + } else { + for mat in query_terms_ac.find_iter(fragment.text) { + if first_end == 0 { + first_end = mat.end(); + } + + let id = mat.pattern().as_usize(); + score += if id == expected_pattern && expected_index == mat.start() { + sequence_length += 1; + set[id] = 1; + sequence_length as f32 * 5.0 + } else if set[id] == 0 { + sequence_length = 1; + set[id] = 1; + 1.0 + } else { + sequence_length = 1; + 0.3 + }; + + expected_pattern = id + 1; + expected_index = mat.end() + 1; + } + } + + if first_end > fragment_size { + let mut idx = fragment.text.len() - fragment_size; + + while !fragment.text.is_char_boundary(idx) { + idx -= 1; + } + + match fragment.text[idx..].find(' ') { + None => idx = 0, + Some(value) => idx += value, + } + + let adjusted_fragment = &fragment.text[idx..]; + fragment.text = adjusted_fragment; + fragment.trim_left = true; + } else if fragment.text.len() > fragment_size { + let mut idx = fragment_size; + + while !fragment.text.is_char_boundary(idx) { + idx -= 1; + } + + match fragment.text[idx..].find(' ') { + None => idx = fragment.text.len(), + Some(value) => idx += value, + } + + let adjusted_fragment = &fragment.text[..idx]; + fragment.text = adjusted_fragment; + fragment.trim_right = true; + } + + let section_index = fragments.len(); + + let mut added = false; + if score > 0.0 { + added = topk_candidates.add_topk( + min_heap::Result { + doc_id: section_index, + score, + }, + fragment_number, + ); + } + if fragments.is_empty() || added { + fragments.push(fragment); + } +} + +const SENTENCE_BOUNDARY_CHARS: [char; 11] = + ['!', '?', '.', '¿', '¡', '。', '、', '!', '?', '︒', '。']; + +pub(crate) struct Fragment<'a> { + text: &'a str, + trim_left: bool, + trim_right: bool, +} +/// Extracts the most relevant fragments (snippets, summaries) from specified fields of the document to provide a "keyword in context" (KWIC) functionality. +/// I.e. 
the sentences containing the matches of the query terms within the field is displayed and the query term matches are optionally highlighted (e.g. bold) by injecting HTML tags in to the text. +/// Instead of showing the complete text only the relevant fragments containing keyword matches are extracted. The user is provided with concise visual feedback for relevancy of the document regarding to the query. +/// The fragment ranking score takes into account the number of matching terms, their order and proximity (phrase). +/// The score is used for the selection of top-k most relevant fragments, but the order of selected fragments is preserved how they originally appear in the field. +/// The field is fragmented into sentences, using punctuation marks '.?!' as sentence boundaries. +/// If the fragment length exceeds the specified fragment_size, then the fragment is truncated at the right or left side, so that the query term higlight positions are kept within the remaining fragment window. +/// Selecting the right fragment and the right fragment window is fundamental for the users perceived relevancy of the search results. +pub(crate) fn top_fragments_from_field( + shard: &Shard, + document: &Document, + query_terms_ac: &AhoCorasick, + highlight: &Highlight, +) -> Result { + match document.get(&highlight.field) { + None => Ok("".to_string()), + Some(value) => { + let no_score_no_highlight = + query_terms_ac.patterns_len() == 1 && query_terms_ac.max_pattern_len() == 1; + let no_fragmentation = highlight.fragment_number == 0; + let fragment_number = if no_fragmentation { + 1 + } else { + highlight.fragment_number + }; + let result_sort = Vec::new(); + let mut topk_candidates = MinHeap::new(fragment_number, shard, &result_sort); + + if let Some(schema_field) = shard.schema_map.get(&highlight.field) { + let text = match schema_field.field_type { + FieldType::Text | FieldType::String16 | FieldType::String32 => { + serde_json::from_value::(value.clone()).unwrap_or(value.to_string()) + } + _ => value.to_string(), + }; + + let mut fragments: Vec = Vec::new(); + + let mut last = 0; + if !no_fragmentation { + for (character_index, matched) in + text.match_indices(&SENTENCE_BOUNDARY_CHARS[..]) + { + if last != character_index { + let section = Fragment { + text: &text[last..character_index + matched.len()], + trim_left: false, + trim_right: false, + }; + + add_fragment( + no_score_no_highlight, + section, + query_terms_ac, + &mut fragments, + &mut topk_candidates, + fragment_number, + highlight.fragment_size, + ); + + if no_score_no_highlight + && topk_candidates.current_heap_size == fragment_number + { + break; + } + } + last = character_index + matched.len(); + } + } + + if last < text.len() - 1 { + let section = Fragment { + text: &text[last..], + trim_left: false, + trim_right: false, + }; + + add_fragment( + no_score_no_highlight, + section, + query_terms_ac, + &mut fragments, + &mut topk_candidates, + fragment_number, + highlight.fragment_size, + ); + } + + let mut combined_string = String::with_capacity(text.len()); + + if !fragments.is_empty() { + if topk_candidates.current_heap_size > 0 { + if topk_candidates.current_heap_size < fragment_number { + topk_candidates + ._elements + .truncate(topk_candidates.current_heap_size); + } + + topk_candidates + ._elements + .sort_by(|a, b| a.doc_id.partial_cmp(&b.doc_id).unwrap()); + + let mut previous_docid = 0; + for candidate in topk_candidates._elements { + if (!combined_string.is_empty() + && !combined_string.ends_with("...") + && candidate.doc_id != 
previous_docid + 1) + || (fragments[candidate.doc_id].trim_left + && (combined_string.is_empty() + || !combined_string.ends_with("..."))) + { + combined_string.push_str("...") + }; + combined_string.push_str(fragments[candidate.doc_id].text); + previous_docid = candidate.doc_id; + + if fragments[candidate.doc_id].trim_right { + combined_string.push_str("...") + }; + } + } else { + combined_string.push_str(fragments[0].text); + } + } + + if highlight.highlight_markup && !no_score_no_highlight { + highlight_terms( + &mut combined_string, + query_terms_ac, + &highlight.pre_tags, + &highlight.post_tags, + ); + } + + Ok(combined_string) + } else { + Ok("".to_string()) + } + } + } +} + +pub(crate) fn highlight_terms( + text: &mut String, + query_terms_ac: &AhoCorasick, + pre_tags: &str, + post_tags: &str, +) { + let mut result = String::new(); + let mut prev_end = 0; + + for mat in query_terms_ac.find_iter(&text) { + result.push_str(&text[prev_end..mat.start()]); + result.push_str(pre_tags); + result.push_str(&text[mat.start()..mat.end()]); + result.push_str(post_tags); + prev_end = mat.end(); + } + + if prev_end < text.len() { + result.push_str(&text[prev_end..text.len()]); + } + + *text = result; +} diff --git a/mobile_app/rust/src/seekstorm/index.rs b/mobile_app/rust/src/seekstorm/index.rs new file mode 100644 index 0000000..8d51543 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/index.rs @@ -0,0 +1,4740 @@ +use add_result::decode_positions_commit; +use ahash::{AHashMap, AHashSet}; +use futures::future; +use indexmap::IndexMap; +use itertools::Itertools; +use memmap2::{Mmap, MmapMut, MmapOptions}; +use num::FromPrimitive; +use num_derive::FromPrimitive; + +use num_format::{Locale, ToFormattedString}; + +use rust_stemmers::{Algorithm, Stemmer}; +use search::{QueryType, Search}; +use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; +use std::{ + collections::{HashMap, VecDeque}, + fs::{self, File}, + hint, + io::{BufRead, BufReader, Read, Seek, Write}, + path::PathBuf, + sync::Arc, + time::Instant, +}; +use symspell_complete_rs::{PruningRadixTrie, SymSpell}; +use tokio::sync::{RwLock, Semaphore}; +use utils::{read_u32, write_u16}; +use utoipa::ToSchema; + +#[cfg(feature = "zh")] +use crate::word_segmentation::WordSegmentationTM; +use crate::{ + add_result::{self, B, K, SIGMA}, + commit::Commit, + geo_search::encode_morton_2_d, + search::{ + self, FacetFilter, Point, QueryFacet, QueryRewriting, Ranges, ResultObject, ResultSort, + ResultType, SearchShard, + }, + tokenizer::tokenizer, + utils::{ + self, read_u8_ref, read_u16, read_u16_ref, read_u32_ref, read_u64, read_u64_ref, write_f32, + write_f64, write_i8, write_i16, write_i32, write_i64, write_u32, write_u64, + }, +}; + + +pub(crate) const FILE_PATH: &str = "files"; +pub(crate) const INDEX_FILENAME: &str = "index.bin"; +pub(crate) const DOCSTORE_FILENAME: &str = "docstore.bin"; +pub(crate) const DELETE_FILENAME: &str = "delete.bin"; +pub(crate) const SCHEMA_FILENAME: &str = "schema.json"; +pub(crate) const SYNONYMS_FILENAME: &str = "synonyms.json"; +pub(crate) const META_FILENAME: &str = "index.json"; +pub(crate) const FACET_FILENAME: &str = "facet.bin"; +pub(crate) const FACET_VALUES_FILENAME: &str = "facet.json"; + +pub(crate) const DICTIONARY_FILENAME: &str = "dictionary.csv"; +pub(crate) const COMPLETIONS_FILENAME: &str = "completions.csv"; + +pub(crate) const VERSION: &str = env!("CARGO_PKG_VERSION"); + +const INDEX_HEADER_SIZE: u64 = 4; +/// Incompatible index format change: new library can't open old format, and old library 
can't open new format +pub const INDEX_FORMAT_VERSION_MAJOR: u16 = 5; +/// Backward compatible format change: new library can open old format, but old library can't open new format +pub const INDEX_FORMAT_VERSION_MINOR: u16 = 0; + +/// Maximum processed positions per term per document: default=65_536. E.g. 65,536 * 'the' per document, exceeding positions are ignored for search. +pub const MAX_POSITIONS_PER_TERM: usize = 65_536; +pub(crate) const STOP_BIT: u8 = 0b10000000; +pub(crate) const FIELD_STOP_BIT_1: u8 = 0b0010_0000; +pub(crate) const FIELD_STOP_BIT_2: u8 = 0b0100_0000; +/// maximum number of documents per block +pub const ROARING_BLOCK_SIZE: usize = 65_536; + +pub(crate) const SPEEDUP_FLAG: bool = true; +pub(crate) const SORT_FLAG: bool = true; + +pub(crate) const POSTING_BUFFER_SIZE: usize = 400_000_000; +pub(crate) const MAX_QUERY_TERM_NUMBER: usize = 100; +pub(crate) const SEGMENT_KEY_CAPACITY: usize = 1000; + +/// A document is a flattened, single level of key-value pairs, where key is an arbitrary string, and value represents any valid JSON value. +pub type Document = HashMap; + +/// File type for storing documents: PathBuf, Bytes, None. +#[derive(Clone, PartialEq)] +pub enum FileType { + /// File path + PathBuf(Box), + /// File bytes + Bytes(Box, Box<[u8]>), + /// No file + None, +} + +/// Defines where the index resides during search: +/// - Ram (the complete index is preloaded to Ram when opening the index) +/// - Mmap (the index is accessed via memory-mapped files). See architecture.md for details. +/// - At commit the data is serialized to disk for persistence both in Ram and Mmap mode. +/// - The serialization format is identical for Ram and Mmap mode, allowing to change it retrospectively. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] +pub enum AccessType { + /// Ram (the complete index is preloaded to Ram when opening the index). + /// - Index size is limited by available RAM size. + /// - Slightly fastesr search speed. + /// - Higher index loading time. + /// - Higher RAM usage. + Ram = 0, + /// Mmap (the index is accessed via memory-mapped files). See architecture.md for details. + /// - Enables index size scaling beyond RAM size. + /// - Slightly slower search speed compared to Ram. + /// - Faster index loading time compared to Ram. + /// - Lower RAM usage. + Mmap = 1, +} + +/// Similarity type defines the scoring and ranking of the search results: +/// - Bm25f: considers documents composed from several fields, with different field lengths and importance +/// - Bm25fProximity: considers term proximity, e.g. for implicit phrase search with improved relevancy +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Default, ToSchema)] +pub enum SimilarityType { + /// Bm25f considers documents composed from several fields, with different field lengths and importance + Bm25f = 0, + /// Bm25fProximity considers term proximity, e.g. for implicit phrase search with improved relevancy + #[default] + Bm25fProximity = 1, +} + +/// Defines tokenizer behavior: +/// AsciiAlphabetic +/// - Mainly for for benchmark compatibility +/// - Only ASCII alphabetic chars are recognized as token. +/// +/// UnicodeAlphanumeric +/// - All Unicode alphanumeric chars are recognized as token. +/// - Allows '+' '-' '#' in middle or end of a token: c++, c#, block-max. +/// +/// UnicodeAlphanumericFolded +/// - All Unicode alphanumeric chars are recognized as token. +/// - Allows '+' '-' '#' in middle or end of a token: c++, c#, block-max. 
+/// - Diacritics, accents, zalgo text, umlaut, bold, italic, full-width UTF-8 characters are converted into its basic representation. +/// - Apostroph handling prevents that short term parts preceding or following the apostroph get indexed (e.g. "s" in "someone's"). +/// - Tokenizing might be slower due to folding and apostroph processing. +/// +/// UnicodeAlphanumericZH +/// - Implements Chinese word segmentation to segment continuous Chinese text into tokens for indexing and search. +/// - Supports mixed Latin and Chinese texts +/// - Supports Chinese sentence boundary chars for KWIC snippets ahd highlighting. +/// - Requires feature #[cfg(feature = "zh")] +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Copy, Default, ToSchema)] +pub enum TokenizerType { + /// Only ASCII alphabetic chars are recognized as token. Mainly for benchmark compatibility. + #[default] + AsciiAlphabetic = 0, + /// All Unicode alphanumeric chars are recognized as token. + /// Allow '+' '-' '#' in middle or end of a token: c++, c#, block-max. + UnicodeAlphanumeric = 1, + /// All Unicode alphanumeric chars are recognized as token. + /// Allows '+' '-' '#' in middle or end of a token: c++, c#, block-max. + /// Diacritics, accents, zalgo text, umlaut, bold, italic, full-width UTF-8 characters are converted into its basic representation. + /// Apostroph handling prevents that short term parts preceding or following the apostroph get indexed (e.g. "s" in "someone's"). + /// Tokenizing might be slower due to folding and apostroph processing. + UnicodeAlphanumericFolded = 2, + /// Tokens are separated by whitespace. Mainly for benchmark compatibility. + Whitespace = 3, + /// Tokens are separated by whitespace. Token are converted to lowercase. Mainly for benchmark compatibility. + WhitespaceLowercase = 4, + /// Implements Chinese word segmentation to segment continuous Chinese text into tokens for indexing and search. + /// Supports mixed Latin and Chinese texts + /// Supports Chinese sentence boundary chars for KWIC snippets ahd highlighting. + /// Requires feature #[cfg(feature = "zh")] + #[cfg(feature = "zh")] + UnicodeAlphanumericZH = 5, +} + +/// Defines stemming behavior, reducing inflected words to their word stem, base or root form. +/// Stemming increases recall, but decreases precision. It can introduce false positive results. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Copy, Default, ToSchema)] +pub enum StemmerType { + /// No stemming + #[default] + None = 0, + /// Arabic stemmer + Arabic = 1, + /// Danish stemmer + Danish = 2, + /// Dutch stemmer + Dutch = 3, + /// English stemmer + English = 4, + /// Finnish stemmer + Finnish = 5, + /// French stemmer + French = 6, + /// German stemmer + German = 7, + /// Hungarian stemmer + Greek = 8, + /// Hungarian stemmer + Hungarian = 9, + /// Italian stemmer + Italian = 10, + /// Norwegian stemmer + Norwegian = 11, + /// Portuguese stemmer + Portuguese = 12, + /// Romanian stemmer + Romanian = 13, + /// Russian stemmer + Russian = 14, + /// Spanish stemmer + Spanish = 15, + /// Swedish stemmer + Swedish = 16, + /// Tamil stemmer + Tamil = 17, + /// Turkish stemmer + Turkish = 18, +} + +pub(crate) struct LevelIndex { + pub document_length_compressed_array: Vec<[u8; ROARING_BLOCK_SIZE]>, + + pub docstore_pointer_docs: Vec, + pub docstore_pointer_docs_pointer: usize, + pub document_length_compressed_array_pointer: usize, +} + +/// Posting lists are divided into blocks of a doc id range of 65.536 (16 bit). 
+/// Each block can be compressed with a different method. +#[derive(Default, Debug, Deserialize, Serialize, Clone)] +pub(crate) struct BlockObjectIndex { + pub max_block_score: f32, + pub block_id: u32, + pub compression_type_pointer: u32, + pub posting_count: u16, + pub max_docid: u16, + pub max_p_docid: u16, + pub pointer_pivot_p_docid: u16, +} + +/// PostingListObjectIndex owns all blocks of a postinglist of a term +#[derive(Default)] +pub(crate) struct PostingListObjectIndex { + pub posting_count: u32, + pub posting_count_ngram_1: u32, + pub posting_count_ngram_2: u32, + pub posting_count_ngram_3: u32, + pub posting_count_ngram_1_compressed: u8, + pub posting_count_ngram_2_compressed: u8, + pub posting_count_ngram_3_compressed: u8, + pub max_list_score: f32, + pub blocks: Vec, + + pub position_range_previous: u32, +} + +#[derive(Default, Debug, Deserialize, Serialize, Clone)] +pub(crate) struct PostingListObject0 { + pub pointer_first: usize, + pub pointer_last: usize, + pub posting_count: usize, + + pub max_block_score: f32, + pub max_docid: u16, + pub max_p_docid: u16, + + pub ngram_type: NgramType, + pub term_ngram1: String, + pub term_ngram2: String, + pub term_ngram3: String, + pub posting_count_ngram_1: f32, + pub posting_count_ngram_2: f32, + pub posting_count_ngram_3: f32, + pub posting_count_ngram_1_compressed: u8, + pub posting_count_ngram_2_compressed: u8, + pub posting_count_ngram_3_compressed: u8, + + pub position_count: usize, + pub pointer_pivot_p_docid: u16, + pub size_compressed_positions_key: usize, + pub docid_delta_max: u16, + pub docid_old: u16, + pub compression_type_pointer: u32, +} + +/// Type of posting list compression. +#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, FromPrimitive)] +pub(crate) enum CompressionType { + Delta = 0, + Array = 1, + Bitmap = 2, + Rle = 3, + #[default] + Error = 4, +} + +pub(crate) struct QueueObject<'a> { + pub query_list: Vec>, + pub query_index: usize, + pub max_score: f32, +} + +/// PostingListObjectQuery manages thes posting list for each unique query term during intersection. 
+#[derive(Clone)] +pub(crate) struct PostingListObjectQuery<'a> { + pub posting_count: u32, + pub max_list_score: f32, + pub blocks: &'a Vec, + pub blocks_index: usize, + + pub term: String, + pub key0: u32, + + pub compression_type: CompressionType, + pub rank_position_pointer_range: u32, + pub compressed_doc_id_range: usize, + pub pointer_pivot_p_docid: u16, + + pub posting_pointer: usize, + pub posting_pointer_previous: usize, + + pub byte_array: &'a [u8], + + pub p_block: i32, + pub p_block_max: i32, + pub p_docid: usize, + pub p_docid_count: usize, + + pub rangebits: i32, + pub docid: i32, + pub bitposition: u32, + + pub intersect: u64, + pub ulong_pos: usize, + + pub run_end: i32, + pub p_run: i32, + pub p_run_count: i32, + pub p_run_sum: i32, + + pub term_index_unique: usize, + pub positions_count: u32, + pub positions_pointer: u32, + + pub idf: f32, + pub idf_ngram1: f32, + pub idf_ngram2: f32, + pub idf_ngram3: f32, + pub tf_ngram1: u32, + pub tf_ngram2: u32, + pub tf_ngram3: u32, + pub ngram_type: NgramType, + + pub end_flag: bool, + pub end_flag_block: bool, + pub is_embedded: bool, + pub embedded_positions: [u32; 4], + pub field_vec: SmallVec<[(u16, usize); 2]>, + pub field_vec_ngram1: SmallVec<[(u16, usize); 2]>, + pub field_vec_ngram2: SmallVec<[(u16, usize); 2]>, + pub field_vec_ngram3: SmallVec<[(u16, usize); 2]>, + pub bm25_flag: bool, +} + +pub(crate) static DUMMY_VEC: Vec = Vec::new(); +pub(crate) static DUMMY_VEC_8: Vec = Vec::new(); + +impl Default for PostingListObjectQuery<'_> { + fn default() -> Self { + Self { + posting_count: 0, + max_list_score: 0.0, + blocks: &DUMMY_VEC, + blocks_index: 0, + term: "".to_string(), + key0: 0, + compression_type: CompressionType::Error, + rank_position_pointer_range: 0, + compressed_doc_id_range: 0, + pointer_pivot_p_docid: 0, + posting_pointer: 0, + posting_pointer_previous: 0, + byte_array: &DUMMY_VEC_8, + p_block: 0, + p_block_max: 0, + p_docid: 0, + p_docid_count: 0, + rangebits: 0, + docid: 0, + bitposition: 0, + run_end: 0, + p_run: 0, + p_run_count: 0, + p_run_sum: 0, + term_index_unique: 0, + positions_count: 0, + positions_pointer: 0, + idf: 0.0, + idf_ngram1: 0.0, + idf_ngram2: 0.0, + idf_ngram3: 0.0, + ngram_type: NgramType::SingleTerm, + is_embedded: false, + embedded_positions: [0; 4], + field_vec: SmallVec::new(), + tf_ngram1: 0, + tf_ngram2: 0, + tf_ngram3: 0, + field_vec_ngram1: SmallVec::new(), + field_vec_ngram2: SmallVec::new(), + field_vec_ngram3: SmallVec::new(), + + end_flag: false, + end_flag_block: false, + bm25_flag: true, + intersect: 0, + ulong_pos: 0, + } + } +} + +/// NonUniquePostingListObjectQuery manages these posting list for each non-unique query term during intersection. +/// It references to the unique query terms. +#[derive(Clone)] +pub(crate) struct NonUniquePostingListObjectQuery<'a> { + pub term_index_unique: usize, + pub term_index_nonunique: usize, + pub pos: u32, + pub p_pos: i32, + pub positions_pointer: usize, + pub positions_count: u32, + pub byte_array: &'a [u8], + pub key0: u32, + pub is_embedded: bool, + pub embedded_positions: [u32; 4], + pub p_field: usize, + pub field_vec: SmallVec<[(u16, usize); 2]>, +} + +/// Terms are converted to hashs. The index is divided into key hash range partitioned segments. +/// for each strip (key hash range) a separate dictionary (key hash - posting list) is maintained. +/// The index hash multiple segments, each segments contains multiple terms, each term has a postinglist, each postinglist has multiple blocks. 
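+/// # Example
+/// A simplified, hypothetical illustration of hash-range partitioning (this is not the key
+/// derivation used by this crate; it only shows the idea of routing a term hash to a segment):
+/// ```rust
+/// fn segment_for(term_hash: u64, segment_count: u64) -> usize {
+///     // route by the high bits of the hash, so every segment owns a contiguous key hash range
+///     (term_hash / (u64::MAX / segment_count + 1)) as usize
+/// }
+/// assert_eq!(segment_for(0, 16), 0);
+/// assert_eq!(segment_for(u64::MAX, 16), 15);
+/// ```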
+pub(crate) struct SegmentIndex { + pub byte_array_blocks: Vec>, + pub byte_array_blocks_pointer: Vec<(usize, usize, u32)>, + pub segment: AHashMap, +} + +/// StripObject0 defines a strip (key hash range) within level 0. Level 0 is the mutable level where all writes are taking place. +/// After each 65.536 docs the level 0 is flushed as an immutable block to the next level +#[derive(Default, Debug, Clone)] +pub(crate) struct SegmentLevel0 { + pub segment: AHashMap, + pub positions_compressed: Vec, +} + +/// FieldType defines the type of a field in the document: u8, u16, u32, u64, i8, i16, i32, i64, f32, f64, point, string, stringset, text. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Default, ToSchema)] +pub enum FieldType { + /// Unsigned 8-bit integer + U8, + /// Unsigned 16-bit integer + U16, + /// Unsigned 32-bit integer + U32, + /// Unsigned 64-bit integer + U64, + /// Signed 8-bit integer + I8, + /// Signed 16-bit integer + I16, + /// Signed 32-bit integer + I32, + /// Signed 64-bit integer + I64, + /// Timestamp is identical to I64, but to be used for Unix timestamps . + /// The reason for a separate FieldType is to enable the UI to interpret I64 as timestamp without using the field name as indicator. + /// For date facets and filtering. + Timestamp, + /// Floating point 32-bit + F32, + /// Floating point 64-bit + F64, + /// Boolean + Bool, + /// String16 + /// allows a maximum cardinality of 65_535 (16 bit) distinct values, is space-saving. + #[default] + String16, + /// String32 + /// allows a maximum cardinality of 4_294_967_295 (32 bit) distinct values + String32, + /// StringSet16 is a set of strings, e.g. tags, categories, keywords, authors, genres, etc. + /// allows a maximum cardinality of 65_535 (16 bit) distinct values, is space-saving. + StringSet16, + /// StringSet32 is a set of strings, e.g. tags, categories, keywords, authors, genres, etc. + /// allows a maximum cardinality of 4_294_967_295 (32 bit) distinct values + StringSet32, + /// Point is a geographic field type: A `Vec` with two coordinate values (latitude and longitude) are internally encoded into a single u64 value (Morton code). + /// Morton codes enable efficient range queries. + /// Latitude and longitude are a pair of numbers (coordinates) used to describe a position on the plane of a geographic coordinate system. + /// The numbers are in decimal degrees format and range from -90 to 90 for latitude and -180 to 180 for longitude. + /// Coordinates are internally stored as u64 morton code: both f64 values are multiplied by 10_000_000, converted to i32 and bitwise interleaved into a single u64 morton code + /// The conversion between longitude/latitude coordinates and Morton code is lossy due to rounding errors. + Point, + /// Text is a text field, that will be tokenized by the selected Tokenizer into string tokens. + Text, +} + +/// Defines synonyms for terms per index. +#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)] +pub struct Synonym { + /// List of terms that are synonyms. + pub terms: Vec, + /// Creates alternative versions of documents where in each copy a term is replaced with one of its synonyms. + /// Doesn't impact the query latency, but does increase the index size. + /// Multi-way synonyms (default): all terms are synonyms of each other. + /// One-way synonyms: only the first term is a synonym of the following terms, but not vice versa. + /// E.g. 
[street, avenue, road] will result in searches for street to return documents containing any of the terms street, avenue or road, + /// but searches for avenue will only return documents containing avenue, but not documents containing street or road. + /// Currently only single terms without spaces are supported. + /// Synonyms are supported in result highlighting. + /// The synonyms that were created with the synonyms parameter in create_index are stored in synonyms.json in the index directory contains + /// Can be manually modified, but becomes effective only after restart and only for newly indexed documents. + #[serde(default = "default_as_true")] + pub multiway: bool, +} + +fn default_as_true() -> bool { + true +} + +/// Defines a field in index schema: field, stored, indexed , field_type, facet, boost. +#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)] +pub struct SchemaField { + /// unique name of a field + pub field: String, + /// only stored fields are returned in the search results + pub stored: bool, + /// only indexed fields can be searched + pub indexed: bool, + /// type of a field + pub field_type: FieldType, + /// optional faceting for a field + /// Faceting can be enabled both for string field type and numerical field types. + /// both numerical and string fields can be indexed (indexed=true) and stored (stored=true) in the json document, + /// but with field_facet=true they are additionally stored in a binary format, for fast faceting and sorting without docstore access (decompression, deserialization) + #[serde(skip_serializing_if = "is_default_bool")] + #[serde(default = "default_false")] + pub facet: bool, + + /// Indicate the longest field in schema. + /// Otherwise the longest field will be automatically detected in first index_document. + /// Setting/detecting the longest field ensures efficient index encoding. + #[serde(skip_serializing_if = "is_default_bool")] + #[serde(default = "default_false")] + pub longest: bool, + + /// optional custom weight factor for Bm25 ranking + #[serde(skip_serializing_if = "is_default_f32")] + #[serde(default = "default_1")] + pub boost: f32, + + /// if both indexed=true and dictionary_source=true then the terms from this field are added to dictionary to the spelling correction dictionary. + /// if disabled, then a manually generated dictionary can be used: {index_path}/dictionary.csv + #[serde(skip_serializing_if = "is_default_bool")] + #[serde(default = "default_false")] + pub dictionary_source: bool, + + /// if both indexed=true and completion_source=true then the n-grams (unigrams, bigrams, trigrams) from this field are added to the auto-completion list. + /// if disabled, then a manually generated completion list can be used: {index_path}/completions.csv + /// it is recommended to enable completion_source only for fields that contain short text with high-quality terms for auto-completion, e.g. title, author, category, product name, tags, + /// in order to keep the extraction time and RAM requirement for completions low and the completions relevance high. + #[serde(skip_serializing_if = "is_default_bool")] + #[serde(default = "default_false")] + pub completion_source: bool, + + #[serde(skip)] + pub(crate) indexed_field_id: usize, + #[serde(skip_deserializing)] + pub(crate) field_id: usize, +} + +/// Defines a field in index schema: field, stored, indexed , field_type, facet, boost. 
+/// # Parameters +/// - field: unique name of a field +/// - stored: only stored fields are returned in the search results +/// - indexed: only indexed fields can be searched +/// - field_type: type of a field: u8, u16, u32, u64, i8, i16, i32, i64, f32, f64, point +/// - facet: enable faceting for a field: for sorting results by field values, for range filtering, for result count per field value or range +/// - `longest`: This allows to annotate (manually set) the longest field in schema. +/// Otherwise the longest field will be automatically detected in first index_document. +/// Setting/detecting the longest field ensures efficient index encoding. +/// - boost: optional custom weight factor for Bm25 ranking +/// # Returns +/// - SchemaField +/// # Example +/// ```rust +/// use seekstorm::index::{SchemaField, FieldType}; +/// let schema_field = SchemaField::new("title".to_string(), true, true, FieldType::String16, false, false, 1.0, false, false); +/// ``` +impl SchemaField { + /// Creates a new SchemaField. + #[allow(clippy::too_many_arguments)] + pub fn new( + field: String, + stored: bool, + indexed: bool, + field_type: FieldType, + facet: bool, + longest: bool, + boost: f32, + dictionary_source: bool, + completion_source: bool, + ) -> Self { + SchemaField { + field, + stored, + indexed, + field_type, + facet, + longest, + boost, + dictionary_source, + completion_source, + + indexed_field_id: 0, + field_id: 0, + } + } +} + +fn default_false() -> bool { + false +} + +fn is_default_bool(num: &bool) -> bool { + !(*num) +} + +fn default_1() -> f32 { + 1.0 +} + +fn is_default_f32(num: &f32) -> bool { + *num == 1.0 +} + +pub(crate) struct IndexedField { + pub schema_field_name: String, + pub field_length_sum: usize, + pub indexed_field_id: usize, + + pub is_longest_field: bool, +} + +/// StopwordType defines the stopword behavior: None, English, German, French, Spanish, Custom. +/// Stopwords are removed, both from index and query: for compact index size and faster queries. +/// Stopword removal has drawbacks: “The Who”, “Take That”, “Let it be”, “To be or not to be”, "The The", "End of days", "What might have been" are all valid queries for bands, songs, movies, literature, +/// but become impossible when stopwords are removed. +/// The lists of stop_words and frequent_words should not overlap. +#[derive(Debug, Clone, Deserialize, Serialize, Default, ToSchema)] +pub enum StopwordType { + /// No stopwords + #[default] + None, + /// English stopwords + English, + /// German stopwords + German, + /// French stopwords + French, + /// Spanish stopwords + Spanish, + /// Custom stopwords + Custom { + ///List of stopwords. + terms: Vec, + }, +} + +/// FrequentwordType defines the frequentword behavior: None, English, German, French, Spanish, Custom. +/// Adjacent frequent terms are combined to bi-grams, both in index and query: for shorter posting lists and faster phrase queries (only for bi-grams of frequent terms). +/// The lists of stop_words and frequent_words should not overlap. +#[derive(Debug, Clone, Deserialize, Serialize, Default, ToSchema)] +pub enum FrequentwordType { + /// No frequent words + None, + /// English frequent words + #[default] + English, + /// German frequent words + German, + /// French frequent words + French, + /// Spanish frequent words + Spanish, + /// Custom frequent words + Custom { + ///List of frequent terms, max. 256 terms. + terms: Vec, + }, +} + +/// Defines spelling correction (fuzzy search) settings for an index. 
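+/// # Example
+/// A minimal configuration sketch (the concrete values are illustrative, chosen within the
+/// ranges recommended in the field documentation below):
+/// ```rust
+/// use seekstorm::index::SpellingCorrection;
+/// let spelling_correction = Some(SpellingCorrection {
+///     max_dictionary_edit_distance: 2,
+///     term_length_threshold: Some(vec![2, 8]),
+///     count_threshold: 5,
+///     max_dictionary_entries: 1_000_000,
+/// });
+/// ```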
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)] +pub struct SpellingCorrection { + /// The edit distance thresholds for suggestions: 1..2 recommended; higher values increase latency and memory consumption. + pub max_dictionary_edit_distance: usize, + /// Term length thresholds for each edit distance. + /// None: max_dictionary_edit_distance for all terms lengths + /// Some(\[4\]): max_dictionary_edit_distance for all terms lengths >= 4, + /// Some(\[2,8\]): max_dictionary_edit_distance for all terms lengths >=2, max_dictionary_edit_distance +1 for all terms for lengths>=8 + pub term_length_threshold: Option>, + + /// The minimum frequency count for dictionary words to be considered eligible for spelling correction. + /// Depends on the corpus size, 1..20 recommended. + /// If count_threshold is too high, some correct words might be missed from the dictionary and deemed misspelled, + /// if count_threshold is too low, some misspelled words from the corpus might be considered correct and added to the dictionary. + /// Dictionary terms eligible for spelling correction (frequency count >= count_threshold) consume much more RAM, than the candidates (frequency count < count_threshold), + /// but the terms below count_threshold will be included in dictionary.csv too. + pub count_threshold: usize, + + /// Limits the maximum number of dictionary entries (terms >= count_threshold) to generate during indexing, preventing excessive RAM consumption. + /// The number of terms in dictionary.csv will be higher, because it contains also the terms < count_threshold, to become eligible in the future during incremental dictionary updates. + /// Dictionary terms eligible for spelling correction (frequency count >= count_threshold) consume much more RAM, than the candidates (frequency count < count_threshold). + /// ⚠️ Above this threshold no new terms are added to the dictionary, causing them to be deemed incorrect during spelling correction and possibly changed to similar terms that are in the dictionary. + pub max_dictionary_entries: usize, +} + +/// Defines spelling correction (fuzzy search) settings for an index. +#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)] +pub struct QueryCompletion { + /// Maximum number of completions to generate during indexing + /// disabled if == 0 + pub max_completion_entries: usize, +} + +/// Specifies SimilarityType, TokenizerType and AccessType when creating an new index +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct IndexMetaObject { + /// unique index ID + /// Only used in SeekStorm server, not by the SeekStorm library itself. + /// In the SeekStorm server with REST API, the index ID is used to specify the index (within the API key) where you want to index and search. + pub id: u64, + /// index name: used informational purposes + pub name: String, + /// SimilarityType defines the scoring and ranking of the search results: Bm25f or Bm25fProximity + pub similarity: SimilarityType, + /// TokenizerType defines the tokenizer behavior: AsciiAlphabetic, UnicodeAlphanumeric, UnicodeAlphanumericFolded, UnicodeAlphanumericZH + pub tokenizer: TokenizerType, + /// StemmerType defines the stemming behavior: None, Arabic, Armenian, Danish, Dutch, English, French, German, Greek, Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, Spanish, Swedish, Tamil, Turkish + pub stemmer: StemmerType, + + /// StopwordType defines the stopword behavior: None, English, German, French, Spanish, Custom. 
+ /// Stopwords are removed, both from index and query: for compact index size and faster queries. + /// Stopword removal has drawbacks: “The Who”, “Take That”, “Let it be”, “To be or not to be”, "The The", "End of days", "What might have been" are all valid queries for bands, songs, movies, literature, + /// but become impossible when stopwords are removed. + /// The lists of stop_words and frequent_words should not overlap. + #[serde(default)] + pub stop_words: StopwordType, + /// FrequentwordType defines the frequentword behavior: None, English, German, French, Spanish, Custom. + /// Adjacent frequent terms are combined to bi-grams, both in index and query: for shorter posting lists and faster phrase queries (only for bi-grams of frequent terms). + /// The lists of stop_words and frequent_words should not overlap. + #[serde(default)] + pub frequent_words: FrequentwordType, + /// N-gram indexing: n-grams are indexed in addition to single terms, for fast phrase search, at the cost of higher index size + /// Preference valid both for index time and query time. Any change requires reindexing. + /// bitwise OR flags: + /// SingleTerm = 0b00000000, always enabled in addition to the other optional NgramSet below + /// NgramFF = 0b00000001, frequent frequent + /// NgramFR = 0b00000010, frequent rare + /// NgramRF = 0b00000100, rare frequent + /// NgramFFF = 0b00001000, frequent frequent frequent + /// NgramRFF = 0b00010000, rare frequent frequent + /// NgramFFR = 0b00100000, frequent frequent rare + /// NgramFRF = 0b01000000, frequent rare frequent + /// + /// When **minimum index size** is more important than phrase query latency, we recommend **Single Terms**: + /// `NgramSet::SingleTerm as u8` + /// + /// For a **good balance of latency and index size** cost, we recommend **Single Terms + Frequent Bigrams + Frequent Trigrams** (default): + /// `NgramSet::SingleTerm as u8 | NgramSet::NgramFF as u8 | NgramSet::NgramFFF` + /// + /// When **minimal phrase query latency** is more important than low index size, we recommend **Single Terms + Mixed Bigrams + Frequent Trigrams**: + /// `NgramSet::SingleTerm as u8 | NgramSet::NgramFF as u8 | NgramSet::NgramFR as u8 | NgramSet::NgramRF | NgramSet::NgramFFF` + #[serde(default = "ngram_indexing_default")] + pub ngram_indexing: u8, + + /// AccessType defines where the index resides during search: Ram or Mmap + pub access_type: AccessType, + /// Enable spelling correction for search queries using the SymSpell algorithm. + /// SymSpell enables finding those spelling suggestions in a dictionary very fast with minimum Damerau-Levenshtein edit distance and maximum word occurrence frequency. + /// When enabled, a SymSpell dictionary is incrementally created during indexing of documents and stored in the index. + /// The spelling correction is not based on a generic dictionary, but on a domain specific one derived from your indexed documents (only indexed fields). + /// This makes it language independent and prevents any discrepancy between corrected word and indexed content. + /// The creation of an individual dictionary derived from the indexed documents improves the correction quality compared to a generic dictionary. + /// An dictionary per index improves the privacy compared to a global dictionary derived from all indices. + /// The dictionary is deleted when delete_index or clear_index is called. + /// Note: enabling spelling correction increases the index size, indexing time and query latency. + /// Default: None. 
Enable by setting CreateDictionary with values for max_dictionary_edit_distance (1..2 recommended) and optionally a term length thresholds for each edit distance. + /// The higher the value, the higher the number of errors taht can be corrected - but also the memory consumption, lookup latency, and the number of false positives. + /// ⚠️ In addition to the create_index parameter `meta.spelling_correction` you also need to set the parameter `query_rewriting` in the search method to enable it per query. + #[serde(default)] + pub spelling_correction: Option, + + /// Enable query completion for search queries + /// When enabled, an auto-completion list is incrementally created during indexing of documents and stored in the index. + /// Because the completions are not based on a generic dictionary, but on a domain specific one derived from your indexed documents (only from indexed fields with completion_source=true), this increases the relevance of completions. + /// ⚠️ Deriving completions from indexed documents increases the indexing time and index size. + #[serde(default)] + pub query_completion: Option, +} + +fn ngram_indexing_default() -> u8 { + NgramSet::NgramFF as u8 | NgramSet::NgramFFF as u8 +} + +#[derive(Debug, Clone, Default)] +pub(crate) struct ResultFacet { + pub field: String, + pub values: AHashMap, + pub prefix: String, + pub length: u32, + pub ranges: Ranges, +} + +/// DistanceUnit defines the unit for distance calculation: kilometers or miles. +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, ToSchema)] +pub enum DistanceUnit { + /// Kilometers + Kilometers, + /// Miles + Miles, +} + +/// DistanceField defines a field for proximity search. +#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)] +pub struct DistanceField { + /// field name of a numeric facet field (currently onyl Point field type supported) + pub field: String, + /// field name of the distance field we are deriving from the numeric facet field (Point type) and the base (Point type) + pub distance: String, + /// base point (lat,lon) for distance calculation + pub base: Point, + /// distance unit for the distance field: kilometers or miles + pub unit: DistanceUnit, +} + +impl Default for DistanceField { + fn default() -> Self { + DistanceField { + field: String::new(), + distance: String::new(), + base: Vec::new(), + unit: DistanceUnit::Kilometers, + } + } +} + +/// MinMaxField represents the minimum and maximum value of a field. +#[derive(Deserialize, Serialize, Debug, Clone, Default)] +pub struct MinMaxField { + /// minimum value of the field + pub min: ValueType, + /// maximum value of the field + pub max: ValueType, +} + +/// MinMaxFieldJson is a JSON representation of the minimum and maximum value of a field. +#[derive(Deserialize, Serialize, Debug, Clone, Default, ToSchema)] +pub struct MinMaxFieldJson { + /// minimum value of the field + pub min: serde_json::Value, + /// maximum value of the field + pub max: serde_json::Value, +} + +/// Value type for a field: u8, u16, u32, u64, i8, i16, i32, i64, f32, f64, point, none. 
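Taken together, the index options documented above are easier to read as one concrete value. A minimal configuration sketch, assuming the enum variants named in the doc comments; the numeric values are illustrative placeholders, not tuning advice:

// Illustrative sketch only; the chosen values are assumptions, not recommendations.
let meta = IndexMetaObject {
    id: 0,
    name: "demo_index".into(),
    similarity: SimilarityType::Bm25fProximity,
    tokenizer: TokenizerType::UnicodeAlphanumericFolded,
    stemmer: StemmerType::English,
    stop_words: StopwordType::None,
    frequent_words: FrequentwordType::English,
    // documented default: single terms + frequent bigrams + frequent trigrams
    ngram_indexing: NgramSet::SingleTerm as u8
        | NgramSet::NgramFF as u8
        | NgramSet::NgramFFF as u8,
    access_type: AccessType::Ram,
    spelling_correction: Some(SpellingCorrection {
        max_dictionary_edit_distance: 2,     // 1..2 recommended above
        term_length_threshold: None,         // same edit distance for all term lengths
        count_threshold: 5,                  // depends on corpus size
        max_dictionary_entries: 1_000_000,   // RAM guard, illustrative value
    }),
    query_completion: Some(QueryCompletion {
        max_completion_entries: 10_000,      // 0 would disable completions
    }),
};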
+#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq)] +pub enum ValueType { + /// Unsigned 8-bit integer + U8(u8), + /// Unsigned 16-bit integer + U16(u16), + /// Unsigned 32-bit integer + U32(u32), + /// Unsigned 64-bit integer + U64(u64), + /// Signed 8-bit integer + I8(i8), + /// Signed 16-bit integer + I16(i16), + /// Signed 32-bit integer + I32(i32), + /// Signed 64-bit integer + I64(i64), + /// Unix timestamp: the number of seconds since 1 January 1970 + Timestamp(i64), + /// Floating point 32-bit + F32(f32), + /// Floating point 64-bit + F64(f64), + /// Geographic Point: a pair of latitude and longitude coordinates and a distance unit (kilometers, miles) + Point(Point, DistanceUnit), + /// No value + #[default] + None, +} + +/// Facet field, with field name and a map of unique values and their count (number of times the specific value appears in the whole index). +#[derive(Deserialize, Serialize, Debug, Clone, Default)] +pub struct FacetField { + /// Facet field name + pub name: String, + /// Vector of facet value names and their count + /// The number of distinct string values and numerical ranges per facet field (cardinality) is limited to 65_536. + /// Once that number is reached, the facet field is not updated anymore (no new values are added, no existing values are counted). + pub values: IndexMap, usize)>, + + /// Minimum value of the facet field + pub min: ValueType, + /// Maximum value of the facet field + pub max: ValueType, + + #[serde(skip)] + pub(crate) offset: usize, + #[serde(skip)] + pub(crate) field_type: FieldType, +} + +/// Facet field, with field name and a vector of unique values and their count (number of times the specific value appears in the whole index). +/// Facet field: a vector of unique values and their count (number of times the specific value appears in the whole index). +pub type Facet = Vec<(String, usize)>; + +/// Shard wrapped in Arc and RwLock for concurrent read and write access. +pub type ShardArc = Arc>; + +/// Index wrapped in Arc and RwLock for concurrent read and write access. +pub type IndexArc = Arc>; + +/// The shard object of the index. It contains all levels and all segments of the index. +/// It also contains all properties that control indexing and intersection. +pub struct Shard { + /// Incompatible index format change: new library can't open old format, and old library can't open new format + pub index_format_version_major: u16, + /// Backward compatible format change: new library can open old format, but old library can't open new format + pub index_format_version_minor: u16, + + /// Number of indexed documents + pub(crate) indexed_doc_count: usize, + /// Number of comitted documents + pub(crate) committed_doc_count: usize, + /// The index countains indexed, but uncommitted documents. Documents will either committed automatically once the number exceeds 64K documents, or once commit is invoked manually. + pub(crate) uncommitted: bool, + + /// Defines a field in index schema: field, stored, indexed , field_type, facet, boost. 
+ pub schema_map: HashMap, + /// List of stored fields in the index: get_document and highlighter work only with stored fields + pub stored_field_names: Vec, + /// Specifies SimilarityType, TokenizerType and AccessType when creating an new index + pub meta: IndexMetaObject, + + pub(crate) is_last_level_incomplete: bool, + pub(crate) last_level_index_file_start_pos: u64, + pub(crate) last_level_docstore_file_start_pos: u64, + + /// Number of allowed parallel indexed documents (default=available_parallelism). Can be used to detect wehen all indexing processes are finished. + pub(crate) permits: Arc, + + pub(crate) docstore_file: File, + pub(crate) docstore_file_mmap: Mmap, + + pub(crate) delete_file: File, + pub(crate) delete_hashset: AHashSet, + + pub(crate) index_file: File, + pub(crate) index_path_string: String, + pub(crate) index_file_mmap: Mmap, + + pub(crate) compressed_index_segment_block_buffer: Vec, + pub(crate) compressed_docstore_segment_block_buffer: Vec, + + pub(crate) segment_number1: usize, + pub(crate) segment_number_bits1: usize, + + pub(crate) document_length_normalized_average: f32, + pub(crate) positions_sum_normalized: u64, + + pub(crate) level_index: Vec, + pub(crate) segments_index: Vec, + pub(crate) segments_level0: Vec, + + pub(crate) enable_fallback: bool, + pub(crate) enable_single_term_topk: bool, + pub(crate) enable_search_quality_test: bool, + pub(crate) enable_inter_query_threading: bool, + pub(crate) enable_inter_query_threading_auto: bool, + + pub(crate) segment_number_mask1: u32, + + pub(crate) indexed_field_vec: Vec, + pub(crate) indexed_field_id_bits: usize, + pub(crate) indexed_field_id_mask: usize, + pub(crate) longest_field_id: usize, + pub(crate) longest_field_auto: bool, + pub(crate) indexed_schema_vec: Vec, + + pub(crate) document_length_compressed_array: Vec<[u8; ROARING_BLOCK_SIZE]>, + pub(crate) key_count_sum: u64, + + pub(crate) block_id: usize, + pub(crate) strip_compressed_sum: u64, + pub(crate) postings_buffer: Vec, + pub(crate) postings_buffer_pointer: usize, + + pub(crate) size_compressed_positions_index: u64, + pub(crate) size_compressed_docid_index: u64, + + pub(crate) postinglist_count: usize, + pub(crate) docid_count: usize, + pub(crate) position_count: usize, + + pub(crate) mute: bool, + pub(crate) frequentword_results: AHashMap, + + pub(crate) facets: Vec, + pub(crate) facets_map: AHashMap, + pub(crate) facets_size_sum: usize, + pub(crate) facets_file: File, + pub(crate) facets_file_mmap: MmapMut, + pub(crate) bm25_component_cache: [f32; 256], + + pub(crate) string_set_to_single_term_id_vec: Vec>>, + + pub(crate) synonyms_map: AHashMap, + + #[cfg(feature = "zh")] + pub(crate) word_segmentation_option: Option, + + pub(crate) shard_number: usize, + pub(crate) index_option: Option>>, + pub(crate) stemmer: Option, + + pub(crate) stop_words: AHashSet, + pub(crate) frequent_words: Vec, + pub(crate) frequent_hashset: AHashSet, + pub(crate) key_head_size: usize, + pub(crate) level_terms: AHashMap, + pub(crate) level_completions: Arc, usize>>>, +} + +/// The root object of the index. It contains all levels and all segments of the index. +/// It also contains all properties that control indexing and intersection. 
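For the Facet alias introduced above (a vector of value/count pairs), a returned string facet might look like the following sketch; the facet values and counts are invented for illustration:

// Shape of a returned facet per the `Facet` type alias: unique values of a
// string facet field together with their document counts (illustrative data).
let language_facet: Facet = vec![
    ("en".to_string(), 12_345),
    ("de".to_string(), 2_001),
    ("fr".to_string(), 980),
];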
+pub struct Index { + /// Incompatible index format change: new library can't open old format, and old library can't open new format + pub index_format_version_major: u16, + /// Backward compatible format change: new library can open old format, but old library can't open new format + pub index_format_version_minor: u16, + + /// Number of indexed documents + pub(crate) indexed_doc_count: usize, + + /// Number of deleted documents + pub(crate) deleted_doc_count: usize, + + /// Defines a field in index schema: field, stored, indexed , field_type, facet, boost. + pub schema_map: HashMap, + /// List of stored fields in the index: get_document and highlighter work only with stored fields + pub stored_field_names: Vec, + /// Specifies SimilarityType, TokenizerType and AccessType when creating an new index + pub meta: IndexMetaObject, + + pub(crate) index_file: File, + pub(crate) index_path_string: String, + + pub(crate) compressed_index_segment_block_buffer: Vec, + + pub(crate) segment_number1: usize, + pub(crate) segment_number_mask1: u32, + + pub(crate) indexed_field_vec: Vec, + + pub(crate) mute: bool, + + pub(crate) facets: Vec, + + pub(crate) synonyms_map: AHashMap, + + pub(crate) shard_number: usize, + pub(crate) shard_bits: usize, + pub(crate) shard_vec: Vec>>, + pub(crate) shard_queue: Arc>>, + + pub(crate) max_dictionary_entries: usize, + pub(crate) symspell_option: Option>>, + + pub(crate) max_completion_entries: usize, + pub(crate) completion_option: Option>>, + + pub(crate) frequent_hashset: AHashSet, +} + +///SynonymItem is a vector of tuples: (synonym term, (64-bit synonym term hash, 64-bit synonym term hash)) +pub type SynonymItem = Vec<(String, (u64, u32))>; + +/// Get the version of the SeekStorm search library +pub fn version() -> &'static str { + VERSION +} + +pub(crate) fn get_synonyms_map( + synonyms: &[Synonym], + segment_number_mask1: u32, +) -> AHashMap { + let mut synonyms_map: AHashMap = AHashMap::new(); + for synonym in synonyms.iter() { + if synonym.terms.len() > 1 { + let mut hashes: Vec<(String, (u64, u32))> = Vec::new(); + for term in synonym.terms.iter() { + let term_bytes = term.to_lowercase(); + hashes.push(( + term.to_string(), + ( + hash64(term_bytes.as_bytes()), + hash32(term_bytes.as_bytes()) & segment_number_mask1, + ), + )); + } + if synonym.multiway { + for (i, hash) in hashes.iter().enumerate() { + let new_synonyms = if i == 0 { + hashes[1..].to_vec() + } else if i == hashes.len() - 1 { + hashes[..hashes.len() - 1].to_vec() + } else { + [&hashes[..i], &hashes[(i + 1)..]].concat() + }; + + if let Some(item) = synonyms_map.get_mut(&hash.1.0) { + *item = item + .clone() + .into_iter() + .chain(new_synonyms.into_iter()) + .collect::>() + .into_iter() + .collect(); + } else { + synonyms_map.insert(hash.1.0, new_synonyms); + } + } + } else { + synonyms_map.insert(hashes[0].1.0, hashes[1..].to_vec()); + } + } + } + synonyms_map +} + +/// N-gram indexing: n-grams are indexed in addition to single terms, for faster phrase search, at the cost of higher index size +/// Setting valid both for index time and query time. Any change requires reindexing. 
+/// bitwise OR flags: +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, FromPrimitive)] +pub enum NgramSet { + /// no n-grams, only single terms are indexed + SingleTerm = 0b00000000, + /// Ngram frequent frequent + NgramFF = 0b00000001, + /// Ngram frequent rare + NgramFR = 0b00000010, + /// Ngram rare frequent + NgramRF = 0b00000100, + /// Ngram frequent frequent frequent + NgramFFF = 0b00001000, + /// Ngram rare frequent frequent + NgramRFF = 0b00010000, + /// Ngram frequent frequent rare + NgramFFR = 0b00100000, + /// Ngram frequent rare frequent + NgramFRF = 0b01000000, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, FromPrimitive, Default)] +pub(crate) enum NgramType { + /// no n-grams, only single terms are indexed + #[default] + SingleTerm = 0, + /// Ngram frequent frequent + NgramFF = 1, + /// Ngram frequent rare + NgramFR = 2, + /// Ngram rare frequent + NgramRF = 3, + /// Ngram frequent frequent frequent + NgramFFF = 4, + /// Ngram rare frequent frequent + NgramRFF = 5, + /// Ngram frequent frequent rare + NgramFFR = 6, + /// Ngram frequent rare frequent + NgramFRF = 7, +} + +/// Create index in RAM. +/// Inner data structures for create index and open_index +/// * `index_path` - index path. +/// * `meta` - index meta object. +/// * `schema` - schema. +/// * `synonyms` - vector of synonyms. +/// * `segment_number_bits1` - number of index segments: e.g. 11 bits for 2048 segments. +/// * `mute` - prevent emitting status messages (e.g. when using pipes for data interprocess communication). +/// * `force_shard_number` - set number of shards manually or automatically. +/// - none: number of shards is set automatically = number of physical processor cores (default) +/// - small: slower indexing, higher latency, slightly higher throughput, faster realtime search, lower RAM consumption +/// - large: faster indexing, lower latency, slightly lower throughput, slower realtime search, higher RAM consumption +pub async fn create_index( + index_path: PathBuf, + meta: IndexMetaObject, + schema: &Vec, + synonyms: &Vec, + segment_number_bits1: usize, + mute: bool, + force_shard_number: Option, +) -> Result { + create_index_root( + index_path, + meta, + schema, + true, + synonyms, + segment_number_bits1, + mute, + force_shard_number, + ) + .await +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn create_index_root( + index_path: PathBuf, + meta: IndexMetaObject, + schema: &Vec, + serialize_schema: bool, + synonyms: &Vec, + segment_number_bits1: usize, + mute: bool, + force_shard_number: Option, +) -> Result { + let frequent_hashset: AHashSet = match &meta.frequent_words { + FrequentwordType::None => AHashSet::new(), + FrequentwordType::English => FREQUENT_EN.lines().map(|x| hash64(x.as_bytes())).collect(), + FrequentwordType::German => FREQUENT_EN.lines().map(|x| hash64(x.as_bytes())).collect(), + FrequentwordType::French => FREQUENT_FR.lines().map(|x| hash64(x.as_bytes())).collect(), + FrequentwordType::Spanish => FREQUENT_ES.lines().map(|x| hash64(x.as_bytes())).collect(), + FrequentwordType::Custom { terms } => terms.iter().map(|x| hash64(x.as_bytes())).collect(), + }; + + let segment_number1 = 1usize << segment_number_bits1; + let segment_number_mask1 = (1u32 << segment_number_bits1) - 1; + + let index_path_buf = index_path.to_path_buf(); + let index_path_string = index_path_buf.to_str().unwrap(); + + if !index_path.exists() { + if !mute { + println!("index path created: {} ", index_path_string); + } + fs::create_dir_all(index_path).unwrap(); + } 
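Because the create_index parameter list documented above is long, a usage sketch may help. Everything except the documented defaults (11 segment bits for 2048 segments, automatic shard count) is an assumption, including the path and the pre-built `schema` variable:

// Hedged usage sketch for `create_index`; `meta` is an IndexMetaObject and
// `schema` a pre-built vector of SchemaField definitions (both assumed here).
let index_arc = create_index(
    PathBuf::from("/tmp/demo_index"),
    meta,
    &schema,
    &Vec::new(),   // no synonyms
    11,            // 11 bits -> 2048 index segments
    false,         // emit status messages
    None,          // shard number = number of physical cores
)
.await
.expect("index creation failed");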
+ + let file_path = PathBuf::new(index_path_string).join(FILE_PATH); + if !file_path.exists() { + if !mute { + println!("index directory created: {} ", file_path.display()); + } + fs::create_dir_all(file_path).unwrap(); + } + + match File::options() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(PathBuf::new(index_path).join(INDEX_FILENAME)) + { + Ok(index_file) => { + let mut document_length_compressed_array: Vec<[u8; ROARING_BLOCK_SIZE]> = Vec::new(); + let mut indexed_field_vec: Vec = Vec::new(); + let mut facets_vec: Vec = Vec::new(); + let mut facets_map: AHashMap = AHashMap::new(); + + let mut schema_map: HashMap = HashMap::new(); + let mut indexed_schema_vec: Vec = Vec::new(); + let mut stored_field_names = Vec::new(); + let mut facets_size_sum = 0; + let mut longest_field_id_option: Option = None; + for (i, schema_field) in schema.iter().enumerate() { + let mut schema_field_clone = schema_field.clone(); + schema_field_clone.indexed_field_id = indexed_field_vec.len(); + if schema_field.longest && schema_field.indexed { + longest_field_id_option = Some(schema_field_clone.indexed_field_id); + } + + schema_field_clone.field_id = i; + schema_map.insert(schema_field.field.clone(), schema_field_clone.clone()); + + if schema_field.facet { + let facet_size = match schema_field.field_type { + FieldType::U8 => 1, + FieldType::U16 => 2, + FieldType::U32 => 4, + FieldType::U64 => 8, + FieldType::I8 => 1, + FieldType::I16 => 2, + FieldType::I32 => 4, + FieldType::I64 => 8, + FieldType::Timestamp => 8, + FieldType::F32 => 4, + FieldType::F64 => 8, + FieldType::String16 => 2, + FieldType::String32 => 4, + FieldType::StringSet16 => 2, + FieldType::StringSet32 => 4, + FieldType::Point => 8, + _ => 1, + }; + + facets_map.insert(schema_field.field.clone(), facets_vec.len()); + facets_vec.push(FacetField { + name: schema_field.field.clone(), + values: IndexMap::new(), + min: ValueType::None, + max: ValueType::None, + offset: facets_size_sum, + field_type: schema_field.field_type.clone(), + }); + facets_size_sum += facet_size; + } + + if schema_field.indexed { + indexed_field_vec.push(IndexedField { + schema_field_name: schema_field.field.clone(), + is_longest_field: false, + field_length_sum: 0, + indexed_field_id: indexed_field_vec.len(), + }); + indexed_schema_vec.push(schema_field_clone); + document_length_compressed_array.push([0; ROARING_BLOCK_SIZE]); + } + + if schema_field.stored { + stored_field_names.push(schema_field.field.clone()); + } + } + + if !facets_vec.is_empty() + && let Ok(file) = File::open(PathBuf::new(index_path).join(FACET_VALUES_FILENAME)) + && let Ok(facets) = serde_json::from_reader(BufReader::new(file)) + { + let mut facets: Vec = facets; + if facets_vec.len() == facets.len() { + for i in 0..facets.len() { + facets[i].offset = facets_vec[i].offset; + facets[i].field_type = facets_vec[i].field_type.clone(); + } + } + facets_vec = facets; + } + + let synonyms_map = get_synonyms_map(synonyms, segment_number_mask1); + + let shard_number = if let Some(shard_number) = force_shard_number { + shard_number + } else { + num_cpus::get_physical() + }; + let shard_bits = if serialize_schema { + (usize::BITS - (shard_number - 1).leading_zeros()) as usize + } else { + 0 + }; + + let mut shard_vec: Vec>> = Vec::new(); + let mut shard_queue = VecDeque::::new(); + if serialize_schema { + let mut result_object_list = Vec::new(); + let index_path_clone = Arc::new(index_path.to_path_buf()); + for i in 0..shard_number { + let index_path_clone2 = index_path_clone.clone(); + 
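+            // Each iteration spawns a tokio task that creates one shard under
+            // `<index_path>/shards/<i>`, with the shard's `meta.id` set to its shard
+            // number; the join handles are awaited below and the shards are collected
+            // into `shard_vec` and `shard_queue`.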
let meta_clone = meta.clone(); + let schema_clone = schema.clone(); + result_object_list.push(tokio::spawn(async move { + let shard_path = index_path_clone2.join("shards").join(i.to_string()); + let mut shard_meta = meta_clone.clone(); + shard_meta.id = i as u64; + let mut shard = create_shard( + &shard_path, + &shard_meta, + &schema_clone, + serialize_schema, + &Vec::new(), + segment_number_bits1, + mute, + longest_field_id_option, + ) + .unwrap(); + shard.shard_number = shard_number; + let shard_arc = Arc::new(RwLock::new(shard)); + (shard_arc, i) + })); + } + for result_object_shard in result_object_list { + let ro_shard = result_object_shard.await.unwrap(); + shard_vec.push(ro_shard.0); + shard_queue.push_back(ro_shard.1); + } + } + + let shard_queue_arc = Arc::new(RwLock::new(shard_queue)); + + let mut index = Index { + index_format_version_major: INDEX_FORMAT_VERSION_MAJOR, + index_format_version_minor: INDEX_FORMAT_VERSION_MINOR, + + index_file, + index_path_string: index_path_string.to_owned(), + stored_field_names, + + compressed_index_segment_block_buffer: vec![0; 10_000_000], + indexed_doc_count: 0, + deleted_doc_count: 0, + segment_number1: 0, + segment_number_mask1: 0, + schema_map, + indexed_field_vec, + meta: meta.clone(), + mute, + facets: facets_vec, + synonyms_map, + + shard_number, + shard_bits, + shard_vec, + shard_queue: shard_queue_arc, + + max_dictionary_entries: if let Some(spelling_correction) = &meta.spelling_correction + { + spelling_correction.max_dictionary_entries + } else { + usize::MAX + }, + + symspell_option: if let Some(spelling_correction) = meta.spelling_correction { + Some(Arc::new(RwLock::new(SymSpell::new( + spelling_correction.max_dictionary_edit_distance, + spelling_correction.term_length_threshold, + 7, + spelling_correction.count_threshold, + )))) + } else { + None + }, + + max_completion_entries: if let Some(query_completion) = &meta.query_completion { + query_completion.max_completion_entries + } else { + usize::MAX + }, + + completion_option: meta + .query_completion + .as_ref() + .map(|_query_completion| Arc::new(RwLock::new(PruningRadixTrie::new()))), + + frequent_hashset, + }; + + let file_len = index.index_file.metadata().unwrap().len(); + if file_len == 0 { + write_u16( + INDEX_FORMAT_VERSION_MAJOR, + &mut index.compressed_index_segment_block_buffer, + 0, + ); + write_u16( + INDEX_FORMAT_VERSION_MINOR, + &mut index.compressed_index_segment_block_buffer, + 2, + ); + let _ = index.index_file.write( + &index.compressed_index_segment_block_buffer[0..INDEX_HEADER_SIZE as usize], + ); + } else { + let _ = index.index_file.read( + &mut index.compressed_index_segment_block_buffer[0..INDEX_HEADER_SIZE as usize], + ); + index.index_format_version_major = + read_u16(&index.compressed_index_segment_block_buffer, 0); + index.index_format_version_minor = + read_u16(&index.compressed_index_segment_block_buffer, 2); + + if INDEX_FORMAT_VERSION_MAJOR != index.index_format_version_major { + return Err("incompatible index format version ".to_string() + + &INDEX_FORMAT_VERSION_MAJOR.to_string() + + " " + + &index.index_format_version_major.to_string()); + }; + } + + index.segment_number1 = segment_number1; + index.segment_number_mask1 = segment_number_mask1; + + if serialize_schema { + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(SCHEMA_FILENAME)).unwrap(), + &schema, + ) + .unwrap(); + + if !synonyms.is_empty() { + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(SYNONYMS_FILENAME)).unwrap(), + &synonyms, + ) + 
.unwrap(); + } + + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(META_FILENAME)).unwrap(), + &index.meta, + ) + .unwrap(); + } + + let index_arc = Arc::new(RwLock::new(index)); + + if serialize_schema { + for shard in index_arc.write().await.shard_vec.iter() { + shard.write().await.index_option = Some(index_arc.clone()); + } + } + + Ok(index_arc) + } + Err(e) => { + println!("file opening error"); + Err(e.to_string()) + } + } +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn create_shard( + index_path: PathBuf, + meta: &IndexMetaObject, + schema: &Vec, + serialize_schema: bool, + synonyms: &Vec, + segment_number_bits1: usize, + mute: bool, + longest_field_id_option: Option, +) -> Result { + let segment_number1 = 1usize << segment_number_bits1; + let segment_number_mask1 = (1u32 << segment_number_bits1) - 1; + + let index_path_buf = index_path.to_path_buf(); + let index_path_string = index_path_buf.to_str().unwrap(); + + if !index_path.exists() { + fs::create_dir_all(index_path).unwrap(); + } + + let file_path = PathBuf::new(index_path_string).join(FILE_PATH); + if !file_path.exists() { + fs::create_dir_all(file_path).unwrap(); + } + + match File::options() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(PathBuf::new(index_path).join(INDEX_FILENAME)) + { + Ok(index_file) => { + let docstore_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(PathBuf::new(index_path).join(DOCSTORE_FILENAME)) + .unwrap(); + + let delete_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(PathBuf::new(index_path).join(DELETE_FILENAME)) + .unwrap(); + + let facets_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(PathBuf::new(index_path).join(FACET_FILENAME)) + .unwrap(); + + let mut document_length_compressed_array: Vec<[u8; ROARING_BLOCK_SIZE]> = Vec::new(); + let mut indexed_field_vec: Vec = Vec::new(); + let mut facets_vec: Vec = Vec::new(); + let mut facets_map: AHashMap = AHashMap::new(); + + let mut schema_map: HashMap = HashMap::new(); + let mut indexed_schema_vec: Vec = Vec::new(); + let mut stored_fields_flag = false; + let mut stored_field_names = Vec::new(); + let mut facets_size_sum = 0; + for (i, schema_field) in schema.iter().enumerate() { + let mut schema_field_clone = schema_field.clone(); + schema_field_clone.indexed_field_id = indexed_field_vec.len(); + schema_field_clone.field_id = i; + schema_map.insert(schema_field.field.clone(), schema_field_clone.clone()); + + if schema_field.facet { + let facet_size = match schema_field.field_type { + FieldType::U8 => 1, + FieldType::U16 => 2, + FieldType::U32 => 4, + FieldType::U64 => 8, + FieldType::I8 => 1, + FieldType::I16 => 2, + FieldType::I32 => 4, + FieldType::I64 => 8, + FieldType::Timestamp => 8, + FieldType::F32 => 4, + FieldType::F64 => 8, + FieldType::String16 => 2, + FieldType::String32 => 4, + FieldType::StringSet16 => 2, + FieldType::StringSet32 => 4, + FieldType::Point => 8, + _ => 1, + }; + + facets_map.insert(schema_field.field.clone(), facets_vec.len()); + facets_vec.push(FacetField { + name: schema_field.field.clone(), + values: IndexMap::new(), + min: ValueType::None, + max: ValueType::None, + offset: facets_size_sum, + field_type: schema_field.field_type.clone(), + }); + facets_size_sum += facet_size; + } + + if schema_field.indexed { + indexed_field_vec.push(IndexedField { + schema_field_name: schema_field.field.clone(), + is_longest_field: false, 
+ field_length_sum: 0, + indexed_field_id: indexed_field_vec.len(), + }); + indexed_schema_vec.push(schema_field_clone); + document_length_compressed_array.push([0; ROARING_BLOCK_SIZE]); + } + + if schema_field.stored { + stored_fields_flag = true; + stored_field_names.push(schema_field.field.clone()); + } + } + + let indexed_field_id_bits = + (usize::BITS - (indexed_field_vec.len() - 1).leading_zeros()) as usize; + + let index_file_mmap; + let docstore_file_mmap = if meta.access_type == AccessType::Mmap { + index_file_mmap = unsafe { Mmap::map(&index_file).expect("Unable to create Mmap") }; + unsafe { Mmap::map(&docstore_file).expect("Unable to create Mmap") } + } else { + index_file_mmap = unsafe { + MmapOptions::new() + .len(0) + .map(&index_file) + .expect("Unable to create Mmap") + }; + unsafe { + MmapOptions::new() + .len(0) + .map(&docstore_file) + .expect("Unable to create Mmap") + } + }; + + if !facets_vec.is_empty() + && let Ok(file) = File::open(PathBuf::new(index_path).join(FACET_VALUES_FILENAME)) + && let Ok(facets) = serde_json::from_reader(BufReader::new(file)) + { + let mut facets: Vec = facets; + if facets_vec.len() == facets.len() { + for i in 0..facets.len() { + facets[i].offset = facets_vec[i].offset; + facets[i].field_type = facets_vec[i].field_type.clone(); + } + } + facets_vec = facets; + } + + let facets_file_mmap = if !facets_vec.is_empty() { + if facets_file.metadata().unwrap().len() == 0 { + facets_file + .set_len((facets_size_sum * ROARING_BLOCK_SIZE) as u64) + .expect("Unable to set len"); + } + + unsafe { MmapMut::map_mut(&facets_file).expect("Unable to create Mmap") } + } else { + unsafe { MmapMut::map_mut(&facets_file).expect("Unable to create Mmap") } + }; + + let synonyms_map = get_synonyms_map(synonyms, segment_number_mask1); + + let facets_len = facets_vec.len(); + + #[cfg(feature = "zh")] + let word_segmentation_option = if meta.tokenizer == TokenizerType::UnicodeAlphanumericZH + { + let mut word_segmentation = WordSegmentationTM::new(); + word_segmentation.load_dictionary(0, 1, true); + Some(word_segmentation) + } else { + None + }; + + let shard_number = 1; + + let stemmer = match meta.stemmer { + StemmerType::Arabic => Some(Stemmer::create(Algorithm::Arabic)), + StemmerType::Danish => Some(Stemmer::create(Algorithm::Danish)), + StemmerType::Dutch => Some(Stemmer::create(Algorithm::Dutch)), + StemmerType::English => Some(Stemmer::create(Algorithm::English)), + StemmerType::Finnish => Some(Stemmer::create(Algorithm::Finnish)), + StemmerType::French => Some(Stemmer::create(Algorithm::French)), + StemmerType::German => Some(Stemmer::create(Algorithm::German)), + StemmerType::Greek => Some(Stemmer::create(Algorithm::Greek)), + StemmerType::Hungarian => Some(Stemmer::create(Algorithm::Hungarian)), + StemmerType::Italian => Some(Stemmer::create(Algorithm::Italian)), + StemmerType::Norwegian => Some(Stemmer::create(Algorithm::Norwegian)), + StemmerType::Portuguese => Some(Stemmer::create(Algorithm::Portuguese)), + StemmerType::Romanian => Some(Stemmer::create(Algorithm::Romanian)), + StemmerType::Russian => Some(Stemmer::create(Algorithm::Russian)), + StemmerType::Spanish => Some(Stemmer::create(Algorithm::Spanish)), + StemmerType::Swedish => Some(Stemmer::create(Algorithm::Swedish)), + StemmerType::Tamil => Some(Stemmer::create(Algorithm::Tamil)), + StemmerType::Turkish => Some(Stemmer::create(Algorithm::Turkish)), + _ => None, + }; + + let stop_words: AHashSet = match &meta.stop_words { + StopwordType::None => AHashSet::new(), + StopwordType::English => 
FREQUENT_EN.lines().map(|x| x.to_string()).collect(), + StopwordType::German => FREQUENT_DE.lines().map(|x| x.to_string()).collect(), + StopwordType::French => FREQUENT_FR.lines().map(|x| x.to_string()).collect(), + StopwordType::Spanish => FREQUENT_ES.lines().map(|x| x.to_string()).collect(), + StopwordType::Custom { terms } => terms.iter().map(|x| x.to_string()).collect(), + }; + + let frequent_words: Vec = match &meta.frequent_words { + FrequentwordType::None => Vec::new(), + FrequentwordType::English => { + let mut words: Vec = + FREQUENT_EN.lines().map(|x| x.to_string()).collect(); + words.sort_unstable(); + words + } + FrequentwordType::German => { + let mut words: Vec = + FREQUENT_DE.lines().map(|x| x.to_string()).collect(); + words.sort_unstable(); + words + } + FrequentwordType::French => { + let mut words: Vec = + FREQUENT_FR.lines().map(|x| x.to_string()).collect(); + words.sort_unstable(); + words + } + FrequentwordType::Spanish => { + let mut words: Vec = + FREQUENT_ES.lines().map(|x| x.to_string()).collect(); + words.sort_unstable(); + words + } + FrequentwordType::Custom { terms } => { + let mut words: Vec = terms.iter().map(|x| x.to_string()).collect(); + words.sort_unstable(); + words + } + }; + + let frequent_hashset: AHashSet = frequent_words + .iter() + .map(|x| hash64(x.as_bytes())) + .collect(); + + let mut index = Shard { + index_format_version_major: INDEX_FORMAT_VERSION_MAJOR, + index_format_version_minor: INDEX_FORMAT_VERSION_MINOR, + docstore_file, + delete_file, + delete_hashset: AHashSet::new(), + index_file, + index_path_string: index_path_string.to_owned(), + index_file_mmap, + docstore_file_mmap, + stored_field_names, + compressed_index_segment_block_buffer: vec![0; 10_000_000], + compressed_docstore_segment_block_buffer: if stored_fields_flag { + vec![0; ROARING_BLOCK_SIZE * 4] + } else { + Vec::new() + }, + document_length_normalized_average: 0.0, + indexed_doc_count: 0, + committed_doc_count: 0, + is_last_level_incomplete: false, + last_level_index_file_start_pos: 0, + last_level_docstore_file_start_pos: 0, + positions_sum_normalized: 0, + segment_number1: 0, + segment_number_bits1, + segment_number_mask1: 0, + level_index: Vec::new(), + segments_index: Vec::new(), + segments_level0: Vec::new(), + uncommitted: false, + enable_fallback: false, + enable_single_term_topk: false, + enable_search_quality_test: false, + enable_inter_query_threading: false, + enable_inter_query_threading_auto: false, + schema_map, + indexed_field_id_bits, + indexed_field_id_mask: (1usize << indexed_field_id_bits) - 1, + longest_field_id: longest_field_id_option.unwrap_or_default(), + longest_field_auto: longest_field_id_option.is_none(), + indexed_field_vec, + indexed_schema_vec, + meta: meta.clone(), + document_length_compressed_array, + key_count_sum: 0, + + block_id: 0, + strip_compressed_sum: 0, + postings_buffer: vec![0; POSTING_BUFFER_SIZE], + postings_buffer_pointer: 0, + + docid_count: 0, + size_compressed_docid_index: 0, + size_compressed_positions_index: 0, + position_count: 0, + postinglist_count: 0, + permits: Arc::new(Semaphore::new(1)), + mute, + frequentword_results: AHashMap::new(), + facets: facets_vec, + facets_map, + facets_size_sum, + facets_file, + facets_file_mmap, + string_set_to_single_term_id_vec: vec![AHashMap::new(); facets_len], + bm25_component_cache: [0.0; 256], + synonyms_map, + #[cfg(feature = "zh")] + word_segmentation_option, + + shard_number, + index_option: None, + stemmer, + stop_words, + frequent_words, + frequent_hashset, + key_head_size: 
if meta.ngram_indexing == 0 { + 20 + } else if meta.ngram_indexing < 8 { + 22 + } else { + 23 + }, + level_terms: AHashMap::new(), + level_completions: Arc::new(RwLock::new(AHashMap::with_capacity(200_000))), + }; + + let file_len = index.index_file.metadata().unwrap().len(); + if file_len == 0 { + write_u16( + INDEX_FORMAT_VERSION_MAJOR, + &mut index.compressed_index_segment_block_buffer, + 0, + ); + write_u16( + INDEX_FORMAT_VERSION_MINOR, + &mut index.compressed_index_segment_block_buffer, + 2, + ); + let _ = index.index_file.write( + &index.compressed_index_segment_block_buffer[0..INDEX_HEADER_SIZE as usize], + ); + } else { + let _ = index.index_file.read( + &mut index.compressed_index_segment_block_buffer[0..INDEX_HEADER_SIZE as usize], + ); + index.index_format_version_major = + read_u16(&index.compressed_index_segment_block_buffer, 0); + index.index_format_version_minor = + read_u16(&index.compressed_index_segment_block_buffer, 2); + + if INDEX_FORMAT_VERSION_MAJOR != index.index_format_version_major { + return Err("incompatible index format version ".to_string() + + &INDEX_FORMAT_VERSION_MAJOR.to_string() + + " " + + &index.index_format_version_major.to_string()); + }; + } + + index.segment_number1 = segment_number1; + index.segment_number_mask1 = segment_number_mask1; + index.segments_level0 = vec![ + SegmentLevel0 { + segment: AHashMap::with_capacity(SEGMENT_KEY_CAPACITY), + ..Default::default() + }; + index.segment_number1 + ]; + + index.segments_index = Vec::new(); + for _i in 0..index.segment_number1 { + index.segments_index.push(SegmentIndex { + byte_array_blocks: Vec::new(), + byte_array_blocks_pointer: Vec::new(), + segment: AHashMap::new(), + }); + } + + if serialize_schema { + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(SCHEMA_FILENAME)).unwrap(), + &schema, + ) + .unwrap(); + + if !synonyms.is_empty() { + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(SYNONYMS_FILENAME)).unwrap(), + &synonyms, + ) + .unwrap(); + } + + serde_json::to_writer( + &File::create(PathBuf::new(index_path).join(META_FILENAME)).unwrap(), + &index.meta, + ) + .unwrap(); + } + + Ok(index) + } + Err(e) => { + println!("file opening error"); + Err(e.to_string()) + } + } +} + +#[inline(always)] +pub(crate) fn get_document_length_compressed_mmap( + index: &Shard, + field_id: usize, + block_id: usize, + doc_id_block: usize, +) -> u8 { + index.index_file_mmap[index.level_index[block_id].document_length_compressed_array_pointer + + (field_id << 16) + + doc_id_block] +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn get_max_score( + index: &Shard, + segment: &SegmentIndex, + posting_count_ngram_1: u32, + posting_count_ngram_2: u32, + posting_count_ngram_3: u32, + posting_count: u32, + block_id: usize, + max_docid: usize, + max_p_docid: usize, + pointer_pivot_p_docid: usize, + compression_type_pointer: u32, + ngram_type: &NgramType, +) -> f32 { + let byte_array = if index.meta.access_type == AccessType::Mmap { + &index.index_file_mmap[segment.byte_array_blocks_pointer[block_id].0 + ..segment.byte_array_blocks_pointer[block_id].0 + + segment.byte_array_blocks_pointer[block_id].1] + } else { + &segment.byte_array_blocks[block_id] + }; + + let mut bm25f = 0.0; + + let rank_position_pointer_range: u32 = + compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let posting_pointer_size_sum; + let rank_position_pointer; + let posting_pointer_size; + let embed_flag; + if max_p_docid < pointer_pivot_p_docid { + posting_pointer_size_sum = 
max_p_docid as u32 * 2; + rank_position_pointer = read_u16( + byte_array, + rank_position_pointer_range as usize + posting_pointer_size_sum as usize, + ) as u32; + posting_pointer_size = 2; + embed_flag = (rank_position_pointer & 0b10000000_00000000) != 0; + } else { + posting_pointer_size_sum = (max_p_docid as u32) * 3 - pointer_pivot_p_docid as u32; + rank_position_pointer = read_u32( + byte_array, + rank_position_pointer_range as usize + posting_pointer_size_sum as usize, + ); + posting_pointer_size = 3; + embed_flag = (rank_position_pointer & 0b10000000_00000000_00000000) != 0; + }; + + let positions_pointer = if embed_flag { + rank_position_pointer_range as usize + posting_pointer_size_sum as usize + } else { + let pointer_value = if posting_pointer_size == 2 { + rank_position_pointer & 0b01111111_11111111 + } else { + rank_position_pointer & 0b01111111_11111111_11111111 + } as usize; + + rank_position_pointer_range as usize - pointer_value + }; + + let mut field_vec: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + let mut field_vec_ngram1 = SmallVec::new(); + let mut field_vec_ngram2 = SmallVec::new(); + let mut field_vec_ngram3 = SmallVec::new(); + + decode_positions_commit( + posting_pointer_size, + embed_flag, + byte_array, + positions_pointer, + ngram_type, + index.indexed_field_vec.len(), + index.indexed_field_id_bits, + index.indexed_field_id_mask, + index.longest_field_id as u16, + &mut field_vec, + &mut field_vec_ngram1, + &mut field_vec_ngram2, + &mut field_vec_ngram3, + ); + + if ngram_type == &NgramType::SingleTerm + || index.meta.similarity == SimilarityType::Bm25fProximity + { + let idf = (((index.indexed_doc_count as f32 - posting_count as f32 + 0.5) + / (posting_count as f32 + 0.5)) + + 1.0) + .ln(); + + for field in field_vec.iter() { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[if index.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap(index, field.0 as usize, block_id, max_docid) + } else { + index.level_index[block_id].document_length_compressed_array[field.0 as usize] + [max_docid] + } as usize] as f32; + + let document_length_quotient = + document_length_normalized / index.document_length_normalized_average; + + let tf = field.1 as f32; + + let weight = index.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * idf + * ((tf * (K + 1.0) / (tf + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } else if ngram_type == &NgramType::NgramFF + || ngram_type == &NgramType::NgramFR + || ngram_type == &NgramType::NgramRF + { + let idf_ngram1 = (((index.indexed_doc_count as f32 - posting_count_ngram_1 as f32 + 0.5) + / (posting_count_ngram_1 as f32 + 0.5)) + + 1.0) + .ln(); + + let idf_ngram2 = (((index.indexed_doc_count as f32 - posting_count_ngram_2 as f32 + 0.5) + / (posting_count_ngram_2 as f32 + 0.5)) + + 1.0) + .ln(); + + for field in field_vec_ngram1.iter() { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[if index.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap(index, field.0 as usize, block_id, max_docid) + } else { + index.level_index[block_id].document_length_compressed_array[field.0 as usize] + [max_docid] + } as usize] as f32; + + let document_length_quotient = + document_length_normalized / index.document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + let weight = index.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B 
* document_length_quotient))))) + + SIGMA); + } + + for field in field_vec_ngram2.iter() { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[if index.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap(index, field.0 as usize, block_id, max_docid) + } else { + index.level_index[block_id].document_length_compressed_array[field.0 as usize] + [max_docid] + } as usize] as f32; + + let document_length_quotient = + document_length_normalized / index.document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + let weight = index.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } else { + let idf_ngram1 = (((index.indexed_doc_count as f32 - posting_count_ngram_1 as f32 + 0.5) + / (posting_count_ngram_1 as f32 + 0.5)) + + 1.0) + .ln(); + + let idf_ngram2 = (((index.indexed_doc_count as f32 - posting_count_ngram_2 as f32 + 0.5) + / (posting_count_ngram_2 as f32 + 0.5)) + + 1.0) + .ln(); + + let idf_ngram3 = (((index.indexed_doc_count as f32 - posting_count_ngram_3 as f32 + 0.5) + / (posting_count_ngram_3 as f32 + 0.5)) + + 1.0) + .ln(); + + for field in field_vec_ngram1.iter() { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[if index.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap(index, field.0 as usize, block_id, max_docid) + } else { + index.level_index[block_id].document_length_compressed_array[field.0 as usize] + [max_docid] + } as usize] as f32; + + let document_length_quotient = + document_length_normalized / index.document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + let weight = index.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in field_vec_ngram2.iter() { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[if index.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap(index, field.0 as usize, block_id, max_docid) + } else { + index.level_index[block_id].document_length_compressed_array[field.0 as usize] + [max_docid] + } as usize] as f32; + + let document_length_quotient = + document_length_normalized / index.document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + let weight = index.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in field_vec_ngram3.iter() { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[if index.meta.access_type + == AccessType::Mmap + { + get_document_length_compressed_mmap(index, field.0 as usize, block_id, max_docid) + } else { + index.level_index[block_id].document_length_compressed_array[field.0 as usize] + [max_docid] + } as usize] as f32; + + let document_length_quotient = + document_length_normalized / index.document_length_normalized_average; + + let tf_ngram3 = field.1 as f32; + + let weight = index.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + bm25f +} + +pub(crate) fn update_list_max_impact_score(index: &mut Shard) { + if index.meta.access_type == 
AccessType::Mmap { + return; + } + + for key0 in 0..index.segment_number1 { + let keys: Vec = index.segments_index[key0].segment.keys().cloned().collect(); + for key in keys { + let ngram_type = FromPrimitive::from_u64(key & 0b111).unwrap_or(NgramType::SingleTerm); + + let blocks_len = index.segments_index[key0].segment[&key].blocks.len(); + let mut max_list_score = 0.0; + for block_index in 0..blocks_len { + let segment = &index.segments_index[key0]; + let posting_list = &segment.segment[&key]; + let block = &posting_list.blocks[block_index]; + let max_block_score = get_max_score( + index, + segment, + posting_list.posting_count_ngram_1, + posting_list.posting_count_ngram_2, + posting_list.posting_count_ngram_3, + posting_list.posting_count, + block.block_id as usize, + block.max_docid as usize, + block.max_p_docid as usize, + block.pointer_pivot_p_docid as usize, + block.compression_type_pointer, + &ngram_type, + ); + + index.segments_index[key0] + .segment + .get_mut(&key) + .unwrap() + .blocks[block_index] + .max_block_score = max_block_score; + max_list_score = f32::max(max_list_score, max_block_score); + } + index.segments_index[key0] + .segment + .get_mut(&key) + .unwrap() + .max_list_score = max_list_score; + } + } +} + +/// Loads the index from disk into RAM or MMAP. +/// * `index_path` - index path. +/// * `mute` - prevent emitting status messages (e.g. when using pipes for data interprocess communication). +pub(crate) async fn open_shard(index_path: PathBuf, mute: bool) -> Result { + if !mute { + println!("opening index ..."); + } + + let mut index_mmap_position = INDEX_HEADER_SIZE as usize; + let mut docstore_mmap_position = 0; + + match File::open(PathBuf::new(index_path).join(META_FILENAME)) { + Ok(meta_file) => { + let meta: IndexMetaObject = serde_json::from_reader(BufReader::new(meta_file)).unwrap(); + + match File::open(PathBuf::new(index_path).join(SCHEMA_FILENAME)) { + Ok(schema_file) => { + let schema = serde_json::from_reader(BufReader::new(schema_file)).unwrap(); + + let synonyms = if let Ok(synonym_file) = + File::open(PathBuf::new(index_path).join(SYNONYMS_FILENAME)) + { + serde_json::from_reader(BufReader::new(synonym_file)).unwrap_or_default() + } else { + Vec::new() + }; + + match create_shard(index_path, &meta, &schema, false, &synonyms, 11, mute, None) + { + Ok(mut shard) => { + let mut block_count_sum = 0; + + let is_mmap = shard.meta.access_type == AccessType::Mmap; + + let file_len = if is_mmap { + shard.index_file_mmap.len() as u64 + } else { + shard.index_file.metadata().unwrap().len() + }; + + while if is_mmap { + index_mmap_position as u64 + } else { + shard.index_file.stream_position().unwrap() + } < file_len + { + let mut segment_head_vec: Vec<(u32, u32)> = Vec::new(); + for key0 in 0..shard.segment_number1 { + if key0 == 0 { + shard.last_level_index_file_start_pos = if is_mmap { + index_mmap_position as u64 + } else { + shard.index_file.stream_position().unwrap() + }; + + shard.last_level_docstore_file_start_pos = if is_mmap { + docstore_mmap_position as u64 + } else { + shard.docstore_file.stream_position().unwrap() + }; + + if shard.level_index.is_empty() { + let longest_field_id = if is_mmap { + read_u16_ref( + &shard.index_file_mmap, + &mut index_mmap_position, + ) + as usize + } else { + let _ = shard.index_file.read( + &mut shard + .compressed_index_segment_block_buffer + [0..2], + ); + read_u16( + &shard.compressed_index_segment_block_buffer, + 0, + ) + as usize + }; + + for indexed_field in shard.indexed_field_vec.iter_mut() + { + 
indexed_field.is_longest_field = indexed_field + .indexed_field_id + == longest_field_id; + + if indexed_field.is_longest_field { + shard.longest_field_id = longest_field_id + } + } + } + + let mut document_length_compressed_array_vec: Vec< + [u8; ROARING_BLOCK_SIZE], + > = Vec::new(); + + let document_length_compressed_array_pointer = if is_mmap { + index_mmap_position + } else { + shard.index_file.stream_position().unwrap() as usize + }; + + for _i in 0..shard.indexed_field_vec.len() { + if is_mmap { + index_mmap_position += ROARING_BLOCK_SIZE; + } else { + let mut document_length_compressed_array_item = + [0u8; ROARING_BLOCK_SIZE]; + + let _ = shard.index_file.read( + &mut document_length_compressed_array_item, + ); + document_length_compressed_array_vec + .push(document_length_compressed_array_item); + } + } + + let mut docstore_pointer_docs: Vec = Vec::new(); + + let mut docstore_pointer_docs_pointer = 0; + if !shard.stored_field_names.is_empty() { + if is_mmap { + let docstore_pointer_docs_size = read_u32_ref( + &shard.docstore_file_mmap, + &mut docstore_mmap_position, + ) + as usize; + docstore_pointer_docs_pointer = + docstore_mmap_position; + docstore_mmap_position += + docstore_pointer_docs_size; + } else { + let _ = shard.docstore_file.read( + &mut shard + .compressed_index_segment_block_buffer + [0..4], + ); + + let docstore_pointer_docs_size = read_u32( + &shard.compressed_index_segment_block_buffer, + 0, + ) + as usize; + + docstore_pointer_docs_pointer = + shard.docstore_file.stream_position().unwrap() + as usize; + docstore_pointer_docs = + vec![0; docstore_pointer_docs_size]; + let _ = shard + .docstore_file + .read(&mut docstore_pointer_docs); + } + } + + if is_mmap { + shard.indexed_doc_count = read_u64_ref( + &shard.index_file_mmap, + &mut index_mmap_position, + ) + as usize; + shard.positions_sum_normalized = read_u64_ref( + &shard.index_file_mmap, + &mut index_mmap_position, + ); + + for _key0 in 0..shard.segment_number1 { + let block_length = read_u32_ref( + &shard.index_file_mmap, + &mut index_mmap_position, + ); + let key_count = read_u32_ref( + &shard.index_file_mmap, + &mut index_mmap_position, + ); + + segment_head_vec.push((block_length, key_count)); + } + } else { + let _ = shard.index_file.read( + &mut shard.compressed_index_segment_block_buffer + [0..16], + ); + + shard.indexed_doc_count = read_u64( + &shard.compressed_index_segment_block_buffer, + 0, + ) + as usize; + + shard.positions_sum_normalized = read_u64( + &shard.compressed_index_segment_block_buffer, + 8, + ); + + for _key0 in 0..shard.segment_number1 { + let _ = shard.index_file.read( + &mut shard + .compressed_index_segment_block_buffer + [0..8], + ); + + let block_length = read_u32( + &shard.compressed_index_segment_block_buffer, + 0, + ); + let key_count = read_u32( + &shard.compressed_index_segment_block_buffer, + 4, + ); + segment_head_vec.push((block_length, key_count)); + } + } + + shard.document_length_normalized_average = + shard.positions_sum_normalized as f32 + / shard.indexed_doc_count as f32; + + shard.level_index.push(LevelIndex { + document_length_compressed_array: + document_length_compressed_array_vec, + docstore_pointer_docs, + docstore_pointer_docs_pointer, + document_length_compressed_array_pointer, + }); + } + + let block_length = segment_head_vec[key0].0; + let key_count = segment_head_vec[key0].1; + + let block_id = + (block_count_sum >> shard.segment_number_bits1) as u32; + block_count_sum += 1; + + let key_body_pointer_write_start: u32 = + key_count * shard.key_head_size 
as u32; + + if is_mmap { + index_mmap_position += + key_count as usize * shard.key_head_size; + shard.segments_index[key0].byte_array_blocks_pointer.push( + ( + index_mmap_position, + (block_length - key_body_pointer_write_start) + as usize, + key_count, + ), + ); + + index_mmap_position += + (block_length - key_body_pointer_write_start) as usize; + } else { + let _ = shard.index_file.read( + &mut shard.compressed_index_segment_block_buffer + [0..(key_count as usize * shard.key_head_size)], + ); + let compressed_index_segment_block_buffer = &shard + .compressed_index_segment_block_buffer + [0..(key_count as usize * shard.key_head_size)]; + + let mut block_array: Vec = vec![ + 0; + (block_length - key_body_pointer_write_start) + as usize + ]; + + let _ = shard.index_file.read(&mut block_array); + shard.segments_index[key0] + .byte_array_blocks + .push(block_array); + + let mut read_pointer = 0; + + let mut posting_count_previous = 0; + let mut pointer_pivot_p_docid_previous = 0; + let mut compression_type_pointer_previous = 0; + + for key_index in 0..key_count { + let key_hash = read_u64_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + + let posting_count = read_u16_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + + let max_docid = read_u16_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + + let max_p_docid = read_u16_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + + let mut posting_count_ngram_1 = 0; + let mut posting_count_ngram_2 = 0; + let mut posting_count_ngram_3 = 0; + match shard.key_head_size { + 20 => {} + 22 => { + let posting_count_ngram_1_compressed = + read_u8_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION + [posting_count_ngram_1_compressed + as usize]; + + let posting_count_ngram_2_compressed = + read_u8_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION + [posting_count_ngram_2_compressed + as usize]; + } + _ => { + let posting_count_ngram_1_compressed = + read_u8_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION + [posting_count_ngram_1_compressed + as usize]; + + let posting_count_ngram_2_compressed = + read_u8_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION + [posting_count_ngram_2_compressed + as usize]; + + let posting_count_ngram_3_compressed = + read_u8_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + posting_count_ngram_3 = + DOCUMENT_LENGTH_COMPRESSION + [posting_count_ngram_3_compressed + as usize]; + } + } + + let pointer_pivot_p_docid = read_u16_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + + let compression_type_pointer = read_u32_ref( + compressed_index_segment_block_buffer, + &mut read_pointer, + ); + + if let Some(value) = shard.segments_index[key0] + .segment + .get_mut(&key_hash) + { + value.posting_count += posting_count as u32 + 1; + + value.blocks.push(BlockObjectIndex { + max_block_score: 0.0, + block_id, + posting_count, + max_docid, + max_p_docid, + pointer_pivot_p_docid, + compression_type_pointer, + }); + } else { + let value = PostingListObjectIndex { + posting_count: posting_count as u32 + 1, + posting_count_ngram_1, + posting_count_ngram_2, + posting_count_ngram_3, + max_list_score: 
0.0, + position_range_previous: 0, + blocks: vec![BlockObjectIndex { + max_block_score: 0.0, + block_id, + posting_count, + max_docid, + max_p_docid, + pointer_pivot_p_docid, + compression_type_pointer, + }], + ..Default::default() + }; + shard.segments_index[key0] + .segment + .insert(key_hash, value); + }; + + if !shard + .indexed_doc_count + .is_multiple_of(ROARING_BLOCK_SIZE) + && block_id as usize + == shard.indexed_doc_count / ROARING_BLOCK_SIZE + && shard.meta.access_type == AccessType::Ram + { + let position_range_previous = if key_index == 0 { + 0 + } else { + let posting_pointer_size_sum_previous = + pointer_pivot_p_docid_previous as usize * 2 + + if (pointer_pivot_p_docid_previous + as usize) + < posting_count_previous + { + (posting_count_previous + - pointer_pivot_p_docid_previous + as usize) + * 3 + } else { + 0 + }; + + let rank_position_pointer_range_previous= compression_type_pointer_previous & 0b0011_1111_1111_1111_1111_1111_1111_1111; + let compression_type_previous: CompressionType = + FromPrimitive::from_i32( + (compression_type_pointer_previous + >> 30) + as i32, + ) + .unwrap(); + + let compressed_docid_previous = + match compression_type_previous { + CompressionType::Array => { + posting_count_previous * 2 + } + CompressionType::Bitmap => 8192, + CompressionType::Rle => { + let byte_array_docid = &shard + .segments_index[key0] + .byte_array_blocks + [block_id as usize]; + 4 * read_u16( byte_array_docid, rank_position_pointer_range_previous as usize +posting_pointer_size_sum_previous) as usize + 2 + } + _ => 0, + }; + + rank_position_pointer_range_previous + + (posting_pointer_size_sum_previous + + compressed_docid_previous) + as u32 + }; + + let plo = shard.segments_index[key0] + .segment + .get_mut(&key_hash) + .unwrap(); + + plo.position_range_previous = + position_range_previous; + + posting_count_previous = posting_count as usize + 1; + pointer_pivot_p_docid_previous = + pointer_pivot_p_docid; + compression_type_pointer_previous = + compression_type_pointer; + }; + } + } + } + } + + shard.committed_doc_count = shard.indexed_doc_count; + shard.is_last_level_incomplete = + !shard.committed_doc_count.is_multiple_of(ROARING_BLOCK_SIZE); + + for (i, component) in shard.bm25_component_cache.iter_mut().enumerate() + { + let document_length_quotient = DOCUMENT_LENGTH_COMPRESSION[i] + as f32 + / shard.document_length_normalized_average; + *component = K * (1.0 - B + B * document_length_quotient); + } + + shard.string_set_to_single_term_id(); + + update_list_max_impact_score(&mut shard); + + let mut reader = BufReader::with_capacity(8192, &shard.delete_file); + while let Ok(buffer) = reader.fill_buf() { + let length = buffer.len(); + + if length == 0 { + break; + } + + for i in (0..length).step_by(8) { + let docid = read_u64(buffer, i); + shard.delete_hashset.insert(docid as usize); + } + + reader.consume(length); + } + + let shard_arc = Arc::new(RwLock::new(shard)); + + warmup(&shard_arc).await; + Ok(shard_arc.clone()) + } + Err(err) => Err(err.to_string()), + } + } + Err(err) => Err(err.to_string()), + } + } + Err(err) => Err(err.to_string()), + } +} + +/// Loads the index from disk into RAM or MMAP. +/// * `index_path` - index path. +/// * `mute` - prevent emitting status messages (e.g. when using pipes for data interprocess communication). 
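A hedged sketch of reopening an existing index with the open_index entry point defined next; the path is illustrative and error handling is simplified:

// Reopen a previously created index; `mute: false` prints the summary line
// emitted at the end of `open_index`.
let index_arc = open_index(PathBuf::from("/tmp/demo_index"), false)
    .await
    .expect("index could not be opened");
// Indexing and search then go through the returned IndexArc (an Arc<RwLock<Index>>).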
+pub async fn open_index(index_path: PathBuf, mute: bool) -> Result { + if !mute { + println!("opening index ..."); + } + + let start_time = Instant::now(); + + match File::open(PathBuf::new(index_path).join(META_FILENAME)) { + Ok(meta_file) => { + let meta: IndexMetaObject = serde_json::from_reader(BufReader::new(meta_file)).unwrap(); + + match File::open(PathBuf::new(index_path).join(SCHEMA_FILENAME)) { + Ok(schema_file) => { + let schema = serde_json::from_reader(BufReader::new(schema_file)).unwrap(); + + let synonyms = if let Ok(synonym_file) = + File::open(PathBuf::new(index_path).join(SYNONYMS_FILENAME)) + { + serde_json::from_reader(BufReader::new(synonym_file)).unwrap_or_default() + } else { + Vec::new() + }; + + match create_index_root( + index_path, meta, &schema, false, &synonyms, 11, false, None, + ) + .await + { + Ok(index_arc) => { + let lock = Arc::into_inner(index_arc).unwrap(); + let index = RwLock::into_inner(lock); + + let index_arc = Arc::new(RwLock::new(index)); + + if let Some(symspell) = + &mut index_arc.read().await.symspell_option.as_ref() + { + let dictionary_path = + PathBuf::new(&index_arc.read().await.index_path_string) + .join(DICTIONARY_FILENAME); + let _ = symspell.write().await.load_dictionary( + &dictionary_path, + 0, + 1, + " ", + ); + } + + if let Some(completion_option) = + &mut index_arc.read().await.completion_option.as_ref() + { + let _ = completion_option.write().await.load_completions( + PathBuf::new(&index_arc.read().await.index_path_string) + .join(COMPLETIONS_FILENAME), + 0, + 1, + ":", + ); + } + + let mut level_count = 0; + + let mut shard_vec: Vec>> = Vec::new(); + let mut shard_queue = VecDeque::::new(); + + let paths: Vec<_> = fs::read_dir(index_path.join("shards")) + .unwrap() + .filter_map(Result::ok) + .collect(); + let mut shard_handle_vec = Vec::new(); + let index_path_clone = Arc::new(index_path.to_path_buf()); + for i in 0..paths.len() { + let index_path_clone2 = index_path_clone.clone(); + shard_handle_vec.push(tokio::spawn(async move { + let path = index_path_clone2.join("shards").join(i.to_string()); + + open_shard(&path, true).await.unwrap() + })); + } + + for shard_handle in shard_handle_vec { + let shard_arc = shard_handle.await.unwrap(); + shard_arc.write().await.index_option = Some(index_arc.clone()); + index_arc.write().await.indexed_doc_count += + shard_arc.read().await.indexed_doc_count; + index_arc.write().await.deleted_doc_count += + shard_arc.read().await.delete_hashset.len(); + level_count += shard_arc.read().await.level_index.len(); + let shard_id = shard_arc.read().await.meta.id; + shard_queue.push_back(shard_id as usize); + shard_vec.push(shard_arc); + } + + index_arc.write().await.shard_number = shard_vec.len(); + index_arc.write().await.shard_bits = + (usize::BITS - (shard_vec.len() - 1).leading_zeros()) as usize; + + for shard in shard_vec.iter() { + shard.write().await.shard_number = shard_vec.len(); + } + + index_arc.write().await.shard_vec = shard_vec; + index_arc.write().await.shard_queue = + Arc::new(RwLock::new(shard_queue)); + + let elapsed_time = start_time.elapsed().as_nanos(); + + if !mute { + let index_ref = index_arc.read().await; + println!( + "{} name {} id {} version {} {} shards {} ngrams {:08b} level {} fields {} {} facets {} docs {} deleted {} segments {} dictionary {} {} completions {} time {} s", + INDEX_FILENAME, + index_ref.meta.name, + index_ref.meta.id, + index_ref.index_format_version_major.to_string() + + "." 
+ + &index_ref.index_format_version_minor.to_string(), + INDEX_FORMAT_VERSION_MAJOR.to_string() + + "." + + &INDEX_FORMAT_VERSION_MINOR.to_string(), + index_ref.shard_count().await, + index_ref.meta.ngram_indexing, + level_count, + index_ref.indexed_field_vec.len(), + index_ref.schema_map.len(), + index_ref.facets.len(), + index_ref.indexed_doc_count.to_formatted_string(&Locale::en), + index_ref.deleted_doc_count.to_formatted_string(&Locale::en), + index_ref.segment_number1, + if let Some(symspell) = index_ref.symspell_option.as_ref() { + symspell + .read() + .await + .get_dictionary_size() + .to_formatted_string(&Locale::en) + } else { + "None".to_string() + }, + if let Some(symspell) = index_ref.symspell_option.as_ref() { + symspell + .read() + .await + .get_candidates_size() + .to_formatted_string(&Locale::en) + } else { + "None".to_string() + }, + if let Some(completions) = index_ref.completion_option.as_ref() + { + completions + .read() + .await + .len() + .to_formatted_string(&Locale::en) + } else { + "None".to_string() + }, + elapsed_time / 1_000_000_000 + ); + } + + Ok(index_arc.clone()) + } + Err(err) => Err(err.to_string()), + } + } + Err(err) => Err(err.to_string()), + } + } + Err(err) => Err(err.to_string()), + } +} + +pub(crate) async fn warmup(shard_object_arc: &ShardArc) { + shard_object_arc.write().await.frequentword_results.clear(); + let mut query_facets: Vec = Vec::new(); + for facet in shard_object_arc.read().await.facets.iter() { + match facet.field_type { + FieldType::String16 => query_facets.push(QueryFacet::String16 { + field: facet.name.clone(), + prefix: "".into(), + length: u16::MAX, + }), + FieldType::String32 => query_facets.push(QueryFacet::String32 { + field: facet.name.clone(), + prefix: "".into(), + length: u32::MAX, + }), + FieldType::StringSet16 => query_facets.push(QueryFacet::StringSet16 { + field: facet.name.clone(), + prefix: "".into(), + length: u16::MAX, + }), + FieldType::StringSet32 => query_facets.push(QueryFacet::StringSet32 { + field: facet.name.clone(), + prefix: "".into(), + length: u32::MAX, + }), + _ => {} + } + } + + let frequent_words = shard_object_arc.read().await.frequent_words.clone(); + for frequentword in frequent_words.iter() { + let results_list = shard_object_arc + .search_shard( + frequentword.to_owned(), + QueryType::Union, + 0, + 1000, + ResultType::TopkCount, + false, + Vec::new(), + query_facets.clone(), + Vec::new(), + Vec::new(), + ) + .await; + + let mut index_mut = shard_object_arc.write().await; + index_mut + .frequentword_results + .insert(frequentword.to_string(), results_list); + } +} + +#[derive(Default, Debug, Deserialize, Serialize, Clone)] +pub(crate) struct TermObject { + pub key_hash: u64, + pub key0: u32, + pub term: String, + + pub ngram_type: NgramType, + + pub term_ngram_2: String, + pub term_ngram_1: String, + pub term_ngram_0: String, + pub field_vec_ngram1: Vec<(usize, u32)>, + pub field_vec_ngram2: Vec<(usize, u32)>, + pub field_vec_ngram3: Vec<(usize, u32)>, + + pub field_positions_vec: Vec>, +} + +#[derive(Default, Debug, Serialize, Deserialize, Clone)] +pub(crate) struct NonUniqueTermObject { + pub term: String, + pub ngram_type: NgramType, + + pub term_ngram_2: String, + pub term_ngram_1: String, + pub term_ngram_0: String, + pub op: QueryType, +} + +#[cfg(not(all(target_feature = "aes", target_feature = "sse2")))] +use ahash::RandomState; +#[cfg(not(all(target_feature = "aes", target_feature = "sse2")))] +use std::sync::LazyLock; + +#[cfg(not(all(target_feature = "aes", target_feature = "sse2")))] 
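+// Seeded `ahash` hashers used when the `aes`/`sse2` target features are not available.
+// The seeds are deliberately fixed: key hashes are persisted inside the index files,
+// so hashing has to produce identical values across runs and machines.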
+pub static HASHER_32: LazyLock = + LazyLock::new(|| RandomState::with_seeds(805272099, 242851902, 646123436, 591410655)); + +#[cfg(not(all(target_feature = "aes", target_feature = "sse2")))] +pub static HASHER_64: LazyLock = + LazyLock::new(|| RandomState::with_seeds(808259318, 750368348, 84901999, 789810389)); + +#[inline] +#[cfg(not(all(target_feature = "aes", target_feature = "sse2")))] +pub(crate) fn hash32(term_bytes: &[u8]) -> u32 { + HASHER_32.hash_one(term_bytes) as u32 +} + +#[inline] +#[cfg(not(all(target_feature = "aes", target_feature = "sse2")))] +pub(crate) fn hash64(term_bytes: &[u8]) -> u64 { + HASHER_64.hash_one(term_bytes) + & 0b1111111111111111111111111111111111111111111111111111111111111000 +} + +static FREQUENT_EN: &str = include_str!("../../assets/dictionaries/frequent_en.txt"); +static FREQUENT_DE: &str = include_str!("../../assets/dictionaries/frequent_de.txt"); +static FREQUENT_FR: &str = include_str!("../../assets/dictionaries/frequent_fr.txt"); +static FREQUENT_ES: &str = include_str!("../../assets/dictionaries/frequent_es.txt"); + +pub(crate) const NUM_FREE_VALUES: u32 = 24; + +/// Compress an u32 to a byte, preserving 4 significant bits. +/// used for compressing n-gram frequent_term positions_count and doc/field length +/// Ported from Lucene SmallFloat.java https://github.com/apache/lucene/blob/main/lucene/core/src/java/org/apache/lucene/util/SmallFloat.java +pub(crate) fn int_to_byte4(i: u32) -> u8 { + if i < NUM_FREE_VALUES { + i as u8 + } else { + let ii = i - NUM_FREE_VALUES; + let num_bits = 32 - ii.leading_zeros(); + if num_bits < 4 { + (NUM_FREE_VALUES + ii) as u8 + } else { + let shift = num_bits - 4; + (NUM_FREE_VALUES + (((ii >> shift) & 0x07) | (shift + 1) << 3)) as u8 + } + } +} + +/// Decompress a byte that has been compressed with intToByte4(int), to an u32 +/// used for pre-calculating DOCUMENT_LENGTH_COMPRESSION table. Decompressing n-gram frequent_term positions_count and doc/field length via table lookup. +/// Ported from Lucene SmallFloat.java https://github.com/apache/lucene/blob/main/lucene/core/src/java/org/apache/lucene/util/SmallFloat.java +pub(crate) const fn byte4_to_int(b: u8) -> u32 { + if (b as u32) < NUM_FREE_VALUES { + b as u32 + } else { + let i = b as u32 - NUM_FREE_VALUES; + let bits = i & 0x07; + let shift = i >> 3; + if shift == 0 { + NUM_FREE_VALUES + bits + } else { + NUM_FREE_VALUES + ((bits | 0x08) << (shift - 1)) + } + } +} + +/// Pre-calculated DOCUMENT_LENGTH_COMPRESSION table for fast lookup. 
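+/// Round-trip example: values below `NUM_FREE_VALUES` (24) survive exactly
+/// (`DOCUMENT_LENGTH_COMPRESSION[int_to_byte4(7) as usize] == 7`), while larger values are
+/// quantized to ~4 significant bits, e.g. 1000 compresses to byte 87 and decompresses to 984.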
+pub(crate) const DOCUMENT_LENGTH_COMPRESSION: [u32; 256] = { + let mut k2 = [0; 256]; + let mut i = 0usize; + while i < 256 { + k2[i] = byte4_to_int(i as u8); + i += 1; + } + k2 +}; + +impl Shard { + pub(crate) fn string_set_to_single_term_id(&mut self) { + for (i, facet) in self.facets.iter().enumerate() { + if facet.field_type == FieldType::StringSet16 + || facet.field_type == FieldType::StringSet32 + { + for (idx, value) in facet.values.iter().enumerate() { + for term in value.1.0.iter() { + self.string_set_to_single_term_id_vec[i] + .entry(term.to_string()) + .or_insert(AHashSet::from_iter(vec![idx as u32])) + .insert(idx as u32); + } + } + } + } + } + + /// Reset shard to empty, while maintaining schema + async fn clear_shard(&mut self) { + let permit = self.permits.clone().acquire_owned().await.unwrap(); + + self.level_terms.clear(); + + let mut mmap_options = MmapOptions::new(); + let mmap: MmapMut = mmap_options.len(4).map_anon().unwrap(); + self.index_file_mmap = mmap + .make_read_only() + .expect("Unable to make Mmap read-only"); + + let _ = self.index_file.rewind(); + if let Err(e) = self.index_file.set_len(0) { + println!( + "Unable to index_file.set_len in clear_index {} {} {:?}", + self.index_path_string, self.indexed_doc_count, e + ) + }; + + if !self.compressed_docstore_segment_block_buffer.is_empty() { + self.compressed_docstore_segment_block_buffer = vec![0; ROARING_BLOCK_SIZE * 4]; + }; + + write_u16( + INDEX_FORMAT_VERSION_MAJOR, + &mut self.compressed_index_segment_block_buffer, + 0, + ); + write_u16( + INDEX_FORMAT_VERSION_MINOR, + &mut self.compressed_index_segment_block_buffer, + 2, + ); + + let _ = self + .index_file + .write(&self.compressed_index_segment_block_buffer[0..INDEX_HEADER_SIZE as usize]); + let _ = self.index_file.flush(); + + self.index_file_mmap = + unsafe { Mmap::map(&self.index_file).expect("Unable to create Mmap") }; + + self.docstore_file_mmap = unsafe { + MmapOptions::new() + .len(0) + .map(&self.docstore_file) + .expect("Unable to create Mmap") + }; + + let _ = self.docstore_file.rewind(); + if let Err(e) = self.docstore_file.set_len(0) { + println!("Unable to docstore_file.set_len in clear_index {:?}", e) + }; + let _ = self.docstore_file.flush(); + + let _ = self.delete_file.rewind(); + if let Err(e) = self.delete_file.set_len(0) { + println!("Unable to delete_file.set_len in clear_index {:?}", e) + }; + let _ = self.delete_file.flush(); + self.delete_hashset.clear(); + + self.facets_file_mmap = unsafe { + MmapOptions::new() + .len(0) + .map_mut(&self.facets_file) + .expect("Unable to create Mmap") + }; + let _ = self.facets_file.rewind(); + if let Err(e) = self + .facets_file + .set_len((self.facets_size_sum * ROARING_BLOCK_SIZE) as u64) + { + println!("Unable to facets_file.set_len in clear_index {:?}", e) + }; + let _ = self.facets_file.flush(); + + self.facets_file_mmap = + unsafe { MmapMut::map_mut(&self.facets_file).expect("Unable to create Mmap") }; + let index_path = PathBuf::new(&self.index_path_string); + let _ = fs::remove_file(index_path.join(FACET_VALUES_FILENAME)); + for facet in self.facets.iter_mut() { + facet.values.clear(); + facet.min = ValueType::None; + facet.max = ValueType::None; + } + + if !self.stored_field_names.is_empty() && self.meta.access_type == AccessType::Mmap { + self.docstore_file_mmap = + unsafe { Mmap::map(&self.docstore_file).expect("Unable to create Mmap") }; + } + + self.document_length_normalized_average = 0.0; + self.indexed_doc_count = 0; + self.committed_doc_count = 0; + 
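+        // Reset the remaining per-shard counters and segment bookkeeping so indexing
+        // restarts from an empty state (the schema and facet definitions are kept).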
self.positions_sum_normalized = 0; + + self.level_index = Vec::new(); + + for segment in self.segments_index.iter_mut() { + segment.byte_array_blocks.clear(); + segment.byte_array_blocks_pointer.clear(); + segment.segment.clear(); + } + + for segment in self.segments_level0.iter_mut() { + segment.segment.clear(); + } + + self.key_count_sum = 0; + self.block_id = 0; + self.strip_compressed_sum = 0; + self.postings_buffer_pointer = 0; + self.docid_count = 0; + self.size_compressed_docid_index = 0; + self.size_compressed_positions_index = 0; + self.position_count = 0; + self.postinglist_count = 0; + + self.is_last_level_incomplete = false; + + drop(permit); + } + + pub(crate) fn get_index_string_facets_shard( + &self, + query_facets: Vec, + ) -> Option> { + if self.facets.is_empty() { + return None; + } + + let mut result_query_facets = Vec::new(); + if !query_facets.is_empty() { + result_query_facets = vec![ResultFacet::default(); self.facets.len()]; + for query_facet in query_facets.iter() { + match &query_facet { + QueryFacet::String16 { + field, + prefix, + length, + } => { + if let Some(idx) = self.facets_map.get(field) + && self.facets[*idx].field_type == FieldType::String16 + { + result_query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length as u32, + ..Default::default() + } + } + } + QueryFacet::StringSet16 { + field, + prefix, + length, + } => { + if let Some(idx) = self.facets_map.get(field) + && self.facets[*idx].field_type == FieldType::StringSet16 + { + result_query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length as u32, + ..Default::default() + } + } + } + + QueryFacet::String32 { + field, + prefix, + length, + } => { + if let Some(idx) = self.facets_map.get(field) + && self.facets[*idx].field_type == FieldType::String32 + { + result_query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length, + ..Default::default() + } + } + } + QueryFacet::StringSet32 { + field, + prefix, + length, + } => { + if let Some(idx) = self.facets_map.get(field) + && self.facets[*idx].field_type == FieldType::StringSet32 + { + result_query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length, + ..Default::default() + } + } + } + + _ => {} + }; + } + } + + let mut facets: AHashMap = AHashMap::new(); + for (i, facet) in result_query_facets.iter().enumerate() { + if facet.length == 0 || self.facets[i].values.is_empty() { + continue; + } + + if self.facets[i].field_type == FieldType::StringSet16 + || self.facets[i].field_type == FieldType::StringSet32 + { + let mut hash_map: AHashMap = AHashMap::new(); + for value in self.facets[i].values.iter() { + for term in value.1.0.iter() { + *hash_map.entry(term.clone()).or_insert(0) += value.1.1; + } + } + + let v = hash_map + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(a.1)) + .map(|(a, c)| (a.to_string(), *c)) + .filter(|(a, _c)| facet.prefix.is_empty() || a.starts_with(&facet.prefix)) + .take(facet.length as usize) + .collect::>(); + + if !v.is_empty() { + facets.insert(facet.field.clone(), v); + } + } else { + let v = self.facets[i] + .values + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(a.1)) + .map(|(a, c)| (a.to_string(), c.1)) + .filter(|(a, _c)| facet.prefix.is_empty() || a.starts_with(&facet.prefix)) + .take(facet.length as usize) + .collect::>(); + + if !v.is_empty() { + facets.insert(facet.field.clone(), v); + } + } + } + + Some(facets) + } +} + +impl Index { + /// 
Current document count: indexed document count - deleted document count + pub async fn current_doc_count(&self) -> usize { + let mut current_doc_count = 0; + for shard in self.shard_vec.iter() { + current_doc_count += + shard.read().await.indexed_doc_count - shard.read().await.delete_hashset.len(); + } + current_doc_count + } + + /// are there uncommited documents? + pub async fn uncommitted_doc_count(&self) -> usize { + let mut uncommitted_doc_count = 0; + for shard in self.shard_vec.iter() { + uncommitted_doc_count += + shard.read().await.indexed_doc_count - shard.read().await.committed_doc_count; + } + uncommitted_doc_count + } + + /// Get number of indexed documents. + pub async fn committed_doc_count(&self) -> usize { + let mut committed_doc_count = 0; + for shard in self.shard_vec.iter() { + committed_doc_count += shard.read().await.committed_doc_count; + } + committed_doc_count + } + + /// Get number of indexed documents. + pub async fn indexed_doc_count(&self) -> usize { + let mut indexed_doc_count = 0; + for shard in self.shard_vec.iter() { + indexed_doc_count += shard.read().await.indexed_doc_count; + } + indexed_doc_count + } + + /// Get number of index levels. One index level comprises 64K documents. + pub async fn level_count(&self) -> usize { + let mut level_count = 0; + for shard in self.shard_vec.iter() { + level_count += shard.read().await.level_index.len(); + } + level_count + } + + /// Get number of index shards. + pub async fn shard_count(&self) -> usize { + self.shard_number + } + + /// Get number of facets defined in the index schema. + pub fn facets_count(&self) -> usize { + self.facets.len() + } + + /// get_index_facets_minmax: return map of numeric facet fields, each with field name and min/max values. + pub async fn index_facets_minmax(&self) -> HashMap { + let mut facets_minmax: HashMap = HashMap::new(); + for shard in self.shard_vec.iter() { + for facet in shard.read().await.facets.iter() { + match (&facet.min, &facet.max) { + (ValueType::U8(min), ValueType::U8(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_u64().unwrap() as u8))).into(), + max: (*max.min(&(item.max.as_u64().unwrap() as u8))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::U16(min), ValueType::U16(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_u64().unwrap() as u16))).into(), + max: (*max.min(&(item.max.as_u64().unwrap() as u16))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::U32(min), ValueType::U32(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_u64().unwrap() as u32))).into(), + max: (*max.min(&(item.max.as_u64().unwrap() as u32))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::U64(min), ValueType::U64(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_u64().unwrap()))).into(), + max: (*max.min(&(item.max.as_u64().unwrap()))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: 
(*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::I8(min), ValueType::I8(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_i64().unwrap() as i8))).into(), + max: (*max.min(&(item.max.as_i64().unwrap() as i8))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::I16(min), ValueType::I16(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_i64().unwrap() as i16))).into(), + max: (*max.min(&(item.max.as_i64().unwrap() as i16))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::I32(min), ValueType::I32(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_i64().unwrap() as i32))).into(), + max: (*max.min(&(item.max.as_i64().unwrap() as i32))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::I64(min), ValueType::I64(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_i64().unwrap()))).into(), + max: (*max.min(&(item.max.as_i64().unwrap()))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::Timestamp(min), ValueType::Timestamp(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: (*min.min(&(item.min.as_i64().unwrap()))).into(), + max: (*max.min(&(item.max.as_i64().unwrap()))).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::F32(min), ValueType::F32(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: min.min(item.min.as_f64().unwrap() as f32).into(), + max: max.min(item.max.as_f64().unwrap() as f32).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + (ValueType::F64(min), ValueType::F64(max)) => { + if let Some(item) = facets_minmax.get_mut(&facet.name) { + *item = MinMaxFieldJson { + min: min.min(item.min.as_f64().unwrap()).into(), + max: max.min(item.max.as_f64().unwrap()).into(), + } + } else { + facets_minmax.insert( + facet.name.clone(), + MinMaxFieldJson { + min: (*min).into(), + max: (*max).into(), + }, + ); + } + } + _ => {} + } + } + } + facets_minmax + } + + /// get_index_string_facets: list of string facet fields, each with field name and a map of unique values and their count (number of times the specific value appears in the whole index). + /// values are sorted by their occurrence count within all indexed documents in descending order + /// * `query_facets`: Must be set if facet fields should be returned in get_index_facets. If set to Vec::new() then no facet fields are returned. + /// The prefix property of a QueryFacet allows to filter the returned facet values to those matching a given prefix, if there are too many distinct values per facet field. 
+ /// The length property of a QueryFacet allows limiting the number of returned distinct values per facet field, if there are too many distinct values. The QueryFacet can be used to improve the usability in an UI. + /// If the length property of a QueryFacet is set to 0 then no facet values for that facet are returned. + /// The facet values are sorted by the frequency of the appearance of the value within the indexed documents matching the query in descending order. + /// Example: query_facets = vec![QueryFacet::String16 {field: "language".to_string(),prefix: "ger".to_string(),length: 5},QueryFacet::String16 {field: "brand".to_string(),prefix: "a".to_string(),length: 5}]; + pub async fn get_index_string_facets( + &self, + query_facets: Vec, + ) -> Option> { + if self.facets.is_empty() { + return None; + } + + let mut result: AHashMap = AHashMap::new(); + + let mut result_facets: AHashMap, u32)> = AHashMap::new(); + for query_facet in query_facets.iter() { + match query_facet { + QueryFacet::String16 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length as u32)); + } + QueryFacet::StringSet16 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length as u32)); + } + + QueryFacet::String32 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length)); + } + QueryFacet::StringSet32 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length)); + } + + _ => {} + } + } + + for shard_arc in self.shard_vec.iter() { + let shard = shard_arc.read().await; + if !shard.facets.is_empty() { + for facet in shard.facets.iter() { + if let Some(existing) = result_facets.get_mut(&facet.name) { + for (key, value) in facet.values.iter() { + *existing.0.entry(key.clone()).or_insert(0) += value.1; + } + }; + } + } + } + + for (key, value) in result_facets.iter_mut() { + let sum = value + .0 + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(a.1)) + .map(|(a, c)| (a.clone(), *c)) + .take(value.1 as usize) + .collect::>(); + result.insert(key.clone(), sum); + } + + Some(result) + } + + /// Reset index to empty, while maintaining schema + pub async fn clear_index(&mut self) { + let index_path = PathBuf::new(&self.index_path_string); + let _ = fs::remove_file(index_path.join(DICTIONARY_FILENAME)); + if let Some(spelling_correction) = self.meta.spelling_correction.as_ref() { + self.symspell_option = Some(Arc::new(RwLock::new(SymSpell::new( + spelling_correction.max_dictionary_edit_distance, + spelling_correction.term_length_threshold.clone(), + 7, + spelling_correction.count_threshold, + )))); + } + + let _ = fs::remove_file(index_path.join(COMPLETIONS_FILENAME)); + if let Some(_query_completion) = self.meta.query_completion.as_ref() { + self.completion_option = Some(Arc::new(RwLock::new(PruningRadixTrie::new()))); + } + + let mut result_object_list = Vec::new(); + for shard in self.shard_vec.iter() { + let shard_clone = shard.clone(); + result_object_list.push(tokio::spawn(async move { + shard_clone.write().await.clear_shard().await; + })); + } + future::join_all(result_object_list).await; + } + + /// Delete index from disc and ram + pub fn delete_index(&mut self) { + let index_path = PathBuf::new(&self.index_path_string); + + let _ = fs::remove_file(index_path.join(DICTIONARY_FILENAME)); + let _ = fs::remove_file(index_path.join(COMPLETIONS_FILENAME)); + + let _ = fs::remove_file(index_path.join(INDEX_FILENAME)); + let _ = 
fs::remove_file(index_path.join(SCHEMA_FILENAME)); + let _ = fs::remove_file(index_path.join(META_FILENAME)); + let _ = fs::remove_file(index_path.join(DELETE_FILENAME)); + let _ = fs::remove_file(index_path.join(FACET_FILENAME)); + let _ = fs::remove_file(index_path.join(FACET_VALUES_FILENAME)); + let _ = fs::remove_dir(index_path); + } + + /// Get synonyms from index + pub fn get_synonyms(&self) -> Result, String> { + if let Ok(synonym_file) = + File::open(PathBuf::new(&self.index_path_string).join(SYNONYMS_FILENAME)) + { + if let Ok(synonyms) = serde_json::from_reader(BufReader::new(synonym_file)) { + Ok(synonyms) + } else { + Err("not found".into()) + } + } else { + Err("not found".into()) + } + } + + /// Set/replace/overwrite synonyms in index + /// Affects only subsequently indexed documents + pub fn set_synonyms(&mut self, synonyms: &Vec) -> Result { + serde_json::to_writer( + &File::create(PathBuf::new(&self.index_path_string).join(SYNONYMS_FILENAME)).unwrap(), + &synonyms, + ) + .unwrap(); + + self.synonyms_map = get_synonyms_map(synonyms, self.segment_number_mask1); + Ok(synonyms.len()) + } + + /// Add/append/update/merge synonyms in index + /// Affects only subsequently indexed documents + pub fn add_synonyms(&mut self, synonyms: &[Synonym]) -> Result { + let mut merged_synonyms = if let Ok(synonym_file) = + File::open(PathBuf::new(&self.index_path_string).join(SYNONYMS_FILENAME)) + { + serde_json::from_reader(BufReader::new(synonym_file)).unwrap_or_default() + } else { + Vec::new() + }; + + merged_synonyms.extend(synonyms.iter().cloned()); + + serde_json::to_writer( + &File::create(PathBuf::new(&self.index_path_string).join(SYNONYMS_FILENAME)).unwrap(), + &merged_synonyms, + ) + .unwrap(); + + self.synonyms_map = get_synonyms_map(&merged_synonyms, self.segment_number_mask1); + Ok(merged_synonyms.len()) + } +} + +/// Remove index from RAM (Reverse of open_index) +#[allow(async_fn_in_trait)] +pub trait Close { + /// Remove index from RAM (Reverse of open_index) + async fn close(&self); +} + +/// Remove index from RAM (Reverse of open_index) +impl Close for IndexArc { + /// Remove index from RAM (Reverse of open_index) + async fn close(&self) { + self.commit().await; + + if let Some(completion_option) = &self.read().await.completion_option.as_ref() { + let trie = completion_option.read().await; + let completions_path = + PathBuf::new(&self.read().await.index_path_string).join(COMPLETIONS_FILENAME); + + _ = trie.save_completions(&completions_path, ":"); + } + + if let Some(symspell) = &mut self.read().await.symspell_option.as_ref() { + let dictionary_path = + PathBuf::new(&self.read().await.index_path_string).join(DICTIONARY_FILENAME); + let _ = symspell.read().await.save_dictionary(&dictionary_path, " "); + } + + let mut result_object_list = Vec::new(); + for shard in self.read().await.shard_vec.iter() { + let shard_clone = shard.clone(); + result_object_list.push(tokio::spawn(async move { + let mut mmap_options = MmapOptions::new(); + let mmap: MmapMut = mmap_options.len(4).map_anon().unwrap(); + shard_clone.write().await.index_file_mmap = mmap + .make_read_only() + .expect("Unable to make Mmap read-only"); + + let mut mmap_options = MmapOptions::new(); + let mmap: MmapMut = mmap_options.len(4).map_anon().unwrap(); + shard_clone.write().await.docstore_file_mmap = mmap + .make_read_only() + .expect("Unable to make Mmap read-only"); + })); + } + future::join_all(result_object_list).await; + } +} + +/// Delete document from index by document id +#[allow(async_fn_in_trait)] +pub 
trait DeleteDocument {
+    /// Delete document from index by document id
+    async fn delete_document(&self, docid: u64);
+}
+
+/// Delete document from index by document id
+/// Document IDs can be obtained by search.
+/// Immediately effective, independent of commit.
+/// Index space used by deleted documents is not reclaimed (until compaction is implemented), but result_count_total is updated.
+/// By manually deleting the delete.bin file, the deleted documents can be recovered (until compaction).
+/// Deleted documents impact performance, especially but not limited to counting (Count, TopKCount). They also increase the size of the index (until compaction is implemented).
+/// For minimal query latency, deleting the index and reindexing the documents is preferred over deleting individual documents (until compaction is implemented).
+/// BM25 scores are not updated (until compaction is implemented), but the impact is minimal.
+impl DeleteDocument for IndexArc {
+    async fn delete_document(&self, docid: u64) {
+        let index_ref = self.read().await;
+        let shard_id = docid & ((1 << index_ref.shard_bits) - 1);
+        let doc_id = docid >> index_ref.shard_bits;
+        let mut shard_mut = index_ref.shard_vec[shard_id as usize].write().await;
+
+        if doc_id as usize >= shard_mut.indexed_doc_count {
+            return;
+        }
+        if shard_mut.delete_hashset.insert(doc_id as usize) {
+            let mut buffer: [u8; 8] = [0; 8];
+            write_u64(doc_id, &mut buffer, 0);
+            let _ = shard_mut.delete_file.write(&buffer);
+            let _ = shard_mut.delete_file.flush();
+        }
+    }
+}
+
+/// Delete documents from index by document id
+#[allow(async_fn_in_trait)]
+pub trait DeleteDocuments {
+    /// Delete documents from index by document id
+    async fn delete_documents(&self, docid_vec: Vec<u64>);
+}
+
+/// Delete documents from index by document id
+/// Document IDs can be obtained by search.
+/// Immediately effective, independent of commit.
+/// Index space used by deleted documents is not reclaimed (until compaction is implemented), but result_count_total is updated.
+/// By manually deleting the delete.bin file, the deleted documents can be recovered (until compaction).
+/// Deleted documents impact performance, especially but not limited to counting (Count, TopKCount). They also increase the size of the index (until compaction is implemented).
+/// For minimal query latency, deleting the index and reindexing the documents is preferred over deleting individual documents (until compaction is implemented).
+/// BM25 scores are not updated (until compaction is implemented), but the impact is minimal.
+impl DeleteDocuments for IndexArc {
+    async fn delete_documents(&self, docid_vec: Vec<u64>) {
+        for docid in docid_vec {
+            self.delete_document(docid).await;
+        }
+    }
+}
+
+/// Delete documents from index by query
+/// Delete and search have identical parameters.
+/// It is recommended to test with search prior to delete to verify that only those documents are returned that you really want to delete.
+#[allow(clippy::too_many_arguments)]
+#[allow(async_fn_in_trait)]
+pub trait DeleteDocumentsByQuery {
+    /// Delete documents from index by query
+    /// Delete and search have identical parameters.
+    /// It is recommended to test with search prior to delete to verify that only those documents are returned that you really want to delete.
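+    /// Only the documents returned for the given `offset`/`length` window are deleted,
+    /// since the implementation searches first and then deletes the returned document ids.
+    /// Call sketch (illustrative; the filter vectors are left empty and `QueryType::Union`
+    /// is used as the default operator):
+    /// ```no_run
+    /// index_arc
+    ///     .delete_documents_by_query(
+    ///         "obsolete entries".into(), // query_string
+    ///         QueryType::Union,          // query_type_default
+    ///         0,                         // offset
+    ///         1000,                      // length
+    ///         false,                     // include_uncommited
+    ///         Vec::new(),                // field_filter
+    ///         Vec::new(),                // facet_filter
+    ///         Vec::new(),                // result_sort
+    ///     )
+    ///     .await;
+    /// ```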
+ async fn delete_documents_by_query( + &self, + query_string: String, + query_type_default: QueryType, + offset: usize, + length: usize, + include_uncommited: bool, + field_filter: Vec, + facet_filter: Vec, + result_sort: Vec, + ); +} + +/// Delete documents from index by query +/// Delete and search have identical parameters. +/// It is recommended to test with search prior to delete to verify that only those documents are returned that you really want to delete. +impl DeleteDocumentsByQuery for IndexArc { + async fn delete_documents_by_query( + &self, + query_string: String, + query_type_default: QueryType, + offset: usize, + length: usize, + include_uncommited: bool, + field_filter: Vec, + facet_filter: Vec, + result_sort: Vec, + ) { + let rlo = self + .search( + query_string.to_owned(), + query_type_default, + offset, + length, + ResultType::Topk, + include_uncommited, + field_filter, + Vec::new(), + facet_filter, + result_sort, + QueryRewriting::SearchOnly, + ) + .await; + + let document_id_vec: Vec = rlo + .results + .iter() + .map(|result| result.doc_id as u64) + .collect(); + self.delete_documents(document_id_vec).await; + } +} + +/// Update document in index +/// Update_document is a combination of delete_document and index_document. +/// All current limitations of delete_document apply. +#[allow(async_fn_in_trait)] +pub trait UpdateDocument { + /// Update document in index + /// Update_document is a combination of delete_document and index_document. + /// All current limitations of delete_document apply. + async fn update_document(&self, id_document: (u64, Document)); +} + +/// Update document in index +/// Update_document is a combination of delete_document and index_document. +/// All current limitations of delete_document apply. +impl UpdateDocument for IndexArc { + async fn update_document(&self, id_document: (u64, Document)) { + self.delete_document(id_document.0).await; + self.index_document(id_document.1, FileType::None).await; + } +} + +/// Update documents in index +/// Update_document is a combination of delete_document and index_document. +/// All current limitations of delete_document apply. +#[allow(async_fn_in_trait)] +pub trait UpdateDocuments { + /// Update documents in index + /// Update_document is a combination of delete_document and index_document. + /// All current limitations of delete_document apply. + async fn update_documents(&self, id_document_vec: Vec<(u64, Document)>); +} + +/// Update documents in index +/// Update_document is a combination of delete_document and index_document. +/// All current limitations of delete_document apply. +impl UpdateDocuments for IndexArc { + async fn update_documents(&self, id_document_vec: Vec<(u64, Document)>) { + let (docid_vec, document_vec): (Vec<_>, Vec<_>) = id_document_vec.into_iter().unzip(); + self.delete_documents(docid_vec).await; + self.index_documents(document_vec).await; + } +} + +/// Indexes a list of documents +#[allow(async_fn_in_trait)] +pub trait IndexDocuments { + /// Indexes a list of documents + /// May block, if the threshold of documents indexed in parallel is exceeded. + async fn index_documents(&self, document_vec: Vec); +} + +impl IndexDocuments for IndexArc { + /// Index list of documents (bulk) + /// May block, if the threshold of documents indexed in parallel is exceeded. 
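+    /// A usage sketch (illustrative): it assumes `Document` is the serde_json-backed map type
+    /// consumed by `index_document_shard` below, and a schema with `title`/`body` text fields.
+    /// ```no_run
+    /// let doc: Document = serde_json::from_value(serde_json::json!({
+    ///     "title": "bulk indexing",
+    ///     "body": "index many documents with a single call"
+    /// })).unwrap();
+    /// index_arc.index_documents(vec![doc]).await;
+    /// ```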
+ async fn index_documents(&self, document_vec: Vec) { + for document in document_vec { + self.index_document(document, FileType::None).await; + } + } +} + +/// Indexes a single document +#[allow(async_fn_in_trait)] +pub trait IndexDocument { + /// Indexes a single document + /// May block, if the threshold of documents indexed in parallel is exceeded. + async fn index_document(&self, document: Document, file: FileType); +} + +impl IndexDocument for IndexArc { + /// Index document + /// May block, if the threshold of documents indexed in parallel is exceeded. + async fn index_document(&self, document: Document, file: FileType) { + while self.read().await.shard_queue.read().await.is_empty() { + hint::spin_loop(); + } + + let index_arc_clone = self.clone(); + let index_id = self + .read() + .await + .shard_queue + .write() + .await + .pop_front() + .unwrap(); + let index_shard = self.read().await.shard_vec[index_id].clone(); + + let permit = index_shard + .read() + .await + .permits + .clone() + .acquire_owned() + .await + .unwrap(); + + tokio::spawn(async move { + let index_id2 = index_id; + index_shard.index_document_shard(document, file).await; + + index_arc_clone + .read() + .await + .shard_queue + .write() + .await + .push_back(index_id2); + drop(permit); + }); + } +} + +/// Indexes a single document +#[allow(async_fn_in_trait)] +pub(crate) trait IndexDocumentShard { + /// Indexes a single document + /// May block, if the threshold of documents indexed in parallel is exceeded. + async fn index_document_shard(&self, document: Document, file: FileType); +} + +impl IndexDocumentShard for ShardArc { + /// Index document + /// May block, if the threshold of documents indexed in parallel is exceeded. + async fn index_document_shard(&self, document: Document, file: FileType) { + let shard_arc_clone = self.clone(); + let index_ref = self.read().await; + let schema = index_ref.indexed_schema_vec.clone(); + let ngram_indexing = index_ref.meta.ngram_indexing; + let indexed_field_vec_len = index_ref.indexed_field_vec.len(); + let tokenizer_type = index_ref.meta.tokenizer; + let segment_number_mask1 = index_ref.segment_number_mask1; + drop(index_ref); + + let token_per_field_max: u32 = u16::MAX as u32; + let mut unique_terms: AHashMap = AHashMap::new(); + let mut field_vec: Vec<(usize, u8, u32, u32)> = Vec::new(); + let shard_ref2 = shard_arc_clone.read().await; + + for schema_field in schema.iter() { + if !schema_field.indexed { + continue; + } + + let field_name = &schema_field.field; + + if let Some(field_value) = document.get(field_name) { + let mut non_unique_terms: Vec = Vec::new(); + let mut nonunique_terms_count = 0u32; + + let text = match schema_field.field_type { + FieldType::Text | FieldType::String16 | FieldType::String32 => { + serde_json::from_value::(field_value.clone()) + .unwrap_or(field_value.to_string()) + } + _ => field_value.to_string(), + }; + + let mut query_type_mut = QueryType::Union; + + tokenizer( + &shard_ref2, + &text, + &mut unique_terms, + &mut non_unique_terms, + tokenizer_type, + segment_number_mask1, + &mut nonunique_terms_count, + token_per_field_max, + MAX_POSITIONS_PER_TERM, + false, + &mut query_type_mut, + ngram_indexing, + schema_field.indexed_field_id, + indexed_field_vec_len, + ) + .await; + + let document_length_compressed: u8 = int_to_byte4(nonunique_terms_count); + let document_length_normalized: u32 = + DOCUMENT_LENGTH_COMPRESSION[document_length_compressed as usize]; + field_vec.push(( + schema_field.indexed_field_id, + document_length_compressed, + 
document_length_normalized, + nonunique_terms_count, + )); + } + } + drop(shard_ref2); + + let ngrams: Vec = unique_terms + .iter() + .filter(|term| term.1.ngram_type != NgramType::SingleTerm) + .map(|term| term.1.term.clone()) + .collect(); + + for term in ngrams.iter() { + let ngram = unique_terms.get(term).unwrap(); + + match ngram.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + let term_ngram1 = ngram.term_ngram_1.clone(); + let term_ngram2 = ngram.term_ngram_0.clone(); + + for indexed_field_id in 0..indexed_field_vec_len { + let positions_count_ngram1 = + unique_terms[&term_ngram1].field_positions_vec[indexed_field_id].len(); + let positions_count_ngram2 = + unique_terms[&term_ngram2].field_positions_vec[indexed_field_id].len(); + let ngram = unique_terms.get_mut(term).unwrap(); + + if positions_count_ngram1 > 0 { + ngram + .field_vec_ngram1 + .push((indexed_field_id, positions_count_ngram1 as u32)); + } + if positions_count_ngram2 > 0 { + ngram + .field_vec_ngram2 + .push((indexed_field_id, positions_count_ngram2 as u32)); + } + } + } + _ => { + let term_ngram1 = ngram.term_ngram_2.clone(); + let term_ngram2 = ngram.term_ngram_1.clone(); + let term_ngram3 = ngram.term_ngram_0.clone(); + + for indexed_field_id in 0..indexed_field_vec_len { + let positions_count_ngram1 = + unique_terms[&term_ngram1].field_positions_vec[indexed_field_id].len(); + let positions_count_ngram2 = + unique_terms[&term_ngram2].field_positions_vec[indexed_field_id].len(); + let positions_count_ngram3 = + unique_terms[&term_ngram3].field_positions_vec[indexed_field_id].len(); + let ngram = unique_terms.get_mut(term).unwrap(); + + if positions_count_ngram1 > 0 { + ngram + .field_vec_ngram1 + .push((indexed_field_id, positions_count_ngram1 as u32)); + } + if positions_count_ngram2 > 0 { + ngram + .field_vec_ngram2 + .push((indexed_field_id, positions_count_ngram2 as u32)); + } + if positions_count_ngram3 > 0 { + ngram + .field_vec_ngram3 + .push((indexed_field_id, positions_count_ngram3 as u32)); + } + } + } + } + } + + let document_item = DocumentItem { + document, + unique_terms, + field_vec, + }; + + shard_arc_clone.index_document_2(document_item, file).await; + } +} + +#[allow(async_fn_in_trait)] +pub(crate) trait IndexDocument2 { + async fn index_document_2(&self, document_item: DocumentItem, file: FileType); +} + +impl IndexDocument2 for ShardArc { + async fn index_document_2(&self, document_item: DocumentItem, file: FileType) { + let mut shard_mut = self.write().await; + + let doc_id: usize = shard_mut.indexed_doc_count; + shard_mut.indexed_doc_count += 1; + + let do_commit = shard_mut.block_id != doc_id >> 16; + if do_commit { + shard_mut.commit(doc_id).await; + + shard_mut.block_id = doc_id >> 16; + } + + if !shard_mut.facets.is_empty() { + let facets_size_sum = shard_mut.facets_size_sum; + for i in 0..shard_mut.facets.len() { + let facet = &mut shard_mut.facets[i]; + if let Some(field_value) = document_item.document.get(&facet.name) { + let address = (facets_size_sum * doc_id) + facet.offset; + + match facet.field_type { + FieldType::U8 => { + let value = field_value.as_u64().unwrap_or_default() as u8; + match (&facet.min, &facet.max) { + (ValueType::U8(min), ValueType::U8(max)) => { + if value < *min { + facet.min = ValueType::U8(value); + } + if value > *max { + facet.max = ValueType::U8(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::U8(value); + facet.max = ValueType::U8(value); + } + _ => {} + } + 
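+                            // The facet value is written straight into the memory-mapped facets file:
+                            // every document owns a fixed-size slot of `facets_size_sum` bytes, and
+                            // `address = facets_size_sum * doc_id + facet.offset` addresses this
+                            // facet's column inside that slot, so no per-document lookup table is needed.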
shard_mut.facets_file_mmap[address] = value + } + FieldType::U16 => { + let value = field_value.as_u64().unwrap_or_default() as u16; + match (&facet.min, &facet.max) { + (ValueType::U16(min), ValueType::U16(max)) => { + if value < *min { + facet.min = ValueType::U16(value); + } + if value > *max { + facet.max = ValueType::U16(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::U16(value); + facet.max = ValueType::U16(value); + } + _ => {} + } + write_u16(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::U32 => { + let value = field_value.as_u64().unwrap_or_default() as u32; + match (&facet.min, &facet.max) { + (ValueType::U32(min), ValueType::U32(max)) => { + if value < *min { + facet.min = ValueType::U32(value); + } + if value > *max { + facet.max = ValueType::U32(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::U32(value); + facet.max = ValueType::U32(value); + } + _ => {} + } + write_u32(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::U64 => { + let value = field_value.as_u64().unwrap_or_default(); + match (&facet.min, &facet.max) { + (ValueType::U64(min), ValueType::U64(max)) => { + if value < *min { + facet.min = ValueType::U64(value); + } + if value > *max { + facet.max = ValueType::U64(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::U64(value); + facet.max = ValueType::U64(value); + } + _ => {} + } + write_u64(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::I8 => { + let value = field_value.as_i64().unwrap_or_default() as i8; + match (&facet.min, &facet.max) { + (ValueType::I8(min), ValueType::I8(max)) => { + if value < *min { + facet.min = ValueType::I8(value); + } + if value > *max { + facet.max = ValueType::I8(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::I8(value); + facet.max = ValueType::I8(value); + } + _ => {} + } + write_i8(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::I16 => { + let value = field_value.as_i64().unwrap_or_default() as i16; + match (&facet.min, &facet.max) { + (ValueType::I16(min), ValueType::I16(max)) => { + if value < *min { + facet.min = ValueType::I16(value); + } + if value > *max { + facet.max = ValueType::I16(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::I16(value); + facet.max = ValueType::I16(value); + } + _ => {} + } + write_i16(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::I32 => { + let value = field_value.as_i64().unwrap_or_default() as i32; + match (&facet.min, &facet.max) { + (ValueType::I32(min), ValueType::I32(max)) => { + if value < *min { + facet.min = ValueType::I32(value); + } + if value > *max { + facet.max = ValueType::I32(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::I32(value); + facet.max = ValueType::I32(value); + } + _ => {} + } + write_i32(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::I64 => { + let value = field_value.as_i64().unwrap_or_default(); + match (&facet.min, &facet.max) { + (ValueType::I64(min), ValueType::I64(max)) => { + if value < *min { + facet.min = ValueType::I64(value); + } + if value > *max { + facet.max = ValueType::I64(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::I64(value); + facet.max = ValueType::I64(value); + } + _ => {} + } + write_i64(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::Timestamp => { + let value = 
field_value.as_i64().unwrap_or_default(); + match (&facet.min, &facet.max) { + (ValueType::Timestamp(min), ValueType::Timestamp(max)) => { + if value < *min { + facet.min = ValueType::Timestamp(value); + } + if value > *max { + facet.max = ValueType::Timestamp(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::Timestamp(value); + facet.max = ValueType::Timestamp(value); + } + _ => {} + } + + write_i64(value, &mut shard_mut.facets_file_mmap, address); + } + FieldType::F32 => { + let value = field_value.as_f64().unwrap_or_default() as f32; + match (&facet.min, &facet.max) { + (ValueType::F32(min), ValueType::F32(max)) => { + if value < *min { + facet.min = ValueType::F32(value); + } + if value > *max { + facet.max = ValueType::F32(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::F32(value); + facet.max = ValueType::F32(value); + } + _ => {} + } + + write_f32(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::F64 => { + let value = field_value.as_f64().unwrap_or_default(); + match (&facet.min, &facet.max) { + (ValueType::F64(min), ValueType::F64(max)) => { + if value < *min { + facet.min = ValueType::F64(value); + } + if value > *max { + facet.max = ValueType::F64(value); + } + } + (ValueType::None, ValueType::None) => { + facet.min = ValueType::F64(value); + facet.max = ValueType::F64(value); + } + _ => {} + } + + write_f64(value, &mut shard_mut.facets_file_mmap, address) + } + FieldType::String16 => { + if facet.values.len() < u16::MAX as usize { + let key = serde_json::from_value::(field_value.clone()) + .unwrap_or(field_value.to_string()); + + let key_string = key.clone(); + let key = vec![key]; + + facet.values.entry(key_string.clone()).or_insert((key, 0)).1 += 1; + + let facet_value_id = + facet.values.get_index_of(&key_string).unwrap() as u16; + write_u16(facet_value_id, &mut shard_mut.facets_file_mmap, address) + } + } + + FieldType::StringSet16 => { + if facet.values.len() < u16::MAX as usize { + let mut key: Vec = + serde_json::from_value(field_value.clone()).unwrap(); + key.sort(); + + let key_string = key.join("_"); + facet.values.entry(key_string.clone()).or_insert((key, 0)).1 += 1; + + let facet_value_id = + facet.values.get_index_of(&key_string).unwrap() as u16; + write_u16(facet_value_id, &mut shard_mut.facets_file_mmap, address) + } + } + + FieldType::String32 => { + if facet.values.len() < u32::MAX as usize { + let key = serde_json::from_value::(field_value.clone()) + .unwrap_or(field_value.to_string()); + + let key_string = key.clone(); + let key = vec![key]; + + facet.values.entry(key_string.clone()).or_insert((key, 0)).1 += 1; + + let facet_value_id = + facet.values.get_index_of(&key_string).unwrap() as u32; + write_u32(facet_value_id, &mut shard_mut.facets_file_mmap, address) + } + } + + FieldType::StringSet32 => { + if facet.values.len() < u32::MAX as usize { + let mut key: Vec = + serde_json::from_value(field_value.clone()).unwrap(); + key.sort(); + + let key_string = key.join("_"); + facet.values.entry(key_string.clone()).or_insert((key, 0)).1 += 1; + + let facet_value_id = + facet.values.get_index_of(&key_string).unwrap() as u32; + write_u32(facet_value_id, &mut shard_mut.facets_file_mmap, address) + } + } + + FieldType::Point => { + if let Ok(point) = serde_json::from_value::(field_value.clone()) + && point.len() == 2 + { + if point[0] >= -90.0 + && point[0] <= 90.0 + && point[1] >= -180.0 + && point[1] <= 180.0 + { + let morton_code = encode_morton_2_d(&point); + write_u64(morton_code, 
&mut shard_mut.facets_file_mmap, address) + } else { + println!( + "outside valid coordinate range: {} {}", + point[0], point[1] + ); + } + } + } + + _ => {} + }; + } + } + } + + if !shard_mut.uncommitted { + if shard_mut.segments_level0[0].positions_compressed.is_empty() { + for strip0 in shard_mut.segments_level0.iter_mut() { + strip0.positions_compressed = vec![0; MAX_POSITIONS_PER_TERM * 2]; + } + } + shard_mut.uncommitted = true; + } + + let mut longest_field_id: usize = 0; + let mut longest_field_length: u32 = 0; + for value in document_item.field_vec { + if doc_id == 0 && value.3 > longest_field_length { + longest_field_id = value.0; + longest_field_length = value.3; + } + + shard_mut.document_length_compressed_array[value.0][doc_id & 0b11111111_11111111] = + value.1; + shard_mut.positions_sum_normalized += value.2 as u64; + shard_mut.indexed_field_vec[value.0].field_length_sum += value.2 as usize; + } + + if doc_id == 0 { + if !shard_mut.longest_field_auto { + longest_field_id = shard_mut.longest_field_id; + } + shard_mut.longest_field_id = longest_field_id; + shard_mut.indexed_field_vec[longest_field_id].is_longest_field = true; + if shard_mut.indexed_field_vec.len() > 1 { + println!( + "detect longest field id {} name {} length {}", + longest_field_id, + shard_mut.indexed_field_vec[longest_field_id].schema_field_name, + longest_field_length + ); + } + } + + let mut unique_terms = document_item.unique_terms; + if !shard_mut.synonyms_map.is_empty() { + let unique_terms_clone = unique_terms.clone(); + for term in unique_terms_clone.iter() { + if term.1.ngram_type == NgramType::SingleTerm { + let synonym = shard_mut.synonyms_map.get(&term.1.key_hash).cloned(); + if let Some(synonym) = synonym { + for synonym_term in synonym { + let mut term_clone = term.1.clone(); + term_clone.key_hash = synonym_term.1.0; + term_clone.key0 = synonym_term.1.1; + term_clone.term = synonym_term.0.clone(); + + if let Some(existing) = unique_terms.get_mut(&synonym_term.0) { + existing + .field_positions_vec + .iter_mut() + .zip(term_clone.field_positions_vec.iter()) + .for_each(|(x1, x2)| { + x1.extend_from_slice(x2); + x1.sort_unstable(); + }); + } else { + unique_terms.insert(synonym_term.0.clone(), term_clone); + }; + } + } + } + } + } + + for term in unique_terms { + shard_mut.index_posting(term.1, doc_id, false, 0, 0, 0); + } + + match file { + FileType::PathBuf(file_path) => { + if let Err(e) = shard_mut.copy_file(&file_path, doc_id) { + println!("can't copy PDF {} {}", file_path.display(), e); + } + } + + FileType::Bytes(file_path, file_bytes) => { + if let Err(e) = shard_mut.write_file(&file_bytes, doc_id) { + println!("can't copy PDF {} {}", file_path.display(), e); + } + } + + _ => {} + } + + if !shard_mut.stored_field_names.is_empty() { + shard_mut.store_document(doc_id, document_item.document); + } + + if do_commit { + drop(shard_mut); + warmup(self).await; + } + } +} + +pub(crate) struct DocumentItem { + pub document: Document, + pub unique_terms: AHashMap, + pub field_vec: Vec<(usize, u8, u32, u32)>, +} diff --git a/mobile_app/rust/src/seekstorm/index_posting.rs b/mobile_app/rust/src/seekstorm/index_posting.rs new file mode 100644 index 0000000..01a21aa --- /dev/null +++ b/mobile_app/rust/src/seekstorm/index_posting.rs @@ -0,0 +1,941 @@ +use std::cmp; + +use num::FromPrimitive; + +use crate::{ + compress_postinglist::compress_positions, + index::{ + AccessType, CompressionType, FIELD_STOP_BIT_1, FIELD_STOP_BIT_2, NgramType, + POSTING_BUFFER_SIZE, PostingListObject0, ROARING_BLOCK_SIZE, 
STOP_BIT, Shard, TermObject, + }, + search::binary_search, + utils::{block_copy_mut, read_u16, read_u32, write_u16_ref, write_u32}, +}; + +impl Shard { + pub(crate) fn index_posting( + &mut self, + term: TermObject, + doc_id: usize, + restore: bool, + posting_count_ngram_1_compressed: u8, + posting_count_ngram_2_compressed: u8, + posting_count_ngram_3_compressed: u8, + ) { + if let Some(spelling_correction) = self.meta.spelling_correction.as_ref() + && term.key_hash & 7 == 0 + && (spelling_correction.term_length_threshold.as_ref().is_none() + || spelling_correction + .term_length_threshold + .as_ref() + .unwrap() + .is_empty() + || term.term.len() + >= spelling_correction.term_length_threshold.as_ref().unwrap()[0]) + { + let sum: usize = term + .field_positions_vec + .iter() + .enumerate() + .filter(|&x| self.indexed_schema_vec[x.0].dictionary_source) + .map(|field| field.1.len()) + .sum(); + if sum > 0 { + _ = self + .level_terms + .entry((term.key_hash >> 32) as u32) + .or_insert(term.term.clone()); + } + }; + + let mut positions_count_sum = 0; + let mut field_positions_vec: Vec> = Vec::new(); + for positions_uncompressed in term.field_positions_vec.iter() { + positions_count_sum += positions_uncompressed.len(); + let mut positions: Vec = Vec::new(); + let mut previous_position: u16 = 0; + for pos in positions_uncompressed.iter() { + if positions.is_empty() { + positions.push(*pos); + } else { + positions.push(*pos - previous_position - 1); + } + previous_position = *pos; + } + field_positions_vec.push(positions); + } + + if positions_count_sum == 0 { + println!("empty posting {} docid {}", term.term, doc_id); + return; + } + + if self.postings_buffer_pointer > self.postings_buffer.len() - (POSTING_BUFFER_SIZE >> 4) { + self.postings_buffer + .resize(self.postings_buffer.len() + (POSTING_BUFFER_SIZE >> 2), 0); + } + + let strip_object0 = self.segments_level0.get_mut(term.key0 as usize).unwrap(); + + let value = strip_object0 + .segment + .entry(term.key_hash) + .or_insert(PostingListObject0 { + posting_count_ngram_1_compressed, + posting_count_ngram_2_compressed, + posting_count_ngram_3_compressed, + ..Default::default() + }); + let exists: bool = value.posting_count > 0; + + if self.is_last_level_incomplete && !exists && !restore { + if self.meta.access_type == AccessType::Mmap { + let pointer = self.segments_index[term.key0 as usize] + .byte_array_blocks_pointer + .last() + .unwrap(); + + let key_count = pointer.2 as usize; + + let byte_array_keys = + &self.index_file_mmap[pointer.0 - (key_count * self.key_head_size)..pointer.0]; + let key_index = binary_search( + byte_array_keys, + key_count, + term.key_hash, + self.key_head_size, + ); + + if key_index >= 0 { + let key_address = key_index as usize * self.key_head_size; + let compression_type_pointer = + read_u32(byte_array_keys, key_address + self.key_head_size - 4); + let rank_position_pointer_range = + compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let position_range_previous = if key_index == 0 { + 0 + } else { + let posting_count_previous = + read_u16(byte_array_keys, key_address + 8 - self.key_head_size) + as usize + + 1; + let pointer_pivot_p_docid_previous = + read_u16(byte_array_keys, key_address - 6); + + let posting_pointer_size_sum_previous = pointer_pivot_p_docid_previous + as usize + * 2 + + if (pointer_pivot_p_docid_previous as usize) < posting_count_previous + { + (posting_count_previous - pointer_pivot_p_docid_previous as usize) + * 3 + } else { + 0 + }; + + let 
compression_type_pointer_previous = + read_u32(byte_array_keys, key_address + 18 - self.key_head_size); + let rank_position_pointer_range_previous = compression_type_pointer_previous + & 0b0011_1111_1111_1111_1111_1111_1111_1111; + let compression_type_previous: CompressionType = FromPrimitive::from_i32( + (compression_type_pointer_previous >> 30) as i32, + ) + .unwrap(); + + let compressed_docid_previous = match compression_type_previous { + CompressionType::Array => posting_count_previous * 2, + CompressionType::Bitmap => 8192, + CompressionType::Rle => { + let block_id = doc_id >> 16; + let segment: &crate::index::SegmentIndex = + &self.segments_index[term.key0 as usize]; + let byte_array_docid = &self.index_file_mmap[segment + .byte_array_blocks_pointer[block_id] + .0 + ..segment.byte_array_blocks_pointer[block_id].0 + + segment.byte_array_blocks_pointer[block_id].1]; + + 4 * read_u16( + byte_array_docid, + rank_position_pointer_range_previous as usize + + posting_pointer_size_sum_previous, + ) as usize + + 2 + } + _ => 0, + }; + + rank_position_pointer_range_previous + + (posting_pointer_size_sum_previous + compressed_docid_previous) as u32 + }; + + value.size_compressed_positions_key = + (rank_position_pointer_range - position_range_previous) as usize; + } + } else { + let posting_list_object_index_option = self.segments_index[term.key0 as usize] + .segment + .get(&term.key_hash); + + if let Some(plo) = posting_list_object_index_option { + let block = plo.blocks.last().unwrap(); + if block.block_id as usize == self.level_index.len() - 1 { + let rank_position_pointer_range: u32 = block.compression_type_pointer + & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + value.size_compressed_positions_key = + (rank_position_pointer_range - plo.position_range_previous) as usize; + } + }; + } + } + + let mut posting_pointer_size = + if value.size_compressed_positions_key < 32_768 && value.posting_count < 65_535 { + value.pointer_pivot_p_docid = value.posting_count as u16 + 1; + 2u8 + } else { + 3u8 + }; + + let mut nonempty_field_count = 0; + let mut only_longest_field = true; + for (field_id, item) in field_positions_vec.iter().enumerate() { + if !item.is_empty() { + nonempty_field_count += 1; + + if !self.indexed_field_vec[field_id].is_longest_field { + only_longest_field = false; + } + } + } + + let mut positions_meta_compressed_nonembedded_size = 0; + + match term.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + for (i, field) in term.field_vec_ngram1.iter().enumerate() { + if field_positions_vec.len() == 1 { + positions_meta_compressed_nonembedded_size += if field.1 < 128 { + 1 + } else if field.1 < 16_384 { + 2 + } else { + 3 + }; + } else if term.field_vec_ngram1.len() == 1 + && term.field_vec_ngram1[0].0 == self.longest_field_id + { + positions_meta_compressed_nonembedded_size += if field.1 < 64 { + 1 + } else if field.1 < 8_192 { + 2 + } else { + 3 + }; + } else { + let required_position_count_bits = u32::BITS - field.1.leading_zeros(); + let only_longest_field_bit = if i == 0 { 1 } else { 0 }; + let meta_bits = only_longest_field_bit + + required_position_count_bits + + self.indexed_field_id_bits as u32; + + if meta_bits <= 6 { + positions_meta_compressed_nonembedded_size += 1; + } else if meta_bits <= 13 { + positions_meta_compressed_nonembedded_size += 2; + } else if meta_bits <= 20 { + positions_meta_compressed_nonembedded_size += 3; + } + } + } + for (i, field) in term.field_vec_ngram2.iter().enumerate() { + if 
field_positions_vec.len() == 1 { + positions_meta_compressed_nonembedded_size += if field.1 < 128 { + 1 + } else if field.1 < 16_384 { + 2 + } else { + 3 + }; + } else if term.field_vec_ngram2.len() == 1 + && term.field_vec_ngram2[0].0 == self.longest_field_id + { + positions_meta_compressed_nonembedded_size += if field.1 < 64 { + 1 + } else if field.1 < 8_192 { + 2 + } else { + 3 + }; + } else { + let required_position_count_bits = u32::BITS - field.1.leading_zeros(); + let only_longest_field_bit = if i == 0 { 1 } else { 0 }; + let meta_bits = only_longest_field_bit + + required_position_count_bits + + self.indexed_field_id_bits as u32; + + if meta_bits <= 6 { + positions_meta_compressed_nonembedded_size += 1; + } else if meta_bits <= 13 { + positions_meta_compressed_nonembedded_size += 2; + } else if meta_bits <= 20 { + positions_meta_compressed_nonembedded_size += 3; + } + } + } + } + _ => { + for (i, field) in term.field_vec_ngram1.iter().enumerate() { + if field_positions_vec.len() == 1 { + positions_meta_compressed_nonembedded_size += if field.1 < 128 { + 1 + } else if field.1 < 16_384 { + 2 + } else { + 3 + }; + } else if term.field_vec_ngram1.len() == 1 + && term.field_vec_ngram1[0].0 == self.longest_field_id + { + positions_meta_compressed_nonembedded_size += if field.1 < 64 { + 1 + } else if field.1 < 8_192 { + 2 + } else { + 3 + }; + } else { + let required_position_count_bits = u32::BITS - field.1.leading_zeros(); + let only_longest_field_bit = if i == 0 { 1 } else { 0 }; + let meta_bits = only_longest_field_bit + + required_position_count_bits + + self.indexed_field_id_bits as u32; + + if meta_bits <= 6 { + positions_meta_compressed_nonembedded_size += 1; + } else if meta_bits <= 13 { + positions_meta_compressed_nonembedded_size += 2; + } else if meta_bits <= 20 { + positions_meta_compressed_nonembedded_size += 3; + } + } + } + for (i, field) in term.field_vec_ngram2.iter().enumerate() { + if field_positions_vec.len() == 1 { + positions_meta_compressed_nonembedded_size += if field.1 < 128 { + 1 + } else if field.1 < 16_384 { + 2 + } else { + 3 + }; + } else if term.field_vec_ngram2.len() == 1 + && term.field_vec_ngram2[0].0 == self.longest_field_id + { + positions_meta_compressed_nonembedded_size += if field.1 < 64 { + 1 + } else if field.1 < 8_192 { + 2 + } else { + 3 + }; + } else { + let required_position_count_bits = u32::BITS - field.1.leading_zeros(); + let only_longest_field_bit = if i == 0 { 1 } else { 0 }; + let meta_bits = only_longest_field_bit + + required_position_count_bits + + self.indexed_field_id_bits as u32; + + if meta_bits <= 6 { + positions_meta_compressed_nonembedded_size += 1; + } else if meta_bits <= 13 { + positions_meta_compressed_nonembedded_size += 2; + } else if meta_bits <= 20 { + positions_meta_compressed_nonembedded_size += 3; + } + } + } + for (i, field) in term.field_vec_ngram3.iter().enumerate() { + if field_positions_vec.len() == 1 { + positions_meta_compressed_nonembedded_size += if field.1 < 128 { + 1 + } else if field.1 < 16_384 { + 2 + } else { + 3 + }; + } else if term.field_vec_ngram3.len() == 1 + && term.field_vec_ngram3[0].0 == self.longest_field_id + { + positions_meta_compressed_nonembedded_size += if field.1 < 64 { + 1 + } else if field.1 < 8_192 { + 2 + } else { + 3 + }; + } else { + let required_position_count_bits = u32::BITS - field.1.leading_zeros(); + let only_longest_field_bit = if i == 0 { 1 } else { 0 }; + let meta_bits = only_longest_field_bit + + required_position_count_bits + + self.indexed_field_id_bits as u32; + + if 
meta_bits <= 6 { + positions_meta_compressed_nonembedded_size += 1; + } else if meta_bits <= 13 { + positions_meta_compressed_nonembedded_size += 2; + } else if meta_bits <= 20 { + positions_meta_compressed_nonembedded_size += 3; + } + } + } + } + } + + let mut positions_sum = 0; + let mut positions_vec: Vec = Vec::new(); + let mut field_vec: Vec<(usize, u32)> = Vec::new(); + for (field_id, field) in field_positions_vec.iter().enumerate() { + if !field.is_empty() { + if field_positions_vec.len() == 1 { + positions_meta_compressed_nonembedded_size += if field.len() < 128 { + 1 + } else if field.len() < 16_384 { + 2 + } else { + 3 + }; + } else if only_longest_field { + positions_meta_compressed_nonembedded_size += if field.len() < 64 { + 1 + } else if field.len() < 8_192 { + 2 + } else { + 3 + }; + } else { + let required_position_count_bits = usize::BITS - field.len().leading_zeros(); + let only_longest_field_bit = if field_vec.is_empty() { 1 } else { 0 }; + + let meta_bits = only_longest_field_bit + + required_position_count_bits + + self.indexed_field_id_bits as u32; + + if meta_bits <= 6 { + positions_meta_compressed_nonembedded_size += 1; + } else if meta_bits <= 13 { + positions_meta_compressed_nonembedded_size += 2; + } else if meta_bits <= 20 { + positions_meta_compressed_nonembedded_size += 3; + } + } + + positions_sum += field.len(); + if self.indexed_field_vec.len() > 1 && field.len() <= 4 { + positions_vec.append(&mut field.clone()) + }; + + field_vec.push((field_id, field.len() as u32)); + } + } + + let mut embed_flag = term.ngram_type == NgramType::SingleTerm; + + if self.indexed_field_vec.len() == 1 { + if posting_pointer_size == 2 { + embed_flag &= positions_sum <= 2 + && ((positions_sum == 1 + && u16::BITS - field_positions_vec[0][0].leading_zeros() <= 14) + || (positions_sum == 2 + && u16::BITS - field_positions_vec[0][0].leading_zeros() <= 7 + && u16::BITS - field_positions_vec[0][1].leading_zeros() <= 7)); + } else { + embed_flag &= positions_sum <= 4 + && ((positions_sum == 1 + && u16::BITS - field_positions_vec[0][0].leading_zeros() <= 21) + || (positions_sum == 2 + && u16::BITS - field_positions_vec[0][0].leading_zeros() <= 10 + && u16::BITS - field_positions_vec[0][1].leading_zeros() <= 11) + || (positions_sum == 3 + && u16::BITS - field_positions_vec[0][0].leading_zeros() <= 7 + && u16::BITS - field_positions_vec[0][1].leading_zeros() <= 7 + && u16::BITS - field_positions_vec[0][2].leading_zeros() <= 7) + || (positions_sum == 4 + && u16::BITS - field_positions_vec[0][0].leading_zeros() <= 5 + && u16::BITS - field_positions_vec[0][1].leading_zeros() <= 5 + && u16::BITS - field_positions_vec[0][2].leading_zeros() <= 5 + && u16::BITS - field_positions_vec[0][3].leading_zeros() <= 6)); + } + } else if only_longest_field { + if posting_pointer_size == 2 { + embed_flag &= positions_sum <= 2 + && ((positions_sum == 1 && u16::BITS - positions_vec[0].leading_zeros() <= 13) + || (positions_sum == 2 + && u16::BITS - positions_vec[0].leading_zeros() <= 6 + && u16::BITS - positions_vec[1].leading_zeros() <= 7)); + } else { + embed_flag &= positions_sum <= 4 + && ((positions_sum == 1 && u16::BITS - positions_vec[0].leading_zeros() <= 20) + || (positions_sum == 2 + && u16::BITS - positions_vec[0].leading_zeros() <= 10 + && u16::BITS - positions_vec[1].leading_zeros() <= 10) + || (positions_sum == 3 + && u16::BITS - positions_vec[0].leading_zeros() <= 6 + && u16::BITS - positions_vec[1].leading_zeros() <= 7 + && u16::BITS - positions_vec[2].leading_zeros() <= 7) + || (positions_sum 
== 4 + && u16::BITS - positions_vec[0].leading_zeros() <= 5 + && u16::BITS - positions_vec[1].leading_zeros() <= 5 + && u16::BITS - positions_vec[2].leading_zeros() <= 5 + && u16::BITS - positions_vec[3].leading_zeros() <= 5)); + } + } else { + let used_bits = nonempty_field_count * self.indexed_field_id_bits as u32; + let bits = if posting_pointer_size == 2 { 12 } else { 19 }; + let remaining_bits_new = if used_bits < bits { + bits - used_bits + } else { + embed_flag = false; + 0 + }; + + if posting_pointer_size == 2 { + embed_flag &= positions_sum <= 3 + && ((positions_sum == 1 + && u16::BITS - positions_vec[0].leading_zeros() <= remaining_bits_new) + || (positions_sum == 2 + && u16::BITS - positions_vec[0].leading_zeros() + <= remaining_bits_new / 2 + && u16::BITS - positions_vec[1].leading_zeros() + <= remaining_bits_new - remaining_bits_new / 2) + || (positions_sum == 3 + && nonempty_field_count == 1 + && u16::BITS - positions_vec[0].leading_zeros() + <= remaining_bits_new / 3 + && u16::BITS - positions_vec[1].leading_zeros() + <= (remaining_bits_new - remaining_bits_new / 3) / 2 + && u16::BITS - positions_vec[2].leading_zeros() + <= remaining_bits_new + - (remaining_bits_new - remaining_bits_new / 3) / 2 + - (remaining_bits_new / 3))); + } else { + embed_flag &= positions_sum <= 4 + && ((positions_sum == 1 + && u16::BITS - positions_vec[0].leading_zeros() <= remaining_bits_new) + || (positions_sum == 2 + && u16::BITS - positions_vec[0].leading_zeros() + <= remaining_bits_new / 2 + && u16::BITS - positions_vec[1].leading_zeros() + <= remaining_bits_new - remaining_bits_new / 2) + || (positions_sum == 3 + && u16::BITS - positions_vec[0].leading_zeros() + <= remaining_bits_new / 3 + && u16::BITS - positions_vec[1].leading_zeros() + <= (remaining_bits_new - remaining_bits_new / 3) / 2 + && u16::BITS - positions_vec[2].leading_zeros() + <= remaining_bits_new + - (remaining_bits_new - remaining_bits_new / 3) / 2 + - (remaining_bits_new / 3)) + || (positions_sum == 4 + && nonempty_field_count == 1 + && u16::BITS - positions_vec[0].leading_zeros() + <= remaining_bits_new / 4 + && u16::BITS - positions_vec[1].leading_zeros() + <= (remaining_bits_new - remaining_bits_new / 4) / 3 + && u16::BITS - positions_vec[2].leading_zeros() + <= (remaining_bits_new + - (remaining_bits_new - remaining_bits_new / 4) / 3 + - (remaining_bits_new / 4)) + / 2 + && u16::BITS - positions_vec[3].leading_zeros() + <= remaining_bits_new + - remaining_bits_new / 4 + - (remaining_bits_new - remaining_bits_new / 4) / 3 + - (remaining_bits_new + - (remaining_bits_new - remaining_bits_new / 4) / 3 + - (remaining_bits_new / 4)) + / 2)); + } + }; + + let mut write_pointer_base = self.postings_buffer_pointer; + let mut write_pointer = self.postings_buffer_pointer + 8; + + let mut positions_compressed_pointer = 0usize; + let positions_stack = if embed_flag { + 0 + } else { + for field_positions in field_positions_vec.iter() { + compress_positions( + field_positions, + &mut strip_object0.positions_compressed, + &mut positions_compressed_pointer, + ); + } + + let exceeded = posting_pointer_size == 2 + && (value.size_compressed_positions_key + + positions_meta_compressed_nonembedded_size + + positions_compressed_pointer + >= 32_768); + if exceeded { + posting_pointer_size = 3; + value.pointer_pivot_p_docid = value.posting_count as u16; + } + + positions_meta_compressed_nonembedded_size + positions_compressed_pointer + }; + + let compressed_position_size = if embed_flag { + let mut positions_vec: Vec = Vec::new(); + let mut 
data: u32 = 0; + for field in field_vec.iter() { + for pos in field_positions_vec[field.0].iter() { + positions_vec.push(*pos); + } + if self.indexed_field_vec.len() > 1 && !only_longest_field { + data <<= self.indexed_field_id_bits; + data |= field.0 as u32; + } + } + + let mut remaining_bits = posting_pointer_size as usize * 8 + - if posting_pointer_size == 2 { 0 } else { 1 } + - if self.indexed_field_vec.len() == 1 { + 2 + } else if only_longest_field { + 3 + } else { + 4 + nonempty_field_count as usize * self.indexed_field_id_bits + }; + for (i, position) in positions_vec.iter().enumerate() { + let position_bits = remaining_bits / (positions_vec.len() - i); + remaining_bits -= position_bits; + data <<= position_bits; + data |= *position as u32; + } + + if posting_pointer_size == 2 { + self.postings_buffer[write_pointer] = (data & 0b11111111) as u8; + if self.indexed_field_vec.len() == 1 { + self.postings_buffer[write_pointer + 1] = + (data >> 8) as u8 | 0b10000000 | ((positions_vec.len() - 1) << 6) as u8; + } else if only_longest_field { + self.postings_buffer[write_pointer + 1] = + (data >> 8) as u8 | 0b11000000 | ((positions_vec.len() - 1) << 5) as u8; + } else if nonempty_field_count == 1 { + self.postings_buffer[write_pointer + 1] = + (data >> 8) as u8 | 0b10000000 | ((positions_vec.len() - 1) << 4) as u8; + } else { + self.postings_buffer[write_pointer + 1] = (data >> 8) as u8 | 0b10110000; + }; + } else { + self.postings_buffer[write_pointer] = (data & 0b11111111) as u8; + self.postings_buffer[write_pointer + 1] = ((data >> 8) & 0b11111111) as u8; + if self.indexed_field_vec.len() == 1 { + self.postings_buffer[write_pointer + 2] = + (data >> 16) as u8 | 0b10000000 | ((positions_vec.len() - 1) << 5) as u8; + } else if only_longest_field { + self.postings_buffer[write_pointer + 2] = + (data >> 16) as u8 | 0b11000000 | ((positions_vec.len() - 1) << 4) as u8; + } else { + self.postings_buffer[write_pointer + 2] = (data >> 16) as u8 + | 0b10000000 + | if nonempty_field_count == 1 { + ((positions_vec.len() - 1) << 3) as u8 + } else if nonempty_field_count == 3 { + 0b00111000 + } else if field_vec[0].1 == 1 && field_vec[1].1 == 1 { + 0b00100000 + } else if field_vec[0].1 == 1 && field_vec[1].1 == 2 { + 0b00101000 + } else { + 0b00110000 + }; + } + } + + write_pointer += posting_pointer_size as usize; + posting_pointer_size as usize + } else { + let write_pointer_start = write_pointer; + + match term.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + write_field_vec( + &mut self.postings_buffer, + &mut write_pointer, + &term.field_vec_ngram1, + self.indexed_field_vec.len(), + term.field_vec_ngram1.len() == 1 + && term.field_vec_ngram1[0].0 == self.longest_field_id, + term.field_vec_ngram1.len() as u32, + self.indexed_field_id_bits as u32, + ); + write_field_vec( + &mut self.postings_buffer, + &mut write_pointer, + &term.field_vec_ngram2, + self.indexed_field_vec.len(), + term.field_vec_ngram2.len() == 1 + && term.field_vec_ngram2[0].0 == self.longest_field_id, + term.field_vec_ngram2.len() as u32, + self.indexed_field_id_bits as u32, + ); + } + _ => { + write_field_vec( + &mut self.postings_buffer, + &mut write_pointer, + &term.field_vec_ngram1, + self.indexed_field_vec.len(), + term.field_vec_ngram1.len() == 1 + && term.field_vec_ngram1[0].0 == self.longest_field_id, + term.field_vec_ngram1.len() as u32, + self.indexed_field_id_bits as u32, + ); + write_field_vec( + &mut self.postings_buffer, + &mut write_pointer, + 
&term.field_vec_ngram2, + self.indexed_field_vec.len(), + term.field_vec_ngram2.len() == 1 + && term.field_vec_ngram2[0].0 == self.longest_field_id, + term.field_vec_ngram2.len() as u32, + self.indexed_field_id_bits as u32, + ); + write_field_vec( + &mut self.postings_buffer, + &mut write_pointer, + &term.field_vec_ngram3, + self.indexed_field_vec.len(), + term.field_vec_ngram3.len() == 1 + && term.field_vec_ngram3[0].0 == self.longest_field_id, + term.field_vec_ngram3.len() as u32, + self.indexed_field_id_bits as u32, + ); + } + } + + write_field_vec( + &mut self.postings_buffer, + &mut write_pointer, + &field_vec, + self.indexed_field_vec.len(), + only_longest_field, + nonempty_field_count, + self.indexed_field_id_bits as u32, + ); + + block_copy_mut( + &mut strip_object0.positions_compressed, + 0, + &mut self.postings_buffer, + write_pointer, + positions_compressed_pointer, + ); + + write_pointer += positions_compressed_pointer; + write_pointer - write_pointer_start + }; + + let docid_lsb = (doc_id & 0xFFFF) as u16; + if exists { + value.posting_count += 1; + value.position_count += positions_count_sum; + value.size_compressed_positions_key += positions_stack; + if docid_lsb > value.docid_old { + value.docid_delta_max = + cmp::max(value.docid_delta_max, docid_lsb - value.docid_old - 1); + } + value.docid_old = docid_lsb; + + write_u32( + write_pointer_base as u32, + &mut self.postings_buffer, + value.pointer_last, + ); + + value.pointer_last = write_pointer_base; + } else if term.ngram_type == NgramType::NgramFF + || term.ngram_type == NgramType::NgramRF + || term.ngram_type == NgramType::NgramFR + { + *value = PostingListObject0 { + pointer_first: write_pointer_base, + pointer_last: write_pointer_base, + posting_count: 1, + position_count: positions_count_sum, + ngram_type: term.ngram_type.clone(), + term_ngram1: term.term_ngram_1, + term_ngram2: term.term_ngram_0, + term_ngram3: term.term_ngram_2, + size_compressed_positions_key: value.size_compressed_positions_key + + positions_stack, + docid_delta_max: docid_lsb, + docid_old: docid_lsb, + ..*value + }; + } else { + *value = PostingListObject0 { + pointer_first: write_pointer_base, + pointer_last: write_pointer_base, + posting_count: 1, + position_count: positions_count_sum, + ngram_type: term.ngram_type.clone(), + term_ngram1: term.term_ngram_2, + term_ngram2: term.term_ngram_1, + term_ngram3: term.term_ngram_0, + size_compressed_positions_key: value.size_compressed_positions_key + + positions_stack, + docid_delta_max: docid_lsb, + docid_old: docid_lsb, + ..*value + }; + } + + write_pointer_base += 4; + + write_u16_ref( + docid_lsb, + &mut self.postings_buffer, + &mut write_pointer_base, + ); + + if positions_compressed_pointer + 2 > ROARING_BLOCK_SIZE { + println!( + "compressed positions size exceeded: {}", + positions_compressed_pointer + 2 + ) + }; + + if !embed_flag && positions_stack != compressed_position_size { + println!( + "size conflict: embed {} term {} ngram_type {:?} frequent {} pos_count {} : positions_stack {} compressed_position_size {} : positions_compressed_pointer {} posting_pointer_size {} docid {}", + embed_flag, + term.term, + term.ngram_type, + only_longest_field, + positions_count_sum, + positions_stack, + compressed_position_size, + positions_compressed_pointer, + posting_pointer_size, + doc_id + ); + } + + write_u16_ref( + if embed_flag { + compressed_position_size | 0b10000000_00000000 + } else { + compressed_position_size & 0b01111111_11111111 + } as u16, + &mut self.postings_buffer, + &mut 
write_pointer_base, + ); + + self.postings_buffer_pointer = write_pointer; + } +} + +pub(crate) fn write_field_vec( + postings_buffer: &mut [u8], + write_pointer: &mut usize, + field_vec: &[(usize, u32)], + indexed_field_vec_len: usize, + only_longest_field: bool, + nonempty_field_count: u32, + indexed_field_id_bits: u32, +) { + for (i, field) in field_vec.iter().enumerate() { + if indexed_field_vec_len == 1 { + if field.1 < 128 { + postings_buffer[*write_pointer] = field.1 as u8 | STOP_BIT; + *write_pointer += 1; + } else if field.1 < 16_384 { + postings_buffer[*write_pointer] = (field.1 >> 7) as u8; + *write_pointer += 1; + postings_buffer[*write_pointer] = (field.1 & 0b01111111) as u8 | STOP_BIT; + *write_pointer += 1; + } else if field.1 < 2_097_152 { + postings_buffer[*write_pointer] = (field.1 >> 14) as u8; + *write_pointer += 1; + postings_buffer[*write_pointer] = ((field.1 >> 7) & 0b01111111) as u8; + *write_pointer += 1; + + postings_buffer[*write_pointer] = (field.1 & 0b01111111) as u8 | STOP_BIT; + *write_pointer += 1; + } else { + println!("positionCount exceeded1: {}", field.1); + } + } else if only_longest_field { + if field.1 < 64 { + postings_buffer[*write_pointer] = field.1 as u8 | 0b11000000; + *write_pointer += 1; + } else if field.1 < 8_192 { + postings_buffer[*write_pointer] = (field.1 >> 7) as u8 | 0b01000000; + *write_pointer += 1; + postings_buffer[*write_pointer] = (field.1 & 0b01111111) as u8 | STOP_BIT; + *write_pointer += 1; + } else if field.1 < 1_048_576 { + postings_buffer[*write_pointer] = (field.1 >> 14) as u8 | 0b01000000; + *write_pointer += 1; + postings_buffer[*write_pointer] = ((field.1 >> 7) & 0b01111111) as u8; + *write_pointer += 1; + + postings_buffer[*write_pointer] = (field.1 & 0b01111111) as u8 | STOP_BIT; + *write_pointer += 1; + } else { + println!("positionCount exceeded2: {}", field.1); + } + } else { + let field_stop_bit = if i == nonempty_field_count as usize - 1 { + if i == 0 { + FIELD_STOP_BIT_1 + } else { + FIELD_STOP_BIT_2 + } + } else { + 0b00000000 + }; + + let required_position_count_bits = u32::BITS - field.1.leading_zeros(); + + let field_id_position_count = ((field.1 as usize) << indexed_field_id_bits) | field.0; + let only_longest_field_bit = if i == 0 { 1 } else { 0 }; + let meta_bits = + only_longest_field_bit + required_position_count_bits + indexed_field_id_bits; + + if meta_bits <= 6 { + postings_buffer[*write_pointer] = + field_stop_bit | field_id_position_count as u8 | STOP_BIT; + *write_pointer += 1; + } else if meta_bits <= 13 { + postings_buffer[*write_pointer] = + field_stop_bit | (field_id_position_count >> 7) as u8; + *write_pointer += 1; + postings_buffer[*write_pointer] = + (field_id_position_count & 0b01111111) as u8 | STOP_BIT; + *write_pointer += 1; + } else if meta_bits <= 20 { + postings_buffer[*write_pointer] = + field_stop_bit | (field_id_position_count >> 14) as u8; + *write_pointer += 1; + postings_buffer[*write_pointer] = + ((field_id_position_count >> 7) & 0b01111111) as u8; + *write_pointer += 1; + postings_buffer[*write_pointer] = + (field_id_position_count & 0b01111111) as u8 | STOP_BIT; + *write_pointer += 1; + } else { + println!("positionCount exceeded3: {} ", field_id_position_count); + } + } + } +} diff --git a/mobile_app/rust/src/seekstorm/ingest.rs b/mobile_app/rust/src/seekstorm/ingest.rs new file mode 100644 index 0000000..11c137f --- /dev/null +++ b/mobile_app/rust/src/seekstorm/ingest.rs @@ -0,0 +1,756 @@ +use std::{ + collections::HashMap, + ffi::OsStr, + fs::{File, metadata}, + 
io::{self, BufReader, Read}, + path::PathBuf, + sync::Arc, + time::{Instant, SystemTime}, +}; + +use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; +use colored::Colorize; +use csv::{ReaderBuilder, Terminator}; +use num_format::{Locale, ToFormattedString}; +#[cfg(feature = "pdf")] +use pdfium_render::prelude::{PdfDocumentMetadataTagType, Pdfium}; +use serde_json::{Deserializer, json}; +use tokio::sync::RwLock; +use walkdir::WalkDir; + +use crate::{ + commit::Commit, + index::{Document, FileType, Index, IndexArc, IndexDocument}, + utils::truncate, +}; + +use lazy_static::lazy_static; + +#[cfg(feature = "pdf")] +type PdfDocument<'a> = pdfium_render::prelude::PdfDocument<'a>; +#[cfg(not(feature = "pdf"))] +type PdfDocument<'a> = (); + +#[cfg(feature = "pdf")] +lazy_static! { + pub(crate) static ref pdfium_option: Option = if let Ok(pdfium) = + Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./")) + .or_else(|_| Pdfium::bind_to_system_library()) + { + Some(Pdfium::new(pdfium)) + } else { + None + }; +} + +fn read_skipping_ws(mut reader: impl Read) -> io::Result { + loop { + let mut byte = 0u8; + reader.read_exact(std::slice::from_mut(&mut byte))?; + if !byte.is_ascii_whitespace() { + return Ok(byte); + } + } +} + +/// Index PDF file from local disk. +/// - converts pdf to text and indexes it +/// - extracts title from metatag, or first line of text, or from filename +/// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) +/// - copies all ingested pdf files to "files" subdirectory in index +/// # Arguments +/// * `file_path` - PathBuf to the file +/// # Returns +/// * `Result<(), String>` - Ok(()) or Err(String) +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +pub trait IndexPdfFile { + /// Index PDF file from local disk. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - PathBuf to the file + /// # Returns + /// * `Result<(), String>` - Ok(()) or Err(String) + async fn index_pdf_file(&self, file_path: &PathBuf) -> Result<(), String>; +} + +impl IndexPdfFile for IndexArc { + /// Index PDF file from local disk. 
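+ /// A hedged usage sketch; the `index_arc` handle and the file path below are illustrative
+ /// assumptions, not part of this crate:
+ /// ```ignore
+ /// use std::path::PathBuf;
+ /// // assumes an `index_arc: IndexArc` was created/opened elsewhere
+ /// if let Err(e) = index_arc.index_pdf_file(&PathBuf::from("reports/q3.pdf")).await {
+ ///     println!("could not index PDF: {e}");
+ /// }
+ /// ```
+ ///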
+ /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + async fn index_pdf_file(&self, file_path: &PathBuf) -> Result<(), String> { + #[cfg(feature = "pdf")] + { + if let Some(pdfium) = pdfium_option.as_ref() { + let file_size = file_path.metadata().unwrap().len() as usize; + + let date: DateTime = if let Ok(metadata) = metadata(file_path) { + if let Ok(time) = metadata.created() { + time + } else { + SystemTime::now() + } + } else { + SystemTime::now() + } + .into(); + let file_date = date.timestamp(); + + if let Ok(pdf) = pdfium.load_pdf_from_file(file_path, None) { + self.index_pdf( + file_path, + file_size, + file_date, + FileType::PathBuf(file_path.into()), + pdf, + ) + .await; + Ok(()) + } else { + println!("can't read PDF {} {}", file_path.display(), file_size); + Err("can't read PDF".to_string()) + } + } else { + println!( + "Pdfium library not found: download and copy into the same folder as the seekstorm_server.exe: https://github.com/bblanchon/pdfium-binaries" + ); + Err("Pdfium library not found".to_string()) + } + } + #[cfg(not(feature = "pdf"))] + { + println!("pdf feature flag not enabled"); + Err("pdf feature flag not enabled".to_string()) + } + } +} + +/// Index PDF file from byte array. +/// - converts pdf to text and indexes it +/// - extracts title from metatag, or first line of text, or from filename +/// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) +/// - copies all ingested pdf files to "files" subdirectory in index +/// # Arguments +/// * `file_path` - PathBuf to the file (fallback, if title and date can't be extracted) +/// * `file_date` - File creation date (Unix timestamp: the number of seconds since 1 January 1970) (fallback, if date can't be extracted) +/// * `file_bytes` - Byte array of the file +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +pub trait IndexPdfBytes { + /// Index PDF file from byte array. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - PathBuf to the file (fallback, if title and date can't be extracted) + /// * `file_date` - File creation date (Unix timestamp: the number of seconds since 1 January 1970) (fallback, if date can't be extracted) + /// * `file_bytes` - Byte array of the file + async fn index_pdf_bytes( + &self, + file_path: &PathBuf, + file_date: i64, + file_bytes: &[u8], + ) -> Result<(), String>; +} + +#[cfg(feature = "pdf")] +impl IndexPdfBytes for IndexArc { + /// Index PDF file from byte array. 
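+ /// A hedged sketch of indexing an in-memory PDF; the handle, path, and timestamp are illustrative:
+ /// ```ignore
+ /// use std::path::PathBuf;
+ /// // `bytes` holds the raw PDF; the Unix timestamp is only a fallback if no
+ /// // CreationDate tag can be extracted from the PDF metadata
+ /// let bytes = std::fs::read("upload.pdf").unwrap();
+ /// index_arc
+ ///     .index_pdf_bytes(&PathBuf::from("upload.pdf"), 1_700_000_000, &bytes)
+ ///     .await
+ ///     .ok();
+ /// ```
+ ///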
+ /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - PathBuf to the file (fallback, if title and date can't be extracted) + /// * `file_date` - File creation date (Unix timestamp: the number of seconds since 1 January 1970) (fallback, if date can't be extracted) + /// * `file_bytes` - Byte array of the file + async fn index_pdf_bytes( + &self, + file_path: &PathBuf, + file_date: i64, + file_bytes: &[u8], + ) -> Result<(), String> { + if let Some(pdfium) = pdfium_option.as_ref() { + let file_size = file_bytes.len(); + if let Ok(pdf) = pdfium.load_pdf_from_byte_slice(file_bytes, None) { + self.index_pdf( + file_path, + file_size, + file_date, + FileType::Bytes(file_path.into(), file_bytes.into()), + pdf, + ) + .await; + Ok(()) + } else { + println!("can't read PDF {} {}", file_path.display(), file_size); + Err("can't read PDF".to_string()) + } + } else { + println!( + "Pdfium library not found: download and copy into the same folder as the seekstorm_server.exe: https://github.com/bblanchon/pdfium-binaries" + ); + Err("Pdfium library not found".to_string()) + } + } +} + +#[cfg(not(feature = "pdf"))] +impl IndexPdfBytes for IndexArc { + /// Index PDF file from byte array. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - PathBuf to the file (fallback, if title and date can't be extracted) + /// * `file_date` - File creation date (Unix timestamp: the number of seconds since 1 January 1970) (fallback, if date can't be extracted) + /// * `file_bytes` - Byte array of the file + async fn index_pdf_bytes( + &self, + file_path: &PathBuf, + file_date: i64, + file_bytes: &[u8], + ) -> Result<(), String> { + println!("pdf feature flag not enabled"); + Err("pdf feature flag not enabled".to_string()) + } +} + +/// Index PDF file from local disk or byte array. +/// - converts pdf to text and indexes it +/// - extracts title from metatag, or first line of text, or from filename +/// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) +/// - copies all ingested pdf files to "files" subdirectory in index +/// # Arguments +/// * `file_path` - PathBuf to the file (fallback, if title and date can't be extracted) +/// * `file_date` - File creation date (Unix timestamp: the number of seconds since 1 January 1970) (fallback, if date can't be extracted) +/// * `file` - FileType::PathBuf or FileType::Bytes +/// * `pdf` - pdfium_render::prelude::PdfDocument +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +trait IndexPdf { + async fn index_pdf( + &self, + file_path: &PathBuf, + file_size: usize, + file_date: i64, + file: FileType, + pdf: PdfDocument<'_>, + ); +} + +#[cfg(feature = "pdf")] +impl IndexPdf for IndexArc { + /// Index PDF file from local disk or byte array. 
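+ /// The PDF `CreationDate` tag is normalised into RFC 3339 before parsing; a hedged sketch of
+ /// the intended transformation (concrete values are illustrative):
+ /// ```ignore
+ /// // "D:20240131120000+01'00'" -> "2024-01-31T12:00:00+01:00"
+ /// // "D:20240131120000Z"       -> "2024-01-31T12:00:00+00:00"
+ /// // anything unparseable falls back to the file creation date
+ /// ```
+ ///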
+ /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + async fn index_pdf( + &self, + file_path: &PathBuf, + file_size: usize, + file_date: i64, + file: FileType, + pdf: PdfDocument<'_>, + ) { + let mut text = String::with_capacity(file_size); + + pdf.pages().iter().for_each(|page| { + text.push_str(&page.text().unwrap().all()); + text.push_str(" \n"); + }); + + if text.is_empty() { + println!("can't extract text from PDF {}", file_path.display(),); + } else { + let meta = pdf.metadata(); + + let title = if let Some(title) = meta.get(PdfDocumentMetadataTagType::Title) { + title.value().to_owned() + } else { + let mut i = 0; + let mut lines = text.lines(); + loop { + i += 1; + if let Some(title) = lines.next() { + if title.trim().len() > 1 { + break truncate(title, 160).trim().to_owned(); + } else if i < 10 { + continue; + } + } + + break file_path + .file_stem() + .unwrap() + .to_string_lossy() + .to_string() + .replace("_", ""); + } + }; + + let mut creation_timestamp = + if let Some(date) = meta.get(PdfDocumentMetadataTagType::CreationDate) { + let mut date_string = if date.value().starts_with("D:") { + &date.value()[2..] + } else { + &date.value()[0..] + }; + + if date_string.len() > 14 + && date_string + .chars() + .nth(14) + .unwrap() + .eq_ignore_ascii_case(&'z') + { + date_string = &date_string[0..14]; + } + + if date_string.len() == 14 + || date_string.len() == 19 + || date_string.len() == 20 + || date_string.len() == 21 + { + let mut date_string2 = String::with_capacity(23); + date_string2.push_str(&date_string[0..4]); + date_string2.push('-'); + date_string2.push_str(&date_string[4..6]); + date_string2.push('-'); + date_string2.push_str(&date_string[6..8]); + date_string2.push('T'); + date_string2.push_str(&date_string[8..10]); + date_string2.push(':'); + date_string2.push_str(&date_string[10..12]); + date_string2.push(':'); + date_string2.push_str(&date_string[12..14]); + if date_string.len() == 14 { + date_string2.push_str("+00:00") + } else if date_string.chars().nth(17).unwrap() == '\'' { + date_string2.push_str(&date_string[14..17]); + date_string2.push(':'); + date_string2.push_str(&date_string[18..20]); + } else { + date_string2.push_str(&date_string[14..17]); + date_string2.push(':'); + date_string2.push_str(&date_string[17..19]); + } + + if let Ok(date) = DateTime::parse_from_rfc3339(&date_string2) { + date.timestamp() + } else { + file_date + } + } else if let Ok(date) = + NaiveDateTime::parse_from_str(date.value(), "%a %b %e %H:%M:%S %Y") + .map(|ndt| Utc.from_utc_datetime(&ndt)) + { + date.timestamp() + } else if let Ok(date) = + NaiveDateTime::parse_from_str(date.value(), "%Y/%m/%d %H:%M:%S") + .map(|ndt| Utc.from_utc_datetime(&ndt)) + { + date.timestamp() + } else if let Ok(date) = + NaiveDateTime::parse_from_str(date.value(), "%m/%e/%Y %H:%M:%S") + .map(|ndt| Utc.from_utc_datetime(&ndt)) + { + date.timestamp() + } else { + file_date + } + } else { + file_date + }; + + if creation_timestamp > Utc::now().timestamp() || creation_timestamp < 0 { + creation_timestamp = file_date; + } + + let document: Document = HashMap::from([ + ("title".to_string(), json!(title)), + ("body".to_string(), json!(text)), + ("url".to_string(), json!(&file_path.display().to_string())), + ("date".to_string(), 
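+ // Unix timestamp in seconds; replaced by the file date if the PDF tag is missing or implausible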
json!(creation_timestamp)), + ]); + + self.index_document(document, file).await; + + let date_time = Utc.timestamp_opt(creation_timestamp, 0).unwrap(); + println!( + "indexed {} {} {} {}", + date_time.format("%d/%m/%Y %H:%M"), + file_path.display(), + text.len().to_formatted_string(&Locale::en), + title + ); + } + } +} + +#[cfg(not(feature = "pdf"))] +impl IndexPdf for IndexArc { + /// Index PDF file from local disk or byte array. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + async fn index_pdf( + &self, + file_path: &PathBuf, + file_size: usize, + file_date: i64, + file: FileType, + pdf: PdfDocument<'_>, + ) { + println!("pdf feature flag not enabled"); + } +} + +pub(crate) async fn path_recurse( + index_arc: &Arc>, + data_path: &PathBuf, + docid: &mut usize, +) { + for entry in WalkDir::new(data_path) { + let entry = entry.unwrap(); + let path = entry.path(); + + let md = metadata(path).unwrap(); + if md.is_file() + && let Some(extension) = path.extension().and_then(OsStr::to_str) + && extension.to_lowercase() == "pdf" + && index_arc.index_pdf_file(path).await.is_ok() + { + *docid += 1; + }; + } +} + +/// Index PDF files from local disk directory and sub-directories or from file. +/// - converts pdf to text and indexes it +/// - extracts title from metatag, or first line of text, or from filename +/// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) +/// - copies all ingested pdf files to "files" subdirectory in index +/// # Arguments +/// * `file_path` - PathBuf to the file +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +pub trait IngestPdf { + /// Index PDF files from local disk directory and sub-directories or from file. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + /// # Arguments + /// * `file_path` - PathBuf to the file + async fn ingest_pdf(&mut self, file_path: &PathBuf); +} + +#[cfg(feature = "pdf")] +impl IngestPdf for IndexArc { + /// Index PDF files from local disk directory and sub-directories or from file. 
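+ /// A hedged usage sketch for bulk ingestion; the handle and directory are illustrative:
+ /// ```ignore
+ /// use std::path::PathBuf;
+ /// // assumes a mutable `index_arc: IndexArc`; directories are walked recursively,
+ /// // and a commit is performed once all PDFs have been indexed
+ /// index_arc.ingest_pdf(&PathBuf::from("./pdf_corpus")).await;
+ /// ```
+ ///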
+ /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + async fn ingest_pdf(&mut self, data_path: &PathBuf) { + if pdfium_option.is_some() { + match data_path.exists() { + true => { + println!("ingesting PDF from: {}", data_path.display()); + + let start_time = Instant::now(); + let mut docid = 0usize; + + let index_ref = self.read().await; + drop(index_ref); + + let md = metadata(data_path).unwrap(); + if md.is_file() { + if let Some(extension) = PathBuf::new(&data_path.display().to_string()) + .extension() + .and_then(OsStr::to_str) + && extension.to_lowercase() == "pdf" + && self.index_pdf_file(data_path).await.is_ok() + { + docid += 1; + } + } else { + path_recurse(self, data_path, &mut docid).await; + } + + self.commit().await; + + let elapsed_time = start_time.elapsed().as_nanos(); + + println!( + "{}: docs {} docs/sec {} docs/day {} minutes {:.2} seconds {}", + "Indexing finished".green(), + docid.to_formatted_string(&Locale::en), + (docid as u128 * 1_000_000_000 / elapsed_time) + .to_formatted_string(&Locale::en), + ((docid as u128 * 1_000_000_000 / elapsed_time) * 3600 * 24) + .to_formatted_string(&Locale::en), + elapsed_time as f64 / 1_000_000_000.0 / 60.0, + elapsed_time / 1_000_000_000 + ); + } + false => { + println!("data file not found: {}", data_path.display()); + } + } + } else { + println!( + "Pdfium library not found: download and copy into the same folder as the seekstorm_server.exe: https://github.com/bblanchon/pdfium-binaries" + ) + } + } +} + +#[cfg(not(feature = "pdf"))] +impl IngestPdf for IndexArc { + /// Index PDF files from local disk directory and sub-directories or from file. + /// - converts pdf to text and indexes it + /// - extracts title from metatag, or first line of text, or from filename + /// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) + /// - copies all ingested pdf files to "files" subdirectory in index + async fn ingest_pdf(&mut self, data_path: &PathBuf) { + println!("pdf feature flag not enabled"); + } +} + +/// Ingest local data files in [JSON](https://en.wikipedia.org/wiki/JSON), [Newline-delimited JSON](https://github.com/ndjson/ndjson-spec) (ndjson), and [Concatenated JSON](https://en.wikipedia.org/wiki/JSON_streaming) formats via console command. +/// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +pub trait IngestJson { + /// Ingest local data files in [JSON](https://en.wikipedia.org/wiki/JSON), [Newline-delimited JSON](https://github.com/ndjson/ndjson-spec) (ndjson), and [Concatenated JSON](https://en.wikipedia.org/wiki/JSON_streaming) formats via console command. + /// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. 
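+ ///
+ /// # Example
+ /// A hedged sketch; the `index_arc` handle and the file name are illustrative assumptions:
+ /// ```ignore
+ /// use std::path::PathBuf;
+ /// // accepts a JSON array, newline-delimited JSON, or concatenated JSON
+ /// index_arc.ingest_json(&PathBuf::from("wiki_articles.ndjson")).await;
+ /// ```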
+ async fn ingest_json(&mut self, data_path: &PathBuf); +} + +impl IngestJson for IndexArc { + /// Ingest local data files in [JSON](https://en.wikipedia.org/wiki/JSON), [Newline-delimited JSON](https://github.com/ndjson/ndjson-spec) (ndjson), and [Concatenated JSON](https://en.wikipedia.org/wiki/JSON_streaming) formats via console command. + /// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. + async fn ingest_json(&mut self, data_path: &PathBuf) { + match data_path.exists() { + true => { + println!("ingesting data from: {}", data_path.display()); + + let start_time = Instant::now(); + let mut docid: i64 = 0; + + let index_arc_clone2 = self.clone(); + let index_ref = index_arc_clone2.read().await; + drop(index_ref); + + let index_arc_clone = self.clone(); + let file = File::open(data_path).unwrap(); + let mut reader = BufReader::new(file); + + let is_vector = read_skipping_ws(&mut reader).unwrap() == b'['; + + if !is_vector { + println!("Newline-delimited JSON (ndjson) or Concatenated JSON detected"); + reader.seek_relative(-1).unwrap(); + + for doc_object in Deserializer::from_reader(reader).into_iter::() { + let index_arc_clone_clone = index_arc_clone.clone(); + + index_arc_clone_clone + .index_document(doc_object.unwrap(), FileType::None) + .await; + docid += 1; + } + } else { + println!("JSON detected"); + + let index_arc_clone_clone = index_arc_clone.clone(); + loop { + let next_obj = Deserializer::from_reader(reader.by_ref()) + .into_iter::() + .next(); + match next_obj { + Some(doc_object) => { + index_arc_clone_clone + .index_document(doc_object.unwrap(), FileType::None) + .await + } + None => break, + } + + docid += 1; + + match read_skipping_ws(reader.by_ref()).unwrap() { + b',' => {} + b']' => break, + _ => break, + } + } + } + + self.commit().await; + + let elapsed_time = start_time.elapsed().as_nanos(); + + let date: DateTime = DateTime::from(SystemTime::now()); + + let index_ref = self.read().await; + + println!( + "{}: {} shards {} levels {} ngrams {:08b} docs {} docs/sec {} docs/day {} dictionary {} {} completions {} minutes {:.2} seconds {}", + "Indexing finished".green(), + date.format("%D"), + index_ref.shard_count().await, + index_ref.shard_vec[0].read().await.level_index.len(), + index_ref.meta.ngram_indexing, + docid.to_formatted_string(&Locale::en), + (docid as u128 * 1_000_000_000 / elapsed_time).to_formatted_string(&Locale::en), + ((docid as u128 * 1_000_000_000 / elapsed_time) * 3600 * 24) + .to_formatted_string(&Locale::en), + if let Some(symspell) = index_ref.symspell_option.as_ref() { + symspell + .read() + .await + .get_dictionary_size() + .to_formatted_string(&Locale::en) + } else { + "None".to_string() + }, + if let Some(symspell) = index_ref.symspell_option.as_ref() { + symspell + .read() + .await + .get_candidates_size() + .to_formatted_string(&Locale::en) + } else { + "None".to_string() + }, + if let Some(completions) = index_ref.completion_option.as_ref() { + completions + .read() + .await + .len() + .to_formatted_string(&Locale::en) + } else { + "None".to_string() + }, + elapsed_time as f64 / 1_000_000_000.0 / 60.0, + elapsed_time / 1_000_000_000 + ); + } + false => { + println!("data file not found: {}", data_path.display()); + } + } + } +} + +#[allow(async_fn_in_trait)] +/// Ingest local data files in [CSV](https://en.wikipedia.org/wiki/Comma-separated_values). 
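+/// A hedged usage sketch; all argument values below are illustrative:
+/// ```ignore
+/// use std::path::PathBuf;
+/// // header row present, quoting enabled, comma-delimited, no rows skipped, no row limit
+/// index_arc
+///     .ingest_csv(&PathBuf::from("products.csv"), true, true, b',', None, None)
+///     .await;
+/// ```
+///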
+/// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. +pub trait IngestCsv { + /// Ingest local data files in [CSV](https://en.wikipedia.org/wiki/Comma-separated_values). + /// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. + async fn ingest_csv( + &mut self, + data_path: &PathBuf, + has_header: bool, + quoting: bool, + delimiter: u8, + skip_docs: Option, + num_docs: Option, + ); +} + +impl IngestCsv for IndexArc { + /// Ingest local data files in [CSV](https://en.wikipedia.org/wiki/Comma-separated_values). + /// The document ingestion is streamed without loading the whole document vector into memory to allwow for unlimited file size while keeping RAM consumption low. + async fn ingest_csv( + &mut self, + data_path: &PathBuf, + has_header: bool, + quoting: bool, + delimiter: u8, + skip_docs: Option, + num_docs: Option, + ) { + match data_path.exists() { + true => { + println!("ingesting data from: {}", data_path.display()); + + let start_time = Instant::now(); + let mut docid: usize = 0; + + let index_arc_clone2 = self.clone(); + let index_ref = index_arc_clone2.read().await; + drop(index_ref); + + let index_arc_clone = self.clone(); + let index_arc_clone_clone = index_arc_clone.clone(); + + let index_ref = index_arc_clone.read().await; + let mut schema_vec: Vec = vec!["".to_string(); index_ref.schema_map.len()]; + for (key, value) in index_ref.schema_map.iter() { + schema_vec[value.field_id] = key.clone(); + } + drop(index_ref); + + let mut rdr = ReaderBuilder::new() + .has_headers(has_header) + .quoting(quoting) + .delimiter(delimiter) + .terminator(Terminator::CRLF) + .from_path(data_path) + .unwrap(); + + let skip = skip_docs.unwrap_or(0); + let max = num_docs.unwrap_or(usize::MAX); + let mut i: usize = 0; + let mut record = csv::StringRecord::new(); + while rdr.read_record(&mut record).unwrap() && docid < max { + if i < skip { + i += 1; + continue; + } + let mut document: Document = HashMap::new(); + for (i, element) in record.iter().enumerate() { + document.insert(schema_vec[i].clone(), json!(element)); + } + + index_arc_clone_clone + .index_document(document, FileType::None) + .await; + docid += 1; + } + + self.commit().await; + + let elapsed_time = start_time.elapsed().as_nanos(); + + println!( + "{}: docs {} docs/sec {} docs/day {} minutes {:.2} seconds {}", + "Indexing finished".green(), + docid.to_formatted_string(&Locale::en), + (docid as u128 * 1_000_000_000 / elapsed_time).to_formatted_string(&Locale::en), + ((docid as u128 * 1_000_000_000 / elapsed_time) * 3600 * 24) + .to_formatted_string(&Locale::en), + elapsed_time as f64 / 1_000_000_000.0 / 60.0, + elapsed_time / 1_000_000_000 + ); + } + false => { + println!("data file not found: {}", data_path.display()); + } + } + } +} diff --git a/mobile_app/rust/src/seekstorm/intersection.rs b/mobile_app/rust/src/seekstorm/intersection.rs new file mode 100644 index 0000000..671996c --- /dev/null +++ b/mobile_app/rust/src/seekstorm/intersection.rs @@ -0,0 +1,2300 @@ +use crate::{ + add_result::add_result_multiterm_multifield, + compatible::{_blsr_u64, _mm_tzcnt_64}, + index::{ + AccessType, CompressionType, NonUniquePostingListObjectQuery, PostingListObjectQuery, + SORT_FLAG, SPEEDUP_FLAG, Shard, + }, + intersection_simd::intersection_vector16, + search::{FilterSparse, ResultType, SearchResult}, + utils::{read_u16, 
read_u64}, +}; +use ahash::AHashSet; +use num_traits::FromPrimitive; +use std::{ + cmp, + cmp::Ordering as OtherOrdering, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, +}; + +pub(crate) fn bitpacking32_get_delta(body: &[u8], bitposition: u32, rangebits: u32) -> u16 { + let bodyspan = &body[((bitposition >> 3) as usize)..]; + let bodyspan_4: &[u8; 4] = bodyspan.try_into().unwrap(); + let source_bytes = u32::from_be_bytes(*bodyspan_4); + ((source_bytes >> (32 - rangebits - (bitposition & 7)) as i32) + & (0b1111_1111_1111_1111_1111_1111_1111_1111 >> (32 - rangebits as i32))) as u16 +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn intersection_bitmap_2( + result_count: &mut i32, + block_id: usize, + shard: &Shard, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + phrase_query: bool, + filtered: bool, + block_score: f32, + all_terms_frequent: bool, +) { + for ulong_pos in 0..1024 { + let mut bits1 = read_u64( + &query_list[0].byte_array[query_list[0].compressed_doc_id_range..], + ulong_pos * 8, + ); + let mut bits2 = read_u64( + &query_list[1].byte_array[query_list[1].compressed_doc_id_range..], + ulong_pos * 8, + ); + + let mut intersect = bits1 & bits2; + + if !filtered && result_type == &ResultType::Count { + *result_count += u64::count_ones(intersect) as i32; + } else if !filtered + && search_result.topk_candidates.current_heap_size == top_k + && block_score <= search_result.topk_candidates._elements[0].score + { + if result_type != &ResultType::Topk { + *result_count += u64::count_ones(intersect) as i32; + } + } else { + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as usize; + let doc_id1 = (ulong_pos << 6) + bit_pos; + + if bit_pos > 0 { + let mask2 = u64::MAX << bit_pos; + let mask1 = !mask2; + query_list[0].p_docid += (bits1 & mask1).count_ones() as usize; + query_list[1].p_docid += (bits2 & mask1).count_ones() as usize; + bits1 &= mask2; + bits2 &= mask2; + } + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + intersect = unsafe { _blsr_u64(intersect) }; + } + query_list[0].p_docid += bits1.count_ones() as usize; + query_list[1].p_docid += bits2.count_ones() as usize; + } + } +} + +#[allow(clippy::too_many_arguments)] +#[allow(clippy::never_loop)] +pub(crate) async fn intersection_docid( + shard: &Shard, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'_>], + query_list: &mut [PostingListObjectQuery<'_>], + not_query_list: &mut [PostingListObjectQuery<'_>], + block_id: usize, + result_count: &mut i32, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + phrase_query: bool, + block_score: f32, +) { + let t1 = 0; + let mut t2 = 1; + + let filtered = !not_query_list.is_empty() + || phrase_query + || !field_filter_set.is_empty() + || !search_result.topk_candidates.result_sort.is_empty() + || (!search_result.query_facets.is_empty() || !facet_filter.is_empty()) + && (!search_result.skip_facet_count || !facet_filter.is_empty()); + + for plo in 
not_query_list.iter_mut() { + let query_list_item_mut = plo; + + let result = query_list_item_mut + .blocks + .binary_search_by(|block| block.block_id.cmp(&(block_id as u32))); + match result { + Ok(p_block) => { + query_list_item_mut.bm25_flag = true; + query_list_item_mut.p_block = p_block as i32 + } + Err(_) => { + query_list_item_mut.bm25_flag = false; + continue; + } + }; + + let blo = &query_list_item_mut.blocks[query_list_item_mut.p_block as usize]; + + query_list_item_mut.compression_type = + FromPrimitive::from_i32((blo.compression_type_pointer >> 30) as i32).unwrap(); + + query_list_item_mut.rank_position_pointer_range = + blo.compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let posting_pointer_size_sum = blo.pointer_pivot_p_docid as usize * 2 + + if (blo.pointer_pivot_p_docid as usize) <= blo.posting_count as usize { + ((blo.posting_count as usize + 1) - blo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + query_list_item_mut.compressed_doc_id_range = + query_list_item_mut.rank_position_pointer_range as usize + posting_pointer_size_sum; + + query_list_item_mut.p_docid = 0; + query_list_item_mut.p_docid_count = blo.posting_count as usize + 1; + + query_list_item_mut.pointer_pivot_p_docid = blo.pointer_pivot_p_docid; + + query_list_item_mut.docid = 0; + + if query_list_item_mut.compression_type == CompressionType::Rle { + query_list_item_mut.p_run_count = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range, + ) as i32; + let startdocid = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range + 2, + ); + let runlength = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range + 4, + ); + query_list_item_mut.docid = startdocid as i32; + query_list_item_mut.run_end = (startdocid + runlength) as i32; + query_list_item_mut.p_run_sum = runlength as i32; + query_list_item_mut.p_run = 6; + } + } + + let mut all_terms_frequent = shard.indexed_doc_count > (top_k << 8); + for query_list_item_mut in query_list.iter_mut() { + let blo = &query_list_item_mut.blocks[query_list_item_mut.p_block as usize]; + + query_list_item_mut.p_docid = 0; + query_list_item_mut.p_docid_count = blo.posting_count as usize + 1; + + if query_list_item_mut.bm25_flag + && (query_list_item_mut.posting_count as f32) / (shard.indexed_doc_count as f32) < 0.5 + { + all_terms_frequent = false; + } + + query_list_item_mut.compression_type = + FromPrimitive::from_i32((blo.compression_type_pointer >> 30) as i32).unwrap(); + + query_list_item_mut.rank_position_pointer_range = + blo.compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + query_list_item_mut.pointer_pivot_p_docid = blo.pointer_pivot_p_docid; + + let posting_pointer_size_sum = blo.pointer_pivot_p_docid as usize * 2 + + if (blo.pointer_pivot_p_docid as usize) <= blo.posting_count as usize { + ((blo.posting_count as usize + 1) - blo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + query_list_item_mut.compressed_doc_id_range = + query_list_item_mut.rank_position_pointer_range as usize + posting_pointer_size_sum; + + query_list_item_mut.docid = 0; + query_list_item_mut.p_run = 0; + query_list_item_mut.p_run_count = 0; + + query_list_item_mut.p_run_sum = + if query_list_item_mut.compression_type == CompressionType::Rle { + query_list_item_mut.p_run_count = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range, + ) as i32; + + read_u16( + query_list_item_mut.byte_array, 
+ query_list_item_mut.compressed_doc_id_range + 4, + ) + .into() + } else { + 0 + }; + } + + if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && (result_type == &ResultType::Topk) + && (search_result.topk_candidates.current_heap_size == top_k) + && (block_score <= search_result.topk_candidates._elements[0].score) + { + return; + } + + query_list.sort_unstable_by(|x, y| { + if (x.compression_type == CompressionType::Bitmap) + != (y.compression_type == CompressionType::Bitmap) + { + if x.compression_type == CompressionType::Bitmap { + OtherOrdering::Greater + } else { + OtherOrdering::Less + } + } else { + x.blocks[x.p_block as usize] + .posting_count + .partial_cmp(&y.blocks[y.p_block as usize].posting_count) + .unwrap() + } + }); + + 'restart: loop { + match ( + &query_list[t1].compression_type, + &query_list[t2].compression_type, + ) { + (CompressionType::Array, CompressionType::Array) => 'exit: loop { + let mut doc_id1: u16 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + let mut doc_id2: u16 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + + if query_list.len() == 2 + && cfg!(any(target_arch = "x86_64", target_arch = "aarch64")) + { + intersection_vector16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[0].p_docid_count, + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + query_list[1].p_docid_count, + result_count, + block_id, + shard, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + all_terms_frequent, + ); + + break 'exit; + } + + loop { + match doc_id1.cmp(&doc_id2) { + cmp::Ordering::Less => { + if t2 > 1 { + t2 = 1; + if query_list[t2].compression_type != CompressionType::Array { + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + continue 'restart; + } else { + doc_id2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + } + } + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + doc_id1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + } + cmp::Ordering::Greater => { + query_list[t2].p_docid += 1; + if query_list[t2].p_docid == query_list[t2].p_docid_count { + break; + } + + let mut bound = 2; + while (query_list[t2].p_docid + bound < query_list[t2].p_docid_count) + && (read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (query_list[t2].p_docid + bound) * 2, + ) < doc_id1) + { + query_list[t2].p_docid += bound; + bound <<= 1; + } + + doc_id2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + } + cmp::Ordering::Equal => { + if t2 + 1 < query_list.len() { + t2 += 1; + if query_list[t2].compression_type != CompressionType::Array { + continue 'restart; + } else { + doc_id2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + continue; + } + } + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + 
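+ // At this point every query term's posting cursor (p_docid) has been positioned on doc_id1, so the hit is recorded across all terms.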
non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break 'exit; + } + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id1 == item.run_end as u16) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + + 4 + + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Array { + continue 'restart; + } + doc_id1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + doc_id2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + } + } + } + + break; + }, + + (CompressionType::Array, CompressionType::Delta) => 'exit: loop { + let mut doc_id1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + let mut doc_id2: u16 = query_list[t2].docid as u16; + + loop { + match doc_id1.cmp(&doc_id2) { + cmp::Ordering::Less => { + if t2 > 1 { + t2 = 1; + if query_list[t2].compression_type != CompressionType::Delta { + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + continue 'restart; + } else { + doc_id2 = query_list[t2].docid as u16; + } + } + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + doc_id1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + } + cmp::Ordering::Greater => { + query_list[t2].p_docid += 1; + if query_list[t2].p_docid == query_list[t2].p_docid_count { + break; + } + + query_list[t2].bitposition += query_list[t2].rangebits as u32; + doc_id2 = query_list[t2].docid as u16 + + bitpacking32_get_delta( + query_list[t2].byte_array, + query_list[t2].bitposition, + query_list[t2].rangebits as u32, + ) + + 1; + query_list[t2].docid = doc_id2 as i32; + } + cmp::Ordering::Equal => { + if t2 + 1 < query_list.len() { + t2 += 1; + if query_list[t2].compression_type != CompressionType::Delta { + continue 'restart; + } else { + doc_id2 = query_list[t2].docid as u16; + continue; + } + } + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut() { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id1 == item.run_end as u16) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + + 4 + + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Delta { + continue 'restart; + } + doc_id1 = read_u16( + 
&query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + doc_id2 = query_list[t2].docid as u16; + } + } + } + + break; + }, + (CompressionType::Bitmap, CompressionType::Bitmap) => 'exit: loop { + if query_list.len() == 2 && SPEEDUP_FLAG { + intersection_bitmap_2( + result_count, + block_id, + shard, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + filtered, + block_score, + all_terms_frequent, + ); + break 'exit; + } + + let mut intersect_mask: u64 = u64::MAX << (query_list[t1].docid & 63); + + for ulong_pos in (query_list[t1].docid as usize >> 6)..1024 { + let ulong_1 = read_u64( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + ulong_pos * 8, + ); + let ulong_2 = read_u64( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + ulong_pos * 8, + ); + let mut intersect: u64 = ulong_1 & ulong_2 & intersect_mask; + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as usize; + let doc_id1 = (ulong_pos << 6) + bit_pos; + + if t2 + 1 < query_list.len() { + for i in (query_list[t2].p_run as usize)..ulong_pos { + query_list[t2].p_run_sum += read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + i * 8, + ) + .count_ones() + as i32 + } + query_list[t2].p_docid = if bit_pos == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos * 8, + ) << (64 - bit_pos)) + .count_ones() as usize + }; + + query_list[t2].p_run = ulong_pos as i32; + + t2 += 1; + + intersect &= read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos * 8, + ); + + if ((1u64 << bit_pos) & intersect) == 0 { + t2 = 1; + } + + query_list[t1].docid = doc_id1 as i32; + + continue; + } + + intersect = unsafe { _blsr_u64(intersect) }; + + if SPEEDUP_FLAG + && !filtered + && (result_type == &ResultType::Count + || ((search_result.topk_candidates.current_heap_size == top_k) + && (block_score + <= search_result.topk_candidates._elements[0].score))) + { + if result_type != &ResultType::Topk { + *result_count += 1; + } + } else { + for i in (query_list[t1].p_run as usize)..ulong_pos { + query_list[t1].p_run_sum += read_u64( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + i * 8, + ) + .count_ones() + as i32 + } + query_list[t1].p_docid = if bit_pos == 0 { + query_list[t1].p_run_sum as usize + } else { + query_list[t1].p_run_sum as usize + + (read_u64( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + ulong_pos * 8, + ) << (64 - bit_pos)) + .count_ones() as usize + }; + query_list[t1].p_run = ulong_pos as i32; + + for i in (query_list[t2].p_run as usize)..ulong_pos { + query_list[t2].p_run_sum += read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + i * 8, + ) + .count_ones() + as i32 + } + query_list[t2].p_docid = if bit_pos == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos * 8, + ) << (64 - bit_pos)) + .count_ones() as usize + }; + query_list[t2].p_run = ulong_pos as i32; + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1, + result_count, + search_result, + top_k, + 
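+ // p_docid for both bitmap terms was reconstructed above by popcounting the set bits preceding doc_id1.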
result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id1 == item.run_end as usize) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + + 4 + + (item.p_run << 2) as usize, + ) as i32; + } + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Bitmap { + query_list[t1].docid = doc_id1 as i32 + 1; + continue 'restart; + } + } + + intersect_mask = u64::MAX; + } + + break; + }, + + (CompressionType::Array, CompressionType::Bitmap) => 'exit: loop { + if query_list.len() == 2 { + let block_id_bits = block_id << 16; + let mut p_docid = query_list[0].p_docid; + let compressed_doc_id_range = query_list[1].compressed_doc_id_range; + let p_docid_count = query_list[0].p_docid_count; + loop { + let doc_id1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + p_docid * 2, + ); + if (query_list[1].byte_array + [compressed_doc_id_range + (doc_id1 >> 3) as usize] + & (1u32 << (doc_id1 & 7)) as u8) + > 0 + { + query_list[0].p_docid = p_docid; + + let byte_pos = (doc_id1 >> 6) << 3; + let bit_pos = doc_id1 & 63; + for i in (((query_list[t2].p_run << 3) as usize)..byte_pos as usize) + .step_by(8) + { + query_list[t2].p_run_sum += read_u64( + query_list[t2].byte_array, + query_list[t2].compressed_doc_id_range + i, + ) + .count_ones() + as i32; + } + + query_list[t2].p_docid = if bit_pos == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + query_list[t2].byte_array, + query_list[t2].compressed_doc_id_range + byte_pos as usize, + ) << (64 - bit_pos)) + .count_ones() as usize + }; + + query_list[t2].p_run = (doc_id1 >> 6) as i32; + add_result_multiterm_multifield( + shard, + block_id_bits | doc_id1 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + } + + p_docid += 1; + if p_docid == p_docid_count { + break 'exit; + } + } + } + + loop { + let doc_id1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + + if (query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range + (doc_id1 >> 3) as usize] + & (1u32 << (doc_id1 & 7)) as u8) + > 0 + { + let byte_pos2 = (doc_id1 >> 6) << 3; + let bit_pos2 = doc_id1 & 63; + + if t2 + 1 < query_list.len() { + for i in (((query_list[t2].p_run << 3) as usize)..byte_pos2 as usize) + .step_by(8) + { + query_list[t2].p_run_sum += read_u64( + query_list[t2].byte_array, + query_list[t2].compressed_doc_id_range + i, + ) + .count_ones() + as i32; + } + query_list[t2].p_docid = if bit_pos2 == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + query_list[t2].byte_array, + query_list[t2].compressed_doc_id_range + byte_pos2 as usize, + ) << (64 - bit_pos2)) + .count_ones() as usize + }; + query_list[t2].p_run = (doc_id1 >> 6) as i32; + + t2 += 1; + continue; + } + + for i in + (((query_list[t2].p_run << 3) as usize)..byte_pos2 
as usize).step_by(8) + { + query_list[t2].p_run_sum += (read_u64( + query_list[t2].byte_array, + query_list[t2].compressed_doc_id_range + i, + )) + .count_ones() + as i32; + } + query_list[t2].p_docid = if bit_pos2 == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + query_list[t2].byte_array, + query_list[t2].compressed_doc_id_range + byte_pos2 as usize, + ) << (64 - bit_pos2)) + .count_ones() as usize + }; + query_list[t2].p_run = (doc_id1 >> 6) as i32; + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id1 as i32 == item.run_end) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + 4 + (item.p_run << 2) as usize, + ) as i32; + } + } + } + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break 'exit; + } + t2 = 1; + if query_list[t2].compression_type != CompressionType::Bitmap { + continue 'restart; + } + } + }, + + (CompressionType::Array, CompressionType::Rle) => 'exit: loop { + query_list[t2].p_run_count = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + 0, + ) as i32; + let mut doc_id1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + let mut run_start2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + let mut run_length2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + + let mut run_end2 = run_start2 + run_length2; + query_list[t2].run_end = run_end2 as i32; + + loop { + if doc_id1 > run_end2 { + query_list[t2].p_run += 1; + if query_list[t2].p_run == query_list[t2].p_run_count { + break; + } + + if false { + let mut bound: i32 = 2; + while (query_list[t2].p_run + bound < query_list[t2].p_run_count) + && (read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + ((query_list[t2].p_run + bound) << 1) as usize) * 2, + ) + read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (2 + ((query_list[t2].p_run + bound) << 1) as usize) * 2, + ) < doc_id1) + { + query_list[t2].p_run += bound; + bound <<= 1; + } + } + + run_start2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + run_length2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + + run_end2 = run_start2 + run_length2; + query_list[t2].p_run_sum += run_length2 as i32; + query_list[t2].run_end = run_end2 as i32; + } else if doc_id1 < run_start2 { + if t2 > 1 { + t2 = 1; + if query_list[t2].compression_type != CompressionType::Rle { + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + 
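+ // The Array term has run out of postings, so the intersection for this block is finished.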
break; + } + continue 'restart; + } else { + run_start2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + run_end2 = query_list[t2].run_end as u16; + } + } + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + doc_id1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + } else { + if t2 + 1 < query_list.len() { + run_length2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + + query_list[t2].p_docid = query_list[t2].p_run_sum as usize + - run_length2 as usize + + doc_id1 as usize + - run_start2 as usize + + query_list[t2].p_run as usize; + + t2 += 1; + if query_list[t2].compression_type != CompressionType::Rle { + continue 'restart; + } else { + query_list[t2].p_run_count = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + 0, + ) + as i32; + run_start2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + run_length2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + + run_end2 = run_start2 + run_length2; + query_list[t2].run_end = run_end2 as i32; + + continue; + } + } + + query_list[t2].p_docid = query_list[t2].p_run_sum as usize + - run_length2 as usize + + doc_id1 as usize + - run_start2 as usize + + query_list[t2].p_run as usize; + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break 'exit; + } + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id1 as i32 == item.run_end) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + 4 + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Rle { + continue 'restart; + } + + doc_id1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + query_list[t1].p_docid * 2, + ); + + run_start2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + run_length2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + run_end2 = run_start2 + run_length2; + query_list[t2].run_end = run_end2 as i32; + } + } + + break; + }, + + (CompressionType::Delta, CompressionType::Delta) => 'exit: loop { + let mut doc_id1: u16 = query_list[t1].docid as u16; + let mut doc_id2: u16 = query_list[t2].docid as u16; + + loop { + match doc_id1.cmp(&doc_id2) { + cmp::Ordering::Less => { + if t2 > 1 { + t2 = 1; + if query_list[t2].compression_type != CompressionType::Delta { + 
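+ // The term now at position t2 = 1 is not Delta-compressed: advance term 1 and re-dispatch ('restart) on the new compression-type pair.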
query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + continue 'restart; + } else { + doc_id2 = query_list[t2].docid as u16; + } + } + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].p_docid_count { + break; + } + + query_list[t1].bitposition += query_list[t1].rangebits as u32; + doc_id1 = query_list[t1].docid as u16 + + bitpacking32_get_delta( + query_list[t1].byte_array, + query_list[t1].bitposition, + query_list[t1].rangebits as u32, + ) + + 1; + query_list[t1].docid = doc_id1 as i32; + } + cmp::Ordering::Greater => { + query_list[t2].p_docid += 1; + if query_list[t2].p_docid == query_list[t2].p_docid_count { + break; + } + + query_list[t2].bitposition += query_list[t2].rangebits as u32; + doc_id2 = query_list[t2].docid as u16 + + bitpacking32_get_delta( + query_list[t2].byte_array, + query_list[t2].bitposition, + query_list[t2].rangebits as u32, + ) + + 1; + query_list[t2].docid = doc_id2 as i32; + } + cmp::Ordering::Equal => { + if t2 + 1 < query_list.len() { + t2 += 1; + if query_list[t2].compression_type != CompressionType::Delta { + continue 'restart; + } else { + doc_id2 = query_list[t2].docid as u16; + continue; + } + } + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id1 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut() { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id1 == item.run_end as u16) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + + 4 + + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Delta { + continue 'restart; + } + doc_id1 = query_list[t1].docid as u16; + doc_id2 = query_list[t2].docid as u16; + } + } + } + + break; + }, + + (CompressionType::Bitmap, CompressionType::Delta) => 'exit: loop { + loop { + let doc_id2 = query_list[t2].docid as u16; + let byte_pos = doc_id2 >> 3; + let bit_pos = doc_id2 & 7; + + if (query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range + byte_pos as usize] + & (1u32 << bit_pos) as u8) + > 0 + { + if t2 + 1 < query_list.len() { + t2 += 1; + if query_list[t2].compression_type != CompressionType::Delta { + continue 'restart; + } else { + continue; + } + } + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id2 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id2 == item.run_end as u16) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + 4 + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != 
CompressionType::Delta { + continue 'restart; + } + } else { + query_list[t2].p_docid += 1; + if query_list[t2].p_docid == query_list[t2].p_docid_count { + break 'exit; + } + } + } + }, + + (CompressionType::Rle, CompressionType::Rle) => 'exit: loop { + query_list[t1].p_run_count = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + 0, + ) as i32; + let mut runstart1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ); + let mut runlength1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (2 + query_list[t1].p_run * 2) as usize * 2, + ); + + let mut runend1 = runstart1 + runlength1; + query_list[t1].run_end = runend1 as i32; + + query_list[t2].p_run_count = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + 0, + ) as i32; + let mut runstart2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + let mut runlength2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + + let mut runend2 = runstart2 + runlength2; + query_list[t2].run_end = runend2 as i32; + + 'start: loop { + if query_list[t1].docid < runstart2 as i32 && t2 > 1 { + query_list[t1].docid = runstart2 as i32; + + t2 = 1; + + continue 'restart; + } + + if runstart1 > runend2 { + query_list[t2].p_run += 1; + if query_list[t2].p_run == query_list[t2].p_run_count { + break 'exit; + } + + runstart2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + runlength2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + runend2 = runstart2 + runlength2; + + query_list[t2].p_run_sum += runlength2 as i32; + query_list[t2].run_end = runend2 as i32; + } else if runend1 < runstart2 { + if t2 > 1 { + t2 = 1; + if query_list[t2].compression_type != CompressionType::Rle { + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break 'exit; + } + + query_list[t1].p_run_sum += read_u16( + query_list[t1].byte_array, + query_list[t1].compressed_doc_id_range + + 4 + + (query_list[t1].p_run << 2) as usize, + ) + as i32; + + continue 'restart; + } else { + runstart2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + runlength2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + runend2 = query_list[t2].run_end as u16; + } + } + + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break 'exit; + } + runstart1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ); + runlength1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (2 + query_list[t1].p_run * 2) as usize * 2, + ); + runend1 = runstart1 + runlength1; + + query_list[t1].p_run_sum += runlength1 as i32; + query_list[t1].run_end = runend1 as i32; + } else { + for doc_id in cmp::max( + query_list[t1].docid, + cmp::max(runstart1 as i32, runstart2 as i32), + ) + ..=(cmp::min(runend1 as i32, runend2 as i32)) + { + if t2 + 1 < 
query_list.len() { + query_list[t2].p_docid = query_list[t2].p_run_sum as usize + - runlength2 as usize + + doc_id as usize + - read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ) as usize + + query_list[t2].p_run as usize; + + t2 += 1; + if query_list[t2].compression_type != CompressionType::Rle { + query_list[t1].docid = doc_id; + + continue 'restart; + } else { + query_list[t2].p_run_count = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + 0, + ) + as i32; + runstart2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + runlength2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + + runend2 = runstart2 + runlength2; + query_list[t2].run_end = runend2 as i32; + + query_list[t1].docid = doc_id; + + continue 'start; + } + } + + query_list[t1].p_docid = query_list[t1].p_run_sum as usize + - runlength1 as usize + + doc_id as usize + - read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ) as usize + + query_list[t1].p_run as usize; + + query_list[t2].p_docid = query_list[t2].p_run_sum as usize + - runlength2 as usize + + doc_id as usize + - read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ) as usize + + query_list[t2].p_run as usize; + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + query_list[t1].docid = doc_id + 1; + + let mut flag = false; + for item in query_list.iter_mut() { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id >= item.run_end) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + + 4 + + (item.p_run << 2) as usize, + ) as i32; + + flag = true; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Rle { + continue 'restart; + } else if flag || (query_list.len() > 2) { + continue 'exit; + } + } + + if query_list[t1].docid > query_list[t1].run_end { + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break 'exit; + } + runstart1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ); + runlength1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (2 + query_list[t1].p_run * 2) as usize * 2, + ); + runend1 = runstart1 + runlength1; + query_list[t1].p_run_sum += runlength1 as i32; + query_list[t1].run_end = runend1 as i32; + } + + if query_list[t1].docid > query_list[t2].run_end { + query_list[t2].p_run += 1; + if query_list[t2].p_run == query_list[t2].p_run_count { + break 'exit; + } + runstart2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (1 + query_list[t2].p_run * 2) as usize * 2, + ); + 
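+ // Each run is stored as a (start, length) u16 pair after the u16 run-count header; reload the pair for the run just advanced to.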
runlength2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (2 + query_list[t2].p_run * 2) as usize * 2, + ); + runend2 = runstart2 + runlength2; + query_list[t2].p_run_sum += runlength2 as i32; + query_list[t2].run_end = runend2 as i32; + } + } + } + + #[allow(unreachable_code)] + break; + }, + + (CompressionType::Rle, CompressionType::Bitmap) => 'exit: loop { + query_list[t1].p_run_count = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + 0, + ) as i32; + + loop { + let mut runstart1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (1 + (query_list[t1].p_run * 2) as usize) * 2, + ); + let runlength1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (2 + (query_list[t1].p_run * 2) as usize) * 2, + ); + + let runend1 = runstart1 + runlength1; + query_list[t1].run_end = runend1 as i32; + + runstart1 = cmp::max(runstart1, query_list[t1].docid as u16); + + let mut intersect_mask: u64 = if (query_list[t1].docid as u16) < runstart1 { + u64::MAX + } else { + u64::MAX << (query_list[t1].docid & 63) + }; + + let byte_pos_start = runstart1 >> 6; + let byte_pos_end = runend1 >> 6; + + for ulong_pos in byte_pos_start..=byte_pos_end { + let mut intersect: u64 = read_u64( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + ulong_pos as usize * 8, + ) & intersect_mask; + + if ulong_pos == byte_pos_start { + intersect &= u64::MAX << (runstart1 & 63); + } + if ulong_pos == byte_pos_end { + intersect &= u64::MAX >> (63 - (runend1 & 63)); + } + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) }; + let doc_id = ((ulong_pos as u32) << 6) + bit_pos as u32; + + query_list[t1].docid = doc_id as i32; + + if t2 + 1 < query_list.len() { + for i in (query_list[t2].p_run as usize)..ulong_pos as usize { + query_list[t2].p_run_sum += read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + i * 8, + ) + .count_ones() + as i32 + } + query_list[t2].p_docid = if bit_pos == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos as usize * 8, + ) << (64 - bit_pos)) + .count_ones() + as usize + }; + query_list[t2].p_run = ulong_pos as i32; + + t2 += 1; + intersect &= read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos as usize * 8, + ); + + continue; + } + + intersect = unsafe { _blsr_u64(intersect) }; + + query_list[t1].p_docid = query_list[t1].p_run_sum as usize + - runlength1 as usize + + doc_id as usize + - read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (1 + (query_list[t1].p_run * 2)) as usize * 2, + ) as usize + + query_list[t1].p_run as usize; + + for i in (query_list[t2].p_run as usize)..ulong_pos as usize { + query_list[t2].p_run_sum += read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + i * 8, + ) + .count_ones() + as i32 + } + query_list[t2].p_docid = if bit_pos == 0 { + query_list[t2].p_run_sum as usize + } else { + query_list[t2].p_run_sum as usize + + (read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos as usize * 8, + ) << (64 - bit_pos)) + .count_ones() as usize + }; + + query_list[t2].p_run = ulong_pos as i32; + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id as usize, + 
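+ // The packed id above carries the 64K-docid block id in the upper bits and the block-local doc id in the lower 16 bits.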
result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + }; + } else if (item.compression_type == CompressionType::Rle) + && (doc_id == item.run_end as u32) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + }; + + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + + 4 + + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Bitmap { + if doc_id == query_list[t1].run_end as u32 { + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break 'exit; + } + query_list[t1].p_run_sum += read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (2 + (query_list[t1].p_run * 2)) as usize * 2, + ) + as i32; + } + if doc_id == 65_535 { + break 'exit; + } + query_list[t1].docid = doc_id as i32 + 1; + continue 'restart; + } + intersect &= read_u64( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + ulong_pos as usize * 8, + ); + + intersect_mask = u64::MAX; + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Bitmap { + if query_list[t1].docid == 65_535 { + break 'exit; + } + + query_list[t1].docid = cmp::max( + query_list[t1].docid + 1, + cmp::min(((ulong_pos + 1) << 6) as i32, runend1 as i32 + 1), + ); + + continue 'restart; + } + } + + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break 'exit; + } + query_list[t1].p_run_sum += read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (2 + (query_list[t1].p_run * 2)) as usize * 2, + ) as i32; + } + }, + + (CompressionType::Rle, CompressionType::Array) => 'exit: loop { + query_list[t1].p_run_count = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + 0, + ) as i32; + let mut runstart1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ); + let mut runlength1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (2 + query_list[t1].p_run * 2) as usize * 2, + ); + + let mut runend1 = runstart1 + runlength1; + query_list[t1].run_end = runend1 as i32; + + runstart1 = cmp::max(runstart1, query_list[t1].docid as u16); + + let mut doc_id2: u16 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + + loop { + if doc_id2 > runend1 { + if t2 > 1 { + t2 = 1; + if query_list[t2].compression_type != CompressionType::Array { + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break; + } + query_list[t1].p_run_sum += read_u16( + query_list[t1].byte_array, + query_list[t1].compressed_doc_id_range + + 4 + + (query_list[t1].p_run << 2) as usize, + ) + as i32; + + continue 'restart; + } else { + doc_id2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + } + } + + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break; + } + + runstart1 = read_u16( + 
&query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ); + runlength1 = read_u16( + &query_list[t1].byte_array[query_list[t1].compressed_doc_id_range..], + (2 + query_list[t1].p_run * 2) as usize * 2, + ); + runend1 = runstart1 + runlength1; + query_list[t1].p_run_sum += runlength1 as i32; + query_list[t1].run_end = runend1 as i32; + } else if doc_id2 < runstart1 { + query_list[t2].p_docid += 1; + if query_list[t2].p_docid == query_list[t2].p_docid_count { + break; + } + + if true { + let mut bound = 2; + while (query_list[t2].p_docid + bound < query_list[t2].p_docid_count) + && (read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + (query_list[t2].p_docid + bound) * 2, + ) < runstart1) + { + query_list[t2].p_docid += bound; + bound <<= 1; + } + } + doc_id2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + } else { + if t2 + 1 < query_list.len() { + t2 += 1; + if query_list[t2].compression_type != CompressionType::Array { + query_list[t1].docid = doc_id2 as i32; + continue 'restart; + } else { + doc_id2 = read_u16( + &query_list[t2].byte_array + [query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + continue; + } + } + + query_list[t1].p_docid = query_list[t1].p_run_sum as usize + - runlength1 as usize + + doc_id2 as usize + - runstart1 as usize + + query_list[t1].p_run as usize; + + add_result_multiterm_multifield( + shard, + (block_id << 16) | doc_id2 as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + block_score, + all_terms_frequent, + ); + + for item in query_list.iter_mut().skip(1) { + if item.compression_type == CompressionType::Array { + item.p_docid += 1; + if item.p_docid == item.p_docid_count { + break 'exit; + } + } else if (item.compression_type == CompressionType::Rle) + && (doc_id2 == item.run_end as u16) + { + item.p_run += 1; + if item.p_run == item.p_run_count { + break 'exit; + } + item.p_run_sum += read_u16( + item.byte_array, + item.compressed_doc_id_range + 4 + (item.p_run << 2) as usize, + ) as i32; + } + } + + t2 = 1; + if query_list[t2].compression_type != CompressionType::Array { + query_list[t1].docid = doc_id2 as i32 + 1; + + continue 'restart; + } + + if doc_id2 == query_list[t1].run_end as u16 { + query_list[t1].p_run += 1; + if query_list[t1].p_run == query_list[t1].p_run_count { + break 'exit; + } + runstart1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (1 + query_list[t1].p_run * 2) as usize * 2, + ); + runlength1 = read_u16( + &query_list[t1].byte_array + [query_list[t1].compressed_doc_id_range..], + (2 + query_list[t1].p_run * 2) as usize * 2, + ); + runend1 = runstart1 + runlength1; + query_list[t1].p_run_sum += runlength1 as i32; + query_list[t1].run_end = runend1 as i32; + } + doc_id2 = read_u16( + &query_list[t2].byte_array[query_list[t2].compressed_doc_id_range..], + query_list[t2].p_docid * 2, + ); + } + } + + break; + }, + + _ => { + println!( + "invalid compression combination: block: {} t1: {} {} {} {:?} t2: {} {} {} {:?} {} ", + block_id, + t1, + query_list[t1].term, + query_list[t1].blocks[query_list[t1].p_block as usize].posting_count, + query_list[t1].compression_type, + t2, + query_list[t2].term, + query_list[t2].blocks[query_list[t2].p_block as usize].posting_count, + 
query_list[t2].compression_type, + query_list.len() + ); + } + } + + break; + } +} + +pub(crate) struct BlockObject { + pub block_id: usize, + pub block_score: f32, + pub p_block_vec: Vec, +} + +/// Intersection between blocks of 64k docids of a posting list +#[allow(clippy::too_many_arguments)] +pub(crate) async fn intersection_blockid<'a>( + shard: &'a Shard, + non_unique_query_list: &mut Vec>, + query_list: &mut Vec>, + not_query_list: &mut [PostingListObjectQuery<'a>], + result_count_arc: &Arc, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + matching_blocks: &mut i32, + phrase_query: bool, + query_term_count: usize, +) { + let item_0 = &query_list[0]; + let enable_inter_query_threading_multi = + if !shard.enable_search_quality_test && shard.enable_inter_query_threading_auto { + item_0.posting_count / item_0.p_block_max as u32 > 10 + } else { + shard.enable_inter_query_threading + }; + + let mut task_list = Vec::new(); + + let t1: i32 = 0; + let mut t2: i32 = 1; + + let item_1 = &query_list[t1 as usize]; + let item_2 = &query_list[t2 as usize]; + let mut block_id1 = item_1.blocks[item_1.p_block as usize].block_id; + let mut block_id2 = item_2.blocks[item_2.p_block as usize].block_id; + + let mut block_vec: Vec = Vec::new(); + + 'exit: loop { + match block_id1.cmp(&block_id2) { + cmp::Ordering::Less => { + let item_1 = &mut query_list[t1 as usize]; + item_1.p_block += 1; + if item_1.p_block == item_1.p_block_max { + break; + } + block_id1 = item_1.blocks[item_1.p_block as usize].block_id; + + t2 = 1; + let item_2 = &query_list[t2 as usize]; + block_id2 = item_2.blocks[item_2.p_block as usize].block_id; + } + cmp::Ordering::Greater => { + let item_2 = &mut query_list[t2 as usize]; + item_2.p_block += 1; + if item_2.p_block == item_2.p_block_max { + break; + } + block_id2 = item_2.blocks[item_2.p_block as usize].block_id; + } + cmp::Ordering::Equal => { + if t2 + 1 < query_list.len() as i32 { + t2 += 1; + let item_2 = &query_list[t2 as usize]; + block_id2 = item_2.blocks[item_2.p_block as usize].block_id; + + continue; + } + + if !enable_inter_query_threading_multi { + let mut block_score = 0.0; + if SPEEDUP_FLAG && result_type != &ResultType::Count { + for query_list_item_mut in query_list.iter_mut() { + block_score += query_list_item_mut.blocks + [query_list_item_mut.p_block as usize] + .max_block_score; + } + } + + if SPEEDUP_FLAG && SORT_FLAG && result_type != &ResultType::Count { + let mut p_block_vec: Vec = vec![0; query_term_count]; + for i in 0..query_list.len() { + p_block_vec[query_list[i].term_index_unique] = query_list[i].p_block + } + let block_object = BlockObject { + block_id: block_id1 as usize, + block_score, + p_block_vec, + }; + block_vec.push(block_object); + } else if !SPEEDUP_FLAG + || result_type == &ResultType::Count + || search_result.topk_candidates.current_heap_size < top_k + || block_score > search_result.topk_candidates._elements[0].score + { + if shard.meta.access_type == AccessType::Mmap { + for query_list_item_mut in query_list.iter_mut() { + let segment = + &shard.segments_index[query_list_item_mut.key0 as usize]; + query_list_item_mut.byte_array = &shard.index_file_mmap[segment + .byte_array_blocks_pointer + [block_id1 as usize] + .0 + ..segment.byte_array_blocks_pointer[block_id1 as usize].0 + + segment.byte_array_blocks_pointer[block_id1 as usize].1]; + } + for nonunique_query_list_item_mut in non_unique_query_list.iter_mut() { + let segment = 
&shard.segments_index + [nonunique_query_list_item_mut.key0 as usize]; + nonunique_query_list_item_mut.byte_array = &shard.index_file_mmap + [segment.byte_array_blocks_pointer[block_id1 as usize].0 + ..segment.byte_array_blocks_pointer[block_id1 as usize].0 + + segment.byte_array_blocks_pointer + [block_id1 as usize] + .1]; + } + for not_query_list_item_mut in not_query_list.iter_mut() { + let segment = + &shard.segments_index[not_query_list_item_mut.key0 as usize]; + not_query_list_item_mut.byte_array = &shard.index_file_mmap[segment + .byte_array_blocks_pointer + [block_id1 as usize] + .0 + ..segment.byte_array_blocks_pointer[block_id1 as usize].0 + + segment.byte_array_blocks_pointer[block_id1 as usize].1]; + } + } else { + for query_list_item_mut in query_list.iter_mut() { + query_list_item_mut.byte_array = &shard.segments_index + [query_list_item_mut.key0 as usize] + .byte_array_blocks[block_id1 as usize]; + } + for nonunique_query_list_item_mut in non_unique_query_list.iter_mut() { + nonunique_query_list_item_mut.byte_array = &shard.segments_index + [nonunique_query_list_item_mut.key0 as usize] + .byte_array_blocks[block_id1 as usize]; + } + for not_query_list_item_mut in not_query_list.iter_mut() { + not_query_list_item_mut.byte_array = &shard.segments_index + [not_query_list_item_mut.key0 as usize] + .byte_array_blocks[block_id1 as usize]; + } + } + + let mut result_count_local = 0; + intersection_docid( + shard, + non_unique_query_list, + query_list, + not_query_list, + block_id1 as usize, + &mut result_count_local, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + phrase_query, + block_score, + ) + .await; + + result_count_arc.fetch_add(result_count_local as usize, Ordering::Relaxed); + } + } else { + let mut query_list_copy: Vec = Vec::new(); + let mut non_unique_query_list_copy: Vec = + Vec::new(); + + for x in &mut *query_list { + query_list_copy.push(x.clone()); + } + + for x in &mut *non_unique_query_list { + let y = x.clone(); + non_unique_query_list_copy.push(y); + } + + let result_count_clone = result_count_arc.clone(); + task_list.push(tokio::spawn(async move { + let result_count_local = 1; + result_count_clone.fetch_add(result_count_local, Ordering::Relaxed); + })); + } + + *matching_blocks += 1; + + t2 = 1; + + for item in query_list.iter_mut() { + item.p_block += 1; + if item.p_block == item.p_block_max { + break 'exit; + } + } + + let item_1 = &query_list[t1 as usize]; + let item_2 = &query_list[t2 as usize]; + block_id1 = item_1.blocks[item_1.p_block as usize].block_id; + block_id2 = item_2.blocks[item_2.p_block as usize].block_id; + } + } + } + + if SORT_FLAG && SPEEDUP_FLAG && (result_type != &ResultType::Count) { + block_vec.sort_unstable_by(|x, y| y.block_score.partial_cmp(&x.block_score).unwrap()); + for block in block_vec { + if (result_type == &ResultType::Topk) + && search_result.topk_candidates.result_sort.is_empty() + && (search_result.topk_candidates.current_heap_size == top_k) + && (block.block_score <= search_result.topk_candidates._elements[0].score) + { + break; + } + + for item in query_list.iter_mut() { + item.p_block = block.p_block_vec[item.term_index_unique]; + } + + if shard.meta.access_type == AccessType::Mmap { + for query_list_item_mut in query_list.iter_mut() { + let segment = &shard.segments_index[query_list_item_mut.key0 as usize]; + query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[block.block_id].0 + ..segment.byte_array_blocks_pointer[block.block_id].0 + + 
segment.byte_array_blocks_pointer[block.block_id].1]; + } + for nonunique_query_list_item_mut in non_unique_query_list.iter_mut() { + let segment = + &shard.segments_index[nonunique_query_list_item_mut.key0 as usize]; + nonunique_query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[block.block_id].0 + ..segment.byte_array_blocks_pointer[block.block_id].0 + + segment.byte_array_blocks_pointer[block.block_id].1]; + } + for not_query_list_item_mut in not_query_list.iter_mut() { + let segment = &shard.segments_index[not_query_list_item_mut.key0 as usize]; + not_query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[block.block_id].0 + ..segment.byte_array_blocks_pointer[block.block_id].0 + + segment.byte_array_blocks_pointer[block.block_id].1]; + } + } else { + for query_list_item_mut in query_list.iter_mut() { + query_list_item_mut.byte_array = &shard.segments_index + [query_list_item_mut.key0 as usize] + .byte_array_blocks[block.block_id]; + } + for nonunique_query_list_item_mut in non_unique_query_list.iter_mut() { + nonunique_query_list_item_mut.byte_array = &shard.segments_index + [nonunique_query_list_item_mut.key0 as usize] + .byte_array_blocks[block.block_id]; + } + for not_query_list_item_mut in not_query_list.iter_mut() { + not_query_list_item_mut.byte_array = &shard.segments_index + [not_query_list_item_mut.key0 as usize] + .byte_array_blocks[block.block_id]; + } + } + + let mut result_count_local = 0; + intersection_docid( + shard, + non_unique_query_list, + query_list, + not_query_list, + block.block_id, + &mut result_count_local, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + phrase_query, + block.block_score, + ) + .await; + + result_count_arc.fetch_add(result_count_local as usize, Ordering::Relaxed); + } + } +} diff --git a/mobile_app/rust/src/seekstorm/intersection_simd.rs b/mobile_app/rust/src/seekstorm/intersection_simd.rs new file mode 100644 index 0000000..6fd0489 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/intersection_simd.rs @@ -0,0 +1,178 @@ +#[cfg(target_arch = "aarch64")] +use std::{ + arch::aarch64::{uint16x8_t, vceqq_u16, vld1q_dup_u16, vld1q_u16, vst1q_u16}, + mem::{self}, +}; + +use crate::index::Shard; +use crate::{ + add_result::add_result_multiterm_multifield, + index::{NonUniquePostingListObjectQuery, PostingListObjectQuery}, + search::{FilterSparse, ResultType, SearchResult}, + utils::read_u16, +}; +use ahash::AHashSet; + + + +#[cfg(target_arch = "aarch64")] +pub(crate) fn intersection_vector16( + a: &[u8], + s_a: usize, + b: &[u8], + s_b: usize, + result_count: &mut i32, + block_id: usize, + index: &Shard, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + phrase_query: bool, + all_terms_frequent: bool, +) { + unsafe { + let mut i_a = 0; + let mut i_b = 0; + let vectorlength = mem::size_of::() / mem::size_of::(); + let st_b = (s_b / vectorlength) * vectorlength; + while i_a < s_a && i_b < st_b { + if read_u16(&a[..], i_a * 2) < read_u16(&b[..], i_b * 2) { + i_a += 1; + continue; + } else if read_u16(&a[..], i_a * 2) > read_u16(&b[..], (i_b + vectorlength - 1) * 2) { + i_b += vectorlength; + continue; + } + + let v_a = vld1q_dup_u16(a[(i_a * 2)..].as_ptr() as *const _); + let v_b = 
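+ // NEON path: v_a holds one doc id from list a broadcast to all 8 lanes, v_b the next 8 doc ids from list b, compared lane-wise below.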
vld1q_u16(b[(i_b * 2)..].as_ptr() as *const _); + let res_v = vceqq_u16(v_a, v_b); + let mut res = [0u16; 8]; + vst1q_u16(res.as_mut_ptr(), res_v); + for i in 0..res.len() { + if res[i] == 0 { + continue; + } + query_list[0].p_docid = i_a; + query_list[1].p_docid = i_b + i; + add_result_multiterm_multifield( + index, + (block_id << 16) | read_u16(&a[..], i_a * 2) as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + f32::MAX, + all_terms_frequent, + ); + break; + } + i_a += 1; + } + while i_a < s_a && i_b < s_b { + let a = read_u16(&a[..], i_a * 2); + let b = read_u16(&b[..], i_b * 2); + match a.cmp(&b) { + std::cmp::Ordering::Less => { + i_a += 1; + } + std::cmp::Ordering::Greater => { + i_b += 1; + } + std::cmp::Ordering::Equal => { + query_list[0].p_docid = i_a; + query_list[1].p_docid = i_b; + add_result_multiterm_multifield( + index, + (block_id << 16) | a as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + f32::MAX, + all_terms_frequent, + ); + + i_a += 1; + i_b += 1; + } + } + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] +pub(crate) fn intersection_vector16( + a: &[u8], + s_a: usize, + b: &[u8], + s_b: usize, + result_count: &mut i32, + block_id: usize, + index: &Shard, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + phrase_query: bool, + all_terms_frequent: bool, +) { + let mut i_a = 0; + let mut i_b = 0; + while i_a < s_a && i_b < s_b { + let a = read_u16(&a[..], i_a * 2); + let b = read_u16(&b[..], i_b * 2); + match a.cmp(&b) { + std::cmp::Ordering::Less => { + i_a += 1; + } + std::cmp::Ordering::Greater => { + i_b += 1; + } + std::cmp::Ordering::Equal => { + query_list[0].p_docid = i_a; + query_list[1].p_docid = i_b; + add_result_multiterm_multifield( + index, + (block_id << 16) | a as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + f32::MAX, + all_terms_frequent, + ); + + i_a += 1; + i_b += 1; + } + } + } +} diff --git a/mobile_app/rust/src/seekstorm/lib.rs b/mobile_app/rust/src/seekstorm/lib.rs new file mode 100644 index 0000000..2538c60 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/lib.rs @@ -0,0 +1,517 @@ +#![crate_type = "lib"] +#![crate_name = "seekstorm"] +#![doc(html_logo_url = "http://seekstorm.com/assets/logo.svg")] +#![doc(html_favicon_url = "http://seekstorm.com/favicon.ico")] + +//! # `seekstorm` +//! SeekStorm is an open-source, sub-millisecond full-text search library & multi-tenancy server written in Rust. +//! The **SeekStorm library** can be embedded into your program, while the **SeekStorm server** is a standalone search server to be accessed via HTTP. +//! ### Add required crates to your project +//! ```text +//! cargo add seekstorm +//! cargo add tokio +//! cargo add serde_json +//! ``` +//! ### use an asynchronous Rust runtime +//! ```no_run +//! use std::error::Error; +//! #[tokio::main] +//! async fn main() -> Result<(), Box> { +//! +//! // your SeekStorm code here +//! +//! Ok(()) +//! } +//! 
``` +//! ### create index +//! ```no_run +//! # tokio_test::block_on(async { +//! use std::path::PathBuf; +//! use std::sync::{Arc, RwLock}; +//! use seekstorm::index::{IndexMetaObject, SimilarityType,TokenizerType,StopwordType,FrequentwordType,AccessType,StemmerType,NgramSet,create_index}; +//! +//! let index_path=PathBuf::new("C:/index/"); +//! let schema_json = r#" +//! [{"field":"title","field_type":"Text","stored":false,"indexed":false}, +//! {"field":"body","field_type":"Text","stored":true,"indexed":true}, +//! {"field":"url","field_type":"Text","stored":false,"indexed":false}]"#; +//! let schema=serde_json::from_str(schema_json).unwrap(); +//! let meta = IndexMetaObject { +//! id: 0, +//! name: "test_index".into(), +//! similarity:SimilarityType::Bm25f, +//! tokenizer:TokenizerType::AsciiAlphabetic, +//! stemmer:StemmerType::None, +//! stop_words: StopwordType::None, +//! frequent_words:FrequentwordType::English, +//! ngram_indexing:NgramSet::NgramFF as u8, +//! access_type: AccessType::Mmap, +//! spelling_correction: None, +//! query_completion: None, +//! }; +//! let segment_number_bits1=11; +//! let serialize_schema=true; +//! let index_arc=create_index(index_path,meta,&schema,&Vec::new(),segment_number_bits1,false,None).await.unwrap(); +//! # }); +//! ``` +//! ### open index (alternatively to create index) +//! ```no_run +//! # tokio_test::block_on(async { +//! use seekstorm::index::open_index; +//! use std::path::PathBuf; +//! +//! let index_path=PathBuf::new("C:/index/"); +//! let index_arc=open_index(index_path,false).await.unwrap(); +//! # }); +//! ``` +//! ### index document +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::index::IndexDocument; +//! use seekstorm::index::FileType; +//! +//! let document_json = r#" +//! {"title":"title1 test","body":"body1","url":"url1"}"#; +//! let document=serde_json::from_str(document_json).unwrap(); +//! index_arc.index_document(document,FileType::None).await; +//! # }); +//! ``` +//! ### index documents +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::index::IndexDocuments; +//! let documents_json = r#" +//! [{"title":"title1 test","body":"body1","url":"url1"}, +//! {"title":"title2","body":"body2 test","url":"url2"}, +//! {"title":"title3 test","body":"body3 test","url":"url3"}]"#; +//! let documents_vec=serde_json::from_str(documents_json).unwrap(); +//! index_arc.index_documents(documents_vec).await; +//! # }); +//! ``` +//! ### delete documents by document id +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::index::DeleteDocuments; +//! +//! let docid_vec=vec![1,2]; +//! index_arc.delete_documents(docid_vec).await; +//! # }); +//! ``` +//! ### delete documents by query +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! 
use seekstorm::search::QueryType; +//! use seekstorm::index::DeleteDocumentsByQuery; +//! +//! let query="test".to_string(); +//! let offset=0; +//! let length=10; +//! let query_type=QueryType::Intersection; +//! let include_uncommitted=false; +//! let field_filter=Vec::new(); +//! let facet_filter=Vec::new(); +//! let result_sort=Vec::new(); +//! index_arc.delete_documents_by_query(query, query_type, offset, length, include_uncommitted,field_filter,facet_filter,result_sort).await; +//! # }); +//! ``` +//! ### update documents +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::index::UpdateDocuments; +//! use seekstorm::commit::Commit; +//! +//! let id_document_vec_json = r#" +//! [[1,{"title":"title1 test","body":"body1","url":"url1"}], +//! [2,{"title":"title3 test","body":"body3 test","url":"url3"}]]"#; +//! let id_document_vec=serde_json::from_str(id_document_vec_json).unwrap(); +//! index_arc.update_documents(id_document_vec).await; +//! +//! // ### commit documents +//! +//! index_arc.commit().await; +//! # }); +//! ``` +//! ### search index +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::search::{Search, QueryType, ResultType, QueryRewriting}; +//! +//! let query="test".to_string(); +//! let offset=10; +//! let length=10; +//! let query_type=QueryType::Intersection; +//! let result_type=ResultType::TopkCount; +//! let include_uncommitted=false; +//! let field_filter=Vec::new(); +//! let query_facets=Vec::new(); +//! let facet_filter=Vec::new(); +//! let result_sort=Vec::new(); +//! let result_object = index_arc.search(query, query_type, offset, length, result_type,include_uncommitted,field_filter,query_facets,facet_filter,result_sort,QueryRewriting::SearchOnly).await; +//! +//! // ### display results +//! +//! use seekstorm::highlighter::{Highlight, highlighter}; +//! use std::collections::HashSet; +//! +//! let highlights:Vec= vec![ +//! Highlight { +//! field: "body".to_string(), +//! name:String::new(), +//! fragment_number: 2, +//! fragment_size: 160, +//! highlight_markup: true, +//! ..Default::default() +//! }, +//! ]; +//! let highlighter=Some(highlighter(&index_arc,highlights, result_object.query_terms).await); +//! let return_fields_filter= HashSet::new(); +//! let distance_fields=Vec::new(); +//! let index=index_arc.read().await; +//! for result in result_object.results.iter() { +//! let doc=index.get_document(result.doc_id,false,&highlighter,&return_fields_filter,&distance_fields).await.unwrap(); +//! println!("result {} rank {} body field {:?}" , result.doc_id,result.score, doc.get("body")); +//! } +//! println!("result counts {} {} {}",result_object.results.len(), result_object.result_count, result_object.result_count_total); +//! # }); +//! ``` +//! ### get document +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use std::collections::HashSet; +//! +//! let doc_id=0; +//! let highlighter=None; +//! let return_fields_filter= HashSet::new(); +//! 
let distance_fields=Vec::new(); +//! let index=index_arc.read().await; +//! let doc=index.get_document(doc_id,false,&highlighter,&return_fields_filter,&distance_fields).await.unwrap(); +//! # }); +//! ``` +//! ### index JSON file in JSON, Newline-delimited JSON and Concatenated JSON format +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::ingest::IngestJson; +//! use std::path::PathBuf; +//! +//! let file_path=PathBuf::new("wiki-articles.json"); +//! let _ =index_arc.ingest_json(file_path).await; +//! # }); +//! ``` +//! ### index all PDF files in directory and sub-directories +//! - converts pdf to text and indexes it +//! - extracts title from metatag, or first line of text, or from filename +//! - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970) +//! - copies all ingested pdf files to "files" subdirectory in index +//! - the following index schema is required (and automatically created by the console `ingest` command): +//! ```no_run +//! let schema_json = r#" +//! [ +//! { +//! "field": "title", +//! "stored": true, +//! "indexed": true, +//! "field_type": "Text", +//! "boost": 10 +//! }, +//! { +//! "field": "body", +//! "stored": true, +//! "indexed": true, +//! "field_type": "Text" +//! }, +//! { +//! "field": "url", +//! "stored": true, +//! "indexed": false, +//! "field_type": "Text" +//! }, +//! { +//! "field": "date", +//! "stored": true, +//! "indexed": false, +//! "field_type": "Timestamp", +//! "facet": true +//! } +//! ]"#; +//! ``` +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! use std::path::PathBuf; +//! use seekstorm::ingest::IngestPdf; +//! +//! let file_path=PathBuf::new("C:/Users/johndoe/Downloads"); +//! let _ =index_arc.ingest_pdf(file_path).await; +//! # }); +//! ``` +//! ### index PDF file +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! use std::path::PathBuf; +//! use seekstorm::ingest::IndexPdfFile; +//! +//! let file_path=PathBuf::new("C:/test.pdf"); +//! let _ =index_arc.index_pdf_file(file_path).await; +//! # }); +//! ``` +//! ### index PDF file bytes +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! use std::path::PathBuf; +//! use std::fs; +//! use chrono::Utc; +//! use seekstorm::ingest::IndexPdfBytes; +//! +//! let file_date=Utc::now().timestamp(); +//! let file_path=PathBuf::new("C:/test.pdf"); +//! let document = fs::read(file_path).unwrap(); +//! let _ =index_arc.index_pdf_bytes(file_path, file_date, &document).await; +//! # }); +//! ``` +//! ### get PDF file bytes +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # use std::path::PathBuf; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! let doc_id=0; +//! let _file=index_arc.read().await.get_file(doc_id).await.unwrap(); +//! # }); +//! 
``` +//! ### clear index +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # use std::path::PathBuf; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! index_arc.write().await.clear_index().await; +//! # }); +//! ``` +//! ### delete index +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # use std::path::PathBuf; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! index_arc.write().await.delete_index(); +//! # }); +//! ``` +//! ### close index +//! ```no_run +//! # tokio_test::block_on(async { +//! # use seekstorm::index::open_index; +//! # use std::path::PathBuf; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let mut index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::index::Close; +//! +//! index_arc.close().await; +//! # }); +//! ``` +//! ### seekstorm library version string +//! ```no_run +//! use seekstorm::index::version; +//! +//! let version=version(); +//! println!("version {}",version); +//! ``` +//! +//! ---------------- +//! ### Faceted search - Quick start +//! **Facets are defined in 3 different places:** +//! 1. the facet fields are defined in schema at create_index, +//! 2. the facet field values are set in index_document at index time, +//! 3. the query_facets/facet_filter search parameters are specified at query time. +//! Facets are then returned in the search result object. +//! +//! A minimal working example of faceted indexing & search requires just 60 lines of code. But to puzzle it all together from the documentation alone might be tedious. +//! This is why we provide a quick start example here: +//! ### create index +//! ```no_run +//! # tokio_test::block_on(async { +//! use std::path::PathBuf; +//! use seekstorm::index::{IndexMetaObject, SimilarityType,TokenizerType,StopwordType,FrequentwordType,AccessType,StemmerType,NgramSet,create_index}; +//! +//! let index_path=PathBuf::new("C:/index/"); +//! let schema_json = r#" +//! [{"field":"title","field_type":"Text","stored":false,"indexed":false}, +//! {"field":"body","field_type":"Text","stored":true,"indexed":true}, +//! {"field":"url","field_type":"Text","stored":true,"indexed":false}, +//! {"field":"town","field_type":"String15","stored":false,"indexed":false,"facet":true}]"#; +//! let schema=serde_json::from_str(schema_json).unwrap(); +//! let meta = IndexMetaObject { +//! id: 0, +//! name: "test_index".into(), +//! similarity:SimilarityType::Bm25f, +//! tokenizer:TokenizerType::AsciiAlphabetic, +//! stemmer:StemmerType::None, +//! stop_words: StopwordType::None, +//! frequent_words:FrequentwordType::English, +//! ngram_indexing:NgramSet::NgramFF as u8, +//! access_type: AccessType::Mmap, +//! spelling_correction: None, +//! query_completion: None, +//! }; +//! let serialize_schema=true; +//! let segment_number_bits1=11; +//! let index_arc=create_index(index_path,meta,&schema,&Vec::new(),segment_number_bits1,false,None).await.unwrap(); +//! # }); +//! ``` +//! ### index documents +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::index::IndexDocuments; +//! use seekstorm::commit::Commit; +//! 
use seekstorm::search::{QueryType, ResultType, QueryFacet, FacetFilter}; +//! +//! let documents_json = r#" +//! [{"title":"title1 test","body":"body1","url":"url1","town":"Berlin"}, +//! {"title":"title2","body":"body2 test","url":"url2","town":"Warsaw"}, +//! {"title":"title3 test","body":"body3 test","url":"url3","town":"New York"}]"#; +//! let documents_vec=serde_json::from_str(documents_json).unwrap(); +//! index_arc.index_documents(documents_vec).await; +//! +//! // ### commit documents +//! +//! index_arc.commit().await; +//! # }); +//! ``` +//! ### search index +//! ```no_run +//! # tokio_test::block_on(async { +//! # use std::path::PathBuf; +//! # use seekstorm::index::open_index; +//! # let index_path=PathBuf::new("C:/index/"); +//! # let index_arc=open_index(index_path,false).await.unwrap(); +//! use seekstorm::search::{QueryType, ResultType, QueryFacet, FacetFilter, QueryRewriting,Search}; +//! +//! let query="test".to_string(); +//! let offset=0; +//! let length=10; +//! let query_type=QueryType::Intersection; +//! let result_type=ResultType::TopkCount; +//! let include_uncommitted=false; +//! let field_filter=Vec::new(); +//! let query_facets = vec![QueryFacet::String16 {field: "town".to_string(),prefix: "".to_string(),length: u16::MAX}]; +//! let facet_filter=Vec::new(); +//! //let facet_filter = vec![FacetFilter {field: "town".to_string(), filter:Filter::String(vec!["Berlin".to_string()])}]; +//! let result_sort=Vec::new(); +//! let result_object = index_arc.search(query, query_type, offset, length, result_type,include_uncommitted,field_filter,query_facets,facet_filter,result_sort,QueryRewriting::SearchOnly).await; +//! +//! // ### display results +//! +//! use std::collections::HashSet; +//! use seekstorm::highlighter::{highlighter, Highlight}; +//! +//! let highlights:Vec= vec![ +//! Highlight { +//! field: "body".to_owned(), +//! name:String::new(), +//! fragment_number: 2, +//! fragment_size: 160, +//! highlight_markup: true, +//! ..Default::default() +//! }, +//! ]; +//! let highlighter=Some(highlighter(&index_arc,highlights, result_object.query_terms).await); +//! let return_fields_filter= HashSet::new(); +//! let distance_fields=Vec::new(); +//! let index=index_arc.write().await; +//! for result in result_object.results.iter() { +//! let doc=index.get_document(result.doc_id,false,&highlighter,&return_fields_filter,&distance_fields).await.unwrap(); +//! println!("result {} rank {} body field {:?}" , result.doc_id,result.score, doc.get("body")); +//! } +//! println!("result counts {} {} {}",result_object.results.len(), result_object.result_count, result_object.result_count_total); +//! +//! // ### display facets +//! +//! println!("{}", serde_json::to_string_pretty(&result_object.facets).unwrap()); +//! # }); +//! ``` + +/// include README.md in documentation +#[cfg_attr(doctest, doc = include_str!("../../README.md"))] +pub struct ReadmeDoctests; + +/// include FACETED_SEARCH.md in documentation +#[cfg_attr(doctest, doc = include_str!("../../FACETED_SEARCH.md"))] +pub struct ReadmeDoctests2; + +pub(crate) mod add_result; +/// Commit moves indexed documents from the intermediate uncompressed data structure in RAM +/// to the final compressed data structure on disk. +pub mod commit; +pub(crate) mod compatible; +pub(crate) mod compress_postinglist; +pub(crate) mod doc_store; +/// Geo search by indexing geo points (latitude, longitude), proximity searching for points within a specified radius, and proximity sorting. 
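+/// Internally, a point is stored as a u64 facet value; proximity sorting compares two documents'
+/// point facets with `morton_ordering` relative to a query base point (see `min_heap.rs`).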
+pub mod geo_search;
+/// Extracts the most relevant fragments (snippets, summaries) from specified fields of the document to provide a "keyword in context" (KWIC) functionality.
+/// With highlight_markup the matching query terms within the fragments can be highlighted with HTML markup.
+pub mod highlighter;
+/// Operate the index: create_index, open_index, clear_index, close_index, delete_index, index_document(s)
+pub mod index;
+pub(crate) mod index_posting;
+/// Ingest JSON, Newline-delimited JSON, Concatenated JSON files, and PDF files into the index.
+pub mod ingest;
+pub(crate) mod intersection;
+pub(crate) mod intersection_simd;
+pub(crate) mod min_heap;
+pub(crate) mod realtime_search;
+/// Search the index for all indexed documents, both for committed and uncommitted documents.
+/// The latter enables true realtime search: documents are available for search in exactly the same millisecond they are indexed.
+pub mod search;
+pub(crate) mod single;
+/// Tokenizes text into tokens (words), supports Chinese word segmentation, folds (converts) diacritics, accents, zalgo text, umlaut, bold, italic, full-width UTF-8 characters into their basic representation.
+pub(crate) mod tokenizer;
+pub(crate) mod union;
+/// Utils `truncate()` and `substring()`
+pub mod utils;
+#[cfg(feature = "zh")]
+pub(crate) mod word_segmentation;
diff --git a/mobile_app/rust/src/seekstorm/min_heap.rs b/mobile_app/rust/src/seekstorm/min_heap.rs
new file mode 100644
index 0000000..dd12ddb
--- /dev/null
+++ b/mobile_app/rust/src/seekstorm/min_heap.rs
@@ -0,0 +1,1167 @@
+use tokio::sync::RwLockReadGuard;
+
+use ahash::AHashMap;
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    geo_search::morton_ordering,
+    index::{FieldType, Shard},
+    search::{FacetValue, ResultSortIndex, SortOrder},
+    utils::{
+        read_f32, read_f64, read_i8, read_i16, read_i32, read_i64, read_u16, read_u32, read_u64,
+    },
+};
+
+#[derive(Clone, Debug, Copy, Default, Deserialize, Serialize)]
+pub struct Result {
+    pub doc_id: usize,
+    pub score: f32,
+}
+
+/// MinHeap implements a min-heap, which is a binary heap used as a priority queue.
+/// Maintains a list of the top-k most relevant result candidates.
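+/// Once the heap holds k entries, a new candidate is only compared against the root
+/// (the weakest of the current top-k); if it ranks higher, the root is replaced and
+/// sifted down, and `docid_hashset` keeps a duplicate doc id from occupying two slots.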
+/// Better performance than a ordered list with binary search, inserts, and deletes +pub(crate) struct MinHeap<'a> { + pub _elements: Vec, + pub current_heap_size: usize, + pub docid_hashset: AHashMap, + + pub index: &'a Shard, + pub result_sort: &'a Vec>, +} + +#[inline] +pub(crate) fn result_ordering_root( + shard_vec: &[RwLockReadGuard<'_, Shard>], + shard_bits: usize, + result_sort: &Vec>, + result1: Result, + result2: Result, +) -> core::cmp::Ordering { + let shard_id1 = result1.doc_id & ((1 << shard_bits) - 1); + let doc_id1 = result1.doc_id >> shard_bits; + let shard1 = &shard_vec[shard_id1]; + + let shard_id2 = result2.doc_id & ((1 << shard_bits) - 1); + let doc_id2 = result2.doc_id >> shard_bits; + let shard2 = &shard_vec[shard_id2]; + + for field in result_sort.iter() { + match shard1.facets[field.idx].field_type { + FieldType::U8 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = + &shard1.facets_file_mmap[(shard1.facets_size_sum * doc_id1) + offset]; + + let facet_value_2 = + &shard2.facets_file_mmap[(shard2.facets_size_sum * doc_id2) + offset]; + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(facet_value_2) + } else { + facet_value_2.cmp(facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::U16 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_u16( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_u16( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::U32 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_u32( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_u32( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::U64 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_u64( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_u64( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::I8 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_i8( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_i8( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::I16 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_i16( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + 
let facet_value_2 = read_i16( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::I32 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_i32( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_i32( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::I64 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_i64( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_i64( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::Timestamp => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_i64( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_i64( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::F32 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_f32( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_f32( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1 + .partial_cmp(&facet_value_2) + .unwrap_or(core::cmp::Ordering::Equal) + } else { + facet_value_2 + .partial_cmp(&facet_value_1) + .unwrap_or(core::cmp::Ordering::Equal) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::F64 => { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_f64( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_f64( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1 + .partial_cmp(&facet_value_2) + .unwrap_or(core::cmp::Ordering::Equal) + } else { + facet_value_2 + .partial_cmp(&facet_value_1) + .unwrap_or(core::cmp::Ordering::Equal) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::String16 => { + let offset = shard1.facets[field.idx].offset; + + let facet_id_1 = read_u16( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_id_2 = read_u16( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let facet_value_1 = shard1.facets[field.idx] + .values + .get_index((facet_id_1).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 
= shard2.facets[field.idx] + .values + .get_index((facet_id_2).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::StringSet16 => { + let offset = shard1.facets[field.idx].offset; + + let facet_id_1 = read_u16( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_id_2 = read_u16( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let facet_value_1 = shard1.facets[field.idx] + .values + .get_index((facet_id_1).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = shard2.facets[field.idx] + .values + .get_index((facet_id_2).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::String32 => { + let offset = shard1.facets[field.idx].offset; + + let facet_id_1 = read_u32( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_id_2 = read_u32( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let facet_value_1 = shard1.facets[field.idx] + .values + .get_index(facet_id_1 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = shard2.facets[field.idx] + .values + .get_index(facet_id_2 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::StringSet32 => { + let offset = shard1.facets[field.idx].offset; + + let facet_id_1 = read_u32( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_id_2 = read_u32( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let facet_value_1 = shard1.facets[field.idx] + .values + .get_index(facet_id_1 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = shard2.facets[field.idx] + .values + .get_index(facet_id_2 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::Point => { + if let FacetValue::Point(base) = &field.base { + let offset = shard1.facets[field.idx].offset; + + let facet_value_1 = read_u64( + &shard1.facets_file_mmap, + (shard1.facets_size_sum * doc_id1) + offset, + ); + let facet_value_2 = read_u64( + &shard2.facets_file_mmap, + (shard2.facets_size_sum * doc_id2) + offset, + ); + + let order = morton_ordering(facet_value_1, facet_value_2, base, &field.order); + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + } + + _ => {} + } + } + + result1 + .score + .partial_cmp(&result2.score) + .unwrap_or(core::cmp::Ordering::Equal) +} + +impl<'a> MinHeap<'a> { + #[inline(always)] + pub(crate) fn new( + size: usize, + index: &'a Shard, + result_sort: &'a Vec, + ) -> MinHeap<'a> { + MinHeap { + current_heap_size: 0, + docid_hashset: AHashMap::new(), + _elements: vec![ + Result { + doc_id: 0, + score: 0.0, + }; + 
size + ], + index, + result_sort, + } + } + + #[inline] + pub(crate) fn result_ordering_shard( + &self, + result1: Result, + result2: Result, + ) -> core::cmp::Ordering { + for field in self.result_sort.iter() { + match self.index.facets[field.idx].field_type { + FieldType::U8 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = &self.index.facets_file_mmap + [(self.index.facets_size_sum * result1.doc_id) + offset]; + + let facet_value_2 = &self.index.facets_file_mmap + [(self.index.facets_size_sum * result2.doc_id) + offset]; + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(facet_value_2) + } else { + facet_value_2.cmp(facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::U16 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_u16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_u16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::U32 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_u32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_u32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::U64 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_u64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_u64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::I8 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_i8( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_i8( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::I16 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_i16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_i16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::I32 => { + let offset = 
self.index.facets[field.idx].offset; + + let facet_value_1 = read_i32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_i32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + FieldType::I64 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_i64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_i64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::Timestamp => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_i64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_i64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::F32 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_f32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_f32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1 + .partial_cmp(&facet_value_2) + .unwrap_or(core::cmp::Ordering::Equal) + } else { + facet_value_2 + .partial_cmp(&facet_value_1) + .unwrap_or(core::cmp::Ordering::Equal) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::F64 => { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_f64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_f64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = if field.order == SortOrder::Descending { + facet_value_1 + .partial_cmp(&facet_value_2) + .unwrap_or(core::cmp::Ordering::Equal) + } else { + facet_value_2 + .partial_cmp(&facet_value_1) + .unwrap_or(core::cmp::Ordering::Equal) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::String16 => { + let offset = self.index.facets[field.idx].offset; + + let facet_id_1 = read_u16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_id_2 = read_u16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let facet_value_1 = self.index.facets[field.idx] + .values + .get_index((facet_id_1).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = self.index.facets[field.idx] + .values + .get_index((facet_id_2).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == 
SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::StringSet16 => { + let offset = self.index.facets[field.idx].offset; + + let facet_id_1 = read_u16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_id_2 = read_u16( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let facet_value_1 = self.index.facets[field.idx] + .values + .get_index((facet_id_1).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = self.index.facets[field.idx] + .values + .get_index((facet_id_2).into()) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::String32 => { + let offset = self.index.facets[field.idx].offset; + + let facet_id_1 = read_u32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_id_2 = read_u32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let facet_value_1 = self.index.facets[field.idx] + .values + .get_index(facet_id_1 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = self.index.facets[field.idx] + .values + .get_index(facet_id_2 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::StringSet32 => { + let offset = self.index.facets[field.idx].offset; + + let facet_id_1 = read_u32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_id_2 = read_u32( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let facet_value_1 = self.index.facets[field.idx] + .values + .get_index(facet_id_1 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let facet_value_2 = self.index.facets[field.idx] + .values + .get_index(facet_id_2 as usize) + .unwrap() + .1 + .0[0] + .clone(); + + let order = if field.order == SortOrder::Descending { + facet_value_1.cmp(&facet_value_2) + } else { + facet_value_2.cmp(&facet_value_1) + }; + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + + FieldType::Point => { + if let FacetValue::Point(base) = &field.base { + let offset = self.index.facets[field.idx].offset; + + let facet_value_1 = read_u64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result1.doc_id) + offset, + ); + let facet_value_2 = read_u64( + &self.index.facets_file_mmap, + (self.index.facets_size_sum * result2.doc_id) + offset, + ); + + let order = + morton_ordering(facet_value_1, facet_value_2, base, &field.order); + + if order != core::cmp::Ordering::Equal { + return order; + }; + } + } + + _ => {} + } + } + + result1 + .score + .partial_cmp(&result2.score) + .unwrap_or(core::cmp::Ordering::Equal) + } + + #[inline(always)] + fn get_left_child_index(element_index: usize) -> usize { + 2 * element_index + 1 + } + + #[inline(always)] + fn get_right_child_index(element_index: usize) -> usize { + 2 * element_index + 2 + } + + #[inline(always)] + fn 
get_parent_index(element_index: usize) -> usize { + (element_index - 1) / 2 + } + + #[inline(always)] + fn has_left_child(&self, element_index: usize) -> bool { + Self::get_left_child_index(element_index) < self.current_heap_size + } + + #[inline(always)] + fn has_right_child(&self, element_index: usize) -> bool { + Self::get_right_child_index(element_index) < self.current_heap_size + } + + #[inline(always)] + fn is_root(element_index: usize) -> bool { + element_index == 0 + } + + #[inline(always)] + fn get_left_child(&self, element_index: usize) -> &Result { + &self._elements[Self::get_left_child_index(element_index)] + } + + #[inline(always)] + fn get_right_child(&self, element_index: usize) -> &Result { + &self._elements[Self::get_right_child_index(element_index)] + } + + #[inline(always)] + fn get_parent(&self, element_index: usize) -> &Result { + &self._elements[Self::get_parent_index(element_index)] + } + + #[inline(always)] + fn swap(&mut self, first_index: usize, second_index: usize) { + self._elements.swap(first_index, second_index); + } + + #[inline(always)] + fn add(&mut self, result: &Result) { + self._elements[self.current_heap_size].score = result.score; + self._elements[self.current_heap_size].doc_id = result.doc_id; + self.current_heap_size += 1; + + self.heapify_up(); + } + + #[inline(always)] + fn pop_add(&mut self, score: f32, doc_id: usize) { + if !self.docid_hashset.is_empty() { + self.docid_hashset.remove(&self._elements[0].doc_id); + } + + self._elements[0].score = score; + self._elements[0].doc_id = doc_id; + self.heapify_down(); + } + + #[inline(always)] + fn heapify_up(&mut self) { + let mut index = self.current_heap_size - 1; + while !Self::is_root(index) + && self + .result_ordering_shard(self._elements[index], *Self::get_parent(self, index)) + .is_lt() + { + let parent_index = Self::get_parent_index(index); + self.swap(parent_index, index); + index = parent_index; + } + } + + #[inline(always)] + fn heapify_down(&mut self) { + let mut index: usize = 0; + while self.has_left_child(index) { + let mut smaller_index = Self::get_left_child_index(index); + if self.has_right_child(index) + && self + .result_ordering_shard( + *self.get_right_child(index), + *self.get_left_child(index), + ) + .is_lt() + { + smaller_index = Self::get_right_child_index(index); + } + if self + .result_ordering_shard(self._elements[smaller_index], self._elements[index]) + .is_ge() + { + break; + } + + self.swap(smaller_index, index); + index = smaller_index; + } + } + + #[inline(always)] + fn heapify_down_index(&mut self, index: usize) { + let mut index: usize = index; + while self.has_left_child(index) { + let mut smaller_index = Self::get_left_child_index(index); + if self.has_right_child(index) + && self + .result_ordering_shard( + *self.get_right_child(index), + *self.get_left_child(index), + ) + .is_lt() + { + smaller_index = Self::get_right_child_index(index); + } + + if self + .result_ordering_shard(self._elements[smaller_index], self._elements[index]) + .is_ge() + { + break; + } + + self.swap(smaller_index, index); + index = smaller_index; + } + } + + #[inline(always)] + pub(crate) fn add_topk(&mut self, result: Result, top_k: usize) -> bool { + if self.current_heap_size > top_k + && self + .result_ordering_shard(self._elements[0], result) + .is_ge() + { + return false; + } + + if !self.docid_hashset.is_empty() && self.docid_hashset.contains_key(&result.doc_id) { + if self._elements[0].doc_id == result.doc_id { + if self + .result_ordering_shard(result, self._elements[0]) + .is_gt() 
+ { + self._elements[0].score = result.score; + self.heapify_down(); + return true; + } else { + return false; + } + } else { + if self + .result_ordering_shard( + Result { + doc_id: result.doc_id, + score: self.docid_hashset[&result.doc_id], + }, + result, + ) + .is_ge() + { + return false; + } + + let mut index = 0; + while result.doc_id != self._elements[index].doc_id { + if index == self.current_heap_size - 1 { + self.pop_add(result.score, result.doc_id); + return true; + } + index += 1; + } + + self._elements[index].score = result.score; + self.heapify_down_index(index); + return true; + } + } + + if self.current_heap_size < top_k { + self.add(&result); + true + } else if self + .result_ordering_shard(result, self._elements[0]) + .is_gt() + { + self.pop_add(result.score, result.doc_id); + true + } else { + false + } + } +} diff --git a/mobile_app/rust/src/seekstorm/realtime_search.rs b/mobile_app/rust/src/seekstorm/realtime_search.rs new file mode 100644 index 0000000..ff7daad --- /dev/null +++ b/mobile_app/rust/src/seekstorm/realtime_search.rs @@ -0,0 +1,2077 @@ +use std::sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, +}; + +use ahash::{AHashMap, AHashSet}; +use smallvec::SmallVec; + +use crate::{ + add_result::{B, K, SIGMA, facet_count, is_facet_filter, read_multifield_vec}, + index::{ + AccessType, DOCUMENT_LENGTH_COMPRESSION, DUMMY_VEC_8, NgramType, + NonUniquePostingListObjectQuery, NonUniqueTermObject, PostingListObjectQuery, STOP_BIT, + Shard, SimilarityType, TermObject, hash32, hash64, + }, + min_heap, + search::{FilterSparse, QueryType, ResultType, SearchResult, decode_posting_list_counts}, + utils::{read_u16, read_u16_ref, read_u32, read_u32_ref}, +}; + +#[inline(always)] +pub(crate) fn get_next_position_uncommitted( + shard: &Shard, + plo: &mut NonUniquePostingListObjectQuery, +) -> u32 { + if plo.is_embedded { + return plo.embedded_positions[if plo.p_field == 0 { + plo.p_pos as usize + } else { + plo.field_vec[plo.p_field - 1].1 + plo.p_pos as usize + }]; + } + + if (shard.postings_buffer[plo.positions_pointer] & STOP_BIT) != 0 { + let position = (shard.postings_buffer[plo.positions_pointer] & 0b0111_1111) as u32; + plo.positions_pointer += 1; + position + } else if (shard.postings_buffer[plo.positions_pointer + 1] & STOP_BIT) != 0 { + let position = ((shard.postings_buffer[plo.positions_pointer] as u32) << 7) + | (shard.postings_buffer[plo.positions_pointer + 1] & 0b0111_1111) as u32; + plo.positions_pointer += 2; + position + } else { + let position = ((shard.postings_buffer[plo.positions_pointer] as u32) << 13) + | ((shard.postings_buffer[plo.positions_pointer + 1] as u32) << 7) + | (shard.postings_buffer[plo.positions_pointer + 2] & 0b0111_1111) as u32; + plo.positions_pointer += 3; + position + } +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn add_result_singleterm_uncommitted( + shard: &Shard, + docid: usize, + result_count: &mut i32, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + + plo_single: &mut PostingListObjectQuery, + not_query_list: &mut [PostingListObjectQuery], +) { + if !shard.delete_hashset.is_empty() && shard.delete_hashset.contains(&docid) { + return; + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + let local_docid = docid & 0b11111111_11111111; + + while plo.p_docid < plo.p_docid_count + && (plo.p_docid == 0 || (plo.docid as usize) < local_docid) + { + let mut read_pointer = plo.posting_pointer; + + 
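+            // Uncommitted postings live in `postings_buffer` as a linked chain: each entry
+            // starts with a u32 pointer to the next posting, followed by the u16 doc id.
+            // The chain is walked until the NOT-term reaches (or passes) the current document;
+            // an exact match below excludes the document from the result set.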
plo.posting_pointer = read_u32_ref(&shard.postings_buffer, &mut read_pointer) as usize; + plo.docid = read_u16_ref(&shard.postings_buffer, &mut read_pointer) as i32; + + plo.p_docid += 1; + } + if (plo.docid as usize) == local_docid { + return; + } + } + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + return; + }; + + let filtered = !not_query_list.is_empty() + || !field_filter_set.is_empty() + || !shard.delete_hashset.is_empty() + || !facet_filter.is_empty(); + + shard.decode_positions_uncommitted(plo_single, false); + + if !field_filter_set.is_empty() + && plo_single.field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() + { + let mut match_flag = false; + for field in plo_single.field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + + match *result_type { + ResultType::Count => { + if filtered { + facet_count(shard, search_result, docid); + + *result_count += 1; + } + return; + } + ResultType::Topk => {} + ResultType::TopkCount => { + if filtered { + facet_count(shard, search_result, docid); + + *result_count += 1; + } + } + } + + let bm25 = get_bm25f_singleterm_multifield_uncommitted(shard, docid, plo_single); + + search_result.topk_candidates.add_topk( + min_heap::Result { + doc_id: docid, + score: bm25, + }, + top_k, + ); +} + +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub(crate) fn add_result_multiterm_uncommitted( + shard: &Shard, + docid: usize, + result_count: &mut i32, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + phrase_query: bool, +) { + if !shard.delete_hashset.is_empty() && shard.delete_hashset.contains(&docid) { + return; + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + let local_docid = docid & 0b11111111_11111111; + + while plo.p_docid < plo.p_docid_count + && (plo.p_docid == 0 || (plo.docid as usize) < local_docid) + { + let mut read_pointer = plo.posting_pointer; + + plo.posting_pointer = read_u32_ref(&shard.postings_buffer, &mut read_pointer) as usize; + plo.docid = read_u16_ref(&shard.postings_buffer, &mut read_pointer) as i32; + + plo.p_docid += 1; + } + if (plo.docid as usize) == local_docid { + return; + } + } + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + return; + }; + + let filtered = phrase_query + || !field_filter_set.is_empty() + || !shard.delete_hashset.is_empty() + || !facet_filter.is_empty(); + + if !filtered && result_type == &ResultType::Count { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + + for plo in query_list.iter_mut() { + shard.decode_positions_uncommitted(plo, phrase_query); + + if !field_filter_set.is_empty() + && plo.field_vec.len() + field_filter_set.len() <= shard.indexed_field_vec.len() + { + let mut match_flag = false; + for field in plo.field_vec.iter() { + if field_filter_set.contains(&field.0) { + match_flag = true; + } + } + if !match_flag { + return; + } + } + } + + if phrase_query { + let len = query_list.len(); + let mut index_transpose = vec![0; len]; + for i in 0..len { + index_transpose[query_list[i].term_index_unique] = i; + } + + let mut phrasematch_count = 0; + if shard.indexed_field_vec.len() == 1 { + for plo in 
non_unique_query_list.iter_mut() { + plo.p_pos = 0; + let item = &query_list[index_transpose[plo.term_index_unique]]; + plo.positions_pointer = item.positions_pointer as usize; + plo.positions_count = item.positions_count; + + plo.is_embedded = item.is_embedded; + plo.embedded_positions = item.embedded_positions; + + plo.pos = get_next_position_uncommitted(shard, plo); + } + + non_unique_query_list + .sort_by(|x, y| x.positions_count.partial_cmp(&y.positions_count).unwrap()); + + let t1 = 0; + let mut t2 = 1; + let mut pos1 = non_unique_query_list[t1].pos; + let mut pos2 = non_unique_query_list[t2].pos; + + loop { + match (pos1 + non_unique_query_list[t2].term_index_nonunique as u32) + .cmp(&(pos2 + non_unique_query_list[t1].term_index_nonunique as u32)) + { + std::cmp::Ordering::Less => { + if t2 > 1 { + t2 = 1; + pos2 = non_unique_query_list[t2].pos; + } + + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + break; + } + pos1 += + get_next_position_uncommitted(shard, &mut non_unique_query_list[t1]) + + 1; + } + std::cmp::Ordering::Greater => { + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + break; + } + pos2 = non_unique_query_list[t2].pos + + get_next_position_uncommitted(shard, &mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + std::cmp::Ordering::Equal => { + if t2 + 1 < non_unique_query_list.len() { + t2 += 1; + pos2 = non_unique_query_list[t2].pos; + continue; + } + + phrasematch_count += 1; + if phrasematch_count >= 1 { + break; + } + + t2 = 1; + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + break; + } + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + break; + } + + pos1 += + get_next_position_uncommitted(shard, &mut non_unique_query_list[t1]) + + 1; + pos2 = non_unique_query_list[t2].pos + + get_next_position_uncommitted(shard, &mut non_unique_query_list[t2]) + + 1; + non_unique_query_list[t2].pos = pos2; + } + } + } + } else { + for plo in non_unique_query_list.iter_mut() { + let item = &query_list[index_transpose[plo.term_index_unique]]; + plo.positions_pointer = item.positions_pointer as usize; + plo.is_embedded = item.is_embedded; + plo.embedded_positions = item.embedded_positions; + plo.field_vec.clone_from(&item.field_vec); + plo.p_pos = 0; + plo.positions_count = item.positions_count; + plo.p_field = 0; + } + + 'main: for i in 0..shard.indexed_field_vec.len() as u16 { + for plo in non_unique_query_list.iter_mut() { + while plo.field_vec[plo.p_field].0 < i { + if !plo.is_embedded { + for _ in plo.p_pos..plo.field_vec[plo.p_field].1 as i32 { + get_next_position_uncommitted(shard, plo); + } + } + if plo.p_field < plo.field_vec.len() - 1 { + plo.p_field += 1; + plo.p_pos = 0; + } else { + break 'main; + } + } + if plo.field_vec[plo.p_field].0 > i { + continue 'main; + } + } + + for plo in non_unique_query_list.iter_mut() { + plo.p_pos = 0; + plo.positions_count = plo.field_vec[plo.p_field].1 as u32; + plo.pos = get_next_position_uncommitted(shard, plo); + } + + if !field_filter_set.is_empty() && !field_filter_set.contains(&i) { + continue; + } + + non_unique_query_list + .sort_by(|x, y| x.positions_count.partial_cmp(&y.positions_count).unwrap()); + + let t1 = 0; + let mut t2 = 1; + let mut pos1 = 
non_unique_query_list[t1].pos; + let mut pos2 = non_unique_query_list[t2].pos; + + loop { + match (pos1 + non_unique_query_list[t2].term_index_nonunique as u32) + .cmp(&(pos2 + non_unique_query_list[t1].term_index_nonunique as u32)) + { + std::cmp::Ordering::Less => { + if t2 > 1 { + t2 = 1; + pos2 = non_unique_query_list[t2].pos; + } + + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for item in non_unique_query_list.iter_mut().skip(1) { + item.p_pos += 1 + } + } + break; + } + pos1 += get_next_position_uncommitted( + shard, + &mut non_unique_query_list[t1], + ) + 1; + } + std::cmp::Ordering::Greater => { + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for (j, item) in non_unique_query_list.iter_mut().enumerate() { + if j != t2 { + item.p_pos += 1 + } + } + } + break; + } + pos2 = non_unique_query_list[t2].pos + + get_next_position_uncommitted( + shard, + &mut non_unique_query_list[t2], + ) + + 1; + non_unique_query_list[t2].pos = pos2; + } + std::cmp::Ordering::Equal => { + if t2 + 1 < non_unique_query_list.len() { + t2 += 1; + pos2 = non_unique_query_list[t2].pos; + continue; + } + + phrasematch_count += 1; + if phrasematch_count >= 1 { + break 'main; + } + + t2 = 1; + non_unique_query_list[t1].p_pos += 1; + if non_unique_query_list[t1].p_pos + == non_unique_query_list[t1].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for item in non_unique_query_list.iter_mut().skip(1) { + item.p_pos += 1 + } + } + break; + } + non_unique_query_list[t2].p_pos += 1; + if non_unique_query_list[t2].p_pos + == non_unique_query_list[t2].positions_count as i32 + { + if (i as usize) < shard.indexed_field_vec.len() - 1 { + for item in non_unique_query_list.iter_mut().skip(2) { + item.p_pos += 1 + } + } + break; + } + + pos1 += get_next_position_uncommitted( + shard, + &mut non_unique_query_list[t1], + ) + 1; + pos2 = non_unique_query_list[t2].pos + + get_next_position_uncommitted( + shard, + &mut non_unique_query_list[t2], + ) + + 1; + non_unique_query_list[t2].pos = pos2; + } + } + } + } + } + + if phrase_query && (phrasematch_count == 0) { + return; + } + } + + match *result_type { + ResultType::Count => { + facet_count(shard, search_result, docid); + + *result_count += 1; + return; + } + ResultType::Topk => {} + ResultType::TopkCount => { + facet_count(shard, search_result, docid); + + *result_count += 1; + } + } + + let bm25 = get_bm25f_multiterm_multifield_uncommitted(shard, docid, query_list); + + search_result.topk_candidates.add_topk( + min_heap::Result { + doc_id: docid, + score: bm25, + }, + top_k, + ); +} + +#[inline(always)] +pub(crate) fn get_bm25f_singleterm_multifield_uncommitted( + shard: &Shard, + docid: usize, + plo_single: &PostingListObjectQuery, +) -> f32 { + let mut bm25f = 0.0; + + let document_length_normalized_average = if shard.document_length_normalized_average == 0.0 { + shard.positions_sum_normalized as f32 / shard.indexed_doc_count as f32 + } else { + shard.document_length_normalized_average + }; + + if shard.indexed_field_vec.len() == 1 { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION + [shard.document_length_compressed_array[0][docid & 0b11111111_11111111] as usize] + as f32; + + let document_length_quotient = + document_length_normalized / 
document_length_normalized_average; + + match plo_single.ngram_type { + NgramType::SingleTerm => { + let tf = plo_single.field_vec[0].1 as f32; + + bm25f = plo_single.idf + * ((tf * (K + 1.0) / (tf + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + let tf_ngram1 = plo_single.field_vec_ngram1[0].1 as f32; + let tf_ngram2 = plo_single.field_vec_ngram2[0].1 as f32; + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + _ => { + let tf_ngram1 = plo_single.field_vec_ngram1[0].1 as f32; + let tf_ngram2 = plo_single.field_vec_ngram2[0].1 as f32; + let tf_ngram3 = plo_single.field_vec_ngram3[0].1 as f32; + bm25f = plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA) + + plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA) + + plo_single.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + } else { + match plo_single.ngram_type { + NgramType::SingleTerm => { + for field in plo_single.field_vec.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf = field.1 as f32; + + bm25f += plo_single.idf + * ((tf * (K + 1.0) + / (tf + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + for field in plo_single.field_vec_ngram1.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + bm25f += plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in plo_single.field_vec_ngram2.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + bm25f += plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + _ => { + for field in plo_single.field_vec_ngram1.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + bm25f += plo_single.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + 
(B * document_length_quotient))))) + + SIGMA); + } + + for field in plo_single.field_vec_ngram2.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + bm25f += plo_single.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in plo_single.field_vec_ngram3.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram3 = field.1 as f32; + + bm25f += plo_single.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + } + } + + bm25f +} + +#[inline(always)] +pub(crate) fn get_bm25f_multiterm_multifield_uncommitted( + shard: &Shard, + docid: usize, + query_list: &mut [PostingListObjectQuery], +) -> f32 { + let mut bm25f = 0.0; + + let document_length_normalized_average = if shard.document_length_normalized_average == 0.0 { + shard.positions_sum_normalized as f32 / shard.indexed_doc_count as f32 + } else { + shard.document_length_normalized_average + }; + + if shard.indexed_field_vec.len() == 1 { + let mut document_length_quotient = 0.0; + + for plo in query_list.iter() { + if !plo.bm25_flag { + continue; + } + + if document_length_quotient == 0.0 { + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[0][docid & 0b11111111_11111111] + as usize] as f32; + + document_length_quotient = + document_length_normalized / document_length_normalized_average; + } + + match plo.ngram_type { + NgramType::SingleTerm => { + let tf = plo.field_vec[0].1 as f32; + + bm25f += plo.idf + * ((tf * (K + 1.0) + / (tf + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + let tf_ngram1 = plo.field_vec_ngram1[0].1 as f32; + let tf_ngram2 = plo.field_vec_ngram2[0].1 as f32; + + bm25f += plo.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA) + + plo.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + _ => { + let tf_ngram1 = plo.field_vec_ngram1[0].1 as f32; + let tf_ngram2 = plo.field_vec_ngram2[0].1 as f32; + let tf_ngram3 = plo.field_vec_ngram3[0].1 as f32; + + bm25f += plo.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA) + + plo.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA) + + plo.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + } + } else { + for plo in query_list.iter() { + if !plo.bm25_flag { + continue; + } + + match plo.ngram_type { + NgramType::SingleTerm => { + for field in plo.field_vec.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + 
.document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] + as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf + * ((tf * (K + 1.0) + / (tf + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + for field in plo.field_vec_ngram1.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] + as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in plo.field_vec_ngram2.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] + as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + _ => { + for field in plo.field_vec_ngram1.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] + as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram1 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram1 + * ((tf_ngram1 * (K + 1.0) + / (tf_ngram1 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in plo.field_vec_ngram2.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] + as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram2 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram2 + * ((tf_ngram2 * (K + 1.0) + / (tf_ngram2 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + + for field in plo.field_vec_ngram3.iter() { + let field_id = field.0 as usize; + + let document_length_normalized = DOCUMENT_LENGTH_COMPRESSION[shard + .document_length_compressed_array[field_id][docid & 0b11111111_11111111] + as usize] + as f32; + + let document_length_quotient = + document_length_normalized / document_length_normalized_average; + + let tf_ngram3 = field.1 as f32; + + let weight = shard.indexed_schema_vec[field.0 as usize].boost; + + bm25f += weight + * plo.idf_ngram3 + * ((tf_ngram3 * (K + 1.0) + / (tf_ngram3 + (K * (1.0 - B + (B * document_length_quotient))))) + + SIGMA); + } + } + } + } + } + + 
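Each arm of the function above accumulates the same quantity: a per-field BM25 contribution per term (or per n-gram component), lower-bounded by SIGMA, with the field length normalized against the shard-wide average and, in the multi-field path, weighted by the field boost from `indexed_schema_vec`. A minimal standalone sketch of that contribution and of the IDF it consumes follows; the IDF matches the `(((N - n + 0.5) / (n + 0.5)) + 1).ln()` computed per term in `search_uncommitted` below, while the numeric values chosen here for `K`, `B` and `SIGMA` are illustrative assumptions, not the crate's actual constants.

```rust
// Stand-ins for the crate-level constants of the same names (values assumed).
const K: f32 = 1.2; // BM25 k1: term-frequency saturation
const B: f32 = 0.75; // BM25 b: strength of document-length normalization
const SIGMA: f32 = 0.0; // lower-bound shift added to each term contribution

// Lucene-style IDF, as computed per term in search_uncommitted below.
fn idf(indexed_doc_count: f32, posting_count: f32) -> f32 {
    (((indexed_doc_count - posting_count + 0.5) / (posting_count + 0.5)) + 1.0).ln()
}

// Per-term, per-field contribution accumulated into bm25f above.
fn bm25f_term(boost: f32, idf: f32, tf: f32, field_len: f32, avg_field_len: f32) -> f32 {
    let dl_quotient = field_len / avg_field_len;
    boost * idf * ((tf * (K + 1.0)) / (tf + K * (1.0 - B + B * dl_quotient)) + SIGMA)
}

fn main() {
    // Example: a term present in 2 of 10 documents, occurring 3 times
    // in a field of exactly average length, with no field boost.
    let idf_value = idf(10.0, 2.0);
    println!("{}", bm25f_term(1.0, idf_value, 3.0, 100.0, 100.0));
}
```

The single-field fast path above reuses one precomputed length quotient and applies no boost; the multi-field path recomputes the quotient and looks up the boost per field, which is what the loops over `field_vec` and `field_vec_ngram1/2/3` do.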
bm25f +} + +impl Shard { + pub(crate) fn get_posting_count_uncommited(&self, term_string: &str) -> usize { + let term_bytes = term_string.as_bytes(); + let key0 = hash32(term_bytes) & self.segment_number_mask1; + let key_hash = hash64(term_bytes); + + match self.segments_level0[key0 as usize].segment.get(&key_hash) { + Some(value1) => value1.posting_count, + + None => 0, + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn search_uncommitted( + &self, + unique_terms: &AHashMap, + non_unique_terms: &[NonUniqueTermObject], + query_type_mut: &mut QueryType, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + + search_result: &mut SearchResult, + result_count_arc: &Arc, + top_k: usize, + ) { + let mut query_list_map: AHashMap = AHashMap::new(); + let mut query_list: Vec; + + let mut not_query_list_map: AHashMap = AHashMap::new(); + let mut not_query_list: Vec; + + let mut non_unique_query_list: Vec = Vec::new(); + + let block_id = if self.is_last_level_incomplete { + self.level_index.len() - 1 + } else { + self.level_index.len() + }; + let mut preceding_ngram_count = 0; + + for non_unique_term in non_unique_terms.iter() { + let term = unique_terms.get(&non_unique_term.term).unwrap(); + let key0: u32 = term.key0; + let key_hash: u64 = term.key_hash; + + match self.segments_level0[key0 as usize].segment.get(&key_hash) { + Some(value1) => { + let mut idf = 0.0; + let mut idf_ngram1 = 0.0; + let mut idf_ngram2 = 0.0; + let mut idf_ngram3 = 0.0; + if result_type != &ResultType::Count { + let posting_counts_option = if self.meta.access_type == AccessType::Mmap { + decode_posting_list_counts( + &self.segments_index[key0 as usize], + self, + key_hash, + ) + } else { + let posting_list_object_index_option = + self.segments_index[key0 as usize].segment.get(&key_hash); + posting_list_object_index_option.map(|plo| { + ( + plo.posting_count, + plo.posting_count_ngram_1, + plo.posting_count_ngram_2, + plo.posting_count_ngram_3, + ) + }) + }; + + if non_unique_term.ngram_type == NgramType::SingleTerm + || self.meta.similarity == SimilarityType::Bm25fProximity + { + let posting_count = if let Some(posting_count) = posting_counts_option { + posting_count.0 as usize + value1.posting_count + } else { + value1.posting_count + }; + + idf = (((self.indexed_doc_count as f32 - posting_count as f32 + 0.5) + / (posting_count as f32 + 0.5)) + + 1.0) + .ln(); + } else if term.ngram_type == NgramType::NgramFF + || term.ngram_type == NgramType::NgramRF + || term.ngram_type == NgramType::NgramFR + { + let posting_count_ngram_1 = + if let Some(posting_count) = posting_counts_option { + posting_count.1 + } else { + 0 + } + self.get_posting_count_uncommited(&non_unique_term.term_ngram_1) + as u32; + + let posting_count_ngram_2 = + if let Some(posting_count) = posting_counts_option { + posting_count.2 + } else { + 0 + } + self.get_posting_count_uncommited(&non_unique_term.term_ngram_0) + as u32; + + idf_ngram1 = (((self.indexed_doc_count as f32 + - posting_count_ngram_1 as f32 + + 0.5) + / (posting_count_ngram_1 as f32 + 0.5)) + + 1.0) + .ln(); + + idf_ngram2 = (((self.indexed_doc_count as f32 + - posting_count_ngram_2 as f32 + + 0.5) + / (posting_count_ngram_2 as f32 + 0.5)) + + 1.0) + .ln(); + } else { + let posting_count_ngram_1 = + if let Some(posting_count) = posting_counts_option { + posting_count.1 + } else { + 0 + } + self.get_posting_count_uncommited(&non_unique_term.term_ngram_1) + as u32; + + let posting_count_ngram_2 = + if let Some(posting_count) = 
posting_counts_option { + posting_count.2 + } else { + 0 + } + self.get_posting_count_uncommited(&non_unique_term.term_ngram_0) + as u32; + + let posting_count_ngram_3 = + if let Some(posting_count) = posting_counts_option { + posting_count.3 + } else { + 0 + } + self.get_posting_count_uncommited(&non_unique_term.term_ngram_0) + as u32; + + idf_ngram1 = (((self.indexed_doc_count as f32 + - posting_count_ngram_1 as f32 + + 0.5) + / (posting_count_ngram_1 as f32 + 0.5)) + + 1.0) + .ln(); + + idf_ngram2 = (((self.indexed_doc_count as f32 + - posting_count_ngram_2 as f32 + + 0.5) + / (posting_count_ngram_2 as f32 + 0.5)) + + 1.0) + .ln(); + + idf_ngram3 = (((self.indexed_doc_count as f32 + - posting_count_ngram_3 as f32 + + 0.5) + / (posting_count_ngram_3 as f32 + 0.5)) + + 1.0) + .ln(); + } + } + + let term_index_unique = if non_unique_term.op == QueryType::Not { + let query_list_map_len = not_query_list_map.len(); + let value = + not_query_list_map + .entry(key_hash) + .or_insert(PostingListObjectQuery { + posting_count: value1.posting_count as u32, + posting_pointer: value1.pointer_first, + term: non_unique_term.term.clone(), + key0, + term_index_unique: query_list_map_len, + + p_docid: 0, + p_docid_count: value1.posting_count, + docid: 0, + + idf, + idf_ngram1, + idf_ngram2, + idf_ngram3, + ngram_type: non_unique_term.ngram_type.clone(), + ..Default::default() + }); + value.term_index_unique + } else { + let query_list_map_len = query_list_map.len(); + let value = + query_list_map + .entry(key_hash) + .or_insert(PostingListObjectQuery { + posting_count: value1.posting_count as u32, + posting_pointer: value1.pointer_first, + term: non_unique_term.term.clone(), + key0, + term_index_unique: query_list_map_len, + + pointer_pivot_p_docid: value1.pointer_pivot_p_docid, + p_docid: 0, + p_docid_count: value1.posting_count, + docid: 0, + + idf, + idf_ngram1, + idf_ngram2, + idf_ngram3, + ngram_type: non_unique_term.ngram_type.clone(), + ..Default::default() + }); + value.term_index_unique + }; + + if non_unique_term.op == QueryType::Phrase { + let nu_plo = NonUniquePostingListObjectQuery { + term_index_unique, + term_index_nonunique: non_unique_query_list.len() + + preceding_ngram_count, + pos: 0, + p_pos: 0, + positions_pointer: 0, + positions_count: 0, + byte_array: &DUMMY_VEC_8, + key0, + is_embedded: false, + p_field: 0, + field_vec: SmallVec::new(), + embedded_positions: [0; 4], + }; + + non_unique_query_list.push(nu_plo); + } + + match non_unique_term.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramRF | NgramType::NgramFR => { + preceding_ngram_count += 1 + } + _ => preceding_ngram_count += 2, + }; + } + None => { + if non_unique_term.op == QueryType::Intersection + || non_unique_term.op == QueryType::Phrase + { + return; + } + } + } + } + + not_query_list = not_query_list_map.into_values().collect(); + query_list = query_list_map.into_values().collect(); + let query_list_len = query_list.len(); + + let non_unique_query_list_count = non_unique_query_list.len(); + + if query_list_len == 0 { + } else if query_list_len == 1 { + self.single_docid_uncommitted( + block_id, + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + 0, + result_type, + field_filter_set, + facet_filter, + search_result, + result_count_arc, + top_k, + ); + } else if query_type_mut == &QueryType::Union { + self.union_docid_uncommitted( + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + block_id, + result_count_arc, + search_result, + top_k, + 
result_type, + field_filter_set, + facet_filter, + ); + } else { + self.intersection_docid_uncommitted( + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + block_id, + result_count_arc, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + query_type_mut == &mut QueryType::Phrase && non_unique_query_list_count >= 2, + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn single_docid_uncommitted<'a>( + self: &Shard, + block_id: usize, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'a>], + query_list: &mut [PostingListObjectQuery<'a>], + not_query_list: &mut [PostingListObjectQuery<'a>], + term_index: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + + search_result: &mut SearchResult, + result_count_arc: &Arc, + top_k: usize, + ) { + let filtered = !not_query_list.is_empty() + || !field_filter_set.is_empty() + || !self.delete_hashset.is_empty() + || !facet_filter.is_empty(); + + if (self.enable_single_term_topk || (result_type == &ResultType::Count)) + && (non_unique_query_list.len() <= 1 && !filtered) + { + result_count_arc.fetch_add( + query_list[term_index].posting_count as usize, + Ordering::Relaxed, + ); + + return; + } + + let plo1 = &mut query_list[term_index]; + + let mut result_count_local = 0; + for i in 0..plo1.posting_count { + plo1.p_docid = i as usize; + + self.get_next_docid_uncommitted(plo1); + + add_result_singleterm_uncommitted( + self, + (block_id << 16) | plo1.docid as usize, + &mut result_count_local, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + plo1, + not_query_list, + ); + } + + if result_type != &ResultType::Topk { + let filtered = !not_query_list.is_empty() || !field_filter_set.is_empty(); + result_count_arc.fetch_add( + if filtered { + result_count_local as usize + } else { + plo1.posting_count as usize + }, + Ordering::Relaxed, + ); + } + } + + pub(crate) fn get_next_docid_uncommitted(self: &Shard, plo: &mut PostingListObjectQuery) { + plo.posting_pointer_previous = plo.posting_pointer; + + let mut read_pointer = plo.posting_pointer; + + plo.posting_pointer = read_u32_ref(&self.postings_buffer, &mut read_pointer) as usize; + + plo.docid = read_u16_ref(&self.postings_buffer, &mut read_pointer) as i32; + } + + #[inline(always)] + pub(crate) fn decode_positions_uncommitted( + self: &Shard, + plo: &mut PostingListObjectQuery, + phrase_query: bool, + ) { + let mut read_pointer = plo.posting_pointer_previous + 6; + + let position_size_byte_temp: u16 = read_u16_ref(&self.postings_buffer, &mut read_pointer); + + let mut field_vec: SmallVec<[(u16, usize); 2]> = SmallVec::new(); + plo.is_embedded = position_size_byte_temp & 0b10000000_00000000 > 0; + + if !plo.is_embedded { + match plo.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramFR | NgramType::NgramRF => { + plo.field_vec_ngram1 = SmallVec::new(); + plo.field_vec_ngram2 = SmallVec::new(); + read_multifield_vec( + self.indexed_field_vec.len(), + self.indexed_field_id_bits, + self.indexed_field_id_mask, + self.longest_field_id, + &mut plo.field_vec_ngram1, + &self.postings_buffer, + &mut read_pointer, + ); + read_multifield_vec( + self.indexed_field_vec.len(), + self.indexed_field_id_bits, + self.indexed_field_id_mask, + self.longest_field_id, + &mut plo.field_vec_ngram2, + &self.postings_buffer, + &mut read_pointer, + ); + } + _ => { + plo.field_vec_ngram1 = SmallVec::new(); + plo.field_vec_ngram2 = SmallVec::new(); + 
plo.field_vec_ngram3 = SmallVec::new(); + read_multifield_vec( + self.indexed_field_vec.len(), + self.indexed_field_id_bits, + self.indexed_field_id_mask, + self.longest_field_id, + &mut plo.field_vec_ngram1, + &self.postings_buffer, + &mut read_pointer, + ); + read_multifield_vec( + self.indexed_field_vec.len(), + self.indexed_field_id_bits, + self.indexed_field_id_mask, + self.longest_field_id, + &mut plo.field_vec_ngram2, + &self.postings_buffer, + &mut read_pointer, + ); + read_multifield_vec( + self.indexed_field_vec.len(), + self.indexed_field_id_bits, + self.indexed_field_id_mask, + self.longest_field_id, + &mut plo.field_vec_ngram3, + &self.postings_buffer, + &mut read_pointer, + ); + } + } + + read_multifield_vec( + self.indexed_field_vec.len(), + self.indexed_field_id_bits, + self.indexed_field_id_mask, + self.longest_field_id, + &mut field_vec, + &self.postings_buffer, + &mut read_pointer, + ); + } else { + let field_id; + + if plo.p_docid < plo.pointer_pivot_p_docid as usize { + let rank_position_pointer = read_u16(&self.postings_buffer, read_pointer) as u32; + + match ( + self.indexed_field_vec.len() == 1, + rank_position_pointer >> 12, + ) { + (true, 0b1000..=0b1011) => { + if phrase_query { + plo.embedded_positions = + [rank_position_pointer & 0b00111111_11111111, 0, 0, 0]; + }; + field_vec.push((0, 1)); + } + (true, 0b1100..=0b1111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 7) & 0b00000000_01111111, + rank_position_pointer & 0b00000000_01111111, + 0, + 0, + ]; + }; + field_vec.push((0, 2)); + } + + (false, 0b1100 | 0b1101) => { + if phrase_query { + plo.embedded_positions = + [rank_position_pointer & 0b00011111_11111111, 0, 0, 0]; + }; + field_id = self.longest_field_id as u16; + field_vec.push((field_id, 1)); + } + (false, 0b1110 | 0b1111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 7) & 0b00000000_00111111, + rank_position_pointer & 0b00000000_01111111, + 0, + 0, + ]; + }; + field_id = self.longest_field_id as u16; + field_vec.push((field_id, 2)); + } + + (false, 0b1000) => { + let position_bits = 12 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 1)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer & ((1 << position_bits) - 1)), + 0, + 0, + 0, + ]; + }; + } + (false, 0b1001) => { + let position_bits = 12 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 2)); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + (false, 0b1010) => { + let position_bits = 12 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 3)); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) 
+ & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b1011) => { + let position_bits = + 12 - self.indexed_field_id_bits - self.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + self.indexed_field_id_bits)) + & self.indexed_field_id_mask as u32) + as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + + (_, _) => { + if phrase_query { + println!("unsupported 2 byte pointer embedded"); + plo.embedded_positions = [0, 0, 0, 0] + }; + } + } + } else { + let rank_position_pointer = read_u32(&self.postings_buffer, read_pointer); + + match ( + self.indexed_field_vec.len() == 1, + (rank_position_pointer & 0b11111111_11111111_11111111) >> 19, + ) { + (true, 0b10000..=0b10011) => { + if phrase_query { + plo.embedded_positions = [ + rank_position_pointer & 0b00011111_11111111_11111111, + 0, + 0, + 0, + ]; + }; + field_vec.push((0, 1)); + } + (true, 0b10100..=0b10111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 11) & 0b00000000_00000011_11111111, + rank_position_pointer & 0b00000000_00000111_11111111, + 0, + 0, + ]; + }; + field_vec.push((0, 2)); + } + (true, 0b11000..=0b11011) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 14) & 0b00000000_00000000_01111111, + (rank_position_pointer >> 7) & 0b00000000_00000000_01111111, + rank_position_pointer & 0b00000000_00000000_01111111, + 0, + ]; + }; + field_vec.push((0, 3)); + } + (true, 0b11100..=0b11111) => { + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 16) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 11) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 6) & 0b00000000_00000000_00011111, + rank_position_pointer & 0b00000000_00000000_00111111, + ]; + }; + field_vec.push((0, 4)); + } + + (false, 0b11000 | 0b11001) => { + field_id = self.longest_field_id as u16; + field_vec.push((field_id, 1)); + if phrase_query { + plo.embedded_positions = [ + rank_position_pointer & 0b00001111_11111111_11111111, + 0, + 0, + 0, + ]; + }; + } + (false, 0b11010 | 0b11011) => { + field_id = self.longest_field_id as u16; + field_vec.push((field_id, 2)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 10) & 0b00000000_00000011_11111111, + rank_position_pointer & 0b00000000_00000011_11111111, + 0, + 0, + ]; + }; + } + (false, 0b11100 | 0b11101) => { + field_id = self.longest_field_id as u16; + field_vec.push((field_id, 3)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 14) & 0b00000000_00000000_00111111, + (rank_position_pointer >> 7) & 0b00000000_00000000_01111111, + rank_position_pointer & 0b00000000_00000000_01111111, + 0, + ]; + }; + } + (false, 0b11110 | 0b11111) => { + field_id = self.longest_field_id as u16; + field_vec.push((field_id, 4)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer >> 15) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 10) & 0b00000000_00000000_00011111, + (rank_position_pointer >> 5) & 
0b00000000_00000000_00011111, + rank_position_pointer & 0b00000000_00000000_00011111, + ]; + }; + } + + (false, 0b10000) => { + let position_bits = 19 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 1)); + if phrase_query { + plo.embedded_positions = [ + (rank_position_pointer & ((1 << position_bits) - 1)), + 0, + 0, + 0, + ]; + }; + } + + (false, 0b10001) => { + let position_bits = 19 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 2)); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + (false, 0b10010) => { + let position_bits = 19 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 3)); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b10011) => { + let position_bits = 19 - self.indexed_field_id_bits; + field_id = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.push((field_id, 4)); + if phrase_query { + let position_bits_1 = position_bits >> 2; + let position_bits_2 = (position_bits - position_bits_1) / 3; + let position_bits_3 = + (position_bits - position_bits_1 - position_bits_2) >> 1; + let position_bits_4 = + position_bits - position_bits_1 - position_bits_2 - position_bits_3; + plo.embedded_positions = [ + ((rank_position_pointer + >> (position_bits_2 + position_bits_3 + position_bits_4)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> (position_bits_3 + position_bits_4)) + & ((1 << position_bits_2) - 1)), + ((rank_position_pointer >> position_bits_4) + & ((1 << position_bits_3) - 1)), + (rank_position_pointer & ((1 << position_bits_4) - 1)), + ]; + }; + } + (false, 0b10100) => { + let position_bits = + 19 - self.indexed_field_id_bits - self.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + self.indexed_field_id_bits)) + & self.indexed_field_id_mask as u32) + as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1)]); + if phrase_query { + let position_bits_1 = position_bits >> 1; + let position_bits_2 = position_bits - position_bits_1; + plo.embedded_positions = [ + ((rank_position_pointer >> position_bits_2) + & ((1 << position_bits_1) - 1)), + (rank_position_pointer & ((1 << position_bits_2) - 1)), + 0, + 0, + ]; + }; + } + (false, 0b10101) => { + let position_bits = + 19 - self.indexed_field_id_bits - self.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + self.indexed_field_id_bits)) + & 
self.indexed_field_id_mask as u32) + as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 2)]); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b10110) => { + let position_bits = + 19 - self.indexed_field_id_bits - self.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + self.indexed_field_id_bits)) + & self.indexed_field_id_mask as u32) + as u16; + let field_id_2 = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 2), (field_id_2, 1)]); + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + (false, 0b10111) => { + let position_bits = 19 + - self.indexed_field_id_bits + - self.indexed_field_id_bits + - self.indexed_field_id_bits; + field_id = ((rank_position_pointer + >> (position_bits + + self.indexed_field_id_bits + + self.indexed_field_id_bits)) + & self.indexed_field_id_mask as u32) + as u16; + let field_id_2 = ((rank_position_pointer + >> (position_bits + self.indexed_field_id_bits)) + & self.indexed_field_id_mask as u32) + as u16; + let field_id_3 = ((rank_position_pointer >> position_bits) + & self.indexed_field_id_mask as u32) + as u16; + field_vec.extend([(field_id, 1), (field_id_2, 1), (field_id_3, 1)]); + + if phrase_query { + let position_bits_1 = position_bits / 3; + let position_bits_2 = (position_bits - position_bits_1) >> 1; + let position_bits_3 = position_bits - position_bits_1 - position_bits_2; + plo.embedded_positions = [ + ((rank_position_pointer >> (position_bits_2 + position_bits_3)) + & ((1 << position_bits_1) - 1)), + ((rank_position_pointer >> position_bits_3) + & ((1 << position_bits_2) - 1)), + (rank_position_pointer & ((1 << position_bits_3) - 1)), + 0, + ]; + }; + } + + (_, _) => { + if phrase_query { + println!("unsupported 3 byte pointer embedded"); + plo.embedded_positions = [0, 0, 0, 0] + }; + } + } + }; + } + + plo.positions_count = field_vec[0].1 as u32; + plo.field_vec = field_vec; + plo.positions_pointer = read_pointer as u32; + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn intersection_docid_uncommitted( + self: &Shard, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'_>], + query_list: &mut [PostingListObjectQuery<'_>], + not_query_list: &mut [PostingListObjectQuery<'_>], + block_id: usize, + result_count_arc: &Arc, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + phrase_query: bool, + ) { + let mut result_count = 0; + let t1 = 0; + let mut t2 
= 1; + + query_list.sort_by(|x, y| x.posting_count.partial_cmp(&y.posting_count).unwrap()); + + for plo in query_list.iter_mut() { + plo.p_docid = 0; + self.get_next_docid_uncommitted(plo); + } + + 'outer: loop { + match query_list[t1].docid.cmp(&query_list[t2].docid) { + std::cmp::Ordering::Less => { + if t2 > 1 { + t2 = 1; + } + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].posting_count as usize { + break; + } + self.get_next_docid_uncommitted(&mut query_list[t1]); + } + std::cmp::Ordering::Greater => { + query_list[t2].p_docid += 1; + if query_list[t2].p_docid == query_list[t2].posting_count as usize { + break; + } + + self.get_next_docid_uncommitted(&mut query_list[t2]); + } + std::cmp::Ordering::Equal => { + if t2 + 1 < query_list.len() { + t2 += 1; + continue; + } + + add_result_multiterm_uncommitted( + self, + (block_id << 16) | query_list[t1].docid as usize, + &mut result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + phrase_query, + ); + + query_list[t1].p_docid += 1; + if query_list[t1].p_docid == query_list[t1].posting_count as usize { + break; + } + for item in query_list.iter_mut().skip(1) { + item.p_docid += 1; + if item.p_docid == item.posting_count as usize { + break 'outer; + } + self.get_next_docid_uncommitted(item); + } + + t2 = 1; + self.get_next_docid_uncommitted(&mut query_list[t1]); + } + } + } + + if result_type != &ResultType::Topk { + result_count_arc.fetch_add(result_count as usize, Ordering::Relaxed); + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn union_docid_uncommitted( + &self, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + block_id: usize, + result_count_arc: &Arc, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + ) { + let mut result_count: i32 = 0; + + if result_type == &ResultType::Count { + self.union_count_uncommitted(&mut result_count, query_list); + result_count_arc.fetch_add(result_count as usize, Ordering::Relaxed); + return; + } + + self.union_scan_uncommitted( + &mut result_count, + block_id, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + ); + + result_count_arc.fetch_add(result_count as usize, Ordering::Relaxed); + } + + pub(crate) fn union_count_uncommitted( + &self, + result_count: &mut i32, + query_list: &mut [PostingListObjectQuery], + ) { + query_list.sort_by(|a, b| b.posting_count.partial_cmp(&a.posting_count).unwrap()); + + let mut result_count_local = query_list[0].posting_count; + let mut bitmap_0: [u8; 8192] = [0u8; 8192]; + + for (i, item) in query_list.iter_mut().enumerate() { + if item.end_flag { + continue; + } + + if i == 0 { + for _p_docid in 0..item.posting_count { + self.get_next_docid_uncommitted(item); + let docid = item.docid as usize; + let byte_index = docid >> 3; + let bit_index = docid & 7; + + bitmap_0[byte_index] |= 1 << bit_index; + } + } else { + for _p_docid in 0..item.posting_count { + self.get_next_docid_uncommitted(item); + let docid = item.docid as usize; + let byte_index = docid >> 3; + let bit_index = docid & 7; + + if bitmap_0[byte_index] & (1 << bit_index) == 0 { + bitmap_0[byte_index] |= 1 << bit_index; + result_count_local += 1; + } + } + } + } + + *result_count += 
result_count_local as i32; + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn union_scan_uncommitted( + &self, + result_count: &mut i32, + block_id: usize, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + non_unique_query_list: &mut [NonUniquePostingListObjectQuery], + query_list: &mut [PostingListObjectQuery], + not_query_list: &mut [PostingListObjectQuery], + ) { + for plo in query_list.iter_mut() { + if !plo.end_flag { + self.get_next_docid_uncommitted(plo); + } + } + + loop { + let mut break_loop = true; + let mut docid_min = u16::MAX; + + for plo in query_list.iter_mut() { + if !plo.end_flag && (plo.docid as u16) < docid_min { + docid_min = plo.docid as u16; + } + } + + if result_type != &ResultType::Count { + let mut term_match_count = 0; + let mut term_index = 0; + for (i, plo) in query_list.iter_mut().enumerate() { + if !plo.end_flag && (plo.docid as u16 == docid_min) { + plo.bm25_flag = true; + term_match_count += 1; + term_index = i; + } else { + plo.bm25_flag = false; + } + } + + if term_match_count == 1 { + add_result_singleterm_uncommitted( + self, + (block_id << 16) | docid_min as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + &mut query_list[term_index], + not_query_list, + ); + if not_query_list.is_empty() && result_type != &ResultType::Topk { + *result_count += 1; + } + } else { + add_result_multiterm_uncommitted( + self, + (block_id << 16) | docid_min as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + false, + ); + } + } else { + *result_count += 1; + } + + for plo in query_list.iter_mut() { + if !plo.end_flag { + let doc_id = plo.docid as u16; + if doc_id == docid_min { + if plo.p_docid < plo.posting_count as usize - 1 { + plo.p_docid += 1; + self.get_next_docid_uncommitted(plo); + break_loop = false; + } else { + plo.end_flag = true; + } + } else { + break_loop = false; + } + } + } + + if break_loop { + break; + } + } + } +} diff --git a/mobile_app/rust/src/seekstorm/search.rs b/mobile_app/rust/src/seekstorm/search.rs new file mode 100644 index 0000000..7bc184c --- /dev/null +++ b/mobile_app/rust/src/seekstorm/search.rs @@ -0,0 +1,3243 @@ +use crate::geo_search::{decode_morton_2_d, point_distance_to_morton_range}; +use crate::index::{ + DOCUMENT_LENGTH_COMPRESSION, DistanceUnit, Facet, FieldType, NgramType, ResultFacet, Shard, + ShardArc, hash64, +}; +use crate::min_heap::{Result, result_ordering_root}; +use crate::tokenizer::{tokenizer, tokenizer_lite}; +use crate::union::{union_docid_2, union_docid_3}; +use crate::utils::{ + read_f32, read_f64, read_i8, read_i16, read_i32, read_i64, read_u8, read_u16, read_u32, + read_u64, +}; +use crate::{ + index::{ + AccessType, BlockObjectIndex, DUMMY_VEC, DUMMY_VEC_8, Index, IndexArc, + MAX_POSITIONS_PER_TERM, NonUniquePostingListObjectQuery, NonUniqueTermObject, + PostingListObjectIndex, PostingListObjectQuery, QueueObject, SPEEDUP_FLAG, SegmentIndex, + SimilarityType, TermObject, get_max_score, + }, + intersection::intersection_blockid, + min_heap::MinHeap, + single::single_blockid, + union::union_blockid, +}; + +use ahash::{AHashMap, AHashSet}; +use itertools::Itertools; +use num::FromPrimitive; +use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; +use std::mem::discriminant; +use std::ops::Range; +use std::sync::{ + Arc, + atomic::{AtomicUsize, 
Ordering}, +}; +use std::{cmp, mem}; +use utoipa::ToSchema; + +use symspell_complete_rs::Suggestion; + +/// Specifies the default QueryType: The following query types are supported: +/// - **Union** (OR, disjunction), +/// - **Intersection** (AND, conjunction), +/// - **Phrase** (""), +/// - **Not** (-). +/// +/// The default QueryType is superseded if the query parser detects that a different query type is specified within the query string (+ - ""). +#[derive(Default, PartialEq, Clone, Debug, Serialize, Deserialize, ToSchema)] +pub enum QueryType { + /// Union (OR, disjunction) + #[default] + Union = 0, + /// Intersection (AND, conjunction) + Intersection = 1, + /// Phrase ("") + Phrase = 2, + /// Not (-) + Not = 3, +} + +/// Specifies whether query rewriting is enabled or disabled +#[derive(Default, PartialEq, Clone, Debug, Serialize, Deserialize, ToSchema)] +pub enum QueryRewriting { + /// Query rewriting disabled, returns query results for query as-is, returns no suggestions for corrected or completed query. + /// No performance overhead for spelling correction and suggestions. + #[default] + SearchOnly, + /// Query rewriting disabled, returns query results for original query string, returns suggestions for corrected or completed query. + /// Additional latency for spelling suggestions. + SearchSuggest { + /// Enable query correction, for queries with query string length >= threshold + /// A minimum length of 2 is advised to prevent irrelevant suggestions and results. + correct: Option, + /// The edit distance thresholds for suggestions: 1..2 recommended; higher values increase latency and memory consumption. + distance: usize, + /// Term length thresholds for each edit distance. + /// None: max_dictionary_edit_distance for all terms lengths + /// Some(\[4\]): max_dictionary_edit_distance for all terms lengths >= 4, + /// Some(\[2,8\]): max_dictionary_edit_distance for all terms lengths >=2, max_dictionary_edit_distance +1 for all terms for lengths>=8 + term_length_threshold: Option>, + /// Enable query completions, for queries with query string length >= threshold, in addition to spelling corrections + /// A minimum length of 2 is advised to prevent irrelevant suggestions and results. + complete: Option, + /// An option to limit maximum number of returned suggestions. + length: Option, + }, + /// Query rewriting enabled, returns query results for spelling corrected or completed query string (=instant search), returns suggestions for corrected or completed query. + /// Additional latency for spelling correction and suggestions. + SearchRewrite { + /// Enable query correction, for queries with query string length >= threshold + /// A minimum length of 2 is advised to prevent irrelevant suggestions and results. + correct: Option, + /// The edit distance thresholds for suggestions: 1..2 recommended; higher values increase latency and memory consumption. + distance: usize, + /// Term length thresholds for each edit distance. + /// None: max_dictionary_edit_distance for all terms lengths + /// Some(\[4\]): max_dictionary_edit_distance for all terms lengths >= 4, + /// Some(\[2,8\]): max_dictionary_edit_distance for all terms lengths >=2, max_dictionary_edit_distance +1 for all terms for lengths>=8 + term_length_threshold: Option>, + /// Enable query completions, for queries with query string length >= threshold, in addition to spelling corrections + /// A minimum length of 2 is advised to prevent irrelevant suggestions and results. 
+ complete: Option, + /// An option to limit maximum number of returned suggestions. + length: Option, + }, + /// Search disabled, returns no query results, only returns suggestions for corrected or completed query. + SuggestOnly { + /// Enable query correction, for queries with query string length >= threshold + /// A minimum length of 2 is advised to prevent irrelevant suggestions and results. + correct: Option, + /// The edit distance thresholds for suggestions: 1..2 recommended; higher values increase latency and memory consumption. + distance: usize, + /// Term length thresholds for each edit distance. + /// None: max_dictionary_edit_distance for all terms lengths + /// Some(\[4\]): max_dictionary_edit_distance for all terms lengths >= 4, + /// Some(\[2,8\]): max_dictionary_edit_distance for all terms lengths >=2, max_dictionary_edit_distance +1 for all terms for lengths>=8 + term_length_threshold: Option>, + /// Enable query completions, for queries with query string length >= threshold, in addition to spelling corrections + /// A minimum length of 2 is advised to prevent irrelevant suggestions and results. + complete: Option, + /// An option to limit maximum number of returned suggestions. + length: Option, + }, +} + +/// The following result types are supported: +/// - **Count** (count all results that match the query, but returning top-k results is not required) +/// - **Topk** (returns the top-k results per query, but counting all results that match the query is not required) +/// - **TopkCount** (returns the top-k results per query + count all results that match the query) +#[derive(Default, PartialEq, Clone, Debug, Serialize, Deserialize, ToSchema)] +pub enum ResultType { + /// Count all results that match the query, without returning top-k results + Count = 0, + /// Return the top-k results per query, without counting all results that match the query + Topk = 1, + /// Return the top-k results per query and count all results that match the query + #[default] + TopkCount = 2, +} + +pub(crate) struct SearchResult<'a> { + pub topk_candidates: MinHeap<'a>, + pub query_facets: Vec, + pub skip_facet_count: bool, +} + +/// Contains the results returned when searching the index. +#[derive(Default, Debug, Deserialize, Serialize, Clone)] +pub struct ResultObject { + /// Search query string + pub original_query: String, + /// Search query string after any automatic query correction or completion + pub query: String, + /// Vector of search query terms. Can be used e.g. for custom highlighting. + pub query_terms: Vec, + /// Number of returned search results. Identical to results.len() + pub result_count: usize, + + /// Total number of search results that match the query + /// result_count_total is only accurate if result_type=TopkCount or ResultType=Count, but not for ResultType=Topk + pub result_count_total: usize, + + /// List of search results: doc ID and BM25 score + pub results: Vec, + /// List of facet fields: field name and vector of unique values and their counts. + /// Unique values and their counts are only accurate if result_type=TopkCount or ResultType=Count, but not for ResultType=Topk + pub facets: AHashMap, + ///Suggestions for auto complete and spelling correction. 
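These suggestions are produced according to the QueryRewriting mode documented above. A hedged construction sketch, in the style of the examples further below; the elided generic parameters of the `Option` fields are assumed here to be `usize` and `Vec<usize>`, and the values are purely illustrative:

```rust
use seekstorm::search::QueryRewriting;

// Assumed field types; values are illustrative only.
let query_rewriting = QueryRewriting::SearchSuggest {
    correct: Some(2),                        // correct queries once they are >= 2 chars long
    distance: 1,                             // maximum edit distance for suggestions
    term_length_threshold: Some(vec![2, 8]), // distance 1 for terms >= 2 chars, distance 2 for terms >= 8 chars
    complete: Some(2),                       // also complete queries >= 2 chars long
    length: Some(5),                         // return at most 5 suggestions
};
```

SearchSuggest keeps the results for the original query and only adds suggestions; SearchRewrite additionally runs the search against the corrected or completed query, and SuggestOnly skips the search entirely.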
+ pub suggestions: Vec, +} + +/// specifies how to count the frequency of numerical facet field values +#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq, ToSchema)] +pub enum RangeType { + /// within the specified range + #[default] + CountWithinRange, + /// within the range and all ranges above + CountAboveRange, + /// within the range and all ranges below + CountBelowRange, +} + +/// Defines the query facets: +/// - string facet field values +/// - range segments for numerical facet field values +#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq, ToSchema)] +pub enum QueryFacet { + /// Range segment definition for numerical facet field values of type u8 + U8 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, u8)>, + }, + /// Range segment definition for numerical facet field values of type u16 + U16 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, u16)>, + }, + /// Range segment definition for numerical facet field values of type u32 + U32 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, u32)>, + }, + /// Range segment definition for numerical facet field values of type u64 + U64 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, u64)>, + }, + /// Range segment definition for numerical facet field values of type i8 + I8 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, i8)>, + }, + /// Range segment definition for numerical facet field values of type i16 + I16 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, i16)>, + }, + /// Range segment definition for numerical facet field values of type i32 + I32 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, i32)>, + }, + /// Range segment definition for numerical facet field values of type i64 + I64 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, i64)>, + }, + /// Range segment definition for numerical facet field values of type Unix timestamp + Timestamp { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, i64)>, + }, + /// Range segment definition for numerical facet field values of type f32 + F32 { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, f32)>, + }, + /// Range segment definition for numerical facet field values of type f64 + F64 { + /// field name + field: String, + /// range type 
(CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, f64)>, + }, + /// Facet field values of type string + String16 { + /// field name + field: String, + /// Prefix filter of facet values to return + prefix: String, + /// maximum number of facet values to return + length: u16, + }, + /// Facet field values of type string + String32 { + /// field name + field: String, + /// Prefix filter of facet values to return + prefix: String, + /// maximum number of facet values to return + length: u32, + }, + /// Facet field values of type string set + StringSet16 { + /// field name + field: String, + /// Prefix filter of facet values to return + prefix: String, + /// maximum number of facet values to return + length: u16, + }, + /// Facet field values of type string set + StringSet32 { + /// field name + field: String, + /// Prefix filter of facet values to return + prefix: String, + /// maximum number of facet values to return + length: u32, + }, + /// Range segment definition for numerical facet field values of type Point (distance between base of type Point and facet field of type Point) + Point { + /// field name + field: String, + /// range type (CountWithinRange,CountBelowRange,CountAboveRange) + range_type: RangeType, + /// range label, range start + ranges: Vec<(String, f64)>, + /// base point (latitude/lat, longitude/lon) + base: Point, + /// distance unit (kilometers/miles) + unit: DistanceUnit, + }, + /// No query facet + #[default] + None, +} + +/// Defines the range segments for numerical facet field values +#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq)] +pub enum Ranges { + /// U8 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + U8(RangeType, Vec<(String, u8)>), + /// U16 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + U16(RangeType, Vec<(String, u16)>), + /// U32 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + U32(RangeType, Vec<(String, u32)>), + /// U64 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + U64(RangeType, Vec<(String, u64)>), + /// I8 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + I8(RangeType, Vec<(String, i8)>), + /// I16 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + I16(RangeType, Vec<(String, i16)>), + /// I32 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + I32(RangeType, Vec<(String, i32)>), + /// I64 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + I64(RangeType, Vec<(String, i64)>), + /// Unix timestamp (number of seconds since 1 January 1970) range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + Timestamp(RangeType, Vec<(String, i64)>), + /// F32 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + F32(RangeType, Vec<(String, f32)>), + /// F64 range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, range start + F64(RangeType, Vec<(String, f64)>), + /// Proximity range filter: range type (CountWithinRange,CountBelowRange,CountAboveRange), range label, base point (longitude/lon, latitude/lat), 
distance unit + Point(RangeType, Vec<(String, f64)>, Point, DistanceUnit), + #[default] + /// No range filter + None, +} + +/// FacetValue: Facet field value types +#[derive(Clone, PartialEq, Serialize, Deserialize, ToSchema)] +pub enum FacetValue { + /// Boolean value + Bool(bool), + /// Unsigned 8-bit integer + U8(u8), + /// Unsigned 16-bit integer + U16(u16), + /// Unsigned 32-bit integer + U32(u32), + /// Unsigned 64-bit integer + U64(u64), + /// Signed 8-bit integer + I8(i8), + /// Signed 16-bit integer + I16(i16), + /// Signed 32-bit integer + I32(i32), + /// Signed 64-bit integer + I64(i64), + /// Unix timestamp: the number of seconds since 1 January 1970 + Timestamp(i64), + /// 32-bit floating point number + F32(f32), + /// 64-bit floating point number + F64(f64), + /// String value + String(String), + /// String set value + StringSet(Vec), + /// Point value: latitude/lat, longitude/lon + Point(Point), + /// No value + None, +} + +impl Index { + /// get_facet_value: Returns value from facet field for a doc_id even if schema stored=false (field not stored in document JSON). + /// Facet fields are more compact than fields stored in document JSON. + /// Strings are stored more compact as indices to a unique term dictionary. Numbers are stored binary, not as strings. + /// Facet fields are faster because no document loading, decompression and JSON decoding is required. + /// Facet fields are always memory mapped, internally always stored with fixed byte length layout, regardless of string size. + #[inline] + pub async fn get_facet_value(self: &Index, field: &str, doc_id: usize) -> FacetValue { + let shard_id = doc_id & ((1 << self.shard_bits) - 1); + let doc_id = doc_id >> self.shard_bits; + self.shard_vec[shard_id] + .read() + .await + .get_facet_value_shard(field, doc_id) + } +} + +impl Shard { + #[inline] + pub(crate) fn get_facet_value_shard(self: &Shard, field: &str, doc_id: usize) -> FacetValue { + if let Some(field_idx) = self.facets_map.get(field) { + match &self.facets[*field_idx].field_type { + FieldType::U8 => { + let facet_value = &self.facets_file_mmap + [(self.facets_size_sum * doc_id) + self.facets[*field_idx].offset]; + FacetValue::U8(*facet_value) + } + FieldType::U16 => { + let facet_value = read_u16( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::U16(facet_value) + } + FieldType::U32 => { + let facet_value = read_u32( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::U32(facet_value) + } + FieldType::U64 => { + let facet_value = read_u64( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::U64(facet_value) + } + FieldType::I8 => { + let facet_value = read_i8( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::I8(facet_value) + } + FieldType::I16 => { + let facet_value = read_i16( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::I16(facet_value) + } + FieldType::I32 => { + let facet_value = read_i32( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::I32(facet_value) + } + FieldType::I64 => { + let facet_value = read_i64( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::I64(facet_value) + } + FieldType::Timestamp => { + let 
facet_value = read_i64( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::Timestamp(facet_value) + } + FieldType::F32 => { + let facet_value = read_f32( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::F32(facet_value) + } + FieldType::F64 => { + let facet_value = read_f64( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + FacetValue::F64(facet_value) + } + + FieldType::String16 => { + let facet_id = read_u16( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + + let facet_value = self.facets[*field_idx] + .values + .get_index((facet_id).into()) + .unwrap(); + + FacetValue::String(facet_value.1.0[0].clone()) + } + + FieldType::StringSet16 => { + let facet_id = read_u16( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + + let facet_value = self.facets[*field_idx] + .values + .get_index((facet_id).into()) + .unwrap(); + + FacetValue::StringSet(facet_value.1.0.clone()) + } + + FieldType::String32 => { + let facet_id = read_u32( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + + let facet_value = self.facets[*field_idx] + .values + .get_index(facet_id as usize) + .unwrap(); + + FacetValue::String(facet_value.1.0[0].clone()) + } + + FieldType::StringSet32 => { + let facet_id = read_u32( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + + let facet_value = self.facets[*field_idx] + .values + .get_index(facet_id as usize) + .unwrap(); + + FacetValue::StringSet(facet_value.1.0.clone()) + } + + FieldType::Point => { + let code = read_u64( + &self.facets_file_mmap, + (self.facets_size_sum * doc_id) + self.facets[*field_idx].offset, + ); + + let x = decode_morton_2_d(code); + + FacetValue::Point(x.clone()) + } + + _ => FacetValue::None, + } + } else { + FacetValue::None + } + } +} + +/// U8 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeU8 { + /// range start + pub start: u8, + /// range end + pub end: u8, +} + +/// U16 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeU16 { + /// range start + pub start: u16, + /// range end + pub end: u16, +} + +/// U32 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeU32 { + /// range start + pub start: u32, + /// range end + pub end: u32, +} + +/// U64 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeU64 { + /// range start + pub start: u64, + /// range end + pub end: u64, +} + +/// I8 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeI8 { + /// range start + pub start: i8, + /// range end + pub end: i8, +} + +/// I16 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeI16 { + /// range start + pub start: i16, + /// range end + pub end: i16, +} + +/// I32 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeI32 { + /// range start + pub start: i32, + /// range end + pub end: i32, +} + +/// I64 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeI64 { + /// range start + pub start: i64, + /// range end + pub end: i64, +} + +/// F32 range filter +#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeF32 { + /// range start + pub start: f32, + /// range end + pub end: f32, +} + +/// F64 range filter 
+#[allow(dead_code)] +#[derive(ToSchema)] +pub struct RangeF64 { + /// range start + pub start: f64, + /// range end + pub end: f64, +} + +/// FacetFilter: +/// either numerical range facet filter (range start/end) or +/// string facet filter (vector of strings) at least one (boolean OR) must match. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, ToSchema)] +pub enum FacetFilter { + /// U8 range filter + U8 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeU8)] + filter: Range, + }, + /// U16 range filter + U16 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeU16)] + filter: Range, + }, + /// U32 range filter + U32 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeU32)] + filter: Range, + }, + /// U64 range filter + U64 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeU64)] + filter: Range, + }, + /// I8 range filter + I8 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeI8)] + filter: Range, + }, + /// I16 range filter + I16 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeI16)] + filter: Range, + }, + /// I32 range filter + I32 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeI32)] + filter: Range, + }, + /// I64 range filter + I64 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeI64)] + filter: Range, + }, + /// Timestamp range filter, Unix timestamp: the number of seconds since 1 January 1970 + Timestamp { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeI64)] + filter: Range, + }, + /// F32 range filter + F32 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeF32)] + filter: Range, + }, + /// F64 range filter + F64 { + /// field name + field: String, + /// filter: range start, range end + #[schema(value_type=RangeF64)] + filter: Range, + }, + /// String16 filter + String16 { + /// field name + field: String, + /// filter: array of facet string values + filter: Vec, + }, + /// StringSet16 filter + StringSet16 { + /// field name + field: String, + /// filter: array of facet string values + filter: Vec, + }, + /// String32 filter + String32 { + /// field name + field: String, + /// filter: array of facet string values + filter: Vec, + }, + /// StringSet32 filter + StringSet32 { + /// field name + field: String, + /// filter: array of facet string values + filter: Vec, + }, + /// Point proximity range filter + Point { + /// field name + field: String, + /// filter: base point (latitude/lat, longitude/lon), proximity range start, proximity range end, distance unit + #[schema(value_type=(Point, RangeF64, DistanceUnit))] + filter: (Point, Range, DistanceUnit), + }, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq)] +pub(crate) enum FilterSparse { + U8(Range), + U16(Range), + U32(Range), + U64(Range), + I8(Range), + I16(Range), + I32(Range), + I64(Range), + /// Unix timestamp: the number of seconds since 1 January 1970 + Timestamp(Range), + F32(Range), + F64(Range), + String16(Vec), + String32(Vec), + Point(Point, Range, DistanceUnit, Range), + #[default] + None, +} + +/// Specifies the sort order for the search results. 
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, ToSchema)] +pub enum SortOrder { + /// Ascending sort order + Ascending = 0, + /// Descending sort order + Descending = 1, +} + +/// Specifies the sort order for the search results. +#[derive(Clone, Deserialize, Serialize, ToSchema)] +pub struct ResultSort { + /// name of the facet field to sort by + pub field: String, + /// Sort order: Ascending or Descending + pub order: SortOrder, + /// Base value/point for (geo) proximity sorting + pub base: FacetValue, +} + +/// Specifies the sort order for the search results. +#[derive(Clone, Serialize)] +pub(crate) struct ResultSortIndex<'a> { + /// Index/ID of the facet field to sort by + pub idx: usize, + /// Sort order: Ascending or Descending + pub order: SortOrder, + /// Base value/point for (geo) proximity sorting + pub base: &'a FacetValue, +} + +/// latitude lat +/// longitude lon +pub type Point = Vec; + +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +/// Search the index for all indexed documents, both for committed and uncommitted documents. +/// The latter enables true realtime search: documents are available for search in exact the same millisecond they are indexed. +/// Arguments: +/// * `query_string`: query string `+` `-` `""` search operators are recognized. +/// * `query_type_default`: Specifiy default QueryType: +/// * **Union**, disjunction, OR, +/// * **Intersection**, conjunction, AND, `+`, +/// * **Phrase** `""`, +/// * **Not**, except, minus `-`. +/// +/// The default QueryType is superseded if the query parser detects that a different query type is specified within the query string (`+` `-` `""`). +/// +/// Boolean queries are specified in the search method either via the query_type parameter or via operator chars within the query parameter. +/// The interpretation of operator chars within the query string (set `query_type=QueryType::Union`) allows to specify advanced search operations via a simple search box. +/// +/// Intersection, AND `+` +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Union; +/// let query_string="+red +apple".to_string(); +/// ``` +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Intersection; +/// let query_string="red apple".to_string(); +/// ``` +/// Union, OR +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Union; +/// let query_string="red apple".to_string(); +/// ``` +/// Phrase `""` +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Union; +/// let query_string="\"red apple\"".to_string(); +/// ``` +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Phrase; +/// let query_string="red apple".to_string(); +/// ``` +/// Except, minus, NOT `-` +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Union; +/// let query_string="apple -red".to_string(); +/// ``` +/// Mixed phrase and intersection +/// ```rust ,no_run +/// use seekstorm::search::QueryType; +/// let query_type=QueryType::Union; +/// let query_string="+\"the who\" +uk".to_string(); +/// ``` +/// * `offset`: offset of search results to return. +/// * `length`: number of search results to return. +/// With length=0, resultType::TopkCount will be automatically downgraded to resultType::Count, returning the number of results only, without returning the results itself. 
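+/// A hedged paging sketch for `offset`/`length` (values are illustrative only):
+/// ```rust ,no_run
+/// // Request the third page with 10 results per page: results 21..=30 are returned.
+/// let page = 2usize;           // zero-based page index (illustrative)
+/// let length = 10usize;        // number of results per page
+/// let offset = page * length;  // offset = 20
+/// assert_eq!(offset, 20);
+/// ```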
+/// * `result_type`: type of search results to return: Count, Topk, TopkCount.
+/// * `include_uncommited`: true realtime search: include indexed documents which were not yet committed into search results.
+/// * `field_filter`: Specify field names to search in at query time, whereas SchemaField.indexed is set at index time. If set to Vec::new() then all indexed fields are searched.
+/// * `query_facets`: Must be set if facets should be returned in ResultObject. If set to Vec::new() then no facet fields are returned.
+/// Facet fields are only collected, counted and returned for ResultType::Count and ResultType::TopkCount, but not for ResultType::Topk.
+/// The prefix property of a QueryFacet allows at query time to filter the returned facet values to those matching a given prefix, if there are too many distinct values per facet field.
+/// The length property of a QueryFacet allows at query time limiting the number of returned distinct values per facet field, if there are too many distinct values. The QueryFacet can be used to improve the usability in a UI.
+/// If the length property of a QueryFacet is set to 0 then no facet values for that facet are collected, counted and returned at query time. That decreases the query latency significantly.
+/// The facet values are sorted by the frequency of the appearance of the value within the indexed documents matching the query in descending order.
+/// Examples:
+/// query_facets = vec![QueryFacet::String16 {field: "language".into(),prefix: "ger".into(),length: 5},QueryFacet::String16 {field: "brand".into(),prefix: "a".into(),length: 5}];
+/// query_facets = vec![QueryFacet::U8 {field: "age".into(), range_type: RangeType::CountWithinRange, ranges: vec![("0-20".into(), 0),("20-40".into(), 20), ("40-60".into(), 40),("60-80".into(), 60), ("80-100".into(), 80)]}];
+/// query_facets = vec![QueryFacet::Point {field: "location".into(),base:vec![38.8951, -77.0364],unit:DistanceUnit::Kilometers,range_type: RangeType::CountWithinRange,ranges: vec![ ("0-200".into(), 0.0),("200-400".into(), 200.0), ("400-600".into(), 400.0), ("600-800".into(), 600.0), ("800-1000".into(), 800.0)]}];
+/// * `facet_filter`: Search results are filtered to documents matching specific string values or numerical ranges in the facet fields. If set to Vec::new() then results are not facet filtered.
+/// The filter parameter filters the returned results to those documents both matching the query AND matching for all (boolean AND) stated facet filter fields at least one (boolean OR) of the stated values.
+/// If the query is changed then both facet counts and search results are changed. If the facet filter is changed then only the search results are changed, while facet counts remain unchanged.
+/// The facet counts depend only on the query and not on which facet filters are selected.
+/// Examples (see also the combined sketch below):
+/// facet_filter=vec![FacetFilter::String16{field:"language".into(),filter:vec!["german".into()]},FacetFilter::String16{field:"brand".into(),filter:vec!["apple".into(),"google".into()]}];
+/// facet_filter=vec![FacetFilter::U8{field:"age".into(),filter: 21..65}];
+/// facet_filter = vec![FacetFilter::Point {field: "location".into(),filter: (vec![38.8951, -77.0364], 0.0..1000.0, DistanceUnit::Kilometers)}];
+/// * `result_sort`: Sort field and order: Search results are sorted by the specified facet field, either in ascending or descending order.
+/// If no sort field is specified, then the search results are sorted by rank in descending order by default.
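+/// A combined, hedged sketch of the `query_facets` and `facet_filter` parameters above. Field
+/// names and values are illustrative only, and the `use` paths are assumptions:
+/// ```rust ,no_run
+/// use seekstorm::search::{FacetFilter, QueryFacet, RangeType};
+/// // Collect counts for the top 5 "language" values starting with "ger",
+/// // and bucket "age" into labelled ranges.
+/// let query_facets = vec![
+///     QueryFacet::String16 { field: "language".into(), prefix: "ger".into(), length: 5 },
+///     QueryFacet::U8 {
+///         field: "age".into(),
+///         range_type: RangeType::CountWithinRange,
+///         ranges: vec![("0-20".into(), 0), ("20-40".into(), 20), ("40-60".into(), 40)],
+///     },
+/// ];
+/// // Restrict results to documents with language "german" AND age within 21..65.
+/// let facet_filter = vec![
+///     FacetFilter::String16 { field: "language".into(), filter: vec!["german".into()] },
+///     FacetFilter::U8 { field: "age".into(), filter: 21..65 },
+/// ];
+/// ```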
+/// Multiple sort fields are combined by a "sort by, then sort by"-method ("tie-breaking"-algorithm). +/// The results are sorted by the first field, and only for those results where the first field value is identical (tie) the results are sub-sorted by the second field, +/// until the n-th field value is either not equal or the last field is reached. +/// A special _score field (BM25x), reflecting how relevant the result is for a given search query (phrase match, match in title etc.) can be combined with any of the other sort fields as primary, secondary or n-th search criterium. +/// Sort is only enabled on facet fields that are defined in schema at create_index! +/// Examples: +/// result_sort = vec![ResultSort {field: "price".into(), order: SortOrder::Descending, base: FacetValue::None},ResultSort {field: "language".into(), order: SortOrder::Ascending, base: FacetValue::None}]; +/// result_sort = vec![ResultSort {field: "location".into(),order: SortOrder::Ascending, base: FacetValue::Point(vec![38.8951, -77.0364])}]; +/// +/// If query_string is empty, then index facets (collected at index time) are returned, otherwise query facets (collected at query time) are returned. +/// Facets are defined in 3 different places: +/// the facet fields are defined in schema at create_index, +/// the facet field values are set in index_document at index time, +/// the query_facets/facet_filter search parameters are specified at query time. +/// Facets are then returned in the search result object. +pub trait Search { + /// Search the index for all indexed documents, both for committed and uncommitted documents. + /// The latter enables true realtime search: documents are available for search in exact the same millisecond they are indexed. + /// Arguments: + /// * `query_string`: query string `+` `-` `""` search operators are recognized. + /// * `query_type_default`: Specifiy default QueryType: + /// * **Union**, disjunction, OR, + /// * **Intersection**, conjunction, AND, `+`, + /// * **Phrase** `""`, + /// * **Not**, except, minus `-`. + /// + /// The default QueryType is superseded if the query parser detects that a different query type is specified within the query string (`+` `-` `""`). + /// + /// Boolean queries are specified in the search method either via the query_type parameter or via operator chars within the query parameter. + /// The interpretation of operator chars within the query string (set `query_type=QueryType::Union`) allows to specify advanced search operations via a simple search box. 
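+    /// A hedged end-to-end call sketch. The opened `IndexArc` (`index_arc`), the `use` paths and
+    /// all argument values are assumptions for illustration; the parameters are documented below.
+    /// ```rust ,no_run
+    /// use seekstorm::search::{QueryRewriting, QueryType, ResultType, Search};
+    /// # async fn example(index_arc: seekstorm::index::IndexArc) {
+    /// let result_object = index_arc
+    ///     .search(
+    ///         "red apple".to_string(),    // query_string
+    ///         QueryType::Intersection,    // query_type_default
+    ///         0,                          // offset
+    ///         10,                         // length
+    ///         ResultType::TopkCount,      // result_type
+    ///         true,                       // include_uncommited: realtime search
+    ///         vec![],                     // field_filter: all indexed fields
+    ///         vec![],                     // query_facets: none
+    ///         vec![],                     // facet_filter: none
+    ///         vec![],                     // result_sort: default rank, descending
+    ///         QueryRewriting::SearchOnly, // query_rewriting: no correction/completion
+    ///     )
+    ///     .await;
+    /// println!("{} total matches", result_object.result_count_total);
+    /// # }
+    /// ```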
+ /// + /// Intersection, AND `+` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="+red +apple".to_string(); + /// ``` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Intersection; + /// let query_string="red apple".to_string(); + /// ``` + /// Union, OR + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="red apple".to_string(); + /// ``` + /// Phrase `""` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="\"red apple\"".to_string(); + /// ``` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Phrase; + /// let query_string="red apple".to_string(); + /// ``` + /// Except, minus, NOT `-` + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="apple -red".to_string(); + /// ``` + /// Mixed phrase and intersection + /// ```rust ,no_run + /// use seekstorm::search::QueryType; + /// let query_type=QueryType::Union; + /// let query_string="+\"the who\" +uk".to_string(); + /// ``` + /// * `offset`: offset of search results to return. + /// * `length`: number of search results to return. + /// With length=0, resultType::TopkCount will be automatically downgraded to resultType::Count, returning the number of results only, without returning the results itself. + /// * `result_type`: type of search results to return: Count, Topk, TopkCount. + /// * `include_uncommited`: true realtime search: include indexed documents which where not yet committed into search results. + /// * `field_filter`: Specify field names where to search at querytime, whereas SchemaField.indexed is set at indextime. If set to Vec::new() then all indexed fields are searched. + /// * `query_facets`: Must be set if facets should be returned in ResultObject. If set to Vec::new() then no facet fields are returned. + /// Facet fields are only collected, counted and returned for ResultType::Count and ResultType::TopkCount, but not for ResultType::Topk. + /// The prefix property of a QueryFacet allows at query time to filter the returned facet values to those matching a given prefix, if there are too many distinct values per facet field. + /// The length property of a QueryFacet allows at query time limiting the number of returned distinct values per facet field, if there are too many distinct values. The QueryFacet can be used to improve the usability in an UI. + /// If the length property of a QueryFacet is set to 0 then no facet values for that facet are collected, counted and returned at query time. That decreases the query latency significantly. + /// The facet values are sorted by the frequency of the appearance of the value within the indexed documents matching the query in descending order. 
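+    /// A hedged sketch of the `prefix`/`length` behaviour described above, including the
+    /// `length: 0` case; field names are illustrative and the `use` path is an assumption:
+    /// ```rust ,no_run
+    /// use seekstorm::search::QueryFacet;
+    /// let query_facets = vec![
+    ///     // Top 5 distinct "language" values with prefix "ger" are collected and counted.
+    ///     QueryFacet::String16 { field: "language".into(), prefix: "ger".into(), length: 5 },
+    ///     // length: 0 skips collecting/counting values for "brand" at query time (lower latency).
+    ///     QueryFacet::String16 { field: "brand".into(), prefix: "".into(), length: 0 },
+    /// ];
+    /// ```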
+ /// Examples: + /// query_facets = vec![QueryFacet::String16 {field: "language".into(),prefix: "ger".into(),length: 5},QueryFacet::String16 {field: "brand".into(),prefix: "a".into(),length: 5}]; + /// query_facets = vec![QueryFacet::U8 {field: "age".into(), range_type: RangeType::CountWithinRange, ranges: vec![("0-20".into(), 0),("20-40".into(), 20), ("40-60".into(), 40),("60-80".into(), 60), ("80-100".into(), 80)]}]; + /// query_facets = vec![QueryFacet::Point {field: "location".into(),base:vec![38.8951, -77.0364],unit:DistanceUnit::Kilometers,range_type: RangeType::CountWithinRange,ranges: vec![ ("0-200".into(), 0.0),("200-400".into(), 200.0), ("400-600".into(), 400.0), ("600-800".into(), 600.0), ("800-1000".into(), 800.0)]}]; + /// * `facet_filter`: Search results are filtered to documents matching specific string values or numerical ranges in the facet fields. If set to Vec::new() then result are not facet filtered. + /// The filter parameter filters the returned results to those documents both matching the query AND matching for all (boolean AND) stated facet filter fields at least one (boolean OR) of the stated values. + /// If the query is changed then both facet counts and search results are changed. If the facet filter is changed then only the search results are changed, while facet counts remain unchanged. + /// The facet counts depend only from the query and not which facet filters are selected. + /// Examples: + /// facet_filter=vec![FacetFilter::String{field:"language".into(),filter:vec!["german".into()]},FacetFilter::String{field:"brand".into(),filter:vec!["apple".into(),"google".into()]}]; + /// facet_filter=vec![FacetFilter::U8{field:"age".into(),filter: 21..65}]; + /// facet_filter = vec![FacetFilter::Point {field: "location".into(),filter: (vec![38.8951, -77.0364], 0.0..1000.0, DistanceUnit::Kilometers)}]; + /// * `result_sort`: Sort field and order: Search results are sorted by the specified facet field, either in ascending or descending order. + /// If no sort field is specified, then the search results are sorted by rank in descending order per default. + /// Multiple sort fields are combined by a "sort by, then sort by"-method ("tie-breaking"-algorithm). + /// The results are sorted by the first field, and only for those results where the first field value is identical (tie) the results are sub-sorted by the second field, + /// until the n-th field value is either not equal or the last field is reached. + /// A special _score field (BM25x), reflecting how relevant the result is for a given search query (phrase match, match in title etc.) can be combined with any of the other sort fields as primary, secondary or n-th search criterium. + /// Sort is only enabled on facet fields that are defined in schema at create_index! + /// Examples: + /// result_sort = vec![ResultSort {field: "price".into(), order: SortOrder::Descending, base: FacetValue::None},ResultSort {field: "language".into(), order: SortOrder::Ascending, base: FacetValue::None}]; + /// result_sort = vec![ResultSort {field: "location".into(),order: SortOrder::Ascending, base: FacetValue::Point(vec![38.8951, -77.0364])}]; + /// * `query_rewriting`: Enables query rewriting features such as spelling correction and query auto-completion (QAC). + /// The spelling correction of multi-term query strings handles three cases: + /// 1. mistakenly inserted space into a correct term led to two incorrect terms: `hels inki` -> `helsinki` + /// 2. 
mistakenly omitted space between two correct terms led to one incorrect combined term: `modernart` -> `modern art` + /// 3. multiple independent input terms with/without spelling errors: `cinese indastrialication` -> `chinese industrialization` + /// + /// Query correction/completion supports phrases "", but is disabled, if +- operators are used, or if a opening quote is used after the first term, or if a closing quote is used before the last term. + /// See QueryRewriting enum for details. + /// ⚠️ In addition to setting the query_rewriting parameter per query, the incremental creation of the Symspell dictionary during the indexing of documents has to be enabled via the create_index parameter `meta.spelling_correction`. + /// + /// Facets: + /// If query_string is empty, then index facets (collected at index time) are returned, otherwise query facets (collected at query time) are returned. + /// Facets are defined in 3 different places: + /// the facet fields are defined in schema at create_index, + /// the facet field values are set in index_document at index time, + /// the query_facets/facet_filter search parameters are specified at query time. + /// Facets are then returned in the search result object. + async fn search( + &self, + query_string: String, + query_type_default: QueryType, + offset: usize, + length: usize, + result_type: ResultType, + include_uncommited: bool, + field_filter: Vec, + query_facets: Vec, + facet_filter: Vec, + result_sort: Vec, + query_rewriting: QueryRewriting, + ) -> ResultObject; +} + +impl Search for IndexArc { + async fn search( + &self, + query_string: String, + query_type_default: QueryType, + offset: usize, + length: usize, + result_type: ResultType, + include_uncommited: bool, + field_filter: Vec, + query_facets: Vec, + facet_filter: Vec, + result_sort: Vec, + query_rewriting: QueryRewriting, + ) -> ResultObject { + let index_ref = self.read().await; + let original_query = query_string.clone(); + + let (edit_distance_max, term_length_threshold, correct, complete, suggestion_length) = + match &query_rewriting { + QueryRewriting::SearchSuggest { + distance, + term_length_threshold, + correct, + complete, + length, + } => (distance, term_length_threshold, correct, complete, length), + QueryRewriting::SuggestOnly { + distance, + term_length_threshold, + correct, + complete, + length, + } => (distance, term_length_threshold, correct, complete, length), + QueryRewriting::SearchRewrite { + distance, + term_length_threshold, + correct, + complete, + length, + } => (distance, term_length_threshold, correct, complete, length), + _ => (&0, &None, &None, &None, &None), + }; + + let (query_string, suggestions) = if correct.is_some() || complete.is_some() { + let mut query_string = query_string; + let mut allow_loop = true; + let mut previous_qac: Option<(String, Vec)> = None; + loop { + let shard = index_ref.shard_vec[0].read().await; + let query_terms = tokenizer_lite(&query_string, &index_ref.meta.tokenizer, &shard); + drop(shard); + + let qac = if !query_terms.is_empty() { + let query_terms_vec: Vec = + query_terms.iter().map(|s| s.0.to_string()).collect(); + + let suffix = if query_string.ends_with(" ") { " " } else { "" }; + let (query_terms_prefix, query_terms_str) = if query_terms.len() + suffix.len() + > 3 + { + ( + query_terms_vec[..query_terms.len() - 3 + suffix.len()].join(" ") + " ", + query_terms_vec[query_terms.len() - 3 + suffix.len()..].join(" ") + + suffix, + ) + } else { + (String::new(), query_terms_vec.join(" ") + suffix) + }; + let is_phrase 
= + !query_terms.is_empty() && query_terms[0].1 == QueryType::Phrase; + + let qac: Option<(String, Vec)> = if let Some(completion_option) = + index_ref.completion_option.as_ref() + && complete.is_some() + && query_string.len() >= complete.unwrap() + && query_rewriting != QueryRewriting::SearchOnly + { + let trie = completion_option.read().await; + let completions = + trie.lookup_completions(&query_terms_str, suggestion_length.to_owned()); + + if completions.is_empty() { + previous_qac.clone() + } else { + let mut suggestions: Vec = Vec::new(); + for qc in completions.iter() { + suggestions.push(Suggestion { + term: if is_phrase { + ["\"", &query_terms_prefix, &qc.term, "\""].join("") + } else { + [query_terms_prefix.as_str(), &qc.term].join("") + }, + distance: qc.term.len() - query_string.len(), + count: *qc.count, + }); + } + + if let Some(suggestion_length) = suggestion_length.as_ref() + && suggestions.len() < *suggestion_length + && query_terms.len() >= 2 + { + let mut position = 0; + let mut completion_term_vec = Vec::new(); + for (i, completion) in completions.iter().enumerate() { + completion_term_vec = + completion.term.split(" ").collect::>(); + if completion_term_vec.len() >= 3 { + position = i + 1; + break; + } + } + + if completion_term_vec.len() >= 3 { + let completion_term_str = + completion_term_vec[1..].join(" ") + " "; + let additional_completions = trie.lookup_completions( + &completion_term_str, + Some(suggestion_length - suggestions.len() + 5), + ); + + let query_terms_prefix = query_terms_vec[..query_terms.len() + - if query_terms.len() == 2 { 1 } else { 2 }] + .join(" ") + + " "; + + let mut j = 0; + for qc in additional_completions.iter() { + if let Some(p) = qc.term.rfind(' ') + && p + 1 < qc.term.len() + { + let suffix = qc.term[p + 1..].to_string(); + let hash = hash64(suffix.as_bytes()); + if index_ref.frequent_hashset.contains(&hash) { + continue; + } + }; + + suggestions.insert( + position + j, + Suggestion { + term: if is_phrase { + ["\"", &query_terms_prefix, &qc.term, "\""] + .join("") + } else { + [query_terms_prefix.as_str(), &qc.term].join("") + }, + distance: qc.term.len() - query_string.len(), + count: *qc.count, + }, + ); + + j += 1; + + if suggestions.len() >= *suggestion_length { + break; + } + } + } + } + + let completed_query = suggestions[0].term.to_string(); + + Some((completed_query, suggestions)) + } + } else { + previous_qac.clone() + }; + + let qac: Option<(String, Vec)> = if let Some(symspell) = + &index_ref.symspell_option + && correct.is_some() + && query_string.len() >= correct.unwrap() + && query_rewriting != QueryRewriting::SearchOnly + && qac.is_none() + && allow_loop + { + if let Ok(symspell) = symspell.try_read() + && (term_length_threshold.is_none() + || term_length_threshold.as_ref().unwrap().is_empty() + || query_string.len() >= term_length_threshold.as_ref().unwrap()[0]) + { + let mut corrections = symspell.lookup_compound_vec( + &query_terms_vec, + edit_distance_max.to_owned(), + term_length_threshold, + false, + ); + + if corrections.is_empty() { + None + } else { + if is_phrase { + for suggestion in corrections.iter_mut() { + suggestion.term = ["\"", &suggestion.term, "\""].join(""); + } + } + + query_string = corrections[0].term.clone(); + allow_loop = false; + previous_qac = Some((corrections[0].term.clone(), corrections)); + continue; + } + } else { + None + } + } else { + qac + }; + + if let Some((corrected_query, suggestions)) = qac { + if discriminant(&query_rewriting) + == discriminant(&QueryRewriting::SearchRewrite { + 
distance: 0, + term_length_threshold: None, + correct: None, + complete: None, + length: None, + }) + { + (corrected_query, Some(suggestions)) + } else { + (query_string, Some(suggestions)) + } + } else { + (query_string, None) + } + } else { + (query_string, None) + }; + + break qac; + } + } else { + (query_string, None) + }; + + if discriminant(&query_rewriting) + == discriminant(&QueryRewriting::SuggestOnly { + distance: 0, + term_length_threshold: None, + correct: None, + complete: None, + length: None, + }) + { + let mut result_object = ResultObject { + original_query, + query: query_string.clone(), + ..Default::default() + }; + if let Some(suggestions) = suggestions.as_ref() { + result_object.suggestions = suggestions.iter().map(|s| s.term.clone()).collect(); + } + return result_object; + } + + if index_ref.shard_vec.len() == 1 { + let mut result_object = index_ref.shard_vec[0] + .search_shard( + query_string.clone(), + query_type_default, + offset, + length, + result_type, + include_uncommited, + field_filter, + query_facets, + facet_filter, + result_sort, + ) + .await; + result_object.original_query = original_query; + result_object.query = query_string.clone(); + if let Some(suggestions) = suggestions.as_ref() { + result_object.suggestions = suggestions.iter().map(|s| s.term.clone()).collect(); + } + return result_object; + } + + let mut result_object_list = Vec::new(); + let shard_bits = index_ref.shard_bits; + let aggregate_results = result_type != ResultType::Count; + + for shard in index_ref.shard_vec.iter() { + let query_string_clone = query_string.clone(); + let shard_clone = shard.clone(); + let query_type_clone = query_type_default.clone(); + let result_type_clone = result_type.clone(); + let field_filter_clone = field_filter.clone(); + let query_facets_clone = query_facets.clone(); + let facet_filter_clone = facet_filter.clone(); + let result_sort_clone = result_sort.clone(); + let shard_id = shard.read().await.meta.id; + + result_object_list.push(tokio::spawn(async move { + let mut rlo = shard_clone + .search_shard( + query_string_clone, + query_type_clone, + 0, + offset + length, + result_type_clone, + include_uncommited, + field_filter_clone, + query_facets_clone, + facet_filter_clone, + result_sort_clone, + ) + .await; + + if aggregate_results { + for result in rlo.results.iter_mut() { + result.doc_id = (result.doc_id << shard_bits) | shard_id as usize; + } + } + + rlo + })); + } + + let mut result_object: ResultObject = Default::default(); + + let mut result_facets: AHashMap, u32)> = AHashMap::new(); + if result_type != ResultType::Topk { + for query_facet in query_facets.iter() { + match query_facet { + QueryFacet::String16 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length as u32)); + } + QueryFacet::StringSet16 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length as u32)); + } + QueryFacet::String32 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length)); + } + QueryFacet::StringSet32 { + field, + prefix: _, + length, + } => { + result_facets.insert(field.into(), (AHashMap::new(), *length)); + } + QueryFacet::Timestamp { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + + QueryFacet::U8 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::U16 
{ + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::U32 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::U64 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::I8 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::I16 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::I32 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::I64 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::F32 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::F64 { + field, + range_type: _, + ranges: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + QueryFacet::Point { + field, + range_type: _, + ranges: _, + base: _, + unit: _, + } => { + result_facets.insert(field.into(), (AHashMap::new(), u16::MAX as u32)); + } + + _ => {} + } + } + } + + for result_object_shard in result_object_list { + let mut rlo_shard = result_object_shard.await.unwrap(); + + result_object.result_count_total += rlo_shard.result_count_total; + if aggregate_results { + result_object.results.append(&mut rlo_shard.results); + } + + if result_object.query_terms.is_empty() { + result_object.query_terms = rlo_shard.query_terms + }; + + if !rlo_shard.facets.is_empty() { + for facet in rlo_shard.facets.iter() { + if let Some(existing) = result_facets.get_mut(facet.0) { + for (key, value) in facet.1.iter() { + *existing.0.entry(key.clone()).or_insert(0) += value; + } + }; + } + } + } + + for (key, value) in result_facets.iter_mut() { + let sum = value + .0 + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(a.1)) + .map(|(a, c)| (a.clone(), *c)) + .take(value.1 as usize) + .collect::>(); + result_object.facets.insert(key.clone(), sum); + } + + if aggregate_results { + let mut result_sort_index: Vec = Vec::new(); + if !result_sort.is_empty() { + for rs in result_sort.iter() { + if let Some(idx) = index_ref.shard_vec[0] + .read() + .await + .facets_map + .get(&rs.field) + { + result_sort_index.push(ResultSortIndex { + idx: *idx, + order: rs.order.clone(), + base: &rs.base, + }); + } + } + let shard_vec = + futures::future::join_all(index_ref.shard_vec.iter().map(|s| s.read())).await; + + result_object.results.sort_by(|a, b| { + result_ordering_root( + &shard_vec, + index_ref.shard_bits, + &result_sort_index, + *b, + *a, + ) + }); + } else { + result_object + .results + .sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + } + + if offset > 0 { + result_object.results = if offset >= result_object.results.len() { + Vec::new() + } else { + result_object.results.split_off(offset) + }; + } + + if result_object.results.len() > length { + result_object.results.truncate(length); + } + + result_object.result_count = result_object.results.len(); + } + + result_object.original_query = original_query; + result_object.query = query_string.clone(); + if let Some(suggestions) = suggestions { + 
result_object.suggestions = suggestions.into_iter().map(|s| s.term.clone()).collect(); + } + + result_object + } +} + +#[allow(clippy::too_many_arguments)] +#[allow(async_fn_in_trait)] +pub(crate) trait SearchShard { + async fn search_shard( + &self, + query_string: String, + query_type_default: QueryType, + offset: usize, + length: usize, + result_type: ResultType, + include_uncommited: bool, + field_filter: Vec, + query_facets: Vec, + facet_filter: Vec, + result_sort: Vec, + ) -> ResultObject; +} + +/// Non-recursive binary search of non-consecutive u64 values in a slice of bytes +#[inline(never)] +pub(crate) fn binary_search( + byte_array: &[u8], + len: usize, + key_hash: u64, + key_head_size: usize, +) -> i64 { + let mut left = 0i64; + let mut right = len as i64 - 1; + while left <= right { + let mid = (left + right) / 2; + + let pivot = read_u64(byte_array, mid as usize * key_head_size); + match pivot.cmp(&key_hash) { + cmp::Ordering::Equal => { + return mid; + } + cmp::Ordering::Less => left = mid + 1, + cmp::Ordering::Greater => right = mid - 1, + } + } + + -1 +} + +/// Decode posting_list_object and blocks on demand from mmap, instead keepping all posting_list_object and blocks for all keys in ram +#[inline(always)] +pub(crate) fn decode_posting_list_count( + segment: &SegmentIndex, + index: &Shard, + key_hash1: u64, + previous: bool, +) -> Option { + let offset = if previous { 1 } else { 0 }; + + let mut posting_count_list = 0u32; + let mut found = false; + + if segment.byte_array_blocks_pointer.len() <= offset { + return None; + } + + let block_id_last = segment.byte_array_blocks_pointer.len() - 1 - offset; + for pointer in segment + .byte_array_blocks_pointer + .iter() + .take(block_id_last + 1) + { + let key_count = pointer.2 as usize; + + let byte_array = + &index.index_file_mmap[pointer.0 - (key_count * index.key_head_size)..pointer.0]; + let key_index = binary_search(byte_array, key_count, key_hash1, index.key_head_size); + + if key_index >= 0 { + found = true; + let key_address = key_index as usize * index.key_head_size; + let posting_count = read_u16(byte_array, key_address + 8); + posting_count_list += posting_count as u32 + 1; + } + } + + if found { + Some(posting_count_list) + } else { + None + } +} + +#[inline(always)] +pub(crate) fn decode_posting_list_counts( + segment: &SegmentIndex, + index: &Shard, + key_hash1: u64, +) -> Option<(u32, u32, u32, u32)> { + let mut posting_count_list = 0u32; + let mut posting_count_ngram_1_compressed = 0; + let mut posting_count_ngram_2_compressed = 0; + let mut posting_count_ngram_3_compressed = 0; + let mut posting_count_ngram_1 = 0; + let mut posting_count_ngram_2 = 0; + let mut posting_count_ngram_3 = 0; + let mut found = false; + + let read_flag = key_hash1 & 0b111 > 0; + + if segment.byte_array_blocks_pointer.is_empty() { + return None; + } + + for pointer in segment.byte_array_blocks_pointer.iter() { + let key_count = pointer.2 as usize; + + let byte_array = + &index.index_file_mmap[pointer.0 - (key_count * index.key_head_size)..pointer.0]; + let key_index = binary_search(byte_array, key_count, key_hash1, index.key_head_size); + + if key_index >= 0 { + found = true; + let key_address = key_index as usize * index.key_head_size; + let posting_count = read_u16(byte_array, key_address + 8); + + match index.key_head_size { + 20 => {} + 22 => { + if read_flag { + posting_count_ngram_1_compressed = read_u8(byte_array, key_address + 14); + posting_count_ngram_2_compressed = read_u8(byte_array, key_address + 15); + } + } + _ => { + if 
read_flag { + posting_count_ngram_1_compressed = read_u8(byte_array, key_address + 14); + posting_count_ngram_2_compressed = read_u8(byte_array, key_address + 15); + posting_count_ngram_3_compressed = read_u8(byte_array, key_address + 16); + } + } + } + + posting_count_list += posting_count as u32 + 1; + } + } + + if found { + match index.key_head_size { + 20 => {} + 22 => { + if read_flag { + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_1_compressed as usize]; + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_2_compressed as usize]; + } + } + _ => { + if read_flag { + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_1_compressed as usize]; + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_2_compressed as usize]; + posting_count_ngram_3 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_3_compressed as usize]; + } + } + } + + Some(( + posting_count_list, + posting_count_ngram_1, + posting_count_ngram_2, + posting_count_ngram_3, + )) + } else { + None + } +} + +/// Decode posting_list_object and blocks on demand from mmap, instead keepping all posting_list_object and blocks for all keys in ram +#[inline(always)] +pub(crate) fn decode_posting_list_object( + segment: &SegmentIndex, + shard: &Shard, + key_hash1: u64, + calculate_score: bool, +) -> Option { + let mut posting_count_list = 0u32; + let mut max_list_score = 0.0; + let mut blocks_owned: Vec = Vec::new(); + let mut posting_count_ngram_1_compressed = 0; + let mut posting_count_ngram_2_compressed = 0; + let mut posting_count_ngram_3_compressed = 0; + let mut posting_count_ngram_1 = 0; + let mut posting_count_ngram_2 = 0; + let mut posting_count_ngram_3 = 0; + + let read_flag = key_hash1 & 0b111 > 0; + + for (block_id, pointer) in segment.byte_array_blocks_pointer.iter().enumerate() { + let key_count = pointer.2 as usize; + + let byte_array = + &shard.index_file_mmap[pointer.0 - (key_count * shard.key_head_size)..pointer.0]; + let key_index = binary_search(byte_array, key_count, key_hash1, shard.key_head_size); + + if key_index >= 0 { + let key_address = key_index as usize * shard.key_head_size; + let posting_count = read_u16(byte_array, key_address + 8); + + let max_docid = read_u16(byte_array, key_address + 10); + let max_p_docid = read_u16(byte_array, key_address + 12); + + match shard.key_head_size { + 20 => {} + 22 => { + if read_flag { + posting_count_ngram_1_compressed = read_u8(byte_array, key_address + 14); + posting_count_ngram_2_compressed = read_u8(byte_array, key_address + 15); + } + } + _ => { + if read_flag { + posting_count_ngram_1_compressed = read_u8(byte_array, key_address + 14); + posting_count_ngram_2_compressed = read_u8(byte_array, key_address + 15); + posting_count_ngram_3_compressed = read_u8(byte_array, key_address + 16); + } + } + } + + let pointer_pivot_p_docid = read_u16(byte_array, key_address + shard.key_head_size - 6); + let compression_type_pointer = + read_u32(byte_array, key_address + shard.key_head_size - 4); + + posting_count_list += posting_count as u32 + 1; + + let block_object_index = BlockObjectIndex { + max_block_score: 0.0, + block_id: block_id as u32, + posting_count, + max_docid, + max_p_docid, + pointer_pivot_p_docid, + compression_type_pointer, + }; + blocks_owned.push(block_object_index); + } + } + + if !blocks_owned.is_empty() { + if calculate_score { + match shard.key_head_size { + 20 => {} + 22 => { + if read_flag { + posting_count_ngram_1 = + 
DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_1_compressed as usize]; + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_2_compressed as usize]; + } + } + _ => { + if read_flag { + posting_count_ngram_1 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_1_compressed as usize]; + posting_count_ngram_2 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_2_compressed as usize]; + posting_count_ngram_3 = + DOCUMENT_LENGTH_COMPRESSION[posting_count_ngram_3_compressed as usize]; + } + } + } + + let ngram_type = + FromPrimitive::from_u64(key_hash1 & 0b111).unwrap_or(NgramType::SingleTerm); + + for block in blocks_owned.iter_mut() { + block.max_block_score = get_max_score( + shard, + segment, + posting_count_ngram_1, + posting_count_ngram_2, + posting_count_ngram_3, + posting_count_list, + block.block_id as usize, + block.max_docid as usize, + block.max_p_docid as usize, + block.pointer_pivot_p_docid as usize, + block.compression_type_pointer, + &ngram_type, + ); + + if block.max_block_score > max_list_score { + max_list_score = block.max_block_score + } + } + } + + let posting_list_object_index = PostingListObjectIndex { + posting_count: posting_count_list, + posting_count_ngram_1, + posting_count_ngram_2, + posting_count_ngram_3, + max_list_score, + blocks: blocks_owned, + position_range_previous: 0, + ..Default::default() + }; + + Some(posting_list_object_index) + } else { + None + } +} + +impl SearchShard for ShardArc { + async fn search_shard( + &self, + query_string: String, + query_type_default: QueryType, + offset: usize, + length: usize, + result_type: ResultType, + include_uncommited: bool, + field_filter: Vec, + query_facets: Vec, + facet_filter: Vec, + result_sort: Vec, + ) -> ResultObject { + let shard_ref = self.read().await; + let mut query_type_mut = query_type_default; + + let facet_cap = if shard_ref.shard_number == 1 { + 0 + } else { + u32::MAX + }; + + let mut result_object: ResultObject = Default::default(); + + let mut result_type = result_type; + if length == 0 && result_type != ResultType::Count { + if result_type == ResultType::Topk { + return result_object; + } + result_type = ResultType::Count; + } + + if shard_ref.segments_index.is_empty() { + return result_object; + } + + let mut field_filter_set: AHashSet = AHashSet::new(); + for item in field_filter.iter() { + match shard_ref.schema_map.get(item) { + Some(value) => { + if value.indexed { + field_filter_set.insert(value.indexed_field_id as u16); + } + } + None => { + println!("field not found: {}", item) + } + } + } + + let mut result_sort_index: Vec = Vec::new(); + if !result_sort.is_empty() && result_type != ResultType::Count { + for rs in result_sort.iter() { + if let Some(idx) = shard_ref.facets_map.get(&rs.field) { + result_sort_index.push(ResultSortIndex { + idx: *idx, + order: rs.order.clone(), + base: &rs.base, + }); + } + } + } + + let heap_size = if result_type != ResultType::Count { + cmp::min(offset + length, shard_ref.indexed_doc_count) + } else { + 0 + }; + let mut search_result = SearchResult { + topk_candidates: MinHeap::new(heap_size, &shard_ref, &result_sort_index), + query_facets: Vec::new(), + skip_facet_count: false, + }; + + let mut facet_filter_sparse: Vec = Vec::new(); + if !facet_filter.is_empty() { + facet_filter_sparse = vec![FilterSparse::None; shard_ref.facets.len()]; + for facet_filter_item in facet_filter.iter() { + match &facet_filter_item { + FacetFilter::U8 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && 
shard_ref.facets[*idx].field_type == FieldType::U8 + { + facet_filter_sparse[*idx] = FilterSparse::U8(filter.clone()) + } + } + FacetFilter::U16 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U16 + { + facet_filter_sparse[*idx] = FilterSparse::U16(filter.clone()) + } + } + FacetFilter::U32 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U32 + { + facet_filter_sparse[*idx] = FilterSparse::U32(filter.clone()) + } + } + FacetFilter::U64 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U64 + { + facet_filter_sparse[*idx] = FilterSparse::U64(filter.clone()) + } + } + FacetFilter::I8 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I8 + { + facet_filter_sparse[*idx] = FilterSparse::I8(filter.clone()) + } + } + FacetFilter::I16 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I16 + { + facet_filter_sparse[*idx] = FilterSparse::I16(filter.clone()) + } + } + FacetFilter::I32 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I32 + { + facet_filter_sparse[*idx] = FilterSparse::I32(filter.clone()) + } + } + FacetFilter::I64 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I64 + { + facet_filter_sparse[*idx] = FilterSparse::I64(filter.clone()) + } + } + FacetFilter::Timestamp { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::Timestamp + { + facet_filter_sparse[*idx] = FilterSparse::Timestamp(filter.clone()) + } + } + FacetFilter::F32 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::F32 + { + facet_filter_sparse[*idx] = FilterSparse::F32(filter.clone()) + } + } + FacetFilter::F64 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::F64 + { + facet_filter_sparse[*idx] = FilterSparse::F64(filter.clone()) + } + } + + FacetFilter::String16 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) { + let facet = &shard_ref.facets[*idx]; + if shard_ref.facets[*idx].field_type == FieldType::String16 { + let mut string_id_vec = Vec::new(); + for value in filter.iter() { + let key = [value.clone()]; + if let Some(facet_value_id) = facet.values.get_index_of(&key[0]) + { + string_id_vec.push(facet_value_id as u16); + } + } + facet_filter_sparse[*idx] = FilterSparse::String16(string_id_vec); + } + } + } + + FacetFilter::StringSet16 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) { + let facet = &shard_ref.facets[*idx]; + if shard_ref.facets[*idx].field_type == FieldType::StringSet16 { + let mut string_id_vec = Vec::new(); + for value in filter.iter() { + let key = [value.clone()]; + if let Some(facet_value_id) = + facet.values.get_index_of(&key.join("_")) + { + string_id_vec.push(facet_value_id as u16); + } + + if let Some(facet_value_ids) = shard_ref + .string_set_to_single_term_id_vec[*idx] + .get(&value.clone()) + { + for code in facet_value_ids.iter() { + string_id_vec.push(*code as 
u16); + } + } + } + facet_filter_sparse[*idx] = FilterSparse::String16(string_id_vec); + } + } + } + + FacetFilter::String32 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) { + let facet = &shard_ref.facets[*idx]; + + if shard_ref.facets[*idx].field_type == FieldType::String32 { + let mut string_id_vec = Vec::new(); + for value in filter.iter() { + let key = [value.clone()]; + if let Some(facet_value_id) = facet.values.get_index_of(&key[0]) + { + string_id_vec.push(facet_value_id as u32); + } + } + facet_filter_sparse[*idx] = FilterSparse::String32(string_id_vec); + } + } + } + + FacetFilter::StringSet32 { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) { + let facet = &shard_ref.facets[*idx]; + if shard_ref.facets[*idx].field_type == FieldType::StringSet32 { + let mut string_id_vec = Vec::new(); + for value in filter.iter() { + let key = [value.clone()]; + if let Some(facet_value_id) = + facet.values.get_index_of(&key.join("_")) + { + string_id_vec.push(facet_value_id as u32); + } + + if let Some(facet_value_ids) = shard_ref + .string_set_to_single_term_id_vec[*idx] + .get(&value.clone()) + { + for code in facet_value_ids.iter() { + string_id_vec.push(*code); + } + } + } + facet_filter_sparse[*idx] = FilterSparse::String32(string_id_vec); + } + } + } + + FacetFilter::Point { field, filter } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::Point + { + facet_filter_sparse[*idx] = FilterSparse::Point( + filter.0.clone(), + filter.1.clone(), + filter.2.clone(), + point_distance_to_morton_range(&filter.0, filter.1.end, &filter.2), + ); + } + } + } + } + } + + let mut is_range_facet = false; + if !query_facets.is_empty() { + search_result.query_facets = vec![ResultFacet::default(); shard_ref.facets.len()]; + for query_facet in query_facets.iter() { + match &query_facet { + QueryFacet::U8 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U8 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::U8(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::U16 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U16 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::U16(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::U32 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U32 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::U32(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::U64 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::U64 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::U64(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::I8 { + field, + range_type, + ranges, 
+ } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I8 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::I8(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::I16 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I16 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::I16(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::I32 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I32 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::I32(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::I64 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::I64 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::I64(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::Timestamp { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::Timestamp + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::Timestamp(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::F32 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::F32 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::F32(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::F64 { + field, + range_type, + ranges, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::F64 + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::F64(range_type.clone(), ranges.clone()), + ..Default::default() + }; + } + } + QueryFacet::String16 { + field, + prefix, + length, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::String16 + { + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length as u32, + ..Default::default() + } + } + } + QueryFacet::StringSet16 { + field, + prefix, + length, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::StringSet16 + { + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length as u32, + ..Default::default() + } + } + } + + QueryFacet::String32 { + field, + prefix, + length, + } => { + if let Some(idx) = 
shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::String32 + { + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length, + ..Default::default() + } + } + } + QueryFacet::StringSet32 { + field, + prefix, + length, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::StringSet32 + { + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + prefix: prefix.clone(), + length: *length, + ..Default::default() + } + } + } + + QueryFacet::Point { + field, + range_type, + ranges, + base, + unit, + } => { + if let Some(idx) = shard_ref.facets_map.get(field) + && shard_ref.facets[*idx].field_type == FieldType::Point + { + is_range_facet = true; + search_result.query_facets[*idx] = ResultFacet { + field: field.clone(), + length: u16::MAX as u32, + ranges: Ranges::Point( + range_type.clone(), + ranges.clone(), + base.clone(), + unit.clone(), + ), + ..Default::default() + }; + } + } + + QueryFacet::None => {} + }; + } + } + + let result_count_arc = Arc::new(AtomicUsize::new(0)); + let result_count_uncommitted_arc = Arc::new(AtomicUsize::new(0)); + + 'fallback: loop { + let mut unique_terms: AHashMap = AHashMap::new(); + let mut non_unique_terms: Vec = Vec::new(); + let mut nonunique_terms_count = 0u32; + + tokenizer( + &shard_ref, + &query_string, + &mut unique_terms, + &mut non_unique_terms, + shard_ref.meta.tokenizer, + shard_ref.segment_number_mask1, + &mut nonunique_terms_count, + u16::MAX as u32, + MAX_POSITIONS_PER_TERM, + true, + &mut query_type_mut, + shard_ref.meta.ngram_indexing, + 0, + 1, + ) + .await; + + if include_uncommited && shard_ref.uncommitted { + shard_ref.search_uncommitted( + &unique_terms, + &non_unique_terms, + &mut query_type_mut, + &result_type, + &field_filter_set, + &facet_filter_sparse, + &mut search_result, + &result_count_uncommitted_arc, + offset + length, + ); + } + + let mut query_list_map: AHashMap = AHashMap::new(); + let mut query_list: Vec; + + let mut not_query_list_map: AHashMap = AHashMap::new(); + let mut not_query_list: Vec; + + let mut non_unique_query_list: Vec = Vec::new(); + let mut preceding_ngram_count = 0; + + let mut blocks_vec: Vec> = Vec::new(); + + let mut not_found_terms_hashset: AHashSet = AHashSet::new(); + + for non_unique_term in non_unique_terms.iter() { + let term = unique_terms.get(&non_unique_term.term).unwrap(); + let key0: u32 = term.key0; + let key_hash: u64 = term.key_hash; + let term_no_diacritics_umlaut_case = &non_unique_term.term; + + let mut idf = 0.0; + let mut idf_ngram1 = 0.0; + let mut idf_ngram2 = 0.0; + let mut idf_ngram3 = 0.0; + + let mut term_index_unique = 0; + if non_unique_term.op == QueryType::Not { + let query_list_map_len = not_query_list_map.len(); + let not_query_list_option = not_query_list_map.get(&key_hash); + if not_query_list_option.is_none() + && !not_found_terms_hashset.contains(&key_hash) + { + let posting_count; + let max_list_score; + let blocks; + let blocks_len; + let found_plo = if shard_ref.meta.access_type == AccessType::Mmap { + let posting_list_object_index_option = decode_posting_list_object( + &shard_ref.segments_index[key0 as usize], + &shard_ref, + key_hash, + false, + ); + + if let Some(plo) = posting_list_object_index_option { + posting_count = plo.posting_count; + max_list_score = plo.max_list_score; + blocks = &DUMMY_VEC; + blocks_len = plo.blocks.len(); + blocks_vec.push(plo.blocks); + true + } else { + 
posting_count = 0; + max_list_score = 0.0; + blocks = &DUMMY_VEC; + blocks_len = 0; + false + } + } else { + let posting_list_object_index_option = shard_ref.segments_index + [key0 as usize] + .segment + .get(&key_hash); + + if let Some(plo) = posting_list_object_index_option { + posting_count = plo.posting_count; + max_list_score = plo.max_list_score; + blocks_len = plo.blocks.len(); + blocks = &plo.blocks; + true + } else { + posting_count = 0; + max_list_score = 0.0; + blocks = &DUMMY_VEC; + blocks_len = 0; + false + } + }; + + if found_plo { + let value_new = PostingListObjectQuery { + posting_count, + max_list_score, + blocks, + blocks_index: blocks_vec.len(), + p_block_max: blocks_len as i32, + term: term_no_diacritics_umlaut_case.clone(), + key0, + term_index_unique: query_list_map_len, + idf, + idf_ngram1, + idf_ngram2, + idf_ngram3, + ngram_type: non_unique_term.ngram_type.clone(), + ..Default::default() + }; + not_query_list_map.insert(key_hash, value_new); + } else { + not_found_terms_hashset.insert(key_hash); + } + } + } else { + let query_list_map_len = query_list_map.len(); + let mut found = true; + let query_list_option = query_list_map.get(&key_hash); + match query_list_option { + None => { + if !not_found_terms_hashset.contains(&key_hash) { + let posting_count; + let posting_count_ngram_1; + let posting_count_ngram_2; + let posting_count_ngram_3; + let max_list_score; + let blocks; + let blocks_len; + let found_plo = if shard_ref.meta.access_type == AccessType::Mmap { + let posting_list_object_index_option = + decode_posting_list_object( + &shard_ref.segments_index[key0 as usize], + &shard_ref, + key_hash, + true, + ); + + if let Some(plo) = posting_list_object_index_option { + posting_count = plo.posting_count; + posting_count_ngram_1 = plo.posting_count_ngram_1; + posting_count_ngram_2 = plo.posting_count_ngram_2; + posting_count_ngram_3 = plo.posting_count_ngram_3; + max_list_score = plo.max_list_score; + blocks = &DUMMY_VEC; + blocks_len = plo.blocks.len(); + blocks_vec.push(plo.blocks); + true + } else { + posting_count = 0; + posting_count_ngram_1 = 0; + posting_count_ngram_2 = 0; + posting_count_ngram_3 = 0; + max_list_score = 0.0; + blocks = &DUMMY_VEC; + blocks_len = 0; + false + } + } else { + let posting_list_object_index_option = shard_ref.segments_index + [key0 as usize] + .segment + .get(&key_hash); + + if let Some(plo) = posting_list_object_index_option { + posting_count = plo.posting_count; + posting_count_ngram_1 = plo.posting_count_ngram_1; + posting_count_ngram_2 = plo.posting_count_ngram_2; + posting_count_ngram_3 = plo.posting_count_ngram_3; + max_list_score = plo.max_list_score; + blocks_len = plo.blocks.len(); + blocks = &plo.blocks; + true + } else { + posting_count = 0; + posting_count_ngram_1 = 0; + posting_count_ngram_2 = 0; + posting_count_ngram_3 = 0; + max_list_score = 0.0; + blocks = &DUMMY_VEC; + blocks_len = 0; + false + } + }; + + if found_plo { + if result_type != ResultType::Count { + if non_unique_term.ngram_type == NgramType::SingleTerm + || shard_ref.meta.similarity + == SimilarityType::Bm25fProximity + { + idf = (((shard_ref.indexed_doc_count as f32 + - posting_count as f32 + + 0.5) + / (posting_count as f32 + 0.5)) + + 1.0) + .ln(); + } else if non_unique_term.ngram_type == NgramType::NgramFF + || non_unique_term.ngram_type == NgramType::NgramRF + || non_unique_term.ngram_type == NgramType::NgramFR + { + idf_ngram1 = (((shard_ref.indexed_doc_count as f32 + - posting_count_ngram_1 as f32 + + 0.5) + / (posting_count_ngram_1 as f32 + 
0.5)) + + 1.0) + .ln(); + + idf_ngram2 = (((shard_ref.indexed_doc_count as f32 + - posting_count_ngram_2 as f32 + + 0.5) + / (posting_count_ngram_2 as f32 + 0.5)) + + 1.0) + .ln(); + } else { + idf_ngram1 = (((shard_ref.indexed_doc_count as f32 + - posting_count_ngram_1 as f32 + + 0.5) + / (posting_count_ngram_1 as f32 + 0.5)) + + 1.0) + .ln(); + + idf_ngram2 = (((shard_ref.indexed_doc_count as f32 + - posting_count_ngram_2 as f32 + + 0.5) + / (posting_count_ngram_2 as f32 + 0.5)) + + 1.0) + .ln(); + + idf_ngram3 = (((shard_ref.indexed_doc_count as f32 + - posting_count_ngram_3 as f32 + + 0.5) + / (posting_count_ngram_3 as f32 + 0.5)) + + 1.0) + .ln(); + } + } + + let value_new = PostingListObjectQuery { + posting_count, + max_list_score, + blocks, + blocks_index: blocks_vec.len(), + p_block_max: blocks_len as i32, + term: term_no_diacritics_umlaut_case.clone(), + key0, + term_index_unique: query_list_map_len, + idf, + idf_ngram1, + idf_ngram2, + idf_ngram3, + ngram_type: non_unique_term.ngram_type.clone(), + ..Default::default() + }; + term_index_unique = value_new.term_index_unique; + query_list_map.insert(key_hash, value_new); + } else { + if non_unique_term.op == QueryType::Intersection + || non_unique_term.op == QueryType::Phrase + { + break 'fallback; + } + not_found_terms_hashset.insert(key_hash); + found = false; + } + } + } + Some(value) => { + term_index_unique = value.term_index_unique; + } + } + + if found && non_unique_term.op == QueryType::Phrase { + let nu_plo = NonUniquePostingListObjectQuery { + term_index_unique, + term_index_nonunique: non_unique_query_list.len() + + preceding_ngram_count, + pos: 0, + p_pos: 0, + positions_pointer: 0, + positions_count: 0, + byte_array: &DUMMY_VEC_8, + field_vec: SmallVec::new(), + p_field: 0, + key0, + is_embedded: false, + embedded_positions: [0; 4], + }; + + match non_unique_term.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramRF | NgramType::NgramFR => { + preceding_ngram_count += 1 + } + _ => preceding_ngram_count += 2, + }; + + non_unique_query_list.push(nu_plo); + } + } + match term.ngram_type { + NgramType::SingleTerm => {} + NgramType::NgramFF | NgramType::NgramRF | NgramType::NgramFR => { + result_object + .query_terms + .push(term.term_ngram_1.to_string()); + result_object + .query_terms + .push(term.term_ngram_0.to_string()); + } + _ => { + result_object + .query_terms + .push(term.term_ngram_2.to_string()); + result_object + .query_terms + .push(term.term_ngram_1.to_string()); + result_object + .query_terms + .push(term.term_ngram_0.to_string()); + } + }; + { + result_object.query_terms.push(term.term.to_string()); + } + } + + not_query_list = not_query_list_map.into_values().collect(); + query_list = query_list_map.into_values().collect(); + + if shard_ref.meta.access_type == AccessType::Mmap { + for plo in query_list.iter_mut() { + plo.blocks = &blocks_vec[plo.blocks_index - 1] + } + for plo in not_query_list.iter_mut() { + plo.blocks = &blocks_vec[plo.blocks_index - 1] + } + } + + let query_list_len = query_list.len(); + let non_unique_query_list_len = non_unique_query_list.len(); + + let mut matching_blocks: i32 = 0; + let query_term_count = non_unique_terms.len(); + if query_list_len == 0 { + } else if query_list_len == 1 { + if !(shard_ref.uncommitted && include_uncommited) + && offset + length <= 1000 + && not_query_list.is_empty() + && field_filter_set.is_empty() + && shard_ref.delete_hashset.is_empty() + && facet_filter_sparse.is_empty() + && !is_range_facet + && 
result_sort_index.is_empty() + && let Some(stopword_result_object) = shard_ref + .frequentword_results + .get(&non_unique_terms[0].term) + { + result_object.query = stopword_result_object.query.clone(); + result_object + .query_terms + .clone_from(&stopword_result_object.query_terms); + result_object.result_count = stopword_result_object.result_count; + result_object.result_count_total = stopword_result_object.result_count_total; + + if result_type != ResultType::Count { + result_object + .results + .clone_from(&stopword_result_object.results); + if offset > 0 { + result_object.results.drain(..offset); + } + if length < 1000 { + result_object.results.truncate(length); + } + } + + if !search_result.query_facets.is_empty() && result_type != ResultType::Topk { + let mut facets: AHashMap = AHashMap::new(); + for facet in search_result.query_facets.iter() { + if facet.length == 0 + || stopword_result_object.facets[&facet.field].is_empty() + { + continue; + } + + let v = stopword_result_object.facets[&facet.field] + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(&a.1)) + .map(|(a, c)| (a.clone(), *c)) + .filter(|(a, _c)| { + facet.prefix.is_empty() || a.starts_with(&facet.prefix) + }) + .take(facet.length.max(facet_cap) as usize) + .collect::>(); + + if !v.is_empty() { + facets.insert(facet.field.clone(), v); + } + } + result_object.facets = facets; + }; + + return result_object; + } + + single_blockid( + &shard_ref, + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + &result_count_arc, + &mut search_result, + offset + length, + &result_type, + &field_filter_set, + &facet_filter_sparse, + &mut matching_blocks, + ) + .await; + } else if query_type_mut == QueryType::Union { + search_result.skip_facet_count = true; + + if result_type == ResultType::Count && query_list_len != 2 { + union_blockid( + &shard_ref, + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + &result_count_arc, + &mut search_result, + offset + length, + &result_type, + &field_filter_set, + &facet_filter_sparse, + ) + .await; + } else if SPEEDUP_FLAG + && query_list_len == 2 + && search_result.query_facets.is_empty() + && facet_filter_sparse.is_empty() + && search_result.topk_candidates.result_sort.is_empty() + { + union_docid_2( + &shard_ref, + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + &result_count_arc, + &mut search_result, + offset + length, + &result_type, + &field_filter_set, + &facet_filter_sparse, + &mut matching_blocks, + query_term_count, + ) + .await; + } else if SPEEDUP_FLAG + && search_result.topk_candidates.result_sort.is_empty() + && query_list_len <= 10 + { + union_docid_3( + &shard_ref, + &mut non_unique_query_list, + &mut Vec::from([QueueObject { + query_list: query_list.clone(), + query_index: 0, + max_score: f32::MAX, + }]), + &mut not_query_list, + &result_count_arc, + &mut search_result, + offset + length, + &result_type, + &field_filter_set, + &facet_filter_sparse, + &mut matching_blocks, + 0, + query_term_count, + ) + .await; + } else { + union_blockid( + &shard_ref, + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + &result_count_arc, + &mut search_result, + offset + length, + &result_type, + &field_filter_set, + &facet_filter_sparse, + ) + .await; + } + } else { + intersection_blockid( + &shard_ref, + &mut non_unique_query_list, + &mut query_list, + &mut not_query_list, + &result_count_arc, + &mut search_result, + offset + length, + &result_type, + &field_filter_set, + &facet_filter_sparse, + &mut 
matching_blocks, + query_type_mut == QueryType::Phrase && non_unique_query_list_len >= 2, + query_term_count, + ) + .await; + + if shard_ref.enable_fallback + && (result_count_arc.load(Ordering::Relaxed) < offset + length) + { + continue 'fallback; + } + } + + break; + } + + result_object.result_count = search_result.topk_candidates.current_heap_size; + + if search_result.topk_candidates.current_heap_size > offset { + result_object.results = mem::take(&mut search_result.topk_candidates._elements); + + if search_result.topk_candidates.current_heap_size < offset + length { + result_object + .results + .truncate(search_result.topk_candidates.current_heap_size); + } + + result_object + .results + .sort_by(|a, b| search_result.topk_candidates.result_ordering_shard(*b, *a)); + + if offset > 0 { + result_object.results.drain(..offset); + } + } + + result_object.result_count_total = result_count_uncommitted_arc.load(Ordering::Relaxed) + + result_count_arc.load(Ordering::Relaxed); + + if !search_result.query_facets.is_empty() { + result_object.facets = if result_object.query_terms.is_empty() { + shard_ref + .get_index_string_facets_shard(query_facets) + .unwrap_or_default() + } else { + let mut facets: AHashMap = AHashMap::new(); + for (i, facet) in search_result.query_facets.iter_mut().enumerate() { + if facet.length == 0 || facet.values.is_empty() { + continue; + } + + let v = if facet.ranges == Ranges::None { + if shard_ref.facets[i].values.is_empty() { + continue; + } + + if shard_ref.facets[i].field_type == FieldType::StringSet16 + || shard_ref.facets[i].field_type == FieldType::StringSet32 + { + let mut hash_map: AHashMap = AHashMap::new(); + for value in facet.values.iter() { + let value2 = shard_ref.facets[i] + .values + .get_index(*value.0 as usize) + .unwrap(); + + for term in value2.1.0.iter() { + *hash_map.entry(term.clone()).or_insert(0) += value.1; + } + } + + hash_map + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(a.1)) + .map(|(a, c)| (a.clone(), *c)) + .filter(|(a, _c)| { + facet.prefix.is_empty() || a.starts_with(&facet.prefix) + }) + .take(facet.length.max(facet_cap) as usize) + .collect::>() + } else { + facet + .values + .iter() + .sorted_unstable_by(|a, b| b.1.cmp(a.1)) + .map(|(a, c)| { + ( + shard_ref.facets[i] + .values + .get_index(*a as usize) + .unwrap() + .0 + .clone(), + *c, + ) + }) + .filter(|(a, _c)| { + facet.prefix.is_empty() || a.starts_with(&facet.prefix) + }) + .take(facet.length.max(facet_cap) as usize) + .collect::>() + } + } else { + let range_type = match &facet.ranges { + Ranges::U8(range_type, _ranges) => range_type.clone(), + Ranges::U16(range_type, _ranges) => range_type.clone(), + Ranges::U32(range_type, _ranges) => range_type.clone(), + Ranges::U64(range_type, _ranges) => range_type.clone(), + Ranges::I8(range_type, _ranges) => range_type.clone(), + Ranges::I16(range_type, _ranges) => range_type.clone(), + Ranges::I32(range_type, _ranges) => range_type.clone(), + Ranges::I64(range_type, _ranges) => range_type.clone(), + Ranges::Timestamp(range_type, _ranges) => range_type.clone(), + Ranges::F32(range_type, _ranges) => range_type.clone(), + Ranges::F64(range_type, _ranges) => range_type.clone(), + Ranges::Point(range_type, _ranges, _base, _unit) => range_type.clone(), + _ => RangeType::CountWithinRange, + }; + + match range_type { + RangeType::CountAboveRange => { + let mut sum = 0usize; + for value in facet + .values + .iter_mut() + .sorted_unstable_by(|a, b| b.0.cmp(a.0)) + { + sum += *value.1; + *value.1 = sum; + } + } + RangeType::CountBelowRange 
=> { + let mut sum = 0usize; + for value in facet + .values + .iter_mut() + .sorted_unstable_by(|a, b| a.0.cmp(b.0)) + { + sum += *value.1; + *value.1 = sum; + } + } + RangeType::CountWithinRange => {} + } + + facet + .values + .iter() + .sorted_unstable_by(|a, b| a.0.cmp(b.0)) + .map(|(a, c)| { + ( + match &facet.ranges { + Ranges::U8(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::U16(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::U32(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::U64(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::I8(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::I16(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::I32(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::I64(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::Timestamp(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::F32(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + Ranges::F64(_range_type, ranges) => { + ranges[*a as usize].0.clone() + } + + Ranges::Point(_range_type, ranges, _base, _unit) => { + ranges[*a as usize].0.clone() + } + + _ => "".into(), + }, + *c, + ) + }) + .filter(|(a, _c)| { + facet.prefix.is_empty() || a.starts_with(&facet.prefix) + }) + .collect::>() + }; + + if !v.is_empty() { + facets.insert(facet.field.clone(), v); + } + } + facets + }; + } + + result_object + } +} diff --git a/mobile_app/rust/src/seekstorm/single.rs b/mobile_app/rust/src/seekstorm/single.rs new file mode 100644 index 0000000..99b47a7 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/single.rs @@ -0,0 +1,419 @@ +use std::sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, +}; + +use crate::{ + add_result::{PostingListObjectSingle, add_result_singleterm_multifield}, + compatible::{_blsr_u64, _mm_tzcnt_64}, + index::{ + AccessType, BlockObjectIndex, CompressionType, NonUniquePostingListObjectQuery, + PostingListObjectQuery, SORT_FLAG, SPEEDUP_FLAG, Shard, + }, + intersection::{BlockObject, bitpacking32_get_delta}, + search::{FilterSparse, ResultType, SearchResult}, + utils::{read_u16, read_u64}, +}; + +use ahash::AHashSet; +use num_traits::FromPrimitive; + +#[allow(clippy::too_many_arguments)] +#[allow(clippy::ptr_arg)] +#[allow(non_snake_case)] +pub(crate) async fn single_docid<'a>( + shard: &'a Shard, + query_list: &mut Vec>, + not_query_list: &mut [PostingListObjectQuery<'a>], + blo: &BlockObjectIndex, + term_index: usize, + result_count: &mut i32, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], +) { + let block_score = blo.max_block_score; + + let filtered = !not_query_list.is_empty() + || !field_filter_set.is_empty() + || !search_result.topk_candidates.result_sort.is_empty() + || (!search_result.query_facets.is_empty() || !facet_filter.is_empty()) + && result_type != &ResultType::Topk; + if SPEEDUP_FLAG + && (result_type == &ResultType::Count + || (search_result.topk_candidates.current_heap_size == top_k + && block_score <= search_result.topk_candidates._elements[0].score)) + && (!filtered || result_type == &ResultType::Topk) + { + return; + } + + let block_id = blo.block_id; + for plo in not_query_list.iter_mut() { + let query_list_item_mut = plo; + + let result = query_list_item_mut + .blocks + .binary_search_by(|block| block.block_id.cmp(&block_id)); + match result { + Ok(p_block) => { + 
query_list_item_mut.bm25_flag = true; + query_list_item_mut.p_block = p_block as i32 + } + Err(_) => { + query_list_item_mut.bm25_flag = false; + continue; + } + }; + let blo = &query_list_item_mut.blocks[query_list_item_mut.p_block as usize]; + + query_list_item_mut.compression_type = + FromPrimitive::from_i32((blo.compression_type_pointer >> 30) as i32).unwrap(); + + query_list_item_mut.rank_position_pointer_range = + blo.compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let posting_pointer_size_sum = blo.pointer_pivot_p_docid as usize * 2 + + if (blo.pointer_pivot_p_docid as usize) <= blo.posting_count as usize { + ((blo.posting_count as usize + 1) - blo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + query_list_item_mut.compressed_doc_id_range = + query_list_item_mut.rank_position_pointer_range as usize + posting_pointer_size_sum; + + if shard.meta.access_type == AccessType::Mmap { + let segment = &shard.segments_index[query_list_item_mut.key0 as usize]; + query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[blo.block_id as usize].0 + ..segment.byte_array_blocks_pointer[blo.block_id as usize].0 + + segment.byte_array_blocks_pointer[blo.block_id as usize].1]; + } else { + query_list_item_mut.byte_array = &shard.segments_index + [query_list_item_mut.key0 as usize] + .byte_array_blocks[blo.block_id as usize]; + } + + query_list_item_mut.p_docid = 0; + query_list_item_mut.p_docid_count = + query_list_item_mut.blocks[query_list_item_mut.p_block as usize].posting_count as usize + + 1; + + query_list_item_mut.docid = 0; + + if query_list_item_mut.compression_type == CompressionType::Rle { + query_list_item_mut.p_run_count = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range, + ) as i32; + let startdocid = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range + 2, + ); + let runlength = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range + 4, + ); + query_list_item_mut.docid = startdocid as i32; + query_list_item_mut.run_end = (startdocid + runlength) as i32; + query_list_item_mut.p_run_sum = runlength as i32; + query_list_item_mut.p_run = 6; + } + } + + let compression_type: CompressionType = + FromPrimitive::from_i32((blo.compression_type_pointer >> 30) as i32).unwrap(); + + let rank_position_pointer_range: u32 = + blo.compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let posting_pointer_size_sum = blo.pointer_pivot_p_docid as u32 * 2 + + if (blo.pointer_pivot_p_docid as usize) <= blo.posting_count as usize { + ((blo.posting_count as u32 + 1) - blo.pointer_pivot_p_docid as u32) * 3 + } else { + 0 + }; + let compressed_doc_id_range: u32 = rank_position_pointer_range + posting_pointer_size_sum; + + let query_list_item_mut = &mut query_list[term_index]; + + let byte_array = if shard.meta.access_type == AccessType::Mmap { + let segment = &shard.segments_index[query_list_item_mut.key0 as usize]; + &shard.index_file_mmap[segment.byte_array_blocks_pointer[blo.block_id as usize].0 + ..segment.byte_array_blocks_pointer[blo.block_id as usize].0 + + segment.byte_array_blocks_pointer[blo.block_id as usize].1] + } else { + &shard.segments_index[query_list_item_mut.key0 as usize].byte_array_blocks + [blo.block_id as usize] + }; + + let mut plo = PostingListObjectSingle { + rank_position_pointer_range, + pointer_pivot_p_docid: blo.pointer_pivot_p_docid, + byte_array, + p_docid: 0, + idf: 
query_list_item_mut.idf, + idf_ngram1: query_list_item_mut.idf_ngram1, + idf_ngram2: query_list_item_mut.idf_ngram2, + idf_ngram3: query_list_item_mut.idf_ngram3, + ngram_type: query_list_item_mut.ngram_type.clone(), + }; + + match compression_type { + CompressionType::Array => { + for i in 0..=blo.posting_count { + plo.p_docid = i as i32; + + add_result_singleterm_multifield( + shard, + ((blo.block_id as usize) << 16) + | read_u16( + byte_array, + compressed_doc_id_range as usize + (i as usize * 2), + ) as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + &plo, + not_query_list, + block_score, + ); + } + } + + CompressionType::Delta => { + let deltasizebits = 4; + let rangebits: i32 = + byte_array[compressed_doc_id_range as usize] as i32 >> (8 - deltasizebits); + + let mut docid_old: i32 = -1; + let mut bitposition: u32 = (compressed_doc_id_range << 3) + deltasizebits; + + for i in 0..=blo.posting_count { + plo.p_docid = i as i32; + let delta = bitpacking32_get_delta(byte_array, bitposition, rangebits as u32); + bitposition += rangebits as u32; + + let doc_id: u16 = (docid_old + delta as i32 + 1) as u16; + docid_old = doc_id as i32; + + add_result_singleterm_multifield( + shard, + ((blo.block_id as usize) << 16) | doc_id as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + &plo, + not_query_list, + block_score, + ); + } + } + + CompressionType::Rle => { + let runs_count = read_u16(&byte_array[compressed_doc_id_range as usize..], 0) as i32; + + plo.p_docid = 0; + for i in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &byte_array[compressed_doc_id_range as usize..], + i as usize * 2, + ); + let runlength = read_u16( + &byte_array[compressed_doc_id_range as usize..], + (i + 1) as usize * 2, + ); + + for j in 0..=runlength { + add_result_singleterm_multifield( + shard, + ((blo.block_id as usize) << 16) | (startdocid + j) as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + &plo, + not_query_list, + block_score, + ); + + plo.p_docid += 1; + } + } + } + + CompressionType::Bitmap => { + plo.p_docid = 0; + let block_id_msb = (blo.block_id as usize) << 16; + + for ulong_pos in 0u64..1024 { + let mut intersect: u64 = read_u64( + &byte_array[compressed_doc_id_range as usize..], + ulong_pos as usize * 8, + ); + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as u64; + + intersect = unsafe { _blsr_u64(intersect) }; + + add_result_singleterm_multifield( + shard, + block_id_msb | ((ulong_pos << 6) + bit_pos) as usize, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + &plo, + not_query_list, + block_score, + ); + + plo.p_docid += 1; + } + } + } + + _ => {} + } +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn single_blockid<'a>( + index: &'a Shard, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'a>], + query_list: &mut Vec>, + not_query_list: &mut [PostingListObjectQuery<'a>], + result_count_arc: &Arc, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + matching_blocks: &mut i32, +) { + let term_index = 0; + let filtered = !not_query_list.is_empty() + || !field_filter_set.is_empty() + || !index.delete_hashset.is_empty() + || !search_result.topk_candidates.result_sort.is_empty() + || (!search_result.query_facets.is_empty() || 
!facet_filter.is_empty()) + && result_type != &ResultType::Topk; + + if (index.enable_single_term_topk || (result_type == &ResultType::Count)) + && (non_unique_query_list.len() <= 1) + && !filtered + { + result_count_arc.fetch_add( + query_list[term_index].posting_count as usize, + Ordering::Relaxed, + ); + + return; + } + + let mut result_count_local = 0; + + let enable_inter_query_threading_single = + if !index.enable_search_quality_test && index.enable_inter_query_threading_auto { + query_list[term_index].posting_count / query_list[term_index].p_block_max as u32 > 10 + } else { + index.enable_inter_query_threading + }; + + let mut block_vec: Vec = Vec::new(); + + for (p_block, blo) in query_list[term_index].blocks.iter().enumerate() { + if !enable_inter_query_threading_single { + let block_score = blo.max_block_score; + + if SPEEDUP_FLAG && SORT_FLAG { + let p_block_vec: Vec = vec![p_block as i32]; + let block_object = BlockObject { + block_id: blo.block_id as usize, + block_score, + p_block_vec, + }; + + block_vec.push(block_object); + } else if !SPEEDUP_FLAG + || (filtered && result_type != &ResultType::Topk) + || search_result.topk_candidates.current_heap_size < top_k + || block_score > search_result.topk_candidates._elements[0].score + { + single_docid( + index, + query_list, + not_query_list, + blo, + term_index, + &mut result_count_local, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + ) + .await; + } + } + } + + if SORT_FLAG && SPEEDUP_FLAG { + block_vec.sort_unstable_by(|x, y| y.block_score.partial_cmp(&x.block_score).unwrap()); + for (block_index, block) in block_vec.iter().enumerate() { + if !filtered && block_index == top_k { + break; + } + if (search_result.topk_candidates.current_heap_size == top_k) + && (block.block_score <= search_result.topk_candidates._elements[0].score) + { + if !filtered { + break; + } else if result_type == &ResultType::Topk { + continue; + } + } + + let blo = &query_list[term_index].blocks[block.p_block_vec[0] as usize]; + + single_docid( + index, + query_list, + not_query_list, + blo, + term_index, + &mut result_count_local, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + ) + .await; + } + } + + result_count_arc.fetch_add( + if !filtered { + query_list[term_index].posting_count as usize + } else { + result_count_local as usize + }, + Ordering::Relaxed, + ); + + *matching_blocks = query_list[term_index].blocks.len() as i32; +} diff --git a/mobile_app/rust/src/seekstorm/tokenizer.rs b/mobile_app/rust/src/seekstorm/tokenizer.rs new file mode 100644 index 0000000..5539d1e --- /dev/null +++ b/mobile_app/rust/src/seekstorm/tokenizer.rs @@ -0,0 +1,1680 @@ +use std::cmp; + +use ahash::AHashMap; +use finl_unicode::categories::{CharacterCategories, MinorCategory}; + +use crate::{ + index::{ + MAX_QUERY_TERM_NUMBER, NgramSet, NgramType, NonUniqueTermObject, Shard, TermObject, + TokenizerType, hash32, hash64, + }, + search::QueryType, +}; + +const APOSTROPH: [char; 2] = ['\u{2019}', '\u{0027}']; +const ZALGO_CHAR_CATEGORIES: [MinorCategory; 2] = [MinorCategory::Mn, MinorCategory::Me]; + +/// fold_diacritics_accents_zalgo_umlaut() (used by TokenizerType::UnicodeAlphanumericFolded): +/// Converts text with diacritics, accents, zalgo text, umlaut, bold, italic, full-width UTF-8 characters into its basic representation. +/// Unicode UTF-8 has made life so much easier compared to the old code pages, but its endless possibilities also pose challenges in parsing and indexing. 
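+/// For example, "café", "cafe" written with a combining accent, and "𝐜𝐚𝐟𝐞" in mathematical bold should all be findable with the plain query "cafe"; this folding reduces them to the same basic form.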
+/// The challenge is that the same basic letter might be represented by different UTF8 characters if they contain diacritics, accents, or are bold, italic, or full-width. +/// Sometimes, users can't search because the keyboard doesn't have these letters or they don't know how to enter, or they even don't know what that letter looks like. +/// Sometimes the document to be ingested is already written without diacritics for the same reasons. +/// We don't want to search for every variant separately, most often we even don't know that they exist in the index. +/// We want to have all results, for every variant, no matter which variant is entered in the query, +/// e.g. for indexing LinkedIn posts that make use of external bold/italic formatters or for indexing documents in accented languages. +/// It is important that the search engine supports the character folding rather than external preprocessing, as we want to have both: enter the query in any character form, receive all results independent from their character form, but have them returned in their original, unaltered characters. +pub fn fold_diacritics_accents_ligatures_zalgo_umlaut(string: &str) -> String { + string + .to_lowercase() + .chars() + .fold(String::with_capacity(string.len()), |mut folded, cc| { + let mut base_char = None; + let mut base_char2 = None; + + match cc { + 'ff' => folded.push_str("ff"), + 'ffi' => folded.push_str("ffi"), + 'ffl' => folded.push_str("ffl"), + 'fi' => folded.push_str("fi"), + 'fl' => folded.push_str("fl"), + 'st' => folded.push_str("st"), + 'ſt' => folded.push_str("st"), + + 'ⅰ' => folded.push('i'), + 'ⅱ' => folded.push_str("ii"), + 'ⅲ' => folded.push_str("iii"), + 'ⅳ' => folded.push_str("iv"), + 'ⅴ' => folded.push('v'), + 'ⅵ' => folded.push_str("vi"), + 'ⅶ' => folded.push_str("vii"), + 'ⅷ' => folded.push_str("viii"), + 'ⅸ' => folded.push_str("ix"), + 'ⅹ' => folded.push('x'), + 'ⅺ' => folded.push_str("xi"), + 'ⅻ' => folded.push_str("xii"), + 'ⅼ' => folded.push('l'), + 'ⅽ' => folded.push('c'), + 'ⅾ' => folded.push('d'), + 'ⅿ' => folded.push('m'), + + 'ä' => folded.push_str("ae"), + 'ö' => folded.push_str("oe"), + 'ü' => folded.push_str("ue"), + 'ß' => folded.push_str("ss"), + 'ł' => folded.push('l'), + 'æ' => folded.push('a'), + 'œ' => folded.push('o'), + 'ø' => folded.push('o'), + 'ð' => folded.push('d'), + 'þ' => folded.push('t'), + 'đ' => folded.push('d'), + 'ɖ' => folded.push('d'), + 'ħ' => folded.push('h'), + 'ı' => folded.push('i'), + 'ƿ' => folded.push('w'), + 'ȝ' => folded.push('g'), + 'Ƿ' => folded.push('w'), + 'Ȝ' => folded.push('g'), + + _ => { + unicode_normalization::char::decompose_canonical(cc, |c| { + base_char.get_or_insert(c); + }); + unicode_normalization::char::decompose_compatible(base_char.unwrap(), |c| { + if c.is_alphanumeric() { + base_char2.get_or_insert(c); + } + }); + if base_char2.is_none() { + base_char2 = base_char + } + + if !ZALGO_CHAR_CATEGORIES.contains(&base_char2.unwrap().get_minor_category()) { + match base_char2.unwrap() { + 'ł' => folded.push('l'), + 'æ' => folded.push('a'), + 'œ' => folded.push('o'), + 'ø' => folded.push('o'), + 'ð' => folded.push('d'), + 'þ' => folded.push('t'), + 'đ' => folded.push('d'), + 'ɖ' => folded.push('d'), + 'ħ' => folded.push('h'), + 'ı' => folded.push('i'), + 'ƿ' => folded.push('w'), + 'ȝ' => folded.push('g'), + 'Ƿ' => folded.push('w'), + 'Ȝ' => folded.push('g'), + + _ => folded.push(base_char2.unwrap()), + } + } + } + } + folded + }) +} + +/// Tokenizer splits text to terms +#[allow(clippy::too_many_arguments)] 
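+// Summary of the function below: in query mode (`is_query`) the tokenizer additionally interprets
+// the operators `+` (required term), `-` (NOT) and `"…"` (phrase) and truncates overly long queries
+// to MAX_QUERY_TERM_NUMBER terms; in indexing mode it records per-field term positions and, gated by
+// the `ngram_indexing` bit set, combines frequent terms into n-gram terms.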
+#[allow(clippy::assigning_clones)] +pub(crate) async fn tokenizer( + index: &Shard, + text: &str, + unique_terms: &mut AHashMap, + non_unique_terms: &mut Vec, + tokenizer: TokenizerType, + segment_number_mask1: u32, + nonunique_terms_count: &mut u32, + token_per_field_max: u32, + position_per_term_max: usize, + is_query: bool, + query_type: &mut QueryType, + ngram_indexing: u8, + indexed_field_id: usize, + indexed_field_number: usize, +) { + let (max_completion_entries, completion_len) = if is_query { + (0, 0) + } else { + let root_index = &index.index_option.as_ref().unwrap().read().await; + if let Some(v) = root_index.completion_option.as_ref() { + (root_index.max_completion_entries, v.read().await.len()) + } else { + (0, 0) + } + }; + + let token_per_field_max_capped = cmp::max(token_per_field_max, 65_536); + + let text_normalized; + let mut non_unique_terms_line: Vec<&str> = Vec::new(); + let mut non_unique_terms_line_string: Vec = Vec::new(); + + let mut start = false; + let mut start_pos = 0; + let mut first_part = &text[0..0]; + + if is_query { + match tokenizer { + TokenizerType::AsciiAlphabetic => { + text_normalized = text.to_ascii_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + 'a'..='z' | '"' | '+' | '-' => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + + TokenizerType::UnicodeAlphanumeric => { + text_normalized = text.to_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '"' | '+' | '-' | '#' => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + TokenizerType::UnicodeAlphanumericFolded => { + text_normalized = fold_diacritics_accents_ligatures_zalgo_umlaut(text); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + '"' | '+' | '-' | '#' => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + let apostroph = APOSTROPH.contains(&char.1); + if start { + if apostroph { + first_part = &text_normalized[start_pos..char.0]; + } else { + if first_part.len() >= 2 { + non_unique_terms_line.push(first_part) + } else { + non_unique_terms_line + .push(&text_normalized[start_pos..char.0]); + } + first_part = &text_normalized[0..0]; + } + } else if !apostroph && !first_part.is_empty() { + non_unique_terms_line.push(first_part); + first_part = &text_normalized[0..0]; + } + + false + } + }; + } + } + + TokenizerType::Whitespace => { + text_normalized = text.to_owned(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if !token.is_whitespace() => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + + TokenizerType::WhitespaceLowercase => { + text_normalized = text.to_ascii_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if !token.is_whitespace() => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + + #[cfg(feature = 
"zh")] + TokenizerType::UnicodeAlphanumericZH => { + text_normalized = text.to_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '"' | '+' | '-' | '#' => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(&text_normalized[start_pos..char.0], true); + non_unique_terms_line_string.extend(result.0); + } + false + } + }; + } + } + } + } else { + match tokenizer { + TokenizerType::AsciiAlphabetic => { + text_normalized = text.to_ascii_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + 'a'..='z' => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + TokenizerType::UnicodeAlphanumeric => { + text_normalized = text.to_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '+' | '-' | '#' => start, + + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + + TokenizerType::UnicodeAlphanumericFolded => { + text_normalized = fold_diacritics_accents_ligatures_zalgo_umlaut(text); + + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '+' | '-' | '#' => start, + + _ => { + let apostroph = APOSTROPH.contains(&char.1); + if start { + if apostroph { + first_part = &text_normalized[start_pos..char.0]; + } else { + if first_part.len() >= 2 { + non_unique_terms_line.push(first_part) + } else { + non_unique_terms_line + .push(&text_normalized[start_pos..char.0]); + } + first_part = &text_normalized[0..0]; + } + } else if !apostroph && !first_part.is_empty() { + non_unique_terms_line.push(first_part); + first_part = &text_normalized[0..0]; + } + + false + } + }; + } + } + + TokenizerType::Whitespace => { + text_normalized = text.to_owned(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if !token.is_whitespace() => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + + TokenizerType::WhitespaceLowercase => { + text_normalized = text.to_ascii_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if !token.is_whitespace() => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + non_unique_terms_line.push(&text_normalized[start_pos..char.0]); + } + false + } + }; + } + } + + #[cfg(feature = "zh")] + TokenizerType::UnicodeAlphanumericZH => { + text_normalized = text.to_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '+' | '-' | '#' => start, + + _ => { + if start { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(&text_normalized[start_pos..char.0], true); + non_unique_terms_line_string.extend(result.0); + } + false + } + }; + } + } + } + } + + #[cfg(feature = "zh")] + if tokenizer == 
TokenizerType::UnicodeAlphanumericZH { + if start { + if first_part.len() >= 2 { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(first_part, true); + non_unique_terms_line_string.extend(result.0); + } else { + non_unique_terms_line.push(&text_normalized[start_pos..text_normalized.len()]); + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(&text_normalized[start_pos..text_normalized.len()], true); + non_unique_terms_line_string.extend(result.0); + } + } else if !first_part.is_empty() { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(first_part, true); + non_unique_terms_line_string.extend(result.0); + } + non_unique_terms_line = non_unique_terms_line_string + .iter() + .map(|s| s.as_str()) + .collect(); + } + + if tokenizer == TokenizerType::AsciiAlphabetic + || tokenizer == TokenizerType::UnicodeAlphanumeric + || tokenizer == TokenizerType::UnicodeAlphanumericFolded + || tokenizer == TokenizerType::Whitespace + || tokenizer == TokenizerType::WhitespaceLowercase + { + if start { + if first_part.len() >= 2 { + non_unique_terms_line.push(first_part) + } else { + non_unique_terms_line.push(&text_normalized[start_pos..text_normalized.len()]); + } + } else if !first_part.is_empty() { + non_unique_terms_line.push(first_part) + } + } + + if is_query && non_unique_terms_line.len() > MAX_QUERY_TERM_NUMBER { + non_unique_terms_line.truncate(MAX_QUERY_TERM_NUMBER); + } + + let mut position: u32 = 0; + let mut is_phrase = query_type == &QueryType::Phrase; + let mut term_string_1 = "".to_string(); + let mut term_frequent_1 = false; + let mut term_string_2 = "".to_string(); + let mut term_frequent_2 = false; + + let mut term_len_1 = 0; + let mut term_len_2 = 0; + + let mut non_unique_terms_raw = Vec::new(); + + for term_string in non_unique_terms_line.iter_mut() { + if is_query { + let mut query_type_term = if is_phrase { + QueryType::Phrase + } else { + query_type.clone() + }; + if term_string.starts_with('+') { + if query_type != &QueryType::Phrase { + *query_type = QueryType::Intersection; + } + query_type_term = QueryType::Intersection; + *term_string = &term_string[1..]; + } else if term_string.starts_with('-') { + query_type_term = QueryType::Not; + *term_string = &term_string[1..]; + } + if term_string.starts_with('\"') { + is_phrase = true; + *query_type = QueryType::Phrase; + query_type_term = QueryType::Phrase; + *term_string = &term_string[1..]; + } + if term_string.ends_with('\"') { + *query_type = QueryType::Phrase; + *term_string = &term_string[0..term_string.len() - 1]; + is_phrase = false; + } + + if term_string.is_empty() { + continue; + } + + if !index.stop_words.is_empty() && index.stop_words.contains(*term_string) { + continue; + } + + let term_string = if let Some(stemmer) = index.stemmer.as_ref() { + stemmer.stem(term_string).to_string() + } else { + term_string.to_string() + }; + + non_unique_terms_raw.push((term_string, query_type_term)); + } else { + if !index.stop_words.is_empty() && index.stop_words.contains(*term_string) { + continue; + } + + let term_string_0 = if let Some(stemmer) = index.stemmer.as_ref() { + stemmer.stem(term_string).to_string() + } else { + term_string.to_string() + }; + + let mut term_positions_len; + let term_hash_0 = hash64(term_string_0.as_bytes()); + let term_frequent_0 = index.frequent_hashset.contains(&term_hash_0); + + let term_number_0 = term_string_0.chars().next().unwrap().is_ascii_digit() + && 
term_string_0.chars().last().unwrap().is_ascii_digit(); + let term_len_0 = term_string_0.chars().count(); + + if index.indexed_schema_vec[indexed_field_id].completion_source { + let mut level_completions = index.level_completions.write().await; + + if !term_number_0 && term_len_0 > 1 { + let unigram_string = vec![term_string_0.clone()]; + if completion_len < max_completion_entries { + level_completions + .entry(unigram_string) + .and_modify(|v| { + *v += 1; + }) + .or_insert(1); + } + } + + if !term_string_1.is_empty() { + if term_len_1 > 1 { + let bigram_string = vec![term_string_1.clone(), term_string_0.clone()]; + if completion_len < max_completion_entries { + level_completions + .entry(bigram_string) + .and_modify(|v| { + *v += 1; + }) + .or_insert(1); + } + } + + if !term_string_2.is_empty() && term_len_2 > 1 { + let trigram_string = vec![ + term_string_2.clone(), + term_string_1.clone(), + term_string_0.clone(), + ]; + if completion_len < max_completion_entries { + level_completions + .entry(trigram_string) + .and_modify(|v| { + *v += 1; + }) + .or_insert(1); + } + } + } + + drop(level_completions); + + term_len_2 = term_len_1; + term_len_1 = term_len_0; + } + + let term_object = unique_terms + .entry(term_string_0.clone()) + .or_insert_with(|| { + let term_bytes = term_string_0.as_bytes(); + TermObject { + term: term_string_0.clone(), + + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes), + + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::SingleTerm, + + ..Default::default() + } + }); + + term_object.field_positions_vec[indexed_field_id].push(position as u16); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + + if !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramFF as u8 != 0 + && term_frequent_1 + && term_frequent_0) + { + let term_string = [term_string_1.as_str(), term_string_0.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFF, + term_ngram_1: term_string_1.clone(), + term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 1); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + if !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramRF as u8 != 0 + && !term_frequent_1 + && term_frequent_0) + { + let term_string = [term_string_1.as_str(), term_string_0.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramRF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramRF, + term_ngram_1: term_string_1.clone(), + term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 1); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + if !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramFR as u8 
!= 0 + && term_frequent_1 + && !term_frequent_0) + { + let term_string = [term_string_1.as_str(), term_string_0.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFR as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFR, + term_ngram_1: term_string_1.clone(), + term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 1); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + if !term_string_2.is_empty() + && !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramFFF as u8 != 0 + && term_frequent_2 + && term_frequent_1 + && term_frequent_0) + { + let term_string = [ + term_string_2.as_str(), + term_string_1.as_str(), + term_string_0.as_str(), + ] + .join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFFF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFFF, + term_ngram_2: term_string_2.clone(), + term_ngram_1: term_string_1.clone(), + term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 2); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + if !term_string_2.is_empty() + && !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramRFF as u8 != 0 + && !term_frequent_2 + && term_frequent_1 + && term_frequent_0) + { + let term_string = [ + term_string_2.as_str(), + term_string_1.as_str(), + term_string_0.as_str(), + ] + .join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramRFF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramRFF, + term_ngram_2: term_string_2.clone(), + term_ngram_1: term_string_1.clone(), + term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 2); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + if !term_string_2.is_empty() + && !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramRFF as u8 != 0 + && term_frequent_2 + && term_frequent_1 + && !term_frequent_0) + { + let term_string = [ + term_string_2.as_str(), + term_string_1.as_str(), + term_string_0.as_str(), + ] + .join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFFR as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFFR, + term_ngram_2: term_string_2.clone(), + term_ngram_1: term_string_1.clone(), + 
term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 2); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + if !term_string_2.is_empty() + && !term_string_1.is_empty() + && (ngram_indexing & NgramSet::NgramRFF as u8 != 0 + && term_frequent_2 + && !term_frequent_1 + && term_frequent_0) + { + let term_string = [ + term_string_2.as_str(), + term_string_1.as_str(), + term_string_0.as_str(), + ] + .join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFRF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFRF, + term_ngram_2: term_string_2, + term_ngram_1: term_string_1.clone(), + term_ngram_0: term_string_0.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16 - 2); + term_positions_len = term_object.field_positions_vec[indexed_field_id].len(); + } + + term_string_2 = term_string_1; + term_string_1 = term_string_0; + + term_frequent_2 = term_frequent_1; + term_frequent_1 = term_frequent_0; + + position += 1; + + if position >= token_per_field_max_capped { + break; + } + if term_positions_len >= position_per_term_max { + continue; + } + }; + } + + if is_query { + let len = non_unique_terms_raw.len(); + + let mut term_0; + let mut term_frequent_0; + let mut term_phrase_0; + + if len > 0 { + let item = &non_unique_terms_raw[0]; + term_0 = item.0.clone(); + let term_hash_0 = hash64(term_0.as_bytes()); + term_frequent_0 = index.frequent_hashset.contains(&term_hash_0); + term_phrase_0 = item.1 == QueryType::Phrase; + } else { + term_0 = "".to_string(); + term_frequent_0 = false; + term_phrase_0 = false; + } + + let mut term_1; + let mut term_frequent_1; + let mut term_phrase_1; + if len > 1 { + let item = &non_unique_terms_raw[1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + + let len = non_unique_terms_raw.len(); + let mut i = 0; + while i < len { + let term_2; + let term_frequent_2; + let term_phrase_2; + if len > i + 2 { + let item = &non_unique_terms_raw[i + 2]; + term_2 = item.0.clone(); + let term_hash_2 = hash64(term_2.as_bytes()); + term_frequent_2 = index.frequent_hashset.contains(&term_hash_2); + term_phrase_2 = item.1 == QueryType::Phrase; + } else { + term_2 = "".to_string(); + term_frequent_2 = false; + term_phrase_2 = false; + } + if i + 2 < len + && (ngram_indexing & NgramSet::NgramFFF as u8 != 0 + && term_frequent_0 + && term_frequent_1 + && term_frequent_2 + && term_phrase_0 + && term_phrase_1 + && term_phrase_2) + { + let term_string = [term_0.as_str(), term_1.as_str(), term_2.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFFF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFFF, + term_ngram_2: 
term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramFFF, + op: QueryType::Phrase, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + }); + + i += 3; + + if len > i { + let item = &non_unique_terms_raw[i]; + term_0 = item.0.clone(); + let term_hash_0 = hash64(term_0.as_bytes()); + term_frequent_0 = index.frequent_hashset.contains(&term_hash_0); + term_phrase_0 = item.1 == QueryType::Phrase; + } else { + term_0 = "".to_string(); + term_frequent_0 = false; + term_phrase_0 = false; + } + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else if i + 2 < len + && (ngram_indexing & NgramSet::NgramRFF as u8 != 0 + && !term_frequent_0 + && term_frequent_1 + && term_frequent_2 + && term_phrase_0 + && term_phrase_1 + && term_phrase_2) + { + let term_string = [term_0.as_str(), term_1.as_str(), term_2.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramRFF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramRFF, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramRFF, + op: QueryType::Phrase, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + }); + + i += 3; + + if len > i { + let item = &non_unique_terms_raw[i]; + term_0 = item.0.clone(); + let term_hash_0 = hash64(term_0.as_bytes()); + term_frequent_0 = index.frequent_hashset.contains(&term_hash_0); + term_phrase_0 = item.1 == QueryType::Phrase; + } else { + term_0 = "".to_string(); + term_frequent_0 = false; + term_phrase_0 = false; + } + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else if i + 2 < len + && (ngram_indexing & NgramSet::NgramFFR as u8 != 0 + && term_frequent_0 + && term_frequent_1 + && !term_frequent_2 + && term_phrase_0 + && term_phrase_1 + && term_phrase_2) + { + let term_string = [term_0.as_str(), term_1.as_str(), term_2.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFFR as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + 
ngram_type: NgramType::NgramFFR, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramFFR, + op: QueryType::Phrase, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + }); + + i += 3; + + if len > i { + let item = &non_unique_terms_raw[i]; + term_0 = item.0.clone(); + let term_hash_0 = hash64(term_0.as_bytes()); + term_frequent_0 = index.frequent_hashset.contains(&term_hash_0); + term_phrase_0 = item.1 == QueryType::Phrase; + } else { + term_0 = "".to_string(); + term_frequent_0 = false; + term_phrase_0 = false; + } + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else if i + 2 < len + && (ngram_indexing & NgramSet::NgramFRF as u8 != 0 + && term_frequent_0 + && !term_frequent_1 + && term_frequent_2 + && term_phrase_0 + && term_phrase_1 + && term_phrase_2) + { + let term_string = [term_0.as_str(), term_1.as_str(), term_2.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFRF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFRF, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramFRF, + op: QueryType::Phrase, + term_ngram_2: term_0.clone(), + term_ngram_1: term_1.clone(), + term_ngram_0: term_2.clone(), + }); + + i += 3; + + if len > i { + let item = &non_unique_terms_raw[i]; + term_0 = item.0.clone(); + let term_hash_0 = hash64(term_0.as_bytes()); + term_frequent_0 = index.frequent_hashset.contains(&term_hash_0); + term_phrase_0 = item.1 == QueryType::Phrase; + } else { + term_0 = "".to_string(); + term_frequent_0 = false; + term_phrase_0 = false; + } + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else if i + 1 < len + && (ngram_indexing & NgramSet::NgramFF as u8 != 0 + && term_frequent_0 + && term_frequent_1 + && term_phrase_0 + && term_phrase_1) + { + let term_string = [term_0.as_str(), term_1.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: 
NgramType::NgramFF, + term_ngram_1: term_0.clone(), + term_ngram_0: term_1.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramFF, + op: QueryType::Phrase, + term_ngram_1: term_0.clone(), + term_ngram_0: term_1.clone(), + + ..Default::default() + }); + + i += 2; + + term_0 = term_2.clone(); + term_frequent_0 = term_frequent_2; + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else if i + 1 < len + && (ngram_indexing & NgramSet::NgramRF as u8 != 0 + && !term_frequent_0 + && term_frequent_1 + && term_phrase_0 + && term_phrase_1) + { + let term_string = [term_0.as_str(), term_1.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramRF as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramRF, + term_ngram_1: term_0.clone(), + term_ngram_0: term_1.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramRF, + op: QueryType::Phrase, + term_ngram_1: term_0.clone(), + term_ngram_0: term_1.clone(), + + ..Default::default() + }); + + i += 2; + + term_0 = term_2.clone(); + term_frequent_0 = term_frequent_2; + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else if i + 1 < len + && (ngram_indexing & NgramSet::NgramFR as u8 != 0 + && term_frequent_0 + && !term_frequent_1 + && term_phrase_0 + && term_phrase_1) + { + let term_string = [term_0.as_str(), term_1.as_str()].join(" "); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.clone(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes) | NgramType::NgramFR as u64, + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::NgramFR, + term_ngram_1: term_0.clone(), + term_ngram_0: term_1.clone(), + + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::NgramFR, + op: QueryType::Phrase, + term_ngram_1: term_0.clone(), + term_ngram_0: term_1.clone(), + + ..Default::default() + }); + + i += 2; + + term_0 = term_2.clone(); + term_frequent_0 = term_frequent_2; + + if len > i + 1 { + let item = &non_unique_terms_raw[i + 1]; + term_1 = item.0.clone(); + let term_hash_1 = hash64(term_1.as_bytes()); + term_frequent_1 = index.frequent_hashset.contains(&term_hash_1); + 
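+                    // Refresh the phrase flag for the refilled lookahead term; every n-gram branch above requires all of its component terms to carry the Phrase flag.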
term_phrase_1 = item.1 == QueryType::Phrase; + } else { + term_1 = "".to_string(); + term_frequent_1 = false; + term_phrase_1 = false; + } + } else { + let term_string = term_0.clone(); + + let term_object = unique_terms.entry(term_string.clone()).or_insert_with(|| { + let term_bytes = term_string.as_bytes(); + TermObject { + term: term_string.to_string(), + key0: hash32(term_bytes) & segment_number_mask1, + key_hash: hash64(term_bytes), + field_positions_vec: vec![Vec::new(); indexed_field_number], + ngram_type: NgramType::SingleTerm, + ..Default::default() + } + }); + term_object.field_positions_vec[indexed_field_id].push(position as u16); + + non_unique_terms.push(NonUniqueTermObject { + term: term_string, + ngram_type: NgramType::SingleTerm, + op: non_unique_terms_raw[i].1.clone(), + ..Default::default() + }); + + i += 1; + + term_0.clone_from(&term_1); + term_1.clone_from(&term_2); + + term_frequent_0 = term_frequent_1; + term_frequent_1 = term_frequent_2; + + term_phrase_0 = term_phrase_1; + term_phrase_1 = term_phrase_2; + }; + + position += 1; + } + } + + *nonunique_terms_count = position; +} + +/// Parse a string into words, using the specified tokenizer type. +pub fn tokenizer_lite( + text: &str, + tokenizer: &TokenizerType, + index: &Shard, +) -> Vec<(String, QueryType)> { + let text_normalized; + let mut non_unique_terms_line: Vec = Vec::new(); + + let mut start = false; + let mut start_pos = 0; + let mut first_part = &text[0..0]; + + match tokenizer { + TokenizerType::AsciiAlphabetic => { + text_normalized = text.to_ascii_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + 'a'..='z' | '"' | '+' | '-' => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + if start { + non_unique_terms_line + .push(text_normalized[start_pos..char.0].to_string()); + } + false + } + }; + } + } + TokenizerType::UnicodeAlphanumeric => { + text_normalized = text.to_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '"' | '+' | '-' | '#' => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + non_unique_terms_line + .push(text_normalized[start_pos..char.0].to_string()); + } + false + } + }; + } + } + TokenizerType::UnicodeAlphanumericFolded => { + text_normalized = fold_diacritics_accents_ligatures_zalgo_umlaut(text); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + '"' | '+' | '-' | '#' => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + let apostroph = APOSTROPH.contains(&char.1); + if start { + if apostroph { + first_part = &text_normalized[start_pos..char.0]; + } else { + if first_part.len() >= 2 { + non_unique_terms_line.push(first_part.to_string()) + } else { + non_unique_terms_line + .push(text_normalized[start_pos..char.0].to_string()); + } + first_part = &text_normalized[0..0]; + } + } else if !apostroph && !first_part.is_empty() { + non_unique_terms_line.push(first_part.to_string()); + first_part = &text_normalized[0..0]; + } + + false + } + }; + } + } + + TokenizerType::Whitespace => { + text_normalized = text.to_owned(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if !token.is_whitespace() => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + if start { + non_unique_terms_line + 
.push(text_normalized[start_pos..char.0].to_string()); + } + false + } + }; + } + } + + TokenizerType::WhitespaceLowercase => { + text_normalized = text.to_ascii_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if !token.is_whitespace() => { + if !start { + start_pos = char.0; + } + true + } + + _ => { + if start { + non_unique_terms_line + .push(text_normalized[start_pos..char.0].to_string()); + } + false + } + }; + } + } + + #[cfg(feature = "zh")] + TokenizerType::UnicodeAlphanumericZH => { + text_normalized = text.to_lowercase(); + for char in text_normalized.char_indices() { + start = match char.1 { + token if regex_syntax::is_word_character(token) => { + if !start { + start_pos = char.0; + } + true + } + + '"' | '+' | '-' | '#' => { + if !start { + start_pos = char.0; + } + true + } + _ => { + if start { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(&text_normalized[start_pos..char.0], true); + non_unique_terms_line.extend(result.0); + } + false + } + }; + } + } + } + + #[cfg(feature = "zh")] + if tokenizer == &TokenizerType::UnicodeAlphanumericZH { + if start { + if first_part.len() >= 2 { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(first_part, true); + non_unique_terms_line.extend(result.0); + } else { + non_unique_terms_line + .push(text_normalized[start_pos..text_normalized.len()].to_string()); + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(&text_normalized[start_pos..text_normalized.len()], true); + non_unique_terms_line.extend(result.0); + } + } else if !first_part.is_empty() { + let result = index + .word_segmentation_option + .as_ref() + .unwrap() + .segment(first_part, true); + non_unique_terms_line.extend(result.0); + } + } + + if tokenizer != &TokenizerType::AsciiAlphabetic + || tokenizer == &TokenizerType::UnicodeAlphanumeric + || tokenizer == &TokenizerType::UnicodeAlphanumericFolded + || tokenizer == &TokenizerType::Whitespace + || tokenizer == &TokenizerType::WhitespaceLowercase + { + if start { + if first_part.len() >= 2 { + non_unique_terms_line.push(first_part.to_string()) + } else { + non_unique_terms_line + .push(text_normalized[start_pos..text_normalized.len()].to_string()); + } + } else if !first_part.is_empty() { + non_unique_terms_line.push(first_part.to_string()) + } + } + + let mut non_unique_terms_raw = Vec::new(); + let query_type = &mut QueryType::Union; + let mut is_phrase = query_type == &QueryType::Phrase; + let mut is_endswith_quote = false; + for term_string in non_unique_terms_line.iter_mut() { + if is_endswith_quote { + return Vec::new(); + } + + let mut query_type_term = if is_phrase { + QueryType::Phrase + } else { + query_type.clone() + }; + if term_string.starts_with('+') || term_string.starts_with('-') { + return Vec::new(); + } + if term_string.starts_with('\"') { + if !non_unique_terms_raw.is_empty() { + return Vec::new(); + } + + is_phrase = true; + *query_type = QueryType::Phrase; + query_type_term = QueryType::Phrase; + *term_string = term_string[1..].to_string(); + } + if term_string.ends_with('\"') { + *query_type = QueryType::Phrase; + *term_string = term_string[0..term_string.len() - 1].to_string(); + is_phrase = false; + is_endswith_quote = true; + } + + if term_string.is_empty() { + continue; + } + + if !index.stop_words.is_empty() && index.stop_words.contains(term_string) { + continue; + } + + let term_string = if let Some(stemmer) = index.stemmer.as_ref() { + 
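+            // Apply the configured stemmer, if any, before the term is emitted.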
stemmer.stem(term_string).to_string() + } else { + term_string.to_string() + }; + + non_unique_terms_raw.push((term_string, query_type_term)); + } + + non_unique_terms_raw +} diff --git a/mobile_app/rust/src/seekstorm/union.rs b/mobile_app/rust/src/seekstorm/union.rs new file mode 100644 index 0000000..b2d95f2 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/union.rs @@ -0,0 +1,1478 @@ +use crate::{ + add_result::{add_result_multiterm_multifield, is_facet_filter}, + compatible::{_blsr_u64, _mm_tzcnt_64}, + geo_search::{decode_morton_2_d, euclidian_distance}, + index::{ + AccessType, CompressionType, FieldType, NonUniquePostingListObjectQuery, + PostingListObjectQuery, QueueObject, ROARING_BLOCK_SIZE, Shard, + }, + intersection::intersection_blockid, + search::{FilterSparse, Ranges, ResultType, SearchResult}, + single::{single_blockid, single_docid}, + utils::{ + block_copy, read_f32, read_f64, read_i8, read_i16, read_i32, read_i64, read_u16, read_u32, + read_u64, write_u64, + }, +}; + +use ahash::AHashSet; +use num_traits::FromPrimitive; + +use std::sync::Arc; +use std::{ + cmp, + sync::atomic::{AtomicUsize, Ordering}, +}; + +use async_recursion::async_recursion; + +/// Union for a single block +#[allow(clippy::too_many_arguments)] +pub(crate) async fn union_docid<'a>( + shard: &'a Shard, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'a>], + query_list: &mut Vec>, + not_query_list: &mut [PostingListObjectQuery<'a>], + block_id: usize, + result_count: &mut i32, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], +) { + for plo in not_query_list.iter_mut() { + let query_list_item_mut = plo; + + let result = query_list_item_mut + .blocks + .binary_search_by(|block| block.block_id.cmp(&(block_id as u32))); + match result { + Ok(p_block) => { + query_list_item_mut.bm25_flag = true; + query_list_item_mut.p_block = p_block as i32 + } + Err(_) => { + query_list_item_mut.bm25_flag = false; + continue; + } + }; + let blo = &query_list_item_mut.blocks[query_list_item_mut.p_block as usize]; + + query_list_item_mut.compression_type = + FromPrimitive::from_i32((blo.compression_type_pointer >> 30) as i32).unwrap(); + + query_list_item_mut.rank_position_pointer_range = + blo.compression_type_pointer & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + let posting_pointer_size_sum = blo.pointer_pivot_p_docid as usize * 2 + + if (blo.pointer_pivot_p_docid as usize) <= blo.posting_count as usize { + ((blo.posting_count as usize + 1) - blo.pointer_pivot_p_docid as usize) * 3 + } else { + 0 + }; + query_list_item_mut.compressed_doc_id_range = + query_list_item_mut.rank_position_pointer_range as usize + posting_pointer_size_sum; + + query_list_item_mut.p_docid = 0; + query_list_item_mut.p_docid_count = + query_list_item_mut.blocks[query_list_item_mut.p_block as usize].posting_count as usize + + 1; + + query_list_item_mut.docid = 0; + + if query_list_item_mut.compression_type == CompressionType::Rle { + query_list_item_mut.p_run_count = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range, + ) as i32; + let startdocid = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range + 2, + ); + let runlength = read_u16( + query_list_item_mut.byte_array, + query_list_item_mut.compressed_doc_id_range + 4, + ); + query_list_item_mut.docid = startdocid as i32; + query_list_item_mut.run_end = (startdocid + runlength) as i32; + 
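+            // The first run of the RLE-compressed block is decoded eagerly; the fields set next track the consumed run length and the byte offset of the following run.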
query_list_item_mut.p_run_sum = runlength as i32; + query_list_item_mut.p_run = 6; + } + } + + let mut valid_term_count = 0; + let mut term_index = 0; + let mut single_term_index = 0; + + for query_list_item_mut in query_list.iter_mut() { + query_list_item_mut.end_flag = query_list_item_mut.end_flag_block + || (query_list_item_mut.blocks[query_list_item_mut.p_block as usize].block_id + != block_id as u32); + + if query_list_item_mut.end_flag { + term_index += 1; + continue; + } + + valid_term_count += 1; + single_term_index = term_index; + term_index += 1; + + query_list_item_mut.p_docid = 0; + query_list_item_mut.p_docid_count = + query_list_item_mut.blocks[query_list_item_mut.p_block as usize].posting_count as usize + + 1; + + query_list_item_mut.compression_type = FromPrimitive::from_u32( + query_list_item_mut.blocks[query_list_item_mut.p_block as usize] + .compression_type_pointer + >> 30, + ) + .unwrap(); + + query_list_item_mut.rank_position_pointer_range = query_list_item_mut.blocks + [query_list_item_mut.p_block as usize] + .compression_type_pointer + & 0b0011_1111_1111_1111_1111_1111_1111_1111; + + query_list_item_mut.pointer_pivot_p_docid = + query_list_item_mut.blocks[query_list_item_mut.p_block as usize].pointer_pivot_p_docid; + + let posting_pointer_size_sum = query_list_item_mut.blocks + [query_list_item_mut.p_block as usize] + .pointer_pivot_p_docid as usize + * 2 + + if (query_list_item_mut.blocks[query_list_item_mut.p_block as usize] + .pointer_pivot_p_docid as usize) + <= query_list_item_mut.blocks[query_list_item_mut.p_block as usize].posting_count + as usize + { + ((query_list_item_mut.blocks[query_list_item_mut.p_block as usize].posting_count + as usize + + 1) + - query_list_item_mut.blocks[query_list_item_mut.p_block as usize] + .pointer_pivot_p_docid as usize) + * 3 + } else { + 0 + }; + query_list_item_mut.compressed_doc_id_range = + query_list_item_mut.rank_position_pointer_range as usize + posting_pointer_size_sum; + query_list_item_mut.docid = 0; + query_list_item_mut.intersect = 0; + query_list_item_mut.ulong_pos = 0; + query_list_item_mut.p_run = -2; + query_list_item_mut.run_end = 0; + } + + if valid_term_count == 0 { + return; + } + + if valid_term_count == 1 { + if result_type == &ResultType::Count && search_result.query_facets.is_empty() { + *result_count += query_list[single_term_index].p_docid_count as i32; + } else { + let skip_facet_count = search_result.skip_facet_count; + search_result.skip_facet_count = false; + + single_docid( + shard, + query_list, + not_query_list, + &query_list[single_term_index].blocks + [query_list[single_term_index].p_block as usize], + single_term_index, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + ) + .await; + + search_result.skip_facet_count = skip_facet_count; + } + return; + }; + + if result_type == &ResultType::Count { + union_count( + shard, + result_count, + search_result, + query_list, + not_query_list, + facet_filter, + block_id, + ) + .await; + return; + } + + if query_list.len() <= 8 { + union_scan_8( + shard, + non_unique_query_list, + query_list, + not_query_list, + block_id, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + ) + .await; + } else { + let mut result_count_local = *result_count; + union_scan_32( + shard, + non_unique_query_list, + query_list, + not_query_list, + block_id, + result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + ) + .await; + + if query_list.len() > 32 
&& result_type == &ResultType::TopkCount { + union_count( + shard, + &mut result_count_local, + search_result, + query_list, + not_query_list, + facet_filter, + block_id, + ) + .await; + *result_count = result_count_local; + } + } +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn union_blockid<'a>( + shard: &'a Shard, + non_unique_query_list: &mut Vec>, + query_list: &mut Vec>, + not_query_list: &mut [PostingListObjectQuery<'a>], + result_count_arc: &Arc, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], +) { + let item_0 = &query_list[0]; + let enable_inter_query_threading_multi = + if !shard.enable_search_quality_test && shard.enable_inter_query_threading_auto { + item_0.posting_count / item_0.p_block_max as u32 > 10 + } else { + shard.enable_inter_query_threading + }; + let mut task_list = Vec::new(); + + loop { + let mut break_loop = true; + let mut block_id_min = usize::MAX; + + for plo in query_list.iter_mut() { + if !plo.end_flag_block { + let block_id = plo.blocks[plo.p_block as usize].block_id as usize; + + if block_id < block_id_min { + block_id_min = block_id; + } + } + } + + if !enable_inter_query_threading_multi { + if shard.meta.access_type == AccessType::Mmap { + for query_list_item_mut in query_list.iter_mut() { + let segment = &shard.segments_index[query_list_item_mut.key0 as usize]; + query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[block_id_min].0 + ..segment.byte_array_blocks_pointer[block_id_min].0 + + segment.byte_array_blocks_pointer[block_id_min].1]; + } + for nonunique_query_list_item_mut in non_unique_query_list.iter_mut() { + let segment = + &shard.segments_index[nonunique_query_list_item_mut.key0 as usize]; + nonunique_query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[block_id_min].0 + ..segment.byte_array_blocks_pointer[block_id_min].0 + + segment.byte_array_blocks_pointer[block_id_min].1]; + } + for not_query_list_item_mut in not_query_list.iter_mut() { + let segment = &shard.segments_index[not_query_list_item_mut.key0 as usize]; + not_query_list_item_mut.byte_array = + &shard.index_file_mmap[segment.byte_array_blocks_pointer[block_id_min].0 + ..segment.byte_array_blocks_pointer[block_id_min].0 + + segment.byte_array_blocks_pointer[block_id_min].1]; + } + } else { + for query_list_item_mut in query_list.iter_mut() { + query_list_item_mut.byte_array = &shard.segments_index + [query_list_item_mut.key0 as usize] + .byte_array_blocks[block_id_min]; + } + for nonunique_query_list_item_mut in non_unique_query_list.iter_mut() { + nonunique_query_list_item_mut.byte_array = &shard.segments_index + [nonunique_query_list_item_mut.key0 as usize] + .byte_array_blocks[block_id_min]; + } + for not_query_list_item_mut in not_query_list.iter_mut() { + not_query_list_item_mut.byte_array = &shard.segments_index + [not_query_list_item_mut.key0 as usize] + .byte_array_blocks[block_id_min]; + } + } + + let mut result_count_local = 0; + union_docid( + shard, + non_unique_query_list, + query_list, + not_query_list, + block_id_min, + &mut result_count_local, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + ) + .await; + + result_count_arc.fetch_add(result_count_local as usize, Ordering::Relaxed); + } else { + let mut query_list_copy: Vec = Vec::new(); + let mut non_unique_query_list_copy: Vec = Vec::new(); + + for x in &mut *query_list { + 
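+                // Give the spawned task its own copies of the per-term query state; note that the task body below is currently a placeholder that only increments the shared result counter.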
query_list_copy.push(x.clone()); + } + + for x in &mut *non_unique_query_list { + let y = x.clone(); + non_unique_query_list_copy.push(y); + } + + let result_count_clone = result_count_arc.clone(); + + task_list.push(tokio::spawn(async move { + let result_count_local = 1; + result_count_clone.fetch_add(result_count_local, Ordering::Relaxed); + })); + } + + for plo in query_list.iter_mut() { + if !plo.end_flag_block { + let block_id = plo.blocks[plo.p_block as usize].block_id as usize; + if block_id == block_id_min { + if plo.p_block < plo.p_block_max - 1 { + plo.p_block += 1; + break_loop = false; + } else { + plo.end_flag_block = true; + } + } else { + break_loop = false; + } + } + } + + if break_loop { + break; + } + } +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn union_scan_8<'a>( + shard: &'a Shard, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'a>], + query_list: &mut [PostingListObjectQuery<'a>], + not_query_list: &mut [PostingListObjectQuery<'a>], + block_id: usize, + result_count: &mut i32, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], +) { + let union_max = 8usize; + + let mut query_terms_bitset_table: [u8; ROARING_BLOCK_SIZE] = [0u8; ROARING_BLOCK_SIZE]; + let mut result_count_local = 0; + + query_list.sort_by(|a, b| { + b.blocks[b.p_block as usize] + .max_block_score + .partial_cmp(&a.blocks[a.p_block as usize].max_block_score) + .unwrap() + }); + + let mut max_score = 0.0; + + for (i, plo) in query_list.iter_mut().take(union_max).enumerate() { + if plo.end_flag { + continue; + } + + plo.p_docid = 0; + let mask = 1 << i; + max_score += plo.blocks[plo.p_block as usize].max_block_score; + + if plo.compression_type == CompressionType::Bitmap { + for ulong_pos in 0u64..1024 { + let mut intersect: u64 = read_u64( + &plo.byte_array[plo.compressed_doc_id_range..], + ulong_pos as usize * 8, + ); + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as u64; + intersect = unsafe { _blsr_u64(intersect) }; + + let docid = ((ulong_pos << 6) + bit_pos) as usize; + query_terms_bitset_table[docid] |= mask; + } + } + } else if plo.compression_type == CompressionType::Array { + for i in 0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + + query_terms_bitset_table[docid] |= mask; + } + } else { + let runs_count = read_u16(&plo.byte_array[plo.compressed_doc_id_range..], 0) as i32; + + for ii in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + ii as usize * 2, + ) as usize; + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (ii + 1) as usize * 2, + ) as usize; + + for j in 0..=runlength { + let docid = startdocid + j; + + query_terms_bitset_table[docid] |= mask; + } + } + } + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + if plo.compression_type == CompressionType::Bitmap { + for ulong_pos in 0u64..1024 { + let mut intersect: u64 = read_u64( + &plo.byte_array[plo.compressed_doc_id_range..], + ulong_pos as usize * 8, + ); + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as u64; + intersect = unsafe { _blsr_u64(intersect) }; + + let docid = ((ulong_pos << 6) + bit_pos) as usize; + query_terms_bitset_table[docid] = 0; + } + } + } else if plo.compression_type == CompressionType::Array { + for i in 
0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + + query_terms_bitset_table[docid] = 0; + } + } else { + let runs_count = read_u16(&plo.byte_array[plo.compressed_doc_id_range..], 0) as i32; + + for ii in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + ii as usize * 2, + ) as usize; + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (ii + 1) as usize * 2, + ) as usize; + + for j in 0..=runlength { + let docid = startdocid + j; + + query_terms_bitset_table[docid] = 0; + } + } + } + } + + let block_skip = search_result.topk_candidates.current_heap_size >= top_k + && max_score <= search_result.topk_candidates._elements[0].score + && search_result.topk_candidates.result_sort.is_empty(); + + let query_list_len = cmp::min(query_list.len(), union_max); + + let query_combination_count = 1 << query_list_len; + let mut query_terms_max_score_sum_table: Vec = vec![0.0; query_combination_count]; + for (i, max_score) in query_terms_max_score_sum_table.iter_mut().enumerate() { + for (j, term) in query_list.iter().enumerate().take(query_list_len) { + if ((1 << j) & i) > 0 { + *max_score += term.blocks[term.p_block as usize].max_block_score + } + } + } + + let mut p_docid_array = vec![0u16; union_max]; + + let mut _result_count = 0; + let block_id_msb = block_id << 16; + + for (i, query_terms_bitset) in query_terms_bitset_table.iter().enumerate() { + if *query_terms_bitset > 0 { + result_count_local += 1; + + if !block_skip + && (search_result.topk_candidates.current_heap_size < top_k + || query_terms_max_score_sum_table[*query_terms_bitset as usize] + > search_result.topk_candidates._elements[0].score) + { + for (j, query_term) in query_list.iter_mut().take(query_list_len).enumerate() { + query_term.bm25_flag = (query_terms_bitset & (1 << j)) > 0; + + query_term.p_docid = p_docid_array[j] as usize; + } + + add_result_multiterm_multifield( + shard, + block_id_msb | i, + &mut _result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + false, + f32::MAX, + false, + ); + } + + if !block_skip { + for (j, item) in p_docid_array.iter_mut().take(query_list_len).enumerate() { + *item += ((query_terms_bitset >> j) & 1) as u16 + } + } + } + } + + *result_count += result_count_local; +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn union_scan_32<'a>( + shard: &'a Shard, + non_unique_query_list: &mut [NonUniquePostingListObjectQuery<'a>], + query_list: &mut [PostingListObjectQuery<'a>], + not_query_list: &mut [PostingListObjectQuery<'a>], + block_id: usize, + result_count: &mut i32, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], +) { + let union_max = 32usize; + + let mut query_terms_bitset_table: [u32; ROARING_BLOCK_SIZE] = [0u32; ROARING_BLOCK_SIZE]; + let mut result_count_local = 0; + + query_list.sort_by(|a, b| { + b.blocks[b.p_block as usize] + .max_block_score + .partial_cmp(&a.blocks[a.p_block as usize].max_block_score) + .unwrap() + }); + + let mut max_score = 0.0; + let mut mask = u32::MAX >> (32 - query_list.len()); + for plo in query_list.iter_mut().take(union_max).rev() { + if plo.end_flag { + continue; + } + max_score += plo.blocks[plo.p_block as usize].max_block_score; + + if max_score > search_result.topk_candidates._elements[0].score { 
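+            // Terms are walked from the lowest to the highest max block score; a term's bit is dropped from the mask as long as the running score sum cannot beat the current top-k threshold, so documents matching only dropped terms can be skipped later.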
+ break; + } + + mask >>= 1; + } + + let mut max_score = 0.0; + + for (i, plo) in query_list.iter_mut().take(union_max).enumerate() { + if plo.end_flag { + continue; + } + + plo.p_docid = 0; + let mask = 1 << i; + max_score += plo.blocks[plo.p_block as usize].max_block_score; + + if plo.compression_type == CompressionType::Bitmap { + for ulong_pos in 0u64..1024 { + let mut intersect: u64 = read_u64( + &plo.byte_array[plo.compressed_doc_id_range..], + ulong_pos as usize * 8, + ); + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as u64; + intersect = unsafe { _blsr_u64(intersect) }; + + let docid = ((ulong_pos << 6) + bit_pos) as usize; + query_terms_bitset_table[docid] |= mask; + } + } + } else if plo.compression_type == CompressionType::Array { + for i in 0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + + query_terms_bitset_table[docid] |= mask; + } + } else { + let runs_count = read_u16(&plo.byte_array[plo.compressed_doc_id_range..], 0) as i32; + + for ii in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + ii as usize * 2, + ) as usize; + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (ii + 1) as usize * 2, + ) as usize; + + for j in 0..=runlength { + let docid = startdocid + j; + + query_terms_bitset_table[docid] |= mask; + } + } + } + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + if plo.compression_type == CompressionType::Bitmap { + for ulong_pos in 0u64..1024 { + let mut intersect: u64 = read_u64( + &plo.byte_array[plo.compressed_doc_id_range..], + ulong_pos as usize * 8, + ); + + while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as u64; + intersect = unsafe { _blsr_u64(intersect) }; + + let docid = ((ulong_pos << 6) + bit_pos) as usize; + query_terms_bitset_table[docid] = 0; + } + } + } else if plo.compression_type == CompressionType::Array { + for i in 0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + + query_terms_bitset_table[docid] = 0; + } + } else { + let runs_count = read_u16(&plo.byte_array[plo.compressed_doc_id_range..], 0) as i32; + + for ii in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + ii as usize * 2, + ) as usize; + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (ii + 1) as usize * 2, + ) as usize; + + for j in 0..=runlength { + let docid = startdocid + j; + + query_terms_bitset_table[docid] = 0; + } + } + } + } + + let block_skip = search_result.topk_candidates.current_heap_size >= top_k + && max_score <= search_result.topk_candidates._elements[0].score + && search_result.topk_candidates.result_sort.is_empty(); + + let query_list_len = cmp::min(query_list.len(), union_max); + + let mut p_docid_array = vec![0u16; union_max]; + + let mut _result_count = 0; + let block_id_msb = block_id << 16; + + for (i, query_terms_bitset) in query_terms_bitset_table.iter().enumerate() { + if *query_terms_bitset > 0 { + result_count_local += 1; + + if !block_skip + && (search_result.topk_candidates.current_heap_size < top_k + || query_terms_bitset & mask > 0) + { + let mut query_terms_max_score_sum = 0f32; + for (j, plo) in query_list.iter().enumerate() { + if (query_terms_bitset & (1 << j)) > 0 { + query_terms_max_score_sum += + plo.blocks[plo.p_block as 
usize].max_block_score; + } + } + if query_terms_max_score_sum > search_result.topk_candidates._elements[0].score { + for (j, query_term) in query_list.iter_mut().take(query_list_len).enumerate() { + query_term.bm25_flag = (query_terms_bitset & (1 << j)) > 0; + + query_term.p_docid = p_docid_array[j] as usize; + } + + add_result_multiterm_multifield( + shard, + block_id_msb | i, + &mut _result_count, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + non_unique_query_list, + query_list, + not_query_list, + false, + f32::MAX, + false, + ); + } + } + + if !block_skip { + for (j, item) in p_docid_array.iter_mut().take(query_list_len).enumerate() { + *item += ((query_terms_bitset >> j) & 1) as u16 + } + } + + continue; + } + } + + *result_count += result_count_local; +} + +pub(crate) async fn union_count<'a>( + shard: &'a Shard, + result_count: &mut i32, + search_result: &mut SearchResult<'_>, + + query_list: &mut [PostingListObjectQuery<'a>], + not_query_list: &mut [PostingListObjectQuery<'a>], + facet_filter: &[FilterSparse], + block_id: usize, +) { + query_list.sort_unstable_by(|a, b| b.p_docid_count.partial_cmp(&a.p_docid_count).unwrap()); + + let mut result_count_local = + query_list[0].blocks[query_list[0].p_block as usize].posting_count as u32 + 1; + + let mut bitmap_0: [u8; 8192] = [0u8; 8192]; + + for (i, plo) in query_list.iter_mut().enumerate() { + if plo.end_flag { + continue; + } + + if plo.compression_type == CompressionType::Bitmap { + if i == 0 { + block_copy( + plo.byte_array, + plo.compressed_doc_id_range, + &mut bitmap_0, + 0, + 8192, + ); + } else { + for i in (0..8192).step_by(8) { + let x1 = read_u64(&bitmap_0, i); + let x2 = read_u64(&plo.byte_array[plo.compressed_doc_id_range..], i); + result_count_local += u64::count_ones(!x1 & x2); + write_u64(x1 | x2, &mut bitmap_0, i); + } + } + } else if plo.compression_type == CompressionType::Array { + if i == 0 { + for i in 0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + let byte_index = docid >> 3; + let bit_index = docid & 7; + + bitmap_0[byte_index] |= 1 << bit_index; + } + } else { + for i in 0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + let byte_index = docid >> 3; + let bit_index = docid & 7; + + if bitmap_0[byte_index] & (1 << bit_index) == 0 { + bitmap_0[byte_index] |= 1 << bit_index; + result_count_local += 1; + } + } + } + } else { + let runs_count = read_u16(&plo.byte_array[plo.compressed_doc_id_range..], 0) as i32; + + if i == 0 { + for ii in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + ii as usize * 2, + ) as usize; + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (ii + 1) as usize * 2, + ) as usize; + + for j in 0..=runlength { + let docid = startdocid + j; + let byte_index = docid >> 3; + let bit_index = docid & 7; + + bitmap_0[byte_index] |= 1 << bit_index; + } + } + } else { + for ii in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + ii as usize * 2, + ) as usize; + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (ii + 1) as usize * 2, + ) as usize; + + for j in 0..=runlength { + let docid = startdocid + j; + let byte_index = docid >> 3; + let bit_index = docid & 7; + + if bitmap_0[byte_index] & (1 << bit_index) == 0 { + bitmap_0[byte_index] |= 
1 << bit_index; + result_count_local += 1; + } + } + } + } + } + } + + for plo in not_query_list.iter_mut() { + if !plo.bm25_flag { + continue; + } + + match plo.compression_type { + CompressionType::Array => { + for i in 0..plo.p_docid_count { + let docid = + read_u16(&plo.byte_array[plo.compressed_doc_id_range..], i * 2) as usize; + let byte_index = docid >> 3; + let bit_index = docid & 7; + if bitmap_0[byte_index] & (1 << bit_index) != 0 { + bitmap_0[byte_index] &= !(1 << bit_index); + result_count_local -= 1; + } + } + } + + CompressionType::Rle => { + let runs_count = read_u16(&plo.byte_array[plo.compressed_doc_id_range..], 0) as i32; + + for i in (1..(runs_count << 1) + 1).step_by(2) { + let startdocid = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + i as usize * 2, + ); + let runlength = read_u16( + &plo.byte_array[plo.compressed_doc_id_range..], + (i + 1) as usize * 2, + ); + + for j in 0..=runlength { + let docid = (startdocid + j) as usize; + + let byte_index = docid >> 3; + let bit_index = docid & 7; + if bitmap_0[byte_index] & (1 << bit_index) != 0 { + bitmap_0[byte_index] &= !(1 << bit_index); + result_count_local -= 1; + } + } + } + } + + CompressionType::Bitmap => { + for i in (0..8192).step_by(8) { + let x1 = read_u64(&bitmap_0, i); + let x2 = read_u64(&plo.byte_array[plo.compressed_doc_id_range..], i); + result_count_local -= u64::count_ones(x1 & x2); + write_u64(x1 & !x2, &mut bitmap_0, i); + } + } + + _ => {} + } + } + + if !shard.delete_hashset.is_empty() { + for docid in shard.delete_hashset.iter() { + if block_id == docid >> 16 { + let byte_index = (docid >> 3) & 8191; + let bit_mask = 1 << (docid & 7); + if bitmap_0[byte_index] & bit_mask > 0 { + bitmap_0[byte_index] &= !bit_mask; + result_count_local -= 1; + } + } + } + } + + if !search_result.query_facets.is_empty() || !facet_filter.is_empty() { + let block_id_msb = block_id << 16; + for ulong_pos in 0usize..1024 { + let ulong_pos_msb = block_id_msb | ulong_pos << 6; + let mut intersect = read_u64(&bitmap_0, ulong_pos * 8); + 'next: while intersect != 0 { + let bit_pos = unsafe { _mm_tzcnt_64(intersect) } as usize; + intersect = unsafe { _blsr_u64(intersect) }; + + let docid = ulong_pos_msb | bit_pos; + + if !facet_filter.is_empty() && is_facet_filter(shard, facet_filter, docid) { + result_count_local -= 1; + continue 'next; + } + + for (i, facet) in shard.facets.iter().enumerate() { + if search_result.query_facets[i].length == 0 { + continue; + } + + let facet_value_id = match &search_result.query_facets[i].ranges { + Ranges::U8(_range_type, ranges) => { + let facet_value = shard.facets_file_mmap + [(shard.facets_size_sum * docid) + facet.offset]; + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::U16(_range_type, ranges) => { + let facet_value = read_u16( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::U32(_range_type, ranges) => { + let facet_value = read_u32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::U64(_range_type, ranges) => { + let facet_value = read_u64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + 
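+                            // Map the raw facet value to its bucket by binary search over the bucket lower bounds; a miss selects the preceding bucket.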
.binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I8(_range_type, ranges) => { + let facet_value = read_i8( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I16(_range_type, ranges) => { + let facet_value = read_i16( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I32(_range_type, ranges) => { + let facet_value = read_i32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::I64(_range_type, ranges) => { + let facet_value = read_i64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::Timestamp(_range_type, ranges) => { + let facet_value = read_i64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by_key(&facet_value, |range| range.1) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::F32(_range_type, ranges) => { + let facet_value = read_f32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by(|range| { + range.1.partial_cmp(&facet_value).unwrap() + }) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::F64(_range_type, ranges) => { + let facet_value = read_f64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + ranges + .binary_search_by(|range| { + range.1.partial_cmp(&facet_value).unwrap() + }) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + Ranges::Point(_range_type, ranges, base, unit) => { + let facet_value = read_u64( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ); + let facet_value_distance = + euclidian_distance(base, &decode_morton_2_d(facet_value), unit); + ranges + .binary_search_by(|range| { + range.1.partial_cmp(&facet_value_distance).unwrap() + }) + .map_or_else(|idx| idx as u16 - 1, |idx| idx as u16) + as u32 + } + + _ => { + if facet.field_type == FieldType::String16 + || facet.field_type == FieldType::StringSet16 + { + read_u16( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ) as u32 + } else { + read_u32( + &shard.facets_file_mmap, + (shard.facets_size_sum * docid) + facet.offset, + ) + } + } + }; + + *search_result.query_facets[i] + .values + .entry(facet_value_id) + .or_insert(0) += 1; + } + } + } + } + + *result_count += result_count_local as i32; +} + +#[allow(clippy::too_many_arguments)] +#[allow(clippy::ptr_arg)] +pub(crate) async fn union_docid_2<'a>( + shard: &'a Shard, + non_unique_query_list: &mut Vec>, + query_list: &mut Vec>, + not_query_list: &mut Vec>, + result_count_arc: &Arc, + search_result: &mut SearchResult<'_>, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + matching_blocks: &mut i32, + query_term_count: usize, +) { + let filtered = 
!not_query_list.is_empty() || !field_filter_set.is_empty(); + let mut count = 0; + if filtered { + single_blockid( + shard, + non_unique_query_list, + &mut query_list[0..1].to_vec(), + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Count, + field_filter_set, + facet_filter, + matching_blocks, + ) + .await; + + single_blockid( + shard, + non_unique_query_list, + &mut query_list[1..2].to_vec(), + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Count, + field_filter_set, + facet_filter, + matching_blocks, + ) + .await; + + count = result_count_arc.load(Ordering::Relaxed); + result_count_arc.store(0, Ordering::Relaxed); + } + + intersection_blockid( + shard, + non_unique_query_list, + query_list, + not_query_list, + result_count_arc, + search_result, + top_k, + result_type, + field_filter_set, + facet_filter, + matching_blocks, + false, + query_term_count, + ) + .await; + + let mut result_count_local = if filtered { + count + } else { + (query_list[0].posting_count + query_list[1].posting_count) as usize + }; + let result_count_global = result_count_arc.load(Ordering::Relaxed); + if result_count_local > result_count_global { + result_count_local -= result_count_global + } + + if result_type == &ResultType::Count { + result_count_arc.store(result_count_local, Ordering::Relaxed); + return; + } + + if (search_result.topk_candidates.current_heap_size < top_k) + || (query_list[0].max_list_score > search_result.topk_candidates._elements[0].score) + { + for i in 0..search_result.topk_candidates.current_heap_size { + search_result.topk_candidates.docid_hashset.insert( + search_result.topk_candidates._elements[i].doc_id, + search_result.topk_candidates._elements[i].score, + ); + } + + single_blockid( + shard, + non_unique_query_list, + &mut query_list[0..1].to_vec(), + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Topk, + field_filter_set, + facet_filter, + matching_blocks, + ) + .await; + } + + if (search_result.topk_candidates.current_heap_size < top_k) + || (query_list[1].max_list_score > search_result.topk_candidates._elements[0].score) + { + for i in 0..search_result.topk_candidates.current_heap_size { + search_result.topk_candidates.docid_hashset.insert( + search_result.topk_candidates._elements[i].doc_id, + search_result.topk_candidates._elements[i].score, + ); + } + + single_blockid( + shard, + non_unique_query_list, + &mut query_list[1..2].to_vec(), + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Topk, + field_filter_set, + facet_filter, + matching_blocks, + ) + .await; + } + + result_count_arc.store(result_count_local, Ordering::Relaxed); +} + +#[allow(clippy::too_many_arguments)] +#[async_recursion] +pub(crate) async fn union_docid_3<'a>( + shard: &'a Shard, + non_unique_query_list: &mut Vec>, + query_queue: &'a mut Vec>, + not_query_list: &mut Vec>, + + result_count_arc: &Arc, + search_result: &mut SearchResult, + top_k: usize, + result_type: &ResultType, + field_filter_set: &AHashSet, + facet_filter: &[FilterSparse], + matching_blocks: &mut i32, + recursion_count: usize, + query_term_count: usize, +) { + let queue_object = query_queue.remove(0); + + let mut query_list = queue_object.query_list; + + if result_type == &ResultType::Topk || result_type == &ResultType::TopkCount { + if query_list.len() >= 3 { + intersection_blockid( + shard, + non_unique_query_list, + &mut query_list, + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Topk, + 
field_filter_set, + facet_filter, + matching_blocks, + false, + query_term_count, + ) + .await; + + for j in 0..search_result.topk_candidates.current_heap_size { + search_result.topk_candidates.docid_hashset.insert( + search_result.topk_candidates._elements[j].doc_id, + search_result.topk_candidates._elements[j].score, + ); + } + + { + for i in queue_object.query_index..query_list.len() { + let ii = query_list.len() - 1 - i; + + for plo in query_list.iter_mut() { + plo.p_block = 0; + } + + let list = if ii == 0 { + query_list[1..query_list.len()].to_vec() + } else if ii == query_list.len() - 1 { + query_list[0..query_list.len() - 1].to_vec() + } else { + [&query_list[0..ii], &query_list[ii + 1..query_list.len()]].concat() + }; + + let mut max_score = 0.0; + for term in list.iter() { + max_score += term.max_list_score; + } + + if search_result.topk_candidates.current_heap_size < top_k + || max_score > search_result.topk_candidates._elements[0].score + { + if !query_queue.is_empty() + && max_score > query_queue[query_queue.len() - 1].max_score + { + let pos = query_queue + .binary_search_by(|e| { + e.max_score + .partial_cmp(&max_score) + .expect("Couldn't compare values") + .reverse() + }) + .unwrap_or_else(|e| e); + query_queue.insert( + pos, + QueueObject { + query_list: list, + query_index: i, + max_score, + }, + ); + } else { + query_queue.push(QueueObject { + query_list: list, + query_index: i, + max_score, + }); + } + }; + } + } + } else { + union_docid_2( + shard, + non_unique_query_list, + &mut query_list, + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Topk, + field_filter_set, + facet_filter, + matching_blocks, + query_term_count, + ) + .await; + } + + if !query_queue.is_empty() + && (search_result.topk_candidates.current_heap_size < top_k + || query_queue.first().unwrap().max_score + > search_result.topk_candidates._elements[0].score) + { + for i in 0..search_result.topk_candidates.current_heap_size { + search_result.topk_candidates.docid_hashset.insert( + search_result.topk_candidates._elements[i].doc_id, + search_result.topk_candidates._elements[i].score, + ); + } + + if recursion_count < 200 { + union_docid_3( + shard, + non_unique_query_list, + query_queue, + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Topk, + field_filter_set, + facet_filter, + matching_blocks, + recursion_count + 1, + query_term_count, + ) + .await; + } + } + } + + if result_type == &ResultType::Count || result_type == &ResultType::TopkCount { + for plo in query_list.iter_mut() { + plo.p_block = 0; + } + + result_count_arc.store(0, Ordering::Relaxed); + + union_blockid( + shard, + non_unique_query_list, + &mut query_list, + not_query_list, + result_count_arc, + search_result, + top_k, + &ResultType::Count, + field_filter_set, + facet_filter, + ) + .await; + } +} diff --git a/mobile_app/rust/src/seekstorm/utils.rs b/mobile_app/rust/src/seekstorm/utils.rs new file mode 100644 index 0000000..0104864 --- /dev/null +++ b/mobile_app/rust/src/seekstorm/utils.rs @@ -0,0 +1,167 @@ +pub(crate) fn write_u8_ref(value: u8, vec8: &mut [u8], pos: &mut usize) { + vec8[*pos] = value; + *pos += 1; +} + +pub(crate) fn write_u16_ref(value: u16, vec8: &mut [u8], pos: &mut usize) { + vec8[*pos..(*pos + 2)].copy_from_slice(&value.to_le_bytes()); + *pos += 2; +} + +pub(crate) fn write_u32_ref(value: u32, vec8: &mut [u8], pos: &mut usize) { + vec8[*pos..(*pos + 4)].copy_from_slice(&value.to_le_bytes()); + *pos += 4; +} + +pub(crate) fn write_u64_ref(value: u64, vec8: 
&mut [u8], pos: &mut usize) { + vec8[*pos..(*pos + 8)].copy_from_slice(&value.to_le_bytes()); + *pos += 8; +} + +pub(crate) fn write_u16(value: u16, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 2)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_u32(value: u32, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 4)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_u64(value: u64, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 8)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_i8(value: i8, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 1)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_i16(value: i16, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 2)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_i32(value: i32, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 4)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_i64(value: i64, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 8)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_f32(value: f32, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 4)].copy_from_slice(&value.to_le_bytes()); +} + +pub(crate) fn write_f64(value: f64, vec8: &mut [u8], pos: usize) { + vec8[pos..(pos + 8)].copy_from_slice(&value.to_le_bytes()); +} + +#[inline] +pub(crate) fn read_u8_ref(vec8: &[u8], pos: &mut usize) -> u8 { + *pos += 1; + vec8[*pos - 1] +} + +#[inline] +pub(crate) fn read_u16_ref(vec8: &[u8], pos: &mut usize) -> u16 { + *pos += 2; + u16::from_le_bytes(vec8[*pos - 2..*pos].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_u32_ref(vec8: &[u8], pos: &mut usize) -> u32 { + *pos += 4; + u32::from_le_bytes(vec8[*pos - 4..*pos].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_u64_ref(vec8: &[u8], pos: &mut usize) -> u64 { + *pos += 8; + u64::from_le_bytes(vec8[*pos - 8..*pos].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_u8(vec8: &[u8], pos: usize) -> u8 { + vec8[pos] +} + +#[inline] +pub(crate) fn read_i8(vec8: &[u8], pos: usize) -> i8 { + i8::from_le_bytes(vec8[pos..pos + 1].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_u16(vec8: &[u8], pos: usize) -> u16 { + u16::from_le_bytes(vec8[pos..pos + 2].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_i16(vec8: &[u8], pos: usize) -> i16 { + i16::from_le_bytes(vec8[pos..pos + 2].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_u32(vec8: &[u8], pos: usize) -> u32 { + u32::from_le_bytes(vec8[pos..pos + 4].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_i32(vec8: &[u8], pos: usize) -> i32 { + i32::from_le_bytes(vec8[pos..pos + 4].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_u64(vec8: &[u8], pos: usize) -> u64 { + u64::from_le_bytes(vec8[pos..pos + 8].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_i64(vec8: &[u8], pos: usize) -> i64 { + i64::from_le_bytes(vec8[pos..pos + 8].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_f32(vec8: &[u8], pos: usize) -> f32 { + f32::from_le_bytes(vec8[pos..pos + 4].try_into().unwrap()) +} + +#[inline] +pub(crate) fn read_f64(vec8: &[u8], pos: usize) -> f64 { + f64::from_le_bytes(vec8[pos..pos + 8].try_into().unwrap()) +} + +pub(crate) fn block_copy_mut( + source: &mut [u8], + source_offset: usize, + destination: &mut [u8], + destination_offset: usize, + len: usize, +) { + destination[destination_offset..(destination_offset + len)] + .copy_from_slice(&source[source_offset..(source_offset + len)]); +} + +pub(crate) fn block_copy( + source: &[u8], + source_offset: 
usize,
+    destination: &mut [u8],
+    destination_offset: usize,
+    len: usize,
+) {
+    destination[destination_offset..(destination_offset + len)]
+        .copy_from_slice(&source[source_offset..(source_offset + len)]);
+}
+
+/// Truncates a string to a maximum number of characters.
+pub fn truncate(source: &str, max_chars: usize) -> &str {
+    match source.char_indices().nth(max_chars) {
+        None => source,
+        Some((idx, _)) => &source[..idx],
+    }
+}
+
+/// Returns a substring of the given string, starting at the specified index and with the specified length.
+pub fn substring(source: &str, start: usize, length: usize) -> String {
+    if source.len() <= start + length {
+        return source.to_string();
+    }
+    source.chars().skip(start).take(length).collect()
+}
diff --git a/mobile_app/rust/src/seekstorm/word_segmentation.rs b/mobile_app/rust/src/seekstorm/word_segmentation.rs
new file mode 100644
index 0000000..0400eea
--- /dev/null
+++ b/mobile_app/rust/src/seekstorm/word_segmentation.rs
@@ -0,0 +1,185 @@
+use std::{
+    cmp,
+    io::{BufRead, BufReader},
+};
+
+use ahash::AHashMap;
+
+#[cfg(feature = "zh")]
+static DICTIONARY_TXT: &str =
+    include_str!("../../assets/dictionaries/frequency_dictionary_zh_cn_349_045.txt");
+
+/// word_segmentation_tm: Fast Word Segmentation with Triangular Matrix
+/// Rust port of the original C# implementation: https://github.com/wolfgarbe/WordSegmentationTM
+/// Copyright (C) 2024 Wolf Garbe
+/// Author: Wolf Garbe wolf.garbe@seekstorm.com
+/// URL: https://github.com/wolfgarbe/word_segmentation_tm
+/// Description: https://seekstorm.com/blog/fast-word-segmentation-noisy-text/
+/// Finds the best word segmentation for an input string.
+/// input_str: The string being word segmented.
+/// maximum_dictionary_word_length=max_segmentation_word_length: The maximum word length that should be considered.
+/// result: A tuple of the suggested word-segmented text and the sum of logarithmic word occurrence probabilities.
+#[cfg(feature = "zh")]
+pub struct WordSegmentationTM {
+    pub n: f64,
+    pub dictionary: AHashMap<Vec<char>, f64>,
+    pub maximum_dictionary_word_length: usize,
+    pub probability_log_estimation: Vec<f64>,
+}
+
+#[cfg(feature = "zh")]
+impl WordSegmentationTM {
+    /// Create a new instance of WordSegmentationTM
+    pub(crate) fn new() -> Self {
+        WordSegmentationTM {
+            n: 0.0,
+            dictionary: AHashMap::new(),
+            maximum_dictionary_word_length: 0usize,
+            probability_log_estimation: Vec::new(),
+        }
+    }
+
+    /// Load dictionary entries from the embedded word/frequency-count list (DICTIONARY_TXT).
+    /// Merges with any dictionary data already loaded.
+    /// term_index: The column position of the word.
+    /// count_index: The column position of the frequency count.
+    /// skip_ascii: Skip entries whose key is pure ASCII.
+    /// result: True once the dictionary has been loaded.
+ pub fn load_dictionary( + &mut self, + term_index: usize, + count_index: usize, + skip_ascii: bool, + ) -> bool { + let reader = BufReader::new(DICTIONARY_TXT.as_bytes()); + + let mut count_sum = 0; + + for line in reader.lines() { + let line_string = line.unwrap(); + + let line_parts: Vec<&str> = line_string.split_ascii_whitespace().collect(); + if line_parts.len() >= 2 { + let key = line_parts[term_index]; + if skip_ascii && key.is_ascii() { + continue; + } + + if let Ok(count) = line_parts[count_index].parse::() { + let key_len = key.chars().count(); + + if key_len > self.maximum_dictionary_word_length { + self.maximum_dictionary_word_length = key_len; + } + + self.dictionary.insert(key.chars().collect(), count as f64); + count_sum += count; + } + } + } + + self.n = (count_sum * 3) as f64; + + for item in self.dictionary.iter_mut() { + *item.1 = (*item.1 / self.n).log10(); + } + + for i in 0..self.maximum_dictionary_word_length { + self.probability_log_estimation + .push((10.0f64 / self.n / (i + 1).pow(10) as f64).log10() * 10.0f64); + } + + true + } + + pub fn segment(&self, input: &str, skip_ascii: bool) -> (Vec, f64) { + let mut result_array: Vec = Vec::new(); + let mut probability_log_sum_best = 0.0; + + if !input.is_empty() { + if skip_ascii && input.is_ascii() { + return (vec![input.to_string()], 0.0); + } + + let input_chars: Vec = input.chars().collect(); + + let array_size = cmp::min(self.maximum_dictionary_word_length, input_chars.len()); + let array_width = ((input_chars.len() - 1) >> 6) + 1; + let array_width_byte = array_width << 3; + let mut segmented_space_bits = vec![vec![0usize; array_width]; array_size]; + let mut probability_log_sum = vec![0.0; array_size]; + let mut circular_index = 0usize; + + for j in 0..input_chars.len() { + let space_ulong_index = if j == 0 { 0 } else { (j - 1) >> 6 }; + let array_copy_byte = cmp::min((space_ulong_index + 1) << 3, array_width_byte); + + let array_copy_usize = array_copy_byte >> 3; + + if j > 0 { + segmented_space_bits[circular_index][space_ulong_index] |= + 1usize << ((j - 1) & 0x3f); + } + + let imax = cmp::min(input_chars.len() - j, self.maximum_dictionary_word_length); + + for i in 1..=imax { + let destination_index = (i + circular_index) % array_size; + + let part1_chars = &input_chars[j..(j + i)]; + + let probability_log_part1 = + if let Some(probability_log) = self.dictionary.get(part1_chars) { + *probability_log + } else { + self.probability_log_estimation[part1_chars.len() - 1] + }; + + if j == 0 { + probability_log_sum[destination_index] = probability_log_part1; + } else if (i == self.maximum_dictionary_word_length) + || (probability_log_sum[destination_index] + < probability_log_sum[circular_index] + probability_log_part1) + { + #[allow(clippy::needless_range_loop)] + for i in 0..array_copy_usize { + segmented_space_bits[destination_index][i] = + segmented_space_bits[circular_index][i]; + } + + probability_log_sum[destination_index] = + probability_log_sum[circular_index] + probability_log_part1; + } + } + + circular_index += 1; + if circular_index == array_size { + circular_index = 0; + } + } + + let mut last = 0; + for i in 0..(input_chars.len() - 1) { + if (segmented_space_bits[circular_index][i >> 6] & (1usize << (i & 0x3f))) > 0 { + if !result_array.is_empty() && ['+', '-'].contains(&input_chars[last]) { + result_array.push(input_chars[last..(i + 1)].iter().skip(1).collect()); + } else { + result_array.push(input_chars[last..(i + 1)].iter().collect()); + } + + last = i + 1; + } + } + + if !result_array.is_empty() 
&& ['+', '-'].contains(&input_chars[last]) { + result_array.push(input_chars[last..].iter().skip(1).collect()); + } else { + result_array.push(input_chars[last..].iter().collect()); + } + + probability_log_sum_best += probability_log_sum[circular_index]; + } + + (result_array, probability_log_sum_best) + } +} diff --git a/mobile_app/windows/flutter/generated_plugin_registrant.cc b/mobile_app/windows/flutter/generated_plugin_registrant.cc index 8b6d468..48de52b 100644 --- a/mobile_app/windows/flutter/generated_plugin_registrant.cc +++ b/mobile_app/windows/flutter/generated_plugin_registrant.cc @@ -6,6 +6,9 @@ #include "generated_plugin_registrant.h" +#include void RegisterPlugins(flutter::PluginRegistry* registry) { + PermissionHandlerWindowsPluginRegisterWithRegistrar( + registry->GetRegistrarForPlugin("PermissionHandlerWindowsPlugin")); } diff --git a/mobile_app/windows/flutter/generated_plugins.cmake b/mobile_app/windows/flutter/generated_plugins.cmake index 275cfbf..97aa218 100644 --- a/mobile_app/windows/flutter/generated_plugins.cmake +++ b/mobile_app/windows/flutter/generated_plugins.cmake @@ -3,9 +3,11 @@ # list(APPEND FLUTTER_PLUGIN_LIST + permission_handler_windows ) list(APPEND FLUTTER_FFI_PLUGIN_LIST + flutter_tantivy rust_lib_mobile_app )