From f03e556efd5d23c6c73da2f950f21d3e049cc0de Mon Sep 17 00:00:00 2001 From: natalyatodorova Date: Wed, 1 Oct 2025 11:35:22 +0100 Subject: [PATCH 1/2] ODM-12769: omics/cells endpoint metadata filters --- openapi/v1/integrationCurator.yaml | 90 ++++++++++++++++++++++++------ openapi/v1/integrationUser.yaml | 90 ++++++++++++++++++++++++------ 2 files changed, 146 insertions(+), 34 deletions(-) diff --git a/openapi/v1/integrationCurator.yaml b/openapi/v1/integrationCurator.yaml index 76ee0f12..3d15059d 100644 --- a/openapi/v1/integrationCurator.yaml +++ b/openapi/v1/integrationCurator.yaml @@ -4385,23 +4385,79 @@ paths: name: studyFilter schema: type: string - - description: "Search for cells by their metadata attributes. The following attributes are supported:\n\ - 1. Strings: barcode, batch, cellType, cluster, and all custom attributes.\n\ - 2. Integers: nCounts \n\ - 3. Floats: percentMito\n\ - 4. Float coordinates: UMAP, PCA.\n\ - 5. All other attributes are considered to be custom and stored as string data type.\n\n\ - To use filters for cell metadata objects use the following query types: \n\ - 1. By key=value pair for attributes. Single words can be supplied as is; otherwise, use speech marks (\") to quote queries that include whitespace: cellType=T_cell, batch=\"PBMC batch 01\" - quote values that include spaces, nCounts=3000, custom_attribute=\"disease\" - custom attribute with string data type.\n\ - 2. It is possible to specify a set of possible values, separated by comma: cellType=Macrophage,Monocyte.\n\ - 3. Utilize range filters to search numeric attributes. Apply < (less than), > (greater than), and = (equal to) symbols to specify the desired ranges as follows: nCounts > 2000, -3 < percentMito < 10. To retrieve UMAP or PCA values use umap.1, umap.2, pca.1, pca.2, e.g. umap.1 > 0.5.\n\ - 4. Use substring search to get the records where the attribute field contains the provided substring: cellType =~ \"B cell\".\n\ - 5. 
Combine multiple filters for different feature attributes and measurements using AND (&&), OR (||), NOT (!) logical operators and parentheses:\n\ - * NOT cellType=Erythrocyte or cellType!=B_cell,T_cell: exclude objects with defined values from search.\n\ - * batch=BatchA && percentMito<0.8 or batch=BatchA AND percentMito<8: select all objects with BatchA and percentMito less than 0.8.\n\ - * cluster=1 || cluster=2 or cluster=1 OR cluster=2: select all objects where cluster 1 or 2.\n\ - * percentMito>0.2 && nCounts>=1000: select all objects where percentMito is greater than 0.2 and nCounts is more or equal to 1000.\n\ - * batch=BatchA && (cluster=3 || -3 < percentMito < 8): combine logical operators in one query." + - description: | + Search for objects via a full-text query over all study metadata fields. E.g. `RNA-Seq of human dendritic cells`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: studyQuery + schema: + type: string + - description: | + Filter by sample metadata (key-value metadata pair(s)). E.g. `"Species or strain"="Homo sapiens"` + in: query + name: sampleFilter + schema: + type: string + - description: | + Search for objects via a full-text query over all sample metadata fields. E.g. `Clozapine`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: sampleQuery + schema: + type: string + - description: | + Filter by library metadata (key-value metadata pair(s)). E.g. `"Library Type"=RNA-Seq-1` + in: query + name: libraryFilter + schema: + type: string + - description: | + Search for library objects via a full-text query over all library metadata fields. E.g. `illumina HiSeq500`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: libraryQuery + schema: + type: string + - description: | + Filter by preparation metadata (key-value metadata pair(s)). E.g. 
`Digestion=Trypsin` + in: query + name: preparationFilter + schema: + type: string + - description: | + Search for preparation objects via a full-text query over all preparation metadata fields. E.g. `reversed-phase liquid chromatography`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: preparationQuery + schema: + type: string + - description: |- + If the full-text query term is present in an ODM dictionary, enabling this parameter will modify the query to include child terms of the full-text query. + + For example, the search query "Body fluid" can be expanded to include the term "Blood" (a child term of "Body fluid") so files containing either "Body fluid" or "Blood" in their metadata will be returned in the search results. + + The parent-child relationship is defined by the key "broaders" or "subClassOf" in the dictionary. + + If the full-text query term is not present in a dictionary, this parameter has no effect. + in: query + name: searchSpecificTerms + schema: + type: boolean + - description: |- + Search for cells by their metadata attributes. The following attributes are supported: + 1. Strings: `barcode`, `batch`, `cellType`, `cluster`, and all custom attributes. + 2. Integers: `nCounts` + 3. Floats: `percentMito` + 4. Float coordinates: `UMAP`, `PCA`. + 5. All other attributes are considered to be custom and stored as string data type. + + To use filters for cell metadata objects use the following query types: + 1. By key=value pair for attributes. Single words can be supplied as is; otherwise, use speech marks (`"`) to quote queries that include whitespace: `cellType=T_cell`, `batch="PBMC batch 01"` - quote values that include spaces, `nCounts=3000`, `custom_attribute="disease"` - custom attribute with string data type. + 2. It is possible to specify a set of possible values, separated by comma: `cellType=Macrophage,Monocyte`. + 3. Utilize range filters to search numeric attributes. 
Apply `<` (less than), `>` (greater than), and `=` (equal to) symbols to specify the desired ranges as follows: `nCounts > 2000`, `-3 < percentMito < 10`. To retrieve UMAP or PCA values use `umap.1`, `umap.2`, `pca.1`, `pca.2`, e.g. `umap.1 > 0.5`. + 4. Use substring search to get the records where the attribute field contains the provided substring: `cellType =~ "B cell"`. + 5. Combine multiple filters for different feature attributes and measurements using `AND` (`&&`), `OR` (`||`), `NOT` (`!`) logical operators and parentheses: + * `NOT cellType=Erythrocyte` or `cellType!=B_cell,T_cell`: exclude objects with defined values from search. + * `batch=BatchA && percentMito<0.8` or `batch=BatchA AND percentMito<0.8`: select all objects with BatchA and percentMito less than 0.8. + * `cluster=1 || cluster=2` or `cluster=1 OR cluster=2`: select all objects where cluster is 1 or 2. + * `percentMito>0.2 && nCounts>=1000`: select all objects where percentMito is greater than 0.2 and nCounts is greater than or equal to 1000. + * `batch=BatchA && (cluster=3 || -3 < percentMito < 8)`: combine logical operators in one query. in: query name: cellQuery schema: diff --git a/openapi/v1/integrationUser.yaml b/openapi/v1/integrationUser.yaml index 437cafa0..df5cefc9 100644 --- a/openapi/v1/integrationUser.yaml +++ b/openapi/v1/integrationUser.yaml @@ -2878,23 +2878,79 @@ paths: name: studyFilter schema: type: string - - description: "Search for cells by their metadata attributes. The following attributes are supported:\n\ - 1. Strings: barcode, batch, cellType, cluster, and all custom attributes.\n\ - 2. Integers: nCounts \n\ - 3. Floats: percentMito\n\ - 4. Float coordinates: UMAP, PCA.\n\ - 5. All other attributes are considered to be custom and stored as string data type.\n\n\ - To use filters for cell metadata objects use the following query types: \n\ - 1. By key=value pair for attributes. 
Single words can be supplied as is; otherwise, use speech marks (\") to quote queries that include whitespace: cellType=T_cell, batch=\"PBMC batch 01\" - quote values that include spaces, nCounts=3000, custom_attribute=\"disease\" - custom attribute with string data type.\n\ - 2. It is possible to specify a set of possible values, separated by comma: cellType=Macrophage,Monocyte.\n\ - 3. Utilize range filters to search numeric attributes. Apply < (less than), > (greater than), and = (equal to) symbols to specify the desired ranges as follows: nCounts > 2000, -3 < percentMito < 10. To retrieve UMAP or PCA values use umap.1, umap.2, pca.1, pca.2, e.g. umap.1 > 0.5.\n\ - 4. Use substring search to get the records where the attribute field contains the provided substring: cellType =~ \"B cell\".\n\ - 5. Combine multiple filters for different feature attributes and measurements using AND (&&), OR (||), NOT (!) logical operators and parentheses:\n\ - * NOT cellType=Erythrocyte or cellType!=B_cell,T_cell: exclude objects with defined values from search.\n\ - * batch=BatchA && percentMito<0.8 or batch=BatchA AND percentMito<8: select all objects with BatchA and percentMito less than 0.8.\n\ - * cluster=1 || cluster=2 or cluster=1 OR cluster=2: select all objects where cluster 1 or 2.\n\ - * percentMito>0.2 && nCounts>=1000: select all objects where percentMito is greater than 0.2 and nCounts is more or equal to 1000.\n\ - * batch=BatchA && (cluster=3 || -3 < percentMito < 8): combine logical operators in one query." + - description: | + Search for objects via a full-text query over all study metadata fields. E.g. `RNA-Seq of human dendritic cells`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: studyQuery + schema: + type: string + - description: | + Filter by sample metadata (key-value metadata pair(s)). E.g. 
`"Species or strain"="Homo sapiens"` + in: query + name: sampleFilter + schema: + type: string + - description: | + Search for objects via a full-text query over all sample metadata fields. E.g. `Clozapine`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: sampleQuery + schema: + type: string + - description: | + Filter by library metadata (key-value metadata pair(s)). E.g. `"Library Type"=RNA-Seq-1` + in: query + name: libraryFilter + schema: + type: string + - description: | + Search for library objects via a full-text query over all library metadata fields. E.g. `illumina HiSeq500`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: libraryQuery + schema: + type: string + - description: | + Filter by preparation metadata (key-value metadata pair(s)). E.g. `Digestion=Trypsin` + in: query + name: preparationFilter + schema: + type: string + - description: | + Search for preparation objects via a full-text query over all preparation metadata fields. E.g. `reversed-phase liquid chromatography`. Queries matching dictionary terms are automatically expanded to include synonyms. + in: query + name: preparationQuery + schema: + type: string + - description: |- + If the full-text query term is present in an ODM dictionary, enabling this parameter will modify the query to include child terms of the full-text query. + + For example, the search query "Body fluid" can be expanded to include the term "Blood" (a child term of "Body fluid") so files containing either "Body fluid" or "Blood" in their metadata will be returned in the search results. + + The parent-child relationship is defined by the key "broaders" or "subClassOf" in the dictionary. + + If the full-text query term is not present in a dictionary, this parameter has no effect. + in: query + name: searchSpecificTerms + schema: + type: boolean + - description: |- + Search for cells by their metadata attributes. 
The following attributes are supported: + 1. Strings: `barcode`, `batch`, `cellType`, `cluster`, and all custom attributes. + 2. Integers: `nCounts` + 3. Floats: `percentMito` + 4. Float coordinates: `UMAP`, `PCA`. + 5. All other attributes are considered to be custom and stored as string data type. + + To use filters for cell metadata objects use the following query types: + 1. By key=value pair for attributes. Single words can be supplied as is; otherwise, use speech marks (`"`) to quote queries that include whitespace: `cellType=T_cell`, `batch="PBMC batch 01"` - quote values that include spaces, `nCounts=3000`, `custom_attribute="disease"` - custom attribute with string data type. + 2. It is possible to specify a set of possible values, separated by comma: `cellType=Macrophage,Monocyte`. + 3. Utilize range filters to search numeric attributes. Apply `<` (less than), `>` (greater than), and `=` (equal to) symbols to specify the desired ranges as follows: `nCounts > 2000`, `-3 < percentMito < 10`. To retrieve UMAP or PCA values use `umap.1`, `umap.2`, `pca.1`, `pca.2`, e.g. `umap.1 > 0.5`. + 4. Use substring search to get the records where the attribute field contains the provided substring: `cellType =~ "B cell"`. + 5. Combine multiple filters for different feature attributes and measurements using `AND` (`&&`), `OR` (`||`), `NOT` (`!`) logical operators and parentheses: + * `NOT cellType=Erythrocyte` or `cellType!=B_cell,T_cell`: exclude objects with defined values from search. + * `batch=BatchA && percentMito<0.8` or `batch=BatchA AND percentMito<0.8`: select all objects with BatchA and percentMito less than 0.8. + * `cluster=1 || cluster=2` or `cluster=1 OR cluster=2`: select all objects where cluster is 1 or 2. + * `percentMito>0.2 && nCounts>=1000`: select all objects where percentMito is greater than 0.2 and nCounts is greater than or equal to 1000. + * `batch=BatchA && (cluster=3 || -3 < percentMito < 8)`: combine logical operators in one query. 
in: query name: cellQuery schema: From cd5ede25d92f45712e50973e16e3c4bed2461236 Mon Sep 17 00:00:00 2001 From: natalyatodorova Date: Thu, 2 Oct 2025 14:15:35 +0100 Subject: [PATCH 2/2] make a list for conditions --- openapi/v1/integrationCurator.yaml | 8 ++++---- openapi/v1/integrationUser.yaml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/openapi/v1/integrationCurator.yaml b/openapi/v1/integrationCurator.yaml index 3d15059d..a9dd5608 100644 --- a/openapi/v1/integrationCurator.yaml +++ b/openapi/v1/integrationCurator.yaml @@ -4358,10 +4358,10 @@ paths: ## Conditions It is possible to supply conditions for: - Parent studies (full-text or metadata key-value pair) - Samples (full-text or metadata key-value pair) - Libraries (full-text or metadata key-value pair) - Preparations (full-text or metadata key-value pair) + 1. Parent studies (full-text or metadata key-value pair) + 2. Samples (full-text or metadata key-value pair) + 3. Libraries (full-text or metadata key-value pair) + 4. Preparations (full-text or metadata key-value pair) ## Metadata full-text queries Single words can be supplied as is; otherwise, use speech marks (`"`) to quote queries that include whitespace. Speech marks and backslash characters in the query need to be escaped with a backslash (`\`). diff --git a/openapi/v1/integrationUser.yaml b/openapi/v1/integrationUser.yaml index df5cefc9..033ae61f 100644 --- a/openapi/v1/integrationUser.yaml +++ b/openapi/v1/integrationUser.yaml @@ -2851,10 +2851,10 @@ paths: ## Conditions It is possible to supply conditions for: - Parent studies (full-text or metadata key-value pair) - Samples (full-text or metadata key-value pair) - Libraries (full-text or metadata key-value pair) - Preparations (full-text or metadata key-value pair) + 1. Parent studies (full-text or metadata key-value pair) + 2. Samples (full-text or metadata key-value pair) + 3. Libraries (full-text or metadata key-value pair) + 4. 
Preparations (full-text or metadata key-value pair) ## Metadata full-text queries Single words can be supplied as is; otherwise, use speech marks (`"`) to quote queries that include whitespace. Speech marks and backslash characters in the query need to be escaped with a backslash (`\`).