29 changes: 29 additions & 0 deletions impresso/api_client/models/find_entities_order_by.py
@@ -26,3 +26,32 @@ def __str__(self) -> str:
"-count",
"-count-mentions",
]
"""Specifies the sorting order for entity results using string literals.

This type defines the valid string values that can be used to specify the
field by which entity results should be ordered, and whether the order
should be ascending or descending.

Possible ordering fields:
- `count`: Order by the total number of documents the entity appears in.
- `count-mentions`: Order by the total number of times the entity is mentioned across all documents.
- `name`: Order alphabetically by entity name.
- `relevance`: Order by relevance score (specific to the query context, often the default).

Ascending order is the default (e.g., `"name"` sorts A-Z).
Descending order is indicated by a preceding hyphen (e.g., `"-count"`
sorts from the highest count to the lowest).

Usage Example:
```python
# Assume 'client' is an initialized API client instance
# Example: Find entities and sort by the number of mentions (descending)
entities_by_mentions = client.find_entities(query="some query", order_by="-count-mentions")

# Example: Find entities and sort alphabetically by name (ascending)
entities_by_name = client.find_entities(query="another query", order_by="name")
```

See Also:
`FindEntitiesOrderBy`: An enum representation of these literal values.
"""
32 changes: 32 additions & 0 deletions impresso/api_client/models/find_media_sources_order_by.py
@@ -26,3 +26,35 @@ def __str__(self) -> str:
"-lastIssue",
"-countIssues",
]
"""Specifies the sorting order for media source results using string literals.

This type defines the valid string values that can be used to specify the
field by which media source results should be ordered, and whether the order
should be ascending or descending.

Possible ordering fields:
- `countIssues`: Order by the total number of issues available for the media source.
- `firstIssue`: Order by the publication date of the earliest available issue.
- `lastIssue`: Order by the publication date of the latest available issue.
- `name`: Order alphabetically by the media source's name.

Ascending order is the default (e.g., `"name"` sorts A-Z, `"firstIssue"` sorts oldest to newest).
Descending order is indicated by a preceding hyphen (e.g., `"-countIssues"`
sorts from the highest count to the lowest, `"-lastIssue"` sorts newest to oldest).

Usage Example:
```python
# Assume 'client' is an initialized API client instance
# Example: Find media sources and sort by the number of issues (descending)
sources_by_issue_count = client.find_media_sources(order_by="-countIssues")

# Example: Find media sources and sort alphabetically by name (ascending)
sources_by_name = client.find_media_sources(order_by="name")

# Example: Find media sources and sort by the date of the last issue (newest first)
sources_by_last_issue = client.find_media_sources(order_by="-lastIssue")
```

See Also:
`FindMediaSourcesOrderBy`: An enum representation of these literal values.
"""
33 changes: 30 additions & 3 deletions impresso/api_client/models/search_order_by.py
@@ -6,9 +6,9 @@ class SearchOrderBy(str, Enum):
DATE = "date"
ID = "id"
RELEVANCE = "relevance"
VALUE_0 = "-date"
VALUE_2 = "-relevance"
VALUE_5 = "-id"
VALUE_0 = "-date" # Descending date
VALUE_2 = "-relevance" # Descending relevance (default search behavior)
VALUE_5 = "-id" # Descending ID

def __str__(self) -> str:
return str(self.value)
@@ -22,3 +22,30 @@ def __str__(self) -> str:
"-relevance",
"-id",
]
"""
Specifies the sorting order for search results using string literals.

This type defines the valid string values that can be used to specify the
field by which search results should be ordered, and whether the order
should be ascending or descending.

Ascending order is the default (e.g., `"date"` sorts from oldest to newest).
Descending order is indicated by a preceding hyphen (e.g., `"-date"`
sorts from newest to oldest).

Usage Example:
```python
# Example: Search for articles and sort by relevance (descending)
# Note: "-relevance" is often the default sorting for search APIs
results = client.search(query="example query", order_by="-relevance")

# Example: Search and sort by date (ascending)
results_by_date = client.search(query="another query", order_by="date")

# Example: Search and sort by date (descending)
results_by_date_desc = client.search(query="yet another query", order_by="-date")
```

See Also:
`SearchOrderBy`: An enum representation of these literal values.
"""
29 changes: 19 additions & 10 deletions impresso/data_container.py
@@ -10,7 +10,12 @@ class DataContainer(Generic[IT, T]):
"""
Generic container for responses from the Impresso API
returned by resource methods (`get`, `find`).
Generally represents a single page of the result.

Generally represents a single page of the result set. Results can be
paged through by adjusting the `offset` and `limit` parameters
of the corresponding resource method call (e.g., `client.newspapers.find`).
The `total`, `limit`, `offset`, and `size` properties describe
the current page and the overall result set.
"""

def __init__(
@@ -76,43 +76,47 @@ def _get_preview_image_(self) -> str | None:

@property
def raw(self) -> dict[str, Any]:
"""Returns the response data as a python dictionary."""
"""The response data as a python dictionary."""
return getattr(self._data, "to_dict")()

@property
def pydantic(self) -> T:
"""Returns the response data as a pydantic model."""
"""The response data as a pydantic model."""
return self._pydantic_model.model_validate(self.raw)

@property
def df(self) -> DataFrame:
"""Returns the response data as a pandas dataframe."""
"""
The response data for the current page as a pandas dataframe.

Note that this DataFrame only contains the items from the current
page of results, not the entire result set across all pages.
"""
return DataFrame.from_dict(self._data) # type: ignore

@property
def total(self) -> int:
"""Total number of results."""
"""Total number of results available across all pages."""
return self.raw.get("pagination", {}).get("total", 0)

@property
def limit(self) -> int:
"""Current page size."""
"""Maximum number of items requested for the current page."""
return self.raw.get("pagination", {}).get("limit", 0)

@property
def offset(self) -> int:
"""Current page offset."""
"""The starting index (0-based) of the items on the current page."""
return self.raw.get("pagination", {}).get("offset", 0)

@property
def size(self) -> int:
"""Current page size."""
"""Number of items actually present on the current page."""
return len(self.raw.get("data", []))

@property
def url(self) -> str | None:
"""
URL of an Impresso web application page
representing the result set from this container.
URL of an Impresso web application page representing the result set.
"""
return self._web_app_search_result_url
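
The pagination properties documented above (`total`, `limit`, `offset`, `size`) can drive a simple paging loop. A minimal sketch, assuming an initialized `client` and that `client.newspapers.find` accepts `limit` and `offset` as described in the class docstring:

```python
import pandas as pd

# Assumption: `client` is an initialized Impresso client and
# `client.newspapers.find` accepts `limit` and `offset` parameters.
pages = []
offset = 0
limit = 100
while True:
    page = client.newspapers.find(limit=limit, offset=offset)
    pages.append(page.df)       # DataFrame for the current page only
    offset += page.size         # advance by the number of items received
    if page.size == 0 or offset >= page.total:
        break

all_results = pd.concat(pages)  # all pages combined into one DataFrame
```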
22 changes: 22 additions & 0 deletions impresso/resources/collections.py
@@ -66,6 +66,28 @@ def total(self) -> int:
class CollectionsResource(Resource):
"""
Work with collections.

Examples:
Find collections containing the term "war":
>>> results = collections.find(term="war") # doctest: +SKIP
>>> print(results.df) # doctest: +SKIP

Get a specific collection by its ID:
>>> collection_id = "some-collection-id" # Replace with a real ID
>>> collection = collections.get(collection_id) # doctest: +SKIP
>>> print(collection.df) # doctest: +SKIP

List items in a collection:
>>> items = collections.items(collection_id) # doctest: +SKIP
>>> print(items.df) # doctest: +SKIP

Add items to a collection:
>>> item_ids_to_add = ["item-id-1", "item-id-2"] # Replace with real item IDs
>>> collections.add_items(collection_id, item_ids_to_add) # doctest: +SKIP

Remove items from a collection:
>>> item_ids_to_remove = ["item-id-1"] # Replace with real item IDs
>>> collections.remove_items(collection_id, item_ids_to_remove) # doctest: +SKIP
"""

name = "collections"
36 changes: 18 additions & 18 deletions impresso/resources/search.py
@@ -86,15 +86,15 @@ def find(
title: str | AND[str] | OR[str] | None = None,
front_page: bool | None = None,
entity_id: str | AND[str] | OR[str] | None = None,
newspaper_id: str | OR[str] | None = None,
newspaper_id: str | AND[str] | OR[str] | None = None,
date_range: DateRange | None = None,
language: str | OR[str] | None = None,
language: str | AND[str] | OR[str] | None = None,
mention: str | AND[str] | OR[str] | None = None,
topic_id: str | AND[str] | OR[str] | None = None,
collection_id: str | OR[str] | None = None,
country: str | OR[str] | None = None,
partner_id: str | OR[str] | None = None,
text_reuse_cluster_id: str | OR[str] | None = None,
collection_id: str | AND[str] | OR[str] | None = None,
country: str | AND[str] | OR[str] | None = None,
partner_id: str | AND[str] | OR[str] | None = None,
text_reuse_cluster_id: str | AND[str] | OR[str] | None = None,
) -> SearchDataContainer:
"""
Search for content items in Impresso.
@@ -179,15 +179,15 @@ def facet(
title: str | AND[str] | OR[str] | None = None,
front_page: bool | None = None,
entity_id: str | AND[str] | OR[str] | None = None,
newspaper_id: str | OR[str] | None = None,
newspaper_id: str | AND[str] | OR[str] | None = None,
date_range: DateRange | None = None,
language: str | OR[str] | None = None,
language: str | AND[str] | OR[str] | None = None,
mention: str | AND[str] | OR[str] | None = None,
topic_id: str | AND[str] | OR[str] | None = None,
collection_id: str | OR[str] | None = None,
country: str | OR[str] | None = None,
partner_id: str | OR[str] | None = None,
text_reuse_cluster_id: str | OR[str] | None = None,
collection_id: str | AND[str] | OR[str] | None = None,
country: str | AND[str] | OR[str] | None = None,
partner_id: str | AND[str] | OR[str] | None = None,
text_reuse_cluster_id: str | AND[str] | OR[str] | None = None,
) -> FacetDataContainer:

facet_id = get_enum_from_literal(facet, GetSearchFacetId)
@@ -258,15 +258,15 @@ def _build_filters(
title: str | AND[str] | OR[str] | None = None,
front_page: bool | None = None,
entity_id: str | AND[str] | OR[str] | None = None,
newspaper_id: str | OR[str] | None = None,
newspaper_id: str | AND[str] | OR[str] | None = None,
date_range: DateRange | None = None,
language: str | OR[str] | None = None,
language: str | AND[str] | OR[str] | None = None,
mention: str | AND[str] | OR[str] | None = None,
topic_id: str | AND[str] | OR[str] | None = None,
collection_id: str | OR[str] | None = None,
country: str | OR[str] | None = None,
partner_id: str | OR[str] | None = None,
text_reuse_cluster_id: str | OR[str] | None = None,
collection_id: str | AND[str] | OR[str] | None = None,
country: str | AND[str] | OR[str] | None = None,
partner_id: str | AND[str] | OR[str] | None = None,
text_reuse_cluster_id: str | AND[str] | OR[str] | None = None,
) -> list[Filter]:
filters: list[Filter] = []
if string:
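
The widened signatures above let these filter parameters take `AND` and `OR` combinations in addition to a single string. A minimal sketch of passing combined filters, assuming `search` is an initialized search resource and that `AND`/`OR` are importable from the `impresso` package (the import path and the example identifiers are assumptions, not confirmed by this diff):

```python
from impresso import AND, OR  # assumed export location

# Hypothetical newspaper and collection identifiers, for illustration only.
results = search.find(
    newspaper_id=OR("GDL", "JDG"),          # items from either newspaper
    collection_id=AND("coll-a", "coll-b"),  # items present in both collections
    language=OR("fr", "de"),                # items in French or German
)
print(results.df)
```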
81 changes: 80 additions & 1 deletion impresso/resources/text_reuse/clusters.py
@@ -51,7 +51,21 @@ def df(self) -> DataFrame:


class TextReuseClustersResource(Resource):
"""Text reuse clusters resource."""
"""
Interact with the text reuse clusters endpoint of the Impresso API.

This resource allows searching for text reuse clusters based on various criteria
and retrieving facet information about these clusters.

Examples:
Find clusters with size between 10 and 20:
>>> results = textReuseClusters.find(cluster_size=(10, 20)) # doctest: +SKIP
>>> print(results.df) # doctest: +SKIP

Get the distribution of newspapers involved in clusters:
>>> facet_results = textReuseClusters.facet(facet='newspaper', order_by='count') # doctest: +SKIP
>>> print(facet_results.df) # doctest: +SKIP
"""

name = "textReuseClusters"

@@ -75,6 +89,40 @@ def find(
mention: str | AND[str] | OR[str] | None = None,
entity_id: str | AND[str] | OR[str] | None = None,
) -> FindTextReuseClustersContainer:
"""
Find text reuse clusters based on various criteria.

Args:
term: Search for clusters containing specific text.
title: Filter clusters by the title of the articles within them.
order_by: Specify the sorting order for the results.
cluster_size: Filter clusters by the number of items they contain.
lexical_overlap: Filter clusters by the lexical overlap score.
day_delta: Filter clusters by the time span (in days) between the first and last item.
date_range: Filter clusters based on the date range of their items.
newspaper_id: Filter clusters containing items from specific newspapers.
collection_id: Filter clusters containing items from specific collections.
limit: Maximum number of clusters to return.
offset: Number of clusters to skip from the beginning.
front_page: Filter clusters containing items published on the front page.
topic_id: Filter clusters associated with specific topics.
language: Filter clusters by the language of their items.
country: Filter clusters by the country of publication of their items.
mention: Filter clusters containing specific mentions (named entities).
entity_id: Filter clusters associated with specific entity IDs.

Returns:
FindTextReuseClustersContainer: A container holding the search results.

Examples:
Find clusters with size between 10 and 20:
>>> results = textReuseClusters.find(cluster_size=(10, 20)) # doctest: +SKIP
>>> print(results.df) # doctest: +SKIP

Find clusters related to 'politics' in Swiss newspapers:
>>> results = textReuseClusters.find(term='politics', country='CH') # doctest: +SKIP
>>> print(results.df) # doctest: +SKIP
"""

filters = _build_filters(
text=term,
@@ -130,6 +178,37 @@ def facet(
lexical_overlap: Range | AND[Range] | OR[Range] | None = None,
day_delta: Range | AND[Range] | OR[Range] | None = None,
) -> FacetDataContainer:
"""
Get facet information for text reuse clusters based on specified filters.

Facets provide aggregated counts for different properties of the clusters,
such as the distribution of cluster sizes or newspapers involved.

Args:
facet: The specific facet to retrieve (e.g., 'newspaper', 'cluster_size').
order_by: How to order the facet values (e.g., 'value', 'count').
limit: Maximum number of facet values to return.
offset: Number of facet values to skip.
cluster_size: Filter clusters by size before calculating facets.
date_range: Filter clusters by date range before calculating facets.
newspaper_id: Filter clusters by newspaper before calculating facets.
lexical_overlap: Filter clusters by lexical overlap before calculating facets.
day_delta: Filter clusters by day delta before calculating facets.

Returns:
FacetDataContainer: A container holding the facet results.

Examples:
Get the top 10 newspapers involved in clusters:
>>> facet_results = textReuseClusters.facet(facet='newspaper', limit=10, order_by='count') # doctest: +SKIP
>>> print(facet_results.df) # doctest: +SKIP

Get the distribution of cluster sizes for clusters within a specific date range:
>>> from impresso.structures import DateRange
>>> date_filter = DateRange(start="1900-01-01", end="1910-12-31")
>>> facet_results = textReuseClusters.facet(facet='cluster_size', date_range=date_filter) # doctest: +SKIP
>>> print(facet_results.df) # doctest: +SKIP
"""
facet_id = get_enum_from_literal(facet, GetTrClustersFacetId)
if isinstance(facet_id, Unset):
raise ValueError(f"{facet} is not a valid value")