diff --git a/docs/resources.md b/docs/resources.md
index 6bc9986..afcc267 100644
--- a/docs/resources.md
+++ b/docs/resources.md
@@ -5,7 +5,25 @@
Search content items in the Impresso corpus.
```python
+# Search for content items
impresso.search.find(term='Titanic', limit=10)
+
+# Complex queries with AND/OR operators
+from impresso import AND, OR
+impresso.search.find(term=AND("hitler", "stalin") & OR("molotow", "ribbentrop"))
+
+# Search with date range
+from impresso import DateRange
+impresso.search.find(term="independence", date_range=DateRange("1921-05-21", "2001-01-02"))
+
+# Search by entity mentions
+impresso.search.find(entity_id=AND("aida-0001-54-Switzerland", "aida-0001-50-Albert_Einstein"))
+
+# Limit to specific newspapers
+impresso.search.find(term="independence", newspaper_id=OR("EXP", "GDL"))
+
+# Get facets to analyze search results
+impresso.search.facet(facet='newspaper', term='war')
```
::: impresso.resources.search.SearchResource
@@ -18,13 +36,28 @@ impresso.search.find(term='Titanic', limit=10)
Search entities in the Impresso corpus.
```python
+# Search for entities
impresso.entities.find(term="Douglas Adams")
+
+# Filter by entity type
+impresso.entities.find(term="Paris", entity_type="location")
+
+# Get entities with Wikidata details
+impresso.entities.find(term="Paris", resolve=True)
+
+# Search by Wikidata IDs
+from impresso import AND
+impresso.entities.find(wikidata_id=AND("Q2", "Q4", "Q42"))
+
+# Get a specific entity by ID
+impresso.entities.get("entity-id")
```
::: impresso.resources.entities.EntitiesResource
::: impresso.resources.entities.EntityType
::: impresso.api_client.models.find_entities_order_by.FindEntitiesOrderByLiteral
+::: impresso.resources.entities.FindEntitiesContainer
## Media sources
@@ -47,21 +80,170 @@ impresso.media_sources.find(
Get a single content item by ID.
```python
+# Get a content item by ID
impresso.content_items.get("NZZ-1794-08-09-a-i0002")
+
+# Get a content item with embeddings
+impresso.content_items.get("NZZ-1794-08-09-a-i0002", include_embeddings=True)
+
+# Get only the embeddings of a content item
+embeddings = impresso.content_items.get_embeddings("NZZ-1794-08-09-a-i0002")
+```
+
+::: impresso.resources.content_items.ContentItemsResource
+
+::: impresso.resources.content_items.ContentItemDataContainer
+
+## Images
+
+Search images in the Impresso corpus. Supports text search, filtering by various metadata, and visual similarity search using embeddings.
+
+```python
+# Search for images by keyword and content type
+impresso.images.find(term="rocket", content_type="object")
+
+# Get an image with its embeddings
+image = impresso.images.get("luxwort-1930-09-26-a-i0036", include_embeddings=True)
+
+# Search for similar images using an in-corpus image
+embeddings = impresso.images.get_embeddings("luxwort-1930-09-26-a-i0036")
+impresso.images.find(embedding=embeddings[0], limit=10)
+
+# Search for similar images using an external image
+embedding = impresso.tools.embed_image("https://example.com/image.png", target="image")
+impresso.images.find(embedding=embedding, limit=10)
+
+# Multimodal search: find images using text
+text_embedding = impresso.tools.embed_text(text="portrait", target="multimodal")
+impresso.images.find(embedding=text_embedding, limit=10)
+```
+
+::: impresso.resources.images.ImagesResource
+
+::: impresso.api_client.models.find_images_order_by.FindImagesOrderByLiteral
+::: impresso.resources.images.FindImagesContainer
+::: impresso.resources.images.GetImageContainer
+
+## Topics
+
+Search topics in the Impresso database. Topics are thematic clusters discovered through topic modeling of the newspaper content.
+
+```python
+# Search for topics
+impresso.topics.find(term="economy")
+
+# Get a specific topic by ID
+impresso.topics.get("topic-id")
+```
+
+::: impresso.resources.topics.TopicsResource
+
+::: impresso.api_client.models.find_topics_order_by.FindTopicsOrderByLiteral
+::: impresso.resources.topics.FindTopicsContainer
+::: impresso.resources.topics.GetTopicContainer
+
+## Data Providers
+
+Search data providers in the Impresso database. Data providers are partner institutions that provide content to Impresso, such as libraries, archives, and media organizations.
+
+```python
+# Search for data providers
+impresso.data_providers.find(term="library")
+
+# Get a specific data provider by ID
+impresso.data_providers.get("provider-id")
+```
+
+::: impresso.resources.data_providers.DataProvidersResource
+
+::: impresso.resources.data_providers.FindDataProvidersContainer
+::: impresso.resources.data_providers.GetDataProviderContainer
+
+## Experiments
+
+Execute experiments with the Impresso platform. Experiments allow you to interact with various computational tools and models.
+
+```python
+# List all available experiments
+experiments = impresso.experiments.find()
+
+# Execute a specific experiment
+result = impresso.experiments.execute(
+ experiment_id="some-experiment-id",
+ body={"param": "value"}
+)
```
+::: impresso.resources.experiments.ExperimentsResource
+
+::: impresso.resources.experiments.FindExperimentsContainer
+
## Collections
Work with collections
+```python
+# Search for collections
+impresso.collections.find(term="war")
+
+# Get a specific collection by ID
+collection = impresso.collections.get("collection-id")
+collection_id = collection.raw["uid"]
+
+# List items in a collection
+items = impresso.collections.items(collection_id)
+
+# Add items to a collection (asynchronous - may take a few minutes)
+content_item = impresso.content_items.get("NZZ-1794-08-09-a-i0002")
+impresso.collections.add_items(collection_id, [content_item.pydantic.uid])
+
+# Remove items from a collection (asynchronous - may take a few minutes)
+impresso.collections.remove_items(collection_id, [content_item.pydantic.uid])
+```
+
::: impresso.resources.collections.CollectionsResource
::: impresso.api_client.models.find_collections_order_by.FindCollectionsOrderByLiteral
::: impresso.resources.collections.FindCollectionsContainer
+::: impresso.resources.collections.GetCollectionContainer
+
+## Tools: Named entity recognition and Embeddings
+
+The Python library provides tools for text processing and semantic search:
+
+- **Named Entity Recognition (NER)**: Extract and classify named entities (people, places, organizations) from text.
+- **Named Entity Linking (NEL)**: Resolve recognized entities to Wikidata entries.
+- **Text Embeddings**: Generate semantic embeddings from text for similarity search across the corpus.
+- **Image Embeddings**: Generate embeddings from images for visual similarity search and multimodal retrieval.
+
+```python
+text = "Jean-Baptiste Nicolas Robert Schuman (29 June 1886 – 4 September 1963) was a Luxembourg-born French statesman."
+
+# Extract named entities from text (fast)
+result = impresso.tools.ner(text)
+result.df # View entities as DataFrame
+
+# Extract and link entities to Wikidata (slower but more detailed)
+result = impresso.tools.ner_nel(text)
+result.df # Includes Wikidata links
-## Named entity recognition
+# Link pre-tagged entities to external resources (requires [START] and [END] markers)
+tagged_text = "[START] Jean-Baptiste Nicolas Robert Schuman [END] was a statesman."
+impresso.tools.nel(tagged_text)
-The python library contains a set of named entity recognition methods that use the same NER model used to add entities to the Impresso database.
+# Generate text embeddings for semantic search
+text_embedding = impresso.tools.embed_text("European integration", target="text")
+results = impresso.search.find(embedding=text_embedding, limit=5)
+
+# Use an in-corpus embedding to search for similar articles
+first_item_id = results.df.index[0]
+in_corpus_embedding = impresso.content_items.get_embeddings(first_item_id)[0]
+impresso.search.find(embedding=in_corpus_embedding, limit=10)
+
+# Generate image embeddings from URL
+image_embedding = impresso.tools.embed_image("https://example.com/image.png", target="image")
+impresso.images.find(embedding=image_embedding)
+```
::: impresso.resources.tools.ToolsResource
::: impresso.resources.tools.NerContainer
@@ -70,6 +252,20 @@ The python library contains a set of named entity recognition methods that use t
Two resources can be used to search text reuse clusters and passages.
+```python
+# Find text reuse clusters
+impresso.text_reuse_clusters.find(cluster_size=(10, 20))
+
+# Get facets for clusters (e.g., newspaper distribution)
+impresso.text_reuse_clusters.facet(facet='newspaper', order_by='count')
+
+# Find text reuse passages
+impresso.text_reuse_passages.find(term='revolution', country='FR')
+
+# Get facets for passages
+impresso.text_reuse_passages.facet(facet='newspaper')
+```
+
::: impresso.resources.text_reuse.clusters.TextReuseClustersResource
::: impresso.resources.text_reuse.passages.TextReusePassagesResource
diff --git a/docs/result.md b/docs/result.md
index 7a8e88f..d2bf329 100644
--- a/docs/result.md
+++ b/docs/result.md
@@ -2,6 +2,225 @@
When you execute a query, a `DataContainer` object is returned. This object encapsulates the query results along with metadata about the query. Additionally, it provides a suite of utility methods for accessing the results in various ways.
-In a Python notebook environment, the `DataContainer` object can render a preview of its data, facilitating quick inspection of the query results.
+## Understanding DataContainer
+
+All API methods that retrieve data (`find()`, `get()`, `facet()`) return specialized container objects that extend the base `DataContainer` class. These containers provide multiple ways to access and work with your results.
+
+## Accessing Result Data
+
+The `DataContainer` provides several properties for accessing the same data in different formats:
+
+### As a Pandas DataFrame
+
+The `df` property returns results as a pandas DataFrame, which is ideal for data analysis, filtering, and manipulation:
+
+```python
+results = impresso.search.find(term="revolution", limit=10)
+df = results.df
+
+# Use standard pandas operations
+print(df.head())
+print(df.columns)
+filtered = df[df['language'] == 'fr']
+```
+
+For image results, the DataFrame includes special formatting that renders thumbnail images in Jupyter notebooks.
+
+### As Raw Dictionary
+
+The `raw` property returns the complete API response as a Python dictionary:
+
+```python
+results = impresso.search.find(term="revolution", limit=10)
+raw_data = results.raw
+
+# Access the raw response structure
+items = raw_data['data']
+pagination = raw_data['pagination']
+```
+
+This is useful when you need the complete, unprocessed API response or want to serialize the results.
+
+### As Pydantic Model
+
+The `pydantic` property returns the data as a validated Pydantic model:
+
+```python
+results = impresso.search.find(term="revolution", limit=10)
+model = results.pydantic
+
+# Access data with IDE autocompletion and type checking
+for item in model.data:
+ print(item.title)
+```
+
+Pydantic models provide type validation and can be useful for structured data processing.
+
+## Pagination Information
+
+DataContainer objects include metadata about pagination:
+
+```python
+results = impresso.search.find(term="revolution", limit=20, offset=40)
+
+print(results.total) # Total number of results across all pages
+print(results.size) # Number of items in current page
+print(results.limit) # Maximum items per page
+print(results.offset) # Starting index of current page
+```
+
+## Iterating Through Pages
+
+Use the `pages()` method to iterate through all pages of results automatically:
+
+```python
+# Get first page with 50 items per page
+results = impresso.search.find(term="revolution", limit=50)
+
+# Iterate through all pages
+for page in results.pages():
+ print(f"Processing page at offset {page.offset}")
+ print(f"Contains {page.size} items")
+
+ # Process each page's data
+ for item in page.df.itertuples():
+ print(item.title)
+```
+
+This is efficient for processing large result sets without loading everything into memory at once.
+
+## Notebook Visualization
+
+In Jupyter notebooks or similar environments, DataContainer objects automatically render a rich HTML preview when displayed:
+
+```python
+results = impresso.search.find(term="revolution", limit=10)
+
+# Simply display the container - no need to access .df
+results # Renders as formatted HTML with preview
+```
+
+The preview includes:
+
+- **Result summary**: Type of result, number of items, and total count
+- **Link to Impresso App**: Direct link to view results in the web interface
+- **Data preview**: First few rows of the DataFrame
+- **Visual charts**: For facet results, displays a bar chart of the distribution
+
+## Specialized Containers
+
+Different API methods return specialized container types with additional features:
+
+### Search Results (`SearchDataContainer`)
+
+Returned by `search.find()`. Contains the content items that match the search query.
+
+### Facet Results (`FacetDataContainer`)
+
+Returned by `search.facet()` and text reuse facet methods. Includes automatic chart visualization showing the distribution of facet values.
+
+```python
+facets = impresso.search.facet(facet='newspaper', term='war')
+
+# The preview automatically shows a bar chart
+facets # Displays chart in notebook
+
+# Access the data
+print(facets.df['count']) # Counts for each facet value
+```
+
+### Image Results (`FindImagesContainer`, `GetImageContainer`)
+
+Returned by `images.find()` and `images.get()`. The DataFrame includes special formatting to display image thumbnails in notebooks:
+
+```python
+images = impresso.images.find(term='portrait', limit=10)
+
+# In Jupyter, the DataFrame shows thumbnail images
+images.df # Displays images inline
+```
+
+### NER Results (`NerContainer`)
+
+Returned by `tools.ner()`, `tools.ner_nel()`, and `tools.nel()`. Contains named entities extracted from text:
+
+```python
+entities = impresso.tools.ner("Napoleon visited Paris in 1815.")
+
+# Access entities as DataFrame
+print(entities.df) # Shows entity text, type, and position
+```
+
+### Collection/Entity/Topic Containers
+
+Containers returned by `get()` methods (`GetCollectionContainer`, `GetEntityContainer`, `GetTopicContainer`, etc.) typically contain a single item and provide the same `df`, `raw`, and `pydantic` access patterns:
+
+```python
+entity = impresso.entities.get("entity-id")
+
+print(entity.df) # Single-row DataFrame
+print(entity.raw) # Dictionary with entity details
+print(entity.pydantic) # Pydantic model of the entity
+```
+
+## Web App Integration
+
+Most containers include a `url` property that links to the corresponding view in the Impresso web application:
+
+```python
+results = impresso.search.find(term="revolution")
+
+# Open this URL in a browser to see the results in the Impresso App
+print(results.url)
+```
+
+The URL is also displayed automatically in the notebook preview, allowing you to move seamlessly from programmatic exploration to the visual interface.
+
+## Common Patterns
+
+### Collecting All Results
+
+```python
+results = impresso.search.find(term="revolution", limit=100)
+
+# Collect all pages into a single DataFrame (requires `import pandas as pd`)
+all_items = []
+for page in results.pages():
+ all_items.append(page.df)
+
+combined_df = pd.concat(all_items)
+print(f"Total items collected: {len(combined_df)}")
+```
+
+### Conditional Processing
+
+```python
+results = impresso.search.find(term="revolution", limit=50)
+
+for page in results.pages():
+ # Stop if we've found what we're looking for
+ if some_condition:
+ break
+
+ # Process page
+ process_items(page.df)
+```
+
+### Exporting Results
+
+```python
+results = impresso.search.find(term="revolution", limit=100)
+
+# Export to CSV
+results.df.to_csv('results.csv')
+
+# Export to JSON
+import json
+with open('results.json', 'w') as f:
+ json.dump(results.raw, f, indent=2)
+
+# Export as Parquet (efficient for large datasets)
+results.df.to_parquet('results.parquet')
+```
::: impresso.data_container.DataContainer
diff --git a/examples/notebooks/basic.ipynb b/examples/notebooks/basic.ipynb
index 412b9f3..684c1d1 100644
--- a/examples/notebooks/basic.ipynb
+++ b/examples/notebooks/basic.ipynb
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -72,21 +72,23 @@
"
type | \n",
" sourceMedium | \n",
" title | \n",
- " locationEntities | \n",
- " personEntities | \n",
- " organisationEntities | \n",
- " newsAgenciesEntities | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " issueUid | \n",
- " countryCode | \n",
- " providerCode | \n",
- " mediaUid | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
" \n",
" \n",
" | uid | \n",
@@ -109,6 +111,8 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
@@ -117,76 +121,83 @@
" in_cpy | \n",
" ar | \n",
" print | \n",
- " [REDACTED] | \n",
- " [] | \n",
- " [{'uid': '2-50-Pius_XII.', 'count': 1}] | \n",
- " [] | \n",
- " [] | \n",
+ " Europäischer Föderalistenkongreß in Rom | \n",
" [{'uid': 'tm-de-all-v2.0_tp25_de', 'relevance'... | \n",
" 733 | \n",
" 1 | \n",
" de | \n",
" False | \n",
" 1948-11-25T00:00:00+00:00 | \n",
- " luxwort-1948-11-25-a | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " ... | \n",
+ " [6] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-Pius_XII.', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Papst Pius XII', 'mentionCon... | \n",
+ " [] | \n",
+ " [] | \n",
" \n",
" \n",
" | FZG-1950-06-17-a-i0045 | \n",
" in_cpy | \n",
" ar | \n",
" print | \n",
- " [REDACTED] | \n",
- " [] | \n",
- " [{'uid': '2-50-Konrad_Adenauer', 'count': 1}, ... | \n",
- " [] | \n",
- " [] | \n",
+ " Um EJnropa herum Die Furcht vor Krieg un... | \n",
" [{'uid': 'tm-de-all-v2.0_tp86_de', 'relevance'... | \n",
" 1200 | \n",
" 1 | \n",
" de | \n",
" True | \n",
" 1950-06-17T00:00:00+00:00 | \n",
- " FZG-1950-06-17-a | \n",
- " CH | \n",
- " SNL | \n",
- " FZG | \n",
- " newspaper | \n",
+ " ... | \n",
+ " [1] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-Konrad_Adenauer', 'count': 1}, ... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Adenauer', 'mentionConfidenc... | \n",
+ " [] | \n",
+ " [] | \n",
"
\n",
" \n",
" | JDG-1954-11-03-a-i0032 | \n",
" in_cpy | \n",
" ar | \n",
" print | \n",
- " [REDACTED] | \n",
- " [{'uid': '2-54-Moscou', 'count': 1}, {'uid': '... | \n",
- " [{'uid': '2-50-Anthony_Eden', 'count': 1}, {'u... | \n",
- " [{'uid': '2-53-États-Unis', 'count': 1}, {'uid... | \n",
- " [] | \n",
+ " Washington et les négociations avec Moscou | \n",
" [{'uid': 'tm-fr-all-v2.0_tp29_fr', 'relevance'... | \n",
" 717 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
" 1954-11-03T00:00:00+00:00 | \n",
- " JDG-1954-11-03-a | \n",
- " CH | \n",
- " SNL | \n",
- " JDG | \n",
- " newspaper | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Moscou', 'count': 1}, {'uid': '... | \n",
+ " [{'uid': '2-50-Anthony_Eden', 'count': 1}, {'u... | \n",
+ " [{'uid': '2-53-États-Unis', 'count': 1}, {'uid... | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Moscou', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'sir Anthony Eden', 'mentionC... | \n",
+ " [{'surfaceForm': 'Assemblée des Nations Unies'... | \n",
+ " [] | \n",
"
\n",
" \n",
"\n",
+ "3 rows × 28 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 2,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -204,24 +215,34 @@
"metadata": {},
"source": [
"Below, we will search for a term \"European Union\" in the Impresso data.\n",
- "Then we will use the `result` variable, to access and print the excerpts of the first three articles returned by the search query.\n",
+ "Then we will use the `result` variable to access and print the titles of the first three articles returned by the search query.\n",
"\n",
"The `pydantic` property is a [Pydantic](https://docs.pydantic.dev/latest/) model representing the response of the Impresso API. It provides a way to ensure that the data conforms to specified types and constraints, making it easier to work with structured data in a reliable and consistent manner.\n",
- "We use the `data` property of the response to iterate over the page of the results and return excerpts of the articles that contain the search term."
+ "We use the `data` property of the response to iterate over the page of results and print the titles of the articles that contain the search term."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Europäischer Föderalistenkongreß in Rom\n",
+ "Um EJnropa herum Die Furcht vor Krieg un...\n",
+ "Washington et les négociations avec Moscou\n"
+ ]
+ }
+ ],
"source": [
"result = impresso.search.find(\n",
" term=\"European Union\",\n",
" order_by=\"date\",\n",
")\n",
"for article in result.pydantic.data[:3]:\n",
- " print(article.transcript)"
+ " print(article.title)"
]
},
{
@@ -233,7 +254,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -258,16 +279,16 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "SearchResponseSchema(data=[ContentItem(uid='luxwort-1948-11-25-a-i0033', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[NamedEntity(uid='2-50-Pius_XII.', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp25_de', relevance=0.202), TopicMention(uid='tm-de-all-v2.0_tp52_de', relevance=0.16), TopicMention(uid='tm-de-all-v2.0_tp86_de', relevance=0.157), TopicMention(uid='tm-de-all-v2.0_tp14_de', relevance=0.112), TopicMention(uid='tm-de-all-v2.0_tp77_de', relevance=0.112), TopicMention(uid='tm-de-all-v2.0_tp95_de', relevance=0.096), TopicMention(uid='tm-de-all-v2.0_tp24_de', relevance=0.057)], transcriptLength=733.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1948, 11, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxwort-1948-11-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxwort', mediaType='newspaper'), ContentItem(uid='FZG-1950-06-17-a-i0045', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[NamedEntity(uid='2-50-Konrad_Adenauer', count=1.0), NamedEntity(uid='2-50-Georges_Bidault', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp86_de', relevance=0.265), TopicMention(uid='tm-de-all-v2.0_tp95_de', relevance=0.181), TopicMention(uid='tm-de-all-v2.0_tp77_de', relevance=0.154), TopicMention(uid='tm-de-all-v2.0_tp41_de', relevance=0.09), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.08)], transcriptLength=1200.0, totalPages=1.0, languageCode='de', isOnFrontPage=True, publicationDate=datetime.datetime(1950, 6, 17, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1950-06-17-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='JDG-1954-11-03-a-i0032', copyrightStatus='in_cpy', type='ar', 
sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Moscou', count=1.0), NamedEntity(uid='2-54-Viêt_Nam', count=1.0), NamedEntity(uid='2-54-Chine', count=1.0), NamedEntity(uid='2-54-Pékin', count=1.0), NamedEntity(uid='2-54-Allemagne', count=1.0), NamedEntity(uid='2-54-États-Unis', count=1.0)], personEntities=[NamedEntity(uid='2-50-Anthony_Eden', count=1.0), NamedEntity(uid='2-50-Dwight_D._Eisenhower', count=1.0), NamedEntity(uid='2-50-John_Foster_Dulles', count=1.0), NamedEntity(uid='2-50-Konrad_Adenauer', count=1.0), NamedEntity(uid='2-50-Pierre_Mendès_France', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-États-Unis', count=1.0), NamedEntity(uid='2-53-Communauté_européenne_de_défense', count=1.0), NamedEntity(uid='2-53-Union_des_républiques_socialistes_soviétiques', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp29_fr', relevance=0.253), TopicMention(uid='tm-fr-all-v2.0_tp44_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp03_fr', relevance=0.098), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.096), TopicMention(uid='tm-fr-all-v2.0_tp36_fr', relevance=0.082), TopicMention(uid='tm-fr-all-v2.0_tp00_fr', relevance=0.053)], transcriptLength=717.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1954, 11, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1954-11-03-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='JDG-1954-11-27-a-i0041', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Asie', count=1.0), NamedEntity(uid='2-54-Extrême-Orient', count=1.0), NamedEntity(uid='2-54-Corée', count=1.0), NamedEntity(uid='2-54-Proche-Orient', count=1.0), NamedEntity(uid='2-54-Korea', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Australie', count=2.0), 
NamedEntity(uid='2-54-États-Unis', count=1.0), NamedEntity(uid='2-54-Londres', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Organisation_des_Nations_unies', count=1.0), NamedEntity(uid='2-53-Amérique_latine', count=1.0), NamedEntity(uid='2-53-Conseil_économique_et_social_des_Nations_unies', count=2.0), NamedEntity(uid='2-53-Organisation_internationale_du_travail', count=1.0), NamedEntity(uid='2-53-Union_internationale_des_télécommunications', count=1.0), NamedEntity(uid='2-53-Science', count=1.0), NamedEntity(uid='2-53-Union_postale_universelle', count=2.0), NamedEntity(uid='2-53-Banque_mondiale', count=1.0), NamedEntity(uid=\"2-53-Organisation_de_l'aviation_civile_internationale\", count=1.0), NamedEntity(uid='2-53-International_Labour_Organization', count=1.0), NamedEntity(uid='2-53-World_Meteorological_Organization', count=2.0), NamedEntity(uid=\"2-53-Organisation_des_Nations_unies_pour_l'éducation,_la_science_et_la_culture\", count=1.0), NamedEntity(uid='2-53-Accord_général_sur_les_tarifs_douaniers_et_le_commerce', count=1.0), NamedEntity(uid='2-53-Océan_Atlantique', count=1.0), NamedEntity(uid=\"2-53-Organisation_du_traité_de_l'Atlantique_nord\", count=1.0), NamedEntity(uid='2-53-Organisation_européenne_pour_la_recherche_nucléaire', count=1.0), NamedEntity(uid=\"2-53-Union_de_l'Europe_occidentale\", count=2.0), NamedEntity(uid='2-53-Communauté_européenne_de_défense', count=1.0), NamedEntity(uid='2-53-Union_européenne_des_paiements', count=2.0), NamedEntity(uid='2-53-World_Federation_of_Trade_Unions', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.19), TopicMention(uid='tm-fr-all-v2.0_tp10_fr', relevance=0.123), TopicMention(uid='tm-fr-all-v2.0_tp16_fr', relevance=0.103), TopicMention(uid='tm-fr-all-v2.0_tp78_fr', relevance=0.102), TopicMention(uid='tm-fr-all-v2.0_tp72_fr', relevance=0.094), TopicMention(uid='tm-fr-all-v2.0_tp71_fr', 
relevance=0.079)], transcriptLength=783.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1954, 11, 27, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1954-11-27-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='DTT-1962-04-30-a-i0121', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Vereinigte_Staaten', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Westeuropäische_Union', count=1.0), NamedEntity(uid='2-53-Europäische_Union', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp32_de', relevance=0.45), TopicMention(uid='tm-de-all-v2.0_tp52_de', relevance=0.259), TopicMention(uid='tm-de-all-v2.0_tp11_de', relevance=0.072), TopicMention(uid='tm-de-all-v2.0_tp70_de', relevance=0.064)], transcriptLength=767.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1962, 4, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='DTT-1962-04-30-a', countryCode='CH', providerCode='Migros', mediaUid='DTT', mediaType='newspaper'), ContentItem(uid='luxland-1984-12-21-a-i0018', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[NamedEntity(uid='2-50-Jean_Giono', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp92_de', relevance=0.365), TopicMention(uid='tm-de-all-v2.0_tp11_de', relevance=0.28), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.074), TopicMention(uid='tm-de-all-v2.0_tp80_de', relevance=0.056)], transcriptLength=853.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1984, 12, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1984-12-21-a', countryCode='LU', 
providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='arbeitgeber-1991-03-21-a-i0020', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-États-Unis', count=1.0), NamedEntity(uid='2-54-Chine', count=1.0), NamedEntity(uid='2-54-Aix-en-Provence', count=1.0), NamedEntity(uid='2-54-New_York', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-La_Nouvelle-Orléans', count=2.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.242), TopicMention(uid='tm-fr-all-v2.0_tp80_fr', relevance=0.106), TopicMention(uid='tm-fr-all-v2.0_tp08_fr', relevance=0.08), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.073), TopicMention(uid='tm-fr-all-v2.0_tp73_fr', relevance=0.072)], transcriptLength=864.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1991, 3, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-1991-03-21-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='JDG-1993-11-15-a-i0016', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Maastricht', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=2.0), NamedEntity(uid='2-53-Fleet', count=1.0), NamedEntity(uid='2-53-El_País', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.246), TopicMention(uid='tm-fr-all-v2.0_tp37_fr', relevance=0.104), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.099), TopicMention(uid='tm-fr-all-v2.0_tp75_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.077), TopicMention(uid='tm-fr-all-v2.0_tp16_fr', relevance=0.062)], transcriptLength=643.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, 
publicationDate=datetime.datetime(1993, 11, 15, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1993-11-15-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='FedGazDe-1994-01-25-a-i0002', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Mitteleuropa', count=1.0), NamedEntity(uid='2-54-Sowjetunion', count=2.0), NamedEntity(uid='2-54-Europäische_Union', count=1.0), NamedEntity(uid='2-54-Jugoslawien', count=1.0), NamedEntity(uid='2-54-Vereinigte_Staaten', count=3.0), NamedEntity(uid='2-54-Melbourne', count=1.0), NamedEntity(uid='2-54-Japanisches_Kaiserreich', count=1.0), NamedEntity(uid='2-54-Kanada', count=1.0), NamedEntity(uid='2-54-Deutsches_Kaiserreich', count=1.0), NamedEntity(uid='2-54-Dritte_Französische_Republik', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=2.0), NamedEntity(uid='2-54-Mittlerer_Osten', count=1.0), NamedEntity(uid='2-54-Kambodscha', count=1.0), NamedEntity(uid='2-54-Afrika', count=1.0), NamedEntity(uid='2-54-Mittelmeer', count=1.0), NamedEntity(uid='2-54-Mexiko', count=1.0), NamedEntity(uid='2-54-Zweite_Polnische_Republik', count=1.0), NamedEntity(uid='2-54-Argentinien', count=1.0), NamedEntity(uid='2-54-Türkei', count=1.0), NamedEntity(uid='2-54-Schweiz', count=3.0), NamedEntity(uid='2-54-Rio_de_Janeiro', count=1.0), NamedEntity(uid='2-54-Norwegen', count=1.0), NamedEntity(uid='2-54-Schweden', count=2.0), NamedEntity(uid='2-54-Nordstaaten', count=1.0), NamedEntity(uid='2-54-Westeuropa', count=1.0), NamedEntity(uid='2-54-Uruguay', count=1.0), NamedEntity(uid='2-54-Genfersee', count=1.0), NamedEntity(uid='2-54-Genf', count=1.0), NamedEntity(uid='2-54-Luzern', count=1.0), NamedEntity(uid='2-54-Kanton_Zürich', count=1.0), NamedEntity(uid='2-54-Arabische_Liga', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0), NamedEntity(uid='2-54-Osteuropa', count=1.0), 
NamedEntity(uid='2-54-Volksrepublik_China', count=1.0), NamedEntity(uid='2-54-Russland', count=1.0), NamedEntity(uid='2-54-Rhodesien', count=1.0), NamedEntity(uid='2-54-Irak', count=1.0), NamedEntity(uid='2-54-Libyen', count=1.0), NamedEntity(uid='2-54-Serbien', count=1.0), NamedEntity(uid='2-54-Nordkorea', count=1.0), NamedEntity(uid='2-54-New_York_City', count=1.0)], personEntities=[NamedEntity(uid='2-50-Helmut_Hubacher', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Bundespräsident_(Schweiz)', count=1.0), NamedEntity(uid='2-53-Allgemeines_Zoll-_und_Handelsabkommen', count=1.0), NamedEntity(uid='2-53-Europäischer_Wirtschaftsraum', count=1.0), NamedEntity(uid='2-53-Organisation_für_wirtschaftliche_Zusammenarbeit_und_Entwicklung', count=5.0), NamedEntity(uid='2-53-Warschauer_Pakt', count=1.0), NamedEntity(uid='2-53-Bundesverfassungsgericht', count=1.0), NamedEntity(uid='2-53-Demokratie', count=1.0), NamedEntity(uid='2-53-Bundesrepublik', count=1.0), NamedEntity(uid='2-53-Weltbank', count=1.0), NamedEntity(uid='2-53-ABC-Abwehr', count=1.0), NamedEntity(uid='2-53-Europäische_Menschenrechtskonvention', count=1.0), NamedEntity(uid='2-53-Vereinigte_Staaten', count=1.0), NamedEntity(uid='2-53-Swisslex', count=1.0), NamedEntity(uid='2-53-Klima', count=1.0), NamedEntity(uid='2-53-Umweltprogramm_der_Vereinten_Nationen', count=2.0), NamedEntity(uid='2-53-Entwicklungsprogramm_der_Vereinten_Nationen', count=1.0), NamedEntity(uid='2-53-Kernwaffe', count=1.0), NamedEntity(uid='2-53-Europäische_Bank_für_Wiederaufbau_und_Entwicklung', count=1.0), NamedEntity(uid='2-53-CERN', count=1.0), NamedEntity(uid='2-53-Österreichisches_Rotes_Kreuz', count=1.0), NamedEntity(uid='2-53-Wirtschaftlichkeit', count=1.0), NamedEntity(uid='2-53-Umwelt', count=1.0), NamedEntity(uid='2-53-Industriestaat', count=1.0), NamedEntity(uid='2-53-Schweiz', count=1.0), NamedEntity(uid='2-53-Constitution_de_la_Suisse', count=1.0), NamedEntity(uid='2-53-Aristide_Briand', count=1.0), 
NamedEntity(uid='2-53-Charta_der_Vereinten_Nationen', count=1.0), NamedEntity(uid='2-53-Sicherheitsrat_der_Vereinten_Nationen', count=1.0), NamedEntity(uid='2-53-United_Nations', count=1.0), NamedEntity(uid='2-53-Tiré_à_part', count=1.0), NamedEntity(uid='2-53-Vertrag_von_Maastricht', count=1.0), NamedEntity(uid='2-53-Europäische_Wirtschaftsgemeinschaft', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=27914.0, totalPages=90.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 1, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazDe-1994-01-25-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazDe', mediaType='newspaper'), ContentItem(uid='GDL-1994-04-21-a-i0061', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Technologie', count=1.0), NamedEntity(uid='2-53-Open_University', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=204.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 4, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1994-04-21-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='JDG-1994-05-19-a-i0041', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=206.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 5, 19, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1994-05-19-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='GDL-1994-05-19-a-i0037', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], 
organisationEntities=[NamedEntity(uid='2-53-Master_of_Business_Administration', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=202.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 5, 19, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1994-05-19-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='arbeitgeber-1995-01-05-a-i0021', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.435), TopicMention(uid='tm-de-all-v2.0_tp21_de', relevance=0.212), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.073)], transcriptLength=576.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 1, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-1995-01-05-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='luxland-1995-05-05-a-i0062', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Esch_an_der_Alzette', count=1.0)], personEntities=[NamedEntity(uid='2-50-Georges_Prêtre', count=1.0), NamedEntity(uid='2-50-Johannes_Brahms', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.232), TopicMention(uid='tm-fr-all-v2.0_tp60_fr', relevance=0.132), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.11), TopicMention(uid='tm-fr-all-v2.0_tp24_fr', relevance=0.091), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.083), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.075)], transcriptLength=415.0, totalPages=1.0, languageCode='fr', 
isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1995-05-05-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='GDL-1995-05-11-a-i0065', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-Union_européenne', count=1.0), NamedEntity(uid='2-54-Genève', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Europe', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=237.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1995-05-11-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='JDG-1995-05-11-a-i0056', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=232.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-05-11-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='JDG-1995-05-18-a-i0028', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=257.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 18, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-05-18-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='JDG-1995-06-02-a-i0094', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], 
organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=251.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 2, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-06-02-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='GDL-1995-06-02-a-i0134', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-Switzerland', count=1.0), NamedEntity(uid='2-54-European_Union', count=1.0), NamedEntity(uid='2-54-Lausanne', count=1.0), NamedEntity(uid='2-54-Geneva', count=2.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Europe', count=1.0), NamedEntity(uid='2-53-United_Kingdom', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=267.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 2, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1995-06-02-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='GDL-1995-06-07-a-i0039', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-Genève', count=1.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=145.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1995-06-07-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='JDG-1995-06-07-a-i0039', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=149.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 7, 0, 0, 
tzinfo=TzInfo(UTC)), issueUid='JDG-1995-06-07-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='JDG-1995-10-17-a-i0244', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=254.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 10, 17, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-10-17-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='luxland-1995-11-10-a-i0058', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Bertrange_(Luxembourg)', count=1.0), NamedEntity(uid='2-54-Diekirch', count=1.0), NamedEntity(uid='2-54-Stadt_Brüssel', count=1.0), NamedEntity(uid='2-54-Palermo', count=1.0), NamedEntity(uid='2-54-Marseille', count=1.0)], personEntities=[NamedEntity(uid='2-50-Yves_Klein', count=1.0), NamedEntity(uid='2-50-Guy_Foissy', count=1.0), NamedEntity(uid='2-50-Victor_Haïm', count=1.0), NamedEntity(uid='2-50-John_Locke', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Théâtre_des_Capucins', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.228), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.142), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.122), TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.111), TopicMention(uid='tm-fr-all-v2.0_tp49_fr', relevance=0.088), TopicMention(uid='tm-fr-all-v2.0_tp61_fr', relevance=0.086)], transcriptLength=742.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 11, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1995-11-10-a', countryCode='LU', providerCode='BNL', 
mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='GDL-1996-08-03-a-i0216', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp94_fr', relevance=0.264), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.241), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.181)], transcriptLength=522.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='LLE-1996-08-03-a-i0157', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp94_fr', relevance=0.241), TopicMention(uid='tm-fr-all-v2.0_tp80_fr', relevance=0.158), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.058)], transcriptLength=127.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper'), ContentItem(uid='EXP-1996-08-03-a-i0211', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-Suisse', count=2.0), NamedEntity(uid='2-54-Londres', count=1.0)], personEntities=[NamedEntity(uid='2-50-Charles_Burney', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Football', count=1.0), NamedEntity(uid='2-53-Royal_Albert_Hall', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.544), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.1), 
TopicMention(uid='tm-fr-all-v2.0_tp50_fr', relevance=0.074)], transcriptLength=265.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper'), ContentItem(uid='IMP-1996-08-03-a-i0188', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.531), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.092), TopicMention(uid='tm-fr-all-v2.0_tp50_fr', relevance=0.069)], transcriptLength=329.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper'), ContentItem(uid='JDG-1996-10-07-a-i0157', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[NamedEntity(uid='2-50-Piotr_Ilitch_Tchaïkovski', count=1.0), NamedEntity(uid='2-50-Frédéric_Chopin', count=1.0), NamedEntity(uid='2-50-Wolfgang_Amadeus_Mozart', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.46), TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.11), TopicMention(uid='tm-fr-all-v2.0_tp85_fr', relevance=0.087), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.079), TopicMention(uid='tm-fr-all-v2.0_tp94_fr', relevance=0.078)], transcriptLength=518.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 10, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1996-10-07-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), 
ContentItem(uid='GDL-1997-01-21-a-i0284', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[NamedEntity(uid='2-54-Rome', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Treaty_of_Rome', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=181.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 1, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1997-01-21-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper'), ContentItem(uid='JDG-1997-01-21-a-i0263', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=177.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 1, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1997-01-21-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper'), ContentItem(uid='FZG-1997-02-10-a-i0102', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp67_de', relevance=0.767), TopicMention(uid='tm-de-all-v2.0_tp70_de', relevance=0.069)], transcriptLength=179.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 2, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1997-02-10-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='luxland-1997-06-13-a-i0468', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[NamedEntity(uid='2-50-Pierre_Werner', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[], 
transcriptLength=2789.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 6, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1997-06-13-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-1997-06-13-a-i0466', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=2463.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 6, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1997-06-13-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-1997-08-22-a-i0038', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.572), TopicMention(uid='tm-fr-all-v2.0_tp44_fr', relevance=0.064)], transcriptLength=240.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 8, 22, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1997-08-22-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='FZG-1998-02-07-a-i0054', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.378), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.232), TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.12), TopicMention(uid='tm-de-all-v2.0_tp42_de', relevance=0.087)], transcriptLength=729.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, 
publicationDate=datetime.datetime(1998, 2, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1998-02-07-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='luxland-1998-05-29-a-i0174', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Frome', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=2.0)], personEntities=[NamedEntity(uid='2-50-Henri_Tudor', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=83.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1998, 5, 29, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1998-05-29-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='FedGazDe-1998-08-11-a-i0001', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Freiburg_im_Üechtland', count=1.0), NamedEntity(uid='2-54-Bern', count=1.0), NamedEntity(uid='2-54-Genève', count=1.0), NamedEntity(uid='2-54-Kanton_Zürich', count=1.0), NamedEntity(uid='2-54-Cambridge', count=1.0), NamedEntity(uid='2-54-Westeuropa', count=1.0), NamedEntity(uid='2-54-Dänemark', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-54-Deutschland', count=1.0), NamedEntity(uid='2-54-Frankreich', count=1.0), NamedEntity(uid='2-54-Spanien', count=1.0), NamedEntity(uid='2-54-Schweden', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0), NamedEntity(uid='2-54-Kanada', count=1.0), NamedEntity(uid='2-54-Königreich_Griechenland', count=1.0), NamedEntity(uid='2-54-Königreich_Ungarn', count=1.0), NamedEntity(uid='2-54-Mexiko', count=1.0), NamedEntity(uid='2-54-Polen', count=1.0), NamedEntity(uid='2-54-Tschechoslowakei', count=1.0), NamedEntity(uid='2-54-Türkei', count=1.0), NamedEntity(uid='2-54-Berlin', count=1.0), 
NamedEntity(uid='2-54-New_York_City', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0), NamedEntity(uid='2-54-Belgien', count=1.0), NamedEntity(uid='2-54-Japanisches_Kaiserreich', count=1.0), NamedEntity(uid='2-54-Niederlande', count=2.0), NamedEntity(uid='2-54-Schweiz', count=1.0), NamedEntity(uid='2-54-Aachen', count=1.0)], personEntities=[NamedEntity(uid='2-50-Leo_Schürmann', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Bundesrat_(Schweiz)', count=2.0), NamedEntity(uid='2-53-Deutsche_Bundesbank', count=2.0), NamedEntity(uid='2-53-Schweizerische_Nationalbank', count=6.0), NamedEntity(uid='2-53-US-Dollar', count=1.0), NamedEntity(uid='2-53-Federal_Reserve_System', count=2.0), NamedEntity(uid='2-53-Internationaler_Währungsfonds', count=1.0), NamedEntity(uid='2-53-Eidgenössisches_Finanzdepartement', count=1.0), NamedEntity(uid='2-53-Schweizer_Demokraten', count=1.0), NamedEntity(uid='2-53-Evangelische_Volkspartei', count=1.0), NamedEntity(uid='2-53-Sozialdemokratische_Partei_Deutschlands', count=1.0), NamedEntity(uid='2-53-Schweizerische_Volkspartei', count=1.0), NamedEntity(uid='2-53-Europäische_Wirtschafts-_und_Währungsunion', count=2.0), NamedEntity(uid='2-53-Schweizer_Franken', count=2.0), NamedEntity(uid='2-53-Bundesverfassung_der_Schweizerischen_Eidgenossenschaft', count=1.0), NamedEntity(uid='2-53-Geld', count=1.0), NamedEntity(uid='2-53-Währungspolitik', count=1.0), NamedEntity(uid='2-53-Volkswirtschaft', count=1.0), NamedEntity(uid='2-53-Europäische_Zentralbank', count=5.0), NamedEntity(uid='2-53-Europäisches_System_der_Zentralbanken', count=1.0), NamedEntity(uid='2-53-Oesterreichische_Nationalbank', count=2.0), NamedEntity(uid='2-53-Europäische_Gemeinschaft', count=1.0), NamedEntity(uid='2-53-Sten', count=1.0), NamedEntity(uid='2-53-Europäische_Union', count=2.0), NamedEntity(uid='2-53-European_Parliament', count=1.0), NamedEntity(uid='2-53-Landesregierung', count=1.0), NamedEntity(uid='2-53-Sozialdemokratische_Partei_Österreichs', 
count=1.0), NamedEntity(uid='2-53-Bastian_(Vorname)', count=1.0), NamedEntity(uid='2-53-Schweizer_Bankwesen', count=1.0), NamedEntity(uid='2-53-Arbeitsgruppe', count=1.0), NamedEntity(uid='2-53-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-53-Belgien', count=1.0), NamedEntity(uid='2-53-Gotham_City', count=1.0), NamedEntity(uid='2-53-Schweiz', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=29135.0, totalPages=74.0, languageCode='de', isOnFrontPage=True, publicationDate=datetime.datetime(1998, 8, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazDe-1998-08-11-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazDe', mediaType='newspaper'), ContentItem(uid='FedGazFr-1998-08-11-a-i0001', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Suisse', count=1.0), NamedEntity(uid='2-54-Berne', count=1.0), NamedEntity(uid='2-54-Zurich', count=1.0), NamedEntity(uid='2-54-Allemagne', count=1.0), NamedEntity(uid='2-54-Zürich', count=1.0), NamedEntity(uid='2-54-Cambridge', count=1.0), NamedEntity(uid=\"2-54-Europe_de_l'Ouest\", count=1.0), NamedEntity(uid='2-54-Rhinow', count=1.0), NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Espagne', count=1.0), NamedEntity(uid='2-54-Japon', count=1.0), NamedEntity(uid='2-54-Hongrie', count=1.0), NamedEntity(uid='2-54-Mexique', count=1.0), NamedEntity(uid='2-54-Pologne', count=1.0), NamedEntity(uid='2-54-Turquie', count=1.0), NamedEntity(uid='2-54-Berlin', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0), NamedEntity(uid='2-54-Vienne_(Autriche)', count=1.0), NamedEntity(uid='2-54-États-Unis', count=1.0), NamedEntity(uid='2-54-Italie', count=1.0), NamedEntity(uid='2-54-Belgique', count=1.0), NamedEntity(uid='2-54-Suède', count=1.0), NamedEntity(uid='2-54-Autriche', count=1.0)], personEntities=[NamedEntity(uid='2-50-Peter_Nobel', count=1.0), NamedEntity(uid='2-50-Leo_Schürmann', count=1.0)], 
organisationEntities=[NamedEntity(uid='2-53-Franc_suisse', count=1.0), NamedEntity(uid='2-53-États-Unis', count=2.0), NamedEntity(uid='2-53-Fonds_monétaire_international', count=1.0), NamedEntity(uid='2-53-Association_suisse_des_banquiers', count=1.0), NamedEntity(uid='2-53-Assemblée_fédérale_(Suisse)', count=1.0), NamedEntity(uid='2-53-Département_fédéral_des_finances', count=1.0), NamedEntity(uid='2-53-Union_démocratique_fédérale', count=1.0), NamedEntity(uid='2-53-Démocrates_suisses', count=1.0), NamedEntity(uid='2-53-Union_suisse_des_arts_et_métiers', count=1.0), NamedEntity(uid='2-53-Union_économique_et_monétaire', count=1.0), NamedEntity(uid='2-53-Parti_socialiste_suisse', count=2.0), NamedEntity(uid='2-53-Union_démocratique_du_centre', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=2.0), NamedEntity(uid='2-53-Alliance_des_indépendants', count=1.0), NamedEntity(uid='2-53-Vorort', count=1.0), NamedEntity(uid='2-53-Parti_radical-démocratique', count=1.0), NamedEntity(uid='2-53-Union_syndicale_suisse', count=1.0), NamedEntity(uid='2-53-Parti_suisse_du_travail', count=1.0), NamedEntity(uid='2-53-Fédéralisme', count=1.0), NamedEntity(uid='2-53-Banque_nationale_suisse', count=1.0), NamedEntity(uid='2-53-Royaume-Uni', count=1.0), NamedEntity(uid='2-53-Banque_centrale_européenne', count=2.0), NamedEntity(uid='2-53-Pologne', count=1.0), NamedEntity(uid='2-53-Conseil_européen', count=1.0), NamedEntity(uid='2-53-Suisse', count=1.0), NamedEntity(uid='2-53-Pays-Bas_(pays_constitutif)', count=1.0), NamedEntity(uid='2-53-Schweizerische_Nationalbank', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=37607.0, totalPages=75.0, languageCode='fr', isOnFrontPage=True, publicationDate=datetime.datetime(1998, 8, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazFr-1998-08-11-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazFr', mediaType='newspaper'), ContentItem(uid='luxland-1998-11-13-a-i0062', copyrightStatus='in_cpy', type='ar', 
sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.189), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp19_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.073), TopicMention(uid='tm-fr-all-v2.0_tp57_fr', relevance=0.06)], transcriptLength=116.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1998, 11, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1998-11-13-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='FedGazDe-1999-06-01-a-i0001', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Bern', count=1.0), NamedEntity(uid='2-54-Liechtenstein', count=2.0), NamedEntity(uid='2-54-Schweiz', count=3.0), NamedEntity(uid='2-54-Kanton_Appenzell', count=1.0), NamedEntity(uid='2-54-Europäische_Freihandelsassoziation', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0), NamedEntity(uid='2-54-Japan', count=1.0), NamedEntity(uid='2-54-Basel', count=1.0), NamedEntity(uid='2-54-Kanton_Zürich', count=1.0), NamedEntity(uid='2-54-Niederlande', count=1.0), NamedEntity(uid='2-54-Horden', count=1.0), NamedEntity(uid='2-54-Chicago', count=1.0), NamedEntity(uid='2-54-Zentralamerika', count=1.0), NamedEntity(uid='2-54-Belgien', count=1.0), NamedEntity(uid='2-54-Dänemark', count=1.0), NamedEntity(uid='2-54-Finnland', count=1.0), NamedEntity(uid='2-54-Norwegen', count=1.0), NamedEntity(uid='2-54-Schweden', count=1.0), NamedEntity(uid='2-54-Spanien', count=1.0), NamedEntity(uid='2-54-Alpen', count=1.0), NamedEntity(uid='2-54-Italien', count=1.0), NamedEntity(uid='2-54-Zurich', count=1.0), NamedEntity(uid='2-54-Helsinki', count=1.0), NamedEntity(uid='2-54-Vereinigte_Staaten', 
count=2.0), NamedEntity(uid='2-54-Nordwestschweiz', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-54-Kanada', count=1.0), NamedEntity(uid='2-54-Nordostschweiz', count=1.0), NamedEntity(uid='2-54-Kanton_Tessin', count=1.0), NamedEntity(uid='2-54-Frankreich', count=1.0), NamedEntity(uid='2-54-Bussen', count=1.0), NamedEntity(uid='2-54-Eschweiler_(Allemagne)', count=1.0)], personEntities=[NamedEntity(uid='2-50-Eduard_David', count=1.0), NamedEntity(uid='2-50-Ruth_Dreifuss', count=2.0), NamedEntity(uid='2-50-François_Couchepin', count=1.0), NamedEntity(uid='2-50-Julius_Bär', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Bundesrat_(Schweiz)', count=1.0), NamedEntity(uid='2-53-Kanton_Appenzell_Ausserrhoden', count=1.0), NamedEntity(uid='2-53-Europäische_Union', count=1.0), NamedEntity(uid='2-53-Freihandelsabkommen', count=1.0), NamedEntity(uid='2-53-International', count=1.0), NamedEntity(uid='2-53-Welthandelsorganisation', count=3.0), NamedEntity(uid='2-53-Weltgesundheitsorganisation', count=3.0), NamedEntity(uid='2-53-Eidgenössisches_Departement_des_Innern', count=2.0), NamedEntity(uid='2-53-Kontrollstelle', count=1.0), NamedEntity(uid='2-53-Bundesverwaltung', count=2.0), NamedEntity(uid='2-53-Soziale_Sicherheit', count=1.0), NamedEntity(uid='2-53-Washington,_D.C.', count=1.0), NamedEntity(uid='2-53-New_York_City', count=1.0), NamedEntity(uid='2-53-HIV', count=1.0), NamedEntity(uid='2-53-Buchstabe', count=1.0), NamedEntity(uid='2-53-Gute_klinische_Praxis', count=1.0), NamedEntity(uid='2-53-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-53-Swiss_Olympic', count=2.0), NamedEntity(uid='2-53-Test', count=1.0), NamedEntity(uid='2-53-Liste', count=1.0), NamedEntity(uid='2-53-Bundesamt_für_Gesundheit', count=1.0), NamedEntity(uid='2-53-Bundesamt_für_Sport', count=1.0), NamedEntity(uid='2-53-Bundesverfassung_der_Schweizerischen_Eidgenossenschaft', count=1.0), 
NamedEntity(uid=\"2-53-Département_fédéral_de_l'intérieur\", count=1.0), NamedEntity(uid='2-53-Europäische_Freihandelsassoziation', count=1.0), NamedEntity(uid='2-53-Europäischer_Wirtschaftsraum', count=1.0), NamedEntity(uid='2-53-Europäische_Wirtschaftsgemeinschaft', count=1.0), NamedEntity(uid='2-53-Food_and_Drug_Administration', count=1.0), NamedEntity(uid='2-53-Allgemeines_Abkommen_über_den_Handel_mit_Dienstleistungen', count=2.0), NamedEntity(uid='2-53-Allgemeines_Zoll-_und_Handelsabkommen', count=2.0), NamedEntity(uid='2-53-Vereinte_Nationen', count=2.0), NamedEntity(uid='2-53-Verkehr', count=1.0), NamedEntity(uid='2-53-Energie', count=1.0), NamedEntity(uid='2-53-Schweiz', count=1.0), NamedEntity(uid='2-53-Bundesrechtspflegegesetz', count=1.0)], newsAgenciesEntities=[NamedEntity(uid='4-55-Reuters', count=201.0)], topics=[], transcriptLength=76080.0, totalPages=205.0, languageCode='de', isOnFrontPage=True, publicationDate=datetime.datetime(1999, 6, 1, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazDe-1999-06-01-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazDe', mediaType='newspaper'), ContentItem(uid='FZG-1999-08-13-a-i0008', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.556), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.13), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.11)], transcriptLength=516.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1999, 8, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1999-08-13-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='luxland-1999-11-05-a-i0028', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Luxembourg', 
count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Union_européenne', count=1.0), NamedEntity(uid='2-53-Pétrusse', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.149), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.144), TopicMention(uid='tm-fr-all-v2.0_tp57_fr', relevance=0.118), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.103)], transcriptLength=90.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1999, 11, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1999-11-05-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2000-11-24-a-i0030', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Union_européenne', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Union_européenne', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.169), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.141), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp57_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.058), TopicMention(uid='tm-fr-all-v2.0_tp19_fr', relevance=0.053)], transcriptLength=104.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2000, 11, 24, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2000-11-24-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='LLE-2001-04-23-a-i0253', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], 
topics=[TopicMention(uid='tm-fr-all-v2.0_tp89_fr', relevance=0.177), TopicMention(uid='tm-fr-all-v2.0_tp44_fr', relevance=0.168), TopicMention(uid='tm-fr-all-v2.0_tp00_fr', relevance=0.09), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.074), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.069), TopicMention(uid='tm-fr-all-v2.0_tp36_fr', relevance=0.066), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.057), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.051)], transcriptLength=1724.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 4, 23, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2001-04-23-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper'), ContentItem(uid='FZG-2001-08-04-a-i0012', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.726), TopicMention(uid='tm-de-all-v2.0_tp67_de', relevance=0.074), TopicMention(uid='tm-de-all-v2.0_tp62_de', relevance=0.069)], transcriptLength=1395.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 8, 4, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2001-08-04-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='FZG-2001-10-04-a-i0220', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp62_de', relevance=0.377), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.21), TopicMention(uid='tm-de-all-v2.0_tp83_de', relevance=0.072)], transcriptLength=91.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 10, 4, 0, 0, 
tzinfo=TzInfo(UTC)), issueUid='FZG-2001-10-04-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='luxland-2001-11-16-a-i0018', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Luxembourg_City', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-European_Commission', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=353.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 11, 16, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2001-11-16-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2001-11-16-a-i0066', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.174), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.138), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.098), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.077), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.055)], transcriptLength=113.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 11, 16, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2001-11-16-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2002-01-11-a-i0009', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-États-Unis', count=1.0)], personEntities=[NamedEntity(uid='2-50-George_W._Bush', count=1.0), NamedEntity(uid='2-50-François_Biltgen', count=1.0), NamedEntity(uid='2-50-Jacques_Chirac', count=1.0)], 
organisationEntities=[NamedEntity(uid='2-53-Commission_européenne', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=1.0), NamedEntity(uid='2-53-Conseil_européen', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.262), TopicMention(uid='tm-fr-all-v2.0_tp95_fr', relevance=0.229), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.095), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.067), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.064), TopicMention(uid='tm-fr-all-v2.0_tp36_fr', relevance=0.062)], transcriptLength=1778.0, totalPages=2.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 1, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2002-01-11-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2002-04-12-a-i0031', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Israel', count=1.0), NamedEntity(uid='2-54-West_Bank', count=1.0)], personEntities=[NamedEntity(uid='2-50-Romano_Prodi', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=835.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 4, 12, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2002-04-12-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='IMP-2002-06-11-a-i0198', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.14), TopicMention(uid='tm-fr-all-v2.0_tp78_fr', relevance=0.126), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.124), TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.102), 
TopicMention(uid='tm-fr-all-v2.0_tp77_fr', relevance=0.096), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.069), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.055)], transcriptLength=533.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 6, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-2002-06-11-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper'), ContentItem(uid='EXP-2002-06-11-a-i0229', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Lausanne', count=4.0), NamedEntity(uid='2-54-Berlin', count=5.0), NamedEntity(uid='2-54-Barcelone', count=1.0), NamedEntity(uid='2-54-Santiago', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0), NamedEntity(uid='2-54-Bordeaux', count=1.0), NamedEntity(uid='2-54-Londres', count=4.0)], personEntities=[NamedEntity(uid='2-50-Louisa_Hutton', count=4.0)], organisationEntities=[NamedEntity(uid='2-53-Architectural_Association_School_of_Architecture', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp77_fr', relevance=0.139), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.126), TopicMention(uid='tm-fr-all-v2.0_tp78_fr', relevance=0.11), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.093), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.088), TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.067), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.062)], transcriptLength=605.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 6, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-2002-06-11-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper'), ContentItem(uid='luxland-2003-07-04-a-i0007', 
copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Warschau', count=1.0), NamedEntity(uid='2-54-Niederlande', count=1.0), NamedEntity(uid='2-54-Den_Haag', count=1.0), NamedEntity(uid='2-54-Dublin', count=1.0), NamedEntity(uid='2-54-Polen', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Den_Haag', count=1.0), NamedEntity(uid='2-53-Europäischer_Konvent', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.241), TopicMention(uid='tm-de-all-v2.0_tp61_de', relevance=0.166), TopicMention(uid='tm-de-all-v2.0_tp29_de', relevance=0.08), TopicMention(uid='tm-de-all-v2.0_tp14_de', relevance=0.079), TopicMention(uid='tm-de-all-v2.0_tp86_de', relevance=0.063), TopicMention(uid='tm-de-all-v2.0_tp24_de', relevance=0.061)], transcriptLength=1759.0, totalPages=2.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2003, 7, 4, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2003-07-04-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2003-10-31-a-i0029', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Union_européenne', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.227), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.092), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.068), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.061), TopicMention(uid='tm-fr-all-v2.0_tp69_fr', relevance=0.057)], transcriptLength=121.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2003, 10, 31, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2003-10-31-a', 
countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-01-30-a-i0049', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Lille', count=1.0), NamedEntity(uid='2-54-France', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Interreg', count=1.0), NamedEntity(uid='2-53-European_Union', count=2.0), NamedEntity(uid='2-53-Euro', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=418.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 1, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-01-30-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-04-30-a-i0093', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[NamedEntity(uid='2-50-Daniel_Gros', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=2561.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 4, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-04-30-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-04-30-a-i0096', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Virginia', count=1.0), NamedEntity(uid='2-54-Vilnius', count=2.0), NamedEntity(uid='2-54-Lithuania', count=1.0), NamedEntity(uid='2-54-Latvia', count=1.0), NamedEntity(uid='2-54-Estonia', count=1.0), NamedEntity(uid='2-54-Spain', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Bratislava', count=1.0), NamedEntity(uid='2-53-European_Union', count=1.0), 
NamedEntity(uid='2-53-European_Commission', count=1.0), NamedEntity(uid='2-53-Differdange', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=950.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 4, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-04-30-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-05-07-a-i0198', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-United_States', count=1.0), NamedEntity(uid='2-54-China', count=1.0), NamedEntity(uid='2-54-France', count=2.0), NamedEntity(uid='2-54-Europe', count=1.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=2298.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-05-07-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-05-21-a-i0025', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=594.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-05-21-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='FZG-2004-05-28-a-i0093', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.275), TopicMention(uid='tm-de-all-v2.0_tp06_de', relevance=0.272), 
TopicMention(uid='tm-de-all-v2.0_tp82_de', relevance=0.097), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.07), TopicMention(uid='tm-de-all-v2.0_tp48_de', relevance=0.058)], transcriptLength=162.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 28, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2004-05-28-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='LLE-2004-05-28-a-i0100', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Suisse', count=5.0), NamedEntity(uid='2-54-Eindhoven', count=1.0), NamedEntity(uid='2-54-Belgique', count=1.0), NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Allemagne', count=1.0), NamedEntity(uid='2-54-Italie', count=1.0), NamedEntity(uid='2-54-Royaume-Uni', count=1.0), NamedEntity(uid='2-54-Pays-Bas', count=1.0)], personEntities=[NamedEntity(uid='2-50-Samuel_Schmid', count=6.0), NamedEntity(uid='2-50-Christophe_Keckeis', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Direction_départementale_de_la_Protection_des_populations', count=2.0), NamedEntity(uid='2-53-Royaume', count=1.0), NamedEntity(uid='2-53-Pays', count=1.0), NamedEntity(uid='2-53-Roger_Pratt', count=1.0), NamedEntity(uid='2-53-Conseil_de_sécurité_des_Nations_unies', count=1.0), NamedEntity(uid='2-53-Force_de_stabilisation', count=1.0), NamedEntity(uid=\"2-53-Force_de_l'Union_européenne\", count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.178), TopicMention(uid='tm-fr-all-v2.0_tp88_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.083), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.078), TopicMention(uid='tm-fr-all-v2.0_tp20_fr', relevance=0.065)], transcriptLength=955.0, totalPages=1.0, languageCode='fr', 
isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 28, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2004-05-28-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper'), ContentItem(uid='LLE-2004-08-03-a-i0245', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Strasbourg', count=4.0), NamedEntity(uid='2-54-Canton_de_Saint-Estève', count=1.0)], personEntities=[NamedEntity(uid='2-50-Zaha_Hadid', count=6.0), NamedEntity(uid='2-50-Ludwig_Mies_van_der_Rohe', count=2.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.424), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.176), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.058)], transcriptLength=453.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2004-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper'), ContentItem(uid='FZG-2004-09-03-a-i0049', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp48_de', relevance=0.227), TopicMention(uid='tm-de-all-v2.0_tp06_de', relevance=0.209), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.08), TopicMention(uid='tm-de-all-v2.0_tp00_de', relevance=0.059)], transcriptLength=175.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 9, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2004-09-03-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='IMP-2004-09-30-a-i0199', copyrightStatus='in_cpy', type='ar', sourceMedium='print', 
title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.18), TopicMention(uid='tm-fr-all-v2.0_tp03_fr', relevance=0.132), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.106), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.097), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.067)], transcriptLength=474.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 9, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-2004-09-30-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper'), ContentItem(uid='EXP-2004-09-30-a-i0226', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Berne', count=2.0), NamedEntity(uid='2-54-Europe', count=3.0), NamedEntity(uid='2-54-Sarajevo', count=1.0), NamedEntity(uid='2-54-Italie', count=1.0), NamedEntity(uid='2-54-Portugal', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-Union_européenne', count=3.0), NamedEntity(uid='2-53-Bruxelles', count=1.0), NamedEntity(uid='2-53-Suisse', count=1.0), NamedEntity(uid=\"2-53-Organisation_du_traité_de_l'Atlantique_nord\", count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.271), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.119), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.082), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.074)], transcriptLength=467.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 9, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-2004-09-30-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper'), ContentItem(uid='luxland-2004-11-05-a-i0037', 
copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.145), TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.136), TopicMention(uid='tm-fr-all-v2.0_tp90_fr', relevance=0.103), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.097), TopicMention(uid='tm-fr-all-v2.0_tp40_fr', relevance=0.082)], transcriptLength=143.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 11, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-11-05-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-12-03-a-i0060', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0)], personEntities=[], organisationEntities=[NamedEntity(uid='2-53-European_Union', count=2.0)], newsAgenciesEntities=[], topics=[], transcriptLength=1023.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-12-03-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-12-03-a-i0167', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.177), TopicMention(uid='tm-de-all-v2.0_tp03_de', relevance=0.151), TopicMention(uid='tm-de-all-v2.0_tp58_de', relevance=0.106), TopicMention(uid='tm-de-all-v2.0_tp30_de', relevance=0.1), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.085), 
TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.077)], transcriptLength=849.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-12-03-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2004-12-10-a-i0123', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0), NamedEntity(uid='2-54-Strasbourg', count=1.0), NamedEntity(uid='2-54-Lisbon', count=1.0)], personEntities=[NamedEntity(uid='2-50-Morgan_Meyer', count=1.0), NamedEntity(uid='2-50-Louis_Pasteur', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Université_du_Luxembourg', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=1669.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-12-10-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='FZG-2004-12-17-a-i0121', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp06_de', relevance=0.316), TopicMention(uid='tm-de-all-v2.0_tp48_de', relevance=0.199), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.075)], transcriptLength=112.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 17, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2004-12-17-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper'), ContentItem(uid='luxland-2005-01-21-a-i0027', copyrightStatus='in_cpy', type='ar', 
sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=476.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 1, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-01-21-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-02-25-a-i0067', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Luxemburg', count=1.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.235), TopicMention(uid='tm-de-all-v2.0_tp90_de', relevance=0.145), TopicMention(uid='tm-de-all-v2.0_tp11_de', relevance=0.141), TopicMention(uid='tm-de-all-v2.0_tp82_de', relevance=0.063)], transcriptLength=99.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 2, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-02-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-03-25-a-i0031', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=1493.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 3, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-03-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-04-22-a-i0124', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, 
locationEntities=[NamedEntity(uid='2-54-United_States', count=2.0), NamedEntity(uid='2-54-Luxembourg', count=2.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=2157.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 4, 22, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-04-22-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-07-01-a-i0035', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0)], personEntities=[NamedEntity(uid='2-50-John_Locke', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=704.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 7, 1, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-07-01-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-09-09-a-i0019', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Rome', count=1.0)], personEntities=[NamedEntity(uid='2-50-Jacques_Chirac', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[], transcriptLength=1079.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 9, 9, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-09-09-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-11-11-a-i0136', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, 
locationEntities=[NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Austria', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Ankara', count=1.0), NamedEntity(uid='2-54-Cologne', count=1.0)], personEntities=[NamedEntity(uid='2-50-Denis_de_Rougemont', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Foreign_Policy_Centre', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=2986.0, totalPages=3.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 11, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-11-11-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2005-11-25-a-i0168', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Boston', count=1.0)], personEntities=[NamedEntity(uid='2-50-Jean-Claude_Juncker', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Laurence', count=1.0), NamedEntity(uid='2-53-Yves_Mersch', count=1.0)], newsAgenciesEntities=[], topics=[], transcriptLength=224.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 11, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-11-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2006-10-20-a-i0072', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Luxembourg', count=1.0)], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.201), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.165), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.115), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', 
relevance=0.112), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.079)], transcriptLength=162.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2006, 10, 20, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2006-10-20-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2006-12-22-a-i0172', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Stadt_Brüssel', count=1.0), NamedEntity(uid='2-54-Provinz_Namur', count=1.0), NamedEntity(uid='2-54-Esch_an_der_Alzette', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0)], personEntities=[NamedEntity(uid='2-50-Jean-Claude_Juncker', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp58_de', relevance=0.255), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.234), TopicMention(uid='tm-de-all-v2.0_tp43_de', relevance=0.079), TopicMention(uid='tm-de-all-v2.0_tp21_de', relevance=0.064), TopicMention(uid='tm-de-all-v2.0_tp59_de', relevance=0.06), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.051)], transcriptLength=939.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2006, 12, 22, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2006-12-22-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2007-05-11-a-i0041', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Frankreich', count=1.0), NamedEntity(uid='2-54-Manchester', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0)], personEntities=[NamedEntity(uid='2-50-Maurice_Ravel', count=1.0), NamedEntity(uid='2-50-Ludwig_van_Beethoven', count=1.0), NamedEntity(uid='2-50-Johann_Sebastian_Bach', count=1.0), 
NamedEntity(uid='2-50-Claude_Lorrain', count=1.0), NamedEntity(uid='2-50-Karl_Marx', count=1.0), NamedEntity(uid='2-50-Joseph_Haydn', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.538), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.188), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.079)], transcriptLength=632.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2007, 5, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2007-05-11-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='luxland-2007-08-10-a-i0040', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Europäische_Union', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0), NamedEntity(uid='2-54-Berlin', count=1.0), NamedEntity(uid='2-54-Schweiz', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0)], personEntities=[NamedEntity(uid='2-50-Colin_Davis', count=1.0)], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.455), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.138), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.105), TopicMention(uid='tm-de-all-v2.0_tp41_de', relevance=0.085)], transcriptLength=512.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2007, 8, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2007-08-10-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper'), ContentItem(uid='arbeitgeber-2008-02-14-a-i0022', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.439), 
TopicMention(uid='tm-de-all-v2.0_tp29_de', relevance=0.185), TopicMention(uid='tm-de-all-v2.0_tp83_de', relevance=0.052), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.051)], transcriptLength=571.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2008, 2, 14, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2008-02-14-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='arbeitgeber-2008-02-14-a-i0024', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.467), TopicMention(uid='tm-de-all-v2.0_tp89_de', relevance=0.171), TopicMention(uid='tm-de-all-v2.0_tp29_de', relevance=0.168)], transcriptLength=585.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2008, 2, 14, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2008-02-14-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='LLE-2008-10-02-a-i0112', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.277), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.141), TopicMention(uid='tm-fr-all-v2.0_tp76_fr', relevance=0.076)], transcriptLength=325.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2008, 10, 2, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2008-10-02-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper'), ContentItem(uid='IMP-2010-03-31-a-i0127', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[], 
personEntities=[], organisationEntities=[], newsAgenciesEntities=[NamedEntity(uid='4-55-ATS_SDA', count=3.0), NamedEntity(uid='4-55-AFP', count=1.0)], topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.097), TopicMention(uid='tm-fr-all-v2.0_tp65_fr', relevance=0.079), TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.064), TopicMention(uid='tm-fr-all-v2.0_tp27_fr', relevance=0.055), TopicMention(uid='tm-fr-all-v2.0_tp37_fr', relevance=0.055)], transcriptLength=187.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 3, 31, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-2010-03-31-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper'), ContentItem(uid='EXP-2010-03-31-a-i0128', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Bâle', count=4.0), NamedEntity(uid='2-54-Canton_de_Bâle-Ville', count=1.0), NamedEntity(uid='2-54-Lugano', count=2.0)], personEntities=[NamedEntity(uid='2-50-Alain_Lombard', count=2.0), NamedEntity(uid='2-50-Vladimir_Ashkenazy', count=1.0), NamedEntity(uid='2-50-Sol_Gabetta', count=2.0), NamedEntity(uid='2-50-Heinz_Holliger', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Kunstmuseum_(Bâle)', count=1.0), NamedEntity(uid='2-53-Siné_Hebdo', count=2.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.104), TopicMention(uid='tm-fr-all-v2.0_tp65_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp27_fr', relevance=0.07), TopicMention(uid='tm-fr-all-v2.0_tp37_fr', relevance=0.064), TopicMention(uid='tm-fr-all-v2.0_tp53_fr', relevance=0.051)], transcriptLength=187.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, 
publicationDate=datetime.datetime(2010, 3, 31, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-2010-03-31-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper'), ContentItem(uid='arbeitgeber-2010-04-08-a-i0013', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp91_de', relevance=0.209), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.164), TopicMention(uid='tm-de-all-v2.0_tp92_de', relevance=0.147), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.146)], transcriptLength=509.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 4, 8, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2010-04-08-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='arbeitgeber-2010-04-08-a-i0012', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.261), TopicMention(uid='tm-de-all-v2.0_tp91_de', relevance=0.184), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.082), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.079)], transcriptLength=452.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 4, 8, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2010-04-08-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='arbeitgeber-2010-04-08-a-i0014', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, locationEntities=[], personEntities=[], organisationEntities=[], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.191), 
TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.147), TopicMention(uid='tm-de-all-v2.0_tp93_de', relevance=0.14), TopicMention(uid='tm-de-all-v2.0_tp92_de', relevance=0.122), TopicMention(uid='tm-de-all-v2.0_tp91_de', relevance=0.083)], transcriptLength=530.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 4, 8, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2010-04-08-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper'), ContentItem(uid='LLE-2012-05-03-a-i0180', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='[REDACTED]', transcript=None, locationEntities=[NamedEntity(uid='2-54-Canton_de_Fribourg', count=1.0), NamedEntity(uid='2-54-Suisse', count=1.0), NamedEntity(uid='2-54-Sainte-Thérèse', count=1.0), NamedEntity(uid='2-54-Alpes_suisses', count=1.0), NamedEntity(uid='2-54-Avry', count=1.0)], personEntities=[NamedEntity(uid='2-50-Thierry_Jobin', count=1.0), NamedEntity(uid='2-50-Georges_Schwizgebel', count=1.0)], organisationEntities=[NamedEntity(uid='2-53-Union_européenne', count=1.0), NamedEntity(uid='2-53-Fédération_romande_des_consommateurs', count=1.0), NamedEntity(uid='2-53-Gare_de_Fribourg', count=1.0), NamedEntity(uid='2-53-Salle', count=1.0), NamedEntity(uid='2-53-Migros', count=1.0)], newsAgenciesEntities=[], topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.233), TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.141), TopicMention(uid='tm-fr-all-v2.0_tp86_fr', relevance=0.079), TopicMention(uid='tm-fr-all-v2.0_tp70_fr', relevance=0.077), TopicMention(uid='tm-fr-all-v2.0_tp61_fr', relevance=0.072), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.055)], transcriptLength=306.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2012, 5, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2012-05-03-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper')], 
pagination=Pagination(total=91, limit=100, offset=0))"
+ "SearchResponseSchema(data=[ContentItem(uid='luxwort-1948-11-25-a-i0033', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Europäischer Föderalistenkongreß in Rom', transcript=None, entities=NamedEntities(locations=[], persons=[NamedEntity(uid='2-50-Pius_XII.', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Papst Pius XII', mentionConfidence=95.88, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp25_de', relevance=0.202), TopicMention(uid='tm-de-all-v2.0_tp52_de', relevance=0.16), TopicMention(uid='tm-de-all-v2.0_tp86_de', relevance=0.157), TopicMention(uid='tm-de-all-v2.0_tp14_de', relevance=0.112), TopicMention(uid='tm-de-all-v2.0_tp77_de', relevance=0.112), TopicMention(uid='tm-de-all-v2.0_tp95_de', relevance=0.096), TopicMention(uid='tm-de-all-v2.0_tp24_de', relevance=0.057)], embeddings=None, transcriptLength=733.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1948, 11, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxwort-1948-11-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxwort', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.6320533099849119, pageNumbers=[PageNumber(root=6.0)], collectionUids=[]), ContentItem(uid='FZG-1950-06-17-a-i0045', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Um EJnropa herum Die Furcht vor Krieg un...', transcript=None, entities=NamedEntities(locations=[], persons=[NamedEntity(uid='2-50-Konrad_Adenauer', count=1.0), NamedEntity(uid='2-50-Georges_Bidault', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Adenauer', mentionConfidence=96.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bidault', mentionConfidence=89.71, startOffset=None, endOffset=None)], organisations=[], 
newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp86_de', relevance=0.265), TopicMention(uid='tm-de-all-v2.0_tp95_de', relevance=0.181), TopicMention(uid='tm-de-all-v2.0_tp77_de', relevance=0.154), TopicMention(uid='tm-de-all-v2.0_tp41_de', relevance=0.09), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.08)], embeddings=None, transcriptLength=1200.0, totalPages=1.0, languageCode='de', isOnFrontPage=True, publicationDate=datetime.datetime(1950, 6, 17, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1950-06-17-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.5263532035322661, pageNumbers=[PageNumber(root=1.0)], collectionUids=[]), ContentItem(uid='JDG-1954-11-03-a-i0032', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Washington et les négociations avec Moscou', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Moscou', count=1.0), NamedEntity(uid='2-54-Viêt_Nam', count=1.0), NamedEntity(uid='2-54-Chine', count=1.0), NamedEntity(uid='2-54-Pékin', count=1.0), NamedEntity(uid='2-54-Allemagne', count=1.0), NamedEntity(uid='2-54-États-Unis', count=1.0)], persons=[NamedEntity(uid='2-50-Anthony_Eden', count=1.0), NamedEntity(uid='2-50-Dwight_D._Eisenhower', count=1.0), NamedEntity(uid='2-50-John_Foster_Dulles', count=1.0), NamedEntity(uid='2-50-Konrad_Adenauer', count=1.0), NamedEntity(uid='2-50-Pierre_Mendès_France', count=1.0)], organisations=[NamedEntity(uid='2-53-États-Unis', count=1.0), NamedEntity(uid='2-53-Communauté_européenne_de_défense', count=1.0), NamedEntity(uid='2-53-Union_des_républiques_socialistes_soviétiques', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Moscou', mentionConfidence=98.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vietnam', mentionConfidence=96.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Formose', 
mentionConfidence=98.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chine', mentionConfidence=90.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pékin', mentionConfidence=98.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Allemagne', mentionConfidence=95.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Amérique', mentionConfidence=92.28, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='sir Anthony Eden', mentionConfidence=98.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eisenhower', mentionConfidence=66.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dulles', mentionConfidence=83.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Adenauer', mentionConfidence=94.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mendès', mentionConfidence=91.23, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Assemblée des Nations Unies', mentionConfidence=93.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='WEU', mentionConfidence=78.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Western European Union', mentionConfidence=89.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Etats', mentionConfidence=79.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='CED', mentionConfidence=74.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union soviétique', mentionConfidence=89.98, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp29_fr', relevance=0.253), TopicMention(uid='tm-fr-all-v2.0_tp44_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp03_fr', relevance=0.098), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.096), TopicMention(uid='tm-fr-all-v2.0_tp36_fr', relevance=0.082), TopicMention(uid='tm-fr-all-v2.0_tp00_fr', relevance=0.053)], embeddings=None, 
transcriptLength=717.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1954, 11, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1954-11-03-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.4800621694991533, pageNumbers=[PageNumber(root=3.0)], collectionUids=[]), ContentItem(uid='JDG-1954-11-27-a-i0041', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='AH, CES INITIALES !', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Asie', count=1.0), NamedEntity(uid='2-54-Extrême-Orient', count=1.0), NamedEntity(uid='2-54-Corée', count=1.0), NamedEntity(uid='2-54-Proche-Orient', count=1.0), NamedEntity(uid='2-54-Korea', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Australie', count=2.0), NamedEntity(uid='2-54-États-Unis', count=1.0), NamedEntity(uid='2-54-Londres', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Organisation_des_Nations_unies', count=1.0), NamedEntity(uid='2-53-Amérique_latine', count=1.0), NamedEntity(uid='2-53-Conseil_économique_et_social_des_Nations_unies', count=2.0), NamedEntity(uid='2-53-Organisation_internationale_du_travail', count=1.0), NamedEntity(uid='2-53-Union_internationale_des_télécommunications', count=1.0), NamedEntity(uid='2-53-Science', count=1.0), NamedEntity(uid='2-53-Union_postale_universelle', count=2.0), NamedEntity(uid='2-53-Banque_mondiale', count=1.0), NamedEntity(uid=\"2-53-Organisation_de_l'aviation_civile_internationale\", count=1.0), NamedEntity(uid='2-53-International_Labour_Organization', count=1.0), NamedEntity(uid='2-53-World_Meteorological_Organization', count=2.0), NamedEntity(uid=\"2-53-Organisation_des_Nations_unies_pour_l'éducation,_la_science_et_la_culture\", count=1.0), NamedEntity(uid='2-53-Accord_général_sur_les_tarifs_douaniers_et_le_commerce', count=1.0), 
NamedEntity(uid='2-53-Océan_Atlantique', count=1.0), NamedEntity(uid=\"2-53-Organisation_du_traité_de_l'Atlantique_nord\", count=1.0), NamedEntity(uid='2-53-Organisation_européenne_pour_la_recherche_nucléaire', count=1.0), NamedEntity(uid=\"2-53-Union_de_l'Europe_occidentale\", count=2.0), NamedEntity(uid='2-53-Communauté_européenne_de_défense', count=1.0), NamedEntity(uid='2-53-Union_européenne_des_paiements', count=2.0), NamedEntity(uid='2-53-World_Federation_of_Trade_Unions', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Asie', mentionConfidence=60.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='Extrême', mentionConfidence=70.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='BIRD', mentionConfidence=53.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='IBRD', mentionConfidence=38.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='SUNFED', mentionConfidence=67.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Corée', mentionConfidence=44.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Proche Orient', mentionConfidence=84.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Korea', mentionConfidence=91.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNCURK', mentionConfidence=36.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=80.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='SACEUK', mentionConfidence=87.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Australie', mentionConfidence=50.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Austraiia', mentionConfidence=84.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='New', mentionConfidence=54.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='OECE', mentionConfidence=71.0, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Londres', mentionConfidence=95.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='Paris', mentionConfidence=94.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='WFTU', mentionConfidence=40.57, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Puissances', mentionConfidence=56.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='SACEUR', mentionConfidence=34.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Commandant en chef allié en Europe', mentionConfidence=73.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='général Gruenther', mentionConfidence=94.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Commandant en chef des forces navales atlantiques', mentionConfidence=79.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='CINCAFMED', mentionConfidence=46.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Commandant en chef en terranée NATO', mentionConfidence=70.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Commander in Chief', mentionConfidence=62.13, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Nations Unies ONU', mentionConfidence=91.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Conseil', mentionConfidence=59.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='latine', mentionConfidence=69.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Orient', mentionConfidence=72.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations ECOSOC', mentionConfidence=87.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie and Social CouncU ECE', mentionConfidence=82.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie Commission for Europe ECLA', mentionConfidence=75.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie 
Commission for Latin America ECAFE', mentionConfidence=72.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='travail', mentionConfidence=51.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union internationale des télécommunications', mentionConfidence=85.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='science', mentionConfidence=57.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='UPU', mentionConfidence=60.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union postale universelle', mentionConfidence=90.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Internatioal Bank for Recorusbruetion and development', mentionConfidence=87.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='World Bank', mentionConfidence=81.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Monetary Fund FAO', mentionConfidence=76.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Food and Agricultural Organisation', mentionConfidence=75.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='ELO', mentionConfidence=50.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Labour Office', mentionConfidence=87.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='WMO', mentionConfidence=50.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='World Meteorologieal Organisation', mentionConfidence=76.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='ITU', mentionConfidence=43.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Télécommunication Union UNESCO', mentionConfidence=78.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Universel Postal Union Agences', mentionConfidence=74.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='assistance technique', mentionConfidence=45.55, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='TAB', mentionConfidence=55.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='assistance civile des Nations Unies', mentionConfidence=79.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNCUEK', mentionConfidence=50.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNKRA', mentionConfidence=57.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='secours', mentionConfidence=55.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='GATT', mentionConfidence=57.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='General Agreement on Tarilfis and Trade SUNFED', mentionConfidence=81.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Spécial United Nations Fund for Economie Development', mentionConfidence=81.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Technical Assistance Administration', mentionConfidence=78.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Technical Assistance Board UNCACK', mentionConfidence=73.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Commission for tbe Unification and thé Rehabilitation', mentionConfidence=86.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Korean', mentionConfidence=82.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Reconstruction Agency UNBWA', mentionConfidence=59.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Aalief and Works Agency', mentionConfidence=84.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Near Bast Pacte Atlantique OTAN', mentionConfidence=82.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Atlantique Nord', mentionConfidence=61.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='alliées', mentionConfidence=46.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='North Atlantic Treaty 
Organisation', mentionConfidence=87.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Allied Powers ia', mentionConfidence=73.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suprême Allied Commander', mentionConfidence=59.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suprême Allied Commandée', mentionConfidence=83.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Atlantic Naval Forces', mentionConfidence=89.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Allied Forces in thé Mediterranean', mentionConfidence=83.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='ANZUS', mentionConfidence=44.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='Centre européen de recherche', mentionConfidence=62.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='coopération économique', mentionConfidence=84.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='UEO', mentionConfidence=45.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Communauté européenne de défense', mentionConfidence=81.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='UEP', mentionConfidence=61.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne des paiements', mentionConfidence=91.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='ECSC', mentionConfidence=65.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schuman Plan', mentionConfidence=82.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Nuclear Research Centre', mentionConfidence=81.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='OEEC', mentionConfidence=47.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Organisation for European Economie Coopération', mentionConfidence=83.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='WEU', mentionConfidence=70.56, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Western European Union EPU', mentionConfidence=88.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Payments Union Divers BRI', mentionConfidence=84.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque des Règlements internationaux CICR', mentionConfidence=83.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='ICRC', mentionConfidence=44.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Committee of thé Red Cross ICEM', mentionConfidence=79.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Intergovernmental', mentionConfidence=46.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Migrations', mentionConfidence=64.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Confédération of Free Trade Unions', mentionConfidence=88.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='World Fédération of Trade Unions', mentionConfidence=86.33, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.19), TopicMention(uid='tm-fr-all-v2.0_tp10_fr', relevance=0.123), TopicMention(uid='tm-fr-all-v2.0_tp16_fr', relevance=0.103), TopicMention(uid='tm-fr-all-v2.0_tp78_fr', relevance=0.102), TopicMention(uid='tm-fr-all-v2.0_tp72_fr', relevance=0.094), TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.079)], embeddings=None, transcriptLength=783.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1954, 11, 27, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1954-11-27-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.93, relevanceScore=0.40468972948334403, pageNumbers=[PageNumber(root=4.0)], collectionUids=[]), ContentItem(uid='DTT-1962-04-30-a-i0121', copyrightStatus='in_cpy', type='ar', 
sourceMedium='print', title='Fragen der Rüstung, und daneben besteht ...', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Vereinigte_Staaten', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Westeuropäische_Union', count=1.0), NamedEntity(uid='2-53-Europäische_Union', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Kern', mentionConfidence=65.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinigten', mentionConfidence=69.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Staaten', mentionConfidence=55.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kanada', mentionConfidence=91.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Frankreich', mentionConfidence=93.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grossbritannien', mentionConfidence=87.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Benelux', mentionConfidence=63.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grossbritanniens', mentionConfidence=54.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europa', mentionConfidence=57.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Norwegen', mentionConfidence=96.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweden', mentionConfidence=96.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dänemark', mentionConfidence=96.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rom', mentionConfidence=97.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italien', mentionConfidence=96.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='Montanunion', mentionConfidence=66.53, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Europas', mentionConfidence=39.51, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='EVG', mentionConfidence=55.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='WEU', mentionConfidence=47.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Westeuropäischen Union', mentionConfidence=66.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWU', mentionConfidence=70.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Western European Union', mentionConfidence=89.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWG', mentionConfidence=47.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italiens', mentionConfidence=40.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesrepublik Deutschland', mentionConfidence=49.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Brüsseler', mentionConfidence=66.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sechser', mentionConfidence=50.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kohle', mentionConfidence=33.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stahl', mentionConfidence=36.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rat', mentionConfidence=55.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='EURATOM', mentionConfidence=40.86, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp32_de', relevance=0.45), TopicMention(uid='tm-de-all-v2.0_tp52_de', relevance=0.259), TopicMention(uid='tm-de-all-v2.0_tp11_de', relevance=0.072), TopicMention(uid='tm-de-all-v2.0_tp70_de', relevance=0.064)], embeddings=None, transcriptLength=767.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1962, 4, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='DTT-1962-04-30-a', countryCode='CH', providerCode='Migros', mediaUid='DTT', mediaType='newspaper', hasOLR=True, 
ocrQualityScore=0.94, relevanceScore=0.6100651697905277, pageNumbers=[PageNumber(root=10.0)], collectionUids=[]), ContentItem(uid='luxland-1984-12-21-a-i0018', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Notizblock', transcript=None, entities=NamedEntities(locations=[], persons=[NamedEntity(uid='2-50-Jean_Giono', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Roger Krieps', mentionConfidence=91.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Jean Giono', mentionConfidence=91.24, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp92_de', relevance=0.365), TopicMention(uid='tm-de-all-v2.0_tp11_de', relevance=0.28), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.074), TopicMention(uid='tm-de-all-v2.0_tp80_de', relevance=0.056)], embeddings=None, transcriptLength=853.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1984, 12, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1984-12-21-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.95, relevanceScore=0.5952877250579142, pageNumbers=[PageNumber(root=4.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-1991-03-21-a-i0020', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-États-Unis', count=1.0), NamedEntity(uid='2-54-Chine', count=1.0), NamedEntity(uid='2-54-Aix-en-Provence', count=1.0), NamedEntity(uid='2-54-New_York', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-La_Nouvelle-Orléans', count=2.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='American', mentionConfidence=59.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chine', mentionConfidence=80.23, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Zurich', mentionConfidence=97.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Aix-en-Provence', mentionConfidence=89.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Université', mentionConfidence=79.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kramer', mentionConfidence=32.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='John Wyley', mentionConfidence=61.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='New- York', mentionConfidence=68.09, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Australien Peter Blunt', mentionConfidence=77.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Roger Blanpain', mentionConfidence=92.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Marc Maurice', mentionConfidence=95.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='E., Doz', mentionConfidence=39.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Y, Laurent', mentionConfidence=76.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Quinn J. 
B, Mintzberg H', mentionConfidence=79.66, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Nouvelle- Orléans', mentionConfidence=59.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Aminu Mamman', mentionConfidence=84.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='University of Wisconsin Press', mentionConfidence=49.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wyley', mentionConfidence=59.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nouvelle-Orléans', mentionConfidence=59.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='struggle for European Unions', mentionConfidence=74.32, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.242), TopicMention(uid='tm-fr-all-v2.0_tp80_fr', relevance=0.106), TopicMention(uid='tm-fr-all-v2.0_tp08_fr', relevance=0.08), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.073), TopicMention(uid='tm-fr-all-v2.0_tp73_fr', relevance=0.072)], embeddings=None, transcriptLength=864.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1991, 3, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-1991-03-21-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.95, relevanceScore=0.3746023315725854, pageNumbers=[PageNumber(root=20.0)], collectionUids=[]), ContentItem(uid='JDG-1993-11-15-a-i0016', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='UNION OU COMMUNAUTE Dilemme lexical', transcript=None, entities=NamedEntities(locations=[], persons=[], organisations=[NamedEntity(uid='2-53-Maastricht', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=2.0), NamedEntity(uid='2-53-Fleet', count=1.0), NamedEntity(uid='2-53-El_País', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Bruxelles', 
mentionConfidence=98.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe des Douze', mentionConfidence=82.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Street', mentionConfidence=72.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Espagne', mentionConfidence=93.62, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Francesco', mentionConfidence=70.21, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Communauté', mentionConfidence=40.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Maastricht', mentionConfidence=51.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne', mentionConfidence=68.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='EOK', mentionConfidence=52.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=79.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fleet', mentionConfidence=43.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vanguardia', mentionConfidence=43.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union Europea', mentionConfidence=82.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='El Pais', mentionConfidence=59.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Comunidad Europea', mentionConfidence=72.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='nauté contre', mentionConfidence=58.92, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.246), TopicMention(uid='tm-fr-all-v2.0_tp37_fr', relevance=0.104), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.099), TopicMention(uid='tm-fr-all-v2.0_tp75_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.077), TopicMention(uid='tm-fr-all-v2.0_tp16_fr', relevance=0.062)], 
embeddings=None, transcriptLength=643.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1993, 11, 15, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1993-11-15-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.5291625148211508, pageNumbers=[PageNumber(root=3.0)], collectionUids=[]), ContentItem(uid='FedGazDe-1994-01-25-a-i0002', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Bericht über die Aussenpolitik der Schweiz in den 90er Jahren Anhang: [...]', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Mitteleuropa', count=1.0), NamedEntity(uid='2-54-Sowjetunion', count=2.0), NamedEntity(uid='2-54-Europäische_Union', count=1.0), NamedEntity(uid='2-54-Jugoslawien', count=1.0), NamedEntity(uid='2-54-Vereinigte_Staaten', count=3.0), NamedEntity(uid='2-54-Melbourne', count=1.0), NamedEntity(uid='2-54-Japanisches_Kaiserreich', count=1.0), NamedEntity(uid='2-54-Kanada', count=1.0), NamedEntity(uid='2-54-Deutsches_Kaiserreich', count=1.0), NamedEntity(uid='2-54-Dritte_Französische_Republik', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=2.0), NamedEntity(uid='2-54-Mittlerer_Osten', count=1.0), NamedEntity(uid='2-54-Kambodscha', count=1.0), NamedEntity(uid='2-54-Afrika', count=1.0), NamedEntity(uid='2-54-Mittelmeer', count=1.0), NamedEntity(uid='2-54-Mexiko', count=1.0), NamedEntity(uid='2-54-Zweite_Polnische_Republik', count=1.0), NamedEntity(uid='2-54-Argentinien', count=1.0), NamedEntity(uid='2-54-Türkei', count=1.0), NamedEntity(uid='2-54-Schweiz', count=3.0), NamedEntity(uid='2-54-Rio_de_Janeiro', count=1.0), NamedEntity(uid='2-54-Norwegen', count=1.0), NamedEntity(uid='2-54-Schweden', count=2.0), NamedEntity(uid='2-54-Nordstaaten', count=1.0), NamedEntity(uid='2-54-Westeuropa', count=1.0), NamedEntity(uid='2-54-Uruguay', count=1.0), NamedEntity(uid='2-54-Genfersee', count=1.0), 
NamedEntity(uid='2-54-Genf', count=1.0), NamedEntity(uid='2-54-Luzern', count=1.0), NamedEntity(uid='2-54-Kanton_Zürich', count=1.0), NamedEntity(uid='2-54-Arabische_Liga', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0), NamedEntity(uid='2-54-Osteuropa', count=1.0), NamedEntity(uid='2-54-Volksrepublik_China', count=1.0), NamedEntity(uid='2-54-Russland', count=1.0), NamedEntity(uid='2-54-Rhodesien', count=1.0), NamedEntity(uid='2-54-Irak', count=1.0), NamedEntity(uid='2-54-Libyen', count=1.0), NamedEntity(uid='2-54-Serbien', count=1.0), NamedEntity(uid='2-54-Nordkorea', count=1.0), NamedEntity(uid='2-54-New_York_City', count=1.0)], persons=[NamedEntity(uid='2-50-Helmut_Hubacher', count=1.0)], organisations=[NamedEntity(uid='2-53-Bundespräsident_(Schweiz)', count=1.0), NamedEntity(uid='2-53-Allgemeines_Zoll-_und_Handelsabkommen', count=1.0), NamedEntity(uid='2-53-Europäischer_Wirtschaftsraum', count=1.0), NamedEntity(uid='2-53-Organisation_für_wirtschaftliche_Zusammenarbeit_und_Entwicklung', count=5.0), NamedEntity(uid='2-53-Warschauer_Pakt', count=1.0), NamedEntity(uid='2-53-Bundesverfassungsgericht', count=1.0), NamedEntity(uid='2-53-Demokratie', count=1.0), NamedEntity(uid='2-53-Bundesrepublik', count=1.0), NamedEntity(uid='2-53-Weltbank', count=1.0), NamedEntity(uid='2-53-ABC-Abwehr', count=1.0), NamedEntity(uid='2-53-Europäische_Menschenrechtskonvention', count=1.0), NamedEntity(uid='2-53-Vereinigte_Staaten', count=1.0), NamedEntity(uid='2-53-Swisslex', count=1.0), NamedEntity(uid='2-53-Klima', count=1.0), NamedEntity(uid='2-53-Umweltprogramm_der_Vereinten_Nationen', count=2.0), NamedEntity(uid='2-53-Entwicklungsprogramm_der_Vereinten_Nationen', count=1.0), NamedEntity(uid='2-53-Kernwaffe', count=1.0), NamedEntity(uid='2-53-Europäische_Bank_für_Wiederaufbau_und_Entwicklung', count=1.0), NamedEntity(uid='2-53-CERN', count=1.0), NamedEntity(uid='2-53-Österreichisches_Rotes_Kreuz', count=1.0), NamedEntity(uid='2-53-Wirtschaftlichkeit', count=1.0), 
NamedEntity(uid='2-53-Umwelt', count=1.0), NamedEntity(uid='2-53-Industriestaat', count=1.0), NamedEntity(uid='2-53-Schweiz', count=1.0), NamedEntity(uid='2-53-Constitution_de_la_Suisse', count=1.0), NamedEntity(uid='2-53-Aristide_Briand', count=1.0), NamedEntity(uid='2-53-Charta_der_Vereinten_Nationen', count=1.0), NamedEntity(uid='2-53-Sicherheitsrat_der_Vereinten_Nationen', count=1.0), NamedEntity(uid='2-53-United_Nations', count=1.0), NamedEntity(uid='2-53-Tiré_à_part', count=1.0), NamedEntity(uid='2-53-Vertrag_von_Maastricht', count=1.0), NamedEntity(uid='2-53-Europäische_Wirtschaftsgemeinschaft', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Baerlocher', mentionConfidence=58.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Onken', mentionConfidence=79.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schveiz', mentionConfidence=96.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sachbereichen', mentionConfidence=56.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mitteleuropas', mentionConfidence=45.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sowjetunion', mentionConfidence=68.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischen', mentionConfidence=49.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Jugoslawiens', mentionConfidence=90.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ländern Osteuropas', mentionConfidence=59.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Visegrader', mentionConfidence=89.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='USA', mentionConfidence=85.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Umfeld', mentionConfidence=56.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='östlichen Maclnblockes', mentionConfidence=74.38, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Japan', mentionConfidence=94.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kanada', mentionConfidence=95.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Deutschland', mentionConfidence=92.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Frankreich', mentionConfidence=92.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grossbritannien', mentionConfidence=94.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italien', mentionConfidence=94.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nahen Osten', mentionConfidence=64.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kambodscha', mentionConfidence=96.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='südlichen', mentionConfidence=58.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Afrika', mentionConfidence=44.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mittelmeerraumes', mentionConfidence=37.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mexiko', mentionConfidence=93.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Polen', mentionConfidence=87.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Argentinien', mentionConfidence=87.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Türkei', mentionConfidence=87.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Global', mentionConfidence=43.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Staatengemeinschaft', mentionConfidence=68.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rio', mentionConfidence=97.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='Norwegen', mentionConfidence=96.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweden', mentionConfidence=96.26, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Schweiz', mentionConfidence=94.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nord', mentionConfidence=45.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Westeuropas', mentionConfidence=78.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Uruguay', mentionConfidence=59.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zehnergruppe', mentionConfidence=51.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinigten Staaten', mentionConfidence=67.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Region Genf', mentionConfidence=65.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Genfs', mentionConfidence=50.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luzem', mentionConfidence=96.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luzern', mentionConfidence=97.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kantone', mentionConfidence=84.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='BERD', mentionConfidence=49.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Östlichen Europa', mentionConfidence=88.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Friedenszeiten', mentionConfidence=67.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='schweizerischen', mentionConfidence=41.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Blauhelme', mentionConfidence=85.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Liga', mentionConfidence=31.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kleinstaates', mentionConfidence=44.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='UdSSR', mentionConfidence=79.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='europäischen', mentionConfidence=46.22, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Osten Europas', mentionConfidence=56.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='China', mentionConfidence=96.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grossbritaimicn', mentionConfidence=91.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Russland', mentionConfidence=94.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Staaten von Amerika', mentionConfidence=57.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rhodesien', mentionConfidence=97.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Irak', mentionConfidence=90.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Libyen', mentionConfidence=96.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Serbien', mentionConfidence=97.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nordkorea', mentionConfidence=96.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Oelembargo', mentionConfidence=91.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='New York', mentionConfidence=75.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schwedens', mentionConfidence=94.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Konstanten', mentionConfidence=95.97, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Frau Präsidentin', mentionConfidence=85.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Herr Präsident', mentionConfidence=62.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hubacher', mentionConfidence=43.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Couchepin', mentionConfidence=47.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Maximen', mentionConfidence=78.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ESA', mentionConfidence=49.94, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='sektnriellen Ansatz', mentionConfidence=84.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='EDA', mentionConfidence=93.16, startOffset=None, endOffset=None), EntityMention(surfaceForm='EVD', mentionConfidence=75.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Femer', mentionConfidence=64.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Waffenstillstandsbeobachter', mentionConfidence=50.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Oehlervom', mentionConfidence=87.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Daniel Thürer', mentionConfidence=94.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Christian Dominici', mentionConfidence=96.92, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Bundespräsident', mentionConfidence=67.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='GATT', mentionConfidence=52.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='Checks', mentionConfidence=56.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Westeuropäische Integration', mentionConfidence=41.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWR', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='OECD', mentionConfidence=74.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zentralisierung', mentionConfidence=54.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dezentralisierung', mentionConfidence=48.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='Warschauer', mentionConfidence=39.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesverfassungsgerichtes', mentionConfidence=70.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Demokratieprinzip', mentionConfidence=42.7, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Bundesrepublik', mentionConfidence=61.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Parallel', mentionConfidence=30.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Weltbank', mentionConfidence=50.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Waffen', mentionConfidence=40.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='EMRK', mentionConfidence=50.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Good governance', mentionConfidence=64.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wiener Menschenrechtskonferenz', mentionConfidence=59.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischen Wirtschaftskommission', mentionConfidence=39.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wettbewerbs', mentionConfidence=63.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sozialund Umweltpolitik', mentionConfidence=54.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinigten', mentionConfidence=63.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Swisslex', mentionConfidence=50.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Klima', mentionConfidence=30.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNEP', mentionConfidence=75.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNDP', mentionConfidence=76.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kohärenzbedürfnisse', mentionConfidence=55.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Atomare', mentionConfidence=49.16, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque Européenne', mentionConfidence=74.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäische Bank für Wiederaufbau und Entwicklung Bank', mentionConfidence=61.44, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Internationalen Zahlungsausgleich', mentionConfidence=57.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bretton Woods Institutionen', mentionConfidence=62.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Centre Européen', mentionConfidence=56.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nucléaire Europäisches Zentrum', mentionConfidence=59.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kernphysikalische Forschung', mentionConfidence=62.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäische Union Europäische Menschenrechtskonvention', mentionConfidence=74.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Space Agency Europäische Weltraumorganisation Europäische Agentur', mentionConfidence=65.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Forschung Europäischer Wirtschaftsraum', mentionConfidence=57.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='FATF', mentionConfidence=52.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Financial Action Task Force Arbeitsgruppe', mentionConfidence=75.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gemeinschaft Unabhängiger Staaten IAEA', mentionConfidence=57.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Atomic Energy Agency Internationale Atomenergie Agentur IDA', mentionConfidence=80.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Development Association Internationale Entwicklungsorganisation IKRK', mentionConfidence=78.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Internationales Komitee', mentionConfidence=66.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Roten Kreuz KSZE', mentionConfidence=70.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='North Atlantic Treaty Organisation 
Nordatlantikpakt', mentionConfidence=74.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nordatlantische', mentionConfidence=39.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Verteidigungsgemeinschaft', mentionConfidence=66.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Organisation', mentionConfidence=53.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie Coopération and Development Organisation', mentionConfidence=64.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='wirtschaftliche', mentionConfidence=35.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNCED', mentionConfidence=66.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Environment', mentionConfidence=61.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Conference on Trade and Development', mentionConfidence=64.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Handel und Entwicklung', mentionConfidence=61.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Development Program', mentionConfidence=66.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Environment Program', mentionConfidence=70.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Organisation Vereinte Nationen Western European Union Westeuropäische Union Gruppe', mentionConfidence=79.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Industriestaaten', mentionConfidence=64.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische', mentionConfidence=51.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Bundesverfassung', mentionConfidence=40.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Haager', mentionConfidence=50.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='VEB', 
mentionConfidence=39.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Briand', mentionConfidence=50.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kellogpakt', mentionConfidence=34.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='WEUCC', mentionConfidence=57.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Charta', mentionConfidence=45.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sicherheitsrates', mentionConfidence=52.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sanktionensystem', mentionConfidence=53.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Enfoncement Mcasurcs and Ncutrality', mentionConfidence=62.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Völkerrechts', mentionConfidence=39.16, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sonderdruck', mentionConfidence=49.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Recueil de Jurisprudence Neuchâteloise', mentionConfidence=77.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinten Nationen Wirtschaftssanktionen', mentionConfidence=79.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='EPZ', mentionConfidence=57.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='GASP', mentionConfidence=63.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='Maastrichter', mentionConfidence=57.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinbarkeit von Neutralität', mentionConfidence=54.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischen Wirtschaftsgemeinschaft', mentionConfidence=47.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nationonen', mentionConfidence=53.1, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=27914.0, totalPages=90.0, languageCode='de', 
isOnFrontPage=False, publicationDate=datetime.datetime(1994, 1, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazDe-1994-01-25-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazDe', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.95, relevanceScore=0.036167531654516444, pageNumbers=[PageNumber(root=25.0), PageNumber(root=26.0), PageNumber(root=27.0), PageNumber(root=28.0), PageNumber(root=29.0), PageNumber(root=30.0), PageNumber(root=31.0), PageNumber(root=32.0), PageNumber(root=33.0), PageNumber(root=34.0), PageNumber(root=35.0), PageNumber(root=36.0), PageNumber(root=37.0), PageNumber(root=38.0), PageNumber(root=39.0), PageNumber(root=40.0), PageNumber(root=41.0), PageNumber(root=42.0), PageNumber(root=43.0), PageNumber(root=44.0), PageNumber(root=45.0), PageNumber(root=46.0), PageNumber(root=47.0), PageNumber(root=48.0), PageNumber(root=49.0), PageNumber(root=50.0), PageNumber(root=51.0), PageNumber(root=52.0), PageNumber(root=53.0), PageNumber(root=54.0), PageNumber(root=55.0), PageNumber(root=56.0), PageNumber(root=57.0), PageNumber(root=58.0), PageNumber(root=59.0), PageNumber(root=60.0), PageNumber(root=61.0), PageNumber(root=62.0), PageNumber(root=63.0), PageNumber(root=64.0), PageNumber(root=65.0), PageNumber(root=66.0), PageNumber(root=67.0), PageNumber(root=68.0), PageNumber(root=69.0), PageNumber(root=70.0), PageNumber(root=71.0), PageNumber(root=72.0), PageNumber(root=73.0), PageNumber(root=74.0), PageNumber(root=75.0), PageNumber(root=76.0), PageNumber(root=77.0), PageNumber(root=78.0), PageNumber(root=79.0), PageNumber(root=80.0), PageNumber(root=81.0), PageNumber(root=82.0), PageNumber(root=83.0), PageNumber(root=84.0), PageNumber(root=85.0), PageNumber(root=86.0), PageNumber(root=87.0), PageNumber(root=88.0), PageNumber(root=89.0), PageNumber(root=90.0), PageNumber(root=91.0), PageNumber(root=92.0), PageNumber(root=93.0), PageNumber(root=94.0), PageNumber(root=95.0), PageNumber(root=96.0), PageNumber(root=97.0), PageNumber(root=98.0), 
PageNumber(root=99.0), PageNumber(root=100.0), PageNumber(root=101.0), PageNumber(root=102.0), PageNumber(root=103.0), PageNumber(root=104.0), PageNumber(root=105.0), PageNumber(root=106.0), PageNumber(root=107.0), PageNumber(root=108.0), PageNumber(root=109.0), PageNumber(root=110.0), PageNumber(root=111.0), PageNumber(root=112.0), PageNumber(root=113.0), PageNumber(root=114.0)], collectionUids=[]), ContentItem(uid='GDL-1994-04-21-a-i0061', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Technologie', count=1.0), NamedEntity(uid='2-53-Open_University', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='EUROPE', mentionConfidence=92.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geneva', mentionConfidence=97.61, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Anne Williams', mentionConfidence=87.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mrs', mentionConfidence=43.94, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='TbeOpen University', mentionConfidence=73.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='BSc', mentionConfidence=44.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='MBA', mentionConfidence=43.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mathematics Social Sciences', mentionConfidence=44.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Technology', mentionConfidence=38.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bureau', mentionConfidence=60.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Open University', mentionConfidence=76.31, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=204.0, 
totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 4, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1994-04-21-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5169090940562496, pageNumbers=[PageNumber(root=9.0)], collectionUids=[]), ContentItem(uid='JDG-1994-05-19-a-i0041', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[], embeddings=None, transcriptLength=206.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 5, 19, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1994-05-19-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5075083734093407, pageNumbers=[PageNumber(root=6.0)], collectionUids=[]), ContentItem(uid='GDL-1994-05-19-a-i0037', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[], persons=[], organisations=[NamedEntity(uid='2-53-Master_of_Business_Administration', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='EUROPE', mentionConfidence=94.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geneva', mentionConfidence=97.61, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Anne Williams', mentionConfidence=87.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mrs', mentionConfidence=45.01, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='TheOpen University', mentionConfidence=78.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='BSc', mentionConfidence=39.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='MBA', mentionConfidence=40.31, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Mathematics', mentionConfidence=45.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sciences', mentionConfidence=42.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='Open University', mentionConfidence=75.02, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=202.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1994, 5, 19, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1994-05-19-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5017807883265586, pageNumbers=[PageNumber(root=6.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-1995-01-05-a-i0021', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=None, mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Dr. Peter Hasler', mentionConfidence=99.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Se- kretär Dr. Daniel W. 
Hefti', mentionConfidence=91.79, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='China Training Centre for Senior Personnel Manage- ment Officials', mentionConfidence=94.06, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.435), TopicMention(uid='tm-de-all-v2.0_tp21_de', relevance=0.212), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.073)], embeddings=None, transcriptLength=576.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 1, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-1995-01-05-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.93, relevanceScore=0.6814228593353777, pageNumbers=[PageNumber(root=21.0)], collectionUids=[]), ContentItem(uid='luxland-1995-05-05-a-i0062', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Agenda', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Esch_an_der_Alzette', count=1.0)], persons=[NamedEntity(uid='2-50-Georges_Prêtre', count=1.0), NamedEntity(uid='2-50-Johannes_Brahms', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Europe', mentionConfidence=71.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ville de Luxembourg', mentionConfidence=96.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Esch/Alzette', mentionConfidence=87.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='53 boulevard Royal', mentionConfidence=87.18, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Grand'rue à Rumelange une exposition\", mentionConfidence=80.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='Théâtre de la Ville', mentionConfidence=78.59, 
startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Hubert Clement', mentionConfidence=89.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lucien Kayser', mentionConfidence=97.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Joseph Paul Schneider', mentionConfidence=94.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Georges Prêtre', mentionConfidence=97.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Felix Mendelssohn', mentionConfidence=98.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Johannes Brahms', mentionConfidence=98.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Elisabeth Vermast', mentionConfidence=47.79, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Europa Grafica', mentionConfidence=57.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grand Orient de Luxembourg', mentionConfidence=96.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alzette', mentionConfidence=67.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Centre Emile', mentionConfidence=81.04, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Maison syndicale de Rumelange et l'association Archéologie et Histoire industrielle\", mentionConfidence=65.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='café „am Heim', mentionConfidence=66.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cercle artistique', mentionConfidence=76.61, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Centre d'animation pédagogique et de loisirs\", mentionConfidence=93.81, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.232), TopicMention(uid='tm-fr-all-v2.0_tp60_fr', relevance=0.132), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.11), TopicMention(uid='tm-fr-all-v2.0_tp24_fr', 
relevance=0.091), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.083), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.075)], embeddings=None, transcriptLength=415.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1995-05-05-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.5898354311332158, pageNumbers=[PageNumber(root=16.0)], collectionUids=[]), ContentItem(uid='GDL-1995-05-11-a-i0065', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Union_européenne', count=1.0), NamedEntity(uid='2-54-Genève', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Europe', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='European Union', mentionConfidence=54.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dow Europe', mentionConfidence=87.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='DuPont Forfurther', mentionConfidence=69.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lausanne', mentionConfidence=97.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Switzerland', mentionConfidence=97.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fax', mentionConfidence=40.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='GENÈVE', mentionConfidence=94.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='GENEVA', mentionConfidence=93.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUROPEAN', mentionConfidence=94.63, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Corne', mentionConfidence=73.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Claude Fussler', 
mentionConfidence=96.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Colin Hines', mentionConfidence=97.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Larry Kohler', mentionConfidence=95.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Juan Rada', mentionConfidence=97.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vivian Sheridan', mentionConfidence=96.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kari m Zein', mentionConfidence=96.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='EEM A Secrétariat', mentionConfidence=92.62, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Européen Association for Environmental Managmenl Education eema Careers and Environment', mentionConfidence=76.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Environmental Profession in Europe Needs', mentionConfidence=57.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='European', mentionConfidence=50.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Network', mentionConfidence=60.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Environmental Partnership', mentionConfidence=71.62, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=237.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1995-05-11-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5050485816411017, pageNumbers=[PageNumber(root=10.0)], collectionUids=[]), ContentItem(uid='JDG-1995-05-11-a-i0056', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[], embeddings=None, transcriptLength=232.0, totalPages=1.0, languageCode='en', 
isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-05-11-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5050485816411017, pageNumbers=[PageNumber(root=10.0)], collectionUids=[]), ContentItem(uid='JDG-1995-05-18-a-i0028', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[], embeddings=None, transcriptLength=257.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 5, 18, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-05-18-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.4777393669962179, pageNumbers=[PageNumber(root=4.0)], collectionUids=[]), ContentItem(uid='JDG-1995-06-02-a-i0094', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[], embeddings=None, transcriptLength=251.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 2, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-06-02-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.4905165838220827, pageNumbers=[PageNumber(root=18.0)], collectionUids=[]), ContentItem(uid='GDL-1995-06-02-a-i0134', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Switzerland', count=1.0), NamedEntity(uid='2-54-European_Union', count=1.0), NamedEntity(uid='2-54-Lausanne', count=1.0), NamedEntity(uid='2-54-Geneva', count=2.0)], persons=[], organisations=[NamedEntity(uid='2-53-Europe', count=1.0), NamedEntity(uid='2-53-United_Kingdom', count=1.0)], newsAgencies=[]), 
mentions=EntityMentions(locations=[EntityMention(surfaceForm='Switzerland', mentionConfidence=74.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=54.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dow Europe', mentionConfidence=68.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lausanne', mentionConfidence=95.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='GENÈVE', mentionConfidence=97.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='GENEVA', mentionConfidence=73.48, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Corne', mentionConfidence=60.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Claude Fussler', mentionConfidence=96.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Colin Hines', mentionConfidence=96.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Larry Kohler', mentionConfidence=95.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='Juan Rada', mentionConfidence=96.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vivian Sheridan', mentionConfidence=96.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Karim Zein', mentionConfidence=96.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='EEMA Secrétariat', mentionConfidence=62.55, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Européen Association for Environmental Managmenl Education eema', mentionConfidence=79.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUROPtAN ENVIRONMINTAL MANAGEMENT ASSOCIATION', mentionConfidence=57.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Environmental Profession in Europe', mentionConfidence=57.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='European', mentionConfidence=50.77, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Network', mentionConfidence=49.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='UK', mentionConfidence=73.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Environmental Partnership', mentionConfidence=58.55, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=267.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 2, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1995-06-02-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.4813191906137038, pageNumbers=[PageNumber(root=18.0)], collectionUids=[]), ContentItem(uid='JDG-1995-06-07-a-i0039', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[], embeddings=None, transcriptLength=149.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-06-07-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5255356529816838, pageNumbers=[PageNumber(root=8.0)], collectionUids=[]), ContentItem(uid='GDL-1995-06-07-a-i0039', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Genève', count=1.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Geneva', mentionConfidence=97.8, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Cointrin', mentionConfidence=87.17, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='BSc', mentionConfidence=45.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='MBA', mentionConfidence=40.32, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='Hôtel Penta', mentionConfidence=51.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Open University', mentionConfidence=84.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chemin des Morillons', mentionConfidence=71.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='University Education and Training', mentionConfidence=64.68, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=145.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 6, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1995-06-07-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5360321723551998, pageNumbers=[PageNumber(root=8.0)], collectionUids=[]), ContentItem(uid='JDG-1995-10-17-a-i0244', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[], embeddings=None, transcriptLength=254.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 10, 17, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1995-10-17-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.4767470494730738, pageNumbers=[PageNumber(root=39.0)], collectionUids=[]), ContentItem(uid='luxland-1995-11-10-a-i0058', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Agenda', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Bertrange_(Luxembourg)', count=1.0), NamedEntity(uid='2-54-Diekirch', count=1.0), NamedEntity(uid='2-54-Stadt_Brüssel', count=1.0), NamedEntity(uid='2-54-Palermo', count=1.0), NamedEntity(uid='2-54-Marseille', count=1.0)], persons=[NamedEntity(uid='2-50-Yves_Klein', count=1.0), NamedEntity(uid='2-50-Guy_Foissy', count=1.0), 
NamedEntity(uid='2-50-Victor_Haïm', count=1.0), NamedEntity(uid='2-50-John_Locke', count=1.0)], organisations=[NamedEntity(uid='2-53-Théâtre_des_Capucins', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Théâtre', mentionConfidence=91.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bartringen', mentionConfidence=98.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Diekirch', mentionConfidence=96.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Centre Convict', mentionConfidence=72.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='avenue Marie-Thérèse', mentionConfidence=67.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Brüssel', mentionConfidence=95.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Palermo', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Marseille', mentionConfidence=95.76, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Yves Klein', mentionConfidence=93.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Guy Foissy', mentionConfidence=64.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Victor Haïm', mentionConfidence=99.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Claudine Pelletier', mentionConfidence=90.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='M. Locke', mentionConfidence=99.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='H. I. 
F', mentionConfidence=81.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='J.-B', mentionConfidence=90.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dipl', mentionConfidence=70.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Päd', mentionConfidence=70.22, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Casino Luxembourg', mentionConfidence=81.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Théâtre des Capucins', mentionConfidence=95.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='City-Concorde', mentionConfidence=47.18, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Fondation de l'architecture et de l'ingénierie\", mentionConfidence=92.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ordre des architectes et ingénieurs-conseils', mentionConfidence=92.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union Européenne', mentionConfidence=37.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Villa Vauban', mentionConfidence=63.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Convict', mentionConfidence=41.73, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.228), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.142), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.122), TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.111), TopicMention(uid='tm-fr-all-v2.0_tp49_fr', relevance=0.088), TopicMention(uid='tm-fr-all-v2.0_tp61_fr', relevance=0.086)], embeddings=None, transcriptLength=742.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1995, 11, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1995-11-10-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.91, 
relevanceScore=0.40586327614508716, pageNumbers=[PageNumber(root=15.0)], collectionUids=[]), ContentItem(uid='EXP-1996-08-03-a-i0211', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Suisse', count=2.0), NamedEntity(uid='2-54-Londres', count=1.0)], persons=[NamedEntity(uid='2-50-Charles_Burney', count=1.0)], organisations=[NamedEntity(uid='2-53-Football', count=1.0), NamedEntity(uid='2-53-Royal_Albert_Hall', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Suisse', mentionConfidence=90.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suisse', mentionConfidence=90.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Paraboles', mentionConfidence=53.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Londres', mentionConfidence=97.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Binggis', mentionConfidence=85.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Binggis', mentionConfidence=85.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vârs', mentionConfidence=72.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vârs', mentionConfidence=72.11, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='M. 
Marconi', mentionConfidence=96.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Charles Burney', mentionConfidence=95.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wetterfrosch', mentionConfidence=87.54, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='football', mentionConfidence=56.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Royal Albert Hall', mentionConfidence=88.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union Youth Orchestra', mentionConfidence=80.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schnabelweid', mentionConfidence=35.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fussballmeisterschaft', mentionConfidence=58.59, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.544), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp50_fr', relevance=0.074)], embeddings=None, transcriptLength=265.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.93, relevanceScore=0.6648336280734956, pageNumbers=[PageNumber(root=26.0)], collectionUids=[]), ContentItem(uid='GDL-1996-08-03-a-i0216', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp94_fr', relevance=0.264), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.241), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.181)], embeddings=None, transcriptLength=522.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1996-08-03-a', 
countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.5029897182322514, pageNumbers=[PageNumber(root=30.0)], collectionUids=[]), ContentItem(uid='LLE-1996-08-03-a-i0157', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='ESPACE 2', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp94_fr', relevance=0.241), TopicMention(uid='tm-fr-all-v2.0_tp80_fr', relevance=0.158), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.058)], embeddings=None, transcriptLength=127.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.881581918051657, pageNumbers=[PageNumber(root=23.0)], collectionUids=[]), ContentItem(uid='IMP-1996-08-03-a-i0188', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.531), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.092), TopicMention(uid='tm-fr-all-v2.0_tp50_fr', relevance=0.069)], embeddings=None, transcriptLength=329.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-1996-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.92, relevanceScore=0.5892413013186858, pageNumbers=[PageNumber(root=19.0)], collectionUids=[]), ContentItem(uid='JDG-1996-10-07-a-i0157', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='RADIO', transcript=None, entities=NamedEntities(locations=[], persons=[NamedEntity(uid='2-50-Piotr_Ilitch_Tchaïkovski', count=1.0), NamedEntity(uid='2-50-Frédéric_Chopin', 
count=1.0), NamedEntity(uid='2-50-Wolfgang_Amadeus_Mozart', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='RSR-ESPACE', mentionConfidence=95.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Haendel', mentionConfidence=90.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Enesco', mentionConfidence=84.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='RSR-COULËUR', mentionConfidence=89.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Iles de France', mentionConfidence=68.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Synergie', mentionConfidence=79.99, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm=\"Zapp'monde\", mentionConfidence=55.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Medtner', mentionConfidence=94.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schumann', mentionConfidence=95.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tchaï-kovski', mentionConfidence=93.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tchaï-kovski', mentionConfidence=93.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Souffler', mentionConfidence=75.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chopin', mentionConfidence=91.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chopin', mentionConfidence=91.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ravel', mentionConfidence=90.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Radu Stan, musicologue', mentionConfidence=95.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stoé', mentionConfidence=93.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mitrea-Celarianu', mentionConfidence=79.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vesselin Stanev, piano', 
mentionConfidence=80.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Liszt', mentionConfidence=68.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gustav-Mahler', mentionConfidence=95.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mozart', mentionConfidence=92.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mozart', mentionConfidence=92.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schubert', mentionConfidence=93.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sa lieri', mentionConfidence=91.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='musicien 1830 Jazz', mentionConfidence=57.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gil Sha ham, violon', mentionConfidence=83.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sarasate', mentionConfidence=93.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='FRÂNCEINFER', mentionConfidence=59.77, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='RADIO', mentionConfidence=33.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cécile B. 
Loupan', mentionConfidence=74.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union Baroque Orchestra', mentionConfidence=82.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kunstmuseum de', mentionConfidence=67.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Laser', mentionConfidence=37.51, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.46), TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.11), TopicMention(uid='tm-fr-all-v2.0_tp85_fr', relevance=0.087), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.079), TopicMention(uid='tm-fr-all-v2.0_tp94_fr', relevance=0.078)], embeddings=None, transcriptLength=518.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1996, 10, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1996-10-07-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.5029897182322514, pageNumbers=[PageNumber(root=27.0)], collectionUids=[]), ContentItem(uid='GDL-1997-01-21-a-i0284', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Rome', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Treaty_of_Rome', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Rome', mentionConfidence=65.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='boulevard Konrad Adenauer', mentionConfidence=64.26, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='EUROPEAN INVESTMENT BANK', mentionConfidence=65.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='EIB', mentionConfidence=62.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Treaty', mentionConfidence=51.76, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='English Mother Tongue Secretaries', mentionConfidence=64.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Country of the European Union', mentionConfidence=79.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUROPEAN INVESTMENT BANK Recruitment Division', mentionConfidence=82.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='RHA 9703', mentionConfidence=40.63, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=181.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 1, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='GDL-1997-01-21-a', countryCode='CH', providerCode='SNL', mediaUid='GDL', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.7787571437226115, pageNumbers=[PageNumber(root=42.0)], collectionUids=[]), ContentItem(uid='JDG-1997-01-21-a-i0263', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title=None, transcript=None, entities=None, mentions=EntityMentions(locations=[EntityMention(surfaceForm='Rome', mentionConfidence=71.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='boulevard Konrad Adenauer', mentionConfidence=66.56, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='EIB', mentionConfidence=68.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Treaty', mentionConfidence=48.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='English Mother Tongue Secretaries', mentionConfidence=68.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Country of the European Union', mentionConfidence=75.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUROPEANINVESTMENTBANK Recruitment Division', mentionConfidence=70.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='RHA', mentionConfidence=44.84, startOffset=None, 
endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=177.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 1, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='JDG-1997-01-21-a', countryCode='CH', providerCode='SNL', mediaUid='JDG', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.7713265903707955, pageNumbers=[PageNumber(root=42.0)], collectionUids=[]), ContentItem(uid='FZG-1997-02-10-a-i0102', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='DRS1 DRS 2', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp67_de', relevance=0.767), TopicMention(uid='tm-de-all-v2.0_tp70_de', relevance=0.069)], embeddings=None, transcriptLength=179.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 2, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1997-02-10-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.94, relevanceScore=0.8782083644661646, pageNumbers=[PageNumber(root=6.0)], collectionUids=[]), ContentItem(uid='luxland-1997-06-13-a-i0466', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"Positioning in a Changing World The world's capital markets, domestic [...]\", transcript=None, entities=None, mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Edmond', mentionConfidence=85.48, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=2463.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 6, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1997-06-13-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.15912939622020983, pageNumbers=[PageNumber(root=38.0), PageNumber(root=40.0)], 
collectionUids=[]), ContentItem(uid='luxland-1997-06-13-a-i0468', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='An Institution in Transition In May 1998 the Institut Monétaire Luxemb[...]', transcript=None, entities=NamedEntities(locations=[], persons=[NamedEntity(uid='2-50-Pierre_Werner', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Pierre Werner', mentionConfidence=95.5, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=2789.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 6, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1997-06-13-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.15690348293440037, pageNumbers=[PageNumber(root=41.0), PageNumber(root=42.0)], collectionUids=[]), ContentItem(uid='luxland-1997-08-22-a-i0038', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='En avant les musiques', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.572), TopicMention(uid='tm-fr-all-v2.0_tp44_fr', relevance=0.064)], embeddings=None, transcriptLength=240.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1997, 8, 22, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1997-08-22-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.7366189312615937, pageNumbers=[PageNumber(root=9.0)], collectionUids=[]), ContentItem(uid='FZG-1998-02-07-a-i0054', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Wilde Waldfrau', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.378), 
TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.232), TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.12), TopicMention(uid='tm-de-all-v2.0_tp42_de', relevance=0.087)], embeddings=None, transcriptLength=729.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1998, 2, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1998-02-07-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.6097026970888518, pageNumbers=[PageNumber(root=9.0)], collectionUids=[]), ContentItem(uid='luxland-1998-05-29-a-i0174', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title='Publicité 7 Page 32', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Frome', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=2.0)], persons=[NamedEntity(uid='2-50-Henri_Tudor', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='EUROPEAN', mentionConfidence=48.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='FRAMEWORK', mentionConfidence=47.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='LUXEMBOURG', mentionConfidence=57.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg', mentionConfidence=96.78, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='CRP', mentionConfidence=58.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Henri Tudor', mentionConfidence=88.64, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Européen', mentionConfidence=68.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Télécommunications', mentionConfidence=67.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Institut Supérieur de Technologie 6', mentionConfidence=86.52, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, 
transcriptLength=83.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1998, 5, 29, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1998-05-29-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=1.0, relevanceScore=0.9279302605349172, pageNumbers=[PageNumber(root=32.0)], collectionUids=[]), ContentItem(uid='FedGazDe-1998-08-11-a-i0001', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Botschaft über einen neuen Geld- und Währungsartikel in der Bundesverf[...]', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Freiburg_im_Üechtland', count=1.0), NamedEntity(uid='2-54-Bern', count=1.0), NamedEntity(uid='2-54-Genève', count=1.0), NamedEntity(uid='2-54-Kanton_Zürich', count=1.0), NamedEntity(uid='2-54-Cambridge', count=1.0), NamedEntity(uid='2-54-Westeuropa', count=1.0), NamedEntity(uid='2-54-Dänemark', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-54-Deutschland', count=1.0), NamedEntity(uid='2-54-Frankreich', count=1.0), NamedEntity(uid='2-54-Spanien', count=1.0), NamedEntity(uid='2-54-Schweden', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0), NamedEntity(uid='2-54-Kanada', count=1.0), NamedEntity(uid='2-54-Königreich_Griechenland', count=1.0), NamedEntity(uid='2-54-Königreich_Ungarn', count=1.0), NamedEntity(uid='2-54-Mexiko', count=1.0), NamedEntity(uid='2-54-Polen', count=1.0), NamedEntity(uid='2-54-Tschechoslowakei', count=1.0), NamedEntity(uid='2-54-Türkei', count=1.0), NamedEntity(uid='2-54-Berlin', count=1.0), NamedEntity(uid='2-54-New_York_City', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0), NamedEntity(uid='2-54-Belgien', count=1.0), NamedEntity(uid='2-54-Japanisches_Kaiserreich', count=1.0), NamedEntity(uid='2-54-Niederlande', count=2.0), NamedEntity(uid='2-54-Schweiz', count=1.0), NamedEntity(uid='2-54-Aachen', count=1.0)], 
persons=[NamedEntity(uid='2-50-Leo_Schürmann', count=1.0)], organisations=[NamedEntity(uid='2-53-Bundesrat_(Schweiz)', count=2.0), NamedEntity(uid='2-53-Deutsche_Bundesbank', count=2.0), NamedEntity(uid='2-53-Schweizerische_Nationalbank', count=6.0), NamedEntity(uid='2-53-US-Dollar', count=1.0), NamedEntity(uid='2-53-Federal_Reserve_System', count=2.0), NamedEntity(uid='2-53-Internationaler_Währungsfonds', count=1.0), NamedEntity(uid='2-53-Eidgenössisches_Finanzdepartement', count=1.0), NamedEntity(uid='2-53-Schweizer_Demokraten', count=1.0), NamedEntity(uid='2-53-Evangelische_Volkspartei', count=1.0), NamedEntity(uid='2-53-Sozialdemokratische_Partei_Deutschlands', count=1.0), NamedEntity(uid='2-53-Schweizerische_Volkspartei', count=1.0), NamedEntity(uid='2-53-Europäische_Wirtschafts-_und_Währungsunion', count=2.0), NamedEntity(uid='2-53-Schweizer_Franken', count=2.0), NamedEntity(uid='2-53-Bundesverfassung_der_Schweizerischen_Eidgenossenschaft', count=1.0), NamedEntity(uid='2-53-Geld', count=1.0), NamedEntity(uid='2-53-Währungspolitik', count=1.0), NamedEntity(uid='2-53-Volkswirtschaft', count=1.0), NamedEntity(uid='2-53-Europäische_Zentralbank', count=5.0), NamedEntity(uid='2-53-Europäisches_System_der_Zentralbanken', count=1.0), NamedEntity(uid='2-53-Oesterreichische_Nationalbank', count=2.0), NamedEntity(uid='2-53-Europäische_Gemeinschaft', count=1.0), NamedEntity(uid='2-53-Sten', count=1.0), NamedEntity(uid='2-53-Europäische_Union', count=2.0), NamedEntity(uid='2-53-European_Parliament', count=1.0), NamedEntity(uid='2-53-Landesregierung', count=1.0), NamedEntity(uid='2-53-Sozialdemokratische_Partei_Österreichs', count=1.0), NamedEntity(uid='2-53-Bastian_(Vorname)', count=1.0), NamedEntity(uid='2-53-Schweizer_Bankwesen', count=1.0), NamedEntity(uid='2-53-Arbeitsgruppe', count=1.0), NamedEntity(uid='2-53-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-53-Belgien', count=1.0), NamedEntity(uid='2-53-Gotham_City', count=1.0), NamedEntity(uid='2-53-Schweiz', 
count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Münzfüss', mentionConfidence=78.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerfrankens', mentionConfidence=64.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Freiburg', mentionConfidence=92.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bern', mentionConfidence=69.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='Münzfuss', mentionConfidence=46.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Genève', mentionConfidence=91.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kantone', mentionConfidence=83.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='KfK', mentionConfidence=49.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Münzmonopol', mentionConfidence=45.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Etappen', mentionConfidence=96.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Landesindex', mentionConfidence=52.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Measuring', mentionConfidence=33.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cambridge', mentionConfidence=90.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Westeuropa', mentionConfidence=96.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dänemark', mentionConfidence=96.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grossbritannien', mentionConfidence=97.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='Deutschland', mentionConfidence=84.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Frankreich', mentionConfidence=90.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Spanien', mentionConfidence=92.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweden', 
mentionConfidence=97.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europas', mentionConfidence=72.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kanada', mentionConfidence=96.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Griechenland', mentionConfidence=96.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ungarn', mentionConfidence=95.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mexico', mentionConfidence=95.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Polen', mentionConfidence=94.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tschechische', mentionConfidence=68.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Türkei', mentionConfidence=88.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Primärmarkt', mentionConfidence=78.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=96.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='New York', mentionConfidence=79.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Paris', mentionConfidence=96.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Belgien', mentionConfidence=94.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Japan', mentionConfidence=95.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italien', mentionConfidence=92.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Niederlande', mentionConfidence=93.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Niederlanden', mentionConfidence=94.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='ECU', mentionConfidence=40.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweiz', mentionConfidence=88.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Solidarische', mentionConfidence=45.11, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='Paritätsänderungen', mentionConfidence=87.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='ASP', mentionConfidence=44.02, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Richli Paul', mentionConfidence=94.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kommentar BV', mentionConfidence=67.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nobel Peter', mentionConfidence=95.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Leo Schürmann', mentionConfidence=96.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='NBG', mentionConfidence=37.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Klauser Peter', mentionConfidence=91.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='recht', mentionConfidence=80.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Femer', mentionConfidence=60.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='Häfelin Ulrich', mentionConfidence=96.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Haller Walter', mentionConfidence=95.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Giovanoli Mario', mentionConfidence=96.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lusser Markus', mentionConfidence=87.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Heise Arne', mentionConfidence=81.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kommentar', mentionConfidence=58.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schürmann', mentionConfidence=53.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='KLAUSER', mentionConfidence=91.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gygi', mentionConfidence=93.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lastra Rosa Maria', mentionConfidence=75.26, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Lastra', mentionConfidence=80.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='Letho Taru', mentionConfidence=84.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hepperle', mentionConfidence=43.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vgl', mentionConfidence=59.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='Roger Scott', mentionConfidence=89.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Despres', mentionConfidence=66.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kindleberger', mentionConfidence=73.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Davon', mentionConfidence=62.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eichenberger', mentionConfidence=82.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Koller Heinrich', mentionConfidence=97.65, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Schweizerischen Bundesrates', mentionConfidence=41.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='WAK', mentionConfidence=48.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='BV', mentionConfidence=56.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='SNB', mentionConfidence=33.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nationalbankgesetzes', mentionConfidence=75.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dollars', mentionConfidence=39.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='amerikanischen Zentralbank', mentionConfidence=58.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wirtschaftsvenvaltungs', mentionConfidence=37.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Internationalen Währungsfonds', mentionConfidence=66.65, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Schweizerisches Bundesstaatsrecht', mentionConfidence=53.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='schweizerische Nationalbank', mentionConfidence=61.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Solidarische Schweiz', mentionConfidence=38.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Währungsverfassung', mentionConfidence=41.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössischen Finanzdepartements', mentionConfidence=56.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizer Demokraten', mentionConfidence=57.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ande', mentionConfidence=35.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische Gewerbeverband', mentionConfidence=52.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Evangelische Volkspartei', mentionConfidence=75.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Christlichnationale Gewerkschaftsbund', mentionConfidence=54.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='SPS', mentionConfidence=46.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='SVP', mentionConfidence=59.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische Gewerkschaftsbund', mentionConfidence=64.28, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinigung Schweizerischer Angestelltenverbände', mentionConfidence=59.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesratsvariante', mentionConfidence=44.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Gewerkschaftsbund', mentionConfidence=43.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWS', mentionConfidence=33.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWU', mentionConfidence=37.1, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='Deutschen Bundesbank', mentionConfidence=87.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zentralbankbuchgeld', mentionConfidence=45.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Notenmonopol', mentionConfidence=40.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische Nationalbank', mentionConfidence=82.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Franken Münzen', mentionConfidence=56.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Franken Noten', mentionConfidence=38.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Bundesverfassung', mentionConfidence=50.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Nationalbank', mentionConfidence=83.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='NotenbankpoHtik', mentionConfidence=53.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Repo', mentionConfidence=40.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Offenmarktgeschäfte', mentionConfidence=47.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geld', mentionConfidence=42.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Währungspolitik', mentionConfidence=50.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Goldstandard', mentionConfidence=40.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zentralbankautonomie', mentionConfidence=71.28, startOffset=None, endOffset=None), EntityMention(surfaceForm='Volkswirtschaft', mentionConfidence=55.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Aggregate Price Level', mentionConfidence=69.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie Performance and Poîicy', mentionConfidence=58.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Séries', 
mentionConfidence=38.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cast ofLiving', mentionConfidence=70.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Senate Finance Committee', mentionConfidence=69.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Advisory Commission', mentionConfidence=55.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stanford University', mentionConfidence=68.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Boskin', mentionConfidence=60.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wirtschaftsverfassitngsrecht', mentionConfidence=61.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizer Wirtschaft', mentionConfidence=51.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäische Zentralbank', mentionConfidence=60.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischen Zentralbank', mentionConfidence=55.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='ESZB', mentionConfidence=58.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bank of Japan Law', mentionConfidence=86.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='amerikanische Federai Reserve Act', mentionConfidence=71.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nationalbanken', mentionConfidence=71.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische Bundesbank', mentionConfidence=83.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='schweizerischen Zentralbank', mentionConfidence=55.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='EZB', mentionConfidence=41.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Notenbanken', mentionConfidence=31.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='EG', mentionConfidence=53.1, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='EZSB', mentionConfidence=67.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='Central Banking and Banking Régulation', mentionConfidence=88.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Central Bank Independence in Another Eleven Countries', mentionConfidence=89.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sten', mentionConfidence=51.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischen Union', mentionConfidence=69.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='nationalen Zentralbanken', mentionConfidence=71.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=59.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Economy', mentionConfidence=73.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Parlaments', mentionConfidence=52.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gesamtbundesrat', mentionConfidence=44.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Landesregierung', mentionConfidence=71.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='sozialdemokratischen', mentionConfidence=43.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Notenbank Transparenz', mentionConfidence=74.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bassat Avraham', mentionConfidence=46.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Optimal Composition ofForeign Exchange Réserves', mentionConfidence=70.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Journal of Economies', mentionConfidence=68.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Demand for International Réserves', mentionConfidence=70.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='IMF', mentionConfidence=48.23, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Staff Papers', mentionConfidence=55.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Theory', mentionConfidence=49.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bank of Finland Discussion Papers', mentionConfidence=79.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bastian', mentionConfidence=63.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäische Hochschulschriften', mentionConfidence=54.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Management ofForeign Exchange Rate Réserves', mentionConfidence=71.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie Papers', mentionConfidence=53.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Survey of Récent Empirical Sttidies', mentionConfidence=76.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Applied Economies', mentionConfidence=66.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Länder und Zentralbanken', mentionConfidence=74.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='SZR', mentionConfidence=44.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Minorìly', mentionConfidence=49.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Brookings Institution', mentionConfidence=65.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizer Banken', mentionConfidence=58.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='Goldlending', mentionConfidence=56.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesfìnanzhaushaltes', mentionConfidence=53.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='Arbeitsgruppe', mentionConfidence=44.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Anlagepolitik', mentionConfidence=43.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nationalbankgewinne', 
mentionConfidence=53.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nationalbankgewinns', mentionConfidence=55.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='NR', mentionConfidence=48.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäische Union', mentionConfidence=66.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Länder Österreich', mentionConfidence=37.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='UK USA', mentionConfidence=40.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Österreich Belgien', mentionConfidence=30.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='BIZ', mentionConfidence=48.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Monthly Surveillance Statistics', mentionConfidence=69.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Finance Statistics', mentionConfidence=71.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='OECD Main Economie Indicators', mentionConfidence=86.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BIP', mentionConfidence=73.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Importe', mentionConfidence=30.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Eidgenossenschaft', mentionConfidence=57.85, startOffset=None, endOffset=None)], newsAgencies=[EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=54.99, startOffset=None, endOffset=None)]), topics=[], embeddings=None, transcriptLength=29135.0, totalPages=74.0, languageCode='de', isOnFrontPage=True, publicationDate=datetime.datetime(1998, 8, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazDe-1998-08-11-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazDe', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.93, relevanceScore=0.034107668629357275, pageNumbers=[PageNumber(root=1.0), PageNumber(root=2.0), PageNumber(root=3.0), PageNumber(root=4.0), PageNumber(root=5.0), PageNumber(root=6.0), PageNumber(root=7.0), PageNumber(root=8.0), PageNumber(root=9.0), PageNumber(root=10.0), PageNumber(root=11.0), PageNumber(root=12.0), PageNumber(root=13.0), PageNumber(root=14.0), PageNumber(root=15.0), PageNumber(root=16.0), PageNumber(root=17.0), PageNumber(root=18.0), PageNumber(root=19.0), PageNumber(root=20.0), PageNumber(root=21.0), PageNumber(root=22.0), PageNumber(root=23.0), PageNumber(root=24.0), PageNumber(root=25.0), PageNumber(root=26.0), PageNumber(root=27.0), PageNumber(root=28.0), PageNumber(root=29.0), PageNumber(root=30.0), PageNumber(root=31.0), PageNumber(root=32.0), PageNumber(root=33.0), PageNumber(root=34.0), PageNumber(root=35.0), PageNumber(root=36.0), PageNumber(root=37.0), PageNumber(root=38.0), PageNumber(root=39.0), PageNumber(root=40.0), PageNumber(root=41.0), PageNumber(root=42.0), PageNumber(root=43.0), PageNumber(root=44.0), PageNumber(root=45.0), PageNumber(root=46.0), PageNumber(root=47.0), PageNumber(root=48.0), PageNumber(root=49.0), PageNumber(root=50.0), PageNumber(root=51.0), 
PageNumber(root=52.0), PageNumber(root=53.0), PageNumber(root=54.0), PageNumber(root=55.0), PageNumber(root=56.0), PageNumber(root=57.0), PageNumber(root=58.0), PageNumber(root=59.0), PageNumber(root=60.0), PageNumber(root=61.0), PageNumber(root=62.0), PageNumber(root=63.0), PageNumber(root=64.0), PageNumber(root=65.0), PageNumber(root=66.0), PageNumber(root=67.0), PageNumber(root=68.0), PageNumber(root=69.0), PageNumber(root=70.0), PageNumber(root=71.0), PageNumber(root=72.0), PageNumber(root=73.0), PageNumber(root=74.0)], collectionUids=[]), ContentItem(uid='FedGazFr-1998-08-11-a-i0001', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Message concernant un nouvel article constitutionnel sur la monnaie du[...]', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Suisse', count=1.0), NamedEntity(uid='2-54-Berne', count=1.0), NamedEntity(uid='2-54-Zurich', count=1.0), NamedEntity(uid='2-54-Allemagne', count=1.0), NamedEntity(uid='2-54-Zürich', count=1.0), NamedEntity(uid='2-54-Cambridge', count=1.0), NamedEntity(uid=\"2-54-Europe_de_l'Ouest\", count=1.0), NamedEntity(uid='2-54-Rhinow', count=1.0), NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Espagne', count=1.0), NamedEntity(uid='2-54-Japon', count=1.0), NamedEntity(uid='2-54-Hongrie', count=1.0), NamedEntity(uid='2-54-Mexique', count=1.0), NamedEntity(uid='2-54-Pologne', count=1.0), NamedEntity(uid='2-54-Turquie', count=1.0), NamedEntity(uid='2-54-Berlin', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0), NamedEntity(uid='2-54-Vienne_(Autriche)', count=1.0), NamedEntity(uid='2-54-États-Unis', count=1.0), NamedEntity(uid='2-54-Italie', count=1.0), NamedEntity(uid='2-54-Belgique', count=1.0), NamedEntity(uid='2-54-Suède', count=1.0), NamedEntity(uid='2-54-Autriche', count=1.0)], persons=[NamedEntity(uid='2-50-Peter_Nobel', count=1.0), NamedEntity(uid='2-50-Leo_Schürmann', count=1.0)], organisations=[NamedEntity(uid='2-53-Franc_suisse', count=1.0), 
NamedEntity(uid='2-53-États-Unis', count=2.0), NamedEntity(uid='2-53-Fonds_monétaire_international', count=1.0), NamedEntity(uid='2-53-Association_suisse_des_banquiers', count=1.0), NamedEntity(uid='2-53-Assemblée_fédérale_(Suisse)', count=1.0), NamedEntity(uid='2-53-Département_fédéral_des_finances', count=1.0), NamedEntity(uid='2-53-Union_démocratique_fédérale', count=1.0), NamedEntity(uid='2-53-Démocrates_suisses', count=1.0), NamedEntity(uid='2-53-Union_suisse_des_arts_et_métiers', count=1.0), NamedEntity(uid='2-53-Union_économique_et_monétaire', count=1.0), NamedEntity(uid='2-53-Parti_socialiste_suisse', count=2.0), NamedEntity(uid='2-53-Union_démocratique_du_centre', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=2.0), NamedEntity(uid='2-53-Alliance_des_indépendants', count=1.0), NamedEntity(uid='2-53-Vorort', count=1.0), NamedEntity(uid='2-53-Parti_radical-démocratique', count=1.0), NamedEntity(uid='2-53-Union_syndicale_suisse', count=1.0), NamedEntity(uid='2-53-Parti_suisse_du_travail', count=1.0), NamedEntity(uid='2-53-Fédéralisme', count=1.0), NamedEntity(uid='2-53-Banque_nationale_suisse', count=1.0), NamedEntity(uid='2-53-Royaume-Uni', count=1.0), NamedEntity(uid='2-53-Banque_centrale_européenne', count=2.0), NamedEntity(uid='2-53-Pologne', count=1.0), NamedEntity(uid='2-53-Conseil_européen', count=1.0), NamedEntity(uid='2-53-Suisse', count=1.0), NamedEntity(uid='2-53-Pays-Bas_(pays_constitutif)', count=1.0), NamedEntity(uid='2-53-Schweizerische_Nationalbank', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Suisse', mentionConfidence=94.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berne', mentionConfidence=41.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zurich', mentionConfidence=88.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Varticle', mentionConfidence=37.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Allemagne', 
mentionConfidence=96.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wandel', mentionConfidence=74.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zürich', mentionConfidence=76.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cambridge', mentionConfidence=81.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe occidentale', mentionConfidence=81.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rhinow', mentionConfidence=81.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='France', mentionConfidence=87.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Espagne', mentionConfidence=88.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Japon', mentionConfidence=95.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hongrie', mentionConfidence=88.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mexique', mentionConfidence=90.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pologne', mentionConfidence=88.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='République tchèque', mentionConfidence=77.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Turquie', mentionConfidence=86.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=97.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Paris', mentionConfidence=97.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wien', mentionConfidence=93.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='USA', mentionConfidence=90.16, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italie', mentionConfidence=88.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Belgique', mentionConfidence=87.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suède', mentionConfidence=85.04, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='UME', mentionConfidence=60.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='ECU', mentionConfidence=75.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='PJA', mentionConfidence=37.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Autriche', mentionConfidence=74.11, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Richli Paul', mentionConfidence=94.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Junod', mentionConfidence=95.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nobel Peter', mentionConfidence=96.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Leo Schürmann', mentionConfidence=95.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Klauser Peter', mentionConfidence=90.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schürmann Leo', mentionConfidence=94.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Giovanoli Mario', mentionConfidence=86.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lusscr Markus', mentionConfidence=91.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hcise Arne', mentionConfidence=94.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Beat Kleiner', mentionConfidence=95.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Weber Rolf H', mentionConfidence=80.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hallcr Walter', mentionConfidence=93.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='3lquinquies', mentionConfidence=34.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='KJauscr', mentionConfidence=94.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lastra Rosa Maria', mentionConfidence=93.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eijffinger Sylvester', 
mentionConfidence=97.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='van Keulen Martijn', mentionConfidence=88.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Paul Richli', mentionConfidence=93.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hcpperle Bastian', mentionConfidence=75.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Letho', mentionConfidence=85.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lctho', mentionConfidence=93.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Koller Heinrich', mentionConfidence=98.29, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Ledergerber', mentionConfidence=66.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='franc suisse', mentionConfidence=44.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fondation Suisse solidaire', mentionConfidence=79.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Direction générale de la Banque', mentionConfidence=70.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='schweizerischen', mentionConfidence=37.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Société des juristes bernois', mentionConfidence=71.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen', mentionConfidence=40.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Notenbank', mentionConfidence=41.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gesellschaft', mentionConfidence=45.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='LBN', mentionConfidence=37.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bretton Woods', mentionConfidence=46.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berner Kommentar zum Obligationenrecht', mentionConfidence=63.56, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Unis', mentionConfidence=42.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wìrtschaftsvenvaìtungsrecht', mentionConfidence=62.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='lending', mentionConfidence=49.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fonds monétaire international', mentionConfidence=65.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerisches Bundesstaatsrecht', mentionConfidence=78.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Association suisse des banquiers', mentionConfidence=90.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pétalon', mentionConfidence=70.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chambres fédérales', mentionConfidence=59.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Département fédéral des finances', mentionConfidence=81.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='groupe 3498', mentionConfidence=46.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='CQC', mentionConfidence=67.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Parti suisse de la liberté', mentionConfidence=95.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union démocratique fédérale', mentionConfidence=93.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Démocrates suisses', mentionConfidence=82.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union suisse des arts et métiers', mentionConfidence=94.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Parti évangéliste populaire', mentionConfidence=85.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Confédération des syndicats chrétiens de Suisse', mentionConfidence=87.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union monétaire européenne', mentionConfidence=84.32, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Parti socialiste suisse', mentionConfidence=92.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='PSS', mentionConfidence=34.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union démocratique du centre', mentionConfidence=90.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne', mentionConfidence=90.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alliance des indépendants', mentionConfidence=81.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vorort', mentionConfidence=57.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='PRD', mentionConfidence=63.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union syndicale suisse', mentionConfidence=92.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Direction générale de la BNS', mentionConfidence=85.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Parti du travail', mentionConfidence=91.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Maastricht', mentionConfidence=68.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='fédérales désuètes', mentionConfidence=89.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fédéral Reserve System américain', mentionConfidence=90.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Deutsche Bundesbank', mentionConfidence=82.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geldpolitik im Disput', mentionConfidence=56.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Konjunkturpolitik', mentionConfidence=44.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='fédéralisme', mentionConfidence=69.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geldhoheli', mentionConfidence=76.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bargeld', 
mentionConfidence=42.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Währungshoheit', mentionConfidence=60.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='cashless society', mentionConfidence=60.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque nationale suisse', mentionConfidence=91.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Measuring thé Aggregate Price Level', mentionConfidence=81.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Implications', mentionConfidence=48.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie Performance and Policy', mentionConfidence=78.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Séries', mentionConfidence=37.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Senate Finance Committee', mentionConfidence=77.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Advisory Commission', mentionConfidence=51.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stanford Univcrsity', mentionConfidence=81.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Boskin', mentionConfidence=62.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wirtschaftsverfassungsrecht', mentionConfidence=59.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='travail UEM', mentionConfidence=74.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union économique et monétaire européenne', mentionConfidence=89.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grande', mentionConfidence=80.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque centrale européenne', mentionConfidence=87.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Système européen de banques centrales', mentionConfidence=77.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='SEBC', mentionConfidence=64.66, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Bank of Japan Law', mentionConfidence=73.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fédéral Reserve Act', mentionConfidence=84.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische Bundesbank', mentionConfidence=82.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque fédérale', mentionConfidence=89.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='République', mentionConfidence=70.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='traité', mentionConfidence=44.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Central Banking and Banking Régulation', mentionConfidence=92.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='BNL Quarterly Revicw', mentionConfidence=56.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='communes', mentionConfidence=66.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Convergence', mentionConfidence=41.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='thé European Union', mentionConfidence=57.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Commission', mentionConfidence=61.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Economy', mentionConfidence=56.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='BCE 52', mentionConfidence=57.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Conseil européen', mentionConfidence=68.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Confédération', mentionConfidence=61.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='banque centrale Introduction', mentionConfidence=49.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bcn', mentionConfidence=62.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bassat Avraham', 
mentionConfidence=69.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Optimal Composition of Foreign Exchange Reserves', mentionConfidence=72.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Journal of Economies', mentionConfidence=68.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Demand for International Reserves and Their Opporlunity Costs', mentionConfidence=75.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='IMF Staff Papers', mentionConfidence=58.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lctho Taru', mentionConfidence=73.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bank of Finland Discussion Papers', mentionConfidence=85.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Porfoliomanagcmcnt', mentionConfidence=41.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Management of Foreign Exchange Rate Reserves', mentionConfidence=78.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Economie Papcrs', mentionConfidence=67.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Survey of Récent EmpiricalStudies', mentionConfidence=77.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Applied Economies', mentionConfidence=72.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Conférence', mentionConfidence=49.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pays', mentionConfidence=75.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='PUEM', mentionConfidence=56.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='ECU officiel', mentionConfidence=46.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='BNS 257', mentionConfidence=67.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Minority View', mentionConfidence=63.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Brookings 
Institution', mentionConfidence=73.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesverfassung', mentionConfidence=43.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Uni USA', mentionConfidence=72.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='FMI International Finance Statistics', mentionConfidence=74.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='OCDE Main Economie Indicatori', mentionConfidence=83.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Assemblée fédérale de la Confédération suisse', mentionConfidence=92.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque Nationale Suisse', mentionConfidence=92.18, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=37607.0, totalPages=75.0, languageCode='fr', isOnFrontPage=True, publicationDate=datetime.datetime(1998, 8, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazFr-1998-08-11-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazFr', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.98, relevanceScore=0.014459886392813148, pageNumbers=[PageNumber(root=1.0), PageNumber(root=2.0), PageNumber(root=3.0), PageNumber(root=4.0), PageNumber(root=5.0), PageNumber(root=6.0), PageNumber(root=7.0), PageNumber(root=8.0), PageNumber(root=9.0), PageNumber(root=10.0), PageNumber(root=11.0), PageNumber(root=12.0), PageNumber(root=13.0), PageNumber(root=14.0), PageNumber(root=15.0), PageNumber(root=16.0), PageNumber(root=17.0), PageNumber(root=18.0), PageNumber(root=19.0), PageNumber(root=20.0), PageNumber(root=21.0), PageNumber(root=22.0), PageNumber(root=23.0), PageNumber(root=24.0), PageNumber(root=25.0), PageNumber(root=26.0), PageNumber(root=27.0), PageNumber(root=28.0), PageNumber(root=29.0), PageNumber(root=30.0), PageNumber(root=31.0), PageNumber(root=32.0), PageNumber(root=33.0), PageNumber(root=34.0), PageNumber(root=35.0), PageNumber(root=36.0), 
PageNumber(root=37.0), PageNumber(root=38.0), PageNumber(root=39.0), PageNumber(root=40.0), PageNumber(root=41.0), PageNumber(root=42.0), PageNumber(root=43.0), PageNumber(root=44.0), PageNumber(root=45.0), PageNumber(root=46.0), PageNumber(root=47.0), PageNumber(root=48.0), PageNumber(root=49.0), PageNumber(root=50.0), PageNumber(root=51.0), PageNumber(root=52.0), PageNumber(root=53.0), PageNumber(root=54.0), PageNumber(root=55.0), PageNumber(root=56.0), PageNumber(root=57.0), PageNumber(root=58.0), PageNumber(root=59.0), PageNumber(root=60.0), PageNumber(root=61.0), PageNumber(root=62.0), PageNumber(root=63.0), PageNumber(root=64.0), PageNumber(root=65.0), PageNumber(root=66.0), PageNumber(root=67.0), PageNumber(root=68.0), PageNumber(root=69.0), PageNumber(root=70.0), PageNumber(root=71.0), PageNumber(root=72.0), PageNumber(root=73.0), PageNumber(root=74.0), PageNumber(root=75.0)], collectionUids=[]), ContentItem(uid='luxland-1998-11-13-a-i0062', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Ministère de la Culture', transcript=None, entities=None, mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Sir Colin', mentionConfidence=60.37, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.189), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp19_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.073), TopicMention(uid='tm-fr-all-v2.0_tp57_fr', relevance=0.06)], embeddings=None, transcriptLength=116.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1998, 11, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1998-11-13-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.9270905684109706, pageNumbers=[PageNumber(root=16.0)], collectionUids=[]), 
ContentItem(uid='FedGazDe-1999-06-01-a-i0001', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Botschaft zu einem Bundesgesetz über Arzneimittel und Medizinprodukte [...]', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Bern', count=1.0), NamedEntity(uid='2-54-Liechtenstein', count=2.0), NamedEntity(uid='2-54-Schweiz', count=3.0), NamedEntity(uid='2-54-Kanton_Appenzell', count=1.0), NamedEntity(uid='2-54-Europäische_Freihandelsassoziation', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0), NamedEntity(uid='2-54-Japan', count=1.0), NamedEntity(uid='2-54-Basel', count=1.0), NamedEntity(uid='2-54-Kanton_Zürich', count=1.0), NamedEntity(uid='2-54-Niederlande', count=1.0), NamedEntity(uid='2-54-Horden', count=1.0), NamedEntity(uid='2-54-Chicago', count=1.0), NamedEntity(uid='2-54-Zentralamerika', count=1.0), NamedEntity(uid='2-54-Belgien', count=1.0), NamedEntity(uid='2-54-Dänemark', count=1.0), NamedEntity(uid='2-54-Finnland', count=1.0), NamedEntity(uid='2-54-Norwegen', count=1.0), NamedEntity(uid='2-54-Schweden', count=1.0), NamedEntity(uid='2-54-Spanien', count=1.0), NamedEntity(uid='2-54-Alpen', count=1.0), NamedEntity(uid='2-54-Italien', count=1.0), NamedEntity(uid='2-54-Zurich', count=1.0), NamedEntity(uid='2-54-Helsinki', count=1.0), NamedEntity(uid='2-54-Vereinigte_Staaten', count=2.0), NamedEntity(uid='2-54-Nordwestschweiz', count=1.0), NamedEntity(uid='2-54-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-54-Kanada', count=1.0), NamedEntity(uid='2-54-Nordostschweiz', count=1.0), NamedEntity(uid='2-54-Kanton_Tessin', count=1.0), NamedEntity(uid='2-54-Frankreich', count=1.0), NamedEntity(uid='2-54-Bussen', count=1.0), NamedEntity(uid='2-54-Eschweiler_(Allemagne)', count=1.0)], persons=[NamedEntity(uid='2-50-Eduard_David', count=1.0), NamedEntity(uid='2-50-Ruth_Dreifuss', count=2.0), NamedEntity(uid='2-50-François_Couchepin', count=1.0), NamedEntity(uid='2-50-Julius_Bär', count=1.0)], 
organisations=[NamedEntity(uid='2-53-Bundesrat_(Schweiz)', count=1.0), NamedEntity(uid='2-53-Kanton_Appenzell_Ausserrhoden', count=1.0), NamedEntity(uid='2-53-Europäische_Union', count=1.0), NamedEntity(uid='2-53-Freihandelsabkommen', count=1.0), NamedEntity(uid='2-53-International', count=1.0), NamedEntity(uid='2-53-Welthandelsorganisation', count=3.0), NamedEntity(uid='2-53-Weltgesundheitsorganisation', count=3.0), NamedEntity(uid='2-53-Eidgenössisches_Departement_des_Innern', count=2.0), NamedEntity(uid='2-53-Kontrollstelle', count=1.0), NamedEntity(uid='2-53-Bundesverwaltung', count=2.0), NamedEntity(uid='2-53-Soziale_Sicherheit', count=1.0), NamedEntity(uid='2-53-Washington,_D.C.', count=1.0), NamedEntity(uid='2-53-New_York_City', count=1.0), NamedEntity(uid='2-53-HIV', count=1.0), NamedEntity(uid='2-53-Buchstabe', count=1.0), NamedEntity(uid='2-53-Gute_klinische_Praxis', count=1.0), NamedEntity(uid='2-53-Vereinigtes_Königreich', count=1.0), NamedEntity(uid='2-53-Swiss_Olympic', count=2.0), NamedEntity(uid='2-53-Test', count=1.0), NamedEntity(uid='2-53-Liste', count=1.0), NamedEntity(uid='2-53-Bundesamt_für_Gesundheit', count=1.0), NamedEntity(uid='2-53-Bundesamt_für_Sport', count=1.0), NamedEntity(uid='2-53-Bundesverfassung_der_Schweizerischen_Eidgenossenschaft', count=1.0), NamedEntity(uid=\"2-53-Département_fédéral_de_l'intérieur\", count=1.0), NamedEntity(uid='2-53-Europäische_Freihandelsassoziation', count=1.0), NamedEntity(uid='2-53-Europäischer_Wirtschaftsraum', count=1.0), NamedEntity(uid='2-53-Europäische_Wirtschaftsgemeinschaft', count=1.0), NamedEntity(uid='2-53-Food_and_Drug_Administration', count=1.0), NamedEntity(uid='2-53-Allgemeines_Abkommen_über_den_Handel_mit_Dienstleistungen', count=2.0), NamedEntity(uid='2-53-Allgemeines_Zoll-_und_Handelsabkommen', count=2.0), NamedEntity(uid='2-53-Vereinte_Nationen', count=2.0), NamedEntity(uid='2-53-Verkehr', count=1.0), NamedEntity(uid='2-53-Energie', count=1.0), NamedEntity(uid='2-53-Schweiz', 
count=1.0), NamedEntity(uid='2-53-Bundesrechtspflegegesetz', count=1.0)], newsAgencies=[NamedEntity(uid='4-55-Reuters', count=201.0), NamedEntity(uid='', count=None)]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='RTVG', mentionConfidence=54.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bern', mentionConfidence=94.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fürstentum Liechtenstein', mentionConfidence=57.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='BAG', mentionConfidence=91.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='internationaler', mentionConfidence=76.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ebene', mentionConfidence=49.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Medizinprodukte', mentionConfidence=83.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Umfeld', mentionConfidence=80.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fürstentums Liechtenstein', mentionConfidence=91.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Davon', mentionConfidence=91.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kanton Appenzell', mentionConfidence=97.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='CADREAC', mentionConfidence=84.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='EFTA', mentionConfidence=44.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europa', mentionConfidence=92.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Japan', mentionConfidence=96.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gütem', mentionConfidence=96.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Basel', mentionConfidence=88.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zielland', mentionConfidence=58.77, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Groupe', mentionConfidence=41.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pompidou', mentionConfidence=67.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kantons', mentionConfidence=44.28, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dormami', mentionConfidence=76.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gentherapie', mentionConfidence=52.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Niederlanden', mentionConfidence=62.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Horden', mentionConfidence=52.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Salben', mentionConfidence=38.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Staaten', mentionConfidence=76.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Buckstabe', mentionConfidence=96.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tierarz', mentionConfidence=81.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ice', mentionConfidence=45.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zentralamerika', mentionConfidence=96.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Belgien', mentionConfidence=95.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dänemark', mentionConfidence=95.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Finnland', mentionConfidence=93.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Norwegen', mentionConfidence=93.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweden', mentionConfidence=93.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='Spanien', mentionConfidence=93.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tests', mentionConfidence=47.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alp', 
mentionConfidence=56.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Parasiten', mentionConfidence=51.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kälbern', mentionConfidence=97.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bisher', mentionConfidence=73.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italien', mentionConfidence=76.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schulthess', mentionConfidence=58.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zürich', mentionConfidence=93.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Helsinki', mentionConfidence=95.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='United', mentionConfidence=53.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='USA', mentionConfidence=95.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nordwestschweiz', mentionConfidence=73.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Grossbritannien', mentionConfidence=90.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kanada', mentionConfidence=96.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Nordostschweiz', mentionConfidence=72.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tessin', mentionConfidence=96.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Frankreich', mentionConfidence=96.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='UVEK', mentionConfidence=76.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bussen', mentionConfidence=64.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='ESSM', mentionConfidence=90.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='EpG', mentionConfidence=64.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fremdzellen', mentionConfidence=91.5, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='Danach', mentionConfidence=62.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Euratom', mentionConfidence=34.07, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Frau Präsidentin', mentionConfidence=74.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schmid Werner', mentionConfidence=86.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dormann', mentionConfidence=71.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='David', mentionConfidence=77.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundespräsidentin', mentionConfidence=69.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ruth Dreifuss', mentionConfidence=96.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='François Couchepin', mentionConfidence=94.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Julius Bär', mentionConfidence=81.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Saclie', mentionConfidence=46.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Brillen', mentionConfidence=31.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Orphan Drugs', mentionConfidence=72.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Abgabekategorie', mentionConfidence=41.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chiropraktikerinnen', mentionConfidence=39.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hebammen', mentionConfidence=37.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Homöopathin', mentionConfidence=68.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Herzschrittmacher', mentionConfidence=33.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dominique Spnimont', mentionConfidence=97.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Prüfer', 
mentionConfidence=90.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alzheimerpatienten', mentionConfidence=78.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geheimnisherr', mentionConfidence=84.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cottier', mentionConfidence=75.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Orphari Drugs', mentionConfidence=87.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Agency', mentionConfidence=85.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Virus HMG BG', mentionConfidence=43.95, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Implantaten', mentionConfidence=59.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Bundesrates', mentionConfidence=52.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=62.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Humanarzneimitteln', mentionConfidence=38.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Hightech', mentionConfidence=53.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Strahlenschutzverordnung', mentionConfidence=44.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ausserrhoden', mentionConfidence=33.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischen Union', mentionConfidence=62.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Medicines Evaluation Agency', mentionConfidence=79.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='EMEA', mentionConfidence=53.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='New and global approach', mentionConfidence=56.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Comité européen de normalisation électrique', mentionConfidence=72.21, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='European Union Associated Countries', mentionConfidence=81.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Memoranda of Understanding', mentionConfidence=43.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Evaluation Reports', mentionConfidence=46.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='PER', mentionConfidence=47.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Freihandelszone', mentionConfidence=37.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='International', mentionConfidence=56.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Welthandelsorganisation', mentionConfidence=48.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='WTO', mentionConfidence=56.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Weltgesundheitsorganisation', mentionConfidence=68.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='World Health Organisation', mentionConfidence=76.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Health Assembly', mentionConfidence=58.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Arzneimittelgesetzes', mentionConfidence=49.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='MepV', mentionConfidence=34.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössische Departement des Innern', mentionConfidence=72.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössischen Departement des Innern', mentionConfidence=74.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rahmenerlasse', mentionConfidence=45.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kontrollstelle', mentionConfidence=47.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesverwaltung', mentionConfidence=41.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesamt 
für Sozialversicherung', mentionConfidence=52.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zentralverwaltung des Bundes', mentionConfidence=72.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Essential Drug', mentionConfidence=87.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Substance Abuse', mentionConfidence=88.82, startOffset=None, endOffset=None), EntityMention(surfaceForm='Narcotic Drugs', mentionConfidence=73.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Versandhandel', mentionConfidence=36.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kantonsapotheken', mentionConfidence=52.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='LMG', mentionConfidence=38.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ernährungskommission', mentionConfidence=47.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='soziale Sicherheit', mentionConfidence=60.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerisches Arzneimittelgesetz', mentionConfidence=50.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Standesinitiative Bern', mentionConfidence=53.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='EFBS', mentionConfidence=59.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWG', mentionConfidence=45.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bst', mentionConfidence=46.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kantonsapothekerinnen', mentionConfidence=48.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='Voyame', mentionConfidence=31.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Magistralrezepturen', mentionConfidence=51.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='mi ttel', mentionConfidence=60.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Know', 
mentionConfidence=37.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schwarzmarktdroge', mentionConfidence=42.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='Metamphetamin', mentionConfidence=30.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ziellandes', mentionConfidence=55.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Exportstaates Department for Policy Coordination and Sustainable Development', mentionConfidence=76.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='United Nations Publications', mentionConfidence=84.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='York', mentionConfidence=38.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Commerce', mentionConfidence=81.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wetlbewerbskommission', mentionConfidence=45.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Good Distribution Practice', mentionConfidence=57.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='GDP', mentionConfidence=59.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Typ', mentionConfidence=39.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tierarzneimitteln', mentionConfidence=37.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='CENELEC', mentionConfidence=58.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Buchstabe', mentionConfidence=32.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Silikon', mentionConfidence=44.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Herzklappen', mentionConfidence=32.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Beatmungs', mentionConfidence=47.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Narkose', mentionConfidence=35.59, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Geräte', mentionConfidence=44.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tessiner', mentionConfidence=32.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kommentar Bundesverfassung', mentionConfidence=61.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Wissenschaftsfreiheit', mentionConfidence=33.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Harmonised Tripartite', mentionConfidence=37.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Guideline', mentionConfidence=49.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='Good Clinìcal Practice', mentionConfidence=62.86, startOffset=None, endOffset=None), EntityMention(surfaceForm='International Conference on Harmonisation', mentionConfidence=58.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Institut suisse de droit comparé', mentionConfidence=80.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zentrales Element', mentionConfidence=55.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kingdom', mentionConfidence=81.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mutual Récognition Agreements', mentionConfidence=78.09, startOffset=None, endOffset=None), EntityMention(surfaceForm='Medicai Products Agency', mentionConfidence=75.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='New Public Management', mentionConfidence=65.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='BPG', mentionConfidence=61.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Risikoprofil', mentionConfidence=61.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Finan', mentionConfidence=56.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='BUWAL', mentionConfidence=44.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bussenrahmen', mentionConfidence=62.15, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Arbeitgeber Bund', mentionConfidence=67.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bund der PKB', mentionConfidence=79.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fairplay', mentionConfidence=45.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='roparatskonvention', mentionConfidence=46.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Doping', mentionConfidence=34.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischen Olympischen Verband', mentionConfidence=62.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mutual Récognition Agreement', mentionConfidence=80.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vgl', mentionConfidence=30.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='RL78', mentionConfidence=43.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Richtlinien', mentionConfidence=42.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='RL90', mentionConfidence=30.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Humanund Veterinärmedizin', mentionConfidence=34.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='RL73', mentionConfidence=37.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Committee for Proprietary Médicinal Products', mentionConfidence=70.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='CPMP', mentionConfidence=56.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='RichÜinie', mentionConfidence=36.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Committee for veterinary médicinal products', mentionConfidence=72.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='CVMP', mentionConfidence=54.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Globalen', mentionConfidence=44.05, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Neuen', mentionConfidence=39.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Test', mentionConfidence=31.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Reagenzien', mentionConfidence=45.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='Liste', mentionConfidence=69.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäische Datenbank', mentionConfidence=53.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='BAG BASPO', mentionConfidence=42.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesamt für Gesundheit', mentionConfidence=65.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesamt für Sport', mentionConfidence=62.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössische Sportschule Magglingen', mentionConfidence=76.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesblatt BG', mentionConfidence=52.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesverfassung der Schweizerischen Eidgenossenschaft', mentionConfidence=63.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesamt für Veterinärwesen', mentionConfidence=73.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Collaboration Agreement of Drug Regulatory Authorities in Union Associated Countries', mentionConfidence=77.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Comité européen de normalisation électricité', mentionConfidence=71.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='proprietary médicinal products', mentionConfidence=61.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössisches Departement des Innern', mentionConfidence=78.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fachkommission für biologische Sicherheit', mentionConfidence=62.2, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='European Free Trade Association EG Europäische Gemeinschaft', mentionConfidence=76.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössische Pharmakopöekommission', mentionConfidence=64.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesamt', mentionConfidence=68.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europäischer Wirtschaftsraum', mentionConfidence=50.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='EWG Europäische Wirtschaftsgemeinschaft', mentionConfidence=51.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='FDA', mentionConfidence=38.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Drug', mentionConfidence=43.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='USA GATS', mentionConfidence=63.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='General Agreement on Trade in Services', mentionConfidence=52.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='GATT', mentionConfidence=57.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='General Agreement on Tariffs and Trade', mentionConfidence=56.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='GCP Good Clihical Practice', mentionConfidence=54.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='GDP Good', mentionConfidence=46.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='GLP Good Laboratory Practice', mentionConfidence=60.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='GMP Good Manufacturing Practice', mentionConfidence=68.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='GPKV', mentionConfidence=38.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='GSASA Gesellschaft', mentionConfidence=47.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Spitalapotheker', 
mentionConfidence=39.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Internationales Olympisches', mentionConfidence=43.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='wirtschaftliche Zusammenarbeit', mentionConfidence=60.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Inspection Convention', mentionConfidence=54.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundes RL', mentionConfidence=72.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Richtlinie RVOG', mentionConfidence=51.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischer Apothekerverein', mentionConfidence=70.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerische Normenvereinigung', mentionConfidence=54.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischer Olympischer Verband', mentionConfidence=45.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Property', mentionConfidence=44.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNO United Nations Organization', mentionConfidence=77.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vereinte Nationen', mentionConfidence=80.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='World Health Organization', mentionConfidence=66.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='World Trade Organization', mentionConfidence=66.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eidgenössisches Departement für Umwelt', mentionConfidence=61.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Verkehr', mentionConfidence=54.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Energie', mentionConfidence=44.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizerischert Eidgenossenschaft', mentionConfidence=64.86, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Pharmacopoea Europaea', mentionConfidence=44.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesamtes für Aussenwirtschaft', mentionConfidence=64.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bundesrechtspflegegesetz', mentionConfidence=44.85, startOffset=None, endOffset=None)], newsAgencies=[EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=62.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=63.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=64.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. 
B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='. B.', mentionConfidence=65.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=64.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='B VET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='BVET', mentionConfidence=44.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=62.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='PFC', mentionConfidence=68.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=59.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. 
B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='z. B.', mentionConfidence=57.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', 
mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='n eu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', 
mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, 
endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', 
mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='ne u', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None), EntityMention(surfaceForm='neu', mentionConfidence=47.75, startOffset=None, endOffset=None)]), topics=[], embeddings=None, transcriptLength=76080.0, totalPages=205.0, languageCode='de', isOnFrontPage=True, publicationDate=datetime.datetime(1999, 6, 1, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FedGazDe-1999-06-01-a', countryCode='CH', providerCode='SFA', mediaUid='FedGazDe', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.9, 
relevanceScore=0.013883636872427033, pageNumbers=[PageNumber(root=1.0), PageNumber(root=2.0), PageNumber(root=3.0), PageNumber(root=4.0), PageNumber(root=5.0), PageNumber(root=6.0), PageNumber(root=7.0), PageNumber(root=8.0), PageNumber(root=9.0), PageNumber(root=10.0), PageNumber(root=11.0), PageNumber(root=12.0), PageNumber(root=13.0), PageNumber(root=14.0), PageNumber(root=15.0), PageNumber(root=16.0), PageNumber(root=17.0), PageNumber(root=18.0), PageNumber(root=19.0), PageNumber(root=20.0), PageNumber(root=21.0), PageNumber(root=22.0), PageNumber(root=23.0), PageNumber(root=24.0), PageNumber(root=25.0), PageNumber(root=26.0), PageNumber(root=27.0), PageNumber(root=28.0), PageNumber(root=29.0), PageNumber(root=30.0), PageNumber(root=31.0), PageNumber(root=32.0), PageNumber(root=33.0), PageNumber(root=34.0), PageNumber(root=35.0), PageNumber(root=36.0), PageNumber(root=37.0), PageNumber(root=38.0), PageNumber(root=39.0), PageNumber(root=40.0), PageNumber(root=41.0), PageNumber(root=42.0), PageNumber(root=43.0), PageNumber(root=44.0), PageNumber(root=45.0), PageNumber(root=46.0), PageNumber(root=47.0), PageNumber(root=48.0), PageNumber(root=49.0), PageNumber(root=50.0), PageNumber(root=51.0), PageNumber(root=52.0), PageNumber(root=53.0), PageNumber(root=54.0), PageNumber(root=55.0), PageNumber(root=56.0), PageNumber(root=57.0), PageNumber(root=58.0), PageNumber(root=59.0), PageNumber(root=60.0), PageNumber(root=61.0), PageNumber(root=62.0), PageNumber(root=63.0), PageNumber(root=64.0), PageNumber(root=65.0), PageNumber(root=66.0), PageNumber(root=67.0), PageNumber(root=68.0), PageNumber(root=69.0), PageNumber(root=70.0), PageNumber(root=71.0), PageNumber(root=72.0), PageNumber(root=73.0), PageNumber(root=74.0), PageNumber(root=75.0), PageNumber(root=76.0), PageNumber(root=77.0), PageNumber(root=78.0), PageNumber(root=79.0), PageNumber(root=80.0), PageNumber(root=81.0), PageNumber(root=82.0), PageNumber(root=83.0), PageNumber(root=84.0), PageNumber(root=85.0), 
PageNumber(root=86.0), PageNumber(root=87.0), PageNumber(root=88.0), PageNumber(root=89.0), PageNumber(root=90.0), PageNumber(root=91.0), PageNumber(root=92.0), PageNumber(root=93.0), PageNumber(root=94.0), PageNumber(root=95.0), PageNumber(root=96.0), PageNumber(root=97.0), PageNumber(root=98.0), PageNumber(root=99.0), PageNumber(root=100.0), PageNumber(root=101.0), PageNumber(root=102.0), PageNumber(root=103.0), PageNumber(root=104.0), PageNumber(root=105.0), PageNumber(root=106.0), PageNumber(root=107.0), PageNumber(root=108.0), PageNumber(root=109.0), PageNumber(root=110.0), PageNumber(root=111.0), PageNumber(root=112.0), PageNumber(root=113.0), PageNumber(root=114.0), PageNumber(root=115.0), PageNumber(root=116.0), PageNumber(root=117.0), PageNumber(root=118.0), PageNumber(root=119.0), PageNumber(root=120.0), PageNumber(root=121.0), PageNumber(root=122.0), PageNumber(root=123.0), PageNumber(root=124.0), PageNumber(root=125.0), PageNumber(root=126.0), PageNumber(root=127.0), PageNumber(root=128.0), PageNumber(root=129.0), PageNumber(root=130.0), PageNumber(root=131.0), PageNumber(root=132.0), PageNumber(root=133.0), PageNumber(root=134.0), PageNumber(root=135.0), PageNumber(root=136.0), PageNumber(root=137.0), PageNumber(root=138.0), PageNumber(root=139.0), PageNumber(root=140.0), PageNumber(root=141.0), PageNumber(root=142.0), PageNumber(root=143.0), PageNumber(root=144.0), PageNumber(root=145.0), PageNumber(root=146.0), PageNumber(root=147.0), PageNumber(root=148.0), PageNumber(root=149.0), PageNumber(root=150.0), PageNumber(root=151.0), PageNumber(root=152.0), PageNumber(root=153.0), PageNumber(root=154.0), PageNumber(root=155.0), PageNumber(root=156.0), PageNumber(root=157.0), PageNumber(root=158.0), PageNumber(root=159.0), PageNumber(root=160.0), PageNumber(root=161.0), PageNumber(root=162.0), PageNumber(root=163.0), PageNumber(root=164.0), PageNumber(root=165.0), PageNumber(root=166.0), PageNumber(root=167.0), PageNumber(root=168.0), 
PageNumber(root=169.0), PageNumber(root=170.0), PageNumber(root=171.0), PageNumber(root=172.0), PageNumber(root=173.0), PageNumber(root=174.0), PageNumber(root=175.0), PageNumber(root=176.0), PageNumber(root=177.0), PageNumber(root=178.0), PageNumber(root=179.0), PageNumber(root=180.0), PageNumber(root=181.0), PageNumber(root=182.0), PageNumber(root=183.0), PageNumber(root=184.0), PageNumber(root=185.0), PageNumber(root=186.0), PageNumber(root=187.0), PageNumber(root=188.0), PageNumber(root=189.0), PageNumber(root=190.0), PageNumber(root=191.0), PageNumber(root=192.0), PageNumber(root=193.0), PageNumber(root=194.0), PageNumber(root=195.0), PageNumber(root=196.0), PageNumber(root=197.0), PageNumber(root=198.0), PageNumber(root=199.0), PageNumber(root=200.0), PageNumber(root=201.0), PageNumber(root=202.0), PageNumber(root=203.0), PageNumber(root=204.0), PageNumber(root=205.0)], collectionUids=[]), ContentItem(uid='FZG-1999-08-13-a-i0008', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Mythische Musik-Erlebnisse Neuer Intenda...', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.556), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.13), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.11)], embeddings=None, transcriptLength=516.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(1999, 8, 13, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-1999-08-13-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=1.0, relevanceScore=0.7351644823199176, pageNumbers=[PageNumber(root=7.0)], collectionUids=[]), ContentItem(uid='luxland-1999-11-05-a-i0028', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Ministère de la Culture', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Luxembourg', count=1.0)], persons=[], 
organisations=[NamedEntity(uid='2-53-Union_européenne', count=1.0), NamedEntity(uid='2-53-Pétrusse', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Luxembourg', mentionConfidence=76.27, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Ministère de la Culture Avis European Union Youth Orchestra', mentionConfidence=89.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUYO', mentionConfidence=40.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne', mentionConfidence=85.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='jeunes', mentionConfidence=48.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pétrusse', mentionConfidence=65.02, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.149), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.144), TopicMention(uid='tm-fr-all-v2.0_tp57_fr', relevance=0.118), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.103)], embeddings=None, transcriptLength=90.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(1999, 11, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-1999-11-05-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.98, relevanceScore=0.983312364950827, pageNumbers=[PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='luxland-2000-11-24-a-i0030', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"Ministère de la Culture, de l'Enseignement supérieur et de la Recherch[...]\", transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Union_européenne', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Union_européenne', count=1.0)], newsAgencies=[]), 
mentions=EntityMentions(locations=[EntityMention(surfaceForm='entre', mentionConfidence=82.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne', mentionConfidence=76.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg', mentionConfidence=75.49, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='européenne', mentionConfidence=32.28, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.169), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.141), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp57_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.058), TopicMention(uid='tm-fr-all-v2.0_tp19_fr', relevance=0.053)], embeddings=None, transcriptLength=104.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2000, 11, 24, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2000-11-24-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.98, relevanceScore=0.970744663664452, pageNumbers=[PageNumber(root=16.0)], collectionUids=[]), ContentItem(uid='LLE-2001-04-23-a-i0253', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Lfl LIBERTE 31 LUNDI 23 AVRIL 2001', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp89_fr', relevance=0.177), TopicMention(uid='tm-fr-all-v2.0_tp44_fr', relevance=0.168), TopicMention(uid='tm-fr-all-v2.0_tp00_fr', relevance=0.09), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.074), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.069), TopicMention(uid='tm-fr-all-v2.0_tp36_fr', relevance=0.066), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.057), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.051)], embeddings=None, transcriptLength=1724.0, 
totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 4, 23, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2001-04-23-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.89, relevanceScore=0.24829240147360462, pageNumbers=[PageNumber(root=31.0)], collectionUids=[]), ContentItem(uid='FZG-2001-08-04-a-i0012', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='SF2 HÜ MTV 3 1«H HORF 1 ORF 2', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.726), TopicMention(uid='tm-de-all-v2.0_tp67_de', relevance=0.074), TopicMention(uid='tm-de-all-v2.0_tp62_de', relevance=0.069)], embeddings=None, transcriptLength=1395.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 8, 4, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2001-08-04-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.3425488917673736, pageNumbers=[PageNumber(root=9.0)], collectionUids=[]), ContentItem(uid='FZG-2001-10-04-a-i0220', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='6.00 Mattmata 9.00 Kontext 9.35 Musik ä ...', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp62_de', relevance=0.377), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.21), TopicMention(uid='tm-de-all-v2.0_tp83_de', relevance=0.072)], embeddings=None, transcriptLength=91.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 10, 4, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2001-10-04-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.91, relevanceScore=0.9939425126863282, pageNumbers=[PageNumber(root=29.0)], collectionUids=[]), ContentItem(uid='luxland-2001-11-16-a-i0018', 
copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='European Planning Observation Network', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Luxembourg_City', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-European_Commission', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='ESPON', mentionConfidence=61.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg City', mentionConfidence=90.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rue du Plébiscite', mentionConfidence=83.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Closing', mentionConfidence=95.08, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Isabelle Biever', mentionConfidence=95.14, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='European Planning Observation Network', mentionConfidence=78.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=78.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Spatial Development Perspective', mentionConfidence=78.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Commission', mentionConfidence=64.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='ESPON 2006', mentionConfidence=46.5, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=353.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 11, 16, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2001-11-16-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.4625917036173705, pageNumbers=[PageNumber(root=10.0)], collectionUids=[]), ContentItem(uid='luxland-2001-11-16-a-i0066', copyrightStatus='in_cpy', type='ar', sourceMedium='print', 
title=\"Ministère de la Culture, de l'Enseignement supérieur et de la Recherch[...]\", transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.174), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.138), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.098), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.077), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.055)], embeddings=None, transcriptLength=113.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2001, 11, 16, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2001-11-16-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.9288221259949283, pageNumbers=[PageNumber(root=16.0)], collectionUids=[]), ContentItem(uid='luxland-2002-01-11-a-i0009', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Protection des données', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-États-Unis', count=1.0)], persons=[NamedEntity(uid='2-50-George_W._Bush', count=1.0), NamedEntity(uid='2-50-François_Biltgen', count=1.0), NamedEntity(uid='2-50-Jacques_Chirac', count=1.0)], organisations=[NamedEntity(uid='2-53-Commission_européenne', count=1.0), NamedEntity(uid='2-53-Union_européenne', count=1.0), NamedEntity(uid='2-53-Conseil_européen', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Etats- Unis', mentionConfidence=70.34, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='président américain George W. 
Bush', mentionConfidence=98.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='représentant des Etats-Unis, Mark Richard', mentionConfidence=97.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='François Biltgen', mentionConfidence=95.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='président français Jacques Chirac', mentionConfidence=99.24, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Commission européenne', mentionConfidence=69.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union', mentionConfidence=48.72, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"organisation internationale de- défense des droits de- l'homme Statewatch\", mentionConfidence=88.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Conseil européen', mentionConfidence=84.94, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chambre des fonctionnaires et employés publics', mentionConfidence=58.59, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Commission consultative des droits de l'homme\", mentionConfidence=97.82, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.262), TopicMention(uid='tm-fr-all-v2.0_tp95_fr', relevance=0.229), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.095), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.067), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.064), TopicMention(uid='tm-fr-all-v2.0_tp36_fr', relevance=0.062)], embeddings=None, transcriptLength=1778.0, totalPages=2.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 1, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2002-01-11-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.98, relevanceScore=0.24857742915417744, pageNumbers=[PageNumber(root=8.0), 
PageNumber(root=9.0)], collectionUids=[]), ContentItem(uid='luxland-2002-04-12-a-i0031', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Petition to the European Union', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Israel', count=1.0), NamedEntity(uid='2-54-West_Bank', count=1.0)], persons=[NamedEntity(uid='2-50-Romano_Prodi', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Israel', mentionConfidence=82.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='West Bank', mentionConfidence=94.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue de la Loi', mentionConfidence=95.58, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Mr. Romano Prodi', mentionConfidence=82.65, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=835.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 4, 12, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2002-04-12-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5630565756866772, pageNumbers=[PageNumber(root=16.0)], collectionUids=[]), ContentItem(uid='EXP-2002-06-11-a-i0229', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"L'homme au centre des espaces\", transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Lausanne', count=4.0), NamedEntity(uid='2-54-Berlin', count=5.0), NamedEntity(uid='2-54-Barcelone', count=1.0), NamedEntity(uid='2-54-Santiago', count=1.0), NamedEntity(uid='2-54-Paris', count=1.0), NamedEntity(uid='2-54-Bordeaux', count=1.0), NamedEntity(uid='2-54-Londres', count=4.0)], persons=[NamedEntity(uid='2-50-Louisa_Hutton', count=4.0)], organisations=[NamedEntity(uid='2-53-Architectural_Association_School_of_Architecture', count=1.0)], 
newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Lausanne', mentionConfidence=66.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lausanne', mentionConfidence=66.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lausanne', mentionConfidence=66.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lausanne', mentionConfidence=66.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=98.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=98.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=98.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=98.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=98.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Barcelone', mentionConfidence=98.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Santiago du Chili', mentionConfidence=81.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='Paris', mentionConfidence=98.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bordeaux', mentionConfidence=98.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Londres', mentionConfidence=95.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Londres', mentionConfidence=95.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Londres', mentionConfidence=95.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Londres', mentionConfidence=95.77, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Matthias Sauerhruch', mentionConfidence=96.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Louisa Hutton', mentionConfidence=95.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Louisa Hutton', mentionConfidence=95.59, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Louisa Hutton', mentionConfidence=95.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Louisa Hutton', mentionConfidence=95.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Matthias Sauerbruch', mentionConfidence=96.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Matthias Sauerbruch', mentionConfidence=96.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Matthias Sauerbruch', mentionConfidence=96.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='WYSIWYG', mentionConfidence=77.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='WYSIWYG', mentionConfidence=77.15, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sauerbruch Hutton architectes', mentionConfidence=91.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sauerbruch Hutton architekten', mentionConfidence=78.8, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Forum architectures', mentionConfidence=83.56, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Forum d'architectures Lausanne\", mentionConfidence=89.61, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Forum d'architectures Lausanne\", mentionConfidence=89.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Architectural Association de Londres', mentionConfidence=87.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Architectural School Or Architecture', mentionConfidence=84.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='centre Photonics', mentionConfidence=60.56, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Finaliste de l'European Union Prize for Contempo rary Architecture\", mentionConfidence=82.33, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp77_fr', relevance=0.139), 
TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.126), TopicMention(uid='tm-fr-all-v2.0_tp78_fr', relevance=0.11), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.093), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.088), TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.067), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.062)], embeddings=None, transcriptLength=605.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 6, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-2002-06-11-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.502587491669864, pageNumbers=[PageNumber(root=23.0)], collectionUids=[]), ContentItem(uid='IMP-2002-06-11-a-i0198', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"L'homme au centre des espaces\", transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.14), TopicMention(uid='tm-fr-all-v2.0_tp78_fr', relevance=0.126), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.124), TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.102), TopicMention(uid='tm-fr-all-v2.0_tp77_fr', relevance=0.096), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.069), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.055)], embeddings=None, transcriptLength=533.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2002, 6, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-2002-06-11-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.99, relevanceScore=0.558196464272011, pageNumbers=[PageNumber(root=23.0)], collectionUids=[]), ContentItem(uid='luxland-2003-07-04-a-i0007', copyrightStatus='in_cpy', 
type='ar', sourceMedium='print', title='Hochzeithalten mit Europa', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Warschau', count=1.0), NamedEntity(uid='2-54-Niederlande', count=1.0), NamedEntity(uid='2-54-Den_Haag', count=1.0), NamedEntity(uid='2-54-Dublin', count=1.0), NamedEntity(uid='2-54-Polen', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Den_Haag', count=1.0), NamedEntity(uid='2-53-Europäischer_Konvent', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Europa Peter', mentionConfidence=71.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Warschau', mentionConfidence=97.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Niederlanden', mentionConfidence=46.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Haag lizenziert', mentionConfidence=67.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Dublin', mentionConfidence=98.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='seit', mentionConfidence=89.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='EU-Schnitt', mentionConfidence=92.06, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Haag', mentionConfidence=50.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='E U-Konvent', mentionConfidence=86.79, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.241), TopicMention(uid='tm-de-all-v2.0_tp61_de', relevance=0.166), TopicMention(uid='tm-de-all-v2.0_tp29_de', relevance=0.08), TopicMention(uid='tm-de-all-v2.0_tp14_de', relevance=0.079), TopicMention(uid='tm-de-all-v2.0_tp86_de', relevance=0.063), TopicMention(uid='tm-de-all-v2.0_tp24_de', relevance=0.061)], embeddings=None, transcriptLength=1759.0, totalPages=2.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2003, 7, 4, 0, 0, 
tzinfo=TzInfo(UTC)), issueUid='luxland-2003-07-04-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.95, relevanceScore=0.3710976450597606, pageNumbers=[PageNumber(root=6.0), PageNumber(root=7.0)], collectionUids=[]), ContentItem(uid='luxland-2003-10-31-a-i0029', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='European Union Youth Orchestra-Euyo', transcript=None, entities=NamedEntities(locations=[], persons=[], organisations=[NamedEntity(uid='2-53-Union_européenne', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[], persons=[], organisations=[EntityMention(surfaceForm='Union Youth Orchestra-Euyo Orchestre symphonique', mentionConfidence=77.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='Orchestre des Jeunes', mentionConfidence=91.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne', mentionConfidence=51.3, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.227), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.092), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.068), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.061), TopicMention(uid='tm-fr-all-v2.0_tp69_fr', relevance=0.057)], embeddings=None, transcriptLength=121.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2003, 10, 31, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2003-10-31-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.94, relevanceScore=0.9162513955682234, pageNumbers=[PageNumber(root=12.0)], collectionUids=[]), ContentItem(uid='luxland-2004-01-30-a-i0049', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Project and Events Officer (contract until end 2006) In charge of:', 
transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Lille', count=1.0), NamedEntity(uid='2-54-France', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Interreg', count=1.0), NamedEntity(uid='2-53-European_Union', count=2.0), NamedEntity(uid='2-53-Euro', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Media', mentionConfidence=49.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lille', mentionConfidence=98.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='France', mentionConfidence=98.01, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Events', mentionConfidence=49.78, startOffset=None, endOffset=None), EntityMention(surfaceForm='Task', mentionConfidence=63.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='INTERREG IIIC', mentionConfidence=48.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Deputy Programme Manager', mentionConfidence=81.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Project Development Officer', mentionConfidence=64.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Communication Officer', mentionConfidence=68.5, startOffset=None, endOffset=None), EntityMention(surfaceForm='Task B', mentionConfidence=59.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='West zone', mentionConfidence=70.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lead Partners', mentionConfidence=56.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='INTERact Managing Authority', mentionConfidence=63.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='European regional', mentionConfidence=60.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Structural Funds', mentionConfidence=83.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=57.33, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='West Zone member', mentionConfidence=59.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUR', mentionConfidence=46.27, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=418.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 1, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-01-30-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.41770050745312765, pageNumbers=[PageNumber(root=20.0)], collectionUids=[]), ContentItem(uid='luxland-2004-04-30-a-i0093', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='The New Europe', transcript=None, entities=NamedEntities(locations=[], persons=[NamedEntity(uid='2-50-Daniel_Gros', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Daniel Gros', mentionConfidence=54.35, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=2561.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 4, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-04-30-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.6082177692256652, pageNumbers=[PageNumber(root=32.0), PageNumber(root=33.0)], collectionUids=[]), ContentItem(uid='luxland-2004-04-30-a-i0096', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='A Lithuanian Perspective', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Virginia', count=1.0), NamedEntity(uid='2-54-Vilnius', count=2.0), NamedEntity(uid='2-54-Lithuania', count=1.0), NamedEntity(uid='2-54-Latvia', count=1.0), NamedEntity(uid='2-54-Estonia', count=1.0), 
NamedEntity(uid='2-54-Spain', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Bratislava', count=1.0), NamedEntity(uid='2-53-European_Union', count=1.0), NamedEntity(uid='2-53-European_Commission', count=1.0), NamedEntity(uid='2-53-Differdange', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Virginija', mentionConfidence=73.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lithuanians', mentionConfidence=43.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='Vilnius', mentionConfidence=47.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='capital', mentionConfidence=74.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eastern and Central Europe', mentionConfidence=66.35, startOffset=None, endOffset=None), EntityMention(surfaceForm='Prague', mentionConfidence=97.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='Budapest', mentionConfidence=97.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ljubljana', mentionConfidence=98.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lithuania', mentionConfidence=94.73, startOffset=None, endOffset=None), EntityMention(surfaceForm='Latvia', mentionConfidence=95.28, startOffset=None, endOffset=None), EntityMention(surfaceForm='Estonia', mentionConfidence=95.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Spain', mentionConfidence=96.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='European', mentionConfidence=47.61, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Poskute', mentionConfidence=57.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=59.16, startOffset=None, endOffset=None), EntityMention(surfaceForm='consulting', mentionConfidence=43.59, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Mercer Human Resources', mentionConfidence=69.79, startOffset=None, endOffset=None), EntityMention(surfaceForm='UNESCO', mentionConfidence=55.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Baltic States', mentionConfidence=60.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Commission', mentionConfidence=62.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union states', mentionConfidence=56.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='IMPALLA', mentionConfidence=73.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='CEPS', mentionConfidence=62.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='INSTEAD', mentionConfidence=42.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='Differdange', mentionConfidence=59.7, startOffset=None, endOffset=None), EntityMention(surfaceForm='Leuven University', mentionConfidence=69.8, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=950.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 4, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-04-30-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.44325772502906535, pageNumbers=[PageNumber(root=33.0)], collectionUids=[]), ContentItem(uid='luxland-2004-05-07-a-i0198', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='The BRussells Tribunal (14-17 April 2004)', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-United_States', count=1.0), NamedEntity(uid='2-54-China', count=1.0), NamedEntity(uid='2-54-France', count=2.0), NamedEntity(uid='2-54-Europe', count=1.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='US', mentionConfidence=58.76, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='China', mentionConfidence=95.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='F.urope', mentionConfidence=86.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='France', mentionConfidence=89.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=62.48, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Richard', mentionConfidence=86.28, startOffset=None, endOffset=None), EntityMention(surfaceForm='Charles', mentionConfidence=76.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='Robert', mentionConfidence=74.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='David', mentionConfidence=51.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Henry', mentionConfidence=63.2, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=2298.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 7, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-05-07-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.29119933762798117, pageNumbers=[PageNumber(root=13.0), PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='luxland-2004-05-21-a-i0025', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Economic Effects of EU Enlargement', transcript=None, entities=None, mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Professor Marek', mentionConfidence=99.02, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=594.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-05-21-a', countryCode='LU', 
providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.6160894952298251, pageNumbers=[PageNumber(root=16.0)], collectionUids=[]), ContentItem(uid='LLE-2004-05-28-a-i0100', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"SUISSE /£\\\\ L'armée veut s'acheter deux a...\", transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Suisse', count=5.0), NamedEntity(uid='2-54-Eindhoven', count=1.0), NamedEntity(uid='2-54-Belgique', count=1.0), NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Allemagne', count=1.0), NamedEntity(uid='2-54-Italie', count=1.0), NamedEntity(uid='2-54-Royaume-Uni', count=1.0), NamedEntity(uid='2-54-Pays-Bas', count=1.0)], persons=[NamedEntity(uid='2-50-Samuel_Schmid', count=6.0), NamedEntity(uid='2-50-Christophe_Keckeis', count=1.0)], organisations=[NamedEntity(uid='2-53-Direction_départementale_de_la_Protection_des_populations', count=2.0), NamedEntity(uid='2-53-Royaume', count=1.0), NamedEntity(uid='2-53-Pays', count=1.0), NamedEntity(uid='2-53-Roger_Pratt', count=1.0), NamedEntity(uid='2-53-Conseil_de_sécurité_des_Nations_unies', count=1.0), NamedEntity(uid='2-53-Force_de_stabilisation', count=1.0), NamedEntity(uid=\"2-53-Force_de_l'Union_européenne\", count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='SUISSE', mentionConfidence=96.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suisse', mentionConfidence=95.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suisse', mentionConfidence=95.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suisse', mentionConfidence=95.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suisse', mentionConfidence=95.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Eindhoven', mentionConfidence=80.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Belgique', 
mentionConfidence=95.16, startOffset=None, endOffset=None), EntityMention(surfaceForm='France', mentionConfidence=90.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Allemagne', mentionConfidence=93.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italie', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Royaume-Uni', mentionConfidence=92.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pays-Bas', mentionConfidence=91.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='lEUFOR', mentionConfidence=92.03, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='M. ERIK REUMANN', mentionConfidence=94.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='conseiller fédéral Samuel Schmid', mentionConfidence=97.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='EACC', mentionConfidence=45.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='EACC', mentionConfidence=45.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='EACC', mentionConfidence=45.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Christophe Keckeis', mentionConfidence=97.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Samuel Schmid', mentionConfidence=94.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Samuel Schmid', mentionConfidence=94.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Samuel Schmid', mentionConfidence=94.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Samuel Schmid', mentionConfidence=94.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Samuel Schmid', mentionConfidence=94.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alfred Markwalder, le patron dArmasuisse', mentionConfidence=85.34, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='DDPS', mentionConfidence=59.64, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='DDPS', mentionConfidence=59.64, startOffset=None, endOffset=None), EntityMention(surfaceForm='Airlift Coordination Cell', mentionConfidence=79.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='Royaume', mentionConfidence=66.99, startOffset=None, endOffset=None), EntityMention(surfaceForm='Pays', mentionConfidence=64.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='conseiller fédéral', mentionConfidence=64.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Conseil fé- SFOR', mentionConfidence=96.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stabilization Force', mentionConfidence=83.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union Force', mentionConfidence=61.32, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.178), TopicMention(uid='tm-fr-all-v2.0_tp88_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.083), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.078), TopicMention(uid='tm-fr-all-v2.0_tp20_fr', relevance=0.065)], embeddings=None, transcriptLength=955.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 28, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2004-05-28-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.94, relevanceScore=0.4039962871394241, pageNumbers=[PageNumber(root=9.0)], collectionUids=[]), ContentItem(uid='FZG-2004-05-28-a-i0093', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Die Schweiz stellt 20 Mann für Bosnien A...', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.275), TopicMention(uid='tm-de-all-v2.0_tp06_de', relevance=0.272), 
TopicMention(uid='tm-de-all-v2.0_tp82_de', relevance=0.097), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.07), TopicMention(uid='tm-de-all-v2.0_tp48_de', relevance=0.058)], embeddings=None, transcriptLength=162.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 5, 28, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2004-05-28-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.9805413794993553, pageNumbers=[PageNumber(root=17.0)], collectionUids=[]), ContentItem(uid='LLE-2004-08-03-a-i0245', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='La ville, ce champ de déplacements', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Strasbourg', count=4.0), NamedEntity(uid='2-54-Canton_de_Saint-Estève', count=1.0)], persons=[NamedEntity(uid='2-50-Zaha_Hadid', count=6.0), NamedEntity(uid='2-50-Ludwig_Mies_van_der_Rohe', count=2.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Strasbourg', mentionConfidence=96.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Strasbourg', mentionConfidence=96.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Strasbourg', mentionConfidence=96.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Strasbourg', mentionConfidence=96.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='JACQUES STERCHI', mentionConfidence=94.25, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Zaha Hadid', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zaha Hadid', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zaha Hadid', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zaha Hadid', mentionConfidence=93.52, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Zaha Hadid', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zaha Hadid', mentionConfidence=93.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mies Van der Rohe', mentionConfidence=68.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Mies Van der Rohe', mentionConfidence=68.95, startOffset=None, endOffset=None), EntityMention(surfaceForm='Van der', mentionConfidence=43.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Van der', mentionConfidence=43.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Car Park', mentionConfidence=64.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Car Park', mentionConfidence=64.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lars Muller', mentionConfidence=95.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='LARS MULLER', mentionConfidence=82.2, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='European Union Prize for contemporary Architecture Mies', mentionConfidence=82.83, startOffset=None, endOffset=None), EntityMention(surfaceForm='der Rohe Award', mentionConfidence=59.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Car Park and Terminus Strasbourg', mentionConfidence=68.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Car Park and Terminus Strasbourg', mentionConfidence=68.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Terminus Strasbourg', mentionConfidence=48.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Terminus Strasbourg', mentionConfidence=48.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Publishers', mentionConfidence=31.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='PUBLISHERS', mentionConfidence=44.6, startOffset=None, endOffset=None)], newsAgencies=[]), 
topics=[TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.424), TopicMention(uid='tm-fr-all-v2.0_tp55_fr', relevance=0.176), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.058)], embeddings=None, transcriptLength=453.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 8, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2004-08-03-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.98, relevanceScore=0.5896874071424244, pageNumbers=[PageNumber(root=31.0)], collectionUids=[]), ContentItem(uid='FZG-2004-09-03-a-i0049', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Ja zum Botsrhaftssrhiitz', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp48_de', relevance=0.227), TopicMention(uid='tm-de-all-v2.0_tp06_de', relevance=0.209), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.08), TopicMention(uid='tm-de-all-v2.0_tp00_de', relevance=0.059)], embeddings=None, transcriptLength=175.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 9, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2004-09-03-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.86, relevanceScore=0.9424220429099364, pageNumbers=[PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='EXP-2004-09-30-a-i0226', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Un accord sur la gestion des crises', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Berne', count=2.0), NamedEntity(uid='2-54-Europe', count=3.0), NamedEntity(uid='2-54-Sarajevo', count=1.0), NamedEntity(uid='2-54-Italie', count=1.0), NamedEntity(uid='2-54-Portugal', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-Union_européenne', count=3.0), 
NamedEntity(uid='2-53-Bruxelles', count=1.0), NamedEntity(uid='2-53-Suisse', count=1.0), NamedEntity(uid=\"2-53-Organisation_du_traité_de_l'Atlantique_nord\", count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Berne', mentionConfidence=98.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berne', mentionConfidence=98.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=96.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=96.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=96.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sfor', mentionConfidence=82.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sarajevo', mentionConfidence=97.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Italie', mentionConfidence=94.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='Portugal', mentionConfidence=94.18, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='EUPM', mentionConfidence=76.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUPM', mentionConfidence=76.42, startOffset=None, endOffset=None), EntityMention(surfaceForm='EUPM', mentionConfidence=76.42, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Union européenne', mentionConfidence=90.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union européenne', mentionConfidence=90.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bruxelles', mentionConfidence=30.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Tanguy Verhoosel', mentionConfidence=41.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union Police\" Mission', mentionConfidence=80.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='fédérales', mentionConfidence=52.83, 
startOffset=None, endOffset=None), EntityMention(surfaceForm='Otan', mentionConfidence=72.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union Police Mission', mentionConfidence=89.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='UE', mentionConfidence=43.6, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.271), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.119), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.082), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.074)], embeddings=None, transcriptLength=467.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 9, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-2004-09-30-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.86119734705005, pageNumbers=[PageNumber(root=25.0)], collectionUids=[]), ContentItem(uid='IMP-2004-09-30-a-i0199', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Un accord sur la gestion des crises', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.18), TopicMention(uid='tm-fr-all-v2.0_tp03_fr', relevance=0.132), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.106), TopicMention(uid='tm-fr-all-v2.0_tp87_fr', relevance=0.097), TopicMention(uid='tm-fr-all-v2.0_tp46_fr', relevance=0.085), TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.067)], embeddings=None, transcriptLength=474.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 9, 30, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-2004-09-30-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.86321238889546, pageNumbers=[PageNumber(root=23.0)], collectionUids=[]), 
ContentItem(uid='luxland-2004-11-05-a-i0037', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"Postes vacants Orchestre des Jeunes de l'Union Européenne\", transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.145), TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.136), TopicMention(uid='tm-fr-all-v2.0_tp90_fr', relevance=0.103), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.097), TopicMention(uid='tm-fr-all-v2.0_tp40_fr', relevance=0.082)], embeddings=None, transcriptLength=143.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 11, 5, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-11-05-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.858930525945308, pageNumbers=[PageNumber(root=18.0)], collectionUids=[]), ContentItem(uid='luxland-2004-12-03-a-i0060', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Design industry', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0)], persons=[], organisations=[NamedEntity(uid='2-53-European_Union', count=2.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Europe', mentionConfidence=57.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='BEDA', mentionConfidence=84.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Düsseldorf', mentionConfidence=94.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alicante', mentionConfidence=84.16, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Stephen Hitchins', mentionConfidence=96.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='President', mentionConfidence=39.36, startOffset=None, endOffset=None)], 
organisations=[EntityMention(surfaceForm='European Union', mentionConfidence=73.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Commission and Parliament', mentionConfidence=56.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='Enterprise', mentionConfidence=43.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Design Achievement Award', mentionConfidence=51.25, startOffset=None, endOffset=None), EntityMention(surfaceForm='OHIM', mentionConfidence=45.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Harmonisation', mentionConfidence=62.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Internal Market', mentionConfidence=69.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Trade Marks', mentionConfidence=68.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Designs', mentionConfidence=33.39, startOffset=None, endOffset=None), EntityMention(surfaceForm='Design', mentionConfidence=53.4, startOffset=None, endOffset=None), EntityMention(surfaceForm='Future Needs', mentionConfidence=51.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='BEDA Communications Series', mentionConfidence=57.38, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=1023.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-12-03-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.6038091728138982, pageNumbers=[PageNumber(root=32.0)], collectionUids=[]), ContentItem(uid='luxland-2004-12-03-a-i0167', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"d'Ländchen Humoristesch-satiresch Säit vum Lëtzebuerger Land\", transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp87_de', 
relevance=0.177), TopicMention(uid='tm-de-all-v2.0_tp03_de', relevance=0.151), TopicMention(uid='tm-de-all-v2.0_tp58_de', relevance=0.106), TopicMention(uid='tm-de-all-v2.0_tp30_de', relevance=0.1), TopicMention(uid='tm-de-all-v2.0_tp45_de', relevance=0.085), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.077)], embeddings=None, transcriptLength=849.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-12-03-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.93, relevanceScore=0.8350462881491375, pageNumbers=[PageNumber(root=24.0)], collectionUids=[]), ContentItem(uid='luxland-2004-12-10-a-i0123', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Université, CRP ... O, Jeeminee!', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0), NamedEntity(uid='2-54-Strasbourg', count=1.0), NamedEntity(uid='2-54-Lisbon', count=1.0)], persons=[NamedEntity(uid='2-50-Morgan_Meyer', count=1.0), NamedEntity(uid='2-50-Louis_Pasteur', count=1.0)], organisations=[NamedEntity(uid='2-53-Université_du_Luxembourg', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Rieseradon', mentionConfidence=89.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Srh ueberfouer', mentionConfidence=82.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=91.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg', mentionConfidence=98.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxemburg', mentionConfidence=78.45, startOffset=None, endOffset=None), EntityMention(surfaceForm='Strasbourg', mentionConfidence=95.45, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Lefebvre', mentionConfidence=44.17, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lisbon', mentionConfidence=78.07, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Jeeminee', mentionConfidence=87.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='Morgan Meyer', mentionConfidence=94.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Delanty', mentionConfidence=61.67, startOffset=None, endOffset=None), EntityMention(surfaceForm='Louis Pasteur', mentionConfidence=68.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='Basil Blackwell', mentionConfidence=97.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cozzens', mentionConfidence=49.48, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='University of Luxembourg', mentionConfidence=90.88, startOffset=None, endOffset=None), EntityMention(surfaceForm='Challenging Knowledge', mentionConfidence=67.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Open University Press', mentionConfidence=72.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stellenwert', mentionConfidence=62.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Université du Luxembourg', mentionConfidence=82.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Chambre de Commerce', mentionConfidence=85.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Entreprise Luxembourg', mentionConfidence=70.22, startOffset=None, endOffset=None), EntityMention(surfaceForm='Forum Recherche S Développement', mentionConfidence=79.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='National System of Innovation', mentionConfidence=80.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Changing University', mentionConfidence=52.59, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lisbon strategy', 
mentionConfidence=53.55, startOffset=None, endOffset=None), EntityMention(surfaceForm='Office for Official Publications of the European Communities', mentionConfidence=85.91, startOffset=None, endOffset=None), EntityMention(surfaceForm='Trash Picture Company', mentionConfidence=76.5, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=1669.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2004-12-10-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.19459281141726265, pageNumbers=[PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='FZG-2004-12-17-a-i0121', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='20 Schweizer im Einsatz Der Nationalrat ...', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp06_de', relevance=0.316), TopicMention(uid='tm-de-all-v2.0_tp48_de', relevance=0.199), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.075)], embeddings=None, transcriptLength=112.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2004, 12, 17, 0, 0, tzinfo=TzInfo(UTC)), issueUid='FZG-2004-12-17-a', countryCode='CH', providerCode='SNL', mediaUid='FZG', mediaType='newspaper', hasOLR=True, ocrQualityScore=1.0, relevanceScore=1.0, pageNumbers=[PageNumber(root=21.0)], collectionUids=[]), ContentItem(uid='luxland-2005-01-21-a-i0027', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='National preference versus European objectives', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Europe', mentionConfidence=42.91, startOffset=None, endOffset=None)], persons=[], 
organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=476.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 1, 21, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-01-21-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.41860440292759193, pageNumbers=[PageNumber(root=15.0)], collectionUids=[]), ContentItem(uid='luxland-2005-02-25-a-i0067', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Armand Clesse', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Luxemburg', count=1.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Luxemburger', mentionConfidence=74.79, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Foreign Policy Institute', mentionConfidence=51.26, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Luxemburger Institut d'études européennes et internationales\", mentionConfidence=95.32, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.235), TopicMention(uid='tm-de-all-v2.0_tp90_de', relevance=0.145), TopicMention(uid='tm-de-all-v2.0_tp11_de', relevance=0.141), TopicMention(uid='tm-de-all-v2.0_tp82_de', relevance=0.063)], embeddings=None, transcriptLength=99.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 2, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-02-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=1.0, relevanceScore=0.9997100391480327, pageNumbers=[PageNumber(root=4.0)], collectionUids=[]), ContentItem(uid='luxland-2005-03-25-a-i0031', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Implementing Kyoto in Europe', 
transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm=\"Europe's\", mentionConfidence=75.51, startOffset=None, endOffset=None)], persons=[], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=1493.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 3, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-03-25-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.2203231948487305, pageNumbers=[PageNumber(root=12.0)], collectionUids=[]), ContentItem(uid='luxland-2005-04-22-a-i0124', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='The segmented-assimilation theory', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-United_States', count=2.0), NamedEntity(uid='2-54-Luxembourg', count=2.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='U.S', mentionConfidence=80.65, startOffset=None, endOffset=None), EntityMention(surfaceForm='United States', mentionConfidence=87.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Furthermore', mentionConfidence=64.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lin colnwood', mentionConfidence=88.5, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Luxembourg's\", mentionConfidence=76.13, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='identity', mentionConfidence=89.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxemburg Mann', mentionConfidence=85.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='interpellent', mentionConfidence=66.68, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm=\"Centre 
d'Etudes sur la Situation des Jeunes en Europe\", mentionConfidence=82.29, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=2157.0, totalPages=2.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 4, 22, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-04-22-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.1681817657291718, pageNumbers=[PageNumber(root=12.0), PageNumber(root=13.0)], collectionUids=[]), ContentItem(uid='luxland-2005-07-01-a-i0035', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='European Union', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0)], persons=[NamedEntity(uid='2-50-John_Locke', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='France', mentionConfidence=85.06, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=77.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg', mentionConfidence=86.41, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='John Locke', mentionConfidence=98.34, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Luxembourg Institute for European', mentionConfidence=86.92, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=704.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 7, 1, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-07-01-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.5309814703158672, pageNumbers=[PageNumber(root=18.0)], 
collectionUids=[]), ContentItem(uid='luxland-2005-09-09-a-i0019', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title=\"Conflicting views about Europe's future\", transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Rome', count=1.0)], persons=[NamedEntity(uid='2-50-Jacques_Chirac', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm=\"Europe's\", mentionConfidence=94.24, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg', mentionConfidence=72.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Rome', mentionConfidence=81.04, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Chirac', mentionConfidence=96.83, startOffset=None, endOffset=None)], organisations=[], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=1079.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 9, 9, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-09-09-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.27796829788277516, pageNumbers=[PageNumber(root=11.0)], collectionUids=[]), ContentItem(uid='luxland-2005-11-11-a-i0136', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='The Luxembourg Institute for European and International Studies at Akd[...]', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Luxembourg', count=1.0), NamedEntity(uid='2-54-Austria', count=1.0), NamedEntity(uid='2-54-Europe', count=1.0), NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Ankara', count=1.0), NamedEntity(uid='2-54-Cologne', count=1.0)], persons=[NamedEntity(uid='2-50-Denis_de_Rougemont', count=1.0)], organisations=[NamedEntity(uid='2-53-Foreign_Policy_Centre', count=1.0)], 
newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Luxembourg', mentionConfidence=70.02, startOffset=None, endOffset=None), EntityMention(surfaceForm='Austria', mentionConfidence=78.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europe', mentionConfidence=72.89, startOffset=None, endOffset=None), EntityMention(surfaceForm='France', mentionConfidence=83.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ankara', mentionConfidence=83.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='Cologne', mentionConfidence=82.91, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Adrian Papst', mentionConfidence=99.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Director of the WEIS', mentionConfidence=78.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='Denis de Rougemont', mentionConfidence=98.96, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Luxembourg Institute for European and International', mentionConfidence=89.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Foreign Policy Centre', mentionConfidence=80.17, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=2986.0, totalPages=3.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 11, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-11-11-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.14538180150187663, pageNumbers=[PageNumber(root=14.0), PageNumber(root=15.0), PageNumber(root=16.0)], collectionUids=[]), ContentItem(uid='luxland-2005-11-25-a-i0168', copyrightStatus='in_cpy', type='ad', sourceMedium='print', title='Publicité 2 Page 35', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-France', count=1.0), NamedEntity(uid='2-54-Boston', count=1.0)], 
persons=[NamedEntity(uid='2-50-Jean-Claude_Juncker', count=1.0)], organisations=[NamedEntity(uid='2-53-Laurence', count=1.0), NamedEntity(uid='2-53-Yves_Mersch', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='France', mentionConfidence=80.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='Boston', mentionConfidence=42.93, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg Laurence', mentionConfidence=60.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque', mentionConfidence=83.64, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Munich Paul Helminger Mayor, City of Luxembourg Jean-Claude Juncker Prime Minister and Minister of Finance', mentionConfidence=81.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='J.Koriikoff', mentionConfidence=63.93, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Laurence', mentionConfidence=36.12, startOffset=None, endOffset=None), EntityMention(surfaceForm='Yves Mersch', mentionConfidence=58.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque centrale du Luxembourg', mentionConfidence=96.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='Central Bank', mentionConfidence=81.13, startOffset=None, endOffset=None), EntityMention(surfaceForm='H BANQUE', mentionConfidence=51.77, startOffset=None, endOffset=None), EntityMention(surfaceForm='Banque Internationale', mentionConfidence=77.52, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg Organisation Office Luxembourg Financial', mentionConfidence=68.51, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[], embeddings=None, transcriptLength=224.0, totalPages=1.0, languageCode='en', isOnFrontPage=False, publicationDate=datetime.datetime(2005, 11, 25, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2005-11-25-a', countryCode='LU', providerCode='BNL', 
mediaUid='luxland', mediaType='newspaper', hasOLR=False, ocrQualityScore=None, relevanceScore=0.4767470494730738, pageNumbers=[PageNumber(root=35.0)], collectionUids=[]), ContentItem(uid='luxland-2006-10-20-a-i0072', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='European Union Youth Orchestra – EUYO www.euyo.org.uk', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Luxembourg', count=1.0)], persons=[], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Conservatoire', mentionConfidence=70.71, startOffset=None, endOffset=None), EntityMention(surfaceForm='entre', mentionConfidence=89.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxembourg', mentionConfidence=72.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue du Fort Thüngen', mentionConfidence=75.76, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='euyo', mentionConfidence=70.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='org', mentionConfidence=70.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='fabeck@mcesr', mentionConfidence=70.77, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Union Youth Orchestra-EUYO www', mentionConfidence=66.64, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Orchestre des Jeunes de l'Union européenne\", mentionConfidence=88.03, startOffset=None, endOffset=None), EntityMention(surfaceForm='Conservatoire de musique de la Ville de Luxembourg', mentionConfidence=73.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Thüngen', mentionConfidence=48.54, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"ministère de la Culture, de l'Enseignement supérieur et de la Recherche-Commissariat\", mentionConfidence=71.1, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp96_fr', relevance=0.201), 
TopicMention(uid='tm-fr-all-v2.0_tp99_fr', relevance=0.165), TopicMention(uid='tm-fr-all-v2.0_tp35_fr', relevance=0.115), TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.112), TopicMention(uid='tm-fr-all-v2.0_tp33_fr', relevance=0.079)], embeddings=None, transcriptLength=162.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2006, 10, 20, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2006-10-20-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.97, relevanceScore=0.8419051273248652, pageNumbers=[PageNumber(root=26.0)], collectionUids=[]), ContentItem(uid='luxland-2006-12-22-a-i0172', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Blog', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Stadt_Brüssel', count=1.0), NamedEntity(uid='2-54-Provinz_Namur', count=1.0), NamedEntity(uid='2-54-Esch_an_der_Alzette', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0)], persons=[NamedEntity(uid='2-50-Jean-Claude_Juncker', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Brüssel', mentionConfidence=97.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Namur', mentionConfidence=98.46, startOffset=None, endOffset=None), EntityMention(surfaceForm='Esch', mentionConfidence=97.3, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxemburg', mentionConfidence=92.15, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='rh', mentionConfidence=70.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='Premier Jean-Claude Juncker', mentionConfidence=97.19, startOffset=None, endOffset=None), EntityMention(surfaceForm='L-Präsident Jean', mentionConfidence=84.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='CSV-Politiker Albert', mentionConfidence=81.77, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Andrée', mentionConfidence=96.05, startOffset=None, endOffset=None), EntityMention(surfaceForm='Daniel', mentionConfidence=97.36, startOffset=None, endOffset=None), EntityMention(surfaceForm='Innenminister Jean-Marie', mentionConfidence=98.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Reding', mentionConfidence=97.0, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='demokratischen Partei', mentionConfidence=88.72, startOffset=None, endOffset=None), EntityMention(surfaceForm='Union des European Monitoring Centre on Racism', mentionConfidence=95.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Neugründung ratifiziert', mentionConfidence=60.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='CGT-Gewerkschaft FLTL', mentionConfidence=63.37, startOffset=None, endOffset=None), EntityMention(surfaceForm='Kaempf-Kohler', mentionConfidence=63.63, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp58_de', relevance=0.255), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.234), TopicMention(uid='tm-de-all-v2.0_tp43_de', relevance=0.079), TopicMention(uid='tm-de-all-v2.0_tp21_de', relevance=0.064), TopicMention(uid='tm-de-all-v2.0_tp59_de', relevance=0.06), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.051)], embeddings=None, transcriptLength=939.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2006, 12, 22, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2006-12-22-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.5587021431889059, pageNumbers=[PageNumber(root=4.0)], collectionUids=[]), ContentItem(uid='luxland-2007-05-11-a-i0041', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Echternacher Festival', transcript=None, 
entities=NamedEntities(locations=[NamedEntity(uid='2-54-Frankreich', count=1.0), NamedEntity(uid='2-54-Manchester', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0)], persons=[NamedEntity(uid='2-50-Maurice_Ravel', count=1.0), NamedEntity(uid='2-50-Ludwig_van_Beethoven', count=1.0), NamedEntity(uid='2-50-Johann_Sebastian_Bach', count=1.0), NamedEntity(uid='2-50-Claude_Lorrain', count=1.0), NamedEntity(uid='2-50-Karl_Marx', count=1.0), NamedEntity(uid='2-50-Joseph_Haydn', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Frankreich', mentionConfidence=65.53, startOffset=None, endOffset=None), EntityMention(surfaceForm='Manchester', mentionConfidence=99.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxemburg', mentionConfidence=64.66, startOffset=None, endOffset=None), EntityMention(surfaceForm='Européens', mentionConfidence=73.4, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Ravel', mentionConfidence=98.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='Beethovens', mentionConfidence=94.54, startOffset=None, endOffset=None), EntityMention(surfaceForm='Johann Sebastian Bach', mentionConfidence=98.6, startOffset=None, endOffset=None), EntityMention(surfaceForm='Claude', mentionConfidence=63.62, startOffset=None, endOffset=None), EntityMention(surfaceForm='Marx', mentionConfidence=94.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Haydn', mentionConfidence=98.49, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='St. 
Peter-und-Paul-Kirche', mentionConfidence=93.07, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.538), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.188), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.079)], embeddings=None, transcriptLength=632.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2007, 5, 11, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2007-05-11-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.6304929132540756, pageNumbers=[PageNumber(root=18.0)], collectionUids=[]), ContentItem(uid='luxland-2007-08-10-a-i0040', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Klassik', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Europäische_Union', count=1.0), NamedEntity(uid='2-54-Luxemburg', count=1.0), NamedEntity(uid='2-54-Berlin', count=1.0), NamedEntity(uid='2-54-Schweiz', count=1.0), NamedEntity(uid='2-54-Europa', count=1.0)], persons=[NamedEntity(uid='2-50-Colin_Davis', count=1.0)], organisations=[], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='Europäischen Union', mentionConfidence=74.98, startOffset=None, endOffset=None), EntityMention(surfaceForm='Luxemburg', mentionConfidence=46.84, startOffset=None, endOffset=None), EntityMention(surfaceForm='Berlin', mentionConfidence=96.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='Zwischendurch', mentionConfidence=56.47, startOffset=None, endOffset=None), EntityMention(surfaceForm='Europa', mentionConfidence=60.32, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Tragischen Ouvertürevon Johannes Brahms', mentionConfidence=94.51, startOffset=None, endOffset=None), EntityMention(surfaceForm=\"Herbert o'omstedt\", mentionConfidence=93.1, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Bernard', mentionConfidence=90.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sir Colin Davis', mentionConfidence=99.5, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='International Youth Foundation', mentionConfidence=56.95, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp15_de', relevance=0.455), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.138), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.105), TopicMention(uid='tm-de-all-v2.0_tp41_de', relevance=0.085)], embeddings=None, transcriptLength=512.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2007, 8, 10, 0, 0, tzinfo=TzInfo(UTC)), issueUid='luxland-2007-08-10-a', countryCode='LU', providerCode='BNL', mediaUid='luxland', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.9694078463389233, pageNumbers=[PageNumber(root=17.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-2008-02-14-a-i0022', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=None, mentions=EntityMentions(locations=[EntityMention(surfaceForm='Trends', mentionConfidence=75.63, startOffset=None, endOffset=None), EntityMention(surfaceForm='Restaurant', mentionConfidence=79.26, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Unternehmen zu senden', mentionConfidence=55.2, startOffset=None, endOffset=None), EntityMention(surfaceForm='Schweizer Arbeitgeber', mentionConfidence=57.85, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.439), TopicMention(uid='tm-de-all-v2.0_tp29_de', relevance=0.185), TopicMention(uid='tm-de-all-v2.0_tp83_de', relevance=0.052), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.051)], embeddings=None, transcriptLength=571.0, totalPages=1.0, 
languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2008, 2, 14, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2008-02-14-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.94, relevanceScore=0.6795700351668768, pageNumbers=[PageNumber(root=22.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-2008-02-14-a-i0024', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=None, mentions=EntityMentions(locations=[EntityMention(surfaceForm='Institut', mentionConfidence=81.96, startOffset=None, endOffset=None)], persons=[], organisations=[EntityMention(surfaceForm='Institut der Universität Zürich', mentionConfidence=92.15, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.467), TopicMention(uid='tm-de-all-v2.0_tp89_de', relevance=0.171), TopicMention(uid='tm-de-all-v2.0_tp29_de', relevance=0.168)], embeddings=None, transcriptLength=585.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2008, 2, 14, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2008-02-14-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.6544247085534268, pageNumbers=[PageNumber(root=24.0)], collectionUids=[]), ContentItem(uid='LLE-2008-10-02-a-i0112', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='GÉORGIE Première mission de', transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-fr-all-v2.0_tp71_fr', relevance=0.277), TopicMention(uid='tm-fr-all-v2.0_tp52_fr', relevance=0.141), TopicMention(uid='tm-fr-all-v2.0_tp76_fr', relevance=0.076)], embeddings=None, transcriptLength=325.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2008, 10, 2, 0, 0, tzinfo=TzInfo(UTC)), 
issueUid='LLE-2008-10-02-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.95, relevanceScore=0.7141650679537147, pageNumbers=[PageNumber(root=4.0)], collectionUids=[]), ContentItem(uid='IMP-2010-03-31-a-i0127', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='En bref', transcript=None, entities=NamedEntities(locations=[], persons=[], organisations=[], newsAgencies=[NamedEntity(uid='4-55-ATS_SDA', count=3.0), NamedEntity(uid='4-55-AFP', count=1.0), NamedEntity(uid='', count=None)]), mentions=EntityMentions(locations=[], persons=[], organisations=[], newsAgencies=[EntityMention(surfaceForm='ats', mentionConfidence=98.01, startOffset=None, endOffset=None), EntityMention(surfaceForm='ats', mentionConfidence=90.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='ats', mentionConfidence=98.27, startOffset=None, endOffset=None), EntityMention(surfaceForm='afp', mentionConfidence=97.57, startOffset=None, endOffset=None)]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp31_fr', relevance=0.1), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.097), TopicMention(uid='tm-fr-all-v2.0_tp65_fr', relevance=0.079), TopicMention(uid='tm-fr-all-v2.0_tp81_fr', relevance=0.064), TopicMention(uid='tm-fr-all-v2.0_tp27_fr', relevance=0.055), TopicMention(uid='tm-fr-all-v2.0_tp37_fr', relevance=0.055)], embeddings=None, transcriptLength=187.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 3, 31, 0, 0, tzinfo=TzInfo(UTC)), issueUid='IMP-2010-03-31-a', countryCode='CH', providerCode='SNL', mediaUid='IMP', mediaType='newspaper', hasOLR=True, ocrQualityScore=1.0, relevanceScore=0.825831499020578, pageNumbers=[PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='EXP-2010-03-31-a-i0128', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='En bref', transcript=None, 
entities=NamedEntities(locations=[NamedEntity(uid='2-54-Bâle', count=4.0), NamedEntity(uid='2-54-Canton_de_Bâle-Ville', count=1.0), NamedEntity(uid='2-54-Lugano', count=2.0)], persons=[NamedEntity(uid='2-50-Alain_Lombard', count=2.0), NamedEntity(uid='2-50-Vladimir_Ashkenazy', count=1.0), NamedEntity(uid='2-50-Sol_Gabetta', count=2.0), NamedEntity(uid='2-50-Heinz_Holliger', count=1.0)], organisations=[NamedEntity(uid='2-53-Kunstmuseum_(Bâle)', count=1.0), NamedEntity(uid='2-53-Siné_Hebdo', count=2.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='BÂLE', mentionConfidence=94.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bâle', mentionConfidence=30.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bâle', mentionConfidence=30.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='Bâle', mentionConfidence=30.1, startOffset=None, endOffset=None), EntityMention(surfaceForm='canton de Bâle-Ville', mentionConfidence=98.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lugano', mentionConfidence=96.08, startOffset=None, endOffset=None), EntityMention(surfaceForm='Lugano', mentionConfidence=96.08, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Laurenz', mentionConfidence=57.57, startOffset=None, endOffset=None), EntityMention(surfaceForm='mécène Maja Oeri', mentionConfidence=90.31, startOffset=None, endOffset=None), EntityMention(surfaceForm='architectes bâlois Christ & Gantenbein', mentionConfidence=83.38, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gabetta', mentionConfidence=68.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Gabetta', mentionConfidence=68.43, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alain Lombard', mentionConfidence=95.49, startOffset=None, endOffset=None), EntityMention(surfaceForm='Alain Lombard', mentionConfidence=95.49, startOffset=None, endOffset=None), 
EntityMention(surfaceForm='Vladimir Ashkenazy', mentionConfidence=96.23, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sol Gabetta', mentionConfidence=88.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Sol Gabetta', mentionConfidence=88.29, startOffset=None, endOffset=None), EntityMention(surfaceForm='Heinz Holliger', mentionConfidence=95.79, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Kunstmuseum de Bâle', mentionConfidence=61.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Siné Hebdo', mentionConfidence=35.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='Siné Hebdo', mentionConfidence=35.18, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union Youth Orchestra', mentionConfidence=73.91, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.131), TopicMention(uid='tm-fr-all-v2.0_tp07_fr', relevance=0.104), TopicMention(uid='tm-fr-all-v2.0_tp65_fr', relevance=0.081), TopicMention(uid='tm-fr-all-v2.0_tp27_fr', relevance=0.07), TopicMention(uid='tm-fr-all-v2.0_tp37_fr', relevance=0.064), TopicMention(uid='tm-fr-all-v2.0_tp53_fr', relevance=0.051)], embeddings=None, transcriptLength=187.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 3, 31, 0, 0, tzinfo=TzInfo(UTC)), issueUid='EXP-2010-03-31-a', countryCode='CH', providerCode='SNL', mediaUid='EXP', mediaType='newspaper', hasOLR=True, ocrQualityScore=1.0, relevanceScore=0.8263497272331255, pageNumbers=[PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-2010-04-08-a-i0013', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=None, mentions=None, topics=[TopicMention(uid='tm-de-all-v2.0_tp91_de', relevance=0.209), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.164), TopicMention(uid='tm-de-all-v2.0_tp92_de', 
relevance=0.147), TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.146)], embeddings=None, transcriptLength=509.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 4, 8, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2010-04-08-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.94, relevanceScore=0.7351644823199176, pageNumbers=[PageNumber(root=13.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-2010-04-08-a-i0012', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=None, mentions=EntityMentions(locations=[EntityMention(surfaceForm='Paris', mentionConfidence=98.64, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Thomas Stein- tiert', mentionConfidence=97.85, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Union Contest for Young', mentionConfidence=91.31, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.261), TopicMention(uid='tm-de-all-v2.0_tp91_de', relevance=0.184), TopicMention(uid='tm-de-all-v2.0_tp79_de', relevance=0.082), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.079)], embeddings=None, transcriptLength=452.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 4, 8, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2010-04-08-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.96, relevanceScore=0.7371063920630754, pageNumbers=[PageNumber(root=12.0)], collectionUids=[]), ContentItem(uid='arbeitgeber-2010-04-08-a-i0014', copyrightStatus='in_cpy', type='page', sourceMedium='print', title=None, transcript=None, entities=None, mentions=EntityMentions(locations=[], persons=[EntityMention(surfaceForm='Natio- de la Decouverte', 
mentionConfidence=52.57, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Motoren', mentionConfidence=30.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Stiftung Schweizer seum und Versuchslabor für angehende flyer', mentionConfidence=88.04, startOffset=None, endOffset=None), EntityMention(surfaceForm='Centre National de la Re- vorstellen durften', mentionConfidence=88.4, startOffset=None, endOffset=None)], newsAgencies=[]), topics=[TopicMention(uid='tm-de-all-v2.0_tp20_de', relevance=0.191), TopicMention(uid='tm-de-all-v2.0_tp87_de', relevance=0.147), TopicMention(uid='tm-de-all-v2.0_tp93_de', relevance=0.14), TopicMention(uid='tm-de-all-v2.0_tp92_de', relevance=0.122), TopicMention(uid='tm-de-all-v2.0_tp91_de', relevance=0.083)], embeddings=None, transcriptLength=530.0, totalPages=1.0, languageCode='de', isOnFrontPage=False, publicationDate=datetime.datetime(2010, 4, 8, 0, 0, tzinfo=TzInfo(UTC)), issueUid='arbeitgeber-2010-04-08-a', countryCode='CH', providerCode='SWA', mediaUid='arbeitgeber', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.92, relevanceScore=0.705741086393967, pageNumbers=[PageNumber(root=14.0)], collectionUids=[]), ContentItem(uid='LLE-2012-05-03-a-i0180', copyrightStatus='in_cpy', type='ar', sourceMedium='print', title='Estavayer joue sa survie ce soir', transcript=None, entities=NamedEntities(locations=[NamedEntity(uid='2-54-Canton_de_Fribourg', count=1.0), NamedEntity(uid='2-54-Suisse', count=1.0), NamedEntity(uid='2-54-Sainte-Thérèse', count=1.0), NamedEntity(uid='2-54-Alpes_suisses', count=1.0), NamedEntity(uid='2-54-Avry', count=1.0)], persons=[NamedEntity(uid='2-50-Thierry_Jobin', count=1.0), NamedEntity(uid='2-50-Georges_Schwizgebel', count=1.0)], organisations=[NamedEntity(uid='2-53-Union_européenne', count=1.0), NamedEntity(uid='2-53-Fédération_romande_des_consommateurs', count=1.0), NamedEntity(uid='2-53-Gare_de_Fribourg', count=1.0), NamedEntity(uid='2-53-Salle', count=1.0), 
NamedEntity(uid='2-53-Migros', count=1.0)], newsAgencies=[]), mentions=EntityMentions(locations=[EntityMention(surfaceForm='canton de Fribourg', mentionConfidence=93.68, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suisse', mentionConfidence=95.51, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue Albert-Gockel', mentionConfidence=81.76, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue Joseph-Pilier', mentionConfidence=85.0, startOffset=None, endOffset=None), EntityMention(surfaceForm='Café de lAncienne Gare', mentionConfidence=55.74, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue des Epouses', mentionConfidence=65.26, startOffset=None, endOffset=None), EntityMention(surfaceForm='Ste-Thérèse', mentionConfidence=66.56, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue de Romont', mentionConfidence=90.11, startOffset=None, endOffset=None), EntityMention(surfaceForm='r- Alpes', mentionConfidence=93.34, startOffset=None, endOffset=None), EntityMention(surfaceForm='av du Midi', mentionConfidence=91.14, startOffset=None, endOffset=None), EntityMention(surfaceForm='Avry-Centre', mentionConfidence=91.4, startOffset=None, endOffset=None)], persons=[EntityMention(surfaceForm='Laurent Scacchi', mentionConfidence=95.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='Jean-Michel Bonvin', mentionConfidence=97.33, startOffset=None, endOffset=None), EntityMention(surfaceForm='Thierry Jobin', mentionConfidence=95.96, startOffset=None, endOffset=None), EntityMention(surfaceForm='Georges Schwizgebel', mentionConfidence=94.07, startOffset=None, endOffset=None), EntityMention(surfaceForm='Suzanne Romaine, prof. Uni Oxford', mentionConfidence=90.48, startOffset=None, endOffset=None), EntityMention(surfaceForm='E. 
Kim rédactrice en chef du magazine', mentionConfidence=94.61, startOffset=None, endOffset=None), EntityMention(surfaceForm='Geneviève Dessibourg', mentionConfidence=96.69, startOffset=None, endOffset=None), EntityMention(surfaceForm='M.-Brigitte See', mentionConfidence=95.11, startOffset=None, endOffset=None)], organisations=[EntityMention(surfaceForm='Société fri bourgeoise des sciences naturelles', mentionConfidence=95.32, startOffset=None, endOffset=None), EntityMention(surfaceForm='Groupe E Greenwatt SA', mentionConfidence=70.21, startOffset=None, endOffset=None), EntityMention(surfaceForm='European Union', mentionConfidence=72.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='du plurilinguisme', mentionConfidence=56.41, startOffset=None, endOffset=None), EntityMention(surfaceForm='Fédération romande des consommateurs', mentionConfidence=95.58, startOffset=None, endOffset=None), EntityMention(surfaceForm='section Fribourg', mentionConfidence=77.81, startOffset=None, endOffset=None), EntityMention(surfaceForm='de lAncienne Gare', mentionConfidence=31.8, startOffset=None, endOffset=None), EntityMention(surfaceForm='Librairie Bien-Etre', mentionConfidence=77.9, startOffset=None, endOffset=None), EntityMention(surfaceForm='rue des Epouses 5', mentionConfidence=30.97, startOffset=None, endOffset=None), EntityMention(surfaceForm='salle', mentionConfidence=36.87, startOffset=None, endOffset=None), EntityMention(surfaceForm='PLAUDERSTUNDE', mentionConfidence=34.92, startOffset=None, endOffset=None), EntityMention(surfaceForm='LivrEchange', mentionConfidence=58.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='LivrEchange', mentionConfidence=58.85, startOffset=None, endOffset=None), EntityMention(surfaceForm='Restaurant Migros', mentionConfidence=48.44, startOffset=None, endOffset=None), EntityMention(surfaceForm='AOirt', mentionConfidence=37.81, startOffset=None, endOffset=None)], newsAgencies=[]), 
topics=[TopicMention(uid='tm-fr-all-v2.0_tp58_fr', relevance=0.233), TopicMention(uid='tm-fr-all-v2.0_tp92_fr', relevance=0.141), TopicMention(uid='tm-fr-all-v2.0_tp86_fr', relevance=0.079), TopicMention(uid='tm-fr-all-v2.0_tp70_fr', relevance=0.077), TopicMention(uid='tm-fr-all-v2.0_tp61_fr', relevance=0.072), TopicMention(uid='tm-fr-all-v2.0_tp25_fr', relevance=0.055)], embeddings=None, transcriptLength=306.0, totalPages=1.0, languageCode='fr', isOnFrontPage=False, publicationDate=datetime.datetime(2012, 5, 3, 0, 0, tzinfo=TzInfo(UTC)), issueUid='LLE-2012-05-03-a', countryCode='CH', providerCode='SNL', mediaUid='LLE', mediaType='newspaper', hasOLR=True, ocrQualityScore=0.92, relevanceScore=0.6440425031661728, pageNumbers=[PageNumber(root=20.0)], collectionUids=[])], pagination=Pagination(total=91, limit=100, offset=0))"
]
},
- "execution_count": 5,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -287,7 +308,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -315,21 +336,23 @@
" type | \n",
" sourceMedium | \n",
" title | \n",
- " locationEntities | \n",
- " personEntities | \n",
- " organisationEntities | \n",
- " newsAgenciesEntities | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " issueUid | \n",
- " countryCode | \n",
- " providerCode | \n",
- " mediaUid | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
" \n",
" \n",
" | uid | \n",
@@ -352,6 +375,8 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
@@ -360,69 +385,64 @@
" in_cpy | \n",
" ar | \n",
" print | \n",
- " [REDACTED] | \n",
- " [] | \n",
- " [{'uid': '2-50-Pius_XII.', 'count': 1}] | \n",
- " [] | \n",
- " [] | \n",
+ " Europäischer Föderalistenkongreß in Rom | \n",
" [{'uid': 'tm-de-all-v2.0_tp25_de', 'relevance'... | \n",
" 733 | \n",
" 1 | \n",
" de | \n",
" False | \n",
" 1948-11-25T00:00:00+00:00 | \n",
- " luxwort-1948-11-25-a | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " ... | \n",
+ " [6] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-Pius_XII.', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Papst Pius XII', 'mentionCon... | \n",
+ " [] | \n",
+ " [] | \n",
" \n",
" \n",
" | FZG-1950-06-17-a-i0045 | \n",
" in_cpy | \n",
" ar | \n",
" print | \n",
- " [REDACTED] | \n",
- " [] | \n",
- " [{'uid': '2-50-Konrad_Adenauer', 'count': 1}, ... | \n",
- " [] | \n",
- " [] | \n",
+ " Um EJnropa herum Die Furcht vor Krieg un... | \n",
" [{'uid': 'tm-de-all-v2.0_tp86_de', 'relevance'... | \n",
" 1200 | \n",
" 1 | \n",
" de | \n",
" True | \n",
" 1950-06-17T00:00:00+00:00 | \n",
- " FZG-1950-06-17-a | \n",
- " CH | \n",
- " SNL | \n",
- " FZG | \n",
- " newspaper | \n",
+ " ... | \n",
+ " [1] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-Konrad_Adenauer', 'count': 1}, ... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Adenauer', 'mentionConfidenc... | \n",
+ " [] | \n",
+ " [] | \n",
"
\n",
" \n",
"\n",
+ "2 rows × 28 columns
\n",
""
],
"text/plain": [
- " copyrightStatus type sourceMedium title \\\n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 in_cpy ar print [REDACTED] \n",
- "FZG-1950-06-17-a-i0045 in_cpy ar print [REDACTED] \n",
- "\n",
- " locationEntities \\\n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 [] \n",
- "FZG-1950-06-17-a-i0045 [] \n",
- "\n",
- " personEntities \\\n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 [{'uid': '2-50-Pius_XII.', 'count': 1}] \n",
- "FZG-1950-06-17-a-i0045 [{'uid': '2-50-Konrad_Adenauer', 'count': 1}, ... \n",
+ " copyrightStatus type sourceMedium \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 in_cpy ar print \n",
+ "FZG-1950-06-17-a-i0045 in_cpy ar print \n",
"\n",
- " organisationEntities newsAgenciesEntities \\\n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 [] [] \n",
- "FZG-1950-06-17-a-i0045 [] [] \n",
+ " title \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 Europäischer Föderalistenkongreß in Rom \n",
+ "FZG-1950-06-17-a-i0045 Um EJnropa herum Die Furcht vor Krieg un... \n",
"\n",
" topics \\\n",
"uid \n",
@@ -434,23 +454,45 @@
"luxwort-1948-11-25-a-i0033 733 1 de \n",
"FZG-1950-06-17-a-i0045 1200 1 de \n",
"\n",
- " isOnFrontPage publicationDate \\\n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 False 1948-11-25T00:00:00+00:00 \n",
- "FZG-1950-06-17-a-i0045 True 1950-06-17T00:00:00+00:00 \n",
+ " isOnFrontPage publicationDate ... \\\n",
+ "uid ... \n",
+ "luxwort-1948-11-25-a-i0033 False 1948-11-25T00:00:00+00:00 ... \n",
+ "FZG-1950-06-17-a-i0045 True 1950-06-17T00:00:00+00:00 ... \n",
+ "\n",
+ " pageNumbers collectionUids entities.locations \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 [6] [] [] \n",
+ "FZG-1950-06-17-a-i0045 [1] [] [] \n",
+ "\n",
+ " entities.persons \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 [{'uid': '2-50-Pius_XII.', 'count': 1}] \n",
+ "FZG-1950-06-17-a-i0045 [{'uid': '2-50-Konrad_Adenauer', 'count': 1}, ... \n",
+ "\n",
+ " entities.organisations entities.newsAgencies \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 [] [] \n",
+ "FZG-1950-06-17-a-i0045 [] [] \n",
"\n",
- " issueUid countryCode providerCode \\\n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 luxwort-1948-11-25-a LU BNL \n",
- "FZG-1950-06-17-a-i0045 FZG-1950-06-17-a CH SNL \n",
+ " mentions.locations \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 [] \n",
+ "FZG-1950-06-17-a-i0045 [] \n",
"\n",
- " mediaUid mediaType \n",
- "uid \n",
- "luxwort-1948-11-25-a-i0033 luxwort newspaper \n",
- "FZG-1950-06-17-a-i0045 FZG newspaper "
+ " mentions.persons \\\n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 [{'surfaceForm': 'Papst Pius XII', 'mentionCon... \n",
+ "FZG-1950-06-17-a-i0045 [{'surfaceForm': 'Adenauer', 'mentionConfidenc... \n",
+ "\n",
+ " mentions.organisations mentions.newsAgencies \n",
+ "uid \n",
+ "luxwort-1948-11-25-a-i0033 [] [] \n",
+ "FZG-1950-06-17-a-i0045 [] [] \n",
+ "\n",
+ "[2 rows x 28 columns]"
]
},
- "execution_count": 6,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -471,7 +513,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -509,21 +551,22 @@
" type | \n",
" sourceMedium | \n",
" transcript | \n",
- " locationEntities | \n",
- " personEntities | \n",
- " organisationEntities | \n",
- " newsAgenciesEntities | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
- " publicationDate | \n",
- " issueUid | \n",
- " countryCode | \n",
- " providerCode | \n",
- " mediaUid | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
" \n",
" \n",
" \n",
@@ -533,32 +576,34 @@
" in_cpy | \n",
" page | \n",
" print | \n",
- " [REDACTED] | \n",
- " [{'uid': '2-54-Paris', 'count': 1}, {'uid': '2... | \n",
- " [{'uid': '2-50-François_Hanriot', 'count': 1}] | \n",
- " [] | \n",
- " [] | \n",
+ " ^chutch schleunige Vorkehrungen der bewafneten... | \n",
" [{'uid': 'tm-de-all-v2.0_tp47_de', 'relevance'... | \n",
" 988 | \n",
" 1 | \n",
" de | \n",
" False | \n",
- " 1794-08-09T00:00:00+00:00 | \n",
- " NZZ-1794-08-09-a | \n",
- " CH | \n",
- " NZZ | \n",
- " NZZ | \n",
- " newspaper | \n",
+ " ... | \n",
+ " [2] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Paris', 'count': 1}, {'uid': '2... | \n",
+ " [{'uid': '2-50-François_Hanriot', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Couthon', 'mentionConfidence... | \n",
+ " [{'surfaceForm': 'Roberepiere', 'mentionConfid... | \n",
+ " [] | \n",
+ " [] | \n",
" \n",
" \n",
"\n",
+ "1 rows × 28 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 7,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -577,16 +622,16 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'[REDACTED]'"
+ "'^chutch schleunige Vorkehrungen der bewafnetenNazi «» « ulmacht versichert. unV t >;.. 3 >; Stadthaus auf n...!)..,. zlVbereyiere wit sc! i! enAn. b >; wgsri! sich befand, mm \\'. ügcn l » ssl ». Während der N « chi cischlcncndie vnser,«\\'?.«.» \\'NißliNererwaliungen i » P.. ric vor een,.« eum »., um »« lftlven il », cr Anbanglichteitzu veillcl\\'ern. Gegen ren Älprgen <;» ml >; t dae Stadthaus erode. <;. leda »! ermordete \\'Dh selbK. D >; e beyden Roberepiere versuchten ras glci- H ». D, r\\'ältere schoß sich einen Theil de >;>; Gesichte hincks, jlnv rer jüngere stürz « sich aue dem Fenster. und Herbräch b <; yde Beine. Dcr ältere wu » de.. ufeiner Trag » , dnhrevortenKonvcntssaal geblecht, abermal. e. lMdt » erpicht, ihn herumzutragen. Hm ic,. Lhenmder (2z. Iich) Abends wurden beyde Roberepiere, Couthon, der . Migadcgeneral Lavalelce, Hanriot, Hominandont der Marter Nazionalgarde, Dümas, Präsident des Nevolu » Hsd »« ribnnals, St. Iuft, Payän Nazivna!» Aa « nt » n der \\'Komüne ven Paris, Vivier, Präsident oer Iatobmer » <; vel sammlung. Gobiau, Substitut des öffentlich, n Anklägers. Fltmiüt. chaire von Paris, ne.\\' st n Nunizi- \\'Palräihtn hingerichtet. vÄn dm zwey folgenden Tagen \\'. darauf trafdasgleichc-Schickftl z; andere Glieder des \\'Homünenra. hs. Iin Moniteur wird Robitspieres un, d seiner Miischuldigen Hinrichtung mit folgenden Refic « Dienen btglett. t:.» Möge sich hieran jeder spiegeln, der \\'es inekünftigwagenftllte, d! eNazl0nalsoitveräl!.! tät zu ^mißbrauchen! Glorwürdign Tag, herauf einmal alle . Hofnungen derVoa. ißetttn vereicelthat! Diese rechne » ^fen darauf, daß Ie init emein Niktateur kapitülier «\\'« könnten, jllbu, nein! Sie Wen, keine » anbew Ditta, . tfurÄls tli. s sfanzMche Volt haben, u » d vq » diesti » . habe,, fif weder Medtu ». och Wassensi. Nstand Fu \\'hoffe,,. Was für ein erhabenes Schauspiel ist, nicht die Anergische Emnlüthigkeit des Konvents! 
Wie Mr mG sen 7die Hgioteurs zittern, » enn sie sehern wie festen Schrilles das Pariservolk gehe » und wie willig sich alle Bürger « ui den Konvent herum versiimmeln! Dies « Revoluzien giebt deni Konvent eine neue Stäck, aber H auch zugleich das Todesurtheil\\'für alle diejenigen, die es wagtn » iKchten, ihn zu trennen oder ben. bzu. pi\\'n-, pigen. Vergebens werden sich. die unaufhkrlichen Verläumder dc « volkes dieser neuen Ereignisse als einerGe\\'lsgcnheit bedienen können, dassel. be der Wetferw^ndig « keit zu beschuleigen. Das Volk ist ste «. niemen Uilhei- Ungerecht. Es will die Freyheit, und liebt nur jene, dj « diese vertheidigen. Ke weniger es Individuen «« bechet, je standhafter wird es. t >; as Vaterland\\'lieben. Ie wetng. r Individuen in Ansehn stehen,, je / mehr wird sich die if » ftntliche Freyheit befestigen. Wer so mMtig witd, daß er » en Versuch wagen kann, sich üb « Vie Gesetze hinweg Zusetzen, muß in jedem Bürger einen Plutus finden. Ner zu grosse Einfluß eilifs einzeln Menschen ist rie ge » sährl. chfte Plage für eine Republik.« Me wschtWtn Verznberungen, die als « ine Folge dieser ErHitgnW « lkgeseh «, werden kinnen, \\'bestehen dKlim,, daß das Revo « luz! lmttib: ii!.. l ganz iü,! e ftNi. ver e, h^lt, so wie auch die Hell? c\\'o,!!!!,! sjl! 0i! durch!>; i. « ü. ger Brennd, Eschas « se. i >; iu ten, Urc. n, laioi, Tburicc. Trcillard und- Taille « . ergänz! wol den ist.) Nl >;! m. si <; a msch » lle Mon... fn, jcre HoUlMls. dn 4ieThc, l,, eu oc «\\' ezt werdcn. Sanlerre, wtlche Ülovriepitx m Vn d^. ft netznun ließ, wurde durch oi,« vereinigicn Hello-, und Gicht. hutktom. msi. c\\'ne!. in Frey » heit\\'gcftzt. ei scl ic » oer demKonvem. und vuMng » te, ale Ko, pora! otc ». a ^gemeiner Soldat angcftellt zu werten. In Bezi5hui. gHUf die ^ffentüct\\'en Hinrich « lungen trat Tallien « ic einer wlchligen Beschuldigung gegen Robel kpicr « nus. ss hätten sich nämlich, sagte er, versch! tdt.! e teuce al » Gefangene in dieMrestebü. 
t «\\' ser eingc^Iicheu \\', deren Geschaft einzig barmn best «»-den würe, über die vorhandenen Gefangenen gewisse Men zu verfertigen, und Mm Vei. rainen, den » Ro « »« spiere zuliefern, weicher dann jedesmal diejenige?, welche ihn, am MfählliMcn-geschienen hätten, dem Revoluzione. nbunale zur Hinrichtung empfehlen Habe. Ndrva «««. Aus Anlwefpcn schreibt der Volts «» . vrästntant Richmd unterm 9. Thcrniidor s 27. Iul.) Fol » gendesan die Heilefdmmisston: « Ich berichte euch^ daß der Feind das Fort Lillogeräumt habe. DieTruppen der lXepublit sind gestern dort eingerückt. Die Englander shaven, als sie abzogen, an dem linken Ufer die Dämme ^durchbrochen, und dastand ^Meilen im Umfange un « terWaffer gesezt. Die beschädigten Einwohn er verwün- \\'fchen sie. Sie sind ilber HalS und Hopf davon geficchen. ^Unsere leichten Truppen sind ihnen mehr als io Meilen auf die Spür gegangen, che sie ihre Avantgarde ens « deckten. Ee scheint, fie ziehen sich unter dieKanonenDra . Breda uno Nerg \\'. ep\\'Zovm. liessen si. IAntwerpen uns; 8 Kanonen zurück. Doit haben wir « uch ansehn » liche Magazine, besonders von Haber gefunden, von welchen gegen Säcke vorhanden seyn können. Mehr als zoo Privatleute baben Sachen an sich genommen, die der feindlichen Armee gehörten. Ich und laurent sin » jezt beschäft »\\';«, die nöthigcn\\'Untersuchungen anzustellen, und dasjenige, was wir finden, für die Republik... Beschlag zu nehmen.« Der D. visionc\\'gn. eral Elnsuf meldet aus seinem Hauptquartier z « Warem unterm 9. Tb^lmiidor s 27. Iul) Nachstehendes an die Heilskonmussion: n DieSiimb «« und Maaßärwee hat diesen Morgen sich in Bewegung gesezt,\\' in der Absicht, den Fcmd immer näher gegen Mastricht hin zu träng m, ihn über die Maaß zum Rückzug zu nöthigen, und M von Mlich Meister zu machen. Die^lvantMrde mai >; schittte an lie I « ar, der linke Flügel lehnte sick an Älentt. gl «\\' und der rechte an St. Nikolas. Derselbe igriffdie veslüttich befindlichen Stellungen d,!» ^eindtl. 
an, der z^ar einige Zeit unserm Kanonenfeuer wider » stund! aber endlich, alsuMre Truppen mit bem Bätc!» \"« t eindrängln, » ie Flilcht ergreife » «. ußte. Mrto^l »'"
]
},
- "execution_count": 8,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -597,7 +642,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -642,7 +687,7 @@
"0 NZZ-1794-08-09-a-i0002 CH de"
]
},
- "execution_count": 9,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -662,7 +707,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -696,7 +741,7 @@
" \n",
" \n",
" | CH | \n",
- " 197704 | \n",
+ " 197740 | \n",
"
\n",
" \n",
" | FR | \n",
@@ -704,7 +749,7 @@
"
\n",
" \n",
" | LU | \n",
- " 7474 | \n",
+ " 7478 | \n",
"
\n",
" \n",
"\n",
@@ -713,12 +758,12 @@
"text/plain": [
" count\n",
"value \n",
- "CH 197704\n",
+ "CH 197740\n",
"FR 33993\n",
- "LU 7474"
+ "LU 7478"
]
},
- "execution_count": 10,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/examples/notebooks/collections.ipynb b/examples/notebooks/collections.ipynb
index 142f0c3..9965a01 100644
--- a/examples/notebooks/collections.ipynb
+++ b/examples/notebooks/collections.ipynb
@@ -2,18 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🎉 You are now connected to the Impresso API! 🎉\n",
- "🔗 Using API: https://dev.impresso-project.ch/public-api/v1\n"
- ]
- }
- ],
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"from impresso import connect\n",
"\n",
@@ -83,13 +74,21 @@
"result"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_collection_id = result.raw[\"uid\"]\n",
+ "test_collection_id"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Get collection items\n",
- "\n",
- "Get items from a collection by its ID."
+ "## Add item to collection"
]
},
{
@@ -98,16 +97,17 @@
"metadata": {},
"outputs": [],
"source": [
- "colection_id = result.raw[\"uid\"]\n",
- "items = impresso.collections.items(colection_id)\n",
- "items"
+ "content_item = impresso.content_items.get(\"NZZ-1794-08-09-a-i0002\")\n",
+ "impresso.collections.add_items(test_collection_id, [content_item.pydantic.uid])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Remove items from collection"
+ "## Get collection items\n",
+ "\n",
+ "Get items from a collection by its ID."
]
},
{
@@ -116,8 +116,15 @@
"metadata": {},
"outputs": [],
"source": [
- "item_id = items.pydantic.data[0].uid\n",
- "item_id"
+ "items = impresso.collections.items(test_collection_id)\n",
+ "items"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Remove items from collection"
]
},
{
@@ -126,14 +133,16 @@
"metadata": {},
"outputs": [],
"source": [
- "impresso.collections.remove_items(colection_id, [item_id])"
+ "impresso.collections.remove_items(test_collection_id, [content_item.pydantic.uid])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Add items to collection"
+ "## Get items again to verify that the item has been removed\n",
+ "\n",
+ "NOTE: You may still see the old items for some time because the results may still be cached by the database."
]
},
{
@@ -142,13 +151,13 @@
"metadata": {},
"outputs": [],
"source": [
- "impresso.collections.add_items(colection_id, [item_id])"
+ "impresso.collections.items(test_collection_id)"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "impresso-py3.11",
+ "display_name": "impresso-py3.13 (3.13.7)",
"language": "python",
"name": "python3"
},
@@ -162,7 +171,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.13.7"
}
},
"nbformat": 4,
diff --git a/examples/notebooks/data_providers.ipynb b/examples/notebooks/data_providers.ipynb
new file mode 100644
index 0000000..4d270fb
--- /dev/null
+++ b/examples/notebooks/data_providers.ipynb
@@ -0,0 +1,727 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cell-0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from impresso import connect\n",
+ "\n",
+ "impresso = connect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-1",
+ "metadata": {},
+ "source": [
+ "# Data Providers\n",
+ "\n",
+ "Data providers are partner institutions that provide content to Impresso, such as libraries, archives, and media organizations. This notebook shows how to search and explore data providers."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-2",
+ "metadata": {},
+ "source": [
+ "## List all data providers\n",
+ "\n",
+ "Get a list of all data providers in the Impresso database."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "cell-3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindDataProviders result
\n",
+ "
Contains 20 items (0 - 20) of 38 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " names | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ArcInfo | \n",
+ " ArcInfo | \n",
+ " [{'langCode': 'fr', 'name': 'ArcInfo'}, {'lang... | \n",
+ "
\n",
+ " \n",
+ " | BBC | \n",
+ " British Broadcasting Corporation | \n",
+ " [{'langCode': 'en', 'name': 'British Broadcast... | \n",
+ "
\n",
+ " \n",
+ " | BCUF | \n",
+ " Bibliothèque cantonale et universitaire de Fri... | \n",
+ " [{'langCode': 'fr', 'name': 'Bibliothèque cant... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.data_providers.find()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-4",
+ "metadata": {},
+ "source": [
+ "## Search data providers by name\n",
+ "\n",
+ "Search for data providers using a search term that matches their names in different languages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cell-5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindDataProviders result
\n",
+ "
Contains 12 items of 12 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " names | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | BCUF | \n",
+ " Bibliothèque cantonale et universitaire de Fri... | \n",
+ " [{'langCode': 'fr', 'name': 'Bibliothèque cant... | \n",
+ "
\n",
+ " \n",
+ " | BCUL | \n",
+ " Bibliothèque cantonale et universitaire de Lau... | \n",
+ " [{'langCode': 'fr', 'name': 'Bibliothèque cant... | \n",
+ "
\n",
+ " \n",
+ " | BL | \n",
+ " British Library | \n",
+ " [{'langCode': 'en', 'name': 'British Library'}] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.data_providers.find(term=\"library\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cell-6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindDataProviders result
\n",
+ "
Contains 6 items of 6 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " names | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | BNF | \n",
+ " National Library of France | \n",
+ " [{'langCode': 'en', 'name': 'National Library ... | \n",
+ "
\n",
+ " \n",
+ " | BNL | \n",
+ " National Library of Luxembourg | \n",
+ " [{'langCode': 'en', 'name': 'National Library ... | \n",
+ "
\n",
+ " \n",
+ " | CNA | \n",
+ " Centre national de l'audiovisuel | \n",
+ " [{'langCode': 'fr', 'name': 'Centre national d... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.data_providers.find(term=\"national\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-7",
+ "metadata": {},
+ "source": [
+ "## Pagination\n",
+ "\n",
+ "Iterate through all pages of results if there are many data providers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "cell-8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total data providers: 38\n",
+ "Got page 0 - 5 of 38. First provider ID: ArcInfo\n",
+ "Got page 5 - 10 of 38. First provider ID: BNF\n",
+ "Got page 10 - 15 of 38. First provider ID: DR\n",
+ "Got page 15 - 20 of 38. First provider ID: KB\n",
+ "Got page 20 - 25 of 38. First provider ID: LLE\n",
+ "Got page 25 - 30 of 38. First provider ID: ORF\n",
+ "Got page 30 - 35 of 38. First provider ID: SA\n",
+ "Got page 35 - 38 of 38. First provider ID: Swissinfo\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.data_providers.find(limit=5)\n",
+ "\n",
+ "print(f\"Total data providers: {result.total}\")\n",
+ "for page in result.pages():\n",
+ " print(\n",
+ " f\"Got page {page.offset} - {page.offset + page.size} of {page.total}. \"\n",
+ " + f\"First provider ID: {page.raw['data'][0]['id']}\"\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-9",
+ "metadata": {},
+ "source": [
+ "## Get a data provider by ID\n",
+ "\n",
+ "Retrieve detailed information about a specific data provider using its unique identifier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "cell-10",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Provider ID: ArcInfo\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
GetDataProvider result
\n",
+ "
Contains 0 items of 0 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " names | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ArcInfo | \n",
+ " ArcInfo | \n",
+ " [{'langCode': 'fr', 'name': 'ArcInfo'}, {'lang... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First, let's get a provider ID from the search results\n",
+ "result = impresso.data_providers.find(limit=1)\n",
+ "provider_id = result.raw['data'][0]['id']\n",
+ "print(f\"Provider ID: {provider_id}\")\n",
+ "\n",
+ "# Now get the full provider details\n",
+ "impresso.data_providers.get(provider_id)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-13",
+ "metadata": {},
+ "source": [
+ "## View as DataFrame\n",
+ "\n",
+ "Convert the results to a pandas DataFrame for easier analysis."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "cell-14",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " names | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ArcInfo | \n",
+ " ArcInfo | \n",
+ " [{'langCode': 'fr', 'name': 'ArcInfo'}, {'lang... | \n",
+ "
\n",
+ " \n",
+ " | BBC | \n",
+ " British Broadcasting Corporation | \n",
+ " [{'langCode': 'en', 'name': 'British Broadcast... | \n",
+ "
\n",
+ " \n",
+ " | BCUF | \n",
+ " Bibliothèque cantonale et universitaire de Fri... | \n",
+ " [{'langCode': 'fr', 'name': 'Bibliothèque cant... | \n",
+ "
\n",
+ " \n",
+ " | BCUL | \n",
+ " Bibliothèque cantonale et universitaire de Lau... | \n",
+ " [{'langCode': 'fr', 'name': 'Bibliothèque cant... | \n",
+ "
\n",
+ " \n",
+ " | BL | \n",
+ " British Library | \n",
+ " [{'langCode': 'en', 'name': 'British Library'}] | \n",
+ "
\n",
+ " \n",
+ " | BNF | \n",
+ " National Library of France | \n",
+ " [{'langCode': 'en', 'name': 'National Library ... | \n",
+ "
\n",
+ " \n",
+ " | BNL | \n",
+ " National Library of Luxembourg | \n",
+ " [{'langCode': 'en', 'name': 'National Library ... | \n",
+ "
\n",
+ " \n",
+ " | BVCF | \n",
+ " Bibliothèque de la Ville de La Chaux-de-Fonds | \n",
+ " [{'langCode': 'fr', 'name': 'Bibliothèque de l... | \n",
+ "
\n",
+ " \n",
+ " | BVU | \n",
+ " Bote vom Untersee und Rhein | \n",
+ " [{'langCode': 'de', 'name': 'Bote vom Untersee... | \n",
+ "
\n",
+ " \n",
+ " | CNA | \n",
+ " Centre national de l'audiovisuel | \n",
+ " [{'langCode': 'fr', 'name': 'Centre national d... | \n",
+ "
\n",
+ " \n",
+ " | DR | \n",
+ " Deutschlandradio | \n",
+ " [{'langCode': 'de', 'name': 'Deutschlandradio'... | \n",
+ "
\n",
+ " \n",
+ " | FedGaz | \n",
+ " Swiss Federal Archives | \n",
+ " [{'langCode': 'en', 'name': 'Swiss Federal Arc... | \n",
+ "
\n",
+ " \n",
+ " | FRN | \n",
+ " Freiburger Nachrichten | \n",
+ " [{'langCode': 'de', 'name': 'Freiburger Nachri... | \n",
+ "
\n",
+ " \n",
+ " | Gruyere | \n",
+ " La Gruyère | \n",
+ " [{'langCode': 'fr', 'name': 'La Gruyère'}] | \n",
+ "
\n",
+ " \n",
+ " | INA | \n",
+ " Institut national de l'audiovisuel | \n",
+ " [{'langCode': 'fr', 'name': 'Institut national... | \n",
+ "
\n",
+ " \n",
+ " | KB | \n",
+ " Royal Library of the Netherlands | \n",
+ " [{'langCode': 'en', 'name': 'Royal Library of ... | \n",
+ "
\n",
+ " \n",
+ " | KBR | \n",
+ " Royal Library of Belgium | \n",
+ " [{'langCode': 'en', 'name': 'Royal Library of ... | \n",
+ "
\n",
+ " \n",
+ " | LCE | \n",
+ " Le Confédéré | \n",
+ " [{'langCode': 'fr', 'name': 'Le Confédéré'}] | \n",
+ "
\n",
+ " \n",
+ " | LES | \n",
+ " L'Essor | \n",
+ " [{'langCode': 'fr', 'name': 'L'Essor'}] | \n",
+ "
\n",
+ " \n",
+ " | LeTemps | \n",
+ " Le Temps | \n",
+ " [{'langCode': 'fr', 'name': 'Le Temps'}, {'lan... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name \\\n",
+ "id \n",
+ "ArcInfo ArcInfo \n",
+ "BBC British Broadcasting Corporation \n",
+ "BCUF Bibliothèque cantonale et universitaire de Fri... \n",
+ "BCUL Bibliothèque cantonale et universitaire de Lau... \n",
+ "BL British Library \n",
+ "BNF National Library of France \n",
+ "BNL National Library of Luxembourg \n",
+ "BVCF Bibliothèque de la Ville de La Chaux-de-Fonds \n",
+ "BVU Bote vom Untersee und Rhein \n",
+ "CNA Centre national de l'audiovisuel \n",
+ "DR Deutschlandradio \n",
+ "FedGaz Swiss Federal Archives \n",
+ "FRN Freiburger Nachrichten \n",
+ "Gruyere La Gruyère \n",
+ "INA Institut national de l'audiovisuel \n",
+ "KB Royal Library of the Netherlands \n",
+ "KBR Royal Library of Belgium \n",
+ "LCE Le Confédéré \n",
+ "LES L'Essor \n",
+ "LeTemps Le Temps \n",
+ "\n",
+ " names \n",
+ "id \n",
+ "ArcInfo [{'langCode': 'fr', 'name': 'ArcInfo'}, {'lang... \n",
+ "BBC [{'langCode': 'en', 'name': 'British Broadcast... \n",
+ "BCUF [{'langCode': 'fr', 'name': 'Bibliothèque cant... \n",
+ "BCUL [{'langCode': 'fr', 'name': 'Bibliothèque cant... \n",
+ "BL [{'langCode': 'en', 'name': 'British Library'}] \n",
+ "BNF [{'langCode': 'en', 'name': 'National Library ... \n",
+ "BNL [{'langCode': 'en', 'name': 'National Library ... \n",
+ "BVCF [{'langCode': 'fr', 'name': 'Bibliothèque de l... \n",
+ "BVU [{'langCode': 'de', 'name': 'Bote vom Untersee... \n",
+ "CNA [{'langCode': 'fr', 'name': 'Centre national d... \n",
+ "DR [{'langCode': 'de', 'name': 'Deutschlandradio'... \n",
+ "FedGaz [{'langCode': 'en', 'name': 'Swiss Federal Arc... \n",
+ "FRN [{'langCode': 'de', 'name': 'Freiburger Nachri... \n",
+ "Gruyere [{'langCode': 'fr', 'name': 'La Gruyère'}] \n",
+ "INA [{'langCode': 'fr', 'name': 'Institut national... \n",
+ "KB [{'langCode': 'en', 'name': 'Royal Library of ... \n",
+ "KBR [{'langCode': 'en', 'name': 'Royal Library of ... \n",
+ "LCE [{'langCode': 'fr', 'name': 'Le Confédéré'}] \n",
+ "LES [{'langCode': 'fr', 'name': 'L'Essor'}] \n",
+ "LeTemps [{'langCode': 'fr', 'name': 'Le Temps'}, {'lan... "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.data_providers.find()\n",
+ "result.df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-15",
+ "metadata": {},
+ "source": [
+ "## Explore data provider details\n",
+ "\n",
+ "Data providers have names in multiple languages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "cell-16",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Provider ID: ArcInfo\n",
+ "\n",
+ "Names in different languages:\n",
+ " [fr] ArcInfo\n",
+ " [de] ArcInfo\n",
+ " [en] ArcInfo\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.data_providers.find(limit=1)\n",
+ "provider = result.raw['data'][0]\n",
+ "\n",
+ "print(f\"Provider ID: {provider['id']}\")\n",
+ "print(f\"\\nNames in different languages:\")\n",
+ "for name_info in provider.get('names', []):\n",
+ " print(f\" [{name_info['langCode']}] {name_info['name']}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-17",
+ "metadata": {},
+ "source": [
+ "## Find content from a specific data provider\n",
+ "\n",
+ "Once you have a data provider ID, you can use it to search for content items from that provider."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "cell-18",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Searching content from provider: BNL\n",
+ "\n",
+ "Found 3641155 content items total\n",
+ "Showing 10 items from this page\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Get a data provider\n",
+ "providers = impresso.data_providers.find(term=\"lux\", limit=1)\n",
+ "provider_id = providers.raw['data'][0]['id']\n",
+ "print(f\"Searching content from provider: {provider_id}\")\n",
+ "\n",
+ "# Search for content items from this data provider\n",
+ "# Note: This uses the search resource with a provider filter\n",
+ "content_result = impresso.search.find(\n",
+ " partner_id=provider_id,\n",
+ " limit=10,\n",
+ ")\n",
+ "\n",
+ "print(f\"\\nFound {content_result.total} content items total\")\n",
+ "print(f\"Showing {content_result.size} items from this page\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "impresso-py3.13 (3.13.7)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/notebooks/entities.ipynb b/examples/notebooks/entities.ipynb
index 7f0b2f9..9fb3e92 100644
--- a/examples/notebooks/entities.ipynb
+++ b/examples/notebooks/entities.ipynb
@@ -2,18 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🎉 You are now connected to the Impresso API! 🎉\n",
- "🔗 Using API: https://dev.impresso-project.ch/public-api/v1\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from impresso import connect\n",
"\n",
@@ -31,7 +22,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -40,7 +31,7 @@
"\n",
"
\n",
"
FindEntities result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 1 items of 1 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -64,18 +55,39 @@
"
\n",
" \n",
" | \n",
+ " label | \n",
+ " type | \n",
+ " wikidataId | \n",
+ " totalMentions | \n",
+ " totalContentItems | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | 2-50-Douglas_Adams | \n",
+ " Douglas Adams | \n",
+ " person | \n",
+ " Q42 | \n",
+ " 0 | \n",
+ " 20 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 6,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -93,7 +105,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -102,7 +114,7 @@
"\n",
"
\n",
"
FindEntities result
\n",
- "
Contains 5 items of 5 total items.
\n",
+ "
Contains 10 items (0 - 10) of 18 total items.
\n",
"
\n",
"
\n",
"
\n",
@@ -127,9 +139,9 @@
" | \n",
" label | \n",
" type | \n",
- " wikidataId | \n",
" totalMentions | \n",
" totalContentItems | \n",
+ " wikidataId | \n",
" \n",
" \n",
" | uid | \n",
@@ -142,38 +154,38 @@
" \n",
"
\n",
" \n",
- " | 2-54-San_José | \n",
- " San José | \n",
+ " 2-54-San_José_$28$Californie$29$ | \n",
+ " San José (Californie) | \n",
" location | \n",
- " Q173718 | \n",
- " 163 | \n",
- " 114 | \n",
+ " 0 | \n",
+ " 339 | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | 2-54-Provinz_San_José | \n",
- " Provinz San José | \n",
+ " 2-54-San_José_$28$Costa_Rica$29$ | \n",
+ " San José (Costa Rica) | \n",
" location | \n",
- " Q647808 | \n",
- " 2 | \n",
- " 2 | \n",
+ " 0 | \n",
+ " 116 | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | 2-54-San_José_$28$Costa_Rica$29$ | \n",
- " San José (Costa Rica) | \n",
+ " 2-54-San_José_$28$Kalifornien$29$ | \n",
+ " San José (Kalifornien) | \n",
" location | \n",
+ " 0 | \n",
+ " 50 | \n",
" NaN | \n",
- " 2 | \n",
- " 2 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 7,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -184,7 +196,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -193,7 +205,7 @@
"\n",
"
\n",
"
FindEntities result
\n",
- "
Contains 2 items of 2 total items.
\n",
+ "
Contains 4 items of 4 total items.
\n",
"
\n",
"
\n",
"
\n",
@@ -233,30 +245,38 @@
" \n",
" \n",
" \n",
- " | 2-50-San_José | \n",
- " San José | \n",
- " person | \n",
- " Q173718 | \n",
- " 20 | \n",
- " 20 | \n",
- "
\n",
- " \n",
" | 2-50-José_de_San_Martín | \n",
" José de San Martín | \n",
" person | \n",
" Q134160 | \n",
- " 8 | \n",
- " 8 | \n",
+ " 0 | \n",
+ " 159 | \n",
+ "
\n",
+ " \n",
+ " | 2-50-José_María_San_Martín | \n",
+ " José María San Martín | \n",
+ " person | \n",
+ " Q880867 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 2-50-Juan_José_Espinosa_San_Martín | \n",
+ " Juan José Espinosa San Martín | \n",
+ " person | \n",
+ " Q5810539 | \n",
+ " 0 | \n",
+ " 3 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 8,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -274,19 +294,21 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Total items in the result set: 87\n",
- "Got page 0 - 20 of 87. The first title is Angers\n",
- "Got page 20 - 40 of 87. The first title is Château Saint-Ange\n",
- "Got page 40 - 60 of 87. The first title is Sainte-Marie-des-Anges\n",
- "Got page 60 - 80 of 87. The first title is Museum für angewandte Kunst\n",
- "Got page 80 - 87 of 87. The first title is Angelo\n"
+ "Total items in the result set: 129\n",
+ "Got page 0 - 20 of 129. The first title is Angers\n",
+ "Got page 20 - 40 of 129. The first title is Lamadeleine-Val-des-Anges\n",
+ "Got page 40 - 60 of 129. The first title is Watts (Los Angeles)\n",
+ "Got page 60 - 80 of 129. The first title is Université de Californie à Los Angeles\n",
+ "Got page 80 - 100 of 129. The first title is Saint-Ange\n",
+ "Got page 100 - 120 of 129. The first title is Angelsdorf\n",
+ "Got page 120 - 129 of 129. The first title is Los Angeles Union Station\n"
]
}
],
@@ -314,7 +336,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -352,18 +374,6 @@
" wikidataId | \n",
" totalMentions | \n",
" totalContentItems | \n",
- " wikidataDetails.id | \n",
- " wikidataDetails.type | \n",
- " wikidataDetails.labels.de | \n",
- " wikidataDetails.labels.en | \n",
- " wikidataDetails.labels.fr | \n",
- " wikidataDetails.labels.it | \n",
- " wikidataDetails.descriptions.fr | \n",
- " wikidataDetails.descriptions.it | \n",
- " wikidataDetails.descriptions.de | \n",
- " wikidataDetails.descriptions.en | \n",
- " wikidataDetails.coordinates.latitude | \n",
- " wikidataDetails.coordinates.longitude | \n",
" \n",
" \n",
" | uid | \n",
@@ -372,18 +382,6 @@
" | \n",
" | \n",
" | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
"
\n",
" \n",
" \n",
@@ -392,30 +390,18 @@
" Paris | \n",
" location | \n",
" Q90 | \n",
- " 236823 | \n",
- " 211169 | \n",
- " Q90 | \n",
- " location | \n",
- " Paris | \n",
- " Paris | \n",
- " Paris | \n",
- " Parigi | \n",
- " capitale de la France | \n",
- " capitale della Francia | \n",
- " Hauptstadt und bevölkerungsreichste Stadt Fran... | \n",
- " capital city of France | \n",
- " 48.856667 | \n",
- " 2.352222 | \n",
+ " 0 | \n",
+ " 3713955 | \n",
" \n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 12,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -433,7 +419,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -468,9 +454,9 @@
" | \n",
" label | \n",
" type | \n",
- " wikidataId | \n",
" totalMentions | \n",
" totalContentItems | \n",
+ " wikidataId | \n",
" \n",
" \n",
" | uid | \n",
@@ -483,30 +469,30 @@
" \n",
"
\n",
" \n",
- " | 2-54-San_José | \n",
- " San José | \n",
- " location | \n",
- " Q173718 | \n",
- " 163 | \n",
- " 114 | \n",
- "
\n",
- " \n",
" | 2-54-San_José_$28$Costa_Rica$29$ | \n",
" San José (Costa Rica) | \n",
" location | \n",
+ " 0 | \n",
+ " 116 | \n",
" NaN | \n",
+ "
\n",
+ " \n",
+ " | 2-54-San_José | \n",
+ " San José | \n",
+ " location | \n",
+ " 0 | \n",
" 2 | \n",
- " 2 | \n",
+ " Q3070 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 14,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -530,7 +516,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -539,7 +525,7 @@
"\n",
"
\n",
"
FindEntities result
\n",
- "
Contains 10 items (0 - 10) of 18 total items.
\n",
+ "
Contains 9 items of 9 total items.
\n",
"
\n",
"
\n",
"
\n",
@@ -579,38 +565,38 @@
" \n",
" \n",
" \n",
- " | 2-54-Terre | \n",
+ " 2-53-Terre | \n",
" Terre | \n",
- " location | \n",
+ " organisation | \n",
" Q2 | \n",
- " 1947 | \n",
- " 1800 | \n",
+ " 0 | \n",
+ " 1995 | \n",
"
\n",
" \n",
- " | 2-54-Erde | \n",
+ " 2-53-Erde | \n",
" Erde | \n",
- " location | \n",
+ " organisation | \n",
" Q2 | \n",
- " 576 | \n",
- " 575 | \n",
+ " 0 | \n",
+ " 373 | \n",
"
\n",
" \n",
- " | 2-53-Terre | \n",
- " Terre | \n",
+ " 2-53-Mort | \n",
+ " Mort | \n",
" organisation | \n",
- " Q2 | \n",
- " 421 | \n",
- " 410 | \n",
+ " Q4 | \n",
+ " 0 | \n",
+ " 316 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 15,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -629,7 +615,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -638,7 +624,7 @@
"\n",
"
\n",
"
FindEntities result
\n",
- "
Contains 10 items (0 - 10) of 1111 total items.
\n",
+ "
Contains 10 items (0 - 10) of 2445 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -667,18 +653,6 @@
"
wikidataId | \n",
"
totalMentions | \n",
"
totalContentItems | \n",
- "
wikidataDetails.id | \n",
- "
wikidataDetails.type | \n",
- "
wikidataDetails.labels.de | \n",
- "
wikidataDetails.labels.en | \n",
- "
wikidataDetails.labels.fr | \n",
- "
wikidataDetails.labels.it | \n",
- "
wikidataDetails.descriptions.fr | \n",
- "
wikidataDetails.descriptions.it | \n",
- "
wikidataDetails.descriptions.de | \n",
- "
wikidataDetails.descriptions.en | \n",
- "
wikidataDetails.coordinates.latitude | \n",
- "
wikidataDetails.coordinates.longitude | \n",
" \n",
"
\n",
" | uid | \n",
@@ -687,18 +661,6 @@
" | \n",
" | \n",
" | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
"
\n",
" \n",
"
\n",
@@ -707,70 +669,34 @@
" Paris | \n",
" location | \n",
" Q90 | \n",
- " 236823 | \n",
- " 211169 | \n",
- " Q90 | \n",
- " location | \n",
- " Paris | \n",
- " Paris | \n",
- " Paris | \n",
- " Parigi | \n",
- " capitale de la France | \n",
- " capitale della Francia | \n",
- " Hauptstadt und bevölkerungsreichste Stadt Fran... | \n",
- " capital city of France | \n",
- " 48.856667 | \n",
- " 2.352222 | \n",
+ " 0 | \n",
+ " 3713955 | \n",
" \n",
" \n",
" | 2-53-Paris | \n",
" Paris | \n",
" organisation | \n",
" Q90 | \n",
- " 6713 | \n",
- " 5099 | \n",
- " Q90 | \n",
- " location | \n",
- " Paris | \n",
- " Paris | \n",
- " Paris | \n",
- " Parigi | \n",
- " capitale de la France | \n",
- " capitale della Francia | \n",
- " Hauptstadt und bevölkerungsreichste Stadt Fran... | \n",
- " capital city of France | \n",
- " 48.856667 | \n",
- " 2.352222 | \n",
+ " 0 | \n",
+ " 109639 | \n",
"
\n",
" \n",
- " | 2-50-Paris | \n",
- " Paris | \n",
- " person | \n",
- " Q90 | \n",
- " 4986 | \n",
- " 4445 | \n",
- " Q90 | \n",
- " location | \n",
- " Paris | \n",
- " Paris | \n",
- " Paris | \n",
- " Parigi | \n",
- " capitale de la France | \n",
- " capitale della Francia | \n",
- " Hauptstadt und bevölkerungsreichste Stadt Fran... | \n",
- " capital city of France | \n",
- " 48.856667 | \n",
- " 2.352222 | \n",
+ " 2-53-Banque_de_Paris | \n",
+ " Banque de Paris | \n",
+ " organisation | \n",
+ " Q69404174 | \n",
+ " 0 | \n",
+ " 28862 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 17,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -782,7 +708,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "impresso-py3.11",
+ "display_name": "impresso-py3.13 (3.13.7)",
"language": "python",
"name": "python3"
},
@@ -796,7 +722,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.13.7"
}
},
"nbformat": 4,
diff --git a/examples/notebooks/experiments.ipynb b/examples/notebooks/experiments.ipynb
new file mode 100644
index 0000000..d225043
--- /dev/null
+++ b/examples/notebooks/experiments.ipynb
@@ -0,0 +1,875 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "8925620d-a388-4e65-948d-2dbb8110da50",
+ "metadata": {},
+ "source": [
+ "# Exploring Sentence, Chunk, and Entity Embeddings for Retrieval \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f9b62617-ad8b-4a17-92f7-02bf97ac173f",
+ "metadata": {},
+ "source": [
+ "This notebook provides a **proof of concept (PoC)** for working with embeddings in the *Impresso* corpus across three levels of granularity: \n",
+ "\n",
+ "1. **Sentence embeddings** – fine-grained retrieval at the level of individual sentences (e.g., `lepetitparisien-1912-11-13-a-i0001-s-11`). \n",
+ "2. **Chunk embeddings** – broader retrieval at the level of aggregated text chunks (e.g., `lepetitparisien-1912-11-13-a-i0001-c-1`). \n",
+ "3. **Entity embeddings** – retrieval of linked entities (e.g., `Q380083` or `Jonas Furrer`). \n",
+ "\n",
+ "We refer to sentences and chunks as _subdocuments_ (_subdocs_ for short), and we test their embeddings with two complementary query scenarios: \n",
+ "\n",
+ "- **In-corpus queries** – selecting a query directly from the *Impresso* corpus. \n",
+ "- **Out-of-corpus queries** – embedding an external query (e.g., manually formulated or from another source). \n",
+ "\n",
+ "For the purpose of this PoC:\n",
+ "\n",
+ "👉 The **subdocs** = sentence and chunk embeddings for **front pages** of all newspapers in **1912 (Titanic)** and **1986 (Chernobyl)**. \n",
+ "👉 The **entities** = entity embeddings for **person entities** from the ±5-year window around each of those years.\n",
+ "\n",
+ "From now on, we refer to this PoC as a set of **experiments**. The experiments use direct queries to our internal retrieval system ([Solr](https://solr.apache.org/))."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8f950b1-132d-4e96-8b24-6d7ef3205f44",
+ "metadata": {},
+ "source": [
+ "Let's first connect to Impresso:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "babd8f0e-b1fd-4000-8bd4-386fbdf83da8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🎉 You are now connected to the Impresso API! 🎉\n",
+ "🔗 Using API: https://dev.impresso-project.ch/public-api/v1\n"
+ ]
+ }
+ ],
+ "source": [
+ "from impresso import connect\n",
+ "\n",
+ "impresso = connect('https://dev.impresso-project.ch/public-api/v1')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "27cbe663",
+ "metadata": {},
+ "source": [
+ "## Get all experiments"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "509603ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindExperiments result
\n",
+ "
Contains 2 items of 2 total items.
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " description | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | subdoc-embeddings | \n",
+ " Experiment with sentence and character level e... | \n",
+ " \\n Generates embeddings for subdocuments usin... | \n",
+ "
\n",
+ " \n",
+ " | entity-profiles | \n",
+ " Experiment with entity profiles and their embe... | \n",
+ " \\n Generates embeddings for subdocuments usin... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.experiments.find()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aa567e33",
+ "metadata": {},
+ "source": [
+ "### Subdoc Embeddings Experiments"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4969dfde-1089-45c2-bea1-f99c3de2118b",
+ "metadata": {},
+ "source": [
+ "#### Sentence Embeddings - In-corpus queries\n",
+ "\n",
+ "Let's search for some documents, take their embeddings and then search by embedding in Impresso."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "5ef16862-23ad-4447-be29-49be71260333",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Got 5 Solr document(s)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "sentence = \"Le congrès international s'est tenu à Paris pour discuter des avancées scientifiques de la décennie.\"\n",
+ "\n",
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"subdoc-embeddings\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": f\"content_txt_fr:({sentence}) AND type_s:s\", # type_s:s restricts search to sentences\n",
+ " \"limit\": 5,\n",
+ " \"params\": {\"hl\": False}\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "docs = result[\"solrResponse\"][\"response\"][\"docs\"]\n",
+ "print(f\"✅ Got {len(docs)} Solr document(s)\\n\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "df2b9dab",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Result 1 ---\n",
+ "Mais c’ est une question qui ne peut se régler en congrès internationaux et c’ est pourquoi le pays cjui ne présente pas une natalité suffisante sera étranglé, ce qui ne sera d’ ailleurs qu’ une avance sur son sui- cide.\n",
+ "--- Result 2 ---\n",
+ "ÉDITION DE PARIS Les Eves nouvelles Les suffragettes françaises on * tenu, dimanche, une réunion où fut discutée l' intéressante question du vote municipal des femmes.\n",
+ "--- Result 3 ---\n",
+ "Le 20 juillet 1889, au Congrès socialiste intermfci mal de Paris, il proposa la résolution suivante : « Il sera organisé une grande manifestation internationale à dr.\n",
+ "--- Result 4 ---\n",
+ "Le congrès socialiste de 19 10 a réservé la question de principe, mais il avait stipulé que conformément aux résolutions des congrès internationaux de Paris et d’ Amsterdam, il n’ admettait pas comme possible la participation individuelle de certains socialistes, sans l’ assentiment du parti ouvrier, à un ministère quelconque ; le congrès de 1910 avait décidé, pour le surplus, que la question de la participation gouvernementale est « une question de tactique, et non de principe », qui devra être tranchée par un congrès spécial.\n",
+ "--- Result 5 ---\n",
+ "A l' un des derniers congrès de radiologie, tenu à Bruxelles en septembre 1910.\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, d in enumerate(docs, 1):\n",
+ " print(f\"--- Result {i} ---\")\n",
+ " print(d.get(\"content_txt_fr\", \"[No text]\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "c3dcd90c-0db4-4980-acb5-6f90c2ba7b77",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['id',\n",
+ " 'imp_ids_ss',\n",
+ " 'surfaces_ss',\n",
+ " 'ci_ids_ss',\n",
+ " 'mention_keys_ss',\n",
+ " 'ci_lg_s',\n",
+ " 'wiki_masterlabel_s',\n",
+ " 'wiki_url_s',\n",
+ " 'date_of_birth_dt',\n",
+ " 'date_of_death_dt',\n",
+ " 'wkd_occupations_ss',\n",
+ " 'wkd_occupation_qids_ss',\n",
+ " 'wkd_entity_types_ss',\n",
+ " 'wiki_summaries_t',\n",
+ " 'contexts_ss',\n",
+ " 'entity_mixed_emb_v768',\n",
+ " 'entity_encyc_emb_v768',\n",
+ " 'entity_media_emb_v768',\n",
+ " '_version_',\n",
+ " '_root_']"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list(docs[0].keys())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ca0d7ea8-7545-449a-aad3-e95d2ea33fb2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Mais c’ est une question qui ne peut se régler en congrès internationaux et c’ est pourquoi le pays cjui ne présente pas une natalité suffisante sera étranglé, ce qui ne sera d’ ailleurs qu’ une avanc'"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "docs[0]['content_txt_fr'][:200] # first 200 characters of the document content"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54991815-d7d6-4436-82eb-d1948a35aaf8",
+ "metadata": {},
+ "source": [
+ "Let's take the first returned document's embedding."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0b4d2ef2-bfc1-4941-aba4-ec6ab7002310",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[-0.081427164, 0.064372316, -0.045108054]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "embedding = docs[0]['gte_multi_v768']\n",
+ "\n",
+ "embedding[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "f85b2817",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "embedding = result['solrResponse']['response']['docs'][0]['gte_multi_v768']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "b97665a0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Got 3 Solr document(s)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"subdoc-embeddings\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": \"{!knn f=gte_multi_v768 topK=3}\" + str(embedding),\n",
+ " \"limit\": 3,\n",
+ " \"params\": {\n",
+ " \"fq\": \"type_s:s\", # type_s:s restricts search to sentences (s=sentence)\n",
+ " # \"fl\": \"id,score,content_txt_fr,ci_id_s\", -- add these later if you want to return only specific fields\n",
+ " # for now let's return everything\n",
+ " \"hl\": False\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "docs = result[\"solrResponse\"][\"response\"][\"docs\"]\n",
+ "print(f\"✅ Got {len(docs)} Solr document(s)\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "09ec22e2-8605-47fe-ae74-10371ffb8453",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# docs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "fcc6262c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Result 0 ---\n",
+ "Mais c’ est une question qui ne peut se régler en congrès internationaux et c’ est pourquoi le pays cjui ne présente pas une natalité suffisante sera étranglé, ce qui ne sera d’ ailleurs qu’ une avance sur son sui- cide.\n",
+ "[No text]\n",
+ "--- Result 1 ---\n",
+ "[No text]\n",
+ "weil sie nicht die ganze Nation in dem Parlament vertreten sehen wolle.\n",
+ "--- Result 2 ---\n",
+ "[No text]\n",
+ "Es steht zu hoffen, daß damit die vom Bundesrate getanen völlig gewesen Schritte nicht nutzlos sind, und daß in nicht allzu ferner Zeit doch noch eine solche internationale Konferenz sich mit dein Problem beschäftigen wird.\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, d in enumerate(docs):\n",
+ " print(f\"--- Result {i} ---\")\n",
+ " print(d.get(\"content_txt_fr\", \"[No text]\"))\n",
+ " print(d.get(\"content_txt_de\", \"[No text]\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9822c685-ca20-4277-b4a5-a26290350627",
+ "metadata": {},
+ "source": [
+ "#### Sentence Embeddings - Out-of-corpus queries\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "5e6798ed",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'gte-768:SldQPEKgFj3hqBq9KbZDO44Ntr2m3B28HGQiPBZYHDzoyi89VZGjvdn/jD376Ia83lSSO/TikzyG1fu9RWKxPOTYkj0jF8k99JncPaxsarx18VI92R20PTlvrbxuzWS9A4uUO6lYFTsWTQ09+QKVvVQiXL3mpiE9FbOxvRhr/DyplVE8vIZ6PQ/ivj0660e90aF9PE0Yrzx12ZC9Awv/O0xEortNwYO8vr0rvTctNrtTsAu87z18PJPXGL2lOO08N3NSPORAgz1FtBY9YC4cPY3tFT0JM8g8aQ5zvHa067v9YS+9wol3vAzUtj2SRWG9jPGMO4I2M7uCBLy8qxM3PaXMYb3Rs1+9ukY5POf2lbycXIc9j/XyvM9A4L29ToA8lBeBPAsP6DwO+3i9y0m4PQqOAL3f1eC8NpOEPDLQpzzHMxK9MQc1PLpK9rwbOxI9EH4ZvZiKrrzGDFi8qVTbuztLPDr/IYQ9HwPZPcMXfT1SmI06B6WOvNNM9jwBFWo4X+24vJDvoL1biHC8vca5O6NWYbySuqW8KkXkvG8SirzGp+w8J0WSvCIjuLtRdjG9irEYvYnl8juWY/Y7iURPvEciEL1BHzm9gkebvSwx3Dz4smQ8f1lKPRGzYD0NzEa9ujSDOyduHLyrzYI78vxhvT/5xb24sBi9IgSzPBhvsr1Yufc8kyFIvYcaGT2KvtK69kKCPI9oD73T+bq8vFyDvW2jp7wwuTM9+FC6PFAzlbuWlns7sEdiPbj6ibxZYEI9Cr4dPMEeXT2Qsom8FObQvPubDT3o8xs9N69IOwMPsLtImW09uayXvFIUMDz8ssK7eXa7vIYX6zw2G3O9R5eKvCH+kz0plv48P516PSE+lb1heNW8rjr2O8RRrTvoQ8O84cOaPX0YbjwHJ008qCejPNlKFDy5BPO85f6RvDaqWzsYDvU8DCqAPck/ib15Z549f/jiO5MYiL2aEFi9g2uNvdMusDuMeWW89HEiOrifxLuT5/68qWlzPELhkLxlnx26bZ5gvQMIqzzIuh+9I29dvb5NzL2iVKg8H+dSPSoi4jwsUfQ7Us74PFfB/Tw7mQc9gCcMO8i3DrxQuqI93QKwvZBMlbwOeDu92eZ2PfDgHr0hceM8eXcOPLT6Qj0xYc28MxYQvNjWWLyGeAy9Hy5PPIzfzbvL/FO8BdwMvVc8vruTIwY9kj3oPBX2nrwDmZg8s0gKvax5g7yExK27HuzZvARX8rxO4WO9LiUxPHk4IT0GuSM96wYpvDCkWj17SpU97A4TPLheAjzD2A09msmtPYejgTuQ36Q7MCxBO5jv0byGpwy9y4fEPKPWYTw8C4g8Bde+O7lKoTxL52W9c3bfvAC3R7zzEWU9fjGdPSf0DzwujVS8GTTXu+F63rzw8/C85oxCPBictr1Uqt48f3ANvUjtvLxsUoS9akKavbwdZjsdC4K9g4/JPKI9ebwsjwG9IGL4vBjdEb3vcwY9vHraPIgLpjyJa9U8pI+6Ou8tczyHiyK9BrZcPAtFID2hL428dssKPHPZAT3vj328gukevVedsjoP2l67Q87MPG0+UL2jQ6C9SCOpvIoPLjt5qqa98ClGvbh/grxj5t88MaO4vEMEHr1I0lW7MtXMu2BKdr3D3Y07pDxFPSTOkLt3nwO88CIvOwINJ732aYo8DmtXvWzYAz2rKFi81umjPCFn2rztD2Q8nAO6PZrZfrx1rY88hdZvu84DZrxGVgm6oOdnO6M5IL3EDR+8CHvvPOqfWjzwU9g8G9LWO57VkjwS7AA9cNl4PZgLRLwTH5Q8GPLaPPB0Uj14VAO81bAXvTMsXT1miSo9yZ/Lu1DVBD1gAAc8HfPduVAtArpzzAg9PhOfPLZOoTyD8T09ZoOGPCkKzDwVN9I7tbN4PbC/ojy33809mnSJPK74u7ydqI48Py87PfUwTj07PyI99QgOPCUkML1rGmI8xfWuuxJrTbo5oAa9hQKM
vCtBJT0LqwO92gktPYYOHzqFJZI8K9zBPKvZY7ydjKM8ITWPPL6STj3Jk6+8Yjj8Ou6GD73hv548IxnFPHO/hroNlK07y1xbPSNPbry/EkW9EpY0PJ+llTw3WyC92u1ePMt68jzVFhk7dIrXu7AvwTzE1HW8OMjPvJxmID3Hi2Q9MhslvG0I0jyDNuc7PX2RPEIv5ryD6Q89mA2xPFmPkzxMmQ29ZG7EO7C8mzxmC2Q7xvNPPdHN9T38sj29uaZ1Oon1KD3lHQw9cZa2vOLyobxI5Ky73ognvUGXjj34NMG87i4fPcQCQLwmKxw95JgkvYD2rjoKs9e8vh9nPPnyFT2SxMq7QSfSPO2Uaj1cg2e9ZVTSvOJi2DvWmGO9tXSlvRIh3bzDv+a7AHGaPVdrlLwZwgM82pskvYYZmrugogK94/xUPAnAaDybN2y8ac+VvNSJs7vLEAe8T13NuFKJHTxaIeC8txy5OQR2Qz3OV7A9f1veO9WCCjxkPKo9qTedvIziM70ku8y7M3sTPb3zKjxasbC8m/LcOuQUT7zwkxY8y7ZRPc+QLD2liRI8j41UPBwZvzzd2iq8n0UivZOIIL14hOQ8jOhSubJuhb2wygI9n02aPBAwgD1I4i89suYWvZTsDT30Sgk88ViYvP462bx4ERi9eiHpvKMiMLpM2BS9quuSuhE92rwTqsQ8Ql2FvETkZ7tTU4881053vYSMBjzJ/vg8msYGPJBYUbzIuvE84bzDvDuObLzGBgy9nbQ8vH4gybwY9nK9TKzFu2hGFT1AzGu8JDQ3vX7INjsP15E807EmvP9zD7xkM4g8brKgPM36ezxeRyC7i50Tuh1DZztFbMM8BXc5PfLF2zyy6Jo8+9Y+vZ4cxzxdaYg8YKHGvDQLAT0wkAo9jIGBO7+dm73l5EI949GWPDyqO737bM68PJ7cPFqxaz0M3g89rmJzPcvVRz2tMTK8h9t9vU60WDzeqeE7TUxlPZwdQj24nly7EYSRvD4t3bxrkTY93UUaPbecEr29YBW9FAoAvcRjHr28S3A8PVcXPbEnEL3aZni9R6o8vVV/4Lz4AGm9awvPvPZMZL2ftXW7FYMZPWYiOL0wyBM9Pd7avB7+4TykTxe8M5GxPB6Rkrvy7zA9fg+/vAAtnbye6wy6UAahPKCmjjsO2li9pAyLPC2+7DzxjVS7tKQLOzZfADwFSYY9SpayPGD5yrwXPOO8U3EzvVMowLvChQY4KoY7PAtcNr3Yn/C8jNorPDs2Ur3Ep/S6ZDcNPOERwzyQOFK91z4FPacj6by5VSG8FWWavHhzozuLHXi8vBl7PcjivTyLEX46ZyszPXFYyDxZqwW8I6o6vWSscb2KCjm82I8TvHHGkTtKaEs8pbAGPczajb2oMwY7jmytvDxx77s/oOe7tHJovRItaryOxPo8n7kkPaxfBj3Pq7E7hl0yvX2MgzxRdrM89UasvBW1pbtT2Co9sG3HuxY7Ej0QcH+83tuKPC6/HrweOfA6aouOvIVCKTzW8Hk9isd6PAE7j7zTPrA95qZbvYaNHr2TUZs8PVvuPO12S70tIwu9GsJWPf3UQT1N6Fe89Q9qvUPpfLsaqlO8r1xbPBQAX7zVRXq9nWmxPEbKWT37ptc8U5c/vAvL+LtCF5o85UzXvIu5UL3a/Tw9BKy9PFy07zz4z487dbTEPNhaBT3kVh49oDU/vMUT7jwseLC8fhhaPYjDjryaaFO7DA0WPFdKgDwvWJC7xXaoPO59nD3bPd47aus1PX0Hr7xLSkC8VMfOO8xbjTxY4+e8qUI6vbafzLp4h+I7o5e/vJoZary7EBu9Edy+PIJmW7we60Y9u7gQO8mIEjx9kI68U0ByPG0WXjyRYJO7st/DPCULnTxz8ha8uPoMvOeHdT1r5KW8GekzPKDXzLy7Bdc7vo8iPToKCDz/+ni9Wdc0PHQYubkXgM461MiGvEauOT3kayA7AsFSvGGLnTsVyyy8WotJ
vSC1m7xcYgk9ibkyPYtmbjwUNoQ80Q3tPJUc1rxMbEq8k0abPXBMgLyeXyu98+qvPMBW/jzV/Fq8ax3AvNz//zyo2KO8XTsxPd3CNr2uj3c9'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sentence = \"Le congrès international s'est tenu à Paris pour discuter des avancées scientifiques de la décennie.\"\n",
+ "\n",
+ "embedding = impresso.tools.embed_text(text=sentence, target=\"text\")\n",
+ "embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "0e58b62c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "768"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import base64\n",
+ "import struct\n",
+ "\n",
+ "# convert base64 string to a float array\n",
+ "_, arr = embedding.split(':')\n",
+ "arr = base64.b64decode(arr)\n",
+ "outof_corpus_emb = [struct.unpack('f', arr[i:i+4])[0] for i in range(0, len(arr), 4)]\n",
+ "\n",
+ "len(outof_corpus_emb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "09489886-3fd4-498e-b534-298dda3ce96c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[0.012716123834252357,\n",
+ " 0.0367739275097847,\n",
+ " -0.037758711725473404,\n",
+ " 0.002986321458593011,\n",
+ " -0.08889304101467133]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "outof_corpus_emb[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "27b47638",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Got 3 Solr document(s)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"subdoc-embeddings\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": \"{!knn f=gte_multi_v768 topK=3}\" + str(outof_corpus_emb),\n",
+ " \"limit\": 3,\n",
+ " \"params\": {\n",
+ " \"hl\": False\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "docs = result[\"solrResponse\"][\"response\"][\"docs\"]\n",
+ "print(f\"✅ Got {len(docs)} Solr document(s)\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "4194268d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Result 0 ---\n",
+ "[No text]\n",
+ "Bei der internationalen Delegierten konferenz der Notare han.\n",
+ "--- Result 1 ---\n",
+ "[No text]\n",
+ "l \" \" \" erwähnten Kongreß « « « gesetzte internatio. \" \"\n",
+ "--- Result 2 ---\n",
+ "Paris, 10 janvier.\n",
+ "[No text]\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, d in enumerate(docs):\n",
+ " print(f\"--- Result {i} ---\")\n",
+ " print(d.get(\"content_txt_fr\", \"[No text]\"))\n",
+ " print(d.get(\"content_txt_de\", \"[No text]\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a37c1ea1-6427-40a0-a3e2-101f0804481b",
+ "metadata": {},
+ "source": [
+ "#### Chunk Embeddings - In-corpus queries\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "2eb1d5ad-b9e6-42f0-9a07-8e38f9c9c2c6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Got 5 Solr document(s)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "chunk = (\n",
+ " \"Le congrès international s'est tenu à Paris pour discuter des avancées scientifiques de la décennie. \"\n",
+ " \"Des chercheurs venus de nombreux pays ont présenté leurs travaux les plus récents dans les domaines de la physique, \"\n",
+ " \"de la biologie et des sciences sociales. \"\n",
+ " \"Les débats ont mis en lumière les progrès réalisés grâce à la collaboration entre institutions européennes et américaines, \"\n",
+ " \"ainsi que les défis à venir pour une recherche plus ouverte et interdisciplinaire. \"\n",
+ " \"La rencontre s’est conclue par l’adoption d’une résolution encourageant la diffusion libre des connaissances scientifiques.\")\n",
+ "\n",
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"subdoc-embeddings\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": f\"content_txt_fr:({chunk}) AND type_s:c\",\n",
+ " \"limit\": 5,\n",
+ " \"params\": {\"hl\": False}\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "docs = result[\"solrResponse\"][\"response\"][\"docs\"]\n",
+ "print(f\"✅ Got {len(docs)} Solr document(s)\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "9d949cbb-3422-435a-8e7e-1816a0ae76b9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Result 0 ---\n",
+ "et leurs gouvernements respectifs. '.. \".'•••En outre, la nouvelle convention accorde la franchise de port pour la correspondance drdniaire des institutions nationales ayant un caractère scientifique et d' intérêt général ; ainsi qu' aux congrès scientifiques sud-américains composés de la majorité des pays de ce continent.Cette sage disposition constitue pour les sciences un bel encouragement et elle contribuera à répandre avec plus de facilité sur les immenses territoires de l' Amérique du Sud les progrès dus aux- efforts du génie humain.D' autre pari, le congrès postal dont nous analysons les travaux n' a pas blié les services rendus jpar la presse dans le domaine de l' éducation - des peuples.Il a cru devoir également favoriser la diffusion des idées, dans le louable désir d' accélérer - l' avènement de la turité - politique des citoyens.C' est pourquoi la franchise de.port est accordée aux éditeurs de journaux quotidiens et de publications périodiques sud-américains pour les exemplaires jusqu' au nombre de deux échangés par.ces éditeurs entre eux., '.Les Américains, qu' ils soient de souche latine ou anglo-saxonne, ont décidément une manière charmante et bien à eux d' apprêter toutes choses.C.-.\n",
+ "[No text]\n",
+ "--- Result 1 ---\n",
+ "Ces deux domaines nécessitent plus que jamais une collaboration au niveau international.Le séjour de jeunes scientifiques et artistes dans d' autres pays industrialisés d' Europe et d' outre-mer permet un transfert et un échange féconds des connaissances.Aucun scientifique, aucun artiste ne peut de nos jours, s' il veut œuvrer à l' avant-garde de sa discipline ou de son art, ignorer les impulsions venant de l' étranger.La manière la plus efficace d' apprendre à connaître une culture et un patrimoine étrangers est, comme toujours, de séjourner dans le pays concerné.Les contacts avec d' autres pays ont toujours été très importants pour la Suisse.Bien que de nombreux établissements de formation supérieure de l' ensemble des pays maintenant en général leurs portes ouvertes aux étrangers, des bourses gouvernementales, dans certains pays, représentent la seule possibilité pour des Suisses d' avoir accès à leurs hautes écoles ou académies.\n",
+ "[No text]\n",
+ "--- Result 2 ---\n",
+ "Il a écrit dix mémoires ou livres ins- pirés par ses recherches expérimentales.Enfin, il a fourni dix-huit mémoires de physique, publiés dans la Revue scientifique, sur l' évolution de la matière.De plus, Gustave Le Bon a donné à la Bibliothèque de philosophie scientifique, fondée et dirigée par lui, cinq volumes, sur L' Evolution de la Matière, La Psychologie de VEducation, La Psychologie politique, Les Opinions et les Croyances, et, tout dernièrement.\n",
+ "[No text]\n",
+ "--- Result 3 ---\n",
+ "Elle a été intellectualiste.Or, la philosophie nouvelle débute par un # critique, subtile et puisante, de La nature des vérités scientifiques, et de la valeur de l' intelligence comme faculté de connaître.Selon Bergson, le domaine propre de l' intelligence, et aussi bien de la science, œuvre de l' intelligenoe, ce n' est pas le vivant, c' est le matériel, l' inorganique. «JA > monde de la vie et de l' âme, en ce qu' il a d' essentiel et de profond, relève non plus de la connaissance scientifique mais d' uno connaissance spéciale, qui est proprement la connaissance philosophique ou métaphysique », ou encore l' intuition ( 2 ).IJrrgson mtfirme tloac la priorité sur 1 activité réfléchie d' une activité plus obscure et plus riche, qui consiste dans la faculté de sympathiser avec les choses, et qui est.très proche peut-être de l' amour.Cette indépendance de la science et de la connaissance profonde ou intuitive, l' une se bornant à prendre contact avec les choses, 1 autre visant à les comprendre, est d' une extrême conséquence.Le champ de la science, ce n' est donc plus le vrai mais l' utile : son rôle c' est de renforcer notre action sur la nature extérieure, d' aider à la satisfaction de nos besoins matériels, et non point de nous faire connaître la vérité.Pour un philosophe wmme Blondel, oomme Le Roy, \" comme Henri Poincaré, la science elle-même est quelque chose d' infiniment moins réel, eu somme, que la philosophie ; c' est une sorte de symbolisme, arbitraire en son principe, suivi et lié dans son développement continu, et qui d' ailleurs n a point à se préoccuper d' expliquer le fond des choses, mais seulement, de constituer un système de relations cohérentes, en vue de certaines fins pratiques.Devant elle, la loi se présente comme une traduction commode du monde extérieur et, non plus comme un décret qui, ravissant à l' homme sa liberté l, prétend guider sa conduite.L apologétique moderne reçoit de cette philosophie une forme nouvelle.\n",
+ "[No text]\n",
+ "--- Result 4 ---\n",
+ "L' idé.en est due au citoyen Lavigue, de Bordeaux, secrétaire de la- « Fédération nationale des Syndicats de France ».Le 20 juillet 1889, au Congrès socialiste intermfci mal de Paris, il proposa la résolution suivante : « Il sera organisé une grande manifestation internationale à dr.te fixe, de manière que, dans tous les pays et dans toutes les grandes villes à la fois, le même jour _ convenu, les travailleurs mettent les pouvoirs publics en demeure de réduire légalement à huit heures la jovrnée de travail et d' appliquer les _ atit es résolutions du Congrès international de Paris. »Ce texte fut adopté d' enthousiasme.\n",
+ "[No text]\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, d in enumerate(docs):\n",
+ " print(f\"--- Result {i} ---\")\n",
+ " print(d.get(\"content_txt_fr\", \"[No text]\"))\n",
+ " print(d.get(\"content_txt_de\", \"[No text]\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "2311f073-e4c7-48cc-9ed1-d595c8c73e60",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[-0.039117645, 0.062711135, -0.060027212]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "embedding = docs[0]['gte_multi_v768']\n",
+ "\n",
+ "embedding[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "18b92de3-53b7-4e59-adca-5abf30d0a777",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Got 3 Solr document(s)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"subdoc-embeddings\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": \"{!knn f=gte_multi_v768 topK=3}\" + str(embedding),\n",
+ " \"limit\": 3,\n",
+ " \"params\": {\n",
+ " \"fq\": \"type_s:c\", # type_s:c restricts search to chunks! (c=chunk)\n",
+ " # \"fl\": \"id,score,content_txt_fr,ci_id_s\", -- add these later if you want to return only specific fields\n",
+ " # for now let's return everything\n",
+ " \"hl\": False\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "docs = result[\"solrResponse\"][\"response\"][\"docs\"]\n",
+ "print(f\"✅ Got {len(docs)} Solr document(s)\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "96684e03-a785-4c6f-bb2d-443db7566995",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Result 0 ---\n",
+ "[No text]\n",
+ "gskosten Briefbeföid «.auf die eigentliche Beförderung zwei Drittel aber auf den den Schalterdienst « nd Bestelldienst fallen.Der große Gedanke macht « Schule : er iibec » schritt die Grenzen des Landes ud n versuchte sich auch auf internationalem Boden.Nebst dem Weltpostverein, der uns im Jahre 1875 das internationale Einheitsporto für Briefe brachte, entstanden eine Anzahl Sonderpost der.eine, die Hauptsächlich den Zweck hatten, d « Inlandsporto der Vertragsstaaten auch auf ihren ieg « nseitiaen Auhenveikehr auszudehnen.Selbst uor dem Meere macht « das Einheitsporlo « ich » Halt.Im Jahre 1898 Antrag führte England auf den seines Abgeordneten Henniker-Heaton im Verkehr mit seinen Kolonien das Penny.Porto ein ; Deutschland folgte diesem Beispiele 1N99. Seit 19N8 19l ) 9 verkehrt Großbritannien unö seit auch Deutschland mit den Vereinigten Staaten von Amerika zum Inlandssahe.Begehren Auch in der Schweiz wurden schon laut, öfters die ähnliche Abkommen mit unsern Nachbarstaaten forderten ; nackig doch hart- wehren sich maßgebenden unsere Verwalfundstellen stets dagegen.Immer wird von ihnen nur darauf hingewiesen, baß die nung Ausdeh.\n",
+ "--- Result 1 ---\n",
+ "[No text]\n",
+ "treffenden Branche.Im allgemeinen kann man sagen, batz bei de « gegenwärtigen Sachlage die Ve » arbeltun « de » südamerikanischen Markte » durch entsprechen » vorgebildete Meisende und Spe » zialagenten der Vermittlung durch allgemein « Exporthäuser bei weitem vorzuziehen ist.Frei » lich ist es bei der kleinen Zahl wirklich zuverlässige, und vertrauenswürdiger Agentuifiimen auf den südamerikanischen Plätzen nicht leicht, einen « « igneten Vertreter zu finden und die Fabriken weiden sich im allgemeinen dazu entschließen müssen, neben einer angemessene n Provision einen festen Bureau zufchiih zu leisten und wohlassortierte Muster giati » zu stellen.In der Papierbranche und den ihr verwandten Geschäftszweigen hat der Gedanke, direkt zu exportleren, gleichfalls gemacht.\n",
+ "--- Result 2 ---\n",
+ "[No text]\n",
+ "Am August 24. hat Präsident Taft die Pauamakaual'Vorlage unterzeichnet und ihr ein Memorandum an den Kongreß beigegeben.In Veröffentlichung gelangten dieser zur Denk » schrift führt Taft aus, die Vorlage sei eine der segensreichsten, die je erlassen worden.Er sehe trotz den gegen sie erhobenen Einwendungen lei » nen Grund, bie unbedingt notwendigen Vorkeh » runge » zu verschieben, damit die Welt ihre Vor » bereitungen für die Eröffnung des Panamakanals treffen lönne, wissend, unter welchen Bedingungen dies geschehen werde.Eingehend be » ' handelt das Memorandum die verschiedenen Einwendungen, am ausführlichsten den eng « wegen der Verletzung t » es lischen Protest Ha y. » P a u n c e f o t e'V e r t r a g e s. Dieser Abschnitt darf deshalb eine besonderes Interesse beanspruchen, weil einiges hier zum erstenmal Mls der Protestnote Englands bekannt wird.Taft kommt zu dem Schluß, dah die Vereinigten Staaten üurch den genannten Vertrag sich nicht des Nechts begeben hätten, ihre Schiffe abgaben » frei zu machen oder die Zölle zurüszugeben.Ar « tikel 3 des Vertrages, um den der Streit sich drehe, sei eine Erklärung der Vereinigten Staa » ten, daß der Kanal neutral bleiben solle und bah die Vereinigten Staaten alle Staaten gleich behandeln wollten, sofern diese die Verträgst « « binnimaen erfüllen.DerArtikel stelle mit andern Worten eine Meistbegünstigungsklausel dar, deren Unterlage nicht die Vorteile seien, die die Vereinigten Staaten eigenen ihren Landeskin » öern gewähren, sondern bie Behandlung, die sie andern Nationen angedeihen liehen.Der eng » tische Einspruch würde zu der absurden Schluß » folgerung leiten, daß die Regierung, die den Kn » nal baut, unterhält und verteidigt, sich um das Recht verkürzt sieht, ihren eigene » Handel nach eigenem Ermessen zu führen, während alle an » dern Nationen in dem Wettbewerb mit Amerika dieses Recht uneingeschränkt besitzen, nämlich das Recht der Iollrü'ckvergütungen.Taft protestiert gegen diese Ansicht, als ob die 
Vereinigten Staaten auf das Recht, ihren Handel zu regeln, verzichten sollten, ein Recht, auf das wederGroßbritannien noch eine andere den Kanal durch » fahrende Nation verzichtet hätte oder verzichten Wolle.Die hat der Nill wie dem sie beglei » renden Memorandum Tafts sofort einen Kom » mental gewidmet, der an Deutlichkeit nichts zu wünschen übrig Iaht und die Stimmung in Eng « land klar illustriert.DaS Londoner Weltblatt bemerkt zunächst, daß der Wortlaut des Pa » namakanalgesetzes sowie der Denkschrift des Präsidenten vorliege, noch wicht baß aber, wenn die telegraphischen Berichte nur richtig einigermaßen seien, das Gesetz mit dem offenkundigen Hin » des Hay-Pauncefote-Vertrages unvereinbar sei.Der letzte Vorschlag des Präsidenten sei etwas Neues in der Geschichte deS Völker » « Feuilleton.\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, d in enumerate(docs):\n",
+ " print(f\"--- Result {i} ---\")\n",
+ " print(d.get(\"content_txt_fr\", \"[No text]\"))\n",
+ " print(d.get(\"content_txt_de\", \"[No text]\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8fc1e72",
+ "metadata": {},
+ "source": [
+ "## Entity profiles experiment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "686d1e8f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['id',\n",
+ " 'imp_ids_ss',\n",
+ " 'surfaces_ss',\n",
+ " 'ci_ids_ss',\n",
+ " 'mention_keys_ss',\n",
+ " 'ci_lg_s',\n",
+ " 'wiki_masterlabel_s',\n",
+ " 'wiki_url_s',\n",
+ " 'date_of_birth_dt',\n",
+ " 'date_of_death_dt',\n",
+ " 'wkd_occupations_ss',\n",
+ " 'wkd_occupation_qids_ss',\n",
+ " 'wkd_entity_types_ss',\n",
+ " 'wiki_summaries_t',\n",
+ " 'contexts_ss',\n",
+ " 'entity_mixed_emb_v768',\n",
+ " 'entity_encyc_emb_v768',\n",
+ " 'entity_media_emb_v768',\n",
+ " '_version_',\n",
+ " '_root_']"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"entity-profiles\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": \"wiki_url_s:*Simone*de*Beauvoir*\",\n",
+ " \"limit\": 1,\n",
+ " \"params\": {\n",
+ " \"hl\": False\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "entity_doc = result['solrResponse']['response']['docs'][0]\n",
+ "\n",
+ "list(entity_doc.keys())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "28d1b084-853b-4e61-8a65-0d61146eb522",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['political philosopher',\n",
+ " 'journalist',\n",
+ " 'novelist',\n",
+ " 'autobiographer',\n",
+ " 'essayist',\n",
+ " 'political activist',\n",
+ " 'diarist',\n",
+ " 'women letter writer',\n",
+ " 'philosopher',\n",
+ " 'literary critic',\n",
+ " 'writer',\n",
+ " 'author',\n",
+ " 'feminist',\n",
+ " 'philosophy teacher']"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "entity_doc['wkd_occupations_ss']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "677ca42e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://fr.wikipedia.org/wiki/Hélène_de_Beauvoir\n",
+ "https://fr.wikipedia.org/wiki/Jean_Beauvoir\n",
+ "https://fr.wikipedia.org/wiki/Simone_Chalon\n",
+ "https://fr.wikipedia.org/wiki/Sylvia_Earle\n",
+ "https://fr.wikipedia.org/wiki/Gustave_Simon\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.experiments.execute(\n",
+ " experiment_id=\"entity-profiles\",\n",
+ " body={\n",
+ " \"solrPayload\": {\n",
+ " \"query\": \"{!knn f=entity_mixed_emb_v768 topK=5}\" + str(entity_doc['entity_mixed_emb_v768']),\n",
+ " \"filter\": [\n",
+ " f\"-id:{entity_doc['id']}\" # exclude target entity itself\n",
+ " ],\n",
+ " \"limit\": 5,\n",
+ " \"params\": {\n",
+ " \"hl\": False\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ ")\n",
+ "docs = result['solrResponse']['response']['docs']\n",
+ "for doc in docs:\n",
+ " print(doc['wiki_url_s'])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "impresso-py3.13 (3.13.7)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/notebooks/images.ipynb b/examples/notebooks/images.ipynb
new file mode 100644
index 0000000..51f4c17
--- /dev/null
+++ b/examples/notebooks/images.ipynb
@@ -0,0 +1,1074 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6238137f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from impresso import connect\n",
+ "\n",
+ "impresso = connect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ba0ce8a9",
+ "metadata": {},
+ "source": [
+ "## Search images"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "ac7fa88d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindImages result
\n",
+ "
Contains 2 items of 2 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " caption | \n",
+ " pageNumbers | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " previewUrl | \n",
+ " imageTypes.visualContentType | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxland-1996-10-11-a-i0076 | \n",
+ " luxland-1996-10-11-a | \n",
+ "  | \n",
+ " 1996-10-11 | \n",
+ " Silvie Fleury, Soft Rocket, 1995. | \n",
+ " [11] | \n",
+ " luxland | \n",
+ " d'Letzeburger Land | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " https://iiif.eluxemburgensia.lu/image/iiif/2/ark:70795%2fhmrj0j%2fpages%2f11/2728,1968,615,803/max/0/default.jpg | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ " | luxwort-1930-09-26-a-i0036 | \n",
+ " luxwort-1930-09-26-a | \n",
+ "  | \n",
+ " 1930-09-26 | \n",
+ " Das Urbild aller Lokomotiven: Stephensons berühmte „Rocket\", mit der er das Preisausschreiben der Bahngesellschaft Liverpool—Manchester gewann. | \n",
+ " [6] | \n",
+ " luxwort | \n",
+ " Luxemburger Wort | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " https://iiif.eluxemburgensia.lu/image/iiif/2/ark:70795%2f0pn9pt%2fpages%2f6/283,1847,1345,876/max/0/default.jpg | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.images.find(term=\"rocket\",content_type=\"object\")\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d44a92af",
+ "metadata": {},
+ "source": [
+ "## Image embedding search"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "202f993c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
GetImage result
\n",
+ "
Contains 0 items of 0 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " caption | \n",
+ " pageNumbers | \n",
+ " embeddings | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " previewUrl | \n",
+ " imageTypes.visualContentType | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxwort-1930-09-26-a-i0036 | \n",
+ " luxwort-1930-09-26-a | \n",
+ "  | \n",
+ " 1930-09-26 | \n",
+ " [REDACTED] | \n",
+ " [6] | \n",
+ " ['openclip-768:EEyfu8hgLj2pVjA8KxTeu5WGILwSyQ29pliHvbJICD2zTZO8aBpWOpW1qjx1IBm8luGMPFq3SLye9Yw8stBIvX0XiTtK4bk8NHqZvDE7QbuQQlq89jf2u1uGHz21nEg78K0bPJ3T1jwCHk28+12IO8YWujyW2dC80EblO6Qwhbzh8Ia9S4iBvD8siT1a+6g7cOuAPNcVqzuyMbC8fkj/PMwClDzLelM9xwY5vQFd2rtgv6C8xEZIPJso8LuuzLi9DgQ/vOFwHbz04cW7iJTXvLSNkzxNEJE9fIfWvKsLpLkgw5m82iKCvLo8RzypgIA80T3LvCK6iLyCihg9QvgSPMacVDzhf4E7HP3BuvbuZj0gcnA8hZ7fvDMl+by19Ac9DbA7OxVSmzwv2OK7c0HwuyoD6Dvsi528XaQUPH9HGDyxb0Q9canNPJA3kzzoOuq8Hkg2vcfo0zxv3Aq9RkRfvf6MDr2br907fIHtPNXBRD3EYbS8b2nfPEc92T2ni4C9O8EMvCGp2rqjqJc84OkWvWW23DuBRK88ACIvPSLvwzx73jk6rYo2PHVv7bvt3gU75onkvOvMBj1a7807doH1vAvtWrzH7c476oUvPAn+wryrP2i8elWEPE1NrjzQkS69CK3RvKlHnj1tTko84kpxPC2QhDzAFYM8MvqqvDOs+DpfNs48VCOlOxgndDrjpoO9dgckPIXKaTz2TRi9Z7iAPf7CJbysuwO9a0UHvc7MOT3HnpU8Z2osO3KfRbwk5ru7ClovvN+imbz305S9f0eGO0vmHruBgG489hQMPlntVjwFaBu8gZo2PLj1lbw4g8+8+p94vKy8IDsYx+i6xrccPA8pFL1/5yc8JHzDu1TtXrt7kV69sEZbPSyZAT3f4wA9252dvMWk/737QSW8umVEPOrvZjy2omU9Ze4ivZ3EH70bzgO9tY3kO+G6ijy4KjY86MEEPUfiq7zJDWi8OpNOOyuYeLsjhyc89u6TvLXFhrsn0xk80MihPHjxAbxmcB49JzthPTFlvrw1LA28ui62vP2c4jxFz9A8AFBtPWRJ/zsMALa4XDCPvELkobycYRs8HsvXvMoEaTtpJYk8jnzRvKVwFz3EXIw8VQKRvAYGy7yL13u85ZdKvXYTNTyPhUy7guINvIEyE72bHJe6ACoGPR1eFzxoKRY95NJcPd4+erxkrE+9E+edPMcJhT0X8Qu8+NvPPCQCzDxgHxG9RsyEPOFjwjxXsAC8e1ZDPX6zSj0drhE8xKS4uwijVrynbpy9Uohcu1o+Gz0OfWO82vhePb81qTx1KwQ94DZ7uDJ0xjxu4/W7Pw21PGGW/bw/ySc8uTAUvVpaGTyMp6W749MxO+0LBT3rh1y9EyWwvQ/QQjzu2ts8FO0WPToxgb00iuy9yPAqvUw3Rjw1DyE7d9tGPb5AsbxtvYy8jyIHvdWicTy8dEy8t9jEPODzrjtJbSu9bY4mPYZNSj2iz5S8rjuhvZLNMz1kLbA8sRPNPGb0uLwBJw28EsoWOycXFTwfiVy8C06wPCnKvjxoOIw7feK5O4fezTt2Zh09EC/Oui/NErx/uig9Vx2CPC/mzzqR9ju8TLmCvExGoTzsHbe8SBy6POfxh7w5EZw9mDJNPRQFtzzLw7c8rJqwvKSkrjyOKTG9YfKzuelUijzWREm97YSiPAaSPLyLb0Q8rHWRvdGY+ry8mI49VvI6PLGutDySIea8cAUPvT5AHzwBrma8TMT/vEB5G7zDKgu6WtcSPbql9bx0uK280ArcPEmTHT3ZDcc8WfEkvQNXuD2q8Tm9qwy/u22vXr1ay3g95O5KvGrx5ryG3Ts82XgtPDUQmLqc9RA9ABkgvKBIojzFrNe8s1QpvYHDabsBvEo44leovI10IDtSAoy880rZPKLxH72LD8K785QHPauGBr5BpYY8HODMPKqB4ztVrDY8giRjPAx/hDyzaKy8bijxvNIyMbveA
828eFMzvQ0ctDqwqCA9/1fRvICL/7wnCdU8m2NMvbeuUL0Nopc93CHGPHhWkTyXrLS8LkEcu/Zr6byKUeA+midMPGS0ODwgwlg8HRr/O+iArztEyOe8d9TLvBxonzvEbvK7gJlIvNXNFDzdFNk7tTEIPJE6Lrzzk6I8/t1ivLDHl7ut6YI7TZj+OsPsJb3zkSC9BQkHunAiITyN5Ee9EY/0vMXNcr1wi3y81t2uOwhbD731SKg8hzYfPTEBCz3Kcdi8d8cfvXHgWzzkT5e8iEAIvObog7ut/MS8yopxO/+B8jwlxIk9E4mAOyu66LzHvEK9puW3PFIenDu2bfQ8wLP1u1aUQbw1bSM9coEYPQ4g/DtWBIc9JvhDPSG4Gryd67U7V+R6vCLfcTxKkxu7PaboO38yjLw+dYk8mKOIPc83FTx27Rw8tAVKPIjjhz11jKM8k2GePJFrFr1eVBq6/ErNPPzuVT2SM+A8Tvqdu4ct/rznvB09NocxPbXuFTx3F9A7T+/LvGuikjwE7JG7Jz+SPITylTkEc9m7l4uFPA5jrzyF1du8FdAfPOxVKr38UTY9oGrjPDbH1zywiYW8L4VfPClF0bzctmG9AcuTvO9vPz3cVx496UwzvRtMGLy0OYy+I0j4vFBc+ry++9m8go3JvKcAPzwN7ka9RZI+vNYfV7zUlqu9zEExvRogtrm9gYi87J8XPdgbSD1IMxI9wTiyPMiSzDyvRe28784UPVYaCzyCCR09Z9Yku9+FWjquOw87sIYnvaMM1LxCHeo7Qz2gPPXvxDpobKy7rmJdvI1GhLwhy7i8Z4Whu0Fwt7slmK+7orPDvJgYGb3FCTS90HwZPf6GE70NNHo8kW6zO6M2pLw295A77nYYPdhM+DvwwAE9MyP5u5gwg7uUp5C9CDdDPGhTATsFgis6bswUPAG8XD25A8I8UZ68PRzhOrwWFqQ8g7rhu89gUbyQKTi9Id+OO+GmhzwCmDu73OuKPKsn6rrgYLC6C094PALlIb13vkY98uogPLW2WDt2xI28CF41PaluGj38XdA7jP6sPMNOGD1+Nhm82RmHPAkTmTxsPyc7ZNAbPVlTKT18+y47TtiJvE5MBryg97k83sbwvL/ajr2wFaI91KOFPN2cT72UgXE8Qpm9O0eMibz4mae6Xs4RO05d9Tx8ZV28kbLSO5StHTxGSsi8KSpKuwZIzzyXb3O7H3IzPZXlRrvxo5O8BZ5NPZ6AT70Z2F6873TnOtZQED3MMLA5GjFkPVP3Hrylsdq8vNylvHzZET32hhY94aa9PAJTkzvQTTw94FNVvDqKkj3yiEA9rttdPIVGtDxhcMG86PqCvMtlNT1Wgi49O352vUzStrzGBnE8l9CkPE5fmzyZZz68Ha6aOpPFUTzLjBc5Eg6AOyd1Dr3afR29rki7O9/6IL1sbMG9fpH1vLAWdTwUBRw732V+PQXGnbxzhr67uZZkPGt7OT1Ni4q8w8savfQzrjvYQB87WKTtuU6nWTx45K88CwPnPAy8zLvMHQk9egiDveIOoDwgZI09daROvHp7/7sTRTs8kxfnO2kQMzvcBq28MzUXPbxOKzy2UIY7h30KvVRlBTxQO4w8MJnMuzhMU70tuOQ8FbhXvPMhdzrkk3U8zclJPDvObr3SJSk8PAasvMtLJb3H/Be8bpDlvGLLiTz9vX29GmWNPAGyer0m3c68VJucvFiShzxMJBa97rc1PJCrGL1kYes74iI+Pbe8PTyVqL08/7LGvABnDz3qPw69EkMXOuOpRrw17bm8v85PPJHo4DtoOMs8fLCmPCUhi7geIcS890S2u4dSZ72AFSe9NChDPB41sz3IahA9VmrxO7/p8Ttv2ls8l3jBPDIwJzsf7iy9+AaFPMxnLDzttXS8oOQAPNNb/7yjvKO84VW6u0uMTb2gJ6k87bkuPYxPp7w6ZcQ8msBFPZZUP7w5XC68lfMhPJKMeboR/Vq8+LMTPCe4LT0l/
ZE8XouEPGGRXjwihUu8Jpumu+7uurpPt707vx+mvEdV1Tq8v9Q8sQfOPPmAu7zkDCU9jwdjvR8qUbzUoqk7bqLxvCn3Rj0onuW75Pz/vLnojL1mkKk7', 'dinov2-1024:1IF2vQHRlj0qWvg8bHLZuo/G2zzlK2K8+PyguzkoPDyRqC09j1N7PUl5prwCxRK72t5BvEt3ajwSagM9mpdePfOf7jykpC28AbSXOzqL2ryaHDe6INhbvV8KrD2FpXI7LhZEvEfBoTw8p5M8etrzu6owJjzeQds8lkhXvUxUmb18CaI85GRfu0ICrLv8PZy7Q7A5PZzcoTzBwxE8kR6mPJ3Fmrv1jC29O22ePCuVpTyNn9W81Ua8vG4kSb34eMk6zq8NPUFrN73Dzme81h4ePYvAJb0nAMq8024zPeVAiT3g3Yi89p0sPYlJiLyFP4k86aTcPClPI7wPrgc99Zjyu0bRdzz+WO+7kDkFPVQelzxtlq48typKvBZ0Urz9sQS8xxVlvQAzsbwt0OQ8WmJwPUyA+DzqTf27P7RFva8uIz2Ggnm7JXMRvVE0+ryBHr+7lUeGPEtxYztK0O68Gf+avHXm2jwVuQI9SvUeu4jfbzwW+JS78hgaPGRdF7mEaiq9wKDVPIwAAr2tEsW8wQw8PQY9nbzmhWW6T6sDPSQSrLvUFAs9UxWXvEmVGzznDkM9jRAKPTGPdb0kfnO87OaxvGjLKz2eHFw8n3VsvUw38LyN3jo9JR4st5A7QrknEOO7sitnPfUp97uDY9I8WRivPBw/1rxK17Q8DAM0vF1hgjuCAku849/YPG/vCjuzB3c8MGeRPPv4BT1OdeK8LO5iu2K0ebmSZBO8Mn8fvYtGEbw1/E08V02gPEp0Ar2V2aS8A6BcPCPKWz3fUIg7d82vOwAiDT253Va9EemIvTWinD0gsAK8DzFFPXPj2br0aEO8B/wCPfkZJj3D7hi9WIOQvGElBL36Fu88J4sKvGz9mLw6EF29qt21vFzLVDzLle47oA0EvHWu77z3Lko8AAx/vdh8HL2NrB49wunXvKj2hT28iyM8WWkCPW0xLT3YlwK9szwnPYTSqT2lTmO9hYMZvR9+wDzqw3E9TCo1PcQfNL3dp068fV6qvEdV1zx+vP2877jMO3iKoDq9Q6e8HuqXu09ezbyscG689QMdvCU1NL07fzm8i9lxvET0IL2XzK88zZRPOse1Lj2JSh69tv0FPZ6tfDvsrpY8ytCvPILr5DsX2d68LQ9NurQtFD1kfbM8pvWIPBOaGbzqED086PmEPW73SjzxS4I7NlC2vNiV6DzetFo8Y/+4PO1Ikz3oscs8/cA7PEgV6zx2fyQ9kk45PN6zED3azii9/RNIvKahdz3p3Be8qKO/PQd0hLxx0Xy83MnXO/uLrDxBkTg9p4UXvDUX8DsdEXa91KYGuuN7qD0Acdo8TiJrO9YmSrx6VKs8myfcPNi2Ibx/Wxq99rPBPCwENryP5yY9r22yOp+dUz30tuI85RSHO5Ld17zFdSW8Ra8aO9tDsj0v+JU8sF+qvIuhRL0ZpSK9jFsCvFi2Pjqv6qC7jftuvLv3xzyXHSy9nsjRvD7gT73FDVu9JCNoPdN7GD23fji9uz20vCeoGb345k28uEIZu1xhuTz4VFm8MQgGPAseg72h4pY8bQRjvS8Z9TzHYiI8KvIZvNrsejwrI6u7f6bRvOmdmrpYaDs8u+kHPNn+ujzBGGU8a75LvIf6Lzs/GZo9TPIRvMWpNbsXVfk6dmiOPcz7IT261VE7kuGyPLT1WL0B0cG75F4hPYcDNrtrVmO8/9QGPO1cD737RPm8Pg4KvegThLliD2k9zsivPETmt7tpwUO8LJ5/PG1gUz2HNCu8KG+xPC71dbwjmPO8LNxOvTT/qjz88JU8666xPOPCTb2W6A+5y5G5PL/xKj3nhqy8xY3vvEQL1rz9Lo+8IJMZPRZ5Rjuuw6G7wAJTuyxAFL0ir
BM8ZhtFvESwl7zLd3A8kQ/svDfAKD2eXfe8RyiBvUC3Erx5bKK7QvalvPiqi7w6LeG73ZKfPGjoObzBVAg6vfwgvfgLyjndDgo76VflPCYy0Ty0jwe81G2XPPoqCTw6G4+7FipaPEieVT13VnC8L26AvK0WyjvnQhm8zhbIPJmuQz38La08eHNku8h75DyzeK87ww0tvIT1uTtVjd+7wMJ6O90CpjssucA8YBI+PVaAiDoWCc48OSD/vDvMYTtmvAs9mISDuo7dIb2KDhe8eop0uzWLvDxmrFq95RpPvRTGoTyzKy09ZLEMPLyv2birq707SMtgPXjdPb0cyRE8T3TsPO4sIL2mQLy8iQ4TvaFjA72pGIs8O9mLvA36ND02wvY8VOWgPNuU1zyRyoc8/bELvOIvdr1r/sy8QsNbPUmMgDyv9i29pnEAPcYLebwIig870YSCvAraqrz2mZY8rP2pPOQ06LpJxZI8EvXlO9zJQ71S18C7ACtVvazwCbzwFgI8Ez5ovRbZETy4xBA7KtUtPbMakzyx6pc99XyHvIaGHTtZ8yG9Btv4PNdYmrv3rK88Rfv8vKILYL0Z4g+9PI9/vRwNeDvE7XG8Jswru5cuOL03hus71ceMu3I9Oj1n5nU8PGgBOwO+lbyMg9E7Ah2hvIwDX73YsDa82BULuzh7HjwgGy29uPLDvPo9WbqAG7G89ImfvL6qZTxo8kg94UREvd/Nhz1Q7Aa9RAMPvTTphDvAuFO8MdSzvOOXx7xxqoE870OUPIf3e7t58sA7f8s1vfGFhLxuHOk8yuKZvHQvYzzxKYY9iopRvfz1Gz1Mlr08UtT3PDxfYjrmBvu7Acw4PaViIz1kl7S8aLA6PIbTWruQa+k8ePa2PFVFQLyURuK8nTBIO0WPmb0aDMk8+ao8Oxm1gz0R5pm9FqEqPZO8dT1Q7+A8DtA9vPoWAbts6go96cL+vEvrPbxV77k8L5EjPfo9brwaN/y897R9u2/VlzuVHnW7TJ+avED21DzOl3I94c8cvW33oDxdz9K8sNXEvLSZJ71Galy8i7SpPB5bRTyeMq8870B4PYBQTLyiv/Y8FvcyO/JgkL2WIXw9uMVkPRnMP703KtI8yWo6u7Uea7xP6J08pCsBPVrqwTypeZK7R/KePU4D4Tt8s1q8sgAHPTCy07nkXz49NFNCOpKCXrw/efm8IbUnPTwQ/bvN/Lc7K64GvV8wYj0r6PY8ieb7OlfcN731sNw8JymoPFKoJDyb0/Q7c4ubPORx8Lrus1+7sqqnu4fLKLu6p6w89BgHO4DBpzwwFto8y8iZvIwJMr1L+aO8j1HFPMmevDpHLiE9ZYKMPAgSN724Qru80nyTPZ5eFDrkYBw9e7QZO1/LG73OiKC7CYpHPCp1x7xI2SS9hYTYPO7+Sb1bnYW8u1jVvNkPmL1pfOK8YzghPaOB+DuuylI999RivMWNiDyNo1m7yxmquqPNhT1/wza8B9wuvTuqkjsYYj+7yi8LvDSzxbxrpEM8CJZvvOVS0zxADQc9jQK1vAw8IzyJHaS8Zpy+vJ08ujo7pSW8YoOduy3yBrusqAY8WKIJvcMIxzv+GkK9jqqZPDnbGjtYEuY8V4GiuyF/lbzbC8e8G3bsvNS3D71Li3m8/XYPvehMVbzeUze9As8WvBwkUDo1wC68PRYgPSVAdLyWjDu9sHEGvWy8mTwj7Fc9Hsr+PPoyBL1etrA9maXHvHS9KbrJ/2y8ib6bPRu0N734G2M75IsRPeExEL031yK86VSEu+cWH7vZ8hG8LD8MPd0SlbuwAQc8oMPQPFkuCb2tP1c9jNV7PKdB67xoCwe9qzuGPTF7GryR2hU9hpyFPHcmp7zF4G44PWGEvKRcNL1I16W7FFtUPXZpTb3VAKa77+sdvXx1TTtkcmi88RYNPfD2MT3wDuE8z6YwvUw0Zr3Gon08Cb4SPROTbzwS5BA9OHssPBZVg7wHbU8940AGPJeg0Twfs
R69M558vPWxGD1B9G495VLnPEUkqjypRZC7ZvQEvTbnFL0K0J88gK8TPYZwcD1/5uA6TNLBO42LxjyoAKu9S7PnvMhhyDxo7xQ6z0IOvR79Njw0rGo8JrQYOcjxibyYECU9jhDhvFNvfr2PjjK9tmKTvVlBSL1D5jo97ybtPB+drjqRc9A8qEzEO0snazx6uAi8HMpsvRE5jD2/cMI8pb6MvMH1JTrrlhA7FSXlvJunobpSBUo8SWESO2GbmzyDENS8aNGIu7cmjr2vJA68bYWFPYiJwTq6jNe7rmpsPURXZTzNj3C8gOGYPXA8eb3zykq9VVpJPfmsCr2QGr28M60UPAM81by2F+E8iRIpvNr8gTyU9G49s+GVOwlX5Tx/bQS9PyLpPG1FNb3IAAa9gCU1PQ4pjLxGiw49Dlmeu6BW5jxiNgo7nMBjvKxzojxajdo5YJINveVZ+TxjqCc92SD8vJTYQbrz+wu4OPttPA8EA7tThpQ78YcRvcgH/Lqi2ZK91eAru7zDsDygfGO9XIOnvIdoJry+Et28auovPQNI27wV0tg8NRFNvHhEeTx/upS8W3b8PBwLozz5MwG9orGpPOsdGDzoReU8VgqevMwoxrzNwAO7S9AmvVkQwjwT9z68J3vZPETmt7vIl4C81jJkvEEcg7zhPRq8XEITveHZV72Eqzq8H3iru5ykBjuk2S08pXA5vLf1Jjs6OYC94AITPCqOCL0ZILy7SuXLOyGQ97ygjCK9zj1NPILM6LyNwKo85SuIvKSdWb3n+KC7hbO2PARiQTy9dWw7sp5tulLrOjvRBFw9aUu2PPbboT39LUm8oVrdOgPtaLwHhms8yq9oPMiWgjzY5wE8wTsuPWOMWD1zI3G8xzMfPbPDjL1nxIq86XCxPMQkGjwhA9S8wUSSPH8IH7uXRK49OIX1vEQ0pryraDu9aIlQvOaXQr0iktc8NksfvY+1Hz39ggs8Ml0HPD8czjwd52O77ryuvO5+g73veoi8TYRMve+iNL1+pJe7tqwJPNXXf70hVXS9tOITvWH7q7xfdDG7LakovPdxUruwdis9NDLLvCBtzTzP8Vq8NeF/O6Qr0btSnkY9J2ZGvcq0wDuwTIy9tV+DvL4dtTyKYXe8uhhvvH2GPj1IQpw8N/tUPWDuKr2QATm7mt8SvTiUS7sY7Qa8qwGgPC6dUjsYv107xZNGvFQCy7t7pty8z4OBPcz4JT35Iy48QjElPX9jjL3bKES9UNe/vOz/i72QHpG8AiI/PBwgAz1K8mo8d3XDPE3T6TyoyU671B8GvDa9d71H5Tu8gppTPePyl71oWk09ZuNtOw1hC70CUSq8gR/7vOIFKTyJk6q8aw0cu3aoCT1HPrM8O6sCvNfhxTvstIg8jkQQu7Cl97qGf967PJEnPRepvbtnloE6F3BjvXfcvLxzwCW931AqvQDgnbxKuOG7510UvXfHED0QBGQ93TNEvWgINDxbxLS84Fg+vUTiZj2bCTs8aU2/OznU27sMCsY81ccvvAQwH70Edam8WN5cPJoG3bw2zm483haMPANiRL1XmxW8x3EJvWEKq7y2IUm8rzUavQ=='] | \n",
+ " luxwort | \n",
+ " Luxemburger Wort | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "image_id = \"luxwort-1930-09-26-a-i0036\"\n",
+ "image = impresso.images.get(image_id, include_embeddings=True)\n",
+ "image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "dfade8ce",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['openclip-768:EEyfu8hgLj2pVjA8KxTeu5WGILwSyQ29pliHvbJICD2zTZO8aBpWOpW1qjx1IBm8luGMPFq3SLye9Yw8stBIvX0XiTtK4bk8NHqZvDE7QbuQQlq89jf2u1uGHz21nEg78K0bPJ3T1jwCHk28+12IO8YWujyW2dC80EblO6Qwhbzh8Ia9S4iBvD8siT1a+6g7cOuAPNcVqzuyMbC8fkj/PMwClDzLelM9xwY5vQFd2rtgv6C8xEZIPJso8LuuzLi9DgQ/vOFwHbz04cW7iJTXvLSNkzxNEJE9fIfWvKsLpLkgw5m82iKCvLo8RzypgIA80T3LvCK6iLyCihg9QvgSPMacVDzhf4E7HP3BuvbuZj0gcnA8hZ7fvDMl+by19Ac9DbA7OxVSmzwv2OK7c0HwuyoD6Dvsi528XaQUPH9HGDyxb0Q9canNPJA3kzzoOuq8Hkg2vcfo0zxv3Aq9RkRfvf6MDr2br907fIHtPNXBRD3EYbS8b2nfPEc92T2ni4C9O8EMvCGp2rqjqJc84OkWvWW23DuBRK88ACIvPSLvwzx73jk6rYo2PHVv7bvt3gU75onkvOvMBj1a7807doH1vAvtWrzH7c476oUvPAn+wryrP2i8elWEPE1NrjzQkS69CK3RvKlHnj1tTko84kpxPC2QhDzAFYM8MvqqvDOs+DpfNs48VCOlOxgndDrjpoO9dgckPIXKaTz2TRi9Z7iAPf7CJbysuwO9a0UHvc7MOT3HnpU8Z2osO3KfRbwk5ru7ClovvN+imbz305S9f0eGO0vmHruBgG489hQMPlntVjwFaBu8gZo2PLj1lbw4g8+8+p94vKy8IDsYx+i6xrccPA8pFL1/5yc8JHzDu1TtXrt7kV69sEZbPSyZAT3f4wA9252dvMWk/737QSW8umVEPOrvZjy2omU9Ze4ivZ3EH70bzgO9tY3kO+G6ijy4KjY86MEEPUfiq7zJDWi8OpNOOyuYeLsjhyc89u6TvLXFhrsn0xk80MihPHjxAbxmcB49JzthPTFlvrw1LA28ui62vP2c4jxFz9A8AFBtPWRJ/zsMALa4XDCPvELkobycYRs8HsvXvMoEaTtpJYk8jnzRvKVwFz3EXIw8VQKRvAYGy7yL13u85ZdKvXYTNTyPhUy7guINvIEyE72bHJe6ACoGPR1eFzxoKRY95NJcPd4+erxkrE+9E+edPMcJhT0X8Qu8+NvPPCQCzDxgHxG9RsyEPOFjwjxXsAC8e1ZDPX6zSj0drhE8xKS4uwijVrynbpy9Uohcu1o+Gz0OfWO82vhePb81qTx1KwQ94DZ7uDJ0xjxu4/W7Pw21PGGW/bw/ySc8uTAUvVpaGTyMp6W749MxO+0LBT3rh1y9EyWwvQ/QQjzu2ts8FO0WPToxgb00iuy9yPAqvUw3Rjw1DyE7d9tGPb5AsbxtvYy8jyIHvdWicTy8dEy8t9jEPODzrjtJbSu9bY4mPYZNSj2iz5S8rjuhvZLNMz1kLbA8sRPNPGb0uLwBJw28EsoWOycXFTwfiVy8C06wPCnKvjxoOIw7feK5O4fezTt2Zh09EC/Oui/NErx/uig9Vx2CPC/mzzqR9ju8TLmCvExGoTzsHbe8SBy6POfxh7w5EZw9mDJNPRQFtzzLw7c8rJqwvKSkrjyOKTG9YfKzuelUijzWREm97YSiPAaSPLyLb0Q8rHWRvdGY+ry8mI49VvI6PLGutDySIea8cAUPvT5AHzwBrma8TMT/vEB5G7zDKgu6WtcSPbql9bx0uK280ArcPEmTHT3ZDcc8WfEkvQNXuD2q8Tm9qwy/u22vXr1ay3g95O5KvGrx5ryG3Ts82XgtPDUQmLqc9RA9ABkgvKBIojzFrNe8s1QpvYHDabsBvEo44leovI10IDtSAoy880rZPKLxH72LD8K785QHPauGBr5BpYY8HODMPKqB4ztVrDY8giRjPAx/hDyzaKy8bijxvNIyMbveA8
28eFMzvQ0ctDqwqCA9/1fRvICL/7wnCdU8m2NMvbeuUL0Nopc93CHGPHhWkTyXrLS8LkEcu/Zr6byKUeA+midMPGS0ODwgwlg8HRr/O+iArztEyOe8d9TLvBxonzvEbvK7gJlIvNXNFDzdFNk7tTEIPJE6Lrzzk6I8/t1ivLDHl7ut6YI7TZj+OsPsJb3zkSC9BQkHunAiITyN5Ee9EY/0vMXNcr1wi3y81t2uOwhbD731SKg8hzYfPTEBCz3Kcdi8d8cfvXHgWzzkT5e8iEAIvObog7ut/MS8yopxO/+B8jwlxIk9E4mAOyu66LzHvEK9puW3PFIenDu2bfQ8wLP1u1aUQbw1bSM9coEYPQ4g/DtWBIc9JvhDPSG4Gryd67U7V+R6vCLfcTxKkxu7PaboO38yjLw+dYk8mKOIPc83FTx27Rw8tAVKPIjjhz11jKM8k2GePJFrFr1eVBq6/ErNPPzuVT2SM+A8Tvqdu4ct/rznvB09NocxPbXuFTx3F9A7T+/LvGuikjwE7JG7Jz+SPITylTkEc9m7l4uFPA5jrzyF1du8FdAfPOxVKr38UTY9oGrjPDbH1zywiYW8L4VfPClF0bzctmG9AcuTvO9vPz3cVx496UwzvRtMGLy0OYy+I0j4vFBc+ry++9m8go3JvKcAPzwN7ka9RZI+vNYfV7zUlqu9zEExvRogtrm9gYi87J8XPdgbSD1IMxI9wTiyPMiSzDyvRe28784UPVYaCzyCCR09Z9Yku9+FWjquOw87sIYnvaMM1LxCHeo7Qz2gPPXvxDpobKy7rmJdvI1GhLwhy7i8Z4Whu0Fwt7slmK+7orPDvJgYGb3FCTS90HwZPf6GE70NNHo8kW6zO6M2pLw295A77nYYPdhM+DvwwAE9MyP5u5gwg7uUp5C9CDdDPGhTATsFgis6bswUPAG8XD25A8I8UZ68PRzhOrwWFqQ8g7rhu89gUbyQKTi9Id+OO+GmhzwCmDu73OuKPKsn6rrgYLC6C094PALlIb13vkY98uogPLW2WDt2xI28CF41PaluGj38XdA7jP6sPMNOGD1+Nhm82RmHPAkTmTxsPyc7ZNAbPVlTKT18+y47TtiJvE5MBryg97k83sbwvL/ajr2wFaI91KOFPN2cT72UgXE8Qpm9O0eMibz4mae6Xs4RO05d9Tx8ZV28kbLSO5StHTxGSsi8KSpKuwZIzzyXb3O7H3IzPZXlRrvxo5O8BZ5NPZ6AT70Z2F6873TnOtZQED3MMLA5GjFkPVP3Hrylsdq8vNylvHzZET32hhY94aa9PAJTkzvQTTw94FNVvDqKkj3yiEA9rttdPIVGtDxhcMG86PqCvMtlNT1Wgi49O352vUzStrzGBnE8l9CkPE5fmzyZZz68Ha6aOpPFUTzLjBc5Eg6AOyd1Dr3afR29rki7O9/6IL1sbMG9fpH1vLAWdTwUBRw732V+PQXGnbxzhr67uZZkPGt7OT1Ni4q8w8savfQzrjvYQB87WKTtuU6nWTx45K88CwPnPAy8zLvMHQk9egiDveIOoDwgZI09daROvHp7/7sTRTs8kxfnO2kQMzvcBq28MzUXPbxOKzy2UIY7h30KvVRlBTxQO4w8MJnMuzhMU70tuOQ8FbhXvPMhdzrkk3U8zclJPDvObr3SJSk8PAasvMtLJb3H/Be8bpDlvGLLiTz9vX29GmWNPAGyer0m3c68VJucvFiShzxMJBa97rc1PJCrGL1kYes74iI+Pbe8PTyVqL08/7LGvABnDz3qPw69EkMXOuOpRrw17bm8v85PPJHo4DtoOMs8fLCmPCUhi7geIcS890S2u4dSZ72AFSe9NChDPB41sz3IahA9VmrxO7/p8Ttv2ls8l3jBPDIwJzsf7iy9+AaFPMxnLDzttXS8oOQAPNNb/7yjvKO84VW6u0uMTb2gJ6k87bkuPYxPp7w6ZcQ8msBFPZZUP7w5XC68lfMhPJKMeboR/Vq8+LMTPCe4LT0l/Z
E8XouEPGGRXjwihUu8Jpumu+7uurpPt707vx+mvEdV1Tq8v9Q8sQfOPPmAu7zkDCU9jwdjvR8qUbzUoqk7bqLxvCn3Rj0onuW75Pz/vLnojL1mkKk7',\n",
+ " 'dinov2-1024:1IF2vQHRlj0qWvg8bHLZuo/G2zzlK2K8+PyguzkoPDyRqC09j1N7PUl5prwCxRK72t5BvEt3ajwSagM9mpdePfOf7jykpC28AbSXOzqL2ryaHDe6INhbvV8KrD2FpXI7LhZEvEfBoTw8p5M8etrzu6owJjzeQds8lkhXvUxUmb18CaI85GRfu0ICrLv8PZy7Q7A5PZzcoTzBwxE8kR6mPJ3Fmrv1jC29O22ePCuVpTyNn9W81Ua8vG4kSb34eMk6zq8NPUFrN73Dzme81h4ePYvAJb0nAMq8024zPeVAiT3g3Yi89p0sPYlJiLyFP4k86aTcPClPI7wPrgc99Zjyu0bRdzz+WO+7kDkFPVQelzxtlq48typKvBZ0Urz9sQS8xxVlvQAzsbwt0OQ8WmJwPUyA+DzqTf27P7RFva8uIz2Ggnm7JXMRvVE0+ryBHr+7lUeGPEtxYztK0O68Gf+avHXm2jwVuQI9SvUeu4jfbzwW+JS78hgaPGRdF7mEaiq9wKDVPIwAAr2tEsW8wQw8PQY9nbzmhWW6T6sDPSQSrLvUFAs9UxWXvEmVGzznDkM9jRAKPTGPdb0kfnO87OaxvGjLKz2eHFw8n3VsvUw38LyN3jo9JR4st5A7QrknEOO7sitnPfUp97uDY9I8WRivPBw/1rxK17Q8DAM0vF1hgjuCAku849/YPG/vCjuzB3c8MGeRPPv4BT1OdeK8LO5iu2K0ebmSZBO8Mn8fvYtGEbw1/E08V02gPEp0Ar2V2aS8A6BcPCPKWz3fUIg7d82vOwAiDT253Va9EemIvTWinD0gsAK8DzFFPXPj2br0aEO8B/wCPfkZJj3D7hi9WIOQvGElBL36Fu88J4sKvGz9mLw6EF29qt21vFzLVDzLle47oA0EvHWu77z3Lko8AAx/vdh8HL2NrB49wunXvKj2hT28iyM8WWkCPW0xLT3YlwK9szwnPYTSqT2lTmO9hYMZvR9+wDzqw3E9TCo1PcQfNL3dp068fV6qvEdV1zx+vP2877jMO3iKoDq9Q6e8HuqXu09ezbyscG689QMdvCU1NL07fzm8i9lxvET0IL2XzK88zZRPOse1Lj2JSh69tv0FPZ6tfDvsrpY8ytCvPILr5DsX2d68LQ9NurQtFD1kfbM8pvWIPBOaGbzqED086PmEPW73SjzxS4I7NlC2vNiV6DzetFo8Y/+4PO1Ikz3oscs8/cA7PEgV6zx2fyQ9kk45PN6zED3azii9/RNIvKahdz3p3Be8qKO/PQd0hLxx0Xy83MnXO/uLrDxBkTg9p4UXvDUX8DsdEXa91KYGuuN7qD0Acdo8TiJrO9YmSrx6VKs8myfcPNi2Ibx/Wxq99rPBPCwENryP5yY9r22yOp+dUz30tuI85RSHO5Ld17zFdSW8Ra8aO9tDsj0v+JU8sF+qvIuhRL0ZpSK9jFsCvFi2Pjqv6qC7jftuvLv3xzyXHSy9nsjRvD7gT73FDVu9JCNoPdN7GD23fji9uz20vCeoGb345k28uEIZu1xhuTz4VFm8MQgGPAseg72h4pY8bQRjvS8Z9TzHYiI8KvIZvNrsejwrI6u7f6bRvOmdmrpYaDs8u+kHPNn+ujzBGGU8a75LvIf6Lzs/GZo9TPIRvMWpNbsXVfk6dmiOPcz7IT261VE7kuGyPLT1WL0B0cG75F4hPYcDNrtrVmO8/9QGPO1cD737RPm8Pg4KvegThLliD2k9zsivPETmt7tpwUO8LJ5/PG1gUz2HNCu8KG+xPC71dbwjmPO8LNxOvTT/qjz88JU8666xPOPCTb2W6A+5y5G5PL/xKj3nhqy8xY3vvEQL1rz9Lo+8IJMZPRZ5Rjuuw6G7wAJTuyxAFL0irBM8ZhtFvESwl7zLd3A8kQ/svDfAKD2eXfe8RyiBvUC3Erx5bKK7QvalvPiqi7w6LeG73ZKfPGjoObzBVAg6vfwgvfgLyjndDgo76VflPCYy0Ty0jwe
81G2XPPoqCTw6G4+7FipaPEieVT13VnC8L26AvK0WyjvnQhm8zhbIPJmuQz38La08eHNku8h75DyzeK87ww0tvIT1uTtVjd+7wMJ6O90CpjssucA8YBI+PVaAiDoWCc48OSD/vDvMYTtmvAs9mISDuo7dIb2KDhe8eop0uzWLvDxmrFq95RpPvRTGoTyzKy09ZLEMPLyv2birq707SMtgPXjdPb0cyRE8T3TsPO4sIL2mQLy8iQ4TvaFjA72pGIs8O9mLvA36ND02wvY8VOWgPNuU1zyRyoc8/bELvOIvdr1r/sy8QsNbPUmMgDyv9i29pnEAPcYLebwIig870YSCvAraqrz2mZY8rP2pPOQ06LpJxZI8EvXlO9zJQ71S18C7ACtVvazwCbzwFgI8Ez5ovRbZETy4xBA7KtUtPbMakzyx6pc99XyHvIaGHTtZ8yG9Btv4PNdYmrv3rK88Rfv8vKILYL0Z4g+9PI9/vRwNeDvE7XG8Jswru5cuOL03hus71ceMu3I9Oj1n5nU8PGgBOwO+lbyMg9E7Ah2hvIwDX73YsDa82BULuzh7HjwgGy29uPLDvPo9WbqAG7G89ImfvL6qZTxo8kg94UREvd/Nhz1Q7Aa9RAMPvTTphDvAuFO8MdSzvOOXx7xxqoE870OUPIf3e7t58sA7f8s1vfGFhLxuHOk8yuKZvHQvYzzxKYY9iopRvfz1Gz1Mlr08UtT3PDxfYjrmBvu7Acw4PaViIz1kl7S8aLA6PIbTWruQa+k8ePa2PFVFQLyURuK8nTBIO0WPmb0aDMk8+ao8Oxm1gz0R5pm9FqEqPZO8dT1Q7+A8DtA9vPoWAbts6go96cL+vEvrPbxV77k8L5EjPfo9brwaN/y897R9u2/VlzuVHnW7TJ+avED21DzOl3I94c8cvW33oDxdz9K8sNXEvLSZJ71Galy8i7SpPB5bRTyeMq8870B4PYBQTLyiv/Y8FvcyO/JgkL2WIXw9uMVkPRnMP703KtI8yWo6u7Uea7xP6J08pCsBPVrqwTypeZK7R/KePU4D4Tt8s1q8sgAHPTCy07nkXz49NFNCOpKCXrw/efm8IbUnPTwQ/bvN/Lc7K64GvV8wYj0r6PY8ieb7OlfcN731sNw8JymoPFKoJDyb0/Q7c4ubPORx8Lrus1+7sqqnu4fLKLu6p6w89BgHO4DBpzwwFto8y8iZvIwJMr1L+aO8j1HFPMmevDpHLiE9ZYKMPAgSN724Qru80nyTPZ5eFDrkYBw9e7QZO1/LG73OiKC7CYpHPCp1x7xI2SS9hYTYPO7+Sb1bnYW8u1jVvNkPmL1pfOK8YzghPaOB+DuuylI999RivMWNiDyNo1m7yxmquqPNhT1/wza8B9wuvTuqkjsYYj+7yi8LvDSzxbxrpEM8CJZvvOVS0zxADQc9jQK1vAw8IzyJHaS8Zpy+vJ08ujo7pSW8YoOduy3yBrusqAY8WKIJvcMIxzv+GkK9jqqZPDnbGjtYEuY8V4GiuyF/lbzbC8e8G3bsvNS3D71Li3m8/XYPvehMVbzeUze9As8WvBwkUDo1wC68PRYgPSVAdLyWjDu9sHEGvWy8mTwj7Fc9Hsr+PPoyBL1etrA9maXHvHS9KbrJ/2y8ib6bPRu0N734G2M75IsRPeExEL031yK86VSEu+cWH7vZ8hG8LD8MPd0SlbuwAQc8oMPQPFkuCb2tP1c9jNV7PKdB67xoCwe9qzuGPTF7GryR2hU9hpyFPHcmp7zF4G44PWGEvKRcNL1I16W7FFtUPXZpTb3VAKa77+sdvXx1TTtkcmi88RYNPfD2MT3wDuE8z6YwvUw0Zr3Gon08Cb4SPROTbzwS5BA9OHssPBZVg7wHbU8940AGPJeg0TwfsR69M558vPWxGD1B9G495VLnPEUkqjypRZC7ZvQEvTbnFL0K0J88gK8TPYZwcD1/5uA6TNLBO42LxjyoAKu9S7PnvMhhyDxo7xQ6z0IOvR79Njw0rGo
8JrQYOcjxibyYECU9jhDhvFNvfr2PjjK9tmKTvVlBSL1D5jo97ybtPB+drjqRc9A8qEzEO0snazx6uAi8HMpsvRE5jD2/cMI8pb6MvMH1JTrrlhA7FSXlvJunobpSBUo8SWESO2GbmzyDENS8aNGIu7cmjr2vJA68bYWFPYiJwTq6jNe7rmpsPURXZTzNj3C8gOGYPXA8eb3zykq9VVpJPfmsCr2QGr28M60UPAM81by2F+E8iRIpvNr8gTyU9G49s+GVOwlX5Tx/bQS9PyLpPG1FNb3IAAa9gCU1PQ4pjLxGiw49Dlmeu6BW5jxiNgo7nMBjvKxzojxajdo5YJINveVZ+TxjqCc92SD8vJTYQbrz+wu4OPttPA8EA7tThpQ78YcRvcgH/Lqi2ZK91eAru7zDsDygfGO9XIOnvIdoJry+Et28auovPQNI27wV0tg8NRFNvHhEeTx/upS8W3b8PBwLozz5MwG9orGpPOsdGDzoReU8VgqevMwoxrzNwAO7S9AmvVkQwjwT9z68J3vZPETmt7vIl4C81jJkvEEcg7zhPRq8XEITveHZV72Eqzq8H3iru5ykBjuk2S08pXA5vLf1Jjs6OYC94AITPCqOCL0ZILy7SuXLOyGQ97ygjCK9zj1NPILM6LyNwKo85SuIvKSdWb3n+KC7hbO2PARiQTy9dWw7sp5tulLrOjvRBFw9aUu2PPbboT39LUm8oVrdOgPtaLwHhms8yq9oPMiWgjzY5wE8wTsuPWOMWD1zI3G8xzMfPbPDjL1nxIq86XCxPMQkGjwhA9S8wUSSPH8IH7uXRK49OIX1vEQ0pryraDu9aIlQvOaXQr0iktc8NksfvY+1Hz39ggs8Ml0HPD8czjwd52O77ryuvO5+g73veoi8TYRMve+iNL1+pJe7tqwJPNXXf70hVXS9tOITvWH7q7xfdDG7LakovPdxUruwdis9NDLLvCBtzTzP8Vq8NeF/O6Qr0btSnkY9J2ZGvcq0wDuwTIy9tV+DvL4dtTyKYXe8uhhvvH2GPj1IQpw8N/tUPWDuKr2QATm7mt8SvTiUS7sY7Qa8qwGgPC6dUjsYv107xZNGvFQCy7t7pty8z4OBPcz4JT35Iy48QjElPX9jjL3bKES9UNe/vOz/i72QHpG8AiI/PBwgAz1K8mo8d3XDPE3T6TyoyU671B8GvDa9d71H5Tu8gppTPePyl71oWk09ZuNtOw1hC70CUSq8gR/7vOIFKTyJk6q8aw0cu3aoCT1HPrM8O6sCvNfhxTvstIg8jkQQu7Cl97qGf967PJEnPRepvbtnloE6F3BjvXfcvLxzwCW931AqvQDgnbxKuOG7510UvXfHED0QBGQ93TNEvWgINDxbxLS84Fg+vUTiZj2bCTs8aU2/OznU27sMCsY81ccvvAQwH70Edam8WN5cPJoG3bw2zm483haMPANiRL1XmxW8x3EJvWEKq7y2IUm8rzUavQ==']"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "embeddings = impresso.images.get_embeddings(image_id)\n",
+ "embeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "2a42263a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindImages result
\n",
+ "
Contains 3 items (0 - 3) of 6 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " caption | \n",
+ " pageNumbers | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " previewUrl | \n",
+ " imageTypes.visualContentType | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxwort-1930-09-26-a-i0036 | \n",
+ " luxwort-1930-09-26-a | \n",
+ "  | \n",
+ " 1930-09-26 | \n",
+ " [REDACTED] | \n",
+ " [6] | \n",
+ " luxwort | \n",
+ " Luxemburger Wort | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ " | SMZ-1929-06-29-a-i0004 | \n",
+ " SMZ-1929-06-29-a | \n",
+ "  | \n",
+ " 1929-06-29 | \n",
+ " [REDACTED] | \n",
+ " [4] | \n",
+ " SMZ | \n",
+ " SMUV-Zeitung | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ " | FZG-1878-08-10-a-i0012 | \n",
+ " FZG-1878-08-10-a | \n",
+ "  | \n",
+ " 1878-08-10 | \n",
+ " [REDACTED] | \n",
+ " [4] | \n",
+ " FZG | \n",
+ " Freiburger Nachrichten | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.images.find(\n",
+ " embedding=embeddings[0],\n",
+ " limit=3\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "67764d26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindImages result
\n",
+ "
Contains 3 items (0 - 3) of 6 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " caption | \n",
+ " pageNumbers | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " imageTypes.visualContentType | \n",
+ " previewUrl | \n",
+ " contentItemUid | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxwort-1930-09-26-a-i0036 | \n",
+ " luxwort-1930-09-26-a | \n",
+ "  | \n",
+ " 1930-09-26 | \n",
+ " [REDACTED] | \n",
+ " [6] | \n",
+ " luxwort | \n",
+ " Luxemburger Wort | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " Object | \n",
+ " /assets/images/not-allowed.png | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ " | FZG-1878-08-03-a-i0014 | \n",
+ " FZG-1878-08-03-a | \n",
+ "  | \n",
+ " 1878-08-03 | \n",
+ " [REDACTED] | \n",
+ " [4] | \n",
+ " FZG | \n",
+ " Freiburger Nachrichten | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " Object | \n",
+ " /assets/images/not-allowed.png | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ " | EXP-1999-09-02-a-i0033 | \n",
+ " EXP-1999-09-02-a | \n",
+ "  | \n",
+ " 1999-09-02 | \n",
+ " nan | \n",
+ " [3] | \n",
+ " EXP | \n",
+ " L'Express | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " Scenery or Landscape | \n",
+ " /assets/images/not-allowed.png | \n",
+ " EXP-1999-09-02-a-i0028 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.images.find(\n",
+ " embedding=embeddings[1],\n",
+ " limit=3\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "42d63977",
+ "metadata": {},
+ "source": [
+ "### Out-of-corpus embeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3ca9b5ad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindImages result
\n",
+ "
Contains 3 items (0 - 3) of 6 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " pageNumbers | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " imageTypes.visualContentType | \n",
+ " previewUrl | \n",
+ " caption | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxland-1980-10-10-a-i0080 | \n",
+ " luxland-1980-10-10-a | \n",
+ "  | \n",
+ " 1980-10-10 | \n",
+ " [2] | \n",
+ " luxland | \n",
+ " d'Letzeburger Land | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " Scenery or Landscape | \n",
+ " /assets/images/not-allowed.png | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ " | luxland-2007-02-09-a-i0105 | \n",
+ " luxland-2007-02-09-a | \n",
+ "  | \n",
+ " 2007-02-09 | \n",
+ " [10] | \n",
+ " luxland | \n",
+ " d'Letzeburger Land | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " Scenery or Landscape | \n",
+ " /assets/images/not-allowed.png | \n",
+ " [REDACTED] | \n",
+ "
\n",
+ " \n",
+ " | luxland-1981-06-19-a-i0110 | \n",
+ " luxland-1981-06-19-a | \n",
+ "  | \n",
+ " 1981-06-19 | \n",
+ " [2] | \n",
+ " luxland | \n",
+ " d'Letzeburger Land | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " Human Representation - Scene | \n",
+ " /assets/images/not-allowed.png | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.images.find(\n",
+ " embedding=impresso.tools.embed_text(text=\"belval\", target=\"multimodal\"),\n",
+ " # media_id=\"luxwort\",\n",
+ " limit=3\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "a749c56b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'dinov2-1024:MHwEv2fhLz1gVKw9Mp/XvuXfpb/lmvi/YdoIv7Ljxj1XTpHARWOuPwWGeL/TVE1A0APwv1hmsL/6B4S/WXKzv1xGrb+mm5e/4DqSP8XO/j3w7si+VJ6VP2ndJb8c7ZA/RV2UvV7vnr9+WhQ/85wIQMFIdr+yL2a9GG06QHNZN71W+m2/uEEyPvgCCr8lHkK9l5J+v6SIu783sR8/SJpbvo5hpT0Dn4k+n6B+PSe5yL+AZpe+H6OtPm7GAcCNwoi/+LsgQHU5hD+wZwBAKgiJvcLaPsA8UwE/gDAavjlLg74Wque/p+a0v2K6dT9E9TK/vh9mwLpG6L3mixRADd2tPlmHJ8B2Qza//TyuPyyVUz9LtQU/OiIdvv07rj60PgNA4TTzv3yKRb+KY1E/sEuNv2y7yr+5VJQ/XeOKP0tttT4c39q/TUgGQLyr3z7uvas/U3A+QIMnAEDoxcA/wgYAwHXQDz4/Pcs//Oy5PptSwr+tbgs/+O7GvxPYGD43pcq/+Oyavg6V6L9R4Yy/R/jSv7OeUUBiAKY/fQHQPxLuVj+YQnM/yYKVPzX3rL+qppg7Hj+FP3oQeT5n9F0/Hgwrv2iPurytVeg//Z8wP9N7B8AgbGc/ldGOv6OqJ79oRo6/J7CQvzLeCz8G+wXAMEhtwFnN/T+PI0a/fUHCvqcIcj42p2G+jQs/QPF9X7/HYRnAtVroPuxmFL7E7MW/WxnhPn4cQ76cqxzA2aYZv7Wl2z+3bwK/N9GZvlUxPb4uApg/1OmJwC9a+z6Mmja+R78bP6RA8j7Nxpi+qQC1Ptxdob+iZx0/tjY8Pin9K7xGbaQ/IvJNP9L+BcAERL4/e2JTP9C3db9SmBc/DDPRPiGDFj9UDOy+Zvk+P64/0b+7gI6+ToOKPzgt4r+JT7M/xPjmv0DbiT78wr8+OhHEP8+uH78fPBE/5+mGv+L1rj74Aqi/AhcEQK5xHD/z6oQ/BgGHvtbb4D9L7wFAs73RP5BrWD4K/1i/JUMSwNz8rj8xZca/4+VHQCnTaj6jjDDASSh7P3N1kL28va49waikvq028L4+uLQ84FGzvkUhW75MxAa/f64mviU6ib87WxC/YctMvqiEZL4Wphg+kc91v0sUGUA6laS/r/m9v5HKC0B07QM/UEz8vup5hsCXPEA/Qb5rPrwUhr6KIRq/HVOKPxQBor8Bg/M+BBnRvkAThz8nONA7MxFcP7FwDUAnLfW+Ij3+PHIwD0B7QNo+n7Kzvxgtzz/9zQG+aDOiv3RUjT/N3QVADjzhv9KI/b4xD9q+bjWEvwtZK7+RCtQ+XG3qvemDIL/zTJY+TfJAvxFkkD/Qb6I+hD2hPameDb/pSPA/6U/5P1izQr8worE/MDgGP+/qKcDCMYQ/uG3PP7u1gb+yYjPAbE4UQGeYUj9cA4u/qomLP5RGgb6Z+Ko+4U/Ev9pgZD/xuYW92skBPzHOz7+Yv0RAh7LYvweBA7/FMBI7+OntP84C4D9qowPAFhYavzcWDMDR13i/7BQIPwmnFsCJpcRA5KIcv25+nD84RYe9b8d8PyBzjj+aW1++rdqGv6oOuD9jL6A+G1ESwF65T79E3HtAUvQawFdKGb4svDy/hA6Nv3WnUEDLvZg/2TFVPwb+QL+U/J+/VK1Ov9QFa7/sg9u/koqhPxBAUr8Urpu/2ZWuPtCkqj7qWDxAmCgOv3otZj56jzXATmwxP1PwJj8py7+/TkjivjxMXT6IK4i/t+ndPtejOz+Lz0K+V5qFvmmeSr+7KDW/rvV0vnISkD+N4VO/kcvRv8+OhD+wMAy/sTNRv5Hpw77wKmQ/tvCjvb7VRj+yWKc/3BcIP3etJ7++8Cc+QQsPP4KovT3jz4s+j/84P/uO+L+9rzI/E/4cQEDDKD+86S8/JhfWv4Bgvr/wuKS+qpOTP9YN1j3zCpG/uZSJvFIAjL/s6Ak/CbF9P6ZGhT8wIYQ/3gX+PzWOYUCCwRG/
9rTNP6gGnb+RCw7A931uPuR4VD7eu40/FkYjv/IByr8We48/NmEIP3BR5j+r5RRAl7eCPdTY6T99iMU/0Sh+v4ImjL8xO6C/bTlUwMR88b/nQCc/caB4v6fo7j+Vpec+N72RvzpZn78VOQTAPTUEQD43F8AfOpA/Wd2PP6wZKL/2T1y/fgqJPvjh679S1hrAu0cewIooTcAwghPARhAtPyPYKkDNrEi/KIF5PilzwD8Cibs+VgUfvwZBTEBTOyQ/jnKMwFhh4j97FYs/SLn/vsJmr71/jta/1u/AP2dcGz48IBc+7WpUv8L70r9avCI/VHkfv2W/Rb+wK1k+8UOKv/JZ37+iUPq+QrRnQDwGxD/Kd90/T4irvsB4Fr9ueoLAvLM9PyTKpLtMgW0/wV2qvicWJkB2Cxe+kcOcP9mW0j4WwQ4/efaKP0jrhT+KQEi+Dv2+vw/b4T90APu94rQAv3j9Hz/u1Le7wuIiP6/PRL/fy42/WDWuvn3N0jxTzRnAh75GQMClrT9tF4a+kJtkPuNx0T4lZK8/MR8/P3piYr4E9O6+sAKfv+6eRb/4aM4+iQsMP1uXXD1wW90/UmXbvUTywr/554E/7YgNP4ZdTL6lWzW+kzbkP300fL7UsKs+H0aUP2QkXT84gZ2/zhSyv+8uoEDyBk4/nRhuvzUeSL5sHo/AI3BkP94vWb9cMzG/khljv0EAcz+EGsQ/JJnyPtm3jL8nUIu+MtQyv0rFzD8sZ6Y+kJvtP/nmi78Yyqy/HPiBwPqpRr5w7u++uxxhv4huV78uJNk+fEq8v18B1j6w5yU/PZeRPwJh+z+MuNY/ks00vg7FnL+MGj8/vKk1PyGjKsBb/6A+Vi5gP+Vj879PQFa/ZnmaP15+x78fMjW/nZNNPxaYWD9ZklG8p0c/v4BQEEAwzpC/oyvIP5Z2KECqhpw+ltLwvfrB2j+sQEDAWAoSvxK9PT+e3um/dxc1v2KlT716erG9C6FmP8YnUL+LWyxAt3f5Pr+Toj6TsV6/bt4wvgEbtT4OLSo+MvqkPjeIG0B9Re0/nmhtP4SOvj9E57S/cjnsvgoCFL4HR90/4xFawL9Foj9H0Qi/zBCMP6Xfyj8TSF4+a3W2vpsz4z4s5OG+Fp7Pv9Ow+r+1YbS/3PLCPgSxsD1l3sBAxt/zP6D8dj9uEJy/2HfLvjaFk78WOE6/j4SJPxRYHcANduA/pNC0v54PAj8uiIK/JQ8cQOAKk7+nC62+Kaorv/7UVb9GgeQ/MrKsPgrH4T7Ym5PAOHcjvwDDbT8Nw20/KClov2l1Sj8rXRQ+HpZJv+/MOD+LJZ1AUKhnQIsEoL9qMJq/m/KKwKRbkL8coZk/0xqZvhmvgD948Ws/nmF+P5f3Ur+WlSg/1v3rPvvnor/Yyv6/MTASQIqUBUDKqT4/8Yi1PvOnnj/JHF096Os6vx+Nrz95tfg/eaBmP/cmlT/SCa0/cecCQEIQ/D6B2AjA4NyVv9KrCcBdGOE/seZEP1e0gL8N9CQ/IqaqP9YxoUAz/CjAPTIIwHHtQ7/qkglA0TJsv7SJ5b923TzAIEPovF84FsDuG54/nJskPtV3cr77LVe/sJYCwCdZ6z8LHBVAHl+Wv3R9RT5YjTU/uOS4PkbdoD/Cd7i+LjvbvohF1L+13n++Zp13v88syMA0Ps0/cWkFP/ZAUj8vvti+UyNTvwCQt7/iZ3i/PDYzv4huqz9LIIM/TweVvns0Kr9w1DdAtjmIPlzQvL+EFxK+Yd5yv+G9079VFEw/0cbWvWqWG8Dpvwi+FDPjvx7EAUB6lny/BsrTP+rpEr29fTU/Q3P9v8RtEMBy2JHAapUAv9Ms1r5doaa+ri98v2GWFj8yZx5ALWa6PzAebz+hLLe+h+N8P0rrA8CmsRRANLsVwFjNmT/0cc2/R+ahP8B9qD+IjcK9c2iJvyy/UL/3MYA+2fsCvrCsIECLmRk/6vsjQP81Hr/cBgG/Z0aOv0IkKT8jWjE/
AKeyvpMCPj+7DIk/Vt0fQCwzeT/lvIDAJkmLPZC5dj8zM2O9GJT0Pr+Okz9wXcA/Fpb/PxFWtD6kAxBADHRNvoFMgj9iU5M+FyEiPlg8SUCXVHe/Rb4nP08Eg70NY+Q+kl9xvyC0oT5jmM++Cla/PzAZ2b8EeYe/uMzVP7yAQj4H8PO+g04iwO8fHcCa/ljA5Z+PP39R1D11Uno/+2TVv3ZcYjyfkAQ+7aZ0vw2QEr8LLEY+mXBovxYsf8AeD7A/7+crv7824T0cCRpAJcucv26v3L6061u9RZ7pv2j2hb+YCjm/UTLlv6k0iD/A9ma/gUWbuzPRsL94taq/KGoovqkBrb+amAvAAY76PTSYhz84/vi+TIgNPoD6qL4RzI8/9VKkvYzbVL81zVS/5hr5PzHnxb9Lbdk+wMINP7apL79aMnO/noYYwM70Sr8FQde/NNcXQOn1S75NEeA/6HYeP1dt6jwsEXA+cWQiPy5+H0Aa4bO/fqRdP8O8BT+ysp696cZsP5MLYD+U1HO/rg0ewKL5j77I7ofAf9s6vqNurT/AK5Y+hHZyvi9ecr8ELn2+OJNev6dmQD9pUAE/s9nyP+Fqcz/AHwZAdVO5v0+FnT+/QDy/k1R9P31097/IZ8xADkDDP668rz+0EuY9jCTev3ZkrT/cBrc+GWeHvzreqD9PrGm+9ChkvjfWEcCTXJE++oMwP+Lhir9Jpby+j7NTQECBm7/LSca/b03HvxBqhD7vz7m+ciohQB9EUz9zV7y/MOSFuvf3u78uMdk/fMgKvyD33D8R6gdARq16P/BPiT8RhRu/j3BhP6SSCsDH0We/I/3bPzE7wT97Buk/Zm0pQNiqkb4m3B3ANV2HvxIsjT9wsAPAg+W3vz/aacCLYQ1AQhwzP6DlLr9lStU9qswdP8KKqr61faq/AyAAv6npKsDPBv8/Ktqhvxp5oD7Od+Y/LtRSvSGZ4D6RuJy/YMyfvi1Irz+IGLi+HDxPQAnodL7dPmI/4RgBQO/gD8D7du++ugKVviqQkj63IUzAJeiqPm0iAT/cnAW+ka+hv+lwXUBIsgrAyeWqP5B7E8DU3IQ+ZBlXvpNpvD9je4Y/EE5dP7izWz/15iS9kRkjwMsjGr/vp8U+bcHxP6vadD8veDS9Swk4v+jv5D6Yk3U+zREZv12Wx78FUNI/d58GQLkU5L91nZ8+FKdJP1/Glz86MRrA2AMdP1L7eD7XD5w/oD+DPybVKUD9Fq+/HmTBvzz8GkABTmA/Ao4IwIXiFb3Apba/axqSP8MFjD8Np/M+6txyvzXbXz+zWcE/FDY8vwfKH0DL8Oy+8bcUQHHLxL+jBBc/C6Q8wPNsvj8/E46/7nTXv7p5177qo7U/XmJRPfXkRr/z9PO+U6UEwNGWRj+sb6c+TL6vv3exf7/W7PI+Y92hPjk6f0DoQQxA4TjZvHv0uL/BbY49cmInP2VWCL8WjQm+RGa8vg=='"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.tools.embed_image(image=\"https://www.uni.lu/wp-content/uploads/sites/7/2025/10/08132107/Impresso-logo-2025-4_3.png\", target=\"image\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "dea464ff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindImages result
\n",
+ "
Contains 3 items (0 - 3) of 6 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " contentItemUid | \n",
+ " pageNumbers | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " previewUrl | \n",
+ " imageTypes.visualContentType | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | EXP-2006-09-27-a-i0242 | \n",
+ " EXP-2006-09-27-a | \n",
+ "  | \n",
+ " 2006-09-27 | \n",
+ " EXP-2006-09-27-a-i0238 | \n",
+ " [29] | \n",
+ " EXP | \n",
+ " L'Express | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Ornament or Illustrated Title | \n",
+ "
\n",
+ " \n",
+ " | IMP-2011-02-04-a-i0065 | \n",
+ " IMP-2011-02-04-a | \n",
+ "  | \n",
+ " 2011-02-04 | \n",
+ " IMP-2011-02-04-a-i0068 | \n",
+ " [7] | \n",
+ " IMP | \n",
+ " L'Impartial | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Ornament or Illustrated Title | \n",
+ "
\n",
+ " \n",
+ " | EXP-2006-03-02-a-i0024 | \n",
+ " EXP-2006-03-02-a | \n",
+ "  | \n",
+ " 2006-03-02 | \n",
+ " EXP-2006-03-02-a-i0021 | \n",
+ " [2] | \n",
+ " EXP | \n",
+ " L'Express | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " /assets/images/not-allowed.png | \n",
+ " Ornament or Illustrated Title | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.images.find(\n",
+ " embedding=impresso.tools.embed_image(image=\"https://www.uni.lu/wp-content/uploads/sites/7/2025/10/08132107/Impresso-logo-2025-4_3.png\", target=\"image\"),\n",
+ " limit=3\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b6228079",
+ "metadata": {},
+ "source": [
+ "## Get content items related to images"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a48b6e85",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindImages result
\n",
+ "
Contains 2 items of 2 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | | \n",
+ " issueUid | \n",
+ " previewImage | \n",
+ " date | \n",
+ " caption | \n",
+ " pageNumbers | \n",
+ " mediaSourceRef.uid | \n",
+ " mediaSourceRef.name | \n",
+ " mediaSourceRef.type | \n",
+ " imageTypes.visualContent | \n",
+ " previewUrl | \n",
+ " imageTypes.visualContentType | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxland-1996-10-11-a-i0076 | \n",
+ " luxland-1996-10-11-a | \n",
+ "  | \n",
+ " 1996-10-11 | \n",
+ " Silvie Fleury, Soft Rocket, 1995. | \n",
+ " [11] | \n",
+ " luxland | \n",
+ " d'Letzeburger Land | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " https://iiif.eluxemburgensia.lu/image/iiif/2/ark:70795%2fhmrj0j%2fpages%2f11/2728,1968,615,803/max/0/default.jpg | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ " | luxwort-1930-09-26-a-i0036 | \n",
+ " luxwort-1930-09-26-a | \n",
+ "  | \n",
+ " 1930-09-26 | \n",
+ " Das Urbild aller Lokomotiven: Stephensons berühmte „Rocket\", mit der er das Preisausschreiben der Bahngesellschaft Liverpool—Manchester gewann. | \n",
+ " [6] | \n",
+ " luxwort | \n",
+ " Luxemburger Wort | \n",
+ " newspaper | \n",
+ " Image | \n",
+ " https://iiif.eluxemburgensia.lu/image/iiif/2/ark:70795%2f0pn9pt%2fpages%2f6/283,1847,1345,876/max/0/default.jpg | \n",
+ " Object | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.images.find(term=\"rocket\",content_type=\"object\")\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b0c95547",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['luxland-1996-10-11-a', 'luxwort-1930-09-26-a']"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "issue_ids = result.df['issueUid'].unique().tolist()\n",
+ "issue_ids"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60a49f18",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Search result
\n",
+ "
Contains 100 items (0 - 100) of 130 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | luxland-1996-10-11-a-i0019 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Ministère des Travaux publics | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp67_fr', 'relevance'... | \n",
+ " 316 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1996-10-11T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [6] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Luxembourg', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Luxembourg', 'mentionConfide... | \n",
+ " [{'surfaceForm': 'architecte mandaté Monsieur ... | \n",
+ " [{'surfaceForm': 'Esprit', 'mentionConfidence'... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | luxland-1996-10-11-a-i0014 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " urbanistisches Konzept | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp10_de', 'relevance'... | \n",
+ " 193 | \n",
+ " 1 | \n",
+ " de | \n",
+ " False | \n",
+ " 1996-10-11T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [5] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Luxemburg', 'count': 1}] | \n",
+ " [{'uid': '2-50-Joseph_Goebbels', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Stadt Luxemburg', 'mentionCo... | \n",
+ " [{'surfaceForm': 'Bautenminister Robert Goebbe... | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | luxland-1996-10-11-a-i0030 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Éducation différenciée | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp99_fr', 'relevance'... | \n",
+ " 102 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1996-10-11T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [8] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'direction de l'institut pour... | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from impresso import OR\n",
+ "\n",
+ "content_items = impresso.search.find(\n",
+ " issue_id=OR(*issue_ids)\n",
+ ")\n",
+ "content_items"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "7af1eb3b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['luxland-1996-10-11-a', 'luxwort-1930-09-26-a']"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "content_items.df['issueUid'].unique().tolist()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "impresso-py3.13 (3.13.7)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/notebooks/media_sources.ipynb b/examples/notebooks/media_sources.ipynb
index c51ee3c..673f1a6 100644
--- a/examples/notebooks/media_sources.ipynb
+++ b/examples/notebooks/media_sources.ipynb
@@ -2,18 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🎉 You are now connected to the Impresso API! 🎉\n",
- "🔗 Using API: https://dev.impresso-project.ch/public-api/v1\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from impresso import connect\n",
"\n",
@@ -95,7 +86,7 @@
" [fr, de] | \n",
" [1895, 1895] | \n",
" [1895-04-07T00:00:00+00:00, 1895-12-29T00:00:0... | \n",
- " [{'id': 'countryCode', 'label': 'country code'... | \n",
+ " [] | \n",
" 743 | \n",
" 39 | \n",
" 314 | \n",
@@ -107,7 +98,7 @@
" [fr, de, lb] | \n",
" [1848, 1950] | \n",
" [1848-03-23T00:00:00+00:00, 1950-12-30T00:00:0... | \n",
- " [{'id': 'countryCode', 'label': 'country code'... | \n",
+ " [] | \n",
" 1332945 | \n",
" 30370 | \n",
" 161209 | \n",
@@ -117,7 +108,7 @@
""
],
"text/plain": [
- ""
+ ""
]
},
"execution_count": 2,
@@ -177,7 +168,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "impresso-py3.11",
+ "display_name": "impresso-py3.13 (3.13.7)",
"language": "python",
"name": "python3"
},
@@ -191,7 +182,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.13.7"
}
},
"nbformat": 4,
diff --git a/examples/notebooks/search.ipynb b/examples/notebooks/search.ipynb
index a6afeff..76b11ba 100644
--- a/examples/notebooks/search.ipynb
+++ b/examples/notebooks/search.ipynb
@@ -2,18 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🎉 You are now connected to the Impresso API! 🎉\n",
- "🔗 Using API: https://dev.impresso-project.ch/public-api/v1\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from impresso import connect\n",
"\n",
@@ -40,7 +31,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -49,7 +40,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 8 items of 8 total items.
\n",
+ "
Contains 100 items (0 - 100) of 277 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -73,21 +64,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -106,72 +103,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | luxland-1991-12-06-a-i0055 | \n",
+ " IMP-1990-12-07-a-i0433 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Unzufriedenes Gefühl | \n",
- " | \n",
- " [{'uid': '2-54-Reuters'}] | \n",
- " [{'uid': '2-50-Parry'}, {'uid': '2-50-Robin_Wi... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp87_de', 'relevance'... | \n",
- " 414 | \n",
+ " print | \n",
+ " s s w xi | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp44_fr', 'relevance'... | \n",
+ " 1281 | \n",
" 1 | \n",
- " de | \n",
+ " fr | \n",
" False | \n",
- " 1991-12-06T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxland | \n",
- " newspaper | \n",
+ " 1990-12-07T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [42] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | LCE-1982-09-14-a-i0018 | \n",
+ " GDL-1983-09-24-a-i0020 | \n",
+ " in_cpy | \n",
" ar | \n",
- " A L'AFFICHE | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Arnold'}, {'uid': '2-50-Pierre'}] | \n",
+ " print | \n",
+ " Sur les écrans lausannois | \n",
" [{'uid': 'tm-fr-all-v2.0_tp73_fr', 'relevance'... | \n",
- " 147 | \n",
+ " 111 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1982-09-14T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LCE | \n",
- " newspaper | \n",
+ " 1983-09-24T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [2] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Colisée', 'count': 1}, {'uid': ... | \n",
+ " [{'uid': '2-50-Woody_Allen', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Athénée', 'mentionConfidence... | \n",
+ " [{'surfaceForm': 'Carmen', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'Barbe', 'mentionConfidence':... | \n",
+ " [{'surfaceForm': 'Athénée', 'mentionConfidence... | \n",
"
\n",
" \n",
- " | luxland-2007-06-22-a-i0043 | \n",
+ " GDL-1981-07-11-a-i0008 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Prévisible homme vert | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Harold_Godwinson'}, {'uid': '2-... | \n",
+ " print | \n",
+ " I cinéma. | \n",
" [{'uid': 'tm-fr-all-v2.0_tp73_fr', 'relevance'... | \n",
- " 212 | \n",
+ " 199 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 2007-06-22T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxland | \n",
- " newspaper | \n",
+ " 1981-07-11T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [2] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-Richard_Wagner', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Rohmer', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'John Boorman', 'mentionConfi... | \n",
+ " [] | \n",
+ " [] | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 16,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -189,32 +211,35 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Total items in the result set: 8\n",
- "Got page 0 - 2 of 8. The first title is Unzufriedenes Gefühl\n",
- "Got page 2 - 4 of 8. The first title is Prévisible homme vert\n",
- "Got page 4 - 6 of 8. The first title is Aux élus et aux membres des comités locaux PRD du district de St-Mauri[...]\n",
- "Got page 6 - 8 of 8. The first title is DU TAC\n"
+ "Total items in the result set: 65\n",
+ "Got page 0 - 10 of 65. The first title is Huit films au programme\n",
+ "Got page 10 - 20 of 65. The first title is \n",
+ "Got page 20 - 30 of 65. The first title is \n",
+ "Got page 30 - 40 of 65. The first title is \n",
+ "Got page 40 - 50 of 65. The first title is RADIO\n",
+ "Got page 50 - 60 of 65. The first title is \n",
+ "Got page 60 - 65 of 65. The first title is \n"
]
}
],
"source": [
"result = impresso.search.find(\n",
- " term=\"monthy python\",\n",
- " limit=2,\n",
+ " term=\"monty python flying circus\",\n",
+ " limit=10,\n",
")\n",
"\n",
"print(f\"Total items in the result set: {result.total}\")\n",
"for page in result.pages():\n",
" print(\n",
" f\"Got page {page.offset} - {page.offset + page.size} of {page.total}. \"\n",
- " + f\"The first title is {page.raw['data'][0]['title']}\"\n",
+ " + f\"The first title is {page.raw['data'][0].get('title', '')}\"\n",
" )\n"
]
},
@@ -229,7 +254,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -238,7 +263,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 2 items of 2 total items.
\n",
+ "
Contains 80 items of 80 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -262,21 +287,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -295,54 +326,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | luxland-2000-06-23-a-i0028 | \n",
+ " LLE-1973-05-18-a-i0016 | \n",
+ " in_cpy | \n",
+ " ad | \n",
+ " print | \n",
+ " UNKNOWN | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp77_fr', 'relevance'... | \n",
+ " 41 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1973-05-18T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [22] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | FZG-1999-12-31-a-i0124 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Vor 400 Jahren wurde in Luxemburg das erste Bu... | \n",
- " | \n",
- " [{'uid': '2-54-Resurrection_of_Jesus'}, {'uid'... | \n",
- " [{'uid': '2-50-Jesus_(Begriffsklärung)'}, {'ui... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp11_de', 'relevance'... | \n",
- " 2999 | \n",
- " 3 | \n",
+ " print | \n",
+ " Merkur Druck AG mit neuer Führung Die La... | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp98_de', 'relevance'... | \n",
+ " 66 | \n",
+ " 1 | \n",
" de | \n",
" False | \n",
- " 2000-06-23T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxland | \n",
- " newspaper | \n",
+ " 1999-12-31T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [18] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | NV2-1874-03-10-a-i0002 | \n",
- " page | \n",
- " NOUVELLISTE VAUDOIS I Le transport | \n",
- " | \n",
- " [{'uid': '2-54-Afrique'}, {'uid': '2-54-Vienne... | \n",
- " [{'uid': '2-50-Delsol'}, {'uid': '2-50-Léon_Ga... | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp36_fr', 'relevance'... | \n",
- " 2913 | \n",
+ " JDG-1980-01-24-a-i0087 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Visite à des artisans de Prilly, Vaud Une peti... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp77_fr', 'relevance'... | \n",
+ " 757 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1874-03-10T00:00:00+00:00 | \n",
- " CH | \n",
- " BCUL | \n",
- " NV2 | \n",
- " newspaper | \n",
+ " 1980-01-24T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [11] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 18,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -361,7 +435,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -370,7 +444,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 2 items of 2 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -394,18 +468,107 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " issueUid | \n",
+ " countryCode | \n",
+ " providerCode | \n",
+ " mediaUid | \n",
+ " mediaType | \n",
+ " hasOLR | \n",
+ " ocrQualityScore | \n",
+ " relevanceScore | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | DTT-1976-12-02-a-i0114 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " DRSI 6.05 Impresso. 6.50 Zum neuen Tag. ... | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp62_de', 'relevance'... | \n",
+ " 354 | \n",
+ " 1 | \n",
+ " de | \n",
+ " False | \n",
+ " 1976-12-02T00:00:00+00:00 | \n",
+ " DTT-1976-12-02-a | \n",
+ " CH | \n",
+ " Migros | \n",
+ " DTT | \n",
+ " newspaper | \n",
+ " True | \n",
+ " 0.94 | \n",
+ " 0 | \n",
+ " [14] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | FZG-1995-11-27-a-i0121 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Kaffeemaschine \"Impresso\" Bügeleisen Dampfrein... | \n",
+ " [] | \n",
+ " 19 | \n",
+ " 1 | \n",
+ " de | \n",
+ " False | \n",
+ " 1995-11-27T00:00:00+00:00 | \n",
+ " FZG-1995-11-27-a | \n",
+ " CH | \n",
+ " SNL | \n",
+ " FZG | \n",
+ " newspaper | \n",
+ " True | \n",
+ " 0.88 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 19,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -424,7 +587,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -433,7 +596,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 174 total items.
\n",
+ "
Contains 100 items (0 - 100) of 857 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -457,21 +620,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -490,72 +659,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | GAV-1890-12-27-a-i0006 | \n",
+ " JDG-1989-08-31-a-i0113 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Art, 1. Chaque personne, homme ou femme,... | \n",
- " | \n",
- " [{'uid': '2-54-Société'}] | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp70_fr', 'relevance'... | \n",
- " 202 | \n",
+ " print | \n",
+ " FOOTBALL Homme ou femme, devenez arbitre | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp50_fr', 'relevance'... | \n",
+ " 193 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1890-12-27T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " GAV | \n",
- " newspaper | \n",
+ " 1989-08-31T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [16] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | LLS-1980-06-25-a-i0003 | \n",
+ " EXP-1950-12-04-a-i0120 | \n",
+ " in_cpy | \n",
" ar | \n",
- " ÉGALITÊ HOMME-FEMME: lOMMAGE! | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp88_fr', 'relevance'... | \n",
- " 818 | \n",
+ " print | \n",
+ " PENSEZ-VOUS COMME UN HOMME ou COMME UNE FEMME ? | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp55_fr', 'relevance'... | \n",
+ " 97 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1980-06-25T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LLS | \n",
- " newspaper | \n",
+ " 1950-12-04T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [7] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | LLS-1982-12-08-a-i0006 | \n",
+ " oerennes-1931-01-18-a-i0089 | \n",
+ " in_cpy | \n",
" ar | \n",
- " ËGALITÉ HOMME—FEMME; GAGHOTTERIES | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Christiane_Brunner'}] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp87_fr', 'relevance'... | \n",
- " 370 | \n",
+ " print | \n",
+ " Un homme et une femme renversés par un camion | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp11_fr', 'relevance'... | \n",
+ " 100 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1982-12-08T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LLS | \n",
- " newspaper | \n",
+ " 1931-01-18T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [4] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'boulevard de Chézy', 'mentio... | \n",
+ " [{'surfaceForm': 'Mme Bordet', 'mentionConfide... | \n",
+ " [] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 20,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -577,7 +771,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -586,7 +780,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 20128 total items.
\n",
+ "
Contains 100 items (0 - 100) of 88000 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -610,21 +804,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -643,72 +843,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | CDV-1850-11-16-a-i0020 | \n",
+ " EXP-1884-08-02-a-i0031 | \n",
+ " in_cpy | \n",
" ar | \n",
- " LE RÊVE D'UN HOMME D'ETAT | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
- " [] | \n",
- " 21 | \n",
+ " print | \n",
+ " Un jeune homme | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp82_fr', 'relevance'... | \n",
+ " 48 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1850-11-16T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " CDV | \n",
- " newspaper | \n",
+ " 1884-08-02T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Canton_de_Zurich', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'canton de Zurich', 'mentionC... | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'O. F', 'mentionConfidence': ... | \n",
+ " [{'surfaceForm': 'O.', 'mentionConfidence': 54... | \n",
"
\n",
" \n",
- " | CON-1902-10-29-a-i0003 | \n",
+ " EXP-1886-09-07-a-i0041 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Humoristique Entre amis : Ma femme est m... | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp18_fr', 'relevance'... | \n",
- " 65 | \n",
+ " print | \n",
+ " Un jeune homme | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp82_fr', 'relevance'... | \n",
+ " 40 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1902-10-29T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " CON | \n",
- " newspaper | \n",
+ " 1886-09-07T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | FedGazFr-1982-07-27-a-i0001 | \n",
+ " EXP-1886-12-10-a-i0048 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Rapport sur la politique de la Suisse en faveu... | \n",
- " | \n",
- " [{'uid': '2-54-Suisse'}, {'uid': '2-54-Genève'... | \n",
- " [{'uid': '2-50-Arthur_Honegger'}, {'uid': '2-5... | \n",
- " [] | \n",
- " 19944 | \n",
- " 65 | \n",
+ " print | \n",
+ " OFFRES & DEMANDES D'EMPLOI On demande un jeune... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp82_fr', 'relevance'... | \n",
+ " 96 | \n",
+ " 1 | \n",
" fr | \n",
- " True | \n",
- " 1982-07-27T00:00:00+00:00 | \n",
- " CH | \n",
- " SFA | \n",
- " FedGazFr | \n",
- " newspaper | \n",
+ " False | \n",
+ " 1886-12-10T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 21,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -730,7 +955,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -763,21 +988,27 @@
" \n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
+ " ... | \n",
" mediaType | \n",
+ " hasOLR | \n",
+ " ocrQualityScore | \n",
+ " relevanceScore | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -796,36 +1027,49 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
" \n",
- " | armeteufel-1921-06-04-a-i0015 | \n",
+ " LES-2010-02-01-a-i0024 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Ernst Toller: Die Ludditen. Das Vorspiel. | \n",
- " | \n",
- " [{'uid': '2-54-England'}, {'uid': '2-54-Portug... | \n",
- " [{'uid': '2-50-Palace_of_Westminster'}, {'uid'... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp41_de', 'relevance'... | \n",
- " 990 | \n",
+ " print | \n",
+ " La révolte luddite Kirkpatrick Sale, Edition L... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp10_fr', 'relevance'... | \n",
+ " 291 | \n",
" 1 | \n",
- " de | \n",
+ " fr | \n",
" False | \n",
- " 1921-06-04T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " armeteufel | \n",
+ " 2010-02-01T00:00:00+00:00 | \n",
+ " ... | \n",
" newspaper | \n",
+ " True | \n",
+ " 0.99 | \n",
+ " 1 | \n",
+ " [11] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'L’ouvrage de Sale', 'mention... | \n",
+ " [] | \n",
+ " [] | \n",
"
\n",
" \n",
"\n",
+ "1 rows × 24 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 22,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -851,7 +1095,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -860,9 +1104,9 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 276 total items.
\n",
+ "
Contains 67 items of 67 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -884,21 +1128,27 @@
" \n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ " transcript | \n",
"
\n",
" \n",
" | uid | \n",
@@ -917,72 +1167,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
" \n",
- " | luxwort-1946-04-30-a-i0004 | \n",
+ " luxwort-1946-04-02-a-i0053 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Wie Hitlers Dolmetscher sie sah Hinter den ver... | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Adolf_Hitler'}, {'uid': '2-50-J... | \n",
+ " print | \n",
+ " Die gestrigen Verhandlungen in Nürnberg | \n",
" [{'uid': 'tm-de-all-v2.0_tp61_de', 'relevance'... | \n",
- " 1050 | \n",
+ " 235 | \n",
" 1 | \n",
" de | \n",
- " False | \n",
- " 1946-04-30T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " True | \n",
+ " 1946-04-02T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-Joachim_von_Ribbentrop', 'count... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Ribbentrop Hitler', 'mention... | \n",
+ " [] | \n",
+ " [] | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | luxwort-1940-03-30-a-i0005 | \n",
+ " FZG-1988-08-25-a-i0029 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Hitler-Vrief an Stalin. | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Rudolf_Heß'}, {'uid': '2-50-Ben... | \n",
+ " print | \n",
+ " Demonstrationen in den 3 baltischen Sowj... | \n",
" [{'uid': 'tm-de-all-v2.0_tp61_de', 'relevance'... | \n",
- " 427 | \n",
+ " 906 | \n",
" 1 | \n",
" de | \n",
- " False | \n",
- " 1940-03-30T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " True | \n",
+ " 1988-08-25T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '4-55-AFP', 'count': 2}, {'uid': '', ... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'afp', 'mentionConfidence': 9... | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | dunioun-1946-03-30-a-i0102 | \n",
+ " DTT-1948-01-23-a-i0009 | \n",
+ " in_cpy | \n",
" ar | \n",
- " DE NÜRNBERGER PROZESS | \n",
- " | \n",
- " [{'uid': '2-54-Däitscht_Räich_vun_1933_bis_194... | \n",
- " [{'uid': '2-50-Adolf_Hitler'}] | \n",
- " [{'uid': 'tm-lb-all-v2.1_tp53_lb', 'relevance'... | \n",
- " 593 | \n",
+ " print | \n",
+ " Als Molotow Hitler Glück wünschte | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp77_de', 'relevance'... | \n",
+ " 687 | \n",
" 1 | \n",
- " lb | \n",
+ " de | \n",
" True | \n",
- " 1946-03-30T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " dunioun | \n",
- " newspaper | \n",
- "
\n",
+ " 1948-01-23T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Vereinigte_Staaten', 'count': 2... | \n",
+ " [{'uid': '2-50-Adolf_Hitler', 'count': 1}, {'u... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Amerika', 'mentionConfidence... | \n",
+ " [{'surfaceForm': 'Hitler', 'mentionConfidence'... | \n",
+ " [] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " \n",
" \n",
"\n",
+ "3 rows × 29 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 23,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -1004,7 +1279,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -1013,7 +1288,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 1 items of 1 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1037,18 +1312,88 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | LLE-1970-11-10-a-i0021 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Rencontre Scheel-Cyrankiewicz | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp74_fr', 'relevance'... | \n",
+ " 225 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " True | \n",
+ " 1970-11-10T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [1] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Bonn', 'count': 1}] | \n",
+ " [{'uid': '2-50-Walter_Scheel', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Bonn', 'mentionConfidence': ... | \n",
+ " [{'surfaceForm': 'Walter Scheel', 'mentionConf... | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
" \n",
"\n",
+ "
1 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 24,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1068,7 +1413,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -1109,10 +1454,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 25,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -1130,7 +1475,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -1171,10 +1516,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 26,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1192,7 +1537,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -1233,10 +1578,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 27,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1256,7 +1601,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -1265,7 +1610,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 100 items (0 - 100) of 485 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1289,18 +1634,136 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | EXP-1958-08-15-a-i0236 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Les réactions dans le monde | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp71_fr', 'relevance'... | \n",
+ " 400 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1958-08-15T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [11] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Washington_(État)', 'count': 1}... | \n",
+ " [{'uid': '2-50-Harry_S._Truman', 'count': 1}, ... | \n",
+ " [{'uid': '2-53-Organización_de_las_Naciones_Un... | \n",
+ " [{'uid': '4-55-AFP', 'count': 1}, {'uid': '', ... | \n",
+ " [{'surfaceForm': 'WASHINGTON', 'mentionConfide... | \n",
+ " [{'surfaceForm': 'ancien président Truman', 'm... | \n",
+ " [{'surfaceForm': 'Conseil jordanien', 'mention... | \n",
+ " [{'surfaceForm': '. F. P', 'mentionConfidence'... | \n",
+ "
\n",
+ " \n",
+ " | EXP-2016-08-24-a-i0103 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " La traque des services secrets | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp25_fr', 'relevance'... | \n",
+ " 97 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 2016-08-24T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [16] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-53-Box-office', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'BOX-OFFICE', 'mentionConfide... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | EXP-1978-03-03-a-i0433 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Dollar et pétrole | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp72_fr', 'relevance'... | \n",
+ " 545 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1978-03-03T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [28] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-États-Unis', 'count': 3}, {'uid... | \n",
+ " [{'uid': '2-50-Jimmy_Carter', 'count': 1}] | \n",
+ " [{'uid': '2-53-États_pontificaux', 'count': 3}... | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Etats-Unis', 'mentionConfide... | \n",
+ " [{'surfaceForm': 'M. Carter', 'mentionConfiden... | \n",
+ " [{'surfaceForm': 'Etats', 'mentionConfidence':... | \n",
+ " [] | \n",
+ "
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 28,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -1320,7 +1783,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -1329,7 +1792,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 191 total items.
\n",
+ "
Contains 100 items (0 - 100) of 1822 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1353,21 +1816,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -1386,72 +1855,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | luxwort-1945-07-04-a-i0001 | \n",
+ " GDL-1991-07-04-a-i0269 | \n",
+ " in_cpy | \n",
" ar | \n",
- " „Proclaim liberty throughout the land...\" Ganz... | \n",
- " | \n",
+ " print | \n",
+ " spectacles | \n",
" [] | \n",
- " [{'uid': '2-50-Adler'}, {'uid': '2-50-Charlott... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp79_de', 'relevance'... | \n",
- " 1079 | \n",
+ " 74 | \n",
" 1 | \n",
- " de | \n",
- " True | \n",
- " 1945-07-04T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " fr | \n",
+ " False | \n",
+ " 1991-07-04T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [30] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | luxwort-1947-07-06-a-i0034 | \n",
+ " JDG-1995-07-04-a-i0070 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Molotow erscheint nicht zur Feier des Independ... | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Wjatscheslaw_Michailowitsch_Mol... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp77_de', 'relevance'... | \n",
- " 63 | \n",
+ " print | \n",
+ " New York en hausse | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp89_fr', 'relevance'... | \n",
+ " 68 | \n",
" 1 | \n",
- " de | \n",
- " True | \n",
- " 1947-07-06T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " fr | \n",
+ " False | \n",
+ " 1995-07-04T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [13] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | luxwort-1950-12-27-a-i0043 | \n",
+ " JDG-1995-07-05-a-i0129 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Präsident Truman wieder in Washington | \n",
- " | \n",
- " [{'uid': '2-54-Independence'}, {'uid': '2-54-M... | \n",
- " [{'uid': '2-50-Harry_S._Truman'}, {'uid': '2-5... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp77_de', 'relevance'... | \n",
- " 83 | \n",
+ " print | \n",
+ " Le rêve américain au Bout-du-Monde | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp75_fr', 'relevance'... | \n",
+ " 338 | \n",
" 1 | \n",
- " de | \n",
- " True | \n",
- " 1950-12-27T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " fr | \n",
+ " False | \n",
+ " 1995-07-05T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [20] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 29,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1471,7 +1965,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -1480,7 +1974,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 116 total items.
\n",
+ "
Contains 100 items (0 - 100) of 500 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1504,21 +1998,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -1537,72 +2037,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | luxwort-1904-04-09-a-i0015 | \n",
- " ar | \n",
- " Lokal-Neuigkeiten. | \n",
- " | \n",
+ " oeuvre-1919-07-04-a-i0088 | \n",
+ " in_cpy | \n",
+ " ad | \n",
+ " print | \n",
+ " Publicité | \n",
" [] | \n",
- " [{'uid': '2-50-Peter'}] | \n",
- " [{'uid': 'tm-de-all-v2.0_tp94_de', 'relevance'... | \n",
- " 367 | \n",
+ " 25 | \n",
" 1 | \n",
- " de | \n",
- " False | \n",
- " 1904-04-09T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxwort | \n",
- " newspaper | \n",
+ " fr | \n",
+ " True | \n",
+ " 1919-07-04T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [1] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'ASSEMBLÉE DE WEIMAR DISCUTE ... | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | waechtersauer-1849-02-07-a-i0007 | \n",
+ " excelsior-1918-07-12-a-i0042 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Belgien. | \n",
- " | \n",
- " [{'uid': '2-54-Belgien'}] | \n",
- " [] | \n",
- " [{'uid': 'tm-de-all-v2.0_tp47_de', 'relevance'... | \n",
- " 65 | \n",
+ " print | \n",
+ " L'apôtre | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp55_fr', 'relevance'... | \n",
+ " 181 | \n",
" 1 | \n",
- " de | \n",
+ " fr | \n",
" False | \n",
- " 1849-02-07T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " waechtersauer | \n",
- " newspaper | \n",
+ " 1918-07-12T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [4] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Amérique', 'mentionConfidenc... | \n",
+ " [{'surfaceForm': 'président Wilson', 'mentionC... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-États-Unis', 'count': 2}] | \n",
+ " [{'uid': '2-50-Woodrow_Wilson', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
"
\n",
" \n",
- " | NTS-1873-09-20-a-i0020 | \n",
+ " IMP-2009-08-19-a-i0287 | \n",
+ " in_cpy | \n",
" ar | \n",
- " — New-Iork. In Schevreport (Louisiana) b... | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
- " [{'uid': 'tm-de-all-v2.0_tp47_de', 'relevance'... | \n",
- " 158 | \n",
+ " print | \n",
+ " Les ovnis britanniques | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp25_fr', 'relevance'... | \n",
+ " 208 | \n",
" 1 | \n",
- " de | \n",
+ " fr | \n",
" False | \n",
- " 1873-09-20T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " NTS | \n",
- " newspaper | \n",
+ " 2009-08-19T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [32] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'ats', 'mentionConfidence': 9... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '4-55-ATS_SDA', 'count': 1}, {'uid': ... | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 30,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1624,7 +2149,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -1633,9 +2158,9 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 3 items of 3 total items.
\n",
+ "
Contains 43 items of 43 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -1657,21 +2182,27 @@
" \n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -1690,72 +2221,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
" \n",
- " | luxland-2004-06-18-a-i0025 | \n",
+ " FCT-1956-04-06-a-i0012 | \n",
+ " in_cpy | \n",
" ar | \n",
- " New Europe | \n",
- " | \n",
- " [{'uid': '2-54-Luxembourg'}, {'uid': '2-54-Hun... | \n",
- " [] | \n",
+ " print | \n",
+ " LISTA DELLE VIVANDE | \n",
" [] | \n",
- " 239 | \n",
+ " 454 | \n",
" 1 | \n",
- " en | \n",
+ " it | \n",
" False | \n",
- " 2004-06-18T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxland | \n",
- " newspaper | \n",
+ " 1956-04-06T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'frutta', 'mentionConfidence'... | \n",
+ " [] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | luxland-2007-06-15-a-i0041 | \n",
+ " LLE-1994-03-28-a-i0225 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Letter to the Editor | \n",
- " | \n",
- " [{'uid': '2-54-England'}] | \n",
+ " print | \n",
+ " RAI | \n",
" [] | \n",
- " [] | \n",
- " 445 | \n",
+ " 102 | \n",
" 1 | \n",
- " en | \n",
+ " it | \n",
" False | \n",
- " 2007-06-15T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxland | \n",
- " newspaper | \n",
+ " 1994-03-28T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [26] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | luxland-2005-09-09-a-i0019 | \n",
+ " LLE-1994-04-01-a-i0344 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Conflicting views about Europe's future | \n",
- " | \n",
- " [{'uid': '2-54-Europe'}, {'uid': '2-54-Luxembo... | \n",
- " [{'uid': '2-50-Jacques_Chirac'}] | \n",
+ " print | \n",
+ " RAI | \n",
" [] | \n",
- " 1079 | \n",
+ " 99 | \n",
" 1 | \n",
- " en | \n",
+ " it | \n",
" False | \n",
- " 2005-09-09T00:00:00+00:00 | \n",
- " LU | \n",
- " BNL | \n",
- " luxland | \n",
- " newspaper | \n",
+ " 1994-04-01T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [33] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
+ "3 rows × 27 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 31,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -1773,7 +2329,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -1782,9 +2338,9 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 460 total items.
\n",
+ "
Contains 100 items (0 - 100) of 3138 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -1806,21 +2362,27 @@
" \n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -1839,72 +2401,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
" \n",
- " | lepetitparisien-1909-02-26-a-i0063 | \n",
+ " EXP-1961-03-09-a-i0265 | \n",
+ " in_cpy | \n",
" ar | \n",
- " « LA BANANA » | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp43_fr', 'relevance'... | \n",
- " 190 | \n",
+ " print | \n",
+ " M. Dayal exige le retour des troupes de l'ONU ... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp74_fr', 'relevance'... | \n",
+ " 83 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1909-02-26T00:00:00+00:00 | \n",
- " FR | \n",
- " BNF | \n",
- " lepetitparisien | \n",
- " newspaper | \n",
+ " 1961-03-09T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [19] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-53-Organización_de_las_Naciones_Un... | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Matadi 'f', 'mentionConfiden... | \n",
+ " [{'surfaceForm': 'Joseph Kasavubu, M. Dayal, r... | \n",
+ " [{'surfaceForm': 'Nations Unies', 'mentionConf... | \n",
+ " [] | \n",
"
\n",
" \n",
- " | LSE-1927-02-07-a-i0006 | \n",
+ " EXP-1988-07-18-a-i0047 | \n",
+ " in_cpy | \n",
" ar | \n",
- " ECHOS | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
+ " print | \n",
+ " Garçon, s'il vous pia?! | \n",
" [{'uid': 'tm-fr-all-v2.0_tp36_fr', 'relevance'... | \n",
- " 201 | \n",
+ " 412 | \n",
" 1 | \n",
" fr | \n",
- " True | \n",
- " 1927-02-07T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LSE | \n",
- " newspaper | \n",
+ " False | \n",
+ " 1988-07-18T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Neuchâtel', 'count': 1}, {'uid'... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Neuchâtel', 'mentionConfiden... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
"
\n",
" \n",
- " | LCE-1975-04-25-a-i0020 | \n",
+ " EXP-2007-03-08-a-i0159 | \n",
+ " in_cpy | \n",
" ar | \n",
- " BANANAS | \n",
- " | \n",
- " [] | \n",
- " [{'uid': '2-50-Woody_Allen'}] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp73_fr', 'relevance'... | \n",
+ " print | \n",
+ " MOTS CROISÉS N o 803 | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp58_fr', 'relevance'... | \n",
" 229 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1975-04-25T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LCE | \n",
- " newspaper | \n",
+ " 2007-03-08T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [16] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
+ "3 rows × 28 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 32,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -1924,7 +2511,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -1933,7 +2520,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 1 items of 1 total items.
\n",
+ "
Contains 12 items of 12 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1957,21 +2544,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -1990,36 +2583,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | NZZ-1947-02-28-c-i0008 | \n",
- " page | \n",
- " [REDACTED] | \n",
- " [REDACTED] | \n",
- " [{'uid': '2-54-Malcolm_Reed'}, {'uid': '2-54-M... | \n",
- " [{'uid': '2-50-Willi'}, {'uid': '2-50-Sarah_Be... | \n",
- " [{'uid': 'tm-de-all-v2.0_tp87_de', 'relevance'... | \n",
- " 1045 | \n",
+ " GDL-1981-08-26-a-i0040 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Hommage photographique à Charles Chaplin | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp55_fr', 'relevance'... | \n",
+ " 752 | \n",
" 1 | \n",
- " de | \n",
+ " fr | \n",
" False | \n",
- " 1947-02-28T00:00:00+00:00 | \n",
- " CH | \n",
- " NZZ | \n",
- " NZZ | \n",
- " newspaper | \n",
+ " 1981-08-26T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [5] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Allemagne', 'count': 1}] | \n",
+ " [{'uid': '2-50-Charlie_Chaplin', 'count': 2}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Grande Place', 'mentionConfi... | \n",
+ " [{'surfaceForm': 'Sir Charles Chaplin', 'menti... | \n",
+ " [{'surfaceForm': 'Université de Liège', 'menti... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | GDL-1985-12-21-a-i0208 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " COURSE AUTOUR DU MONDE Eric Tabarly n'abdiquer... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp12_fr', 'relevance'... | \n",
+ " 1197 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1985-12-21T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [23] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Auckland', 'count': 1}, {'uid':... | \n",
+ " [] | \n",
+ " [{'uid': '2-53-Pen_Duick', 'count': 1}] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Breton', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'Eric Tabarly', 'mentionConfi... | \n",
+ " [{'surfaceForm': 'Whitbread', 'mentionConfiden... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | EXP-1965-02-20-a-i0265 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " -;' ' ¦\"•.\"';'' .j_-f, 'Xi(*$(JlK>M 9| ll^V... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp37_fr', 'relevance'... | \n",
+ " 885 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1965-02-20T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [14] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Canton_de_Zurich', 'count': 1},... | \n",
+ " [] | \n",
+ " [{'uid': '2-53-États-Unis', 'count': 1}] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Zurich', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'Charlie Chaplin', 'mentionCo... | \n",
+ " [{'surfaceForm': 'américaine', 'mentionConfide... | \n",
+ " [] | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 33,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -2039,7 +2693,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -2048,7 +2702,7 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 221039 total items.
\n",
+ "
Contains 100 items (0 - 100) of 1175198 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2072,21 +2726,27 @@
"
\n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -2105,72 +2765,97 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
" \n",
- " | LSE-1959-08-25-a-i0059 | \n",
+ " EXP-1858-06-17-a-i0029 | \n",
+ " in_cpy | \n",
" ar | \n",
- " LA FOUDRE INCENDIAIRE | \n",
- " | \n",
- " [] | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp48_fr', 'relevance'... | \n",
- " 272 | \n",
+ " print | \n",
+ " INTÉRÊTS LOCAUX. | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp36_fr', 'relevance'... | \n",
+ " 219 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1959-08-25T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LSE | \n",
- " newspaper | \n",
+ " 1858-06-17T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-53-La_Chaux-de-Fonds', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Chaux-de-Fonds', 'mentionCon... | \n",
+ " [] | \n",
"
\n",
" \n",
- " | lepetitparisien-1925-02-27-a-i0066 | \n",
+ " EXP-1859-11-17-a-i0025 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Une ferme détruite par un incendie | \n",
- " | \n",
- " [{'uid': '2-54-Pontoise'}] | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp48_fr', 'relevance'... | \n",
- " 102 | \n",
+ " print | \n",
+ " AVIS DIVERS. | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp70_fr', 'relevance'... | \n",
+ " 35 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1925-02-27T00:00:00+00:00 | \n",
- " FR | \n",
- " BNF | \n",
- " lepetitparisien | \n",
- " newspaper | \n",
+ " 1859-11-17T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Comité du bâtiment des Conce... | \n",
+ " [] | \n",
"
\n",
" \n",
- " | lepetitparisien-1908-09-04-a-i0048 | \n",
+ " EXP-1868-06-06-a-i0046 | \n",
+ " in_cpy | \n",
" ar | \n",
- " Un chantier de bois et une scierie détruits, à... | \n",
- " | \n",
- " [{'uid': '2-54-Provins'}, {'uid': '2-54-Arrond... | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp48_fr', 'relevance'... | \n",
- " 162 | \n",
+ " print | \n",
+ " Avis an publie. | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp40_fr', 'relevance'... | \n",
+ " 52 | \n",
" 1 | \n",
" fr | \n",
" False | \n",
- " 1908-09-04T00:00:00+00:00 | \n",
- " FR | \n",
- " BNF | \n",
- " lepetitparisien | \n",
- " newspaper | \n",
+ " 1868-06-06T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [4] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Neuchâtel', 'count': 1}] | \n",
+ " [] | \n",
+ " [{'uid': '2-53-Neuchâtel', 'count': 1}] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'ville de Neuchâtel', 'mentio... | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Neuchâtel', 'mentionConfiden... | \n",
+ " [] | \n",
"
\n",
" \n",
"\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 34,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -2190,7 +2875,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -2224,37 +2909,219 @@
" \n",
" | \n",
"
\n",
- " \n",
- " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.search.find(collection_id=\"REPLACEME\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Country\n",
+ "\n",
+ "Find all content items published in either of the two specified countries."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Search result
\n",
+ "
Contains 100 items (0 - 100) of 5658 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | EXP-2016-03-07-a-i0117 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " CONTEXTE | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp71_fr', 'relevance'... | \n",
+ " 116 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 2016-03-07T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [17] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | EXP-2017-06-17-a-i0168 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " La Suisse va suivre le durcissement européen s... | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp88_fr', 'relevance'... | \n",
+ " 508 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 2017-06-17T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [21] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | LCE-2004-06-04-a-i0053 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " NaN | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp87_fr', 'relevance'... | \n",
+ " 119 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 2004-06-04T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [6] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Christian Weber Chef de pres... | \n",
+ " [] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
" \n",
"
\n",
+ "
3 rows × 28 columns
\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 35,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "impresso.search.find(collection_id=\"REPLACEME\")"
+ "impresso.search.find(term=\"Schengen\", country=OR(\"FR\", \"CH\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Country\n",
+ "## Partner\n",
"\n",
- "Find all content items published in either of the two specified countries."
+ "Limit search to content items provided by a specific partner of the Impresso project."
]
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -2263,9 +3130,9 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 100 items (0 - 100) of 378 total items.
\n",
+ "
Contains 18 items of 18 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -2287,21 +3154,27 @@
" \n",
" \n",
" | \n",
+ " copyrightStatus | \n",
" type | \n",
+ " sourceMedium | \n",
" title | \n",
- " transcript | \n",
- " locations | \n",
- " persons | \n",
" topics | \n",
" transcriptLength | \n",
" totalPages | \n",
" languageCode | \n",
" isOnFrontPage | \n",
" publicationDate | \n",
- " countryCode | \n",
- " dataProviderCode | \n",
- " mediaCode | \n",
- " mediaType | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
"
\n",
" \n",
" | uid | \n",
@@ -2320,92 +3193,117 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
" \n",
- " | arbeitgeber-2005-05-04-a-i0044 | \n",
- " page | \n",
- " [REDACTED] | \n",
- " [REDACTED] | \n",
- " [{'uid': '2-54-Convention_de_Schengen'}, {'uid... | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp87_fr', 'relevance'... | \n",
- " 611 | \n",
+ " DTT-1972-01-20-a-i0098 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Wieder ein Mordanschlag in Belfast | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp42_de', 'relevance'... | \n",
+ " 305 | \n",
" 1 | \n",
- " fr | \n",
+ " de | \n",
" False | \n",
- " 2005-05-04T00:00:00+00:00 | \n",
- " CH | \n",
- " SWA | \n",
- " arbeitgeber | \n",
- " newspaper | \n",
+ " 1972-01-20T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [6] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Belfast', 'count': 2}, {'uid': ... | \n",
+ " [{'uid': '2-50-Edward_Heath', 'count': 1}, {'u... | \n",
+ " [{'uid': '2-53-Europäische_Wirtschaftsgemeinsc... | \n",
+ " [{'uid': '4-55-AP', 'count': 1}, {'uid': '', '... | \n",
+ " [{'surfaceForm': 'Belfast', 'mentionConfidence... | \n",
+ " [{'surfaceForm': 'Premierminister Edward Heath... | \n",
+ " [{'surfaceForm': 'EWG', 'mentionConfidence': 5... | \n",
+ " [{'surfaceForm': 'ap', 'mentionConfidence': 98... | \n",
"
\n",
" \n",
- " | arbeitgeber-2005-05-04-a-i0046 | \n",
- " page | \n",
- " [REDACTED] | \n",
- " [REDACTED] | \n",
- " [{'uid': '2-54-Europe_de_l'Ouest'}, {'uid': '2... | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp87_fr', 'relevance'... | \n",
- " 735 | \n",
+ " DTT-1969-04-30-a-i0159 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Das neue Statut der KP Chinas | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp32_de', 'relevance'... | \n",
+ " 401 | \n",
" 1 | \n",
- " fr | \n",
+ " de | \n",
" False | \n",
- " 2005-05-04T00:00:00+00:00 | \n",
- " CH | \n",
- " SWA | \n",
- " arbeitgeber | \n",
- " newspaper | \n",
+ " 1969-04-30T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [2] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '4-55-DPA', 'count': 1}, {'uid': '4-5... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'DPA', 'mentionConfidence': 9... | \n",
"
\n",
" \n",
- " | LCE-2002-02-15-a-i0047 | \n",
+ " DTT-1970-02-10-a-i0098 | \n",
+ " in_cpy | \n",
" ar | \n",
- " des conditions claires Négocier dans | \n",
- " | \n",
- " [{'uid': '2-54-Suisse'}, {'uid': '2-54-Union_e... | \n",
- " [] | \n",
- " [{'uid': 'tm-fr-all-v2.0_tp87_fr', 'relevance'... | \n",
- " 328 | \n",
+ " print | \n",
+ " Tschu wollte nicht | \n",
+ " [{'uid': 'tm-de-all-v2.0_tp32_de', 'relevance'... | \n",
+ " 265 | \n",
" 1 | \n",
- " fr | \n",
+ " de | \n",
" False | \n",
- " 2002-02-15T00:00:00+00:00 | \n",
- " CH | \n",
- " SNL | \n",
- " LCE | \n",
- " newspaper | \n",
+ " 1970-02-10T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [2] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Hongkong', 'count': 2}, {'uid':... | \n",
+ " [{'uid': '2-50-Zhou_Enlai', 'count': 1}, {'uid... | \n",
+ " [{'uid': '2-53-Hongkong', 'count': 1}, {'uid':... | \n",
+ " [{'uid': '4-55-Reuters', 'count': 1}, {'uid': ... | \n",
+ " [{'surfaceForm': 'Hongkong', 'mentionConfidenc... | \n",
+ " [{'surfaceForm': 'chinesische Ministerpräsiden... | \n",
+ " [{'surfaceForm': 'Hongkonger', 'mentionConfide... | \n",
+ " [{'surfaceForm': 'Reuter', 'mentionConfidence'... | \n",
"
\n",
" \n",
"\n",
+ "3 rows × 28 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 36,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "impresso.search.find(term=\"Schengen\", country=OR(\"FR\", \"CH\"))"
+ "impresso.search.find(term=\"Schengen\", partner_id=\"Migros\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Partner\n",
+ "## Text reuse cluster\n",
"\n",
- "Limit search to content items provided by a specific partner of the Impresso project."
+ "Find all content items that are part of a specific text reuse cluster."
]
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -2416,7 +3314,7 @@
"Search result
\n",
"Contains 0 items of 0 total items.
\n",
"
\n",
- "See this result in the Impresso App.\n",
+ "See this result in the Impresso App.\n",
"\n",
"\n",
"Data preview:
\n",
@@ -2446,30 +3344,31 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 37,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "impresso.search.find(term=\"Schengen\", partner_id=\"Migros\")"
+ "from impresso import OR\n",
+ "impresso.search.find(text_reuse_cluster_id=OR(\"tr-nobp-all-v01-c29\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Text reuse cluster\n",
+ "## Copyright\n",
"\n",
- "Find all content items that are part of a specific text reuse cluster."
+ "Find all content items with specific copyright."
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -2478,9 +3377,9 @@
"\n",
"
\n",
"
Search result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 100 items (0 - 100) of 4564380 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -2502,25 +3401,143 @@
" \n",
" \n",
" | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
+ " \n",
+ " | lepetitparisien-1876-12-06-a-i0033 | \n",
+ " pbl | \n",
+ " ad | \n",
+ " print | \n",
+ " Adv. 9 Page 4 | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp17_fr', 'relevance'... | \n",
+ " 28 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1876-12-06T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [4] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | lepetitparisien-1880-04-02-a-i0027 | \n",
+ " pbl | \n",
+ " ad | \n",
+ " print | \n",
+ " Adv. 3 Page 3 | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp80_fr', 'relevance'... | \n",
+ " 63 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1880-04-02T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [3] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'uid': '2-50-André_Gill', 'count': 1}, {'uid... | \n",
+ " [{'uid': '2-53-Le_Parisien', 'count': 1}] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'BOULANGER', 'mentionConfiden... | \n",
+ " [{'surfaceForm': 'Parisien', 'mentionConfidenc... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | lepetitparisien-1880-07-20-a-i0028 | \n",
+ " pbl | \n",
+ " ad | \n",
+ " print | \n",
+ " Adv. 2 Page 4 | \n",
+ " [] | \n",
+ " 39 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1880-07-20T00:00:00+00:00 | \n",
+ " ... | \n",
+ " [4] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
" \n",
"\n",
+ "3 rows × 28 columns
\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 38,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from impresso import OR\n",
- "impresso.search.find(text_reuse_cluster_id=OR(\"tr-nobp-all-v01-c29\"))"
+ "impresso.search.find(copyright=\"pbl\")"
]
},
{
@@ -2545,7 +3562,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -2554,11 +3571,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 10 items (0 - 10) of 21 total items.
\n",
+ "
Contains 10 items (0 - 10) of 286 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -2588,26 +3605,26 @@
" \n",
"
\n",
" \n",
- " | 1975-12-01T00:00:00Z | \n",
- " 3 | \n",
+ " 1732-12-01T00:00:00Z | \n",
+ " 0 | \n",
"
\n",
" \n",
- " | 1976-12-01T00:00:00Z | \n",
- " 1 | \n",
+ " 1733-12-01T00:00:00Z | \n",
+ " 0 | \n",
"
\n",
" \n",
- " | 1979-12-01T00:00:00Z | \n",
- " 1 | \n",
+ " 1734-12-01T00:00:00Z | \n",
+ " 0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 39,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -2625,19 +3642,72 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Total items in the result set: 21\n",
- "Got page 0 - 5 of 21. The first title is 1975-12-01T00:00:00Z\n",
- "Got page 5 - 10 of 21. The first title is 1983-12-01T00:00:00Z\n",
- "Got page 10 - 15 of 21. The first title is 1988-12-01T00:00:00Z\n",
- "Got page 15 - 20 of 21. The first title is 1999-12-01T00:00:00Z\n",
- "Got page 20 - 21 of 21. The first title is 2005-12-01T00:00:00Z\n"
+ "Total items in the result set: 286\n",
+ "Got page 0 - 5 of 286. The first title is 1732-12-01T00:00:00Z\n",
+ "Got page 5 - 10 of 286. The first title is 1737-12-01T00:00:00Z\n",
+ "Got page 10 - 15 of 286. The first title is 1742-12-01T00:00:00Z\n",
+ "Got page 15 - 20 of 286. The first title is 1747-12-01T00:00:00Z\n",
+ "Got page 20 - 25 of 286. The first title is 1752-12-01T00:00:00Z\n",
+ "Got page 25 - 30 of 286. The first title is 1757-12-01T00:00:00Z\n",
+ "Got page 30 - 35 of 286. The first title is 1762-12-01T00:00:00Z\n",
+ "Got page 35 - 40 of 286. The first title is 1767-12-01T00:00:00Z\n",
+ "Got page 40 - 45 of 286. The first title is 1772-12-01T00:00:00Z\n",
+ "Got page 45 - 50 of 286. The first title is 1777-12-01T00:00:00Z\n",
+ "Got page 50 - 55 of 286. The first title is 1782-12-01T00:00:00Z\n",
+ "Got page 55 - 60 of 286. The first title is 1787-12-01T00:00:00Z\n",
+ "Got page 60 - 65 of 286. The first title is 1792-12-01T00:00:00Z\n",
+ "Got page 65 - 70 of 286. The first title is 1797-12-01T00:00:00Z\n",
+ "Got page 70 - 75 of 286. The first title is 1802-12-01T00:00:00Z\n",
+ "Got page 75 - 80 of 286. The first title is 1807-12-01T00:00:00Z\n",
+ "Got page 80 - 85 of 286. The first title is 1812-12-01T00:00:00Z\n",
+ "Got page 85 - 90 of 286. The first title is 1817-12-01T00:00:00Z\n",
+ "Got page 90 - 95 of 286. The first title is 1822-12-01T00:00:00Z\n",
+ "Got page 95 - 100 of 286. The first title is 1827-12-01T00:00:00Z\n",
+ "Got page 100 - 105 of 286. The first title is 1832-12-01T00:00:00Z\n",
+ "Got page 105 - 110 of 286. The first title is 1837-12-01T00:00:00Z\n",
+ "Got page 110 - 115 of 286. The first title is 1842-12-01T00:00:00Z\n",
+ "Got page 115 - 120 of 286. The first title is 1847-12-01T00:00:00Z\n",
+ "Got page 120 - 125 of 286. The first title is 1852-12-01T00:00:00Z\n",
+ "Got page 125 - 130 of 286. The first title is 1857-12-01T00:00:00Z\n",
+ "Got page 130 - 135 of 286. The first title is 1862-12-01T00:00:00Z\n",
+ "Got page 135 - 140 of 286. The first title is 1867-12-01T00:00:00Z\n",
+ "Got page 140 - 145 of 286. The first title is 1872-12-01T00:00:00Z\n",
+ "Got page 145 - 150 of 286. The first title is 1877-12-01T00:00:00Z\n",
+ "Got page 150 - 155 of 286. The first title is 1882-12-01T00:00:00Z\n",
+ "Got page 155 - 160 of 286. The first title is 1887-12-01T00:00:00Z\n",
+ "Got page 160 - 165 of 286. The first title is 1892-12-01T00:00:00Z\n",
+ "Got page 165 - 170 of 286. The first title is 1897-12-01T00:00:00Z\n",
+ "Got page 170 - 175 of 286. The first title is 1902-12-01T00:00:00Z\n",
+ "Got page 175 - 180 of 286. The first title is 1907-12-01T00:00:00Z\n",
+ "Got page 180 - 185 of 286. The first title is 1912-12-01T00:00:00Z\n",
+ "Got page 185 - 190 of 286. The first title is 1917-12-01T00:00:00Z\n",
+ "Got page 190 - 195 of 286. The first title is 1922-12-01T00:00:00Z\n",
+ "Got page 195 - 200 of 286. The first title is 1927-12-01T00:00:00Z\n",
+ "Got page 200 - 205 of 286. The first title is 1932-12-01T00:00:00Z\n",
+ "Got page 205 - 210 of 286. The first title is 1937-12-01T00:00:00Z\n",
+ "Got page 210 - 215 of 286. The first title is 1942-12-01T00:00:00Z\n",
+ "Got page 215 - 220 of 286. The first title is 1947-12-01T00:00:00Z\n",
+ "Got page 220 - 225 of 286. The first title is 1952-12-01T00:00:00Z\n",
+ "Got page 225 - 230 of 286. The first title is 1957-12-01T00:00:00Z\n",
+ "Got page 230 - 235 of 286. The first title is 1962-12-01T00:00:00Z\n",
+ "Got page 235 - 240 of 286. The first title is 1967-12-01T00:00:00Z\n",
+ "Got page 240 - 245 of 286. The first title is 1972-12-01T00:00:00Z\n",
+ "Got page 245 - 250 of 286. The first title is 1977-12-01T00:00:00Z\n",
+ "Got page 250 - 255 of 286. The first title is 1982-12-01T00:00:00Z\n",
+ "Got page 255 - 260 of 286. The first title is 1987-12-01T00:00:00Z\n",
+ "Got page 260 - 265 of 286. The first title is 1992-12-01T00:00:00Z\n",
+ "Got page 265 - 270 of 286. The first title is 1997-12-01T00:00:00Z\n",
+ "Got page 270 - 275 of 286. The first title is 2002-12-01T00:00:00Z\n",
+ "Got page 275 - 280 of 286. The first title is 2007-12-01T00:00:00Z\n",
+ "Got page 280 - 285 of 286. The first title is 2012-12-01T00:00:00Z\n",
+ "Got page 285 - 286 of 286. The first title is 2017-12-01T00:00:00Z\n"
]
}
],
@@ -2667,7 +3737,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -2676,11 +3746,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 2 items of 2 total items.
\n",
+ "
Contains 10 items (0 - 10) of 34 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -2710,11 +3780,15 @@
" \n",
"
\n",
" \n",
+ " | 1861 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
" | 1874 | \n",
" 1 | \n",
"
\n",
" \n",
- " | 2000 | \n",
+ " 1894 | \n",
" 1 | \n",
"
\n",
" \n",
@@ -2722,10 +3796,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 41,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -2745,7 +3819,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -2758,7 +3832,7 @@
"
\n",
"See this result in the Impresso App.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Data preview:
\n",
"\n",
@@ -2789,25 +3863,25 @@
"
\n",
" \n",
" | 0 | \n",
- " 0 | \n",
+ " 7 | \n",
"
\n",
" \n",
" | 100 | \n",
- " 0 | \n",
+ " 7 | \n",
"
\n",
" \n",
" | 200 | \n",
- " 0 | \n",
+ " 11 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 42,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@@ -2827,7 +3901,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -2836,11 +3910,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 2 items of 2 total items.
\n",
+ "
Contains 10 items (0 - 10) of 12 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -2870,22 +3944,26 @@
" \n",
"
\n",
" \n",
- " | 3 | \n",
- " 1 | \n",
+ " 1 | \n",
+ " 5 | \n",
"
\n",
" \n",
- " | 6 | \n",
- " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 43,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -2905,7 +3983,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -2914,11 +3992,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 2 items of 2 total items.
\n",
+ "
Contains 3 items of 3 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -2949,21 +4027,25 @@
"
\n",
" \n",
" | CH | \n",
- " 1 | \n",
+ " 76 | \n",
+ "
\n",
+ " \n",
+ " | FR | \n",
+ " 2 | \n",
"
\n",
" \n",
" | LU | \n",
- " 1 | \n",
+ " 2 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 44,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -2983,7 +4065,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -2996,7 +4078,7 @@
"
\n",
"See this result in the Impresso App.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Data preview:
\n",
"\n",
@@ -3027,11 +4109,11 @@
"
\n",
" \n",
" | ad | \n",
- " 2108971 | \n",
+ " 17813044 | \n",
"
\n",
" \n",
" | ar | \n",
- " 7977884 | \n",
+ " 32853043 | \n",
"
\n",
" \n",
" | chapter | \n",
@@ -3042,10 +4124,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 45,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
@@ -3065,7 +4147,7 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -3074,11 +4156,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 10 items (0 - 10) of 238 total items.
\n",
+ "
Contains 10 items (0 - 10) of 219 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -3111,17 +4193,17 @@
"
\n",
" \n",
" | tm-de-all-v2.0_tp00_de | \n",
- " 48 | \n",
- " london (0.02403), new (0.01812), york (0.01433... | \n",
+ " 60 | \n",
+ " NaN | \n",
"
\n",
" \n",
" | tm-de-all-v2.0_tp01_de | \n",
- " 37 | \n",
+ " 83 | \n",
" new (0.03594), york (0.02121), spanien (0.0209... | \n",
"
\n",
" \n",
" | tm-de-all-v2.0_tp02_de | \n",
- " 394 | \n",
+ " 433 | \n",
" zimmer (0.02682), wort (0.02321), expedition (... | \n",
"
\n",
" \n",
@@ -3129,10 +4211,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 46,
+ "execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
@@ -3152,7 +4234,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 49,
"metadata": {},
"outputs": [
{
@@ -3193,10 +4275,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 47,
+ "execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
@@ -3216,7 +4298,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 50,
"metadata": {},
"outputs": [
{
@@ -3225,11 +4307,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 10 items (0 - 10) of 37 total items.
\n",
+ "
Contains 10 items (0 - 10) of 48 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -3261,29 +4343,29 @@
" \n",
"
\n",
" \n",
- " | FedGazDe | \n",
- " 17 | \n",
- " Bundesblatt | \n",
+ " DTT | \n",
+ " 18 | \n",
+ " Die Tat | \n",
"
\n",
" \n",
- " | FedGazFr | \n",
- " 18 | \n",
- " Feuille Fédérale | \n",
+ " EXP | \n",
+ " 1426 | \n",
+ " L'Express | \n",
"
\n",
" \n",
- " | GAV | \n",
- " 1 | \n",
- " Gazette du Valais / Nouvelle gazette du Valais | \n",
+ " FZG | \n",
+ " 608 | \n",
+ " Freiburger Nachrichten | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 48,
+ "execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
@@ -3303,7 +4385,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 51,
"metadata": {},
"outputs": [
{
@@ -3312,11 +4394,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 3 items of 3 total items.
\n",
+ "
Contains 4 items of 4 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -3347,25 +4429,25 @@
"
\n",
" \n",
" | de | \n",
- " 6087 | \n",
+ " 6725 | \n",
"
\n",
" \n",
" | fr | \n",
- " 1417 | \n",
+ " 6652 | \n",
"
\n",
" \n",
- " | lb | \n",
- " 236 | \n",
+ " it | \n",
+ " 1 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 49,
+ "execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
@@ -3385,20 +4467,19 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "\n",
+ "
\n",
"
\n",
"
Facet result
\n",
- "
Contains 10 items (7140 - 7150) of 7481 total items.
\n",
+ "
Contains 0 items of 2738 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
"
\n",
"
Data preview:
\n",
"
\n",
@@ -3419,40 +4500,18 @@
" \n",
" \n",
" | \n",
- " count | \n",
- " label | \n",
- "
\n",
- " \n",
- " | value | \n",
- " | \n",
- " | \n",
"
\n",
" \n",
"
\n",
- " \n",
- " | 2-50-Weinreben | \n",
- " 1 | \n",
- " Weinreben | \n",
- "
\n",
- " \n",
- " | 2-50-Weis | \n",
- " 7 | \n",
- " Weis | \n",
- "
\n",
- " \n",
- " | 2-50-Weisgerber | \n",
- " 1 | \n",
- " Weisgerber | \n",
- "
\n",
" \n",
"\n",
""
],
"text/plain": [
- "
"
+ ""
]
},
- "execution_count": 50,
+ "execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
@@ -3472,7 +4531,7 @@
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 54,
"metadata": {},
"outputs": [
{
@@ -3481,11 +4540,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 10 items (3310 - 3320) of 6610 total items.
\n",
+ "
Contains 10 items (3310 - 3320) of 3891 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -3517,29 +4576,29 @@
" \n",
"
\n",
" \n",
- " | 2-54-Linthebene | \n",
+ " 2-54-Sévaz | \n",
" 1 | \n",
- " Linthebene | \n",
+ " Sévaz | \n",
"
\n",
" \n",
- " | 2-54-Linz | \n",
- " 3 | \n",
- " Linz | \n",
+ " 2-54-Séville | \n",
+ " 2 | \n",
+ " Séville | \n",
"
\n",
" \n",
- " | 2-54-Lipa | \n",
- " 1 | \n",
- " Lipa | \n",
+ " 2-54-Südafrikanische_Union | \n",
+ " 9 | \n",
+ " Südafrikanische Union | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 51,
+ "execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
@@ -3559,7 +4618,7 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 55,
"metadata": {},
"outputs": [
{
@@ -3568,11 +4627,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 1 items of 1 total items.
\n",
+ "
Contains 7 items of 7 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -3594,26 +4653,39 @@
"
\n",
" | \n",
" count | \n",
+ " label | \n",
"
\n",
" \n",
" | value | \n",
" | \n",
+ " | \n",
"
\n",
" \n",
" \n",
" \n",
- " | 4-55-Havas | \n",
- " 1 | \n",
+ " 4-55-AFP | \n",
+ " 5 | \n",
+ " AFP | \n",
+ "
\n",
+ " \n",
+ " | 4-55-AP | \n",
+ " 8 | \n",
+ " AP | \n",
+ "
\n",
+ " \n",
+ " | 4-55-ATS_SDA | \n",
+ " 20 | \n",
+ " ATS SDA | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 52,
+ "execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
@@ -3634,19 +4706,20 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "\n",
+ "
\n",
"
\n",
"
Facet result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 2 items of 2 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
+ "
\n",
"
\n",
"
Data preview:
\n",
"
\n",
@@ -3667,24 +4740,37 @@
" \n",
" \n",
" | \n",
+ " count | \n",
+ "
\n",
+ " \n",
+ " | value | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | in_cpy | \n",
+ " 70118 | \n",
+ "
\n",
+ " \n",
+ " | pbl | \n",
+ " 11010 | \n",
+ "
\n",
" \n",
"\n",
""
],
"text/plain": [
- "
"
+ ""
]
},
- "execution_count": 53,
+ "execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "impresso.search.facet(\"accessRight\", term=\"pomme\")"
+ "impresso.search.facet(\"copyright\", term=\"pomme\")"
]
},
{
@@ -3698,7 +4784,7 @@
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 59,
"metadata": {},
"outputs": [
{
@@ -3707,11 +4793,11 @@
"\n",
"
\n",
"
Facet result
\n",
- "
Contains 7 items of 7 total items.
\n",
+ "
Contains 8 items of 8 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
- "
\n",
+ "
\n",
"
\n",
"Data preview:
\n",
"\n",
@@ -3742,25 +4828,25 @@
"
\n",
" \n",
" | BCUL | \n",
- " 1475 | \n",
+ " 1450 | \n",
"
\n",
" \n",
" | BNF | \n",
- " 13716 | \n",
+ " 25661 | \n",
"
\n",
" \n",
" | BNL | \n",
- " 2385 | \n",
+ " 3593 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 54,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
@@ -3772,7 +4858,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "impresso-py3.11",
+ "display_name": "impresso-py3.13 (3.13.7)",
"language": "python",
"name": "python3"
},
@@ -3786,7 +4872,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.13.7"
}
},
"nbformat": 4,
diff --git a/examples/notebooks/textReuse.ipynb b/examples/notebooks/textReuse.ipynb
index 40b4f39..9f584c2 100644
--- a/examples/notebooks/textReuse.ipynb
+++ b/examples/notebooks/textReuse.ipynb
@@ -2,18 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🎉 You are now connected to the Impresso API! 🎉\n",
- "🔗 Using API: https://dev.impresso-project.ch/public-api/v1\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from impresso import connect, AND, OR\n",
"\n",
@@ -36,7 +27,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -45,9 +36,9 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 5 items (2 - 7) of 10 total items.
\n",
+ "
Contains 5 items (2 - 7) of 28 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -86,45 +77,45 @@
" \n",
" \n",
" \n",
- " | tr-all-v1-24-c77310101729 | \n",
- " 79.439252 | \n",
+ " tr-all-v1-24-c25769902214 | \n",
+ " 61.176471 | \n",
" 2 | \n",
- " sont pas admis.\\nTaxe fr. 3, 50 ( bonification... | \n",
- " 1887-06-22 | \n",
- " 1887-06-23 | \n",
+ " une longueur du\\ndeuxième au troisième.\\nPRIX ... | \n",
+ " 1899-08-13 | \n",
+ " 1899-08-13 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c77310101732 | \n",
- " 39.743590 | \n",
+ " tr-all-v1-24-c14741 | \n",
+ " 20.792079 | \n",
" 2 | \n",
- " sont pas admis.\\nTaxe fr. 3, 50 ( bonification... | \n",
- " 1887-06-23 | \n",
- " 1887-06-23 | \n",
+ " 1\", 03 fr. 50 et 25 fr. 50 placé ; 2\", 45 fr. ... | \n",
+ " 1900-05-28 | \n",
+ " 1900-05-29 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c244588 | \n",
- " 78.823529 | \n",
+ " tr-all-v1-24-c60129751715 | \n",
+ " 20.000000 | \n",
" 2 | \n",
- " Suisses à l’étranger. — La Suisse aura mainten... | \n",
- " 1884-08-20 | \n",
- " 1884-08-21 | \n",
+ " 43 fr. 50 placé; 2 e, 30 fr. 00;\\n3 e, 51 fr. ... | \n",
+ " 1899-10-17 | \n",
+ " 1899-10-18 | \n",
"
\n",
" \n",
"\n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 2,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"impresso.text_reuse.clusters.find(\n",
- " term=\"banana\",\n",
+ " term=\"chirimoya\",\n",
" offset=2,\n",
" limit=5,\n",
" order_by=\"passages-count\",\n",
@@ -140,22 +131,26 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Total items in the result set: 10. Limit: 5. Offset: 0. Size: 5.\n",
- "Got page 0 - 5 of 10. The first title is tr-all-v1-24-c8590049316\n",
- "Got page 5 - 10 of 10. The first title is tr-all-v1-24-c51540329030\n"
+ "Total items in the result set: 28. Limit: 5. Offset: 0. Size: 5.\n",
+ "Got page 0 - 5 of 28. The first sample is PRIX DE MARIGNY. 3 ans et au-d...\n",
+ "Got page 5 - 10 of 28. The first sample is Ipeck (J. Watkins), 3* Pouilli...\n",
+ "Got page 10 - 15 of 28. The first sample is PRIX DE MARIGNY. 3 ans et au-d...\n",
+ "Got page 15 - 20 of 28. The first sample is PRIX DU GOUVERNEMENT. 2,000 fr...\n",
+ "Got page 20 - 25 of 28. The first sample is Prix, Vermeille, 40,000 francs...\n",
+ "Got page 25 - 28 of 28. The first sample is Prix de Ghcffrevillo (8,000 fr...\n"
]
}
],
"source": [
"result = impresso.text_reuse.clusters.find(\n",
- " term=\"banana\",\n",
+ " term=\"chirimoya\",\n",
" limit=5,\n",
" order_by=\"passages-count\",\n",
")\n",
@@ -164,7 +159,7 @@
"for page in result.pages():\n",
" print(\n",
" f\"Got page {page.offset} - {page.offset + page.size} of {page.total}. \"\n",
- " + f\"The first title is {page.raw['data'][0]['uid']}\"\n",
+ " + f\"The first sample is {page.pydantic.data[0].textSample[:30]}...\"\n",
" )"
]
},
@@ -177,7 +172,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -186,7 +181,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 3 items of 3 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -210,18 +205,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c8590012740 | \n",
+ " 38.571429 | \n",
+ " 50 | \n",
+ " La Chaux-de-Fonds,\\nLe Sphynx samedi 22 mars 2... | \n",
+ " 2008-01-14 | \n",
+ " 2008-12-22 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c8590012740 | \n",
+ " 38.571429 | \n",
+ " 50 | \n",
+ " La Chaux-de-Fonds,\\nLe Sphynx samedi 22 mars 2... | \n",
+ " 2008-01-14 | \n",
+ " 2008-12-22 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c452205 | \n",
+ " 1.000000 | \n",
+ " 95 | \n",
+ " Amérique (lu Nom : pour.New-York via Cher-\\nbo... | \n",
+ " 1921-11-04 | \n",
+ " 1924-09-10 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 4,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -242,7 +274,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -251,7 +283,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 4 items of 4 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -275,18 +307,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c73349 | \n",
+ " 0.60241 | \n",
+ " 127 | \n",
+ " Tarif international voyageurs et\\nbagages Luxe... | \n",
+ " 1931-02-16 | \n",
+ " 1957-11-01 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c73349 | \n",
+ " 0.60241 | \n",
+ " 127 | \n",
+ " Trafic-marchandises Luxembourg — Suisse.\\nAvec... | \n",
+ " 1931-02-16 | \n",
+ " 1957-11-01 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c73349 | \n",
+ " 0.60241 | \n",
+ " 127 | \n",
+ " AVIS — Tarif international voyageurs\\net bagag... | \n",
+ " 1931-02-16 | \n",
+ " 1957-11-01 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 5,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -307,7 +376,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -316,7 +385,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 13 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -340,18 +409,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c17180643325 | \n",
+ " 50.37037 | \n",
+ " 7 | \n",
+ " Lo Temps a reçu do Banana, embouchure du\\nCong... | \n",
+ " 1889-01-17 | \n",
+ " 1889-01-18 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c17180643325 | \n",
+ " 50.37037 | \n",
+ " 7 | \n",
+ " Stanley\\nte temps vient de recevoir de Banana,... | \n",
+ " 1889-01-17 | \n",
+ " 1889-01-18 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c17180643325 | \n",
+ " 50.37037 | \n",
+ " 7 | \n",
+ " Nouvelle » < J « Slaniey.\\nNous venons, dit le... | \n",
+ " 1889-01-17 | \n",
+ " 1889-01-18 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 6,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -374,7 +480,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -383,7 +489,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 1 items of 1 total items.
\n",
+ "
Contains 2 items of 2 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -424,6 +530,14 @@
" \n",
"
\n",
" \n",
+ " | tr-all-v1-24-c77310151136 | \n",
+ " 22.222222 | \n",
+ " 4 | \n",
+ " 8 p\\n13. Croix Madame 55 T. Huet F. Doumen 10 ... | \n",
+ " 2012-02-15 | \n",
+ " 2012-05-12 | \n",
+ "
\n",
+ " \n",
" | tr-all-v1-24-c137439334025 | \n",
" 78.536585 | \n",
" 2 | \n",
@@ -436,10 +550,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 7,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -460,7 +574,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -469,7 +583,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 5 items of 5 total items.
\n",
+ "
Contains 10 items (0 - 10) of 257 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -510,38 +624,38 @@
" \n",
"
\n",
" \n",
- " | tr-all-v1-24-c85900161645 | \n",
- " 53.061224 | \n",
- " 4 | \n",
- " werden.\\nAls Reise-Neuheiten präsentiert die p... | \n",
- " 1977-01-20 | \n",
- " 1977-02-09 | \n",
+ " tr-all-v1-24-c34359854842 | \n",
+ " 84.000000 | \n",
+ " 2 | \n",
+ " ¦ ~\\nBANANISSIM 0!\\nWir servieren\\nkulinarisch... | \n",
+ " 1973-05-16 | \n",
+ " 1973-05-18 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c77310034000 | \n",
- " 52.046784 | \n",
+ " tr-all-v1-24-c34359854842 | \n",
+ " 84.000000 | \n",
" 2 | \n",
- " Nos brochures con-\\ntiennent davantage d'atout... | \n",
- " 1977-04-06 | \n",
- " 1977-04-15 | \n",
+ " m\\nBANANISSIM 0 I\\nWir servieren\\nkulinarische... | \n",
+ " 1973-05-16 | \n",
+ " 1973-05-18 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c51540329030 | \n",
- " 90.476190 | \n",
+ " tr-all-v1-24-c515122 | \n",
+ " 66.956522 | \n",
" 2 | \n",
- " Un slogan pour les catalogues Popularis en cou... | \n",
- " 1977-04-06 | \n",
- " 1977-04-15 | \n",
+ " veut récupérer\\nMatadi et Banana\\nLéopoldville... | \n",
+ " 1961-03-07 | \n",
+ " 1961-03-07 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 8,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -564,7 +678,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -573,7 +687,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 189 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -597,18 +711,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c111669250947 | \n",
+ " 100.000 | \n",
+ " 2 | \n",
+ " Banana, Feindouno et\\nFeltscher à Lausanne\\nFO... | \n",
+ " 2013-08-31 | \n",
+ " 2013-08-31 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c137438967408 | \n",
+ " 100.000 | \n",
+ " 2 | \n",
+ " Deschansonsàenfairetrémousserplusd’un\\nFocus\\n... | \n",
+ " 2008-07-10 | \n",
+ " 2008-07-10 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c85899565930 | \n",
+ " 34.375 | \n",
+ " 2 | \n",
+ " Colonies de vacances », « Juanita\\nBanana », «... | \n",
+ " 1967-06-22 | \n",
+ " 1967-10-02 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 9,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -629,7 +780,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -670,10 +821,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 10,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -691,7 +842,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -700,7 +851,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 28 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -724,18 +875,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c60130139951 | \n",
+ " 60.606061 | \n",
+ " 2 | \n",
+ " La « Panther » coulée\\nMilan, 28 teptembre.\\nD... | \n",
+ " 1914-09-29 | \n",
+ " 1914-09-29 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c94489993744 | \n",
+ " 67.441860 | \n",
+ " 2 | \n",
+ " Les dernières nouvelles de l'expédition\\nStanl... | \n",
+ " 1887-04-29 | \n",
+ " 1887-04-29 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c25769855244 | \n",
+ " 53.658537 | \n",
+ " 3 | \n",
+ " congolaise\\nLEOPOLDVILLE (AFP et VPI). — « D'a... | \n",
+ " 1961-03-21 | \n",
+ " 1961-03-21 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 11,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -756,7 +944,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -765,7 +953,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 9 items of 9 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -789,18 +977,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c128849802984 | \n",
+ " 55.172414 | \n",
+ " 3 | \n",
+ " Hambourg, 15. — L'enquête ordonnée à\\nla suite... | \n",
+ " 1904-09-16 | \n",
+ " 1904-09-17 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c120259208814 | \n",
+ " 55.172414 | \n",
+ " 3 | \n",
+ " Samedi et dimanche. l'Aca-\\ndémie chorégraphiq... | \n",
+ " 1927-08-31 | \n",
+ " 1927-09-02 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c85899511538 | \n",
+ " 60.714286 | \n",
+ " 2 | \n",
+ " Une dépêche de Roma (Congo) annonce\\nque le st... | \n",
+ " 1901-11-05 | \n",
+ " 1901-11-06 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 12,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -821,7 +1046,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -830,9 +1055,9 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 5 items of 5 total items.
\n",
+ "
Contains 10 items (0 - 10) of 409 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -856,9 +1081,9 @@
" | \n",
" lexicalOverlap | \n",
" clusterSize | \n",
- " textSample | \n",
" timeCoverage.startDate | \n",
" timeCoverage.endDate | \n",
+ " textSample | \n",
"
\n",
" \n",
" | uid | \n",
@@ -871,38 +1096,38 @@
" \n",
"
\n",
" \n",
- " | tr-all-v1-24-c8589980257 | \n",
- " 66.153846 | \n",
+ " tr-all-v1-24-c68719515693 | \n",
+ " 41.176471 | \n",
" 2 | \n",
- " The British- Luxembourg Society\\nbeg to call t... | \n",
- " 1948-04-01 | \n",
- " 1948-04-01 | \n",
+ " 1939-08-23 | \n",
+ " 1939-08-26 | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c336675 | \n",
- " 76.923077 | \n",
+ " tr-all-v1-24-c8590220034 | \n",
+ " 51.282051 | \n",
" 2 | \n",
- " Amsterdam, Atlanta, Barcelona, Brussels, Chica... | \n",
- " 1989-12-02 | \n",
- " 1990-04-13 | \n",
+ " 1987-09-25 | \n",
+ " 1987-09-25 | \n",
+ " NaN | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c10636 | \n",
- " 18.840580 | \n",
- " 30 | \n",
- " INTERNATIONAL\\nHOLDING S.A.\\nSociété Anonyme\\n... | \n",
- " 1975-07-11 | \n",
- " 1997-03-26 | \n",
+ " tr-all-v1-24-c606366 | \n",
+ " 36.363636 | \n",
+ " 4 | \n",
+ " 1986-10-02 | \n",
+ " 1988-09-23 | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 13,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -923,7 +1148,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -932,7 +1157,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 1 items of 1 total items.
\n",
+ "
Contains 10 items (0 - 10) of 3931 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -973,22 +1198,38 @@
" \n",
"
\n",
" \n",
- " | tr-all-v1-24-c68720183505 | \n",
- " 92.741935 | \n",
+ " tr-all-v1-24-c171798816707 | \n",
+ " 97.297297 | \n",
" 2 | \n",
- " Asile en Suisse\\nRien ne va plus!\\nIl faut en ... | \n",
- " 1991-02-20 | \n",
- " 1991-02-27 | \n",
+ " En route vers\\nSchengen\\nSUISSE _.\\nEn dépit d... | \n",
+ " 2001-03-27 | \n",
+ " 2001-03-27 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c17180030095 | \n",
+ " 32.000000 | \n",
+ " 6 | \n",
+ " Schengen augmente l'efficacité de la police.\\n... | \n",
+ " 2005-04-01 | \n",
+ " 2005-04-02 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c171798816707 | \n",
+ " 97.297297 | \n",
+ " 2 | \n",
+ " En route vers\\nSchengen\\nSUISSE\\nEn dépit des ... | \n",
+ " 2001-03-27 | \n",
+ " 2001-03-27 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 14,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -1009,7 +1250,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -1018,7 +1259,7 @@
"\n",
"
\n",
"
FindTextReuseClusters result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 33 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1042,18 +1283,55 @@
"
\n",
" \n",
" | \n",
+ " lexicalOverlap | \n",
+ " clusterSize | \n",
+ " textSample | \n",
+ " timeCoverage.startDate | \n",
+ " timeCoverage.endDate | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | tr-all-v1-24-c68719924299 | \n",
+ " 90.0 | \n",
+ " 3 | \n",
+ " Des représen-\\ntants du groupe de Schengen et ... | \n",
+ " 1993-01-23 | \n",
+ " 1993-01-23 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c163209269715 | \n",
+ " 100.0 | \n",
+ " 2 | \n",
+ " Europe : collaborations indivisibles\\npar Jacq... | \n",
+ " 1993-05-14 | \n",
+ " 1993-05-14 | \n",
+ "
\n",
+ " \n",
+ " | tr-all-v1-24-c8590228981 | \n",
+ " 100.0 | \n",
+ " 2 | \n",
+ " Faute de\\npouvoir adhérer aux Accords de\\nSche... | \n",
+ " 1995-06-28 | \n",
+ " 1995-06-28 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 15,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -1074,7 +1352,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 34,
"metadata": {},
"outputs": [
{
@@ -1115,10 +1393,10 @@
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 16,
+ "execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@@ -1146,7 +1424,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -1205,10 +1483,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 17,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -1226,7 +1504,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -1263,7 +1541,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -1322,10 +1600,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 19,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -1346,7 +1624,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -1405,10 +1683,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 20,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -1429,7 +1707,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -1488,10 +1766,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 21,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
@@ -1512,7 +1790,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -1576,10 +1854,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 22,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@@ -1607,7 +1885,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -1616,7 +1894,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 5 items (2 - 7) of 360 total items.
\n",
+ "
Contains 5 items (2 - 7) of 1385 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1655,35 +1933,35 @@
" \n",
"
\n",
" \n",
- " | c103079374309-tageblatt-1946-04-12-a-i0117@23:645 | \n",
- " Mme Jean BOULTGEN et son fils\\nNicolas et les ... | \n",
- " tageblatt-1946-04-12-a-i0117 | \n",
- " 23 | \n",
- " 645 | \n",
+ " c103079271183-tageblatt-1945-02-05-a-i0028@0:1110 | \n",
+ " [REDACTED] | \n",
+ " tageblatt-1945-02-05-a-i0028 | \n",
+ " 0 | \n",
+ " 1110 | \n",
"
\n",
" \n",
- " | c103079419312-JV-1875-01-09-a-i0001@6137:7961 | \n",
- " se trouvaient MM. Buffet, le duc d’Audiffret-\\... | \n",
- " JV-1875-01-09-a-i0001 | \n",
- " 6137 | \n",
- " 7961 | \n",
+ " c103079273629-luxwort-1931-07-17-a-i0053@0:887 | \n",
+ " [REDACTED] | \n",
+ " luxwort-1931-07-17-a-i0053 | \n",
+ " 0 | \n",
+ " 887 | \n",
"
\n",
" \n",
- " | c103079498166-tageblatt-1933-02-28-a-i0073@225:1486 | \n",
- " à Esch-sur-Alzette, rue de l'Industrie 48;\\nPo... | \n",
- " tageblatt-1933-02-28-a-i0073 | \n",
- " 225 | \n",
- " 1486 | \n",
+ " c103079273629-tageblatt-1931-07-17-a-i0117@0:1031 | \n",
+ " [REDACTED] | \n",
+ " tageblatt-1931-07-17-a-i0117 | \n",
+ " 0 | \n",
+ " 1031 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 23,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
@@ -1706,30 +1984,23 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Total items in the result set: 360. Limit: 50. Offset: 2. Size: 50.\n",
- "Got page 2 - 52 of 360. The first title is c103079374309-tageblatt-1946-04-12-a-i0117@23:645\n",
- "Got page 52 - 102 of 360. The first title is c137439596821-tageblatt-1946-12-06-a-i0087@16:897\n",
- "Got page 102 - 152 of 360. The first title is c206159066754-tageblatt-1946-01-17-a-i0053@0:706\n",
- "Got page 152 - 202 of 360. The first title is c51539607640-tageblatt-1949-07-21-a-i0052@827:1730\n",
- "Got page 202 - 252 of 360. The first title is c68719908844-buergerbeamten-1911-04-01-b-i0009@2802:3174\n",
- "Got page 252 - 302 of 360. The first title is c94489329094-tageblatt-1947-08-21-a-i0053@331:679\n",
- "Got page 302 - 352 of 360. The first title is c94489496965-dunioun-1946-12-04-a-i0082@0:5041\n",
- "Got page 352 - 360 of 360. The first title is c4836-dunioun-1947-06-27-a-i0120@589:1072\n"
+ "Total items in the result set: 2. Limit: 20. Offset: 1. Size: 1.\n",
+ "Got page 1 - 2 of 2. The first title is c77309776797-IMP-2007-10-26-a-i0232@0:3325\n"
]
}
],
"source": [
"result = impresso.text_reuse.passages.find(\n",
- " term=\"belval\",\n",
- " offset=2,\n",
- " limit=50,\n",
+ " term=\"luxembourg université\",\n",
+ " offset=1,\n",
+ " limit=20,\n",
" order_by=\"clusterSize\",\n",
")\n",
"\n",
@@ -1750,7 +2021,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 49,
"metadata": {},
"outputs": [
{
@@ -1791,10 +2062,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 25,
+ "execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
@@ -1815,7 +2086,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 50,
"metadata": {},
"outputs": [
{
@@ -1824,7 +2095,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 3 items of 3 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1848,18 +2119,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c8590012740-EXP-2008-03-25-a-i0105@0:642 | \n",
+ " [REDACTED] | \n",
+ " EXP-2008-03-25-a-i0105 | \n",
+ " 0 | \n",
+ " 642 | \n",
+ "
\n",
+ " \n",
+ " | c8590012740-IMP-2008-03-25-a-i0111@0:643 | \n",
+ " [REDACTED] | \n",
+ " IMP-2008-03-25-a-i0111 | \n",
+ " 0 | \n",
+ " 643 | \n",
+ "
\n",
+ " \n",
+ " | c452205-lepetitparisien-1922-08-15-a-i0071@11:836 | \n",
+ " Amérique (lu Nom : pour.New-York via Cher-\\nbo... | \n",
+ " lepetitparisien-1922-08-15-a-i0071 | \n",
+ " 11 | \n",
+ " 836 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 26,
+ "execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
@@ -1880,7 +2183,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 51,
"metadata": {},
"outputs": [
{
@@ -1889,7 +2192,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 4 items of 4 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1913,18 +2216,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c73349-dunioun-1946-02-28-a-i0044@0:477 | \n",
+ " [REDACTED] | \n",
+ " dunioun-1946-02-28-a-i0044 | \n",
+ " 0 | \n",
+ " 477 | \n",
+ "
\n",
+ " \n",
+ " | c73349-obermosel-1947-07-03-a-i0018@0:368 | \n",
+ " [REDACTED] | \n",
+ " obermosel-1947-07-03-a-i0018 | \n",
+ " 0 | \n",
+ " 368 | \n",
+ "
\n",
+ " \n",
+ " | c73349-luxwort-1946-02-28-a-i0031@0:495 | \n",
+ " [REDACTED] | \n",
+ " luxwort-1946-02-28-a-i0031 | \n",
+ " 0 | \n",
+ " 495 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 27,
+ "execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
@@ -1945,7 +2280,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 52,
"metadata": {},
"outputs": [
{
@@ -1954,7 +2289,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 13 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -1978,18 +2313,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c17180643325-lematin-1889-01-17-a-i0007@102:1068 | \n",
+ " Lo Temps a reçu do Banana, embouchure du\\nCong... | \n",
+ " lematin-1889-01-17-a-i0007 | \n",
+ " 102 | \n",
+ " 1068 | \n",
+ "
\n",
+ " \n",
+ " | c17180643325-lepetitparisien-1889-01-18-a-i0004@0:954 | \n",
+ " Stanley\\nte temps vient de recevoir de Banana,... | \n",
+ " lepetitparisien-1889-01-18-a-i0004 | \n",
+ " 0 | \n",
+ " 954 | \n",
+ "
\n",
+ " \n",
+ " | c17180643325-JDG-1889-01-18-a-i0041@8:1058 | \n",
+ " [REDACTED] | \n",
+ " JDG-1889-01-18-a-i0041 | \n",
+ " 8 | \n",
+ " 1058 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 28,
+ "execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
@@ -2012,7 +2379,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 53,
"metadata": {},
"outputs": [
{
@@ -2021,7 +2388,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 1 items of 1 total items.
\n",
+ "
Contains 2 items of 2 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2060,8 +2427,15 @@
" \n",
"
\n",
" \n",
+ " | c77310151136-IMP-2012-03-13-a-i0196@862:1613 | \n",
+ " [REDACTED] | \n",
+ " IMP-2012-03-13-a-i0196 | \n",
+ " 862 | \n",
+ " 1613 | \n",
+ "
\n",
+ " \n",
" | c137439334025-LLS-1937-05-08-a-i0020@0:2709 | \n",
- " Les Sociétés anonymes suisses\\npendant la cris... | \n",
+ " [REDACTED] | \n",
" LLS-1937-05-08-a-i0020 | \n",
" 0 | \n",
" 2709 | \n",
@@ -2071,10 +2445,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 29,
+ "execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
@@ -2095,7 +2469,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 54,
"metadata": {},
"outputs": [
{
@@ -2104,7 +2478,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 10 items of 10 total items.
\n",
+ "
Contains 10 items (0 - 10) of 257 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2143,35 +2517,35 @@
" \n",
"
\n",
" \n",
- " | c85900161645-SMZ-1977-02-02-a-i0036@629:2044 | \n",
- " werden. Als Reise-Neuheiten präsentiert Popula... | \n",
- " SMZ-1977-02-02-a-i0036 | \n",
- " 629 | \n",
- " 2044 | \n",
+ " c34359854842-DTT-1973-05-16-a-i0233@376:583 | \n",
+ " [REDACTED] | \n",
+ " DTT-1973-05-16-a-i0233 | \n",
+ " 376 | \n",
+ " 583 | \n",
"
\n",
" \n",
- " | c85900161645-OIZ-1977-01-27-a-i0037@627:2010 | \n",
- " werden.\\nAls Reise-Neuheiten präsentiert die p... | \n",
- " OIZ-1977-01-27-a-i0037 | \n",
- " 627 | \n",
- " 2010 | \n",
+ " c34359854842-FZG-1973-05-18-a-i0139@12:217 | \n",
+ " [REDACTED] | \n",
+ " FZG-1973-05-18-a-i0139 | \n",
+ " 12 | \n",
+ " 217 | \n",
"
\n",
" \n",
- " | c85900161645-VHT-1977-02-09-a-i0022@630:2034 | \n",
- " werden.\\nAls Reise-Neuheiten präsentiert die p... | \n",
- " VHT-1977-02-09-a-i0022 | \n",
- " 630 | \n",
- " 2034 | \n",
+ " c515122-JDG-1961-03-07-a-i0101@3226:4339 | \n",
+ " [REDACTED] | \n",
+ " JDG-1961-03-07-a-i0101 | \n",
+ " 3226 | \n",
+ " 4339 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 30,
+ "execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
@@ -2194,7 +2568,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 55,
"metadata": {},
"outputs": [
{
@@ -2203,7 +2577,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 189 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2227,18 +2601,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c111669250947-EXP-2013-08-31-a-i0228@0:634 | \n",
+ " [REDACTED] | \n",
+ " EXP-2013-08-31-a-i0228 | \n",
+ " 0 | \n",
+ " 634 | \n",
+ "
\n",
+ " \n",
+ " | c137438967408-EXP-2008-07-10-a-i0330@0:815 | \n",
+ " [REDACTED] | \n",
+ " EXP-2008-07-10-a-i0330 | \n",
+ " 0 | \n",
+ " 815 | \n",
+ "
\n",
+ " \n",
+ " | c85899565930-EXP-1967-06-22-a-i0052@1833:1934 | \n",
+ " [REDACTED] | \n",
+ " EXP-1967-06-22-a-i0052 | \n",
+ " 1833 | \n",
+ " 1934 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 31,
+ "execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
@@ -2259,7 +2665,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 56,
"metadata": {},
"outputs": [
{
@@ -2300,10 +2706,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 32,
+ "execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
@@ -2321,7 +2727,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
@@ -2330,7 +2736,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 28 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2354,18 +2760,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c60130139951-LLE-1914-09-29-a-i0001@14738:14971 | \n",
+ " [REDACTED] | \n",
+ " LLE-1914-09-29-a-i0001 | \n",
+ " 14738 | \n",
+ " 14971 | \n",
+ "
\n",
+ " \n",
+ " | c94489993744-GDL-1887-04-29-a-i0003@4424:4759 | \n",
+ " [REDACTED] | \n",
+ " GDL-1887-04-29-a-i0003 | \n",
+ " 4424 | \n",
+ " 4759 | \n",
+ "
\n",
+ " \n",
+ " | c25769855244-EXP-1961-03-21-a-i0004@138:1259 | \n",
+ " [REDACTED] | \n",
+ " EXP-1961-03-21-a-i0004 | \n",
+ " 138 | \n",
+ " 1259 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 33,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@@ -2386,7 +2824,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 58,
"metadata": {},
"outputs": [
{
@@ -2395,7 +2833,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 9 items of 9 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2419,18 +2857,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c128849802984-EXP-1904-09-16-a-i0064@285:483 | \n",
+ " [REDACTED] | \n",
+ " EXP-1904-09-16-a-i0064 | \n",
+ " 285 | \n",
+ " 483 | \n",
+ "
\n",
+ " \n",
+ " | c120259208814-IMP-1927-08-31-a-i0057@9108:9556 | \n",
+ " [REDACTED] | \n",
+ " IMP-1927-08-31-a-i0057 | \n",
+ " 9108 | \n",
+ " 9556 | \n",
+ "
\n",
+ " \n",
+ " | c85899511538-indeplux-1901-11-06-a-i0013@387:815 | \n",
+ " [REDACTED] | \n",
+ " indeplux-1901-11-06-a-i0013 | \n",
+ " 387 | \n",
+ " 815 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 34,
+ "execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
@@ -2451,7 +2921,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 59,
"metadata": {},
"outputs": [
{
@@ -2460,9 +2930,9 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 10 items (0 - 10) of 27 total items.
\n",
+ "
Contains 10 items (0 - 10) of 409 total items.
\n",
"
\n",
- "See this result in the
Impresso App.\n",
+ "See this result in the
Impresso App.\n",
"
\n",
"
\n",
"Data preview:
\n",
@@ -2499,35 +2969,35 @@
" \n",
" \n",
" \n",
- " | c8589980257-dunioun-1948-04-01-a-i0009@0:509 | \n",
- " The British- Luxembourg Society\\nbeg to call t... | \n",
- " dunioun-1948-04-01-a-i0009 | \n",
+ " c68719515693-luxembourg1935-1939-08-23-a-i0005@0:595 | \n",
+ " [REDACTED] | \n",
+ " luxembourg1935-1939-08-23-a-i0005 | \n",
" 0 | \n",
- " 509 | \n",
+ " 595 | \n",
"
\n",
" \n",
- " | c10636-luxland-1982-07-16-a-i0048@10:631 | \n",
- " Société Anonyme\\nHeadoffice:\\nLuxembourg,\\n47,... | \n",
- " luxland-1982-07-16-a-i0048 | \n",
- " 10 | \n",
- " 631 | \n",
+ " c8590220034-GDL-1987-09-25-a-i0302@293:837 | \n",
+ " [REDACTED] | \n",
+ " GDL-1987-09-25-a-i0302 | \n",
+ " 293 | \n",
+ " 837 | \n",
"
\n",
" \n",
- " | c10636-luxland-1987-10-09-a-i0050@17:566 | \n",
- " Société Anonyme\\nLuxembourg,\\n37, rue Notre-Da... | \n",
- " luxland-1987-10-09-a-i0050 | \n",
- " 17 | \n",
- " 566 | \n",
+ " c606366-GDL-1987-09-25-a-i0302@69:816 | \n",
+ " [REDACTED] | \n",
+ " GDL-1987-09-25-a-i0302 | \n",
+ " 69 | \n",
+ " 816 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 35,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
@@ -2548,7 +3018,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 60,
"metadata": {},
"outputs": [
{
@@ -2557,7 +3027,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 2 items of 2 total items.
\n",
+ "
Contains 10 items (0 - 10) of 3931 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2596,28 +3066,35 @@
" \n",
"
\n",
" \n",
- " | c68720183505-SDT-1991-02-27-a-i0012@0:9876 | \n",
- " Asile en Suisse\\nRien ne va plus!\\nIl faut en ... | \n",
- " SDT-1991-02-27-a-i0012 | \n",
+ " c171798816707-IMP-2001-03-27-a-i0004@0:274 | \n",
+ " [REDACTED] | \n",
+ " IMP-2001-03-27-a-i0004 | \n",
" 0 | \n",
- " 9876 | \n",
+ " 274 | \n",
+ "
\n",
+ " \n",
+ " | c17180030095-EXP-2005-04-02-a-i0127@64:312 | \n",
+ " [REDACTED] | \n",
+ " EXP-2005-04-02-a-i0127 | \n",
+ " 64 | \n",
+ " 312 | \n",
"
\n",
" \n",
- " | c68720183505-LLS-1991-02-20-a-i0016@0:9916 | \n",
- " Asile en Suisse\\nRien ne va plus!\\nIl faut en ... | \n",
- " LLS-1991-02-20-a-i0016 | \n",
+ " c171798816707-EXP-2001-03-27-a-i0006@0:270 | \n",
+ " [REDACTED] | \n",
+ " EXP-2001-03-27-a-i0006 | \n",
" 0 | \n",
- " 9916 | \n",
+ " 270 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 36,
+ "execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
@@ -2638,7 +3115,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
@@ -2647,7 +3124,7 @@
"\n",
"
\n",
"
FindTextReusePassages result
\n",
- "
Contains 0 items of 0 total items.
\n",
+ "
Contains 10 items (0 - 10) of 33 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2671,18 +3148,50 @@
"
\n",
" \n",
" | \n",
+ " content | \n",
+ " contentItemId | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " | c68719924299-EXP-1993-01-23-a-i0070@1167:1571 | \n",
+ " [REDACTED] | \n",
+ " EXP-1993-01-23-a-i0070 | \n",
+ " 1167 | \n",
+ " 1571 | \n",
+ "
\n",
+ " \n",
+ " | c163209269715-JDG-1993-05-14-a-i0004@0:654 | \n",
+ " [REDACTED] | \n",
+ " JDG-1993-05-14-a-i0004 | \n",
+ " 0 | \n",
+ " 654 | \n",
+ "
\n",
+ " \n",
+ " | c8590228981-GDL-1995-06-28-a-i0226@1881:2069 | \n",
+ " [REDACTED] | \n",
+ " GDL-1995-06-28-a-i0226 | \n",
+ " 1881 | \n",
+ " 2069 | \n",
+ "
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 37,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
@@ -2703,7 +3212,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 62,
"metadata": {},
"outputs": [
{
@@ -2744,10 +3253,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 38,
+ "execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
@@ -2775,7 +3284,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
@@ -2839,10 +3348,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 39,
+ "execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
@@ -2860,7 +3369,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 64,
"metadata": {},
"outputs": [
{
@@ -2894,7 +3403,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 65,
"metadata": {},
"outputs": [
{
@@ -2903,7 +3412,7 @@
"\n",
"
\n",
"
PassagesFacet result
\n",
- "
Contains 6 items of 6 total items.
\n",
+ "
Contains 10 items (0 - 10) of 285 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -2936,26 +3445,26 @@
" \n",
"
\n",
" \n",
- " | 1883-07-01T00:00:00Z | \n",
- " 1 | \n",
+ " 1733-07-01T00:00:00Z | \n",
+ " 0 | \n",
"
\n",
" \n",
- " | 1884-07-01T00:00:00Z | \n",
- " 1 | \n",
+ " 1734-07-01T00:00:00Z | \n",
+ " 0 | \n",
"
\n",
" \n",
- " | 1885-07-01T00:00:00Z | \n",
- " 2 | \n",
+ " 1735-07-01T00:00:00Z | \n",
+ " 0 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 41,
+ "execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
@@ -2967,71 +3476,6 @@
")"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## yearmonth"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- "
PassagesFacet result
\n",
- "
Contains 0 items of 0 total items.
\n",
- "
\n",
- "See this result in the
Impresso App.\n",
- "
\n",
- "
\n",
- "Data preview:
\n",
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "impresso.text_reuse.passages.facet(\n",
- " \"yearmonth\",\n",
- " term=\"banana\"\n",
- ")"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -3041,7 +3485,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 67,
"metadata": {},
"outputs": [
{
@@ -3082,10 +3526,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 43,
+ "execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
@@ -3106,7 +3550,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 68,
"metadata": {},
"outputs": [
{
@@ -3115,7 +3559,7 @@
"\n",
"
\n",
"
PassagesFacet result
\n",
- "
Contains 10 items (0 - 10) of 57 total items.
\n",
+ "
Contains 10 items (0 - 10) of 1000 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -3148,15 +3592,15 @@
" \n",
"
\n",
" \n",
- " | tr-all-v1-24-c103079453365 | \n",
+ " tr-all-v1-24-c103079251103 | \n",
" 1 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c111669295627 | \n",
+ " tr-all-v1-24-c103079443428 | \n",
" 2 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c111669295628 | \n",
+ " tr-all-v1-24-c103079453365 | \n",
" 1 | \n",
"
\n",
" \n",
@@ -3164,10 +3608,10 @@
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 44,
+ "execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
@@ -3188,7 +3632,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 69,
"metadata": {},
"outputs": [
{
@@ -3231,25 +3675,25 @@
" \n",
" \n",
" | 2 | \n",
- " 16 | \n",
+ " 515 | \n",
"
\n",
" \n",
" | 252 | \n",
- " 0 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 502 | \n",
- " 0 | \n",
+ " 1 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 45,
+ "execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
@@ -3270,7 +3714,7 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 70,
"metadata": {},
"outputs": [
{
@@ -3313,11 +3757,11 @@
" \n",
" \n",
" | 0 | \n",
- " 0 | \n",
+ " 5 | \n",
"
\n",
" \n",
" | 1 | \n",
- " 0 | \n",
+ " 3 | \n",
"
\n",
" \n",
" | 2 | \n",
@@ -3328,10 +3772,10 @@
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 46,
+ "execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
@@ -3352,7 +3796,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 71,
"metadata": {},
"outputs": [
{
@@ -3395,25 +3839,25 @@
" \n",
" \n",
" | 0 | \n",
- " 16 | \n",
+ " 440 | \n",
"
\n",
" \n",
" | 118 | \n",
- " 0 | \n",
+ " 11 | \n",
"
\n",
" \n",
" | 236 | \n",
- " 0 | \n",
+ " 12 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 47,
+ "execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
@@ -3434,7 +3878,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 72,
"metadata": {},
"outputs": [
{
@@ -3443,7 +3887,7 @@
"\n",
"
\n",
"
PassagesFacet result
\n",
- "
Contains 10 items of 10 total items.
\n",
+ "
Contains 10 items (0 - 10) of 278 total items.
\n",
"
\n",
"See this result in the
Impresso App.\n",
"
\n",
@@ -3476,26 +3920,26 @@
" \n",
"
\n",
" \n",
- " | tr-all-v1-24-c111669295627 | \n",
- " 2 | \n",
+ " tr-all-v1-24-c103079570140 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c137439334025 | \n",
- " 1 | \n",
+ " tr-all-v1-24-c103079589728 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " | tr-all-v1-24-c244588 | \n",
- " 1 | \n",
+ " tr-all-v1-24-c103079624507 | \n",
+ " 2 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 48,
+ "execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
@@ -3516,7 +3960,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3581,7 +4025,7 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3643,7 +4087,7 @@
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3705,7 +4149,7 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3767,7 +4211,7 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3829,7 +4273,7 @@
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3904,7 +4348,7 @@
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3969,7 +4413,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "impresso-py3.11",
+ "display_name": "impresso-py3.13 (3.13.7)",
"language": "python",
"name": "python3"
},
@@ -3983,7 +4427,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.13.7"
}
},
"nbformat": 4,
diff --git a/examples/notebooks/tools.ipynb b/examples/notebooks/tools.ipynb
index 715017b..515755e 100644
--- a/examples/notebooks/tools.ipynb
+++ b/examples/notebooks/tools.ipynb
@@ -1,5 +1,29 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exploring Impresso Tools (NER, NEL, article embeddings) \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook provides an overview and demonstration of the core tools integrated into the Impresso application, focusing on components that power text understanding, entity recognition, and retrieval capabilities across the corpus. \n",
+ "\n",
+ "This notebook documents production-level tools that are permanent within the Impresso infrastructure.\n",
+ "\n",
+ "Specifically, we cover three major components:\n",
+ "\n",
+ "* **Named Entity Recognition (NER)** – identifying and classifying named entities (e.g., people, places, organizations) in historical newspaper text using the [impresso-project/ner-stacked-bert-multilingual](https://huggingface.co/impresso-project/ner-stacked-bert-multilingual) model.\n",
+ "* **Named Entity Linking (NEL)** – resolving recognized entities to canonical entries in knowledge bases such as Wikidata, using the [impresso-project/nel-mgenre-multilingual](https://huggingface.co/impresso-project/nel-mgenre-multilingual) model.\n",
+ "* **Article Embeddings** – generating embeddings of full articles using [gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) to enable semantic search with:\n",
+ " - **In-corpus queries** – selecting a query directly from the *Impresso* corpus. \n",
+ " - **Out-of-corpus queries** – embedding an external query (e.g., manually formulated or from another source). \n"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -15,67 +39,926 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Named entity recognition\n"
+ "### Named entity recognition\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Ner result
\n",
+ "
Contains 6 items of 6 total items.
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " surfaceForm | \n",
+ " function | \n",
+ " name | \n",
+ " confidence.ner | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ " wikidata.id | \n",
+ " wikidata.wikipediaPageName | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2:41:pers:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n",
+ " pers | \n",
+ " Jean-Baptiste Nicolas Robert Schuman | \n",
+ " N/A | \n",
+ " Baptiste Nicolas Robert Schuman | \n",
+ " 91.25 | \n",
+ " 2 | \n",
+ " 41 | \n",
+ " N/A | \n",
+ " N/A | \n",
+ "
\n",
+ " \n",
+ " | 46:80:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n",
+ " time | \n",
+ " 29 June 1886 – 4 September 1963 | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " 77.90 | \n",
+ " 46 | \n",
+ " 80 | \n",
+ " N/A | \n",
+ " N/A | \n",
+ "
\n",
+ " \n",
+ " | 88:98:org:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n",
+ " org | \n",
+ " Luxembourg | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " 25.12 | \n",
+ " 88 | \n",
+ " 98 | \n",
+ " N/A | \n",
+ " N/A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"text = \"\"\"\n",
- "Jean-Baptiste Nicolas Robert Schuman ( \n",
- "29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
- "statesman. Schuman was a Christian democratic (Popular \n",
- "Republican Movement) political thinker and activist. \n",
- "\"\"\"\n",
+ " Jean-Baptiste Nicolas Robert Schuman ( \n",
+ " 29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
+ " statesman. Schuman was a Christian democratic (Popular \n",
+ " Republican Movement) political thinker and activist. \n",
+ " \"\"\"\n",
"result = impresso.tools.ner(\n",
" text=text\n",
")\n",
"result"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Named entity linking\n",
+ "\n",
+ "For the system to know which entity to link, surround it with the markers `[START]` and `[END]`, leaving a space between the entity and each marker."
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Ner result
\n",
+ "
Contains 1 items of 1 total items.
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " confidence.nel | \n",
+ " wikidata.id | \n",
+ " wikidata.wikipediaPageName | \n",
+ " wikidata.wikipediaPageUrl | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " unk | \n",
+ " 99.93 | \n",
+ " Q15981 | \n",
+ " Robert Schuman | \n",
+ " https://en.wikipedia.org/wiki/Robert_Schuman | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"text = \"\"\"\n",
- "Jean-Baptiste Nicolas Robert Schuman ( \n",
- "29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
- "statesman. Schuman was a Christian democratic (Popular \n",
- "Republican Movement) political thinker and activist. \n",
- "\"\"\"\n",
- "result = impresso.tools.ner_nel(\n",
+ " [START] Jean-Baptiste Nicolas Robert Schuman [END] ( \n",
+ " 29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
+ " statesman. Schuman was a Christian democratic (Popular \n",
+ " Republican Movement) political thinker and activist. \n",
+ " \"\"\"\n",
+ "\n",
+ "result = impresso.tools.nel(\n",
" text=text,\n",
")\n",
"result"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Named entity recognition and linking"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This method performs named entity recognition and linking in a single call."
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Ner result
\n",
+ "
Contains 6 items of 6 total items.
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " surfaceForm | \n",
+ " function | \n",
+ " name | \n",
+ " confidence.ner | \n",
+ " confidence.nel | \n",
+ " offset.start | \n",
+ " offset.end | \n",
+ " wikidata.id | \n",
+ " wikidata.wikipediaPageName | \n",
+ " wikidata.wikipediaPageUrl | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2:41:pers:nel-mgenre-multilingual | \n",
+ " pers | \n",
+ " Jean-Baptiste Nicolas Robert Schuman | \n",
+ " N/A | \n",
+ " Baptiste Nicolas Robert Schuman | \n",
+ " 91.25 | \n",
+ " 96.76 | \n",
+ " 2 | \n",
+ " 41 | \n",
+ " Q15981 | \n",
+ " Robert Schuman | \n",
+ " https://en.wikipedia.org/wiki/Robert_Schuman | \n",
+ "
\n",
+ " \n",
+ " | 46:80:time:nel-mgenre-multilingual | \n",
+ " time | \n",
+ " 29 June 1886 – 4 September 1963 | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " 77.90 | \n",
+ " 86.46 | \n",
+ " 46 | \n",
+ " 80 | \n",
+ " Q15981 | \n",
+ " Robert Schuman | \n",
+ " https://en.wikipedia.org/wiki/Robert_Schuman | \n",
+ "
\n",
+ " \n",
+ " | 88:98:org:nel-mgenre-multilingual | \n",
+ " org | \n",
+ " Luxembourg | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " 25.12 | \n",
+ " 100.00 | \n",
+ " 88 | \n",
+ " 98 | \n",
+ " Q32 | \n",
+ " Luxembourg | \n",
+ " https://en.wikipedia.org/wiki/Luxembourg | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"text = \"\"\"\n",
- "[START]Jean-Baptiste Nicolas Robert Schuman[END] ( \n",
- "29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
- "statesman. Schuman was a Christian democratic (Popular \n",
- "Republican Movement) political thinker and activist. \n",
- "\"\"\"\n",
- "result = impresso.tools.nel(\n",
+ " Jean-Baptiste Nicolas Robert Schuman ( \n",
+ " 29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
+ " statesman. Schuman was a Christian democratic (Popular \n",
+ " Republican Movement) political thinker and activist. \n",
+ " \"\"\"\n",
+ "result = impresso.tools.ner_nel(\n",
" text=text,\n",
")\n",
"result"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Article embeddings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "All content items of type _article_ in the Impresso corpus that are longer than 800 characters were embedded using [gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base), the latest model in the GTE (General Text Embedding) family, which has strong multilingual capabilities."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'gte-768:Fao1vWPioj3Msv+7LwqfPLicTb2kPgO+pkpAPVhbkL1cdi891oY9vYv1dDzSHTC9dJ0MvYGTZT3d87C90kt6PZ81gT0pg4c7x6XRPdfOC70xllU9WJrxO/pBwDwDI1C8YveXvZ3gyrzDjek9FkkhvXFAoL2LjYo9W0XwvKLdFLsg10C7QGhjPcAUHD1Z4YC8yFZhPbxhnToLYza9cFl8vMkdYTw0DZG8Mr10vX8zIjo/ZTs8AYC1PGyAn70CFaS7PAyNvNqxPT0AUyC9eiAyPXVJo7y1OGM9r/8TPdFwez1HIaS9lXUHvGb4Aj4N9oK9wTlNvQBmOrtQG9E7WqmPvUjIYLx6jZa8hOEFPV6vkLzIEoM9latiPej5i7s72xI8NmLBurlZuDzdEyw8gD7mPcUoQLytEUk6CfLAPfnsOD1UKy29vVunvBrRozzb8jU9op9OPJnZqTyJeE+8bchbvT3uhLxL2WU9toKuPfffBrwuxZk8H2PLvG6TWby+ei09OKuLvAUJTL0mvh08oOuDvYTpiTsBNQ47CNTAPB3frbzvPi88CUQ9vcza/LwKvRs91lUCPPlTCjx/bH+9dEkovK43K70N5bC8+BMxvcgJBz1vxoo9JQygvHvAkz3VDry8nWWzuwuSJ72RL+a8lJP5vJPTur1O3uG8Ph27vGo/LL0q7ew8mvK9vBMQmD3kaIU93Lg1PejcIb1ffAg9y4wuvDBo2LxlEzc9SWfWunvsKb228fy8F7iKPBLTSL2OFiO9T4+JPOr5jzuas5K8grgMuy6EQzzGCF69W2DZPKX7JT3C/908EIxYvaKCMrwC3e28KIJJvLBgGj0P+Im8LMJuvGClrT2C4W67DOx6u/g0rbummEI8UquLPKSzvrwj1nk8OyIoPXR3ILxpzxy98WElPW2ejDxP2jK8vsGBvKCdBD1QD/K8sieVPc6Cjr0mm6k7U8SCvEEc9LyS4Iq9STFGvGc7uLsB0bg8kPsMvf71OL1OeyE8xRFePMzPlLwlezA8mk6FvZepnTygYVm8lpG/vSrQE70AAZo9dW9tvKEhMDw3tzM9JxoBPQQrfz1oTuA8PLAyPTZcT7zgAJ27is46vJwGMrzvySK9aSHsPKMgBzx3DUU8LM9OPSKZLr2bfkC8NAxEvUJnyrwaLyk8kDEHvdzT8by4SLe8ihZfvc4Bn71960I92S0sPYRbJTpyUBw9kjFBPM6NsjyVPf+8xJrePDDPbrxxoXm9TomuOp9iw7vczX28Cwy5vNWlFrxusiE95cLcPKQBdrzeF/A8oUoNPCiY5LySdkO9exmBPEWYAbu0dzg8ZbvUOuFCHL25xvk8CGmPvPW+ELwGwC89tsSCPNrVND1qxde8wMZBPdphLDtXLgO9ltB3O0HdljvoDCS76XEvPWikL730pcU8rzN6PEWfEL0nL4U8RkJSuz9WGT1B9Vc6bO4aO8ZaNr0bEbw8kx6ovSSNdbz79707pqSJOz+2Nz26XpE8rrukvC6WvzkEPiy9haWUPSanjDw7xIo7iyCLO3pH57v+h3Q7AYOiu6m7r7zROxW75Dm+PJsavb3b49S9xarFuxI6mTsIBE+860WOPCQhuLuGPg49SIv8PF0f7Trvn4y8A+xZvYJWlLzP/XG6VdBdvVypPjkCnrm85P5/PO3zS73+Adg6S7cTvJEkk7zksFQ9WJANvR9u9LzuLDS9/6lOPbK26bxHOCc9Jhe9vOutYrxDnWu6fV7zOjkbGL1G4d28M76NPO6FgTy6dXA8bidnvcTxqDzBSjS9/ykjPMqsRjvicHM8/dRsO4ODubw6nQM9Aa3JvYEvYD1Lqq+8zqH3vPIaMjxxD868qD45vCfHFzqe84S6bNcWPeMPvDtdVJE9NUAlvEWahTw8OMW7l9UePTSgeb1fym09T8ZAPenUuLz8wZU92C8GPMWIoj0D2Y+8X82HvUBCuDwwSJW8g9PlO66ndjzMbi89yL8U
vNg2Xj1ahaa8CPdmO6NkFD3si2s9/MGdPODKHb3wY5k7dq54Pagplj32DUk8nZsdPLiM/7yMrMo8DXxQPUKaWj2G+oG8ARQXu59o07yzynE8jSu2PN1QUzt14Ju8baxvPKdDzzzvh968hA5EPRlkIT3s/Lw8qi6iPMu7wDz1Tmm9AJRJPN6sPD0ysUe8qOWBOgnuf7wy7Ss9wXNUva8HoD1WWKu8A02wvDoVYj25MK26lVSEPPRYEz7054m88q3Wu+/HyDwdstU8fPgmvUca97zGigI7HH4JPRhFKD1RtSO97Z0hPSUjmr3L3wI9XarYPG18CL3+1wi985qOO1XFBTxMC4y8RFryPH62ID1Foh48L2qGOjWQtzyzxou9/8IPPBpMPTxwqXc9AI6LPas9U7yD9jS9bgpQvSYFML1F6DS847GHPM/Z4zwxSyq82X5/usUL4DzAyo48HReVvN7ZfbwE7pS8smoxPfbUBr1njLA86H4wPbXUirukqWk9x9tROh8TWbx4QDm9eshOPEXhprx+oBK9EJkHPVYNAD2rYFm90V6BvENfjjwiI4m4O+1aPC7oQzwed2s856AhvKBkE7008Dk8cUTaOxr6rr21I408yucmPf5ixbzQmN08B0VhPHgWn7zW0Bc9Tp8PPWcNWTwrzsa7sYUsPO0hKbyrSwW9AVRRvTVcFbwpevs8nPqHveffgrwJ/3Y9jsVnPcSSUzzpLT+9UuoxvXH2jDyVhxg9JDGEvXIWIb1+EEo8IDXXO7z7Jb3Sb1q8hlUsveisQz16Ex69Wy71vJYkLD0Skb88QswPu4B8xTwNQZa8Y3WTu0gAYDxHedo6JFIPPVoQgDssKZg7slNMvAi6ij3EoSY975P5u6pe/DvrIcw8yK+BuzGHhDxnldS7SnwJvM914zx4WQo80+KRPXi5lb1TdO6862kdvJnqUDwEGKC8VSQTPVvIED3RpbS6K2LLu5Lkxzy42ke7NJe+vNAeeT3oAla4apEDvflT7jySHUK8XkD9vGnjyLwF/ze9wwQgveJbbTwQqIE8KQCLPafGVDy7why9PYh6u5F3GbxKID29MTKLvP0oSD0pj9W84Uwou8UW4bxTfWu9dodZvTFvS73kC/a8dlGkPNkkQb3gpIY8zPqZO+iI1jtHxvY6XTg7PRXa/bxGlt67WI4EPewo/7o9bQu8lA5yPIfSbDyl5eI7fvPvPH4kObxE7AU94mALvR2mjjyRb5m7jTgLu2XBjr2975m7jd8KPeNqbL2aPwS7YZ9yPBIuJT0SmpI896JOPNBsgby6gIm8xfqmvMTVPD3KjB+9hoxkuxCJGz0fPUM8NThhPcOC6zt+xR29BvuSPL5LqbxVyuK6ymV1O9SvzLwuV4Q8tOQ9PTqIfTxdFi09alKKuwl6XTuQE2i8D3okO49v0bxSfHE6VeFvPdbmFj3UTnK9/YfZPCTYVzzN4AU8m4ndvBYsCj1urWU8ABAKuXfmXb0ppoo8beOtvHhRWr2XiOa89ynYOyML3byIbIk9HHLSPO1MBT1FQBk9DfyfvCqsNb2IWYk8oCUQPbhLL71lLsU8B5IjO1fgOL0ycMK82sYwvZUSMb0UGh69P8pjPaFck7wnxT68lU0zPRrqCz3kkPG65XKdvEXE0jw4pcI8cSIHvFiOcb0l3Ek9VUOkPChwLzynODe8aQiZvDW5J73IfLQ6hWssvFg2XjyDOKu8606sPCB9ojzrkDE82OjXvDW7AT1QVvY8IYFCPEX8ej2G27w7Ru7iOg+vZzwiSQi72enwPDArtjxJbMk8uwbjvE0aVTxpfOW8aH5RvI9N4jxA90w81ykfvJGJTb3jDPk8dWaduxEHJT2tKYo7pRSsPGrOFbvmSyE9Oea/Ox/Gjz3g/oQ98aR6vNpF8bwjhYq9oabBPHp0I711Tl48yUFhvCTTID37KX48sUpEPNDPEL37nWO8b+TLvA5o6rqLJYA9V2uCu1QCgDwAYRU92wJ7
PaXYkT2yzWi9PKNjvEUb8zxqDEI8ZVDlPPM2dzyZ46a8+lpOPYGWy7y0FTS9DJShPADtP72T3o+7dCmdPPZ5qDwGSBO9zBAEPOryVrpgZiE9'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "text = \"\"\"\n",
+ " Jean-Baptiste Nicolas Robert Schuman ( \n",
+ " 29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
+ " statesman. Schuman was a Christian democratic (Popular \n",
+ " Republican Movement) political thinker and activist. \n",
+ " \"\"\"\n",
+ "\n",
+ "text_space_embedding = impresso.tools.embed_text(text=text, target=\"text\")\n",
+ "text_space_embedding"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Search with an out-of-corpus embedding created by `tools.embed_text`\n",
+ "\n",
+ "The `include_embeddings` flag is `False` by default to avoid downloading embeddings that may not be needed; it is set to `True` below so that each returned item includes its embeddings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Search result
\n",
+ "
Contains 5 items (0 - 5) of 40 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " title | \n",
+ " topics | \n",
+ " embeddings | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " ... | \n",
+ " pageNumbers | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | IMP-1949-01-20-a-i0001 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Les voyages de M. Robert Schuman | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp29_fr', 'relevance'... | \n",
+ " [gte-768:byoMvc6wYT2x8rA6qQcxPUqpML2TUoq9aprOP... | \n",
+ " 643 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " True | \n",
+ " ... | \n",
+ " [1] | \n",
+ " [] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | EXP-1958-03-11-a-i0148 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Robert Schuman parle de la zone de libre-échange | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp00_fr', 'relevance'... | \n",
+ " [gte-768:LcFFvDLFOD3U6ZW8d+9EPfC3Er3GYJq9GsIGP... | \n",
+ " 657 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " ... | \n",
+ " [6] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Berne', 'count': 3}, {'uid': '2... | \n",
+ " [{'uid': '2-50-Robert_Schuman', 'count': 5}] | \n",
+ " [{'uid': '2-53-États_pontificaux', 'count': 2}] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'BERNE', 'mentionConfidence':... | \n",
+ " [{'surfaceForm': 'M. Robert Schuman, ancien pr... | \n",
+ " [{'surfaceForm': 'Association romande et de l'... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | luxland-1963-09-13-a-i0008 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " Lettre à l'éditeur | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp45_fr', 'relevance'... | \n",
+ " [gte-768:KBsevTg+oj0D76y82ZYQPXHxhr3uGrW9TWa9P... | \n",
+ " 481 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " ... | \n",
+ " [4] | \n",
+ " [] | \n",
+ " [{'uid': '2-54-France', 'count': 1}, {'uid': '... | \n",
+ " [{'uid': '2-50-Robert_Schuman', 'count': 2}] | \n",
+ " [{'uid': '2-53-Institut_de_France', 'count': 1}] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'France', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'professeurs à la Faculté de ... | \n",
+ " [{'surfaceForm': 'Institut de France', 'mentio... | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 29 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.search.find(\n",
+ " embedding=text_space_embedding,\n",
+ " limit=5,\n",
+ " include_embeddings=True,\n",
+ ")\n",
+ "\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Get the embeddings of a content item by its ID (here, the first search result) and keep the first one."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'gte-768:byoMvc6wYT2x8rA6qQcxPUqpML2TUoq9aprOPHVB1Lxfdgm8ohwxvfk6R7sODvi7grSrOtsjuT1UZ6G9F0pvPRxrKz0MONu7vAyrPYfLczv7g9g8oyPQPccqGD1FGwG7fpNvvWb4X7w0bp09EN8DvtWGBL6mxoU8H5qlvQeW7Du35ZY80d/bPTRunT2VLJq8BMBPPfmtOz2CtCu8oq0vvWQUBT2f0x+9knAUvWLHALyhNkg8whmoO5YKnbz6HD29jc6lPEO6vTuJuUK9O+1HPWm8yzxfBfo7/7LSPP7MaTzSybe9VO0NPWTYLT5bR4+8sfIwu4v4CLtvt5e7W9QavC1+47zD96o8hesLPRbnxru4WIs9RLZKPXeob70u5ww8H6D9uxKmmzviCka9li/GPSh5zLwkMLs8ZKGQPTOBlTwhfBu9purnPIoc6zzmsKc8Te5OPFIVY7wNFt685U/kvCaJmLymTx49ZKdoPavlszvUkRY9sXfWvB+wSTyPQ/88pfLNvM8Gk72ZU668EE2+vPZkKr24ajw8NWMLPRmVDr3CFTW9BbYEPCWterzjawk91LX4vJF6XzzE6KW9fgLxvEJTIr13Daa8Y50dvdOogT1g+64990oTvJL1uT3YSSk9vXc5ux4zCr103qu8BFVBvakLpL2Iwai70tWQuwwQhr27q2c9gNYovbhJhj1qH/Q8lMNwPf1PKr2hNsg8RRsBvcvIEzxnWSM9OCOEvENHybxydxC9DKPpPH6Nl725wxk9rhghvdWGhD0NidK8JDihui9SmzwC9P07UgsYugAUFj1Naak8/sIevVtZwDubvjw8s10/vWdZIzy5UKW8+waZuzSLYD1QP8a7XTfDPAtScryY6B+6n2p2OqQM5bs6bJW72UU2PXEs8TruIM28UC0VPfjH0jzX7Fi7Tk8SvOvFCjxdJZI73d8+PLb/Lb3TQJ+860owu3lenb1DKoa9hlaau/HCOz2lbSg9Cy4QvfOwirwRO407+3GnvJEH67zodJO860ORvWZzujuUwYs8bWx4vWD3O73chmE9ML0pvH9vDT3njqo87/SEPBDayToxuTY9RRuBO0D6RL1Baca8/Htyve9xxLxG/1u9SbUJPWPBfzy6xX68G/RDPIdOtLzX7Ni719qnvKMCmjtvPD0886Qxvd1syryzyM270s0qvQChIb3gMDa7wD8YPXPmkTwdTSE9nBeavEYPKL0lteC8Lt+mPEQlTDylZUK9rbEFPJvQ7bx4eLS8aqoaOk5zdLsIgEg9JL3GPHNZBjt2vK480kpqPMRuEr37k6S8gVPou5u+PDxB5KC7hlYavX/wv7xLkww9F9/gvLLi5DzomHU9mc4IPaXgHD3lwli8c1kGPEHuazpcQxy9peCcvLPITbvuIE28d4AaPWuWW70GIRM96ulsPOpkR726uzO86mTHvNSnOj2QKWg8PiC1PFmNbrw+KJu8mWVfvT6XHL0K5+M84ZtEut7FJzfQ+XI8h9dMvRdIirujoVa9VUGxPTmOEj3rTiO9SxwlPVmN7jvrxQo8y8gTvNqeE7x1tMi80WznPJ6CqLzTMZq91YkwvdJaNjz0Hf68LnAlPTc9m7zPhJk8eXBOOziSBb2+4se7BTsqvFmJe73Cnk28EUXYu0hcrLxB7mu6r4sVvZlis71MCGY7GSjyu+l+3rv1gjQ9udXKvMl7D70yDqG80tf1PFA34LwK5+M8vl0iPGUSd7zJ/EG7gUkdO+eWEL2PtvO8c1mGOwrnYzzoE9A8PbkZve2l8jxn3kg8kQGTPXRj0bujAho8HspgO3leHb0eM4o9YGqwvWdrVD1+AnE8fLMHveXCWLzw5Li8gUkdvM6esLt/Y7S8gx+6Ov+oh7xDt5E9vuLHPBko8rvtpwC9kKRCO32bVb0DZQ09Jg4+PCW14Lz4vYc8SUKVPMgaTD2Wl6i8wg1PvSsrBzyBQbe8TdydPDbKpjzVm2E9T1H3
POa7uT0zlzm9HxENvCYOvjukDGU9ZIOGPKR1jrxENRg8DDhbPPZj4z2Scvm84ov4PJny6rvhm8Q6y1/qPBKwZjz/LS28r5HtO+TOMb0P6hW8POFuPM+OZDzveaq85rAnPYc8Az1DR8m89mSqPW9Kez2y2Bm8nv/nuvDkOD1rgLc5JaMvujbUcTvX4g09whU1PYDeDrz/vis99B3+vCLxdD1/axq95dKkvHYvIz2LAlQ8cSxxukDHGj5rBV28ErDmO7CHIjyf5dA8jOi8vOtOI73aPVC99JjYvJQwDT2Kmzi9WfYXPUoqY71SFeM8MTgEvG1MibvOGYu99WyQPMiRszvbgAm9YVQMPf7CnjyXFOi8PL2MvJwpyzzgqxC9/kfEuxHImLzR31u8knJ5PbWchbysXBs9o5V9vaMm/LyqhPC8F73jPCJeET0agc88wwlcvEb5Az1dN8M8eV6dvFZMwzsXvWO8/6gHOzewD713pHw92ORyPV03w7xduHU9n2p2O4kkUT1b2A29yJGzula30bjexac3KdYcPWqqGjoqNpm9d6IXPQeWbDsBiW+8abRlvAcRxzzTOLm8RCVMvFruMb1zWYY8mfLqPPFQjr1FqvE8zTOiOwL0fTw8VOM8ENpJvELUVL3tKLM8DKEEPfI5I73bDRW9OCVpPHJ3ED0mHgq9N7+UvXNZhrx3ERk9KycUvQ7uCD0dwJU925atvN5k5Ds10gy9JpNjPCPfwzwEVUE8IQ9/vccfhr3Hq8o8VyJgO5NKJD2FfAq9QWFgvTC9KT08Spi8HeR3vJlbFD2x/Ps7bzy9PNUOVj14iuW8HspgPG/B4jrd74q8jWV8PeXSpDvxT0c9Mcf0ux817zzJ/EG6jVPLvCUoVTym2Da7mfLqu8G45Luedk+9jUkAvUHu6zvuIE07fvwYPXtKXr2vke28QeQgPJlTLjwtC+88cLMkPe6TwTys7368/jUTvYI5UTwbdwQ9PUx9POpSlrxO1Le8elJEvdjSQT35Osc5JaOvO4fJjrzw3NK8nQtBvTT+1LvZV+c7ZnSBPerfIbtPWV28YPuuu8l/Ar17ySu99mSqvGhRvTwzGOy8xlrTO4oSoLz4Qi29U/EAvD+Asb387IG9FtEiPf7M6bxbWcA8giegPPMbGT0Sppu7LKhGPSf0JrwNmZ67Dm+7O5afjjyzvgI8+puKPa4YIT2nvh88c1kGO09Z3by5REw9/FeQPF4agD34z7i7DpG4PN31Yr0tASS8ZoXrPBzwUL3j6Ei9bkSjPDIWBz2cKcu8OY4SPc20VL3Xaya9iaeRPJ0PtDwrmgi99BMzPA5/hzzuDpw8SxDMPNUEizz5Ose62VfnPMl/grwQTb689B3+PBviEr3Jm368s01zPP7MaTzOnjA8fLH5u7wUkbyOtI68YV7XvFJ+jLwrI6E8aTcmPctn0DsXSu+8DgQtuiTFrDzjceE7J2+BvLQhqzy3cqI85Ud+O5ny6juY7BI9drwuPYkk0byk6IK8Ny3PvE9dUL3anpM9ujYOPbHqyrw5C9K8l32RvDmcUL1xjTS8FI7pPH9jNLwAq2w9vXe5O0aQ2jsRyJi89mA3vT4yZjyUwYs8ezgtPRgmjbwq3HS9ZKdoPH6FsTz7g1i9/6gHvIjT2bxCWXo82w0VPXyzh70dSS49+TpHPFZMwzvFUIi7tpAsPVgYlTvcfJa8OfkgPaG5iDzxwju8xrsWPDmQdz3lR348K7CsPJCkQjwu5ww7Pq3Auhv0QzylbSg8rFBCPP5HxDu8hwU8bevFOrHqSj21lJ87e8HFvALuJT15ZoO8VrdRvMcwcDwkOCG8iZdFvNNAH7t2vC49ZnO6u+6BELxxl/87M/SJPATBlj3dWhm7N8JAPTw/hj2PHx08nYoOvZCkQr0vXGa8xruWPLo6Ab0PVaQ8+hjKPDt2YLzNppa8hlaaO6XgnL32ZCq6m6yLvLQz3LmIwag8+o8xvXCny7xa36w95EkM
PTiVsT3v9uk7t/s6Pc6IDD334ek8X3IWPXjzjrx5cE4719Y0Pae6LL0+pqG9Ot+JPJwpy7o1X5i7dFi/Pcr4Tj2tNqu85U/kPKkPl7yfYCs9'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "first_item_embeddings = impresso.content_items.get_embeddings(result.df.index[0])[0]\n",
+ "first_item_embeddings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Search with an in-corpus embedding retrieved by `content_items.get_embeddings`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
Search result
\n",
+ "
Contains 2 items (0 - 2) of 16 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " copyrightStatus | \n",
+ " type | \n",
+ " sourceMedium | \n",
+ " topics | \n",
+ " transcriptLength | \n",
+ " totalPages | \n",
+ " languageCode | \n",
+ " isOnFrontPage | \n",
+ " publicationDate | \n",
+ " issueUid | \n",
+ " ... | \n",
+ " collectionUids | \n",
+ " entities.locations | \n",
+ " entities.persons | \n",
+ " entities.organisations | \n",
+ " entities.newsAgencies | \n",
+ " mentions.locations | \n",
+ " mentions.persons | \n",
+ " mentions.organisations | \n",
+ " mentions.newsAgencies | \n",
+ " title | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | oecaen-1941-05-15-a-i0018 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp29_fr', 'relevance'... | \n",
+ " 263 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1941-05-15T00:00:00+00:00 | \n",
+ " oecaen-1941-05-15-a | \n",
+ " ... | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Europe', 'count': 1}, {'uid': '... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " [{'surfaceForm': 'Europe', 'mentionConfidence'... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [] | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | lepetitparisien-1926-09-18-a-i0054 | \n",
+ " in_cpy | \n",
+ " ar | \n",
+ " print | \n",
+ " [{'uid': 'tm-fr-all-v2.0_tp03_fr', 'relevance'... | \n",
+ " 232 | \n",
+ " 1 | \n",
+ " fr | \n",
+ " False | \n",
+ " 1926-09-18T00:00:00+00:00 | \n",
+ " lepetitparisien-1926-09-18-a | \n",
+ " ... | \n",
+ " [] | \n",
+ " [{'uid': '2-54-Paris', 'count': 1}, {'uid': '2... | \n",
+ " [{'uid': '2-50-Austen_Chamberlain', 'count': 1}] | \n",
+ " [{'uid': '2-53-France', 'count': 1}] | \n",
+ " [{'uid': '4-55-Havas', 'count': 4}, {'uid': ''... | \n",
+ " [{'surfaceForm': 'Genève', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'BRIAND', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'France', 'mentionConfidence'... | \n",
+ " [{'surfaceForm': 'Havas', 'mentionConfidence':... | \n",
+ " L'ENTREVUE BRIAND-STRESEMANN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.search.find(\n",
+ " country=\"FR\",\n",
+ " embedding=first_item_embeddings,\n",
+ " limit=2\n",
+ ")\n",
+ "\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Other tools"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Convert an embedding to an array of floats and back"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'gte-768:8gU7vdWAjz3oWoK9o8o+PUHElL3dkT+9zky+vFKAfT34Ejg9S8Y/vSqUE71SgSE98ArSvMs/AT4x0a68X4zWPXoJnD2/+Rs7OJ/IPS4awLz4VjI9nRS0PedVKz3oMy69kP8Qve/ujzyU9L49KpSTvVwUS73ilTI9rkowvTgcaz11fhg9Q++bPUEzlj3ZLRC8UXdzPQKYwDw1x4C7CEn0PBl2pju5K4K886TavJmmFr2Zg/U8IMFiOy66g7zq/dS8QUG3PFI0Hb3fHSe8PIE+PYrQljy7ZKq7piDNPNNeUjwkbuO9Byf3PFWAPT7+w2u9pQNnvHniR7tFl0W8Qh86PDd9S7wGjW482+mVPCM6krw0AY09J6xiPSQXcb1gatm8fbX4O6JNnLtH0ZE7IT8pPccBAr0wFSm8x5KAPUfy6jyCKRE8ep5NPIPCdTwWsHI9EkzDOu4x5rySWja8q1RevCF9aDzkX1k9uSpePNbW3btIQBM9ez1tu+t7GzypaDo993ivvLvEZr37rEC8jUf+u0lq9rrQheY8MXBOu07ejjq+G5k8fracvACdV7wuWP88zKQUvQ230TgY6r69iz8Yvc7dPL0YyEG9AtejPJBRrD22msM9f3F+vN3CgbwTRyw9HO6xvPSfQ7wQSJC91Ll3vJaOx73uMWa8VKyoPPD8sL3Mc9I8ndonvanXOz12+zo9es8PPTSqmrs7NLo83IL6vIi4x7w7lZo9tXSTPCwf1zywO6u9S0mdPAsFur2/GvU6GJd/vALXIz3BVMG8C95lvDzAIT0o60U8oAqGvRgbATzG43c9hMOZvDc/jDrCjg27o8q+vOm/FTz0YQQ7CYPAvDFwTj3K2ck6g8L1PFTrC73zgt06L7nfPF7fFT0zjbS7OfuRPBJMwzu6R8S8PY/fPH702zw2Q/+7cDvCvDyBPj0W4TS7TKTCPGjSe70+6yg85+apvPWHtL37bV297hANPVw2yD0g8iS97DehOo/iqjxNwSg9EkxDvPCNr7wj2bG8wUBlvWIFBj0jOW680EenvRfcnbxmWvA8t1HyvHo+ET17HBQ8skCCPNlrTzz8mQg961j6O86Lobwu21w785EiveNROLzqeve83UmSPYowU7wXPFo79GGEOksFozxc+Ai88yKhvJerrbxaSiQ9ROlgvW3lM72r0iS8AhuevB9EwLy0Osc8sET1PJ72abxCgBo9OthwPLV0k7uAbOe83cKBOoVPgT1qbai76pz0PCP7LjyJlsq8H3UCPQ23UTyaYfg7K4C3OXFYKL0bT5K8rkowPGqrZzzqPDi9xkRYvNbW3bvr29c8U18kvf0IirzSQWw8l0pNvOKtwT1DJJE9GPmDPIVdojytPA89jialu2qsi7yG/EG85yRpvL/5mzvPyeC7OE0tPVhBmr1CMnI9Bo3uu3J1jrx6/628RyMtPQVAaj2WUAi5pIZEPVXI6rxoM1w5SR1yvUv2Xb2e9428j+IqPLWVbLzU/XG8glkvvYi4RzwyTtG8CJecPfnwujwBuj08U84lu1LiAT1Lh9y7rF4MvXao+zq+G5k8q6FiPWQ+rrwJImC9n1dKvMlOBj3TjnC9vxr1OmHoHzt8ONY8JdSavG7f+Lz9JMy76v3UPEGFMb0bcY88ldLBvKq1vrsE0ei8lr8JvYIRgr1knuo7jgSovC66gzwjOW49B6pUvHUPl7xoM9w8x6ChPfzJpjtDqyE96R/SvM9ppLyeuCq82BAqPLcNeL1WpxG9vdw1vD5L5TzDbJA8YZVgvWQ+rjzaiDU9QEbOPBoVxjyRPVA8EirGPLay0js/10w9AZSNvbnKoTzJ+8Y8WhnivDONtDwdqje9S4fcO/OCXbyWUIg80gOtOTdg5TvpETE9V2MXPZcMDrxgSQC8u2QqOkNd+bymIM08llAIPeiir7znJOm8hX57PYQjVj0fg6O7as1kvRYgmDyJUtC863sbvFU4ED3ESpM87bRD
PeDxuz1gSQC8i89yvPusQDzuYig9/UbJPP1Gybzkbh48bGiRPFUHTj3AFgK9SSy3PAmDwLy0Vy28YAl5O5jIEz2lZEe8DbdROY/iKr0ALla99bypPGAJeTyPQwu8ZJ7qPNUaWDxEewO9334HPVqrhD0bcGu85voFvWnv4Tz3F089/VWOPWYW9juuGhI9Bo6SvEfy6jvxiJg80ebGu83w9DxlWxS98sbXPO/taz3vryw8GIoCPYS+Ij688BG9BTJJPPODgbwhoAk9vxr1vHQwcLw+Kgy9uA6cvECnrju5KwK9XERpPUfy6rys74o8cF0/OqHP1byVQUO9+m6BvEdiEL1h6B88gQyrPK/H0jzqLhe9Wl3cvMo6qjwO9jS9WsOTvXTysLtaGWK7DXmSPJpheLxiloQ9q0EmveMgdr1xlwu9g8J1vXt80DxK6Lw8usWKvCaQILsl1Bq8SSw3PJxAHztxl4u8kZ4wvVNQX73CQQk9mNKBPRn4XzwIxzo8/qISPEXHYz3i03G8+NNUPJK7FjxJy1a8dgoAvfDMEruwg9i8Ouc1Pc6s+rv76v87ThxOPGHonzrgOWm9vhuZvIGJzTzkbfo8vZ3SPEKKiL2aYfi7+fA6PBtPErzRZI09V4TwuzYiprtXhZQ8ympIPUxEhr2GXP68DhgyPKh8ljwfpSC99D5jvGvITbuBuWs9PHMdvJRxYTrzgt096MSsvBVB8Tzjj3e8geotPUWXRbz8uwW91/PDvOQNPr33F087ynhpvG9AWbzyBTu9jQk/PF2zaj0mIR+9haEcO8JBCT1+ZAG9wPNgPKAwNjx6t4C9QzygPQgFerxkPq68Snk7PVih1jy3z7i8ZRwxPWHoHzukxSe8maVyvCGgiTthBOI8H3UCvZHc7zoDFWO8z2mkvL/5m7rIvOM8nECfPH8RQr25aUG9B1g5vXTBbj3HMSA9gihtPYnVLT1bWMW8UAkWvWgz3Ln/QbK6ApjAPGIm3zsCWoG86xo7PPI1WT0yr7G8w39IPTSqmrreDwa9FUyDvXTyMDsmkKA78xQAPekRMT2oWpk7CUUBPKh78rwA8Ba9QcSUvFJzADp5Qyi95SqkPOOymLw/aEu9zS/YPLslx71RlFm7jpUmPX9ak71Zvjw9HSxxOYb8QbwvKYW9JaNYPXN0arzdwgG8NGagPHLVyrs/iki7MBUpvFQvBrylA2c80SWqPGHFfrz35zA9I/suPEGTUj1H0ZE8BTJJO9jgC72lA+e7JJU3Pc5MPr1erlM8wHa+PFI0nTxgSQC8BmwVPJrkVb3e8h+93WEhvc1uuzwtnR29exwUvMWluDySOLk8gQwrPdeSYzw6Vje8qPk4vJRx4bxEnNy81/NDu/tMBL2b3768t1FyPB0scTwfBd08LXugvNzHGL3dwoG7bXYyvYJn0Lw3wUU93LM8PdIDLbvxJzg7YhMnvQ5WcTxLh1w8whDHvKk3+DyWUIg8oJEWvKCRFjxDqyE98MwSOpfNqrzG43e9Ed1BvYFK6jy0SGg9vrq4PC+YhryDwvU7rb5IvVZ2T73eAMG89P//PO/t6zpAX4E9GPkDPQ81mLx1Tda7xZcXvX+OZDzgK0i9EM+gPBoCDr3WZ1y9/MkmvDalAz0wFak8x5/9u9P+Fb3QRyc8Kkbruypfnr0teyA9w8xMPMwTljx8+pY8UDm0vN/tCL3maGM7MBWpO6TFpzmKMFO8NQVAPPPjvbw/CI88a1lMvXCbfjw8UHy7NId5vGkSg7nYcQo8oQAYvTdg5boPEve8FuE0uiIdrDy7omk8HC0VvfTBQL1jYCs8piBNvO6SRrxwm/67uagkvSP7rrunX7A9wdHjvIabYbzR5sa8O5WaPOZoYztjIci8DtQ3PY37HT3iNFI8j3MpveB4zLoVQhW85mkHvVr9H71LZoM8UZTZu52bRLoEhQi9RHpfOwRUxrwX3J08QKeuvHODL70Nt9E8ae/hvBVClTztC7Y9jRdg
PRfcHT1ID9E7dQ+XO+enxjy4Dpw8AXvaPNR7OLzCjo26hd/buxEv3TthVn29Fp26PJ3MBj26xQq8sZIdPbxCrTxPmhS93x2nO23lszwS7IY8'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "original_embedding = impresso.content_items.get_embeddings(\"lepetitparisien-1928-05-07-a-i0047\")[0]\n",
+ "original_embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[-0.04565996676683426,\n",
+ " 0.07006994634866714,\n",
+ " -0.06364995241165161,\n",
+ " 0.04657996818423271,\n",
+ " -0.07263994961977005,\n",
+ " -0.046769965440034866,\n",
+ " -0.02322998270392418,\n",
+ " 0.061889953911304474,\n",
+ " 0.04493996500968933,\n",
+ " -0.04681996628642082]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from impresso.util.embeddings import embedding_to_vector, vector_to_embedding\n",
+ "\n",
+ "array = embedding_to_vector(original_embedding)\n",
+ "array[:10] # show first 10 values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'gte-768:8gU7vdWAjz3oWoK9o8o+PUHElL3dkT+9zky+vFKAfT34Ejg9S8Y/vSqUE71SgSE98ArSvMs/AT4x0a68X4zWPXoJnD2/+Rs7OJ/IPS4awLz4VjI9nRS0PedVKz3oMy69kP8Qve/ujzyU9L49KpSTvVwUS73ilTI9rkowvTgcaz11fhg9Q++bPUEzlj3ZLRC8UXdzPQKYwDw1x4C7CEn0PBl2pju5K4K886TavJmmFr2Zg/U8IMFiOy66g7zq/dS8QUG3PFI0Hb3fHSe8PIE+PYrQljy7ZKq7piDNPNNeUjwkbuO9Byf3PFWAPT7+w2u9pQNnvHniR7tFl0W8Qh86PDd9S7wGjW482+mVPCM6krw0AY09J6xiPSQXcb1gatm8fbX4O6JNnLtH0ZE7IT8pPccBAr0wFSm8x5KAPUfy6jyCKRE8ep5NPIPCdTwWsHI9EkzDOu4x5rySWja8q1RevCF9aDzkX1k9uSpePNbW3btIQBM9ez1tu+t7GzypaDo993ivvLvEZr37rEC8jUf+u0lq9rrQheY8MXBOu07ejjq+G5k8fracvACdV7wuWP88zKQUvQ230TgY6r69iz8Yvc7dPL0YyEG9AtejPJBRrD22msM9f3F+vN3CgbwTRyw9HO6xvPSfQ7wQSJC91Ll3vJaOx73uMWa8VKyoPPD8sL3Mc9I8ndonvanXOz12+zo9es8PPTSqmrs7NLo83IL6vIi4x7w7lZo9tXSTPCwf1zywO6u9S0mdPAsFur2/GvU6GJd/vALXIz3BVMG8C95lvDzAIT0o60U8oAqGvRgbATzG43c9hMOZvDc/jDrCjg27o8q+vOm/FTz0YQQ7CYPAvDFwTj3K2ck6g8L1PFTrC73zgt06L7nfPF7fFT0zjbS7OfuRPBJMwzu6R8S8PY/fPH702zw2Q/+7cDvCvDyBPj0W4TS7TKTCPGjSe70+6yg85+apvPWHtL37bV297hANPVw2yD0g8iS97DehOo/iqjxNwSg9EkxDvPCNr7wj2bG8wUBlvWIFBj0jOW680EenvRfcnbxmWvA8t1HyvHo+ET17HBQ8skCCPNlrTzz8mQg961j6O86Lobwu21w785EiveNROLzqeve83UmSPYowU7wXPFo79GGEOksFozxc+Ai88yKhvJerrbxaSiQ9ROlgvW3lM72r0iS8AhuevB9EwLy0Osc8sET1PJ72abxCgBo9OthwPLV0k7uAbOe83cKBOoVPgT1qbai76pz0PCP7LjyJlsq8H3UCPQ23UTyaYfg7K4C3OXFYKL0bT5K8rkowPGqrZzzqPDi9xkRYvNbW3bvr29c8U18kvf0IirzSQWw8l0pNvOKtwT1DJJE9GPmDPIVdojytPA89jialu2qsi7yG/EG85yRpvL/5mzvPyeC7OE0tPVhBmr1CMnI9Bo3uu3J1jrx6/628RyMtPQVAaj2WUAi5pIZEPVXI6rxoM1w5SR1yvUv2Xb2e9428j+IqPLWVbLzU/XG8glkvvYi4RzwyTtG8CJecPfnwujwBuj08U84lu1LiAT1Lh9y7rF4MvXao+zq+G5k8q6FiPWQ+rrwJImC9n1dKvMlOBj3TjnC9vxr1OmHoHzt8ONY8JdSavG7f+Lz9JMy76v3UPEGFMb0bcY88ldLBvKq1vrsE0ei8lr8JvYIRgr1knuo7jgSovC66gzwjOW49B6pUvHUPl7xoM9w8x6ChPfzJpjtDqyE96R/SvM9ppLyeuCq82BAqPLcNeL1WpxG9vdw1vD5L5TzDbJA8YZVgvWQ+rjzaiDU9QEbOPBoVxjyRPVA8EirGPLay0js/10w9AZSNvbnKoTzJ+8Y8WhnivDONtDwdqje9S4fcO/OCXbyWUIg80gOtOTdg5TvpETE9V2MXPZcMDrxgSQC8u2QqOkNd+bymIM08llAIPeiir7znJOm8hX57PYQjVj0fg6O7as1kvRYgmDyJUtC863sbvFU4ED3ESpM87bRD
PeDxuz1gSQC8i89yvPusQDzuYig9/UbJPP1Gybzkbh48bGiRPFUHTj3AFgK9SSy3PAmDwLy0Vy28YAl5O5jIEz2lZEe8DbdROY/iKr0ALla99bypPGAJeTyPQwu8ZJ7qPNUaWDxEewO9334HPVqrhD0bcGu85voFvWnv4Tz3F089/VWOPWYW9juuGhI9Bo6SvEfy6jvxiJg80ebGu83w9DxlWxS98sbXPO/taz3vryw8GIoCPYS+Ij688BG9BTJJPPODgbwhoAk9vxr1vHQwcLw+Kgy9uA6cvECnrju5KwK9XERpPUfy6rys74o8cF0/OqHP1byVQUO9+m6BvEdiEL1h6B88gQyrPK/H0jzqLhe9Wl3cvMo6qjwO9jS9WsOTvXTysLtaGWK7DXmSPJpheLxiloQ9q0EmveMgdr1xlwu9g8J1vXt80DxK6Lw8usWKvCaQILsl1Bq8SSw3PJxAHztxl4u8kZ4wvVNQX73CQQk9mNKBPRn4XzwIxzo8/qISPEXHYz3i03G8+NNUPJK7FjxJy1a8dgoAvfDMEruwg9i8Ouc1Pc6s+rv76v87ThxOPGHonzrgOWm9vhuZvIGJzTzkbfo8vZ3SPEKKiL2aYfi7+fA6PBtPErzRZI09V4TwuzYiprtXhZQ8ympIPUxEhr2GXP68DhgyPKh8ljwfpSC99D5jvGvITbuBuWs9PHMdvJRxYTrzgt096MSsvBVB8Tzjj3e8geotPUWXRbz8uwW91/PDvOQNPr33F087ynhpvG9AWbzyBTu9jQk/PF2zaj0mIR+9haEcO8JBCT1+ZAG9wPNgPKAwNjx6t4C9QzygPQgFerxkPq68Snk7PVih1jy3z7i8ZRwxPWHoHzukxSe8maVyvCGgiTthBOI8H3UCvZHc7zoDFWO8z2mkvL/5m7rIvOM8nECfPH8RQr25aUG9B1g5vXTBbj3HMSA9gihtPYnVLT1bWMW8UAkWvWgz3Ln/QbK6ApjAPGIm3zsCWoG86xo7PPI1WT0yr7G8w39IPTSqmrreDwa9FUyDvXTyMDsmkKA78xQAPekRMT2oWpk7CUUBPKh78rwA8Ba9QcSUvFJzADp5Qyi95SqkPOOymLw/aEu9zS/YPLslx71RlFm7jpUmPX9ak71Zvjw9HSxxOYb8QbwvKYW9JaNYPXN0arzdwgG8NGagPHLVyrs/iki7MBUpvFQvBrylA2c80SWqPGHFfrz35zA9I/suPEGTUj1H0ZE8BTJJO9jgC72lA+e7JJU3Pc5MPr1erlM8wHa+PFI0nTxgSQC8BmwVPJrkVb3e8h+93WEhvc1uuzwtnR29exwUvMWluDySOLk8gQwrPdeSYzw6Vje8qPk4vJRx4bxEnNy81/NDu/tMBL2b3768t1FyPB0scTwfBd08LXugvNzHGL3dwoG7bXYyvYJn0Lw3wUU93LM8PdIDLbvxJzg7YhMnvQ5WcTxLh1w8whDHvKk3+DyWUIg8oJEWvKCRFjxDqyE98MwSOpfNqrzG43e9Ed1BvYFK6jy0SGg9vrq4PC+YhryDwvU7rb5IvVZ2T73eAMG89P//PO/t6zpAX4E9GPkDPQ81mLx1Tda7xZcXvX+OZDzgK0i9EM+gPBoCDr3WZ1y9/MkmvDalAz0wFak8x5/9u9P+Fb3QRyc8Kkbruypfnr0teyA9w8xMPMwTljx8+pY8UDm0vN/tCL3maGM7MBWpO6TFpzmKMFO8NQVAPPPjvbw/CI88a1lMvXCbfjw8UHy7NId5vGkSg7nYcQo8oQAYvTdg5boPEve8FuE0uiIdrDy7omk8HC0VvfTBQL1jYCs8piBNvO6SRrxwm/67uagkvSP7rrunX7A9wdHjvIabYbzR5sa8O5WaPOZoYztjIci8DtQ3PY37HT3iNFI8j3MpveB4zLoVQhW85mkHvVr9H71LZoM8UZTZu52bRLoEhQi9RHpfOwRUxrwX3J08QKeuvHODL70Nt9E8ae/hvBVClTztC7Y9jRdg
PRfcHT1ID9E7dQ+XO+enxjy4Dpw8AXvaPNR7OLzCjo26hd/buxEv3TthVn29Fp26PJ3MBj26xQq8sZIdPbxCrTxPmhS93x2nO23lszwS7IY8'"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "recreated_embedding = vector_to_embedding(array, 'gte-768')\n",
+ "recreated_embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original and recreated embeddings are the same: True'"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "f\"Original and recreated embeddings are the same: {original_embedding == recreated_embedding}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Embed an image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'openclip-768:/aKPPipmmT7ziZ0/+jaWPqb4lb5up0w/pYmevSLlvz5p4TW9XbhBv3uSO7+PATE+6IP3Ppwugb4e6cU+GDAwvxzPLD/ddM4+vOPyPkfXhD4O6kW+sNy4PRitcD9NsP8+4Marvfd2zL4ao5W+qgGaPomXBT/XNxG/RN8LP++jhr8U/kK/WCjjPuqsPj+5nfm+byGPvpGSL7/JmJ69ReE1P1Ymojypa9W+iN6/v2qMAD7EYRu/IBDDPVoOFL/YSHW/jljfPrBmMj9kQOu+duvivl4Scb8Q120/bMMxPtwLvL63tu49xDiJPHekFT/hLCW/4a5iv9TBhr482H++Jj6NPkLkhr6+XiQ/w2caveqZmD4wGQG/JKOxPpYZCz//JVa/mGMoP7CVrb6SMbe9cssZPwXSuD/3NGK/RRvYvWeeib4r7Po9FlfsPUAHJT/MZ6w/HI+fPs7yL780EJK+Kf3tvgUtxb60Gdo+HmIfPvAg7Tx6sL4+lku2P00+xT5fF8k+1ABAvuLUlj6H+Vg/L8TvvrxA1b4dnkw/T+oAPwV4Bz+ndBG/iPGrPzAgsL6tjve+bxvhPcivr78RMXe/fqrzvgRYtz5J6CW/aij8vQPKgb57ewu//I75vfD2UT44VLK+0iEdPlo4yz8HQXs/uqeUP2zWDT65m2G/MNTjvUzhCD/bZuk+gFh0PuFsabzAHSC+ewifPwikgz5iPzc+Dq+dvTRjlT/yI8M+PpK2v2/8Ib3bteG+eYIzvSzmjL/+GjG/tCofvi7gdD7c5am+RpGsPj6il77sJtM+LaZOQDlDGb5bUxk/2UIuvyxn4r40VVK/Upc6vrTzzb5WPo09nRiGPfuqLr8BhSu9dh4nP2rVgD8biU+/jMJVPsO5o741cUS+C+UNPVC+Yr9D4uC+WpTUvTRwE79uMDa/WWbDvUaAP7+uC2W+J834PeCKCr8shTA+xgJjv+qEGr/fRJW++CoYv+ybjz4JoWY9WZZ+vVvgH7/ho6a+ihLAvY7tNr/cQ9Y+LPgdv8HKXj36MsO+CKu8vsEODL+v5II/w+oPP3ZWLr54We4/kse1P4jUOT2WYQM+kjKCPoA20L0Ni6e+RM8Kv+KlAr9tbuQ+TEWAvprpFz+g8Vc+aodlvio+tL0/eDo+y+IPv8Zbyz4+ywa/k8cAPVLNPL6CX+S9aqrmvVHnd74bOqq+zSNhPqhZCD/EKKs+DjT3vhaxDz9r7Du/HhunP/Fpob6kSza/ixKIPxOqhD9el7o9UYuAvi6wML7Gnb89k7tMv0V0fT2SSmc/leLCP4jHoL/oa8m+VsHBPV5unb7QxUE9uJWeP4S7Ab90nl4/RmMZvgugHT+g/j2/wO4iv1nKaT884gO/v0phv13ygT38Iou+GkEHvxKokb8kN7m9VTULPlTzFD8Kgq69ntyEPDT1DD/OHKW+Y6BEPKHRZj4IVmi/cx+FP1Z55b5XfMy+4ohhPtnBcr8YdjY/ngwOvrUEzL9CBiW/YCJuPZbBCL/WTxm+vYsMvuV8hb4+Mt69fSLLPitEh76ESpi+I4gFPzdOND9Co1E/h5afvV8Vjr7ujcg8z4UQP8k5n77H74+/sMlSvgLWAr8q01u/qBeMPm4zrb+YLju+4OkEv+gYKz/ceQBABrwLvsuhYz36tH8+TJ64Pz+5gT/iqwE/JGFCv7Q2lr790Yq9tUmAv/QLCr5xCNs++CXDvoCQhz4sB72/lkkAv8kpCz5I9g2/gV0rPdg9xr6JdoA/ZYCMvizx8r21TC0/wlqAPz1zcD9LSEi/GQ4WvvgE/r6MEkA+wF1jvjz+cT/qsEi/lm7Avs2zVD8FOuE+OcnHPnoHdT/BBr0+WC36vv5pND4vVGc/Aqh9vxnIPL+IHlk+JjXWPrpdXL56KtC7ftBVvoS8H7508aQ9wI89veo9q7/s5b6+Sq5tPoteh7514Wk9zI6jvYBYlD/+Es68vJsCPb44Hj+Kd/8
+4MGjPgbNir6tSLQ/+pP5v27fJL4lkeW++Nqgv4l8db/tDm8/1kCbPSL5Q79QYwa/5Vq4PtXpIL+WU69AWpKWP1B4jb5EROO+yxGDPibMbz9N9xU/9+i3voUmkD78qji/c6cVv5ZRcL5BBKA+sn+9vuB+Cb86KEM+0SWTPSSIxz2mS9M+5fkXP61xFT8Evb2+Qq0tPwME1rukgrW+WLqKv15OC7+fLkA/8OUVPh0her+Lwxm7C3mtP/+Rwr6fT2W/YOgCv4Bmoj66eJq+VC0JPzTMnz5EMpy+vAYhv9FeKD8i1B8/vnacvmD/g72kgom+7OI1P7jeL78zXQe/L1yovsWwf7wjcYE/5huTP8akIr4txc0+scADP4L2YL8UiWg+HJrvPblQ4j7OEO6+0nYnu+bYuj5qgaC+DU37PFSC47+Kwle9lQRwvOIA2T88Eoy/Q67XPuwLEL/H2j0/r9JEP86Vkz6C4X6+ykkqv9jsM7+FpJ4+AOHVvbcftb3wQbM+FNMlP2X9yD5NqzG/So0LvTgujb7AVg8/4fSIv8jpIz/KbGS+EYK5vlRB97/j2Jo/liKcPtQetj5fvgc/d6VmPkF+er9lcGg+TfmbPgNzq76aEhk/uV02v+4nQrwVCnXAR4akO7RNjz5bDzS/5N0Uvz9C7D1ST/g+pQ9YvyeIn75/hBq/Uls9vkL7Bb/nQdm+KsN5P9rnBb4BzwK/DGW/vviRS74uPEy+ni0uP72mKL1stnE/wX5dPZwDPz6YPhG/OHpaPvxsKD8C5do+DdOBPkLnGD95FDW/FSUVPo44LT/WtKq9zqyJPb76LT+u+pO+i3ZGv3mQer/qLAs/kJ8qvwShKL+gV+w+1QYwPz26BT/Beli/g4rOv0yLVD+ATgw+39/KPql/pj7vJPE9VAv7PDf5AL8SY46+x0nHvriawr70MQm/DPjxP3jIfT3+Ej8/oagRP6lrij5EQy+8JaebPd7D8b74yya+SPOFPVhyvD4uU2s/JUSFP9DTmr4M2os/QTXzuzDKuL60uMi98sXYvoA8lz6orou+uYwVPvigxr3KxCG/w87oPXCWQD5Edqq9mC40P3xgoz9cypI+fct9POL3oD8ke2o+BJCFPlSx6b4N7dY/2O98vm/fC8Bg3CS/4PUsv9bFkz4IN7e+EKfjPvv2vb6QN7k+J3HcPojy5L3T36e/EUBTPfWTZz/a47Y+NyvLP2dxcTxTObY/5UK6PjJCbL6O9AE+x7MPvlHpJr0w0W6+2WZtP9bpGr9CAxY+Pm6LvtxYGD8Yzeu6nhyfvgIaDz5Ww8M+8G4kP4BCJD5Qjx2+iUGCvsdEkb2n+NG+dFPGvijAnD/a5Jk/qQgtPvEJBj/yYxG/eN2yu5KTyz4ZjmY/dIPaPOA/oL6ctZq+grrfP2i2kr/4SFC+rJvqvnT9ob7wcTS/HogePs4FKDwvCFa/i1EqP5tJBjwfL7u+sHfRPhVN+r7yNO29WCJAP1JNCD4PE7w9AhnLveIx3T4FgSK+OOxIv1gONL9O/qg+BmXbPcnsgr4NrvO+lH0tP+fQhT9l4SG/+vxOvmJPfr/7RIC+SwvTPEWMWj8Fzo++KKoKvvP9FL4M2Re/9E+dvvT3MD/gSBK/QMjiPrmU5T7Ub0k/RvhLPYJhTb7/xrQ+CATGPuI4Bj9PZwE+LlApP6S7Kb+R8jw+y1+Hvq6grz4r2Fi++Gf7PzrBC77t3Vc9obEWP5R+lr6/ii2+GmBZP8MWSL8ayf2+/wkaPyWEjr5hdBK/4sMwP9ANbT4bkFw/9lGDP/576r6I+i8/paifvj4DND9gbgC/XxervxGG374S+hu/N2tFv1CgnD4mN3W+MZZJPzskij9qnpw/NtfSP4aDFb9Cyni+rMa1vuwAaz4PZhq/aEk8v7/1CD/is4S+XaE2PGjCGz4MMgO/EN+0PlkYWz4oulQ/ZEFfvFzqBr/oTW8++lRovsi3A78XwoQ8ytGsvr2ozb6lWye
+vqe+PqBoHT/1Lk0/GIQMPl8JKD6kMDG/ZdFhPjJmv77qKVM/bqxgP8W+Pb44I2A/qfqxPrL7wr6lkZE/m8snvyDx1b+m5gw/TjvNv49JSr+tyYO/'"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.tools.embed_image(\"https://impresso-project.ch/assets/images/posts/rep-thomas.png\", target=\"multimodal\")"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": ".venv",
+ "display_name": "impresso-py3.13 (3.13.7)",
"language": "python",
"name": "python3"
},
@@ -89,9 +972,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.9"
+ "version": "3.13.7"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/examples/notebooks/topics.ipynb b/examples/notebooks/topics.ipynb
new file mode 100644
index 0000000..dad3840
--- /dev/null
+++ b/examples/notebooks/topics.ipynb
@@ -0,0 +1,780 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "cell-0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🎉 You are now connected to the Impresso API! 🎉\n",
+ "🔗 Using API: http://localhost:3030\n"
+ ]
+ }
+ ],
+ "source": [
+ "from impresso import connect\n",
+ "\n",
+ "impresso = connect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-1",
+ "metadata": {},
+ "source": [
+ "# Search topics\n",
+ "\n",
+ "Find topics in the Impresso database. Topics are generated using topic modeling techniques and represent themes or subjects discussed in the content items."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "cell-2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindTopics result
\n",
+ "
Contains 10 items (0 - 10) of 300 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " language | \n",
+ " contentItemsCount | \n",
+ " words | \n",
+ " model | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | tm-fr-all-v2.0_tp58_fr | \n",
+ " fr | \n",
+ " 12788084 | \n",
+ " [{'w': 'der', 'p': 0.1617}, {'w': 'man', 'p': ... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp52_fr | \n",
+ " fr | \n",
+ " 6598628 | \n",
+ " [{'w': 'front', 'p': 0.02305}, {'w': 'armée', ... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp36_fr | \n",
+ " fr | \n",
+ " 4124269 | \n",
+ " [{'w': 'main', 'p': 0.01644}, {'w': 'tête', 'p... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.topics.find()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-3",
+ "metadata": {},
+ "source": [
+ "Search for topics by keyword. This searches within the top words associated with each topic."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cell-4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindTopics result
\n",
+ "
Contains 1 items of 1 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " language | \n",
+ " contentItemsCount | \n",
+ " words | \n",
+ " model | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | tm-fr-all-v2.0_tp30_fr | \n",
+ " fr | \n",
+ " 602205 | \n",
+ " [{'w': 'forêt', 'p': 0.02919}, {'w': 'bois', '... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.topics.find(term=\"chat\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-5",
+ "metadata": {},
+ "source": [
+ "## Order results\n",
+ "\n",
+ "Topics can be ordered by different fields."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cell-6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
FindTopics result
\n",
+ "
Contains 9 items (0 - 9) of 42 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " language | \n",
+ " contentItemsCount | \n",
+ " words | \n",
+ " model | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | tm-fr-all-v2.0_tp36_fr | \n",
+ " fr | \n",
+ " 4124269 | \n",
+ " [{'w': 'main', 'p': 0.01644}, {'w': 'tête', 'p... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp47_fr | \n",
+ " fr | \n",
+ " 849431 | \n",
+ " [{'w': 'ville', 'p': 0.03486}, {'w': 'siècle',... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp79_fr | \n",
+ " fr | \n",
+ " 893270 | \n",
+ " [{'w': 'tir', 'p': 0.07371}, {'w': 'gauche', '... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.topics.find(term=\"fait\", order_by=\"-name\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-7",
+ "metadata": {},
+ "source": [
+ "## Pagination\n",
+ "\n",
+ "Iterate through all pages of results."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "cell-8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total topics in the result set: 42\n",
+ "Got page 0 - 10 of 42. The first topic has [{'uid': 'tm-fr-all-v2.0_tp34_fr', 'language': 'fr', 'contentItemsCount': 1039982, 'words': [{'w': 'course', 'p': 0.03807}, {'w': 'monde', 'p': 0.02638}, {'w': 'place', 'p': 0.02077}, {'w': 'temps', 'p': 0.01844}, {'w': 'étape', 'p': 0.01844}, {'w': 'départ', 'p': 0.01703}, {'w': 'suisse', 'p': 0.01639}, {'w': 'victoire', 'p': 0.01488}, {'w': 'manche', 'p': 0.01444}, {'w': 'ordre', 'p': 0.01402}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-lb-all-v2.1_tp57_lb', 'language': 'lb', 'contentItemsCount': 1654, 'words': [{'w': 'jongen', 'p': 0.09709}, {'w': 'letzeburg', 'p': 0.03108}, {'w': 'ligue', 'p': 0.02757}, {'w': 'rapatriement', 'p': 0.02723}, {'w': 'letzeburger', 'p': 0.02338}, {'w': 'service', 'p': 0.02247}, {'w': 'büro', 'p': 0.0114}, {'w': 'guerre', 'p': 0.01054}, {'w': 'commissariat', 'p': 0.0103}, {'w': 'jong', 'p': 0.00948}], 'model': 'tm-lb-all-v2.1'}, {'uid': 'tm-fr-all-v2.0_tp78_fr', 'language': 'fr', 'contentItemsCount': 834649, 'words': [{'w': 'vie', 'p': 0.01865}, {'w': 'esprit', 'p': 0.01474}, {'w': 'pays', 'p': 0.0144}, {'w': 'travail', 'p': 0.01423}, {'w': 'effort', 'p': 0.01203}, {'w': 'point', 'p': 0.01016}, {'w': 'temps', 'p': 0.00958}, {'w': 'progrès', 'p': 0.0087}, {'w': 'intérêt', 'p': 0.00867}, {'w': 'vue', 'p': 0.00799}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp87_fr', 'language': 'fr', 'contentItemsCount': 3077453, 'words': [{'w': 'problème', 'p': 0.01207}, {'w': 'fait', 'p': 0.01119}, {'w': 'question', 'p': 0.0103}, {'w': 'exemple', 'p': 0.01004}, {'w': 'monde', 'p': 0.00982}, {'w': 'cas', 'p': 0.00919}, {'w': 'système', 'p': 0.00893}, {'w': 'politique', 'p': 0.00787}, {'w': 'temps', 'p': 0.00767}, {'w': 'société', 'p': 0.00713}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp44_fr', 'language': 'fr', 'contentItemsCount': 1818665, 'words': [{'w': 'temps', 'p': 0.02877}, {'w': 'argent', 'p': 0.0225}, {'w': 'gens', 'p': 0.01945}, {'w': 'foi', 'p': 0.01682}, {'w': 'chose', 'p': 
0.0163}, {'w': 'vie', 'p': 0.01484}, {'w': 'travail', 'p': 0.01312}, {'w': 'moment', 'p': 0.01181}, {'w': 'monde', 'p': 0.01174}, {'w': 'besoin', 'p': 0.0112}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp55_fr', 'language': 'fr', 'contentItemsCount': 2041018, 'words': [{'w': 'vie', 'p': 0.01796}, {'w': 'monde', 'p': 0.01165}, {'w': 'auteur', 'p': 0.00906}, {'w': 'foi', 'p': 0.0088}, {'w': 'sen', 'p': 0.00866}, {'w': 'temps', 'p': 0.00862}, {'w': 'esprit', 'p': 0.0081}, {'w': 'livre', 'p': 0.00807}, {'w': 'art', 'p': 0.00754}, {'w': 'amour', 'p': 0.00737}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp30_fr', 'language': 'fr', 'contentItemsCount': 602205, 'words': [{'w': 'forêt', 'p': 0.02919}, {'w': 'bois', 'p': 0.01989}, {'w': 'chasse', 'p': 0.01955}, {'w': 'nature', 'p': 0.0136}, {'w': 'chien', 'p': 0.01324}, {'w': 'chasseur', 'p': 0.01133}, {'w': 'animal', 'p': 0.01106}, {'w': 'eau', 'p': 0.0088}, {'w': 'terre', 'p': 0.00786}, {'w': 'arbre', 'p': 0.00743}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp56_fr', 'language': 'fr', 'contentItemsCount': 509737, 'words': [{'w': 'conseil', 'p': 0.07441}, {'w': 'commune', 'p': 0.04146}, {'w': 'construction', 'p': 0.02389}, {'w': 'crédit', 'p': 0.02198}, {'w': 'ville', 'p': 0.01986}, {'w': 'commission', 'p': 0.01335}, {'w': 'projet', 'p': 0.01329}, {'w': 'séance', 'p': 0.012}, {'w': 'bâtiment', 'p': 0.01082}, {'w': 'terrain', 'p': 0.0103}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp62_fr', 'language': 'fr', 'contentItemsCount': 627456, 'words': [{'w': 'match', 'p': 0.03138}, {'w': 'jeu', 'p': 0.02741}, {'w': 'but', 'p': 0.02187}, {'w': 'équipe', 'p': 0.01958}, {'w': 'minute', 'p': 0.01875}, {'w': 'partie', 'p': 0.01395}, {'w': 'foi', 'p': 0.01367}, {'w': 'défense', 'p': 0.01245}, {'w': 'coup', 'p': 0.01226}, {'w': 'victoire', 'p': 0.0122}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp54_fr', 'language': 'fr', 'contentItemsCount': 968428, 'words': [{'w': 'parti', 'p': 
0.06376}, {'w': 'conseil', 'p': 0.04967}, {'w': 'voix', 'p': 0.04724}, {'w': 'élection', 'p': 0.02733}, {'w': 'liste', 'p': 0.02668}, {'w': 'candidat', 'p': 0.02576}, {'w': 'majorité', 'p': 0.02}, {'w': 'scrutin', 'p': 0.01749}, {'w': 'tour', 'p': 0.01747}, {'w': 'président', 'p': 0.01586}], 'model': 'tm-fr-all-v2.0'}] content items\n",
+ "Got page 10 - 20 of 42. The first topic has [{'uid': 'tm-fr-all-v2.0_tp26_fr', 'language': 'fr', 'contentItemsCount': 683575, 'words': [{'w': 'cas', 'p': 0.03093}, {'w': 'santé', 'p': 0.03048}, {'w': 'médecin', 'p': 0.0286}, {'w': 'maladie', 'p': 0.02417}, {'w': 'docteur', 'p': 0.01896}, {'w': 'médecine', 'p': 0.01797}, {'w': 'hôpital', 'p': 0.01625}, {'w': 'traitement', 'p': 0.01295}, {'w': 'professeur', 'p': 0.01184}, {'w': 'état', 'p': 0.00996}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp42_fr', 'language': 'fr', 'contentItemsCount': 910641, 'words': [{'w': 'tribunal', 'p': 0.03413}, {'w': 'affaire', 'p': 0.02811}, {'w': 'prison', 'p': 0.02383}, {'w': 'procès', 'p': 0.02288}, {'w': 'mois', 'p': 0.02218}, {'w': 'juge', 'p': 0.02014}, {'w': 'justice', 'p': 0.0178}, {'w': 'peine', 'p': 0.01752}, {'w': 'amende', 'p': 0.01559}, {'w': 'frais', 'p': 0.01543}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp95_fr', 'language': 'fr', 'contentItemsCount': 685561, 'words': [{'w': 'loi', 'p': 0.07082}, {'w': 'droit', 'p': 0.05264}, {'w': 'conseil', 'p': 0.03093}, {'w': 'cas', 'p': 0.02686}, {'w': 'article', 'p': 0.02222}, {'w': 'recours', 'p': 0.01317}, {'w': 'art', 'p': 0.01269}, {'w': 'vigueur', 'p': 0.01123}, {'w': 'projet', 'p': 0.0108}, {'w': 'décision', 'p': 0.01019}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp88_fr', 'language': 'fr', 'contentItemsCount': 934422, 'words': [{'w': 'conseil', 'p': 0.05508}, {'w': 'initiative', 'p': 0.04484}, {'w': 'loi', 'p': 0.03003}, {'w': 'peuple', 'p': 0.02706}, {'w': 'projet', 'p': 0.02699}, {'w': 'canton', 'p': 0.0237}, {'w': 'vote', 'p': 0.02073}, {'w': 'référendum', 'p': 0.01542}, {'w': 'droit', 'p': 0.0154}, {'w': 'oui', 'p': 0.01461}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp76_fr', 'language': 'fr', 'contentItemsCount': 469979, 'words': [{'w': 'avion', 'p': 0.04868}, {'w': 'vol', 'p': 0.02812}, {'w': 'appareil', 'p': 0.0277}, {'w': 'aviation', 'p': 0.0203}, {'w': 'pilote', 'p': 
0.01793}, {'w': 'bord', 'p': 0.01618}, {'w': 'air', 'p': 0.01499}, {'w': 'aéroport', 'p': 0.01211}, {'w': 'sol', 'p': 0.01023}, {'w': 'vitesse', 'p': 0.00934}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp03_fr', 'language': 'fr', 'contentItemsCount': 660366, 'words': [{'w': 'gouvernement', 'p': 0.04781}, {'w': 'conférence', 'p': 0.0339}, {'w': 'accord', 'p': 0.02596}, {'w': 'question', 'p': 0.02494}, {'w': 'traité', 'p': 0.01894}, {'w': 'sujet', 'p': 0.01296}, {'w': 'commission', 'p': 0.01274}, {'w': 'conseil', 'p': 0.01218}, {'w': 'vue', 'p': 0.01129}, {'w': 'convention', 'p': 0.01072}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp98_fr', 'language': 'fr', 'contentItemsCount': 651087, 'words': [{'w': 'gouvernement', 'p': 0.06227}, {'w': 'parti', 'p': 0.03965}, {'w': 'ministre', 'p': 0.03468}, {'w': 'président', 'p': 0.02254}, {'w': 'politique', 'p': 0.02246}, {'w': 'gauche', 'p': 0.0156}, {'w': 'majorité', 'p': 0.01515}, {'w': 'cabinet', 'p': 0.01504}, {'w': 'opposition', 'p': 0.01443}, {'w': 'ministère', 'p': 0.01201}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp29_fr', 'language': 'fr', 'contentItemsCount': 641952, 'words': [{'w': 'guerre', 'p': 0.06224}, {'w': 'paix', 'p': 0.03208}, {'w': 'pays', 'p': 0.0225}, {'w': 'peuple', 'p': 0.01924}, {'w': 'politique', 'p': 0.01695}, {'w': 'nation', 'p': 0.01448}, {'w': 'monde', 'p': 0.01274}, {'w': 'gouvernement', 'p': 0.01088}, {'w': 'situation', 'p': 0.00931}, {'w': 'discours', 'p': 0.00769}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp32_fr', 'language': 'fr', 'contentItemsCount': 708594, 'words': [{'w': 'travail', 'p': 0.12078}, {'w': 'grève', 'p': 0.0275}, {'w': 'personnel', 'p': 0.02613}, {'w': 'chômage', 'p': 0.02585}, {'w': 'syndicat', 'p': 0.01578}, {'w': 'temps', 'p': 0.01383}, {'w': 'situation', 'p': 0.01297}, {'w': 'industrie', 'p': 0.01275}, {'w': 'emploi', 'p': 0.01265}, {'w': 'semaine', 'p': 0.01261}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp71_fr', 
'language': 'fr', 'contentItemsCount': 480008, 'words': [{'w': 'pays', 'p': 0.04381}, {'w': 'président', 'p': 0.02988}, {'w': 'accord', 'p': 0.02501}, {'w': 'o.n.u', 'p': 0.01614}, {'w': 'ministre', 'p': 0.01545}, {'w': 'paix', 'p': 0.01383}, {'w': 'sécurité', 'p': 0.01362}, {'w': 'conférence', 'p': 0.01261}, {'w': 'secrétaire', 'p': 0.01045}, {'w': 'plan', 'p': 0.0097}], 'model': 'tm-fr-all-v2.0'}] content items\n",
+ "Got page 20 - 30 of 42. The first topic has [{'uid': 'tm-fr-all-v2.0_tp72_fr', 'language': 'fr', 'contentItemsCount': 469714, 'words': [{'w': 'pays', 'p': 0.0647}, {'w': 'industrie', 'p': 0.03546}, {'w': 'production', 'p': 0.03028}, {'w': 'commerce', 'p': 0.02679}, {'w': 'économie', 'p': 0.02376}, {'w': 'marché', 'p': 0.01515}, {'w': 'développement', 'p': 0.01418}, {'w': 'situation', 'p': 0.01212}, {'w': 'suisse', 'p': 0.01208}, {'w': 'exportation', 'p': 0.01141}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp53_fr', 'language': 'fr', 'contentItemsCount': 544922, 'words': [{'w': 'budget', 'p': 0.0446}, {'w': 'impôt', 'p': 0.0376}, {'w': 'conseil', 'p': 0.02422}, {'w': 'déficit', 'p': 0.01899}, {'w': 'taxe', 'p': 0.01808}, {'w': 'augmentation', 'p': 0.01774}, {'w': 'année', 'p': 0.01724}, {'w': 'dette', 'p': 0.01623}, {'w': 'compte', 'p': 0.01528}, {'w': 'somme', 'p': 0.0141}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp66_fr', 'language': 'fr', 'contentItemsCount': 708234, 'words': [{'w': 'année', 'p': 0.07471}, {'w': 'nombre', 'p': 0.04433}, {'w': 'mois', 'p': 0.0405}, {'w': 'rapport', 'p': 0.0378}, {'w': 'augmentation', 'p': 0.02949}, {'w': 'fin', 'p': 0.02265}, {'w': 'période', 'p': 0.01788}, {'w': 'population', 'p': 0.01769}, {'w': 'chiffre', 'p': 0.01678}, {'w': 'cours', 'p': 0.01543}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp31_fr', 'language': 'fr', 'contentItemsCount': 708078, 'words': [{'w': 'construction', 'p': 0.03015}, {'w': 'route', 'p': 0.02937}, {'w': 'place', 'p': 0.02313}, {'w': 'projet', 'p': 0.02165}, {'w': 'ville', 'p': 0.01895}, {'w': 'plan', 'p': 0.01628}, {'w': 'circulation', 'p': 0.01552}, {'w': 'chantier', 'p': 0.01536}, {'w': 'zone', 'p': 0.01286}, {'w': 'trafic', 'p': 0.01257}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp67_fr', 'language': 'fr', 'contentItemsCount': 518595, 'words': [{'w': 'fer', 'p': 0.08723}, {'w': 'ligne', 'p': 0.0452}, {'w': 'chemin', 'p': 0.04172}, {'w': 'voie', 'p': 
0.02761}, {'w': 'trafic', 'p': 0.02535}, {'w': 'transport', 'p': 0.01909}, {'w': 'exploitation', 'p': 0.01858}, {'w': 'service', 'p': 0.01622}, {'w': 'réseau', 'p': 0.01572}, {'w': 'marchandise', 'p': 0.01443}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp77_fr', 'language': 'fr', 'contentItemsCount': 959913, 'words': [{'w': 'télévision', 'p': 0.03162}, {'w': 'radio', 'p': 0.02261}, {'w': 'chaîne', 'p': 0.01648}, {'w': 'système', 'p': 0.01439}, {'w': 'publicité', 'p': 0.01315}, {'w': 'émission', 'p': 0.01199}, {'w': 'appareil', 'p': 0.01075}, {'w': 'écran', 'p': 0.01009}, {'w': 'service', 'p': 0.00915}, {'w': 'information', 'p': 0.00913}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp89_fr', 'language': 'fr', 'contentItemsCount': 679290, 'words': [{'w': 'marché', 'p': 0.0378}, {'w': 'taux', 'p': 0.03275}, {'w': 'cours', 'p': 0.02892}, {'w': 'hausse', 'p': 0.02781}, {'w': 'baisse', 'p': 0.02336}, {'w': 'dollar', 'p': 0.01568}, {'w': 'semaine', 'p': 0.01509}, {'w': 'mois', 'p': 0.01249}, {'w': 'indice', 'p': 0.01047}, {'w': 'intérêt', 'p': 0.01027}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp17_fr', 'language': 'fr', 'contentItemsCount': 845174, 'words': [{'w': 'eau', 'p': 0.04316}, {'w': 'énergie', 'p': 0.02758}, {'w': 'gaz', 'p': 0.02695}, {'w': 'électricité', 'p': 0.01203}, {'w': 'air', 'p': 0.01154}, {'w': 'usine', 'p': 0.01128}, {'w': 'chauffage', 'p': 0.01109}, {'w': 'centrale', 'p': 0.00739}, {'w': 'chaleur', 'p': 0.00731}, {'w': 'charbon', 'p': 0.00683}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp19_fr', 'language': 'fr', 'contentItemsCount': 956328, 'words': [{'w': 'roi', 'p': 0.06924}, {'w': 'prince', 'p': 0.0529}, {'w': 'empereur', 'p': 0.03115}, {'w': 'comte', 'p': 0.02491}, {'w': 'reine', 'p': 0.02366}, {'w': 'duc', 'p': 0.0169}, {'w': 'princesse', 'p': 0.01547}, {'w': 'cour', 'p': 0.01089}, {'w': 'souverain', 'p': 0.01012}, {'w': 'palais', 'p': 0.01008}], 'model': 'tm-fr-all-v2.0'}, {'uid': 
'tm-fr-all-v2.0_tp37_fr', 'language': 'fr', 'contentItemsCount': 2422013, 'words': [{'w': 'presse', 'p': 0.03126}, {'w': 'journal', 'p': 0.02853}, {'w': 'lettre', 'p': 0.02086}, {'w': 'affaire', 'p': 0.02034}, {'w': 'article', 'p': 0.01838}, {'w': 'question', 'p': 0.01606}, {'w': 'fait', 'p': 0.01434}, {'w': 'propos', 'p': 0.01279}, {'w': 'sujet', 'p': 0.01251}, {'w': 'cas', 'p': 0.01111}], 'model': 'tm-fr-all-v2.0'}] content items\n",
+ "Got page 30 - 38 of 42. The first topic has [{'uid': 'tm-fr-all-v2.0_tp64_fr', 'language': 'fr', 'contentItemsCount': 1387503, 'words': [{'w': 'question', 'p': 0.01595}, {'w': 'point', 'p': 0.01204}, {'w': 'pays', 'p': 0.01183}, {'w': 'conseil', 'p': 0.01124}, {'w': 'gouvernement', 'p': 0.01031}, {'w': 'loi', 'p': 0.01024}, {'w': 'droit', 'p': 0.00984}, {'w': 'temps', 'p': 0.00949}, {'w': 'lieu', 'p': 0.00876}, {'w': 'moment', 'p': 0.00798}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp00_fr', 'language': 'fr', 'contentItemsCount': 2396291, 'words': [{'w': 'pays', 'p': 0.01966}, {'w': 'foi', 'p': 0.01544}, {'w': 'cours', 'p': 0.01464}, {'w': 'part', 'p': 0.01368}, {'w': 'dune', 'p': 0.01336}, {'w': 'temps', 'p': 0.01284}, {'w': 'vie', 'p': 0.01236}, {'w': 'travail', 'p': 0.01065}, {'w': 'monde', 'p': 0.01055}, {'w': 'membre', 'p': 0.00952}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp52_fr', 'language': 'fr', 'contentItemsCount': 6598628, 'words': [{'w': 'front', 'p': 0.02305}, {'w': 'armée', 'p': 0.02116}, {'w': 'guerre', 'p': 0.01898}, {'w': 'ennemi', 'p': 0.01845}, {'w': 'nord', 'p': 0.01778}, {'w': 'région', 'p': 0.01743}, {'w': 'sud', 'p': 0.01716}, {'w': 'général', 'p': 0.01712}, {'w': 'allemand', 'p': 0.01697}, {'w': 'attaque', 'p': 0.01489}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp04_fr', 'language': 'fr', 'contentItemsCount': 1518530, 'words': [{'w': 'enfant', 'p': 0.05173}, {'w': 'famille', 'p': 0.05102}, {'w': 'père', 'p': 0.0477}, {'w': 'vie', 'p': 0.04224}, {'w': 'mère', 'p': 0.04119}, {'w': 'fils', 'p': 0.02674}, {'w': 'fille', 'p': 0.02496}, {'w': 'mari', 'p': 0.02102}, {'w': 'mariage', 'p': 0.02021}, {'w': 'maison', 'p': 0.01749}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp50_fr', 'language': 'fr', 'contentItemsCount': 835204, 'words': [{'w': 'match', 'p': 0.05334}, {'w': 'équipe', 'p': 0.04143}, {'w': 'ligue', 'p': 0.02935}, {'w': 'club', 'p': 0.02618}, {'w': 'saison', 'p': 0.02484}, {'w': 
'championnat', 'p': 0.02335}, {'w': 'groupe', 'p': 0.01593}, {'w': 'football', 'p': 0.01481}, {'w': 'tour', 'p': 0.01444}, {'w': 'entraîneur', 'p': 0.0125}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp18_fr', 'language': 'fr', 'contentItemsCount': 2882829, 'words': [{'w': 'fille', 'p': 0.01542}, {'w': 'main', 'p': 0.01512}, {'w': 'voix', 'p': 0.01033}, {'w': 'foi', 'p': 0.00988}, {'w': 'temps', 'p': 0.00854}, {'w': 'père', 'p': 0.00842}, {'w': 'moment', 'p': 0.00815}, {'w': 'chose', 'p': 0.00804}, {'w': 'mère', 'p': 0.00749}, {'w': 'tête', 'p': 0.00741}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp33_fr', 'language': 'fr', 'contentItemsCount': 1080727, 'words': [{'w': 'gouvernement', 'p': 0.03591}, {'w': 'général', 'p': 0.02096}, {'w': 'ministre', 'p': 0.01669}, {'w': 'guerre', 'p': 0.01627}, {'w': 'ordre', 'p': 0.00929}, {'w': 'dépêche', 'p': 0.00928}, {'w': 'ministère', 'p': 0.00878}, {'w': 'ville', 'p': 0.00878}, {'w': 'lord', 'p': 0.0087}, {'w': 'armée', 'p': 0.00828}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp79_fr', 'language': 'fr', 'contentItemsCount': 893270, 'words': [{'w': 'tir', 'p': 0.07371}, {'w': 'gauche', 'p': 0.05973}, {'w': 'droite', 'p': 0.05536}, {'w': 'page', 'p': 0.02455}, {'w': 'main', 'p': 0.01981}, {'w': 'coup', 'p': 0.01833}, {'w': 'foi', 'p': 0.01738}, {'w': 'partie', 'p': 0.01396}, {'w': 'mot', 'p': 0.01362}, {'w': 'stand', 'p': 0.01322}], 'model': 'tm-fr-all-v2.0'}] content items\n",
+ "Got page 40 - 42 of 42. The first topic has [{'uid': 'tm-fr-all-v2.0_tp36_fr', 'language': 'fr', 'contentItemsCount': 4124269, 'words': [{'w': 'main', 'p': 0.01644}, {'w': 'tête', 'p': 0.01052}, {'w': 'temps', 'p': 0.00959}, {'w': 'coup', 'p': 0.00952}, {'w': 'air', 'p': 0.0087}, {'w': 'foi', 'p': 0.00858}, {'w': 'bras', 'p': 0.00799}, {'w': 'porte', 'p': 0.00799}, {'w': 'nuit', 'p': 0.00796}, {'w': 'voix', 'p': 0.00676}], 'model': 'tm-fr-all-v2.0'}, {'uid': 'tm-fr-all-v2.0_tp47_fr', 'language': 'fr', 'contentItemsCount': 849431, 'words': [{'w': 'ville', 'p': 0.03486}, {'w': 'siècle', 'p': 0.02264}, {'w': 'maison', 'p': 0.01493}, {'w': 'pays', 'p': 0.01087}, {'w': 'château', 'p': 0.00944}, {'w': 'pierre', 'p': 0.0089}, {'w': 'époque', 'p': 0.00867}, {'w': 'temps', 'p': 0.00866}, {'w': 'nom', 'p': 0.00835}, {'w': 'place', 'p': 0.00754}], 'model': 'tm-fr-all-v2.0'}] content items\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.topics.find(\n",
+ " term=\"fait\",\n",
+ " limit=10,\n",
+ ")\n",
+ "\n",
+ "print(f\"Total topics in the result set: {result.total}\")\n",
+ "for page in result.pages():\n",
+ " print(\n",
+ " f\"Got page {page.offset} - {page.offset + page.size} of {page.total}. \"\n",
+ " + f\"The first topic has {page.pydantic.data[0]['contentItemsCount']} content items\"\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-9",
+ "metadata": {},
+ "source": [
+ "## Get a topic by its ID\n",
+ "\n",
+ "Retrieve a specific topic using its unique identifier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "cell-10",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Topic ID: tm-fr-all-v2.0_tp58_fr\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
GetTopic result
\n",
+ "
Contains 0 items of 0 total items.
\n",
+ "
\n",
+ "See this result in the
Impresso App.\n",
+ "
\n",
+ "
\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " language | \n",
+ " contentItemsCount | \n",
+ " words | \n",
+ " model | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | tm-fr-all-v2.0_tp58_fr | \n",
+ " fr | \n",
+ " 12788084 | \n",
+ " [{'w': 'der', 'p': 0.1617}, {'w': 'man', 'p': ... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First, let's get a topic ID from the search results\n",
+ "result = impresso.topics.find(limit=1)\n",
+ "topic_id = result.pydantic.data[0]['uid']\n",
+ "print(f\"Topic ID: {topic_id}\")\n",
+ "\n",
+ "# Now get the full topic details\n",
+ "impresso.topics.get(topic_id)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-13",
+ "metadata": {},
+ "source": [
+ "## View as DataFrame\n",
+ "\n",
+ "Convert the results to a pandas DataFrame for easier analysis."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "cell-14",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " language | \n",
+ " contentItemsCount | \n",
+ " words | \n",
+ " model | \n",
+ "
\n",
+ " \n",
+ " | uid | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | tm-fr-all-v2.0_tp58_fr | \n",
+ " fr | \n",
+ " 12788084 | \n",
+ " [{'w': 'der', 'p': 0.1617}, {'w': 'man', 'p': ... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp52_fr | \n",
+ " fr | \n",
+ " 6598628 | \n",
+ " [{'w': 'front', 'p': 0.02305}, {'w': 'armée', ... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp36_fr | \n",
+ " fr | \n",
+ " 4124269 | \n",
+ " [{'w': 'main', 'p': 0.01644}, {'w': 'tête', 'p... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp25_fr | \n",
+ " fr | \n",
+ " 3487610 | \n",
+ " [{'w': 'journal', 'p': 0.0304}, {'w': 'monde',... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp87_fr | \n",
+ " fr | \n",
+ " 3077453 | \n",
+ " [{'w': 'problème', 'p': 0.01207}, {'w': 'fait'... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp18_fr | \n",
+ " fr | \n",
+ " 2882829 | \n",
+ " [{'w': 'fille', 'p': 0.01542}, {'w': 'main', '... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp37_fr | \n",
+ " fr | \n",
+ " 2422013 | \n",
+ " [{'w': 'presse', 'p': 0.03126}, {'w': 'journal... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp00_fr | \n",
+ " fr | \n",
+ " 2396291 | \n",
+ " [{'w': 'pays', 'p': 0.01966}, {'w': 'foi', 'p'... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp75_fr | \n",
+ " fr | \n",
+ " 2250426 | \n",
+ " [{'w': 'bat', 'p': 0.05116}, {'w': 'finale', '... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp55_fr | \n",
+ " fr | \n",
+ " 2041018 | \n",
+ " [{'w': 'vie', 'p': 0.01796}, {'w': 'monde', 'p... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp44_fr | \n",
+ " fr | \n",
+ " 1818665 | \n",
+ " [{'w': 'temps', 'p': 0.02877}, {'w': 'argent',... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp22_fr | \n",
+ " fr | \n",
+ " 1617798 | \n",
+ " [{'w': 'maison', 'p': 0.03547}, {'w': 'vente',... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp04_fr | \n",
+ " fr | \n",
+ " 1518530 | \n",
+ " [{'w': 'enfant', 'p': 0.05173}, {'w': 'famille... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp82_fr | \n",
+ " fr | \n",
+ " 1506308 | \n",
+ " [{'w': 'bureau', 'p': 0.06955}, {'w': 'place',... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp64_fr | \n",
+ " fr | \n",
+ " 1387503 | \n",
+ " [{'w': 'question', 'p': 0.01595}, {'w': 'point... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp97_fr | \n",
+ " fr | \n",
+ " 1351470 | \n",
+ " [{'w': 'police', 'p': 0.0589}, {'w': 'coup', '... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp85_fr | \n",
+ " fr | \n",
+ " 1293934 | \n",
+ " [{'w': 'vie', 'p': 0.02054}, {'w': 'monde', 'p... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp15_fr | \n",
+ " fr | \n",
+ " 1132230 | \n",
+ " [{'w': 'cuisine', 'p': 0.04559}, {'w': 'salle'... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ " | tm-fr-all-v2.0_tp40_fr | \n",
+ " fr | \n",
+ " 1109715 | \n",
+ " [{'w': 'maison', 'p': 0.05313}, {'w': 'vente',... | \n",
+ " tm-fr-all-v2.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " language contentItemsCount \\\n",
+ "uid \n",
+ "tm-fr-all-v2.0_tp58_fr fr 12788084 \n",
+ "tm-fr-all-v2.0_tp52_fr fr 6598628 \n",
+ "tm-fr-all-v2.0_tp36_fr fr 4124269 \n",
+ "tm-fr-all-v2.0_tp25_fr fr 3487610 \n",
+ "tm-fr-all-v2.0_tp87_fr fr 3077453 \n",
+ "tm-fr-all-v2.0_tp18_fr fr 2882829 \n",
+ "tm-fr-all-v2.0_tp37_fr fr 2422013 \n",
+ "tm-fr-all-v2.0_tp00_fr fr 2396291 \n",
+ "tm-fr-all-v2.0_tp75_fr fr 2250426 \n",
+ "tm-fr-all-v2.0_tp55_fr fr 2041018 \n",
+ "tm-fr-all-v2.0_tp44_fr fr 1818665 \n",
+ "tm-fr-all-v2.0_tp22_fr fr 1617798 \n",
+ "tm-fr-all-v2.0_tp04_fr fr 1518530 \n",
+ "tm-fr-all-v2.0_tp82_fr fr 1506308 \n",
+ "tm-fr-all-v2.0_tp64_fr fr 1387503 \n",
+ "tm-fr-all-v2.0_tp97_fr fr 1351470 \n",
+ "tm-fr-all-v2.0_tp85_fr fr 1293934 \n",
+ "tm-fr-all-v2.0_tp15_fr fr 1132230 \n",
+ "tm-fr-all-v2.0_tp40_fr fr 1109715 \n",
+ "\n",
+ " words \\\n",
+ "uid \n",
+ "tm-fr-all-v2.0_tp58_fr [{'w': 'der', 'p': 0.1617}, {'w': 'man', 'p': ... \n",
+ "tm-fr-all-v2.0_tp52_fr [{'w': 'front', 'p': 0.02305}, {'w': 'armée', ... \n",
+ "tm-fr-all-v2.0_tp36_fr [{'w': 'main', 'p': 0.01644}, {'w': 'tête', 'p... \n",
+ "tm-fr-all-v2.0_tp25_fr [{'w': 'journal', 'p': 0.0304}, {'w': 'monde',... \n",
+ "tm-fr-all-v2.0_tp87_fr [{'w': 'problème', 'p': 0.01207}, {'w': 'fait'... \n",
+ "tm-fr-all-v2.0_tp18_fr [{'w': 'fille', 'p': 0.01542}, {'w': 'main', '... \n",
+ "tm-fr-all-v2.0_tp37_fr [{'w': 'presse', 'p': 0.03126}, {'w': 'journal... \n",
+ "tm-fr-all-v2.0_tp00_fr [{'w': 'pays', 'p': 0.01966}, {'w': 'foi', 'p'... \n",
+ "tm-fr-all-v2.0_tp75_fr [{'w': 'bat', 'p': 0.05116}, {'w': 'finale', '... \n",
+ "tm-fr-all-v2.0_tp55_fr [{'w': 'vie', 'p': 0.01796}, {'w': 'monde', 'p... \n",
+ "tm-fr-all-v2.0_tp44_fr [{'w': 'temps', 'p': 0.02877}, {'w': 'argent',... \n",
+ "tm-fr-all-v2.0_tp22_fr [{'w': 'maison', 'p': 0.03547}, {'w': 'vente',... \n",
+ "tm-fr-all-v2.0_tp04_fr [{'w': 'enfant', 'p': 0.05173}, {'w': 'famille... \n",
+ "tm-fr-all-v2.0_tp82_fr [{'w': 'bureau', 'p': 0.06955}, {'w': 'place',... \n",
+ "tm-fr-all-v2.0_tp64_fr [{'w': 'question', 'p': 0.01595}, {'w': 'point... \n",
+ "tm-fr-all-v2.0_tp97_fr [{'w': 'police', 'p': 0.0589}, {'w': 'coup', '... \n",
+ "tm-fr-all-v2.0_tp85_fr [{'w': 'vie', 'p': 0.02054}, {'w': 'monde', 'p... \n",
+ "tm-fr-all-v2.0_tp15_fr [{'w': 'cuisine', 'p': 0.04559}, {'w': 'salle'... \n",
+ "tm-fr-all-v2.0_tp40_fr [{'w': 'maison', 'p': 0.05313}, {'w': 'vente',... \n",
+ "\n",
+ " model \n",
+ "uid \n",
+ "tm-fr-all-v2.0_tp58_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp52_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp36_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp25_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp87_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp18_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp37_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp00_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp75_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp55_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp44_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp22_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp04_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp82_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp64_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp97_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp85_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp15_fr tm-fr-all-v2.0 \n",
+ "tm-fr-all-v2.0_tp40_fr tm-fr-all-v2.0 "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = impresso.topics.find(limit=20)\n",
+ "result.df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cell-15",
+ "metadata": {},
+ "source": [
+ "## Explore topic words\n",
+ "\n",
+ "Each topic contains a list of the most representative words with their probabilities."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "cell-16",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Topic ID: tm-fr-all-v2.0_tp58_fr\n",
+ "Language: fr\n",
+ "Content items count: 12788084\n",
+ "\n",
+ "Top words:\n",
+ " der: 0.1617\n",
+ " man: 0.0308\n",
+ " dan: 0.0285\n",
+ " pou: 0.0267\n",
+ " dos: 0.0230\n",
+ " ben: 0.0205\n",
+ " con: 0.0149\n",
+ " ion: 0.0139\n",
+ " aber: 0.0124\n",
+ " welche: 0.0115\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = impresso.topics.find(limit=1)\n",
+ "topic = result.raw['data'][0]\n",
+ "\n",
+ "print(f\"Topic ID: {topic['uid']}\")\n",
+ "print(f\"Language: {topic['language']}\")\n",
+ "print(f\"Content items count: {topic.get('contentItemsCount', 'N/A')}\")\n",
+ "print(f\"\\nTop words:\")\n",
+ "for word in topic.get('words', [])[:10]:\n",
+ " print(f\" {word['w']}: {word['p']:.4f}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "impresso-py3.13 (3.13.7)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/impresso/api_client/api/collections/add_collectable_items_from_filters.py b/impresso/api_client/api/collections/add_collectable_items_from_filters.py
new file mode 100644
index 0000000..0001464
--- /dev/null
+++ b/impresso/api_client/api/collections/add_collectable_items_from_filters.py
@@ -0,0 +1,200 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union, cast
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.add_collectable_items_from_filters import AddCollectableItemsFromFilters
+from ...models.error import Error
+from ...types import Response
+
+
+def _get_kwargs(
+    collection_id: str,
+    *,
+    body: AddCollectableItemsFromFilters,
+) -> Dict[str, Any]:
+    """Assemble the httpx request arguments for adding items to a collection.
+
+    The body model is serialised with ``to_dict()`` and sent as JSON in a POST
+    request to ``/collections/{collection_id}/items``.
+    """
+    request_headers: Dict[str, Any] = {"Content-Type": "application/json"}
+    request_args: Dict[str, Any] = {
+        "method": "post",
+        "url": f"/collections/{collection_id}/items",
+        "json": body.to_dict(),
+        "headers": request_headers,
+    }
+
+    return request_args
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Any, Error]]:
+    """Convert the raw HTTP response into its documented parsed form.
+
+    Args:
+        client: Provides the ``raise_on_unexpected_status`` setting.
+        response: The httpx response to interpret.
+
+    Returns:
+        ``None`` for 202 Accepted, an ``Error`` parsed from the JSON body for a
+        documented error status, otherwise ``None``.
+
+    Raises:
+        errors.UnexpectedStatus: If the status is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+    """
+    if response.status_code == HTTPStatus.ACCEPTED:
+        return cast(Any, None)
+    # 422 is spelled as an integer: ``HTTPStatus.UNPROCESSABLE_CONTENT`` only
+    # exists on Python 3.13+ and raises AttributeError on older interpreters.
+    documented_errors = (
+        HTTPStatus.UNAUTHORIZED,
+        HTTPStatus.FORBIDDEN,
+        HTTPStatus.IM_A_TEAPOT,
+        422,
+        HTTPStatus.TOO_MANY_REQUESTS,
+        HTTPStatus.INTERNAL_SERVER_ERROR,
+    )
+    if response.status_code in documented_errors:
+        return Error.from_dict(response.json())
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Any, Error]]:
+    """Wrap the httpx response and its parsed payload in a ``Response``."""
+    status = HTTPStatus(response.status_code)
+    raw_content = response.content
+    raw_headers = response.headers
+    payload = _parse_response(client=client, response=response)
+    return Response(status_code=status, content=raw_content, headers=raw_headers, parsed=payload)
+
+
+def sync_detailed(
+    collection_id: str,
+    *,
+    client: AuthenticatedClient,
+    body: AddCollectableItemsFromFilters,
+) -> Response[Union[Any, Error]]:
+    """Synchronously add the content items matching a set of filters to a collection.
+
+    Args:
+        collection_id (str): ID of the collection, interpolated into the request path.
+        client (AuthenticatedClient): Client that supplies the httpx client used for the call.
+        body (AddCollectableItemsFromFilters): Request to add content items to a collection
+            from content items that match given filters.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Any, Error]]: Status code, raw content, headers and parsed body.
+    """
+
+    request_args = _get_kwargs(collection_id=collection_id, body=body)
+
+    # Send the prepared request through the httpx client returned by
+    # ``get_httpx_client()``.
+    http_client = client.get_httpx_client()
+    raw_response = http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    collection_id: str,
+    *,
+    client: AuthenticatedClient,
+    body: AddCollectableItemsFromFilters,
+) -> Optional[Union[Any, Error]]:
+    """Add filtered content items to a collection and return only the parsed result.
+
+    Delegates to ``sync_detailed`` and returns its ``parsed`` attribute.
+
+    Args:
+        collection_id (str): ID of the collection, interpolated into the request path.
+        body (AddCollectableItemsFromFilters): Request to add content items to a collection
+            from content items that match given filters.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Any, Error]: The ``parsed`` attribute of the detailed response.
+    """
+
+    detailed = sync_detailed(collection_id=collection_id, client=client, body=body)
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    collection_id: str,
+    *,
+    client: AuthenticatedClient,
+    body: AddCollectableItemsFromFilters,
+) -> Response[Union[Any, Error]]:
+    """Asynchronously add the content items matching a set of filters to a collection.
+
+    Args:
+        collection_id (str): ID of the collection, interpolated into the request path.
+        body (AddCollectableItemsFromFilters): Request to add content items to a collection
+            from content items that match given filters.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Any, Error]]: Status code, raw content, headers and parsed body.
+    """
+
+    request_args = _get_kwargs(collection_id=collection_id, body=body)
+
+    # The request is awaited on the client returned by ``get_async_httpx_client()``.
+    http_client = client.get_async_httpx_client()
+    raw_response = await http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    collection_id: str,
+    *,
+    client: AuthenticatedClient,
+    body: AddCollectableItemsFromFilters,
+) -> Optional[Union[Any, Error]]:
+    """Asynchronously add filtered content items to a collection, returning the parsed result.
+
+    Awaits ``asyncio_detailed`` and returns its ``parsed`` attribute.
+
+    Args:
+        collection_id (str): ID of the collection, interpolated into the request path.
+        body (AddCollectableItemsFromFilters): Request to add content items to a collection
+            from content items that match given filters.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Any, Error]: The ``parsed`` attribute of the detailed response.
+    """
+
+    detailed = await asyncio_detailed(
+        collection_id=collection_id, client=client, body=body
+    )
+    return detailed.parsed
diff --git a/impresso/api_client/api/collections/create_collection.py b/impresso/api_client/api/collections/create_collection.py
index e8ef384..6cc627c 100644
--- a/impresso/api_client/api/collections/create_collection.py
+++ b/impresso/api_client/api/collections/create_collection.py
@@ -46,6 +46,10 @@ def _parse_response(
response_403 = Error.from_dict(response.json())
return response_403
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/collections/find_collections.py b/impresso/api_client/api/collections/find_collections.py
index f7f6913..1af0e33 100644
--- a/impresso/api_client/api/collections/find_collections.py
+++ b/impresso/api_client/api/collections/find_collections.py
@@ -59,6 +59,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/collections/get_collection.py b/impresso/api_client/api/collections/get_collection.py
index 6db6129..2214861 100644
--- a/impresso/api_client/api/collections/get_collection.py
+++ b/impresso/api_client/api/collections/get_collection.py
@@ -40,6 +40,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/collections/patch_collections_collection_id_items.py b/impresso/api_client/api/collections/patch_collections_collection_id_items.py
index 1e0c61b..5484ae7 100644
--- a/impresso/api_client/api/collections/patch_collections_collection_id_items.py
+++ b/impresso/api_client/api/collections/patch_collections_collection_id_items.py
@@ -51,6 +51,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/collections/remove_collection.py b/impresso/api_client/api/collections/remove_collection.py
index de76d90..5e14335 100644
--- a/impresso/api_client/api/collections/remove_collection.py
+++ b/impresso/api_client/api/collections/remove_collection.py
@@ -40,6 +40,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/collections/update_collection.py b/impresso/api_client/api/collections/update_collection.py
index c727a35..5c441a3 100644
--- a/impresso/api_client/api/collections/update_collection.py
+++ b/impresso/api_client/api/collections/update_collection.py
@@ -51,6 +51,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/content_items/get_content_item.py b/impresso/api_client/api/content_items/get_content_item.py
index a500326..78575ea 100644
--- a/impresso/api_client/api/content_items/get_content_item.py
+++ b/impresso/api_client/api/content_items/get_content_item.py
@@ -7,15 +7,24 @@
from ...client import AuthenticatedClient, Client
from ...models.content_item import ContentItem
from ...models.error import Error
-from ...types import Response
+from ...types import UNSET, Response, Unset
def _get_kwargs(
id: str,
+ *,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Dict[str, Any]:
+ params: Dict[str, Any] = {}
+
+ params["include_embeddings"] = include_embeddings
+
+ params = {k: v for k, v in params.items() if v is not UNSET and v is not None}
+
_kwargs: Dict[str, Any] = {
"method": "get",
"url": f"/content-items/{id}",
+ "params": params,
}
return _kwargs
@@ -40,6 +49,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
@@ -73,11 +86,13 @@ def sync_detailed(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Response[Union[ContentItem, Error]]:
"""Get a content item by its UID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -89,6 +104,7 @@ def sync_detailed(
kwargs = _get_kwargs(
id=id,
+ include_embeddings=include_embeddings,
)
response = client.get_httpx_client().request(
@@ -102,11 +118,13 @@ def sync(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Optional[Union[ContentItem, Error]]:
"""Get a content item by its UID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -119,6 +137,7 @@ def sync(
return sync_detailed(
id=id,
client=client,
+ include_embeddings=include_embeddings,
).parsed
@@ -126,11 +145,13 @@ async def asyncio_detailed(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Response[Union[ContentItem, Error]]:
"""Get a content item by its UID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -142,6 +163,7 @@ async def asyncio_detailed(
kwargs = _get_kwargs(
id=id,
+ include_embeddings=include_embeddings,
)
response = await client.get_async_httpx_client().request(**kwargs)
@@ -153,11 +175,13 @@ async def asyncio(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Optional[Union[ContentItem, Error]]:
"""Get a content item by its UID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -171,5 +195,6 @@ async def asyncio(
await asyncio_detailed(
id=id,
client=client,
+ include_embeddings=include_embeddings,
)
).parsed
diff --git a/impresso/api_client/api/data_providers/__init__.py b/impresso/api_client/api/data_providers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/impresso/api_client/api/data_providers/find_data_providers.py b/impresso/api_client/api/data_providers/find_data_providers.py
new file mode 100644
index 0000000..1624038
--- /dev/null
+++ b/impresso/api_client/api/data_providers/find_data_providers.py
@@ -0,0 +1,217 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.find_data_providers_base_find_response import FindDataProvidersBaseFindResponse
+from ...types import UNSET, Response, Unset
+
+
+def _get_kwargs(
+    *,
+    term: Union[Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Dict[str, Any]:
+    """Assemble the httpx request arguments for listing data providers.
+
+    Query parameters left as ``UNSET`` or given as ``None`` are dropped from
+    the request.
+    """
+    candidates: Dict[str, Any] = {"term": term, "limit": limit, "offset": offset}
+
+    query: Dict[str, Any] = {}
+    for name, value in candidates.items():
+        if value is UNSET or value is None:
+            continue
+        query[name] = value
+
+    request_args: Dict[str, Any] = {"method": "get", "url": "/data-providers"}
+    request_args["params"] = query
+
+    return request_args
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, FindDataProvidersBaseFindResponse]]:
+    """Convert the raw HTTP response into its documented parsed form.
+
+    Args:
+        client: Provides the ``raise_on_unexpected_status`` setting that
+            decides how undocumented status codes are handled.
+        response: The httpx response to interpret.
+
+    Returns:
+        A ``FindDataProvidersBaseFindResponse`` parsed from the JSON body for
+        200 OK, an ``Error`` parsed from the JSON body for a documented error
+        status, otherwise ``None``.
+
+    Raises:
+        errors.UnexpectedStatus: If the status is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+    """
+    if response.status_code == HTTPStatus.OK:
+        return FindDataProvidersBaseFindResponse.from_dict(response.json())
+
+    # 422 is spelled as an integer: ``HTTPStatus.UNPROCESSABLE_CONTENT`` only
+    # exists on Python 3.13+ and raises AttributeError on older interpreters.
+    documented_errors = (
+        HTTPStatus.UNAUTHORIZED,
+        HTTPStatus.FORBIDDEN,
+        HTTPStatus.NOT_FOUND,
+        HTTPStatus.IM_A_TEAPOT,
+        422,
+        HTTPStatus.TOO_MANY_REQUESTS,
+        HTTPStatus.INTERNAL_SERVER_ERROR,
+    )
+    if response.status_code in documented_errors:
+        return Error.from_dict(response.json())
+
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, FindDataProvidersBaseFindResponse]]:
+    """Wrap the httpx response and its parsed payload in a ``Response``."""
+    status = HTTPStatus(response.status_code)
+    raw_content = response.content
+    raw_headers = response.headers
+    payload = _parse_response(client=client, response=response)
+    return Response(status_code=status, content=raw_content, headers=raw_headers, parsed=payload)
+
+
+def sync_detailed(
+    *,
+    client: AuthenticatedClient,
+    term: Union[Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Response[Union[Error, FindDataProvidersBaseFindResponse]]:
+    """Synchronously query the data providers endpoint.
+
+    Args:
+        client (AuthenticatedClient): Client that supplies the httpx client used for the call.
+        term (Union[Unset, str]): Value of the ``term`` query parameter; omitted when unset.
+        limit (Union[Unset, int]): Value of the ``limit`` query parameter; omitted when unset.
+        offset (Union[Unset, int]): Value of the ``offset`` query parameter; omitted when unset.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, FindDataProvidersBaseFindResponse]]: Status code, raw
+        content, headers and parsed body.
+    """
+
+    request_args = _get_kwargs(term=term, limit=limit, offset=offset)
+
+    # Send the prepared request through the httpx client returned by
+    # ``get_httpx_client()``.
+    http_client = client.get_httpx_client()
+    raw_response = http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    *,
+    client: AuthenticatedClient,
+    term: Union[Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Optional[Union[Error, FindDataProvidersBaseFindResponse]]:
+    """Query the data providers endpoint and return only the parsed result.
+
+    Delegates to ``sync_detailed`` and returns its ``parsed`` attribute.
+
+    Args:
+        term (Union[Unset, str]): Value of the ``term`` query parameter; omitted when unset.
+        limit (Union[Unset, int]): Value of the ``limit`` query parameter; omitted when unset.
+        offset (Union[Unset, int]): Value of the ``offset`` query parameter; omitted when unset.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, FindDataProvidersBaseFindResponse]: The ``parsed`` attribute of the
+        detailed response.
+    """
+
+    detailed = sync_detailed(client=client, term=term, limit=limit, offset=offset)
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    *,
+    client: AuthenticatedClient,
+    term: Union[Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Response[Union[Error, FindDataProvidersBaseFindResponse]]:
+    """Asynchronously query the data providers endpoint.
+
+    Args:
+        client (AuthenticatedClient): Client that supplies the async httpx client used for the call.
+        term (Union[Unset, str]): Value of the ``term`` query parameter; omitted when unset.
+        limit (Union[Unset, int]): Value of the ``limit`` query parameter; omitted when unset.
+        offset (Union[Unset, int]): Value of the ``offset`` query parameter; omitted when unset.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, FindDataProvidersBaseFindResponse]]
+    """
+
+    request_args = _get_kwargs(term=term, limit=limit, offset=offset)
+
+    # The request is awaited on the client returned by ``get_async_httpx_client()``.
+    http_client = client.get_async_httpx_client()
+    raw_response = await http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    *,
+    client: AuthenticatedClient,
+    term: Union[Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Optional[Union[Error, FindDataProvidersBaseFindResponse]]:
+    """Asynchronously query the data providers endpoint, returning the parsed result.
+
+    Awaits ``asyncio_detailed`` and returns its ``parsed`` attribute.
+
+    Args:
+        term (Union[Unset, str]): Value of the ``term`` query parameter; omitted when unset.
+        limit (Union[Unset, int]): Value of the ``limit`` query parameter; omitted when unset.
+        offset (Union[Unset, int]): Value of the ``offset`` query parameter; omitted when unset.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, FindDataProvidersBaseFindResponse]: The ``parsed`` attribute of the
+        detailed response.
+    """
+
+    detailed = await asyncio_detailed(
+        client=client, term=term, limit=limit, offset=offset
+    )
+    return detailed.parsed
diff --git a/impresso/api_client/api/data_providers/get_data_provider.py b/impresso/api_client/api/data_providers/get_data_provider.py
new file mode 100644
index 0000000..808c4de
--- /dev/null
+++ b/impresso/api_client/api/data_providers/get_data_provider.py
@@ -0,0 +1,179 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.data_provider import DataProvider
+from ...models.error import Error
+from ...types import Response
+
+
+def _get_kwargs(
+    id: str,
+) -> Dict[str, Any]:
+    """Assemble the httpx request arguments for fetching one data provider."""
+    request_args: Dict[str, Any] = {}
+    request_args["method"] = "get"
+    request_args["url"] = f"/data-providers/{id}"
+
+    return request_args
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[DataProvider, Error]]:
+    """Convert the raw HTTP response into its documented parsed form.
+
+    Args:
+        client: Provides the ``raise_on_unexpected_status`` setting that
+            decides how undocumented status codes are handled.
+        response: The httpx response to interpret.
+
+    Returns:
+        A ``DataProvider`` parsed from the JSON body for 200 OK, an ``Error``
+        parsed from the JSON body for a documented error status, otherwise
+        ``None``.
+
+    Raises:
+        errors.UnexpectedStatus: If the status is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+    """
+    if response.status_code == HTTPStatus.OK:
+        return DataProvider.from_dict(response.json())
+
+    # 422 is spelled as an integer: ``HTTPStatus.UNPROCESSABLE_CONTENT`` only
+    # exists on Python 3.13+ and raises AttributeError on older interpreters.
+    documented_errors = (
+        HTTPStatus.UNAUTHORIZED,
+        HTTPStatus.FORBIDDEN,
+        HTTPStatus.NOT_FOUND,
+        HTTPStatus.IM_A_TEAPOT,
+        422,
+        HTTPStatus.TOO_MANY_REQUESTS,
+        HTTPStatus.INTERNAL_SERVER_ERROR,
+    )
+    if response.status_code in documented_errors:
+        return Error.from_dict(response.json())
+
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[DataProvider, Error]]:
+    """Wrap the httpx response and its parsed payload in a ``Response``."""
+    status = HTTPStatus(response.status_code)
+    raw_content = response.content
+    raw_headers = response.headers
+    payload = _parse_response(client=client, response=response)
+    return Response(status_code=status, content=raw_content, headers=raw_headers, parsed=payload)
+
+
+def sync_detailed(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Response[Union[DataProvider, Error]]:
+    """Synchronously fetch a single data provider by its ID.
+
+    Args:
+        id (str): Data provider ID, interpolated into the request path.
+        client (AuthenticatedClient): Client that supplies the httpx client used for the call.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[DataProvider, Error]]: Status code, raw content, headers and parsed body.
+    """
+
+    request_args = _get_kwargs(id=id)
+
+    # Send the prepared request through the client from ``get_httpx_client()``.
+    http_client = client.get_httpx_client()
+    raw_response = http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Optional[Union[DataProvider, Error]]:
+    """Fetch a single data provider by its ID and return only the parsed result.
+
+    Delegates to ``sync_detailed`` and returns its ``parsed`` attribute.
+
+    Args:
+        id (str): Data provider ID, interpolated into the request path.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[DataProvider, Error]: The ``parsed`` attribute of the detailed response.
+    """
+
+    return sync_detailed(id=id, client=client).parsed
+
+
+async def asyncio_detailed(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Response[Union[DataProvider, Error]]:
+    """Asynchronously fetch a single data provider by its ID.
+
+    Args:
+        id (str): Data provider ID, interpolated into the request path.
+        client (AuthenticatedClient): Client that supplies the async httpx client used for the call.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[DataProvider, Error]]: Status code, raw content, headers and parsed body.
+    """
+
+    request_args = _get_kwargs(id=id)
+    http_client = client.get_async_httpx_client()
+    raw_response = await http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Optional[Union[DataProvider, Error]]:
+    """Asynchronously fetch a single data provider, returning the parsed result.
+
+    Awaits ``asyncio_detailed`` and returns its ``parsed`` attribute.
+
+    Args:
+        id (str): Data provider ID, interpolated into the request path.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[DataProvider, Error]: The ``parsed`` attribute of the detailed response.
+    """
+
+    detailed = await asyncio_detailed(id=id, client=client)
+
+    return detailed.parsed
diff --git a/impresso/api_client/api/entities/find_entities.py b/impresso/api_client/api/entities/find_entities.py
index 05bb347..8e15d3b 100644
--- a/impresso/api_client/api/entities/find_entities.py
+++ b/impresso/api_client/api/entities/find_entities.py
@@ -80,6 +80,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/entities/get_entity.py b/impresso/api_client/api/entities/get_entity.py
index 5e36549..a59f2a3 100644
--- a/impresso/api_client/api/entities/get_entity.py
+++ b/impresso/api_client/api/entities/get_entity.py
@@ -40,6 +40,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/experiments/__init__.py b/impresso/api_client/api/experiments/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/impresso/api_client/api/experiments/find_experiments.py b/impresso/api_client/api/experiments/find_experiments.py
new file mode 100644
index 0000000..61901bf
--- /dev/null
+++ b/impresso/api_client/api/experiments/find_experiments.py
@@ -0,0 +1,155 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.find_experiments_base_find_response import FindExperimentsBaseFindResponse
+from ...types import Response
+
+
+def _get_kwargs() -> Dict[str, Any]:
+    """Assemble the httpx request arguments for listing experiments."""
+    request_args: Dict[str, Any] = {}
+    request_args["method"] = "get"
+    request_args["url"] = "/experiments"
+
+    return request_args
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, FindExperimentsBaseFindResponse]]:
+    """Convert the raw HTTP response into its documented parsed form.
+
+    Args:
+        client: Provides the ``raise_on_unexpected_status`` setting that
+            decides how undocumented status codes are handled.
+        response: The httpx response to interpret.
+
+    Returns:
+        A ``FindExperimentsBaseFindResponse`` parsed from the JSON body for
+        200 OK, an ``Error`` parsed from the JSON body for a documented error
+        status, otherwise ``None``.
+
+    Raises:
+        errors.UnexpectedStatus: If the status is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+    """
+    if response.status_code == HTTPStatus.OK:
+        return FindExperimentsBaseFindResponse.from_dict(response.json())
+
+    # 422 is spelled as an integer: ``HTTPStatus.UNPROCESSABLE_CONTENT`` only
+    # exists on Python 3.13+ and raises AttributeError on older interpreters.
+    documented_errors = (
+        HTTPStatus.UNAUTHORIZED,
+        HTTPStatus.FORBIDDEN,
+        HTTPStatus.NOT_FOUND,
+        HTTPStatus.IM_A_TEAPOT,
+        422,
+        HTTPStatus.TOO_MANY_REQUESTS,
+        HTTPStatus.INTERNAL_SERVER_ERROR,
+    )
+    if response.status_code in documented_errors:
+        return Error.from_dict(response.json())
+
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, FindExperimentsBaseFindResponse]]:
+    """Wrap the httpx response and its parsed payload in a ``Response``."""
+    status = HTTPStatus(response.status_code)
+    raw_content = response.content
+    raw_headers = response.headers
+    payload = _parse_response(client=client, response=response)
+    return Response(status_code=status, content=raw_content, headers=raw_headers, parsed=payload)
+
+
+def sync_detailed(
+    *,
+    client: AuthenticatedClient,
+) -> Response[Union[Error, FindExperimentsBaseFindResponse]]:
+    """Synchronously fetch the list of available experiments.
+
+    Args:
+        client (AuthenticatedClient): Client that supplies the httpx client used for the call.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, FindExperimentsBaseFindResponse]]
+    """
+
+    request_args = _get_kwargs()
+    raw_response = client.get_httpx_client().request(**request_args)
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    *,
+    client: AuthenticatedClient,
+) -> Optional[Union[Error, FindExperimentsBaseFindResponse]]:
+    """Fetch the list of available experiments and return only the parsed result.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, FindExperimentsBaseFindResponse]: ``parsed`` from ``sync_detailed``.
+    """
+
+    detailed = sync_detailed(client=client)
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    *,
+    client: AuthenticatedClient,
+) -> Response[Union[Error, FindExperimentsBaseFindResponse]]:
+    """Asynchronously fetch the list of available experiments.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, FindExperimentsBaseFindResponse]]
+    """
+
+    request_args = _get_kwargs()
+    http_client = client.get_async_httpx_client()
+    raw_response = await http_client.request(**request_args)
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    *,
+    client: AuthenticatedClient,
+) -> Optional[Union[Error, FindExperimentsBaseFindResponse]]:
+    """Asynchronously fetch the list of available experiments, returning the parsed result.
+
+    Awaits ``asyncio_detailed`` and returns its ``parsed`` attribute.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and
+            Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, FindExperimentsBaseFindResponse]: ``parsed`` from the detailed response.
+    """
+
+    detailed = await asyncio_detailed(client=client)
+    return detailed.parsed
diff --git a/impresso/api_client/api/experiments/interact_with_experiment.py b/impresso/api_client/api/experiments/interact_with_experiment.py
new file mode 100644
index 0000000..2088e33
--- /dev/null
+++ b/impresso/api_client/api/experiments/interact_with_experiment.py
@@ -0,0 +1,202 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.freeform import Freeform
+from ...models.interact_with_experiment_body import InteractWithExperimentBody
+from ...types import Response
+
+
+def _get_kwargs(
+ id: str,  # experiment identifier; interpolated into the URL path below
+ *,
+ body: InteractWithExperimentBody,
+) -> Dict[str, Any]:  # keyword arguments later unpacked into an httpx ``request(...)`` call
+ headers: Dict[str, Any] = {}
+
+ _kwargs: Dict[str, Any] = {
+ "method": "put",
+ "url": f"/experiments/{id}",  # NOTE(review): id is inserted without URL-quoting — confirm ids never contain "/", "?" or "#"
+ }
+
+ _body = body.to_dict()  # serialise the body model; sent as the JSON payload
+
+ _kwargs["json"] = _body
+ headers["Content-Type"] = "application/json"
+
+ _kwargs["headers"] = headers
+ return _kwargs
+
+
+def _parse_response(
+ *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, Freeform]]:  # 200 -> Freeform; documented error codes -> Error; otherwise raise or None
+ if response.status_code == HTTPStatus.OK:
+ response_200 = Freeform.from_dict(response.json())
+
+ return response_200
+ if response.status_code == HTTPStatus.UNAUTHORIZED:
+ response_401 = Error.from_dict(response.json())
+
+ return response_401
+ if response.status_code == HTTPStatus.FORBIDDEN:
+ response_403 = Error.from_dict(response.json())
+
+ return response_403
+ if response.status_code == HTTPStatus.NOT_FOUND:
+ response_404 = Error.from_dict(response.json())
+
+ return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
+ if response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY:  # 422; UNPROCESSABLE_CONTENT only exists on Python 3.13+, this name works on every version
+ response_422 = Error.from_dict(response.json())
+
+ return response_422
+ if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
+ response_429 = Error.from_dict(response.json())
+
+ return response_429
+ if response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
+ response_500 = Error.from_dict(response.json())
+
+ return response_500
+ if client.raise_on_unexpected_status:  # status code not handled above
+ raise errors.UnexpectedStatus(response.status_code, response.content)
+ else:
+ return None
+
+
+def _build_response(
+ *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, Freeform]]:  # bundles status, raw content, headers and the parsed body
+ return Response(
+ status_code=HTTPStatus(response.status_code),  # converts the integer status into an HTTPStatus member
+ content=response.content,
+ headers=response.headers,
+ parsed=_parse_response(client=client, response=response),  # may raise errors.UnexpectedStatus or yield None
+ )
+
+
+def sync_detailed(
+ id: str,
+ *,
+ client: AuthenticatedClient,
+ body: InteractWithExperimentBody,
+) -> Response[Union[Error, Freeform]]:
+ """Interact with an experiment with experiment specific data
+
+ Args:
+ id (str): Experiment identifier; becomes the last segment of the ``/experiments/{id}`` request path.
+ body (InteractWithExperimentBody): Experiment specific request body; serialised with ``to_dict()`` and sent as JSON.
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Response[Union[Error, Freeform]]: Status code, raw content, headers and parsed body, as assembled by ``_build_response``.
+ """
+
+ kwargs = _get_kwargs(
+ id=id,
+ body=body,
+ )
+
+ response = client.get_httpx_client().request(  # PUT request on the client's synchronous httpx client
+ **kwargs,
+ )
+
+ return _build_response(client=client, response=response)
+
+
+def sync(
+ id: str,
+ *,
+ client: AuthenticatedClient,
+ body: InteractWithExperimentBody,
+) -> Optional[Union[Error, Freeform]]:
+ """Interact with an experiment with experiment specific data (returns only the parsed body)
+
+ Args:
+ id (str): Experiment identifier; forwarded to ``sync_detailed``.
+ body (InteractWithExperimentBody): Experiment specific request body
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Union[Error, Freeform]: ``Freeform`` on HTTP 200, ``Error`` on the documented error codes, or None for an undocumented status when raise_on_unexpected_status is falsy.
+ """
+
+ return sync_detailed(
+ id=id,
+ client=client,
+ body=body,
+ ).parsed
+
+
+async def asyncio_detailed(
+ id: str,
+ *,
+ client: AuthenticatedClient,
+ body: InteractWithExperimentBody,
+) -> Response[Union[Error, Freeform]]:
+ """Interact with an experiment with experiment specific data (coroutine)
+
+ Args:
+ id (str): Experiment identifier; becomes the last segment of the ``/experiments/{id}`` request path.
+ body (InteractWithExperimentBody): Experiment specific request body; serialised with ``to_dict()`` and sent as JSON.
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Response[Union[Error, Freeform]]: Status code, raw content, headers and parsed body, as assembled by ``_build_response``.
+ """
+
+ kwargs = _get_kwargs(
+ id=id,
+ body=body,
+ )
+
+ response = await client.get_async_httpx_client().request(**kwargs)  # PUT request on the client's async httpx client
+
+ return _build_response(client=client, response=response)
+
+
+async def asyncio(
+ id: str,
+ *,
+ client: AuthenticatedClient,
+ body: InteractWithExperimentBody,
+) -> Optional[Union[Error, Freeform]]:
+ """Interact with an experiment with experiment specific data (coroutine; returns only the parsed body)
+
+ Args:
+ id (str): Experiment identifier; forwarded to ``asyncio_detailed``.
+ body (InteractWithExperimentBody): Experiment specific request body
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Union[Error, Freeform]: ``Freeform`` on HTTP 200, ``Error`` on the documented error codes, or None for an undocumented status when raise_on_unexpected_status is falsy.
+ """
+
+ return (
+ await asyncio_detailed(
+ id=id,
+ client=client,
+ body=body,
+ )
+ ).parsed
diff --git a/impresso/api_client/api/images/find_images.py b/impresso/api_client/api/images/find_images.py
index b13f367..2ecc8c7 100644
--- a/impresso/api_client/api/images/find_images.py
+++ b/impresso/api_client/api/images/find_images.py
@@ -18,6 +18,7 @@ def _get_kwargs(
similar_to_image_id: Union[Unset, str] = UNSET,
order_by: Union[Unset, FindImagesOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Dict[str, Any]:
@@ -46,6 +47,8 @@ def _get_kwargs(
json_filters = filters
params["filters"] = json_filters
+ params["include_embeddings"] = include_embeddings
+
params["limit"] = limit
params["offset"] = offset
@@ -80,6 +83,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
@@ -116,6 +123,7 @@ def sync_detailed(
similar_to_image_id: Union[Unset, str] = UNSET,
order_by: Union[Unset, FindImagesOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Response[Union[Error, FindImagesBaseFindResponse]]:
@@ -126,6 +134,7 @@ def sync_detailed(
similar_to_image_id (Union[Unset, str]):
order_by (Union[Unset, FindImagesOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -142,6 +151,7 @@ def sync_detailed(
similar_to_image_id=similar_to_image_id,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
)
@@ -160,6 +170,7 @@ def sync(
similar_to_image_id: Union[Unset, str] = UNSET,
order_by: Union[Unset, FindImagesOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Optional[Union[Error, FindImagesBaseFindResponse]]:
@@ -170,6 +181,7 @@ def sync(
similar_to_image_id (Union[Unset, str]):
order_by (Union[Unset, FindImagesOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -187,6 +199,7 @@ def sync(
similar_to_image_id=similar_to_image_id,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
).parsed
@@ -199,6 +212,7 @@ async def asyncio_detailed(
similar_to_image_id: Union[Unset, str] = UNSET,
order_by: Union[Unset, FindImagesOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Response[Union[Error, FindImagesBaseFindResponse]]:
@@ -209,6 +223,7 @@ async def asyncio_detailed(
similar_to_image_id (Union[Unset, str]):
order_by (Union[Unset, FindImagesOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -225,6 +240,7 @@ async def asyncio_detailed(
similar_to_image_id=similar_to_image_id,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
)
@@ -241,6 +257,7 @@ async def asyncio(
similar_to_image_id: Union[Unset, str] = UNSET,
order_by: Union[Unset, FindImagesOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Optional[Union[Error, FindImagesBaseFindResponse]]:
@@ -251,6 +268,7 @@ async def asyncio(
similar_to_image_id (Union[Unset, str]):
order_by (Union[Unset, FindImagesOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -269,6 +287,7 @@ async def asyncio(
similar_to_image_id=similar_to_image_id,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
)
diff --git a/impresso/api_client/api/images/get_image.py b/impresso/api_client/api/images/get_image.py
index 9f3548e..5d41a43 100644
--- a/impresso/api_client/api/images/get_image.py
+++ b/impresso/api_client/api/images/get_image.py
@@ -7,15 +7,24 @@
from ...client import AuthenticatedClient, Client
from ...models.error import Error
from ...models.image import Image
-from ...types import Response
+from ...types import UNSET, Response, Unset
def _get_kwargs(
id: str,
+ *,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Dict[str, Any]:
+ params: Dict[str, Any] = {}
+
+ params["include_embeddings"] = include_embeddings
+
+ params = {k: v for k, v in params.items() if v is not UNSET and v is not None}
+
_kwargs: Dict[str, Any] = {
"method": "get",
"url": f"/images/{id}",
+ "params": params,
}
return _kwargs
@@ -40,6 +49,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
@@ -73,11 +86,13 @@ def sync_detailed(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Response[Union[Error, Image]]:
"""Get image by ID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -89,6 +104,7 @@ def sync_detailed(
kwargs = _get_kwargs(
id=id,
+ include_embeddings=include_embeddings,
)
response = client.get_httpx_client().request(
@@ -102,11 +118,13 @@ def sync(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Optional[Union[Error, Image]]:
"""Get image by ID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -119,6 +137,7 @@ def sync(
return sync_detailed(
id=id,
client=client,
+ include_embeddings=include_embeddings,
).parsed
@@ -126,11 +145,13 @@ async def asyncio_detailed(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Response[Union[Error, Image]]:
"""Get image by ID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -142,6 +163,7 @@ async def asyncio_detailed(
kwargs = _get_kwargs(
id=id,
+ include_embeddings=include_embeddings,
)
response = await client.get_async_httpx_client().request(**kwargs)
@@ -153,11 +175,13 @@ async def asyncio(
id: str,
*,
client: AuthenticatedClient,
+ include_embeddings: Union[Unset, bool] = UNSET,
) -> Optional[Union[Error, Image]]:
"""Get image by ID
Args:
id (str):
+ include_embeddings (Union[Unset, bool]):
Raises:
errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
@@ -171,5 +195,6 @@ async def asyncio(
await asyncio_detailed(
id=id,
client=client,
+ include_embeddings=include_embeddings,
)
).parsed
diff --git a/impresso/api_client/api/media_sources/find_media_sources.py b/impresso/api_client/api/media_sources/find_media_sources.py
index 2dc4b2e..1f89cfa 100644
--- a/impresso/api_client/api/media_sources/find_media_sources.py
+++ b/impresso/api_client/api/media_sources/find_media_sources.py
@@ -73,6 +73,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/media_sources/get_media_source.py b/impresso/api_client/api/media_sources/get_media_source.py
index 1cbaefb..04fcbb3 100644
--- a/impresso/api_client/api/media_sources/get_media_source.py
+++ b/impresso/api_client/api/media_sources/get_media_source.py
@@ -40,6 +40,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/not_used/patch_collections_collection_id_items_id.py b/impresso/api_client/api/not_used/patch_collections_collection_id_items_id.py
index 172e6e0..5bacd20 100644
--- a/impresso/api_client/api/not_used/patch_collections_collection_id_items_id.py
+++ b/impresso/api_client/api/not_used/patch_collections_collection_id_items_id.py
@@ -52,6 +52,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/search/search.py b/impresso/api_client/api/search/search.py
index 3e8fe39..b48faef 100644
--- a/impresso/api_client/api/search/search.py
+++ b/impresso/api_client/api/search/search.py
@@ -17,6 +17,7 @@ def _get_kwargs(
term: Union[Unset, str] = UNSET,
order_by: Union[Unset, SearchOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Dict[str, Any]:
@@ -43,6 +44,8 @@ def _get_kwargs(
json_filters = filters
params["filters"] = json_filters
+ params["include_embeddings"] = include_embeddings
+
params["limit"] = limit
params["offset"] = offset
@@ -77,6 +80,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
@@ -112,6 +119,7 @@ def sync_detailed(
term: Union[Unset, str] = UNSET,
order_by: Union[Unset, SearchOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Response[Union[Error, SearchBaseFindResponse]]:
@@ -121,6 +129,7 @@ def sync_detailed(
term (Union[Unset, str]):
order_by (Union[Unset, SearchOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -136,6 +145,7 @@ def sync_detailed(
term=term,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
)
@@ -153,6 +163,7 @@ def sync(
term: Union[Unset, str] = UNSET,
order_by: Union[Unset, SearchOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Optional[Union[Error, SearchBaseFindResponse]]:
@@ -162,6 +173,7 @@ def sync(
term (Union[Unset, str]):
order_by (Union[Unset, SearchOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -178,6 +190,7 @@ def sync(
term=term,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
).parsed
@@ -189,6 +202,7 @@ async def asyncio_detailed(
term: Union[Unset, str] = UNSET,
order_by: Union[Unset, SearchOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Response[Union[Error, SearchBaseFindResponse]]:
@@ -198,6 +212,7 @@ async def asyncio_detailed(
term (Union[Unset, str]):
order_by (Union[Unset, SearchOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -213,6 +228,7 @@ async def asyncio_detailed(
term=term,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
)
@@ -228,6 +244,7 @@ async def asyncio(
term: Union[Unset, str] = UNSET,
order_by: Union[Unset, SearchOrderBy] = UNSET,
filters: Union[List["Filter"], Unset, str] = UNSET,
+ include_embeddings: Union[Unset, bool] = UNSET,
limit: Union[Unset, int] = UNSET,
offset: Union[Unset, int] = UNSET,
) -> Optional[Union[Error, SearchBaseFindResponse]]:
@@ -237,6 +254,7 @@ async def asyncio(
term (Union[Unset, str]):
order_by (Union[Unset, SearchOrderBy]):
filters (Union[List['Filter'], Unset, str]):
+ include_embeddings (Union[Unset, bool]):
limit (Union[Unset, int]):
offset (Union[Unset, int]):
@@ -254,6 +272,7 @@ async def asyncio(
term=term,
order_by=order_by,
filters=filters,
+ include_embeddings=include_embeddings,
limit=limit,
offset=offset,
)
diff --git a/impresso/api_client/api/search_facets/get_images_facet.py b/impresso/api_client/api/search_facets/get_images_facet.py
index 5f675bb..96f3cdd 100644
--- a/impresso/api_client/api/search_facets/get_images_facet.py
+++ b/impresso/api_client/api/search_facets/get_images_facet.py
@@ -76,6 +76,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/search_facets/get_search_facet.py b/impresso/api_client/api/search_facets/get_search_facet.py
index 2f40da7..b4e92b3 100644
--- a/impresso/api_client/api/search_facets/get_search_facet.py
+++ b/impresso/api_client/api/search_facets/get_search_facet.py
@@ -76,6 +76,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/search_facets/get_tr_clusters_facet.py b/impresso/api_client/api/search_facets/get_tr_clusters_facet.py
index 07ed29e..6c1e0a1 100644
--- a/impresso/api_client/api/search_facets/get_tr_clusters_facet.py
+++ b/impresso/api_client/api/search_facets/get_tr_clusters_facet.py
@@ -76,6 +76,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/search_facets/get_tr_passages_facet.py b/impresso/api_client/api/search_facets/get_tr_passages_facet.py
index b0653e3..87d09e3 100644
--- a/impresso/api_client/api/search_facets/get_tr_passages_facet.py
+++ b/impresso/api_client/api/search_facets/get_tr_passages_facet.py
@@ -76,6 +76,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/text_reuse_clusters/find_text_reuse_clusters.py b/impresso/api_client/api/text_reuse_clusters/find_text_reuse_clusters.py
index 30026e4..965fd57 100644
--- a/impresso/api_client/api/text_reuse_clusters/find_text_reuse_clusters.py
+++ b/impresso/api_client/api/text_reuse_clusters/find_text_reuse_clusters.py
@@ -77,6 +77,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/text_reuse_clusters/get_text_reuse_cluster.py b/impresso/api_client/api/text_reuse_clusters/get_text_reuse_cluster.py
index a9f6499..c51459d 100644
--- a/impresso/api_client/api/text_reuse_clusters/get_text_reuse_cluster.py
+++ b/impresso/api_client/api/text_reuse_clusters/get_text_reuse_cluster.py
@@ -40,6 +40,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/text_reuse_passages/find_text_reuse_passages.py b/impresso/api_client/api/text_reuse_passages/find_text_reuse_passages.py
index 42e5dc5..c8ecbeb 100644
--- a/impresso/api_client/api/text_reuse_passages/find_text_reuse_passages.py
+++ b/impresso/api_client/api/text_reuse_passages/find_text_reuse_passages.py
@@ -74,6 +74,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/text_reuse_passages/get_text_reuse_passage.py b/impresso/api_client/api/text_reuse_passages/get_text_reuse_passage.py
index e7af180..e50c72c 100644
--- a/impresso/api_client/api/text_reuse_passages/get_text_reuse_passage.py
+++ b/impresso/api_client/api/text_reuse_passages/get_text_reuse_passage.py
@@ -40,6 +40,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/tools/perform_image_embedding.py b/impresso/api_client/api/tools/perform_image_embedding.py
new file mode 100644
index 0000000..fd81207
--- /dev/null
+++ b/impresso/api_client/api/tools/perform_image_embedding.py
@@ -0,0 +1,189 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.impresso_embedding_response import ImpressoEmbeddingResponse
+from ...models.impresso_image_embedding_request import ImpressoImageEmbeddingRequest
+from ...types import Response
+
+
+def _get_kwargs(
+ *,
+ body: ImpressoImageEmbeddingRequest,
+) -> Dict[str, Any]:  # keyword arguments later unpacked into an httpx ``request(...)`` call
+ headers: Dict[str, Any] = {}
+
+ _kwargs: Dict[str, Any] = {
+ "method": "post",
+ "url": "/tools/embedder/image",
+ }
+
+ _body = body.to_dict()  # serialise the request model; sent as the JSON payload
+
+ _kwargs["json"] = _body
+ headers["Content-Type"] = "application/json"
+
+ _kwargs["headers"] = headers
+ return _kwargs
+
+
+def _parse_response(
+ *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, ImpressoEmbeddingResponse]]:  # 201 -> ImpressoEmbeddingResponse; documented error codes -> Error; otherwise raise or None
+ if response.status_code == HTTPStatus.CREATED:
+ response_201 = ImpressoEmbeddingResponse.from_dict(response.json())
+
+ return response_201
+ if response.status_code == HTTPStatus.UNAUTHORIZED:
+ response_401 = Error.from_dict(response.json())
+
+ return response_401
+ if response.status_code == HTTPStatus.FORBIDDEN:
+ response_403 = Error.from_dict(response.json())
+
+ return response_403
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
+ if response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY:  # 422; UNPROCESSABLE_CONTENT only exists on Python 3.13+, this name works on every version
+ response_422 = Error.from_dict(response.json())
+
+ return response_422
+ if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
+ response_429 = Error.from_dict(response.json())
+
+ return response_429
+ if response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
+ response_500 = Error.from_dict(response.json())
+
+ return response_500
+ if client.raise_on_unexpected_status:  # status code not handled above
+ raise errors.UnexpectedStatus(response.status_code, response.content)
+ else:
+ return None
+
+
+def _build_response(
+ *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, ImpressoEmbeddingResponse]]:  # bundles status, raw content, headers and the parsed body
+ return Response(
+ status_code=HTTPStatus(response.status_code),  # converts the integer status into an HTTPStatus member
+ content=response.content,
+ headers=response.headers,
+ parsed=_parse_response(client=client, response=response),  # may raise errors.UnexpectedStatus or yield None
+ )
+
+
+def sync_detailed(
+ *,
+ client: AuthenticatedClient,
+ body: ImpressoImageEmbeddingRequest,
+) -> Response[Union[Error, ImpressoEmbeddingResponse]]:
+ """Embed an image into a vector space
+
+ Args:
+ body (ImpressoImageEmbeddingRequest): Body of a request to the Impresso Image Embedding
+ endpoint; serialised with ``to_dict()`` and POSTed as JSON to ``/tools/embedder/image``.
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Response[Union[Error, ImpressoEmbeddingResponse]]: Status code, raw content, headers and parsed body, as assembled by ``_build_response``.
+ """
+
+ kwargs = _get_kwargs(
+ body=body,
+ )
+
+ response = client.get_httpx_client().request(  # request on the client's synchronous httpx client
+ **kwargs,
+ )
+
+ return _build_response(client=client, response=response)
+
+
+def sync(
+ *,
+ client: AuthenticatedClient,
+ body: ImpressoImageEmbeddingRequest,
+) -> Optional[Union[Error, ImpressoEmbeddingResponse]]:
+ """Embed an image into a vector space (returns only the parsed body)
+
+ Args:
+ body (ImpressoImageEmbeddingRequest): Body of a request to the Impresso Image Embedding
+ endpoint; forwarded unchanged to ``sync_detailed``.
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Union[Error, ImpressoEmbeddingResponse]: ``ImpressoEmbeddingResponse`` on HTTP 201, ``Error`` on the documented error codes, or None for an undocumented status when raise_on_unexpected_status is falsy.
+ """
+
+ return sync_detailed(
+ client=client,
+ body=body,
+ ).parsed
+
+
+async def asyncio_detailed(
+ *,
+ client: AuthenticatedClient,
+ body: ImpressoImageEmbeddingRequest,
+) -> Response[Union[Error, ImpressoEmbeddingResponse]]:
+ """Embed an image into a vector space (coroutine)
+
+ Args:
+ body (ImpressoImageEmbeddingRequest): Body of a request to the Impresso Image Embedding
+ endpoint; serialised with ``to_dict()`` and POSTed as JSON to ``/tools/embedder/image``.
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Response[Union[Error, ImpressoEmbeddingResponse]]: Status code, raw content, headers and parsed body, as assembled by ``_build_response``.
+ """
+
+ kwargs = _get_kwargs(
+ body=body,
+ )
+
+ response = await client.get_async_httpx_client().request(**kwargs)  # request on the client's async httpx client
+
+ return _build_response(client=client, response=response)
+
+
+async def asyncio(
+ *,
+ client: AuthenticatedClient,
+ body: ImpressoImageEmbeddingRequest,
+) -> Optional[Union[Error, ImpressoEmbeddingResponse]]:
+ """Embed an image into a vector space (coroutine; returns only the parsed body)
+
+ Args:
+ body (ImpressoImageEmbeddingRequest): Body of a request to the Impresso Image Embedding
+ endpoint; forwarded unchanged to ``asyncio_detailed``.
+
+ Raises:
+ errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+ httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+ Returns:
+ Union[Error, ImpressoEmbeddingResponse]: ``ImpressoEmbeddingResponse`` on HTTP 201, ``Error`` on the documented error codes, or None for an undocumented status when raise_on_unexpected_status is falsy.
+ """
+
+ return (
+ await asyncio_detailed(
+ client=client,
+ body=body,
+ )
+ ).parsed
diff --git a/impresso/api_client/api/tools/perform_ner.py b/impresso/api_client/api/tools/perform_ner.py
index f7caed1..853f249 100644
--- a/impresso/api_client/api/tools/perform_ner.py
+++ b/impresso/api_client/api/tools/perform_ner.py
@@ -46,6 +46,10 @@ def _parse_response(
response_403 = Error.from_dict(response.json())
return response_403
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/api/tools/perform_text_embedding.py b/impresso/api_client/api/tools/perform_text_embedding.py
new file mode 100644
index 0000000..d5dffd9
--- /dev/null
+++ b/impresso/api_client/api/tools/perform_text_embedding.py
@@ -0,0 +1,189 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.impresso_embedding_response import ImpressoEmbeddingResponse
+from ...models.impresso_text_embedding_request import ImpressoTextEmbeddingRequest
+from ...types import Response
+
+
+def _get_kwargs(
+    *,
+    body: ImpressoTextEmbeddingRequest,
+) -> Dict[str, Any]:
+    """Assemble the httpx request arguments for ``POST /tools/embedder/text``.
+
+    Args:
+        body (ImpressoTextEmbeddingRequest): Serialised with ``to_dict()`` and sent as the JSON payload.
+
+    Returns:
+        Dict[str, Any]: Keyword arguments for the httpx client's ``request`` method.
+    """
+    return {
+        "method": "post",
+        "url": "/tools/embedder/text",
+        "json": body.to_dict(),
+        "headers": {"Content-Type": "application/json"},
+    }
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, ImpressoEmbeddingResponse]]:
+    """Convert the HTTP response into the model documented for its status code.
+
+    Status codes are compared as plain integers rather than ``HTTPStatus`` members:
+    ``HTTPStatus.UNPROCESSABLE_CONTENT`` only exists on Python 3.13+, and looking it
+    up on an older interpreter raises ``AttributeError``.
+
+    Args:
+        client (Union[AuthenticatedClient, Client]): Supplies ``raise_on_unexpected_status``.
+        response (httpx.Response): Response received from the text embedding endpoint.
+
+    Raises:
+        errors.UnexpectedStatus: If the status code is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+
+    Returns:
+        Optional[Union[Error, ImpressoEmbeddingResponse]]: ``ImpressoEmbeddingResponse`` for 201,
+            ``Error`` for 401, 403, 418, 422, 429 and 500, ``None`` for any other status when
+            the client does not raise.
+    """
+    status_code = response.status_code
+
+    if status_code == 201:
+        return ImpressoEmbeddingResponse.from_dict(response.json())
+
+    # Every documented failure status carries the same ``Error`` payload.
+    if status_code in (401, 403, 418, 422, 429, 500):
+        return Error.from_dict(response.json())
+
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, ImpressoEmbeddingResponse]]:
+    """Wrap the raw httpx response and its parsed model in a ``Response``."""
+    status = HTTPStatus(response.status_code)  # converted before the body is parsed
+    raw_body, raw_headers = response.content, response.headers
+    model = _parse_response(client=client, response=response)
+
+    return Response(status_code=status, content=raw_body, headers=raw_headers, parsed=model)
+
+
+def sync_detailed(
+    *,
+    client: AuthenticatedClient,
+    body: ImpressoTextEmbeddingRequest,
+) -> Response[Union[Error, ImpressoEmbeddingResponse]]:
+    """Embed a text into a vector space (blocking, full response).
+
+    Sends ``POST /tools/embedder/text`` through the client's synchronous httpx
+    client and wraps the result with ``_build_response``.
+
+    Args:
+        client (AuthenticatedClient): Client whose httpx client sends the request.
+        body (ImpressoTextEmbeddingRequest): Body of a request to the Impresso Text Embedding endpoint.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, ImpressoEmbeddingResponse]]
+    """
+
+    request_args = _get_kwargs(body=body)
+
+    http_client = client.get_httpx_client()
+    raw_response = http_client.request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    *,
+    client: AuthenticatedClient,
+    body: ImpressoTextEmbeddingRequest,
+) -> Optional[Union[Error, ImpressoEmbeddingResponse]]:
+    """Embed a text into a vector space (blocking, parsed payload only).
+
+    Calls ``sync_detailed`` and returns the ``parsed`` attribute of its result.
+
+    Args:
+        client (AuthenticatedClient): Client whose httpx client sends the request.
+        body (ImpressoTextEmbeddingRequest): Body of a request to the Impresso Text Embedding endpoint.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, ImpressoEmbeddingResponse]: ``None`` if the status is undocumented and the client does not raise.
+    """
+
+    detailed = sync_detailed(client=client, body=body)
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    *,
+    client: AuthenticatedClient,
+    body: ImpressoTextEmbeddingRequest,
+) -> Response[Union[Error, ImpressoEmbeddingResponse]]:
+    """Embed a text into a vector space (awaitable, full response).
+
+    Sends ``POST /tools/embedder/text`` through the client's asynchronous httpx
+    client and wraps the result with ``_build_response``.
+
+    Args:
+        client (AuthenticatedClient): Client whose async httpx client sends the request.
+        body (ImpressoTextEmbeddingRequest): Body of a request to the Impresso Text Embedding endpoint.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, ImpressoEmbeddingResponse]]
+    """
+
+    request_args = _get_kwargs(body=body)
+    raw_response = await client.get_async_httpx_client().request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    *,
+    client: AuthenticatedClient,
+    body: ImpressoTextEmbeddingRequest,
+) -> Optional[Union[Error, ImpressoEmbeddingResponse]]:
+    """Embed a text into a vector space (awaitable, parsed payload only).
+
+    Awaits ``asyncio_detailed`` with the same arguments and returns the
+    ``parsed`` attribute of its result.
+
+    Args:
+        client (AuthenticatedClient): Client whose async httpx client sends the request.
+        body (ImpressoTextEmbeddingRequest): Body of a request to the Impresso Text Embedding endpoint.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, ImpressoEmbeddingResponse]: ``None`` if the status is undocumented and the client does not raise.
+    """
+
+    detailed = await asyncio_detailed(client=client, body=body)
+
+    return detailed.parsed
diff --git a/impresso/api_client/api/topics/__init__.py b/impresso/api_client/api/topics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/impresso/api_client/api/topics/find_topics.py b/impresso/api_client/api/topics/find_topics.py
new file mode 100644
index 0000000..6871679
--- /dev/null
+++ b/impresso/api_client/api/topics/find_topics.py
@@ -0,0 +1,264 @@
+from http import HTTPStatus
+from typing import Any, Dict, List, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.filter_ import Filter
+from ...models.find_topics_base_find_response import FindTopicsBaseFindResponse
+from ...models.find_topics_order_by import FindTopicsOrderBy
+from ...types import UNSET, Response, Unset
+
+
+def _get_kwargs(
+    *,
+    q: Union[Unset, str] = UNSET,
+    order_by: Union[Unset, FindTopicsOrderBy] = FindTopicsOrderBy.NAME,
+    filters: Union[List["Filter"], Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Dict[str, Any]:
+    """Assemble the httpx request arguments for ``GET /topics``.
+
+    Args:
+        q (Union[Unset, str]): Forwarded unchanged as the ``q`` query parameter.
+        order_by (Union[Unset, FindTopicsOrderBy]): Sent as its enum ``value``. Default: FindTopicsOrderBy.NAME.
+        filters (Union[List['Filter'], Unset, str]): A list is sent as the ``to_dict()`` form of each item;
+            any other non-``Unset`` value is forwarded unchanged.
+        limit (Union[Unset, int]): Forwarded unchanged as the ``limit`` query parameter.
+        offset (Union[Unset, int]): Forwarded unchanged as the ``offset`` query parameter.
+
+    Returns:
+        Dict[str, Any]: ``method``, ``url`` and ``params``; parameters that are ``UNSET`` or ``None`` are omitted.
+    """
+    serialised_order: Union[Unset, str] = UNSET if isinstance(order_by, Unset) else order_by.value
+
+    serialised_filters: Union[List[Dict[str, Any]], Unset, str]
+    if isinstance(filters, Unset):
+        serialised_filters = UNSET
+    elif isinstance(filters, list):
+        serialised_filters = [item.to_dict() for item in filters]
+    else:
+        serialised_filters = filters
+
+    candidates: Dict[str, Any] = {
+        "q": q,
+        "order_by": serialised_order,
+        "filters": serialised_filters,
+        "limit": limit,
+        "offset": offset,
+    }
+
+    return {
+        "method": "get",
+        "url": "/topics",
+        "params": {key: value for key, value in candidates.items() if value is not UNSET and value is not None},
+    }
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, FindTopicsBaseFindResponse]]:
+    """Convert the HTTP response into the model documented for its status code.
+
+    Status codes are compared as plain integers rather than ``HTTPStatus`` members:
+    ``HTTPStatus.UNPROCESSABLE_CONTENT`` only exists on Python 3.13+, and looking it
+    up on an older interpreter raises ``AttributeError``.
+
+    Args:
+        client (Union[AuthenticatedClient, Client]): Supplies ``raise_on_unexpected_status``.
+        response (httpx.Response): Response received from ``GET /topics``.
+
+    Raises:
+        errors.UnexpectedStatus: If the status code is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+
+    Returns:
+        Optional[Union[Error, FindTopicsBaseFindResponse]]: ``FindTopicsBaseFindResponse`` for 200,
+            ``Error`` for 401, 403, 404, 418, 422, 429 and 500, ``None`` for any other status
+            when the client does not raise.
+    """
+    # Documented failure statuses; all of them carry an ``Error`` payload.
+    error_status_codes = (401, 403, 404, 418, 422, 429, 500)
+
+    status_code = response.status_code
+
+    if status_code == 200:
+        return FindTopicsBaseFindResponse.from_dict(response.json())
+
+    if status_code in error_status_codes:
+        return Error.from_dict(response.json())
+
+    # Anything else is outside the documented responses.
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+
+    # Undocumented status and the client is configured not to raise.
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, FindTopicsBaseFindResponse]]:
+    """Wrap the raw httpx response and its parsed model in a ``Response``."""
+    status = HTTPStatus(response.status_code)  # converted before the body is parsed
+    raw_body, raw_headers = response.content, response.headers
+    model = _parse_response(client=client, response=response)
+
+    return Response(status_code=status, content=raw_body, headers=raw_headers, parsed=model)
+
+
+def sync_detailed(
+    *,
+    client: AuthenticatedClient,
+    q: Union[Unset, str] = UNSET,
+    order_by: Union[Unset, FindTopicsOrderBy] = FindTopicsOrderBy.NAME,
+    filters: Union[List["Filter"], Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Response[Union[Error, FindTopicsBaseFindResponse]]:
+    """Find topics (blocking, full response).
+
+    Sends ``GET /topics`` through the client's synchronous httpx client and
+    wraps the result with ``_build_response``.
+
+    Args:
+        client (AuthenticatedClient): Client whose httpx client sends the request.
+        q (Union[Unset, str]): Forwarded to ``_get_kwargs``.
+        order_by (Union[Unset, FindTopicsOrderBy]): Default: FindTopicsOrderBy.NAME.
+        filters (Union[List['Filter'], Unset, str]): Forwarded to ``_get_kwargs``.
+        limit (Union[Unset, int]): Forwarded to ``_get_kwargs``.
+        offset (Union[Unset, int]): Forwarded to ``_get_kwargs``.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, FindTopicsBaseFindResponse]]
+    """
+
+    # Translate the arguments into the request description.
+    request_args = _get_kwargs(q=q, order_by=order_by, filters=filters, limit=limit, offset=offset)
+
+    # Blocking call on the client's synchronous httpx client.
+    http_client = client.get_httpx_client()
+    raw_response = http_client.request(**request_args)
+
+    # Pair the parsed model with status code, headers and raw content.
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    *,
+    client: AuthenticatedClient,
+    q: Union[Unset, str] = UNSET,
+    order_by: Union[Unset, FindTopicsOrderBy] = FindTopicsOrderBy.NAME,
+    filters: Union[List["Filter"], Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Optional[Union[Error, FindTopicsBaseFindResponse]]:
+    """Find topics (blocking, parsed payload only).
+
+    Calls ``sync_detailed`` with the same arguments and returns the ``parsed``
+    attribute of its result.
+
+    Args:
+        client (AuthenticatedClient): Client whose httpx client sends the request.
+        q (Union[Unset, str]): Forwarded to ``sync_detailed``.
+        order_by (Union[Unset, FindTopicsOrderBy]): Default: FindTopicsOrderBy.NAME.
+        filters (Union[List['Filter'], Unset, str]): Forwarded to ``sync_detailed``.
+        limit (Union[Unset, int]): Forwarded to ``sync_detailed``.
+        offset (Union[Unset, int]): Forwarded to ``sync_detailed``.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, FindTopicsBaseFindResponse]: ``None`` if the status is undocumented and the client does not raise.
+    """
+
+    detailed = sync_detailed(
+        client=client, q=q, order_by=order_by, filters=filters, limit=limit, offset=offset
+    )
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    *,
+    client: AuthenticatedClient,
+    q: Union[Unset, str] = UNSET,
+    order_by: Union[Unset, FindTopicsOrderBy] = FindTopicsOrderBy.NAME,
+    filters: Union[List["Filter"], Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Response[Union[Error, FindTopicsBaseFindResponse]]:
+    """Find topics (awaitable, full response).
+
+    Sends ``GET /topics`` through the client's asynchronous httpx client and
+    wraps the result with ``_build_response``.
+
+    Args:
+        client (AuthenticatedClient): Client whose async httpx client sends the request.
+        q (Union[Unset, str]): Forwarded to ``_get_kwargs``.
+        order_by (Union[Unset, FindTopicsOrderBy]): Default: FindTopicsOrderBy.NAME.
+        filters (Union[List['Filter'], Unset, str]): Forwarded to ``_get_kwargs``.
+        limit (Union[Unset, int]): Forwarded to ``_get_kwargs``.
+        offset (Union[Unset, int]): Forwarded to ``_get_kwargs``.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, FindTopicsBaseFindResponse]]
+    """
+
+    # Translate the arguments into the request description.
+    request_args = _get_kwargs(q=q, order_by=order_by, filters=filters, limit=limit, offset=offset)
+
+    # Awaited call on the client's asynchronous httpx client.
+    raw_response = await client.get_async_httpx_client().request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    *,
+    client: AuthenticatedClient,
+    q: Union[Unset, str] = UNSET,
+    order_by: Union[Unset, FindTopicsOrderBy] = FindTopicsOrderBy.NAME,
+    filters: Union[List["Filter"], Unset, str] = UNSET,
+    limit: Union[Unset, int] = UNSET,
+    offset: Union[Unset, int] = UNSET,
+) -> Optional[Union[Error, FindTopicsBaseFindResponse]]:
+    """Find topics (awaitable, parsed payload only).
+
+    Awaits ``asyncio_detailed`` with the same arguments and returns the
+    ``parsed`` attribute of its result.
+
+    Args:
+        client (AuthenticatedClient): Client whose async httpx client sends the request.
+        q (Union[Unset, str]): Forwarded to ``asyncio_detailed``.
+        order_by (Union[Unset, FindTopicsOrderBy]): Default: FindTopicsOrderBy.NAME.
+        filters (Union[List['Filter'], Unset, str]): Forwarded to ``asyncio_detailed``.
+        limit (Union[Unset, int]): Forwarded to ``asyncio_detailed``.
+        offset (Union[Unset, int]): Forwarded to ``asyncio_detailed``.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, FindTopicsBaseFindResponse]: ``None`` if the status is undocumented and the client does not raise.
+    """
+
+    # Only the parsed model is handed back to the caller.
+    detailed = await asyncio_detailed(
+        client=client, q=q, order_by=order_by, filters=filters, limit=limit, offset=offset
+    )
+
+    return detailed.parsed
diff --git a/impresso/api_client/api/topics/get_topic.py b/impresso/api_client/api/topics/get_topic.py
new file mode 100644
index 0000000..054dcb4
--- /dev/null
+++ b/impresso/api_client/api/topics/get_topic.py
@@ -0,0 +1,179 @@
+from http import HTTPStatus
+from typing import Any, Dict, Optional, Union
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.topic import Topic
+from ...types import Response
+
+
+def _get_kwargs(id: str) -> Dict[str, Any]:
+    """Build the httpx request arguments for ``GET /topics/{id}``."""
+    from urllib.parse import quote
+
+    # Percent-encode the UID so "/", "?" or "#" in it cannot alter the request path.
+    return {
+        "method": "get",
+        "url": f"/topics/{quote(str(id), safe='')}",
+    }
+
+
+def _parse_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[Union[Error, Topic]]:
+    """Convert the HTTP response into the model documented for its status code.
+
+    Status codes are compared as plain integers rather than ``HTTPStatus`` members:
+    ``HTTPStatus.UNPROCESSABLE_CONTENT`` only exists on Python 3.13+, and looking it
+    up on an older interpreter raises ``AttributeError``.
+
+    Args:
+        client (Union[AuthenticatedClient, Client]): Supplies ``raise_on_unexpected_status``.
+        response (httpx.Response): Response received from ``GET /topics/{id}``.
+
+    Raises:
+        errors.UnexpectedStatus: If the status code is undocumented and
+            ``client.raise_on_unexpected_status`` is true.
+
+    Returns:
+        Optional[Union[Error, Topic]]: ``Topic`` for 200, ``Error`` for 401, 403, 404, 418,
+            422, 429 and 500, ``None`` for any other status when the client does not
+            raise.
+    """
+    # Documented failure statuses; all of them carry an ``Error`` payload.
+    error_status_codes = (401, 403, 404, 418, 422, 429, 500)
+
+    status_code = response.status_code
+
+    if status_code == 200:
+        return Topic.from_dict(response.json())
+
+    if status_code in error_status_codes:
+        return Error.from_dict(response.json())
+
+    # Anything else is outside the documented responses.
+    if client.raise_on_unexpected_status:
+        raise errors.UnexpectedStatus(response.status_code, response.content)
+
+    # Undocumented status and the client is configured not to raise.
+    return None
+
+
+def _build_response(
+    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[Union[Error, Topic]]:
+    """Wrap the raw httpx response and its parsed model in a ``Response``."""
+    status = HTTPStatus(response.status_code)  # converted before the body is parsed
+    raw_body, raw_headers = response.content, response.headers
+    model = _parse_response(client=client, response=response)
+
+    return Response(status_code=status, content=raw_body, headers=raw_headers, parsed=model)
+
+
+def sync_detailed(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Response[Union[Error, Topic]]:
+    """Get a topic by its UID (blocking, full response).
+
+    Sends ``GET /topics/{id}`` through the client's synchronous httpx client
+    and wraps the result with ``_build_response``.
+
+    Args:
+        id (str): UID of the topic; it becomes part of the request path.
+        client (AuthenticatedClient): Client whose httpx client sends the request.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, Topic]]
+    """
+
+    request_args = _get_kwargs(id=id)
+
+    raw_response = client.get_httpx_client().request(**request_args)
+
+    return _build_response(client=client, response=raw_response)
+
+
+def sync(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Optional[Union[Error, Topic]]:
+    """Get a topic by its UID (blocking, parsed payload only).
+
+    Calls ``sync_detailed`` and returns the ``parsed`` attribute of its result.
+
+    Args:
+        id (str): UID of the topic; it becomes part of the request path.
+        client (AuthenticatedClient): Client whose httpx client sends the request.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, Topic]: ``None`` if the status is undocumented and the client does not raise.
+    """
+    detailed = sync_detailed(id=id, client=client)
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Response[Union[Error, Topic]]:
+    """Get a topic by its UID (awaitable, full response).
+
+    Sends ``GET /topics/{id}`` through the client's asynchronous httpx client
+    and wraps the result with ``_build_response``.
+
+    Args:
+        id (str): UID of the topic; it becomes part of the request path.
+        client (AuthenticatedClient): Client whose async httpx client sends the request.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Union[Error, Topic]]
+    """
+
+    request_args = _get_kwargs(id=id)
+    raw_response = await client.get_async_httpx_client().request(**request_args)
+    return _build_response(client=client, response=raw_response)
+
+
+async def asyncio(
+    id: str,
+    *,
+    client: AuthenticatedClient,
+) -> Optional[Union[Error, Topic]]:
+    """Get a topic by its UID (awaitable, parsed payload only).
+
+    Awaits ``asyncio_detailed`` and returns the ``parsed`` attribute of its result.
+
+    Args:
+        id (str): UID of the topic; it becomes part of the request path.
+        client (AuthenticatedClient): Client whose async httpx client sends the request.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Union[Error, Topic]: ``None`` if the status is undocumented and the client does not raise.
+    """
+
+    detailed = await asyncio_detailed(id=id, client=client)
+
+    return detailed.parsed
diff --git a/impresso/api_client/api/version/get_version_details.py b/impresso/api_client/api/version/get_version_details.py
index de0622e..2b23682 100644
--- a/impresso/api_client/api/version/get_version_details.py
+++ b/impresso/api_client/api/version/get_version_details.py
@@ -30,6 +30,10 @@ def _parse_response(
response_404 = Error.from_dict(response.json())
return response_404
+ if response.status_code == HTTPStatus.IM_A_TEAPOT:
+ response_418 = Error.from_dict(response.json())
+
+ return response_418
if response.status_code == HTTPStatus.UNPROCESSABLE_CONTENT:
response_422 = Error.from_dict(response.json())
diff --git a/impresso/api_client/models/__init__.py b/impresso/api_client/models/__init__.py
index 073066c..8a71f7a 100644
--- a/impresso/api_client/models/__init__.py
+++ b/impresso/api_client/models/__init__.py
@@ -1,5 +1,7 @@
"""Contains all the data models used in inputs/outputs"""
+from .add_collectable_items_from_filters import AddCollectableItemsFromFilters
+from .add_collectable_items_from_filters_namespace import AddCollectableItemsFromFiltersNamespace
from .authentication_create_request import AuthenticationCreateRequest
from .authentication_create_request_strategy import AuthenticationCreateRequestStrategy
from .authentication_response import AuthenticationResponse
@@ -13,12 +15,18 @@
from .collection_access_level import CollectionAccessLevel
from .content_item import ContentItem
from .content_item_copyright_status import ContentItemCopyrightStatus
+from .content_item_entities_mentions_information import ContentItemEntitiesMentionsInformation
from .content_item_media_type import ContentItemMediaType
+from .content_item_named_entities_information import ContentItemNamedEntitiesInformation
from .content_item_source_medium import ContentItemSourceMedium
+from .data_provider import DataProvider
+from .data_provider_names_item import DataProviderNamesItem
from .entity_details import EntityDetails
from .entity_details_type import EntityDetailsType
from .entity_mention import EntityMention
from .error import Error
+from .experiment_info import ExperimentInfo
+from .facet_with_label import FacetWithLabel
from .filter_ import Filter
from .filter_context import FilterContext
from .filter_op import FilterOp
@@ -26,9 +34,13 @@
from .find_collections_base_find_response import FindCollectionsBaseFindResponse
from .find_collections_base_find_response_pagination import FindCollectionsBaseFindResponsePagination
from .find_collections_order_by import FindCollectionsOrderBy
+from .find_data_providers_base_find_response import FindDataProvidersBaseFindResponse
+from .find_data_providers_base_find_response_pagination import FindDataProvidersBaseFindResponsePagination
from .find_entities_base_find_response import FindEntitiesBaseFindResponse
from .find_entities_base_find_response_pagination import FindEntitiesBaseFindResponsePagination
from .find_entities_order_by import FindEntitiesOrderBy
+from .find_experiments_base_find_response import FindExperimentsBaseFindResponse
+from .find_experiments_base_find_response_pagination import FindExperimentsBaseFindResponsePagination
from .find_images_base_find_response import FindImagesBaseFindResponse
from .find_images_base_find_response_pagination import FindImagesBaseFindResponsePagination
from .find_images_order_by import FindImagesOrderBy
@@ -42,6 +54,9 @@
from .find_text_reuse_passages_base_find_response import FindTextReusePassagesBaseFindResponse
from .find_text_reuse_passages_base_find_response_pagination import FindTextReusePassagesBaseFindResponsePagination
from .find_text_reuse_passages_order_by import FindTextReusePassagesOrderBy
+from .find_topics_base_find_response import FindTopicsBaseFindResponse
+from .find_topics_base_find_response_pagination import FindTopicsBaseFindResponsePagination
+from .find_topics_order_by import FindTopicsOrderBy
from .freeform import Freeform
from .get_images_facet_base_find_response import GetImagesFacetBaseFindResponse
from .get_images_facet_base_find_response_pagination import GetImagesFacetBaseFindResponsePagination
@@ -60,8 +75,12 @@
from .get_tr_passages_facet_id import GetTrPassagesFacetId
from .get_tr_passages_facet_order_by import GetTrPassagesFacetOrderBy
from .image import Image
+from .image_image_types import ImageImageTypes
from .image_media_source_ref import ImageMediaSourceRef
from .image_media_source_ref_type import ImageMediaSourceRefType
+from .impresso_embedding_response import ImpressoEmbeddingResponse
+from .impresso_image_embedding_request import ImpressoImageEmbeddingRequest
+from .impresso_image_embedding_request_search_target import ImpressoImageEmbeddingRequestSearchTarget
from .impresso_named_entity_recognition_entity import ImpressoNamedEntityRecognitionEntity
from .impresso_named_entity_recognition_entity_confidence import ImpressoNamedEntityRecognitionEntityConfidence
from .impresso_named_entity_recognition_entity_offset import ImpressoNamedEntityRecognitionEntityOffset
@@ -70,6 +89,9 @@
from .impresso_named_entity_recognition_request import ImpressoNamedEntityRecognitionRequest
from .impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod
from .impresso_named_entity_recognition_response import ImpressoNamedEntityRecognitionResponse
+from .impresso_text_embedding_request import ImpressoTextEmbeddingRequest
+from .impresso_text_embedding_request_search_target import ImpressoTextEmbeddingRequestSearchTarget
+from .interact_with_experiment_body import InteractWithExperimentBody
from .media_source import MediaSource
from .media_source_properties_item import MediaSourcePropertiesItem
from .media_source_totals import MediaSourceTotals
@@ -90,7 +112,9 @@
from .text_reuse_cluster_time_coverage import TextReuseClusterTimeCoverage
from .text_reuse_passage import TextReusePassage
from .text_reuse_passage_offset import TextReusePassageOffset
+from .topic import Topic
from .topic_mention import TopicMention
+from .topic_word import TopicWord
from .update_collectable_items_request import UpdateCollectableItemsRequest
from .version_details import VersionDetails
from .wikidata_location import WikidataLocation
@@ -105,6 +129,8 @@
from .word_match import WordMatch
__all__ = (
+ "AddCollectableItemsFromFilters",
+ "AddCollectableItemsFromFiltersNamespace",
"AuthenticationCreateRequest",
"AuthenticationCreateRequestStrategy",
"AuthenticationResponse",
@@ -118,12 +144,18 @@
"CollectionAccessLevel",
"ContentItem",
"ContentItemCopyrightStatus",
+ "ContentItemEntitiesMentionsInformation",
"ContentItemMediaType",
+ "ContentItemNamedEntitiesInformation",
"ContentItemSourceMedium",
+ "DataProvider",
+ "DataProviderNamesItem",
"EntityDetails",
"EntityDetailsType",
"EntityMention",
"Error",
+ "ExperimentInfo",
+ "FacetWithLabel",
"Filter",
"FilterContext",
"FilterOp",
@@ -131,9 +163,13 @@
"FindCollectionsBaseFindResponse",
"FindCollectionsBaseFindResponsePagination",
"FindCollectionsOrderBy",
+ "FindDataProvidersBaseFindResponse",
+ "FindDataProvidersBaseFindResponsePagination",
"FindEntitiesBaseFindResponse",
"FindEntitiesBaseFindResponsePagination",
"FindEntitiesOrderBy",
+ "FindExperimentsBaseFindResponse",
+ "FindExperimentsBaseFindResponsePagination",
"FindImagesBaseFindResponse",
"FindImagesBaseFindResponsePagination",
"FindImagesOrderBy",
@@ -147,6 +183,9 @@
"FindTextReusePassagesBaseFindResponse",
"FindTextReusePassagesBaseFindResponsePagination",
"FindTextReusePassagesOrderBy",
+ "FindTopicsBaseFindResponse",
+ "FindTopicsBaseFindResponsePagination",
+ "FindTopicsOrderBy",
"Freeform",
"GetImagesFacetBaseFindResponse",
"GetImagesFacetBaseFindResponsePagination",
@@ -165,8 +204,12 @@
"GetTrPassagesFacetId",
"GetTrPassagesFacetOrderBy",
"Image",
+ "ImageImageTypes",
"ImageMediaSourceRef",
"ImageMediaSourceRefType",
+ "ImpressoEmbeddingResponse",
+ "ImpressoImageEmbeddingRequest",
+ "ImpressoImageEmbeddingRequestSearchTarget",
"ImpressoNamedEntityRecognitionEntity",
"ImpressoNamedEntityRecognitionEntityConfidence",
"ImpressoNamedEntityRecognitionEntityOffset",
@@ -175,6 +218,9 @@
"ImpressoNamedEntityRecognitionRequest",
"ImpressoNamedEntityRecognitionRequestMethod",
"ImpressoNamedEntityRecognitionResponse",
+ "ImpressoTextEmbeddingRequest",
+ "ImpressoTextEmbeddingRequestSearchTarget",
+ "InteractWithExperimentBody",
"MediaSource",
"MediaSourcePropertiesItem",
"MediaSourceTotals",
@@ -195,7 +241,9 @@
"TextReuseClusterTimeCoverage",
"TextReusePassage",
"TextReusePassageOffset",
+ "Topic",
"TopicMention",
+ "TopicWord",
"UpdateCollectableItemsRequest",
"VersionDetails",
"WikidataLocation",
diff --git a/impresso/api_client/models/add_collectable_items_from_filters.py b/impresso/api_client/models/add_collectable_items_from_filters.py
new file mode 100644
index 0000000..b96fc81
--- /dev/null
+++ b/impresso/api_client/models/add_collectable_items_from_filters.py
@@ -0,0 +1,64 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+from ..models.add_collectable_items_from_filters_namespace import AddCollectableItemsFromFiltersNamespace
+
+if TYPE_CHECKING:
+ from ..models.filter_ import Filter
+
+
+T = TypeVar("T", bound="AddCollectableItemsFromFilters")
+
+
+@_attrs_define
+class AddCollectableItemsFromFilters:
+    """Request to add to a collection the content items that match the given filters
+
+    Attributes:
+        filters (List['Filter']): Filters to apply when selecting items to add to the collection
+        namespace (AddCollectableItemsFromFiltersNamespace): Namespace to use when selecting items to add to the
+            collection
+    """
+
+    filters: List["Filter"]
+    namespace: AddCollectableItemsFromFiltersNamespace
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialise the request into a plain dictionary.
+
+        Returns:
+            Dict[str, Any]: ``filters`` as the ``to_dict()`` form of every filter and
+                ``namespace`` as the enum's ``value``.
+        """
+        serialised_filters = [entry.to_dict() for entry in self.filters]
+
+        return {
+            "filters": serialised_filters,
+            "namespace": self.namespace.value,
+        }
+
+    @classmethod
+    def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+        """Build an instance from its dictionary representation.
+
+        Args:
+            src_dict (Dict[str, Any]): Mapping with the keys ``filters`` and ``namespace``;
+                it is copied first, so the caller's dictionary is not modified.
+
+        Raises:
+            KeyError: If ``filters`` or ``namespace`` is missing.
+            ValueError: If ``namespace`` is not a valid ``AddCollectableItemsFromFiltersNamespace`` value.
+
+        Returns:
+            T: The populated request object.
+        """
+        # ``Filter`` is only imported under TYPE_CHECKING at module level,
+        # so the runtime import happens here.
+        from ..models.filter_ import Filter
+
+        remaining = src_dict.copy()
+        parsed_filters = [Filter.from_dict(raw) for raw in remaining.pop("filters")]
+        parsed_namespace = AddCollectableItemsFromFiltersNamespace(remaining.pop("namespace"))
+
+        return cls(filters=parsed_filters, namespace=parsed_namespace)
diff --git a/impresso/api_client/models/add_collectable_items_from_filters_namespace.py b/impresso/api_client/models/add_collectable_items_from_filters_namespace.py
new file mode 100644
index 0000000..d6df77b
--- /dev/null
+++ b/impresso/api_client/models/add_collectable_items_from_filters_namespace.py
@@ -0,0 +1,16 @@
+from enum import Enum
+from typing import Literal
+
+
+class AddCollectableItemsFromFiltersNamespace(str, Enum):
+    """Namespaces accepted by the add-items-from-filters request (a ``str`` subclass)."""
+
+    SEARCH = "search"
+    TR_PASSAGES = "tr_passages"
+
+    def __str__(self) -> str:
+        return str(self.value)
+
+
+# Literal type listing the same string values as the enum above.
+AddCollectableItemsFromFiltersNamespaceLiteral = Literal["search", "tr_passages"]
diff --git a/impresso/api_client/models/content_item.py b/impresso/api_client/models/content_item.py
index 8de2bdf..30374e9 100644
--- a/impresso/api_client/models/content_item.py
+++ b/impresso/api_client/models/content_item.py
@@ -1,5 +1,5 @@
import datetime
-from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar, Union, cast
from attrs import define as _attrs_define
from dateutil.parser import isoparse
@@ -10,7 +10,8 @@
from ..types import UNSET, Unset
if TYPE_CHECKING:
- from ..models.named_entity import NamedEntity
+ from ..models.content_item_entities_mentions_information import ContentItemEntitiesMentionsInformation
+ from ..models.content_item_named_entities_information import ContentItemNamedEntitiesInformation
from ..models.topic_mention import TopicMention
@@ -30,13 +31,13 @@ class ContentItem:
print for newspapers, typescript for digitised radio bulletin typescripts).
title (Union[Unset, str]): The title of the content item.
transcript (Union[Unset, str]): Transcript of the content item.
- location_entities (Union[Unset, List['NamedEntity']]): Linked location entities mentioned in the content item.
- person_entities (Union[Unset, List['NamedEntity']]): Linked person entities mentioned in the content item.
- organisation_entities (Union[Unset, List['NamedEntity']]): Linked organisation entities mentioned in the content
- item.
- news_agencies_entities (Union[Unset, List['NamedEntity']]): Linked news agency entities mentioned in the content
- item.
+ entities (Union[Unset, ContentItemNamedEntitiesInformation]): A collection of linked named entities (location,
+ person, etc.) present in text.
+ mentions (Union[Unset, ContentItemEntitiesMentionsInformation]): A collection of entity mentions (location,
+ person, etc.) present in text.
topics (Union[Unset, List['TopicMention']]): Topics mentioned in the content item.
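+        embeddings (Union[Unset, List[str]]): Precomputed embeddings for the content item, each a string of two
+            ":"-separated parts (placeholders lost in generation; presumably model label and vector - confirm).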
transcript_length (Union[Unset, float]): The length of the transcript in characters.
total_pages (Union[Unset, float]): Total number of pages the item covers.
language_code (Union[Unset, str]): ISO 639-1 language code of the content item.
@@ -48,6 +49,11 @@ class ContentItem:
media_uid (Union[Unset, str]): Media title alias. Usually a 3 letter code of the media title (newspaper, radio
station, etc.).
media_type (Union[Unset, ContentItemMediaType]): The type of the media the content item belongs to.
+ has_olr (Union[Unset, bool]): Whether the content item has OCR/OLR data available.
+ ocr_quality_score (Union[Unset, float]): OCR quality score of the content item (0 - 1).
+ relevance_score (Union[Unset, float]): Relevance score of the content item (0 - 1).
+ page_numbers (Union[Unset, List[float]]): Page numbers the content item appears on.
+ collection_uids (Union[Unset, List[str]]): Unique identifiers of collections the content item belongs to.
"""
uid: str
@@ -56,11 +62,10 @@ class ContentItem:
source_medium: Union[Unset, ContentItemSourceMedium] = UNSET
title: Union[Unset, str] = UNSET
transcript: Union[Unset, str] = UNSET
- location_entities: Union[Unset, List["NamedEntity"]] = UNSET
- person_entities: Union[Unset, List["NamedEntity"]] = UNSET
- organisation_entities: Union[Unset, List["NamedEntity"]] = UNSET
- news_agencies_entities: Union[Unset, List["NamedEntity"]] = UNSET
+ entities: Union[Unset, "ContentItemNamedEntitiesInformation"] = UNSET
+ mentions: Union[Unset, "ContentItemEntitiesMentionsInformation"] = UNSET
topics: Union[Unset, List["TopicMention"]] = UNSET
+ embeddings: Union[Unset, List[str]] = UNSET
transcript_length: Union[Unset, float] = UNSET
total_pages: Union[Unset, float] = UNSET
language_code: Union[Unset, str] = UNSET
@@ -71,6 +76,11 @@ class ContentItem:
provider_code: Union[Unset, str] = UNSET
media_uid: Union[Unset, str] = UNSET
media_type: Union[Unset, ContentItemMediaType] = UNSET
+ has_olr: Union[Unset, bool] = UNSET
+ ocr_quality_score: Union[Unset, float] = UNSET
+ relevance_score: Union[Unset, float] = UNSET
+ page_numbers: Union[Unset, List[float]] = UNSET
+ collection_uids: Union[Unset, List[str]] = UNSET
def to_dict(self) -> Dict[str, Any]:
uid = self.uid
@@ -89,33 +99,13 @@ def to_dict(self) -> Dict[str, Any]:
transcript = self.transcript
- location_entities: Union[Unset, List[Dict[str, Any]]] = UNSET
- if not isinstance(self.location_entities, Unset):
- location_entities = []
- for location_entities_item_data in self.location_entities:
- location_entities_item = location_entities_item_data.to_dict()
- location_entities.append(location_entities_item)
-
- person_entities: Union[Unset, List[Dict[str, Any]]] = UNSET
- if not isinstance(self.person_entities, Unset):
- person_entities = []
- for person_entities_item_data in self.person_entities:
- person_entities_item = person_entities_item_data.to_dict()
- person_entities.append(person_entities_item)
-
- organisation_entities: Union[Unset, List[Dict[str, Any]]] = UNSET
- if not isinstance(self.organisation_entities, Unset):
- organisation_entities = []
- for organisation_entities_item_data in self.organisation_entities:
- organisation_entities_item = organisation_entities_item_data.to_dict()
- organisation_entities.append(organisation_entities_item)
-
- news_agencies_entities: Union[Unset, List[Dict[str, Any]]] = UNSET
- if not isinstance(self.news_agencies_entities, Unset):
- news_agencies_entities = []
- for news_agencies_entities_item_data in self.news_agencies_entities:
- news_agencies_entities_item = news_agencies_entities_item_data.to_dict()
- news_agencies_entities.append(news_agencies_entities_item)
+ entities: Union[Unset, Dict[str, Any]] = UNSET
+ if not isinstance(self.entities, Unset):
+ entities = self.entities.to_dict()
+
+ mentions: Union[Unset, Dict[str, Any]] = UNSET
+ if not isinstance(self.mentions, Unset):
+ mentions = self.mentions.to_dict()
topics: Union[Unset, List[Dict[str, Any]]] = UNSET
if not isinstance(self.topics, Unset):
@@ -124,6 +114,10 @@ def to_dict(self) -> Dict[str, Any]:
topics_item = topics_item_data.to_dict()
topics.append(topics_item)
+ embeddings: Union[Unset, List[str]] = UNSET
+ if not isinstance(self.embeddings, Unset):
+ embeddings = self.embeddings
+
transcript_length = self.transcript_length
total_pages = self.total_pages
@@ -148,6 +142,20 @@ def to_dict(self) -> Dict[str, Any]:
if not isinstance(self.media_type, Unset):
media_type = self.media_type.value
+ has_olr = self.has_olr
+
+ ocr_quality_score = self.ocr_quality_score
+
+ relevance_score = self.relevance_score
+
+ page_numbers: Union[Unset, List[float]] = UNSET
+ if not isinstance(self.page_numbers, Unset):
+ page_numbers = self.page_numbers
+
+ collection_uids: Union[Unset, List[str]] = UNSET
+ if not isinstance(self.collection_uids, Unset):
+ collection_uids = self.collection_uids
+
field_dict: Dict[str, Any] = {}
field_dict.update(
{
@@ -164,16 +172,14 @@ def to_dict(self) -> Dict[str, Any]:
field_dict["title"] = title
if transcript is not UNSET:
field_dict["transcript"] = transcript
- if location_entities is not UNSET:
- field_dict["locationEntities"] = location_entities
- if person_entities is not UNSET:
- field_dict["personEntities"] = person_entities
- if organisation_entities is not UNSET:
- field_dict["organisationEntities"] = organisation_entities
- if news_agencies_entities is not UNSET:
- field_dict["newsAgenciesEntities"] = news_agencies_entities
+ if entities is not UNSET:
+ field_dict["entities"] = entities
+ if mentions is not UNSET:
+ field_dict["mentions"] = mentions
if topics is not UNSET:
field_dict["topics"] = topics
+ if embeddings is not UNSET:
+ field_dict["embeddings"] = embeddings
if transcript_length is not UNSET:
field_dict["transcriptLength"] = transcript_length
if total_pages is not UNSET:
@@ -194,12 +200,23 @@ def to_dict(self) -> Dict[str, Any]:
field_dict["mediaUid"] = media_uid
if media_type is not UNSET:
field_dict["mediaType"] = media_type
+ if has_olr is not UNSET:
+ field_dict["hasOLR"] = has_olr
+ if ocr_quality_score is not UNSET:
+ field_dict["ocrQualityScore"] = ocr_quality_score
+ if relevance_score is not UNSET:
+ field_dict["relevanceScore"] = relevance_score
+ if page_numbers is not UNSET:
+ field_dict["pageNumbers"] = page_numbers
+ if collection_uids is not UNSET:
+ field_dict["collectionUids"] = collection_uids
return field_dict
@classmethod
def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
- from ..models.named_entity import NamedEntity
+ from ..models.content_item_entities_mentions_information import ContentItemEntitiesMentionsInformation
+ from ..models.content_item_named_entities_information import ContentItemNamedEntitiesInformation
from ..models.topic_mention import TopicMention
d = src_dict.copy()
@@ -225,33 +242,19 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
transcript = d.pop("transcript", UNSET)
- location_entities = []
- _location_entities = d.pop("locationEntities", UNSET)
- for location_entities_item_data in _location_entities or []:
- location_entities_item = NamedEntity.from_dict(location_entities_item_data)
-
- location_entities.append(location_entities_item)
-
- person_entities = []
- _person_entities = d.pop("personEntities", UNSET)
- for person_entities_item_data in _person_entities or []:
- person_entities_item = NamedEntity.from_dict(person_entities_item_data)
-
- person_entities.append(person_entities_item)
-
- organisation_entities = []
- _organisation_entities = d.pop("organisationEntities", UNSET)
- for organisation_entities_item_data in _organisation_entities or []:
- organisation_entities_item = NamedEntity.from_dict(organisation_entities_item_data)
-
- organisation_entities.append(organisation_entities_item)
-
- news_agencies_entities = []
- _news_agencies_entities = d.pop("newsAgenciesEntities", UNSET)
- for news_agencies_entities_item_data in _news_agencies_entities or []:
- news_agencies_entities_item = NamedEntity.from_dict(news_agencies_entities_item_data)
+ _entities = d.pop("entities", UNSET)
+ entities: Union[Unset, ContentItemNamedEntitiesInformation]
+ if isinstance(_entities, Unset):
+ entities = UNSET
+ else:
+ entities = ContentItemNamedEntitiesInformation.from_dict(_entities)
- news_agencies_entities.append(news_agencies_entities_item)
+ _mentions = d.pop("mentions", UNSET)
+ mentions: Union[Unset, ContentItemEntitiesMentionsInformation]
+ if isinstance(_mentions, Unset):
+ mentions = UNSET
+ else:
+ mentions = ContentItemEntitiesMentionsInformation.from_dict(_mentions)
topics = []
_topics = d.pop("topics", UNSET)
@@ -260,6 +263,8 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
topics.append(topics_item)
+ embeddings = cast(List[str], d.pop("embeddings", UNSET))
+
transcript_length = d.pop("transcriptLength", UNSET)
total_pages = d.pop("totalPages", UNSET)
@@ -290,6 +295,16 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
else:
media_type = ContentItemMediaType(_media_type)
+ has_olr = d.pop("hasOLR", UNSET)
+
+ ocr_quality_score = d.pop("ocrQualityScore", UNSET)
+
+ relevance_score = d.pop("relevanceScore", UNSET)
+
+ page_numbers = cast(List[float], d.pop("pageNumbers", UNSET))
+
+ collection_uids = cast(List[str], d.pop("collectionUids", UNSET))
+
content_item = cls(
uid=uid,
copyright_status=copyright_status,
@@ -297,11 +312,10 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
source_medium=source_medium,
title=title,
transcript=transcript,
- location_entities=location_entities,
- person_entities=person_entities,
- organisation_entities=organisation_entities,
- news_agencies_entities=news_agencies_entities,
+ entities=entities,
+ mentions=mentions,
topics=topics,
+ embeddings=embeddings,
transcript_length=transcript_length,
total_pages=total_pages,
language_code=language_code,
@@ -312,6 +326,11 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
provider_code=provider_code,
media_uid=media_uid,
media_type=media_type,
+ has_olr=has_olr,
+ ocr_quality_score=ocr_quality_score,
+ relevance_score=relevance_score,
+ page_numbers=page_numbers,
+ collection_uids=collection_uids,
)
return content_item
diff --git a/impresso/api_client/models/content_item_entities_mentions_information.py b/impresso/api_client/models/content_item_entities_mentions_information.py
new file mode 100644
index 0000000..1d2b046
--- /dev/null
+++ b/impresso/api_client/models/content_item_entities_mentions_information.py
@@ -0,0 +1,112 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar, Union
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+if TYPE_CHECKING:
+ from ..models.entity_mention import EntityMention
+
+
+T = TypeVar("T", bound="ContentItemEntitiesMentionsInformation")
+
+
+@_attrs_define
+class ContentItemEntitiesMentionsInformation:
+ """A collection of entity mentions (location, person, etc.) present in text.
+
+ Attributes:
+ locations (Union[Unset, List['EntityMention']]): Locations mentioned in the content item.
+ persons (Union[Unset, List['EntityMention']]): Persons mentioned in the content item.
+ organisations (Union[Unset, List['EntityMention']]): Organisations mentioned in the content item.
+ news_agencies (Union[Unset, List['EntityMention']]): News agencies mentioned in the content item.
+ """
+
+ locations: Union[Unset, List["EntityMention"]] = UNSET
+ persons: Union[Unset, List["EntityMention"]] = UNSET
+ organisations: Union[Unset, List["EntityMention"]] = UNSET
+ news_agencies: Union[Unset, List["EntityMention"]] = UNSET
+
+ def to_dict(self) -> Dict[str, Any]:
+ locations: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.locations, Unset):
+ locations = []
+ for locations_item_data in self.locations:
+ locations_item = locations_item_data.to_dict()
+ locations.append(locations_item)
+
+ persons: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.persons, Unset):
+ persons = []
+ for persons_item_data in self.persons:
+ persons_item = persons_item_data.to_dict()
+ persons.append(persons_item)
+
+ organisations: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.organisations, Unset):
+ organisations = []
+ for organisations_item_data in self.organisations:
+ organisations_item = organisations_item_data.to_dict()
+ organisations.append(organisations_item)
+
+ news_agencies: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.news_agencies, Unset):
+ news_agencies = []
+ for news_agencies_item_data in self.news_agencies:
+ news_agencies_item = news_agencies_item_data.to_dict()
+ news_agencies.append(news_agencies_item)
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update({})
+ if locations is not UNSET:
+ field_dict["locations"] = locations
+ if persons is not UNSET:
+ field_dict["persons"] = persons
+ if organisations is not UNSET:
+ field_dict["organisations"] = organisations
+ if news_agencies is not UNSET:
+ field_dict["newsAgencies"] = news_agencies
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.entity_mention import EntityMention
+
+ d = src_dict.copy()
+ locations = []
+ _locations = d.pop("locations", UNSET)
+ for locations_item_data in _locations or []:
+ locations_item = EntityMention.from_dict(locations_item_data)
+
+ locations.append(locations_item)
+
+ persons = []
+ _persons = d.pop("persons", UNSET)
+ for persons_item_data in _persons or []:
+ persons_item = EntityMention.from_dict(persons_item_data)
+
+ persons.append(persons_item)
+
+ organisations = []
+ _organisations = d.pop("organisations", UNSET)
+ for organisations_item_data in _organisations or []:
+ organisations_item = EntityMention.from_dict(organisations_item_data)
+
+ organisations.append(organisations_item)
+
+ news_agencies = []
+ _news_agencies = d.pop("newsAgencies", UNSET)
+ for news_agencies_item_data in _news_agencies or []:
+ news_agencies_item = EntityMention.from_dict(news_agencies_item_data)
+
+ news_agencies.append(news_agencies_item)
+
+ content_item_entities_mentions_information = cls(
+ locations=locations,
+ persons=persons,
+ organisations=organisations,
+ news_agencies=news_agencies,
+ )
+
+ return content_item_entities_mentions_information
diff --git a/impresso/api_client/models/content_item_named_entities_information.py b/impresso/api_client/models/content_item_named_entities_information.py
new file mode 100644
index 0000000..2ca50c0
--- /dev/null
+++ b/impresso/api_client/models/content_item_named_entities_information.py
@@ -0,0 +1,112 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar, Union
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+if TYPE_CHECKING:
+ from ..models.named_entity import NamedEntity
+
+
+T = TypeVar("T", bound="ContentItemNamedEntitiesInformation")
+
+
+@_attrs_define
+class ContentItemNamedEntitiesInformation:
+ """A collection of linked named entities (location, person, etc.) present in text.
+
+ Attributes:
+ locations (Union[Unset, List['NamedEntity']]): Linked location entities mentioned in the content item.
+ persons (Union[Unset, List['NamedEntity']]): Linked person entities mentioned in the content item.
+ organisations (Union[Unset, List['NamedEntity']]): Linked organisation entities mentioned in the content item.
+ news_agencies (Union[Unset, List['NamedEntity']]): Linked news agency entities mentioned in the content item.
+ """
+
+ locations: Union[Unset, List["NamedEntity"]] = UNSET
+ persons: Union[Unset, List["NamedEntity"]] = UNSET
+ organisations: Union[Unset, List["NamedEntity"]] = UNSET
+ news_agencies: Union[Unset, List["NamedEntity"]] = UNSET
+
+ def to_dict(self) -> Dict[str, Any]:
+ locations: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.locations, Unset):
+ locations = []
+ for locations_item_data in self.locations:
+ locations_item = locations_item_data.to_dict()
+ locations.append(locations_item)
+
+ persons: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.persons, Unset):
+ persons = []
+ for persons_item_data in self.persons:
+ persons_item = persons_item_data.to_dict()
+ persons.append(persons_item)
+
+ organisations: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.organisations, Unset):
+ organisations = []
+ for organisations_item_data in self.organisations:
+ organisations_item = organisations_item_data.to_dict()
+ organisations.append(organisations_item)
+
+ news_agencies: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.news_agencies, Unset):
+ news_agencies = []
+ for news_agencies_item_data in self.news_agencies:
+ news_agencies_item = news_agencies_item_data.to_dict()
+ news_agencies.append(news_agencies_item)
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update({})
+ if locations is not UNSET:
+ field_dict["locations"] = locations
+ if persons is not UNSET:
+ field_dict["persons"] = persons
+ if organisations is not UNSET:
+ field_dict["organisations"] = organisations
+ if news_agencies is not UNSET:
+ field_dict["newsAgencies"] = news_agencies
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.named_entity import NamedEntity
+
+ d = src_dict.copy()
+ locations = []
+ _locations = d.pop("locations", UNSET)
+ for locations_item_data in _locations or []:
+ locations_item = NamedEntity.from_dict(locations_item_data)
+
+ locations.append(locations_item)
+
+ persons = []
+ _persons = d.pop("persons", UNSET)
+ for persons_item_data in _persons or []:
+ persons_item = NamedEntity.from_dict(persons_item_data)
+
+ persons.append(persons_item)
+
+ organisations = []
+ _organisations = d.pop("organisations", UNSET)
+ for organisations_item_data in _organisations or []:
+ organisations_item = NamedEntity.from_dict(organisations_item_data)
+
+ organisations.append(organisations_item)
+
+ news_agencies = []
+ _news_agencies = d.pop("newsAgencies", UNSET)
+ for news_agencies_item_data in _news_agencies or []:
+ news_agencies_item = NamedEntity.from_dict(news_agencies_item_data)
+
+ news_agencies.append(news_agencies_item)
+
+ content_item_named_entities_information = cls(
+ locations=locations,
+ persons=persons,
+ organisations=organisations,
+ news_agencies=news_agencies,
+ )
+
+ return content_item_named_entities_information
diff --git a/impresso/api_client/models/data_provider.py b/impresso/api_client/models/data_provider.py
new file mode 100644
index 0000000..1c0a866
--- /dev/null
+++ b/impresso/api_client/models/data_provider.py
@@ -0,0 +1,70 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+if TYPE_CHECKING:
+ from ..models.data_provider_names_item import DataProviderNamesItem
+
+
+T = TypeVar("T", bound="DataProvider")
+
+
+@_attrs_define
+class DataProvider:
+ """A data provider is a partner institution that provides content to Impresso (e.g., libraries, archives, media
+ organizations).
+
+ Attributes:
+ id (str): The unique identifier of the data provider.
+ name (str): The default name of the data provider.
+ names (List['DataProviderNamesItem']): Names of the data provider in different languages.
+ """
+
+ id: str
+ name: str
+ names: List["DataProviderNamesItem"]
+
+ def to_dict(self) -> Dict[str, Any]:
+ id = self.id
+
+ name = self.name
+
+ names = []
+ for names_item_data in self.names:
+ names_item = names_item_data.to_dict()
+ names.append(names_item)
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "id": id,
+ "name": name,
+ "names": names,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.data_provider_names_item import DataProviderNamesItem
+
+ d = src_dict.copy()
+ id = d.pop("id")
+
+ name = d.pop("name")
+
+ names = []
+ _names = d.pop("names")
+ for names_item_data in _names:
+ names_item = DataProviderNamesItem.from_dict(names_item_data)
+
+ names.append(names_item)
+
+ data_provider = cls(
+ id=id,
+ name=name,
+ names=names,
+ )
+
+ return data_provider
diff --git a/impresso/api_client/models/data_provider_names_item.py b/impresso/api_client/models/data_provider_names_item.py
new file mode 100644
index 0000000..db5344b
--- /dev/null
+++ b/impresso/api_client/models/data_provider_names_item.py
@@ -0,0 +1,46 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="DataProviderNamesItem")
+
+
+@_attrs_define
+class DataProviderNamesItem:
+ """
+ Attributes:
+ lang_code (str): ISO 639-1 language code.
+ name (str): Name of the data provider in this language.
+ """
+
+ lang_code: str
+ name: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ lang_code = self.lang_code
+
+ name = self.name
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "langCode": lang_code,
+ "name": name,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ d = src_dict.copy()
+ lang_code = d.pop("langCode")
+
+ name = d.pop("name")
+
+ data_provider_names_item = cls(
+ lang_code=lang_code,
+ name=name,
+ )
+
+ return data_provider_names_item
diff --git a/impresso/api_client/models/entity_mention.py b/impresso/api_client/models/entity_mention.py
index e3114de..0c1d81e 100644
--- a/impresso/api_client/models/entity_mention.py
+++ b/impresso/api_client/models/entity_mention.py
@@ -9,42 +9,59 @@
@_attrs_define
class EntityMention:
- """An entity (location, persion) mention.
+ """An entity (location, person, etc.) mentioned in the content item.
Attributes:
- uid (str): Unique identifier of the entity
- relevance (Union[Unset, float]): Relevance of the entity in the document
+ surface_form (str): The surface form (label) of the entity mention
+ mention_confidence (float): Confidence score of the entity mention
+ start_offset (Union[Unset, int]): Start offset of the entity mention in the content item
+ end_offset (Union[Unset, int]): End offset of the entity mention in the content item
"""
- uid: str
- relevance: Union[Unset, float] = UNSET
+ surface_form: str
+ mention_confidence: float
+ start_offset: Union[Unset, int] = UNSET
+ end_offset: Union[Unset, int] = UNSET
def to_dict(self) -> Dict[str, Any]:
- uid = self.uid
+ surface_form = self.surface_form
- relevance = self.relevance
+ mention_confidence = self.mention_confidence
+
+ start_offset = self.start_offset
+
+ end_offset = self.end_offset
field_dict: Dict[str, Any] = {}
field_dict.update(
{
- "uid": uid,
+ "surfaceForm": surface_form,
+ "mentionConfidence": mention_confidence,
}
)
- if relevance is not UNSET:
- field_dict["relevance"] = relevance
+ if start_offset is not UNSET:
+ field_dict["startOffset"] = start_offset
+ if end_offset is not UNSET:
+ field_dict["endOffset"] = end_offset
return field_dict
@classmethod
def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
d = src_dict.copy()
- uid = d.pop("uid")
+ surface_form = d.pop("surfaceForm")
+
+ mention_confidence = d.pop("mentionConfidence")
+
+ start_offset = d.pop("startOffset", UNSET)
- relevance = d.pop("relevance", UNSET)
+ end_offset = d.pop("endOffset", UNSET)
entity_mention = cls(
- uid=uid,
- relevance=relevance,
+ surface_form=surface_form,
+ mention_confidence=mention_confidence,
+ start_offset=start_offset,
+ end_offset=end_offset,
)
return entity_mention
diff --git a/impresso/api_client/models/experiment_info.py b/impresso/api_client/models/experiment_info.py
new file mode 100644
index 0000000..c3afe97
--- /dev/null
+++ b/impresso/api_client/models/experiment_info.py
@@ -0,0 +1,58 @@
+from typing import Any, Dict, Type, TypeVar, Union
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+T = TypeVar("T", bound="ExperimentInfo")
+
+
+@_attrs_define
+class ExperimentInfo:
+ """Information about an available experiment including its identifier, name, and description.
+
+ Attributes:
+ id (str): The unique identifier of the experiment.
+ name (str): The display name of the experiment.
+ description (Union[Unset, str]): A description of what the experiment does.
+ """
+
+ id: str
+ name: str
+ description: Union[Unset, str] = UNSET
+
+ def to_dict(self) -> Dict[str, Any]:
+ id = self.id
+
+ name = self.name
+
+ description = self.description
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "id": id,
+ "name": name,
+ }
+ )
+ if description is not UNSET:
+ field_dict["description"] = description
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ d = src_dict.copy()
+ id = d.pop("id")
+
+ name = d.pop("name")
+
+ description = d.pop("description", UNSET)
+
+ experiment_info = cls(
+ id=id,
+ name=name,
+ description=description,
+ )
+
+ return experiment_info
diff --git a/impresso/api_client/models/facet_with_label.py b/impresso/api_client/models/facet_with_label.py
new file mode 100644
index 0000000..f0c5b30
--- /dev/null
+++ b/impresso/api_client/models/facet_with_label.py
@@ -0,0 +1,47 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="FacetWithLabel")
+
+
+@_attrs_define
+class FacetWithLabel:
+ """A facet that has a value and a label
+
+ Attributes:
+ id (str): Unique identifier of the facet
+ label (str): Label of the facet
+ """
+
+ id: str
+ label: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ id = self.id
+
+ label = self.label
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "id": id,
+ "label": label,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ d = src_dict.copy()
+ id = d.pop("id")
+
+ label = d.pop("label")
+
+ facet_with_label = cls(
+ id=id,
+ label=label,
+ )
+
+ return facet_with_label
diff --git a/impresso/api_client/models/find_data_providers_base_find_response.py b/impresso/api_client/models/find_data_providers_base_find_response.py
new file mode 100644
index 0000000..2b0eb82
--- /dev/null
+++ b/impresso/api_client/models/find_data_providers_base_find_response.py
@@ -0,0 +1,64 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+if TYPE_CHECKING:
+ from ..models.data_provider import DataProvider
+ from ..models.find_data_providers_base_find_response_pagination import FindDataProvidersBaseFindResponsePagination
+
+
+T = TypeVar("T", bound="FindDataProvidersBaseFindResponse")
+
+
+@_attrs_define
+class FindDataProvidersBaseFindResponse:
+ """
+ Attributes:
+ data (List['DataProvider']):
+ pagination (FindDataProvidersBaseFindResponsePagination):
+ """
+
+ data: List["DataProvider"]
+ pagination: "FindDataProvidersBaseFindResponsePagination"
+
+ def to_dict(self) -> Dict[str, Any]:
+ data = []
+ for data_item_data in self.data:
+ data_item = data_item_data.to_dict()
+ data.append(data_item)
+
+ pagination = self.pagination.to_dict()
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "data": data,
+ "pagination": pagination,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.data_provider import DataProvider
+ from ..models.find_data_providers_base_find_response_pagination import (
+ FindDataProvidersBaseFindResponsePagination,
+ )
+
+ d = src_dict.copy()
+ data = []
+ _data = d.pop("data")
+ for data_item_data in _data:
+ data_item = DataProvider.from_dict(data_item_data)
+
+ data.append(data_item)
+
+ pagination = FindDataProvidersBaseFindResponsePagination.from_dict(d.pop("pagination"))
+
+ find_data_providers_base_find_response = cls(
+ data=data,
+ pagination=pagination,
+ )
+
+ return find_data_providers_base_find_response
diff --git a/impresso/api_client/models/find_data_providers_base_find_response_pagination.py b/impresso/api_client/models/find_data_providers_base_find_response_pagination.py
new file mode 100644
index 0000000..9f304bd
--- /dev/null
+++ b/impresso/api_client/models/find_data_providers_base_find_response_pagination.py
@@ -0,0 +1,54 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="FindDataProvidersBaseFindResponsePagination")
+
+
+@_attrs_define
+class FindDataProvidersBaseFindResponsePagination:
+ """
+ Attributes:
+ total (int): The total number of items matching the query
+ limit (int): The number of items returned in this response
+ offset (int): Starting index of the items subset returned in this response
+ """
+
+ total: int
+ limit: int
+ offset: int
+
+ def to_dict(self) -> Dict[str, Any]:
+ total = self.total
+
+ limit = self.limit
+
+ offset = self.offset
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "total": total,
+ "limit": limit,
+ "offset": offset,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ d = src_dict.copy()
+ total = d.pop("total")
+
+ limit = d.pop("limit")
+
+ offset = d.pop("offset")
+
+ find_data_providers_base_find_response_pagination = cls(
+ total=total,
+ limit=limit,
+ offset=offset,
+ )
+
+ return find_data_providers_base_find_response_pagination
diff --git a/impresso/api_client/models/find_experiments_base_find_response.py b/impresso/api_client/models/find_experiments_base_find_response.py
new file mode 100644
index 0000000..a445269
--- /dev/null
+++ b/impresso/api_client/models/find_experiments_base_find_response.py
@@ -0,0 +1,62 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+if TYPE_CHECKING:
+ from ..models.experiment_info import ExperimentInfo
+ from ..models.find_experiments_base_find_response_pagination import FindExperimentsBaseFindResponsePagination
+
+
+T = TypeVar("T", bound="FindExperimentsBaseFindResponse")
+
+
+@_attrs_define
+class FindExperimentsBaseFindResponse:
+ """
+ Attributes:
+ data (List['ExperimentInfo']):
+ pagination (FindExperimentsBaseFindResponsePagination):
+ """
+
+ data: List["ExperimentInfo"]
+ pagination: "FindExperimentsBaseFindResponsePagination"
+
+ def to_dict(self) -> Dict[str, Any]:
+ data = []
+ for data_item_data in self.data:
+ data_item = data_item_data.to_dict()
+ data.append(data_item)
+
+ pagination = self.pagination.to_dict()
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "data": data,
+ "pagination": pagination,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.experiment_info import ExperimentInfo
+ from ..models.find_experiments_base_find_response_pagination import FindExperimentsBaseFindResponsePagination
+
+ d = src_dict.copy()
+ data = []
+ _data = d.pop("data")
+ for data_item_data in _data:
+ data_item = ExperimentInfo.from_dict(data_item_data)
+
+ data.append(data_item)
+
+ pagination = FindExperimentsBaseFindResponsePagination.from_dict(d.pop("pagination"))
+
+ find_experiments_base_find_response = cls(
+ data=data,
+ pagination=pagination,
+ )
+
+ return find_experiments_base_find_response
diff --git a/impresso/api_client/models/find_experiments_base_find_response_pagination.py b/impresso/api_client/models/find_experiments_base_find_response_pagination.py
new file mode 100644
index 0000000..4f40582
--- /dev/null
+++ b/impresso/api_client/models/find_experiments_base_find_response_pagination.py
@@ -0,0 +1,54 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="FindExperimentsBaseFindResponsePagination")
+
+
+@_attrs_define
+class FindExperimentsBaseFindResponsePagination:
+ """
+ Attributes:
+ total (int): The total number of items matching the query
+ limit (int): The number of items returned in this response
+ offset (int): Starting index of the items subset returned in this response
+ """
+
+ total: int
+ limit: int
+ offset: int
+
+ def to_dict(self) -> Dict[str, Any]:
+ total = self.total
+
+ limit = self.limit
+
+ offset = self.offset
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "total": total,
+ "limit": limit,
+ "offset": offset,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ d = src_dict.copy()
+ total = d.pop("total")
+
+ limit = d.pop("limit")
+
+ offset = d.pop("offset")
+
+ find_experiments_base_find_response_pagination = cls(
+ total=total,
+ limit=limit,
+ offset=offset,
+ )
+
+ return find_experiments_base_find_response_pagination
diff --git a/impresso/api_client/models/find_topics_base_find_response.py b/impresso/api_client/models/find_topics_base_find_response.py
new file mode 100644
index 0000000..2bfb5b6
--- /dev/null
+++ b/impresso/api_client/models/find_topics_base_find_response.py
@@ -0,0 +1,62 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+if TYPE_CHECKING:
+ from ..models.find_topics_base_find_response_pagination import FindTopicsBaseFindResponsePagination
+ from ..models.topic import Topic
+
+
+T = TypeVar("T", bound="FindTopicsBaseFindResponse")
+
+
+@_attrs_define
+class FindTopicsBaseFindResponse:
+ """
+ Attributes:
+ data (List['Topic']):
+ pagination (FindTopicsBaseFindResponsePagination):
+ """
+
+ data: List["Topic"]
+ pagination: "FindTopicsBaseFindResponsePagination"
+
+ def to_dict(self) -> Dict[str, Any]:
+ data = []
+ for data_item_data in self.data:
+ data_item = data_item_data.to_dict()
+ data.append(data_item)
+
+ pagination = self.pagination.to_dict()
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "data": data,
+ "pagination": pagination,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.find_topics_base_find_response_pagination import FindTopicsBaseFindResponsePagination
+ from ..models.topic import Topic
+
+ d = src_dict.copy()
+ data = []
+ _data = d.pop("data")
+ for data_item_data in _data:
+ data_item = Topic.from_dict(data_item_data)
+
+ data.append(data_item)
+
+ pagination = FindTopicsBaseFindResponsePagination.from_dict(d.pop("pagination"))
+
+ find_topics_base_find_response = cls(
+ data=data,
+ pagination=pagination,
+ )
+
+ return find_topics_base_find_response
diff --git a/impresso/api_client/models/find_topics_base_find_response_pagination.py b/impresso/api_client/models/find_topics_base_find_response_pagination.py
new file mode 100644
index 0000000..40ffddb
--- /dev/null
+++ b/impresso/api_client/models/find_topics_base_find_response_pagination.py
@@ -0,0 +1,54 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="FindTopicsBaseFindResponsePagination")
+
+
+@_attrs_define
+class FindTopicsBaseFindResponsePagination:
+ """Pagination details: total match count plus the limit and offset of the returned subset.
+
+ Attributes:
+ total (int): The total number of items matching the query
+ limit (int): The number of items returned in this response
+ offset (int): Starting index of the items subset returned in this response
+ """
+
+ total: int
+ limit: int
+ offset: int
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a plain dict with the keys "total", "limit" and "offset"."""
+ total = self.total
+
+ limit = self.limit
+
+ offset = self.offset
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "total": total,
+ "limit": limit,
+ "offset": offset,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict; "total", "limit" and "offset" are all required.
+
+ The values are passed through as-is (no type conversion); a missing key raises ``KeyError``.
+ """
+ # Work on a shallow copy so the pops below leave the caller's dict untouched.
+ d = src_dict.copy()
+ total = d.pop("total")
+
+ limit = d.pop("limit")
+
+ offset = d.pop("offset")
+
+ find_topics_base_find_response_pagination = cls(
+ total=total,
+ limit=limit,
+ offset=offset,
+ )
+
+ return find_topics_base_find_response_pagination
diff --git a/impresso/api_client/models/find_topics_order_by.py b/impresso/api_client/models/find_topics_order_by.py
new file mode 100644
index 0000000..d788883
--- /dev/null
+++ b/impresso/api_client/models/find_topics_order_by.py
@@ -0,0 +1,20 @@
+from enum import Enum
+from typing import Literal
+
+
+class FindTopicsOrderBy(str, Enum):
+ # Order-by values for topic queries (per the class name): "model", "name" and their "-"-prefixed forms.
+ # VALUE_1 / VALUE_3 are the member names given to "-name" / "-model"; the "-" prefix presumably
+ # requests descending order - confirm against the API documentation.
+ MODEL = "model"
+ NAME = "name"
+ VALUE_1 = "-name"
+ VALUE_3 = "-model"
+
+ def __str__(self) -> str:
+ # Return the member's raw string value.
+ return str(self.value)
+
+
+# Literal type listing the same four strings as the FindTopicsOrderBy members.
+FindTopicsOrderByLiteral = Literal[
+ "model",
+ "name",
+ "-name",
+ "-model",
+]
diff --git a/impresso/api_client/models/get_images_facet_id.py b/impresso/api_client/models/get_images_facet_id.py
index cc8f0c0..f7b1c98 100644
--- a/impresso/api_client/models/get_images_facet_id.py
+++ b/impresso/api_client/models/get_images_facet_id.py
@@ -3,6 +3,11 @@
class GetImagesFacetId(str, Enum):
+ COLLECTION = "collection"
+ IMAGECOMMUNICATIONGOAL = "imageCommunicationGoal"
+ IMAGECONTENTTYPE = "imageContentType"
+ IMAGETECHNIQUE = "imageTechnique"
+ IMAGEVISUALCONTENT = "imageVisualContent"
NEWSPAPER = "newspaper"
YEAR = "year"
@@ -11,6 +16,11 @@ def __str__(self) -> str:
GetImagesFacetIdLiteral = Literal[
+ "collection",
+ "imageCommunicationGoal",
+ "imageContentType",
+ "imageTechnique",
+ "imageVisualContent",
"newspaper",
"year",
]
diff --git a/impresso/api_client/models/get_tr_passages_facet_id.py b/impresso/api_client/models/get_tr_passages_facet_id.py
index 0bb41c9..f47a508 100644
--- a/impresso/api_client/models/get_tr_passages_facet_id.py
+++ b/impresso/api_client/models/get_tr_passages_facet_id.py
@@ -11,6 +11,7 @@ class GetTrPassagesFacetId(str, Enum):
LOCATION = "location"
NAG = "nag"
NEWSPAPER = "newspaper"
+ ORGANISATION = "organisation"
PERSON = "person"
TEXTREUSECLUSTER = "textReuseCluster"
TEXTREUSECLUSTERDAYDELTA = "textReuseClusterDayDelta"
@@ -33,6 +34,7 @@ def __str__(self) -> str:
"location",
"nag",
"newspaper",
+ "organisation",
"person",
"textReuseCluster",
"textReuseClusterDayDelta",
diff --git a/impresso/api_client/models/image.py b/impresso/api_client/models/image.py
index d7804f4..dd41064 100644
--- a/impresso/api_client/models/image.py
+++ b/impresso/api_client/models/image.py
@@ -7,6 +7,7 @@
from ..types import UNSET, Unset
if TYPE_CHECKING:
+ from ..models.image_image_types import ImageImageTypes
from ..models.image_media_source_ref import ImageMediaSourceRef
@@ -26,6 +27,9 @@ class Image:
caption (Union[Unset, str]): Image caption
content_item_uid (Union[Unset, str]): The unique identifier of the content item that the image belongs to.
page_numbers (Union[Unset, List[int]]): The page numbers of the issue that the image belongs to.
+ image_types (Union[Unset, ImageImageTypes]):
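+ embeddings (Union[Unset, List[str]]): Precomputed embeddings for the image, each presumably in the format
+ `<model>:<base64 data>` (the placeholders are missing from the generated description).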
"""
uid: str
@@ -36,6 +40,8 @@ class Image:
caption: Union[Unset, str] = UNSET
content_item_uid: Union[Unset, str] = UNSET
page_numbers: Union[Unset, List[int]] = UNSET
+ image_types: Union[Unset, "ImageImageTypes"] = UNSET
+ embeddings: Union[Unset, List[str]] = UNSET
def to_dict(self) -> Dict[str, Any]:
uid = self.uid
@@ -56,6 +62,14 @@ def to_dict(self) -> Dict[str, Any]:
if not isinstance(self.page_numbers, Unset):
page_numbers = self.page_numbers
+ image_types: Union[Unset, Dict[str, Any]] = UNSET
+ if not isinstance(self.image_types, Unset):
+ image_types = self.image_types.to_dict()
+
+ embeddings: Union[Unset, List[str]] = UNSET
+ if not isinstance(self.embeddings, Unset):
+ embeddings = self.embeddings
+
field_dict: Dict[str, Any] = {}
field_dict.update(
{
@@ -72,11 +86,16 @@ def to_dict(self) -> Dict[str, Any]:
field_dict["contentItemUid"] = content_item_uid
if page_numbers is not UNSET:
field_dict["pageNumbers"] = page_numbers
+ if image_types is not UNSET:
+ field_dict["imageTypes"] = image_types
+ if embeddings is not UNSET:
+ field_dict["embeddings"] = embeddings
return field_dict
@classmethod
def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ from ..models.image_image_types import ImageImageTypes
from ..models.image_media_source_ref import ImageMediaSourceRef
d = src_dict.copy()
@@ -96,6 +115,15 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
page_numbers = cast(List[int], d.pop("pageNumbers", UNSET))
+ _image_types = d.pop("imageTypes", UNSET)
+ image_types: Union[Unset, ImageImageTypes]
+ if isinstance(_image_types, Unset):
+ image_types = UNSET
+ else:
+ image_types = ImageImageTypes.from_dict(_image_types)
+
+ embeddings = cast(List[str], d.pop("embeddings", UNSET))
+
image = cls(
uid=uid,
issue_uid=issue_uid,
@@ -105,6 +133,8 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
caption=caption,
content_item_uid=content_item_uid,
page_numbers=page_numbers,
+ image_types=image_types,
+ embeddings=embeddings,
)
return image
diff --git a/impresso/api_client/models/image_image_types.py b/impresso/api_client/models/image_image_types.py
new file mode 100644
index 0000000..41cfe50
--- /dev/null
+++ b/impresso/api_client/models/image_image_types.py
@@ -0,0 +1,65 @@
+from typing import Any, Dict, Type, TypeVar, Union
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+T = TypeVar("T", bound="ImageImageTypes")
+
+
+@_attrs_define
+class ImageImageTypes:
+ """Classification labels for an image; every field is optional and defaults to UNSET.
+
+ Attributes:
+ visual_content (Union[Unset, str]): Whether the content is an image or not.
+ technique (Union[Unset, str]): Determines if the image is a photograph.
+ communication_goal (Union[Unset, str]): Purpose or communicative function of the image.
+ visual_content_type (Union[Unset, str]): Classification of the visual content.
+ """
+
+ visual_content: Union[Unset, str] = UNSET
+ technique: Union[Unset, str] = UNSET
+ communication_goal: Union[Unset, str] = UNSET
+ visual_content_type: Union[Unset, str] = UNSET
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a dict with camelCase keys, leaving out every field that is still UNSET."""
+ visual_content = self.visual_content
+
+ technique = self.technique
+
+ communication_goal = self.communication_goal
+
+ visual_content_type = self.visual_content_type
+
+ field_dict: Dict[str, Any] = {}
+ # There are no required fields, so this update adds nothing.
+ field_dict.update({})
+ if visual_content is not UNSET:
+ field_dict["visualContent"] = visual_content
+ if technique is not UNSET:
+ field_dict["technique"] = technique
+ if communication_goal is not UNSET:
+ field_dict["communicationGoal"] = communication_goal
+ if visual_content_type is not UNSET:
+ field_dict["visualContentType"] = visual_content_type
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict with camelCase keys; any missing key becomes UNSET."""
+ # Work on a shallow copy so the pops below leave the caller's dict untouched.
+ d = src_dict.copy()
+ visual_content = d.pop("visualContent", UNSET)
+
+ technique = d.pop("technique", UNSET)
+
+ communication_goal = d.pop("communicationGoal", UNSET)
+
+ visual_content_type = d.pop("visualContentType", UNSET)
+
+ image_image_types = cls(
+ visual_content=visual_content,
+ technique=technique,
+ communication_goal=communication_goal,
+ visual_content_type=visual_content_type,
+ )
+
+ return image_image_types
diff --git a/impresso/api_client/models/impresso_embedding_response.py b/impresso/api_client/models/impresso_embedding_response.py
new file mode 100644
index 0000000..a64f7e3
--- /dev/null
+++ b/impresso/api_client/models/impresso_embedding_response.py
@@ -0,0 +1,39 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="ImpressoEmbeddingResponse")
+
+
+@_attrs_define
+class ImpressoEmbeddingResponse:
+ """Body of a response from the Impresso Embedding endpoint
+
+ Attributes:
+ embedding (str): Embedding vector, base64-encoded with the model prefix (presumably of the form
+ `<model>:<base64 data>`; the example text is missing from the generated description).
+ """
+
+ embedding: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a plain dict with the single key "embedding"."""
+ embedding = self.embedding
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "embedding": embedding,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict; "embedding" is required (a missing key raises ``KeyError``)."""
+ # Work on a shallow copy so the pop below leaves the caller's dict untouched.
+ d = src_dict.copy()
+ embedding = d.pop("embedding")
+
+ impresso_embedding_response = cls(
+ embedding=embedding,
+ )
+
+ return impresso_embedding_response
diff --git a/impresso/api_client/models/impresso_image_embedding_request.py b/impresso/api_client/models/impresso_image_embedding_request.py
new file mode 100644
index 0000000..9aebbd6
--- /dev/null
+++ b/impresso/api_client/models/impresso_image_embedding_request.py
@@ -0,0 +1,50 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+from ..models.impresso_image_embedding_request_search_target import ImpressoImageEmbeddingRequestSearchTarget
+
+T = TypeVar("T", bound="ImpressoImageEmbeddingRequest")
+
+
+@_attrs_define
+class ImpressoImageEmbeddingRequest:
+ """Body of a request to the Impresso Image Embedding endpoint
+
+ Attributes:
+ search_target (ImpressoImageEmbeddingRequestSearchTarget): Which embedding space the embedding is going to be
+ used in
+ bytes_ (str): Base64-encoded image bytes. JPG and PNG formats are supported.
+ """
+
+ search_target: ImpressoImageEmbeddingRequestSearchTarget
+ bytes_: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a dict with the keys "searchTarget" (the enum's raw value) and "bytes"."""
+ search_target = self.search_target.value
+
+ bytes_ = self.bytes_
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "searchTarget": search_target,
+ "bytes": bytes_,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict with the keys "searchTarget" and "bytes".
+
+ Both keys are required (``KeyError`` if missing). "searchTarget" is converted by calling the
+ enum class, which raises ``ValueError`` for a value that is not one of its members.
+ """
+ # Work on a shallow copy so the pops below leave the caller's dict untouched.
+ d = src_dict.copy()
+ search_target = ImpressoImageEmbeddingRequestSearchTarget(d.pop("searchTarget"))
+
+ bytes_ = d.pop("bytes")
+
+ impresso_image_embedding_request = cls(
+ search_target=search_target,
+ bytes_=bytes_,
+ )
+
+ return impresso_image_embedding_request
diff --git a/impresso/api_client/models/impresso_image_embedding_request_search_target.py b/impresso/api_client/models/impresso_image_embedding_request_search_target.py
new file mode 100644
index 0000000..3b43405
--- /dev/null
+++ b/impresso/api_client/models/impresso_image_embedding_request_search_target.py
@@ -0,0 +1,16 @@
+from enum import Enum
+from typing import Literal
+
+
+class ImpressoImageEmbeddingRequestSearchTarget(str, Enum):
+ # Values allowed for the search target of an image embedding request: "image" or "multimodal".
+ IMAGE = "image"
+ MULTIMODAL = "multimodal"
+
+ def __str__(self) -> str:
+ # Return the member's raw string value.
+ return str(self.value)
+
+
+# Literal type listing the same two strings as the enum members above.
+ImpressoImageEmbeddingRequestSearchTargetLiteral = Literal[
+ "image",
+ "multimodal",
+]
diff --git a/impresso/api_client/models/impresso_text_embedding_request.py b/impresso/api_client/models/impresso_text_embedding_request.py
new file mode 100644
index 0000000..1cea762
--- /dev/null
+++ b/impresso/api_client/models/impresso_text_embedding_request.py
@@ -0,0 +1,50 @@
+from typing import Any, Dict, Type, TypeVar
+
+from attrs import define as _attrs_define
+
+from ..models.impresso_text_embedding_request_search_target import ImpressoTextEmbeddingRequestSearchTarget
+
+T = TypeVar("T", bound="ImpressoTextEmbeddingRequest")
+
+
+@_attrs_define
+class ImpressoTextEmbeddingRequest:
+ """Body of a request to the Impresso Text Embedding endpoint
+
+ Attributes:
+ search_target (ImpressoTextEmbeddingRequestSearchTarget): Which embedding space the embedding is going to be
+ used in
+ text (str): Text to be embedded
+ """
+
+ search_target: ImpressoTextEmbeddingRequestSearchTarget
+ text: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a dict with the keys "searchTarget" (the enum's raw value) and "text"."""
+ search_target = self.search_target.value
+
+ text = self.text
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "searchTarget": search_target,
+ "text": text,
+ }
+ )
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict with the keys "searchTarget" and "text".
+
+ Both keys are required (``KeyError`` if missing). "searchTarget" is converted by calling the
+ enum class, which raises ``ValueError`` for a value that is not one of its members.
+ """
+ # Work on a shallow copy so the pops below leave the caller's dict untouched.
+ d = src_dict.copy()
+ search_target = ImpressoTextEmbeddingRequestSearchTarget(d.pop("searchTarget"))
+
+ text = d.pop("text")
+
+ impresso_text_embedding_request = cls(
+ search_target=search_target,
+ text=text,
+ )
+
+ return impresso_text_embedding_request
diff --git a/impresso/api_client/models/impresso_text_embedding_request_search_target.py b/impresso/api_client/models/impresso_text_embedding_request_search_target.py
new file mode 100644
index 0000000..d7e721e
--- /dev/null
+++ b/impresso/api_client/models/impresso_text_embedding_request_search_target.py
@@ -0,0 +1,16 @@
+from enum import Enum
+from typing import Literal
+
+
+class ImpressoTextEmbeddingRequestSearchTarget(str, Enum):
+ # Values allowed for the search target of a text embedding request: "multimodal" or "text".
+ MULTIMODAL = "multimodal"
+ TEXT = "text"
+
+ def __str__(self) -> str:
+ # Return the member's raw string value.
+ return str(self.value)
+
+
+# Literal type listing the same two strings as the enum members above.
+ImpressoTextEmbeddingRequestSearchTargetLiteral = Literal[
+ "multimodal",
+ "text",
+]
diff --git a/impresso/api_client/models/interact_with_experiment_body.py b/impresso/api_client/models/interact_with_experiment_body.py
new file mode 100644
index 0000000..432ef16
--- /dev/null
+++ b/impresso/api_client/models/interact_with_experiment_body.py
@@ -0,0 +1,43 @@
+from typing import Any, Dict, List, Type, TypeVar
+
+from attrs import define as _attrs_define
+from attrs import field as _attrs_field
+
+T = TypeVar("T", bound="InteractWithExperimentBody")
+
+
+@_attrs_define
+class InteractWithExperimentBody:
+ """Experiment specific request body
+
+ The class declares no named fields: all content lives in ``additional_properties`` and can be
+ read or changed with dict-style access (``body[key]``, ``key in body``, ``del body[key]``).
+ """
+
+ # Not an __init__ argument (init=False); every instance starts with its own empty dict.
+ additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict)
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a new dict holding a shallow copy of the additional properties."""
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(self.additional_properties)
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance whose additional properties are a shallow copy of ``src_dict``."""
+ d = src_dict.copy()
+ interact_with_experiment_body = cls()
+
+ interact_with_experiment_body.additional_properties = d
+ return interact_with_experiment_body
+
+ @property
+ def additional_keys(self) -> List[str]:
+ """List of the keys currently stored in ``additional_properties``."""
+ return list(self.additional_properties.keys())
+
+ def __getitem__(self, key: str) -> Any:
+ # Raises KeyError when the key is not present.
+ return self.additional_properties[key]
+
+ def __setitem__(self, key: str, value: Any) -> None:
+ self.additional_properties[key] = value
+
+ def __delitem__(self, key: str) -> None:
+ # Raises KeyError when the key is not present.
+ del self.additional_properties[key]
+
+ def __contains__(self, key: str) -> bool:
+ return key in self.additional_properties
diff --git a/impresso/api_client/models/topic.py b/impresso/api_client/models/topic.py
new file mode 100644
index 0000000..5095d88
--- /dev/null
+++ b/impresso/api_client/models/topic.py
@@ -0,0 +1,92 @@
+from typing import TYPE_CHECKING, Any, Dict, List, Type, TypeVar, Union
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+if TYPE_CHECKING:
+ from ..models.topic_word import TopicWord
+
+
+T = TypeVar("T", bound="Topic")
+
+
+@_attrs_define
+class Topic:
+ """A topic
+
+ ``uid`` and ``language`` are required; the remaining fields default to UNSET.
+
+ Attributes:
+ uid (str): The unique identifier of the topic
+ language (str): The language code of the topic
+ content_items_count (Union[Unset, float]): Number of content items with this topic
+ words (Union[Unset, List['TopicWord']]): Top N words associated with the topic
+ model (Union[Unset, str]): ID of the model used to generate the topic
+ """
+
+ uid: str
+ language: str
+ content_items_count: Union[Unset, float] = UNSET
+ words: Union[Unset, List["TopicWord"]] = UNSET
+ model: Union[Unset, str] = UNSET
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a dict with "uid" and "language" plus every optional field that is not UNSET.
+
+ Optional fields use the camelCase key "contentItemsCount" and the keys "words" and "model";
+ each word is serialised through its own ``to_dict()``.
+ """
+ uid = self.uid
+
+ language = self.language
+
+ content_items_count = self.content_items_count
+
+ words: Union[Unset, List[Dict[str, Any]]] = UNSET
+ if not isinstance(self.words, Unset):
+ words = []
+ for words_item_data in self.words:
+ words_item = words_item_data.to_dict()
+ words.append(words_item)
+
+ model = self.model
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "uid": uid,
+ "language": language,
+ }
+ )
+ if content_items_count is not UNSET:
+ field_dict["contentItemsCount"] = content_items_count
+ if words is not UNSET:
+ field_dict["words"] = words
+ if model is not UNSET:
+ field_dict["model"] = model
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict; "uid" and "language" are required (``KeyError`` if missing).
+
+ "contentItemsCount" and "model" become UNSET when absent. ``src_dict`` is shallow-copied
+ first, so the pops do not remove keys from the caller's dict.
+ """
+ # Imported here because the module-level import of TopicWord is guarded by TYPE_CHECKING.
+ from ..models.topic_word import TopicWord
+
+ d = src_dict.copy()
+ uid = d.pop("uid")
+
+ language = d.pop("language")
+
+ content_items_count = d.pop("contentItemsCount", UNSET)
+
+ # NOTE(review): when "words" is absent, `_words` is UNSET and `_words or []` presumably
+ # evaluates to [] (assuming UNSET is falsy - confirm in ..types), so `words` ends up as an
+ # empty list rather than UNSET and to_dict() will then emit "words": [].
+ words = []
+ _words = d.pop("words", UNSET)
+ for words_item_data in _words or []:
+ words_item = TopicWord.from_dict(words_item_data)
+
+ words.append(words_item)
+
+ model = d.pop("model", UNSET)
+
+ topic = cls(
+ uid=uid,
+ language=language,
+ content_items_count=content_items_count,
+ words=words,
+ model=model,
+ )
+
+ return topic
diff --git a/impresso/api_client/models/topic_word.py b/impresso/api_client/models/topic_word.py
new file mode 100644
index 0000000..fac3c37
--- /dev/null
+++ b/impresso/api_client/models/topic_word.py
@@ -0,0 +1,58 @@
+from typing import Any, Dict, Type, TypeVar, Union
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+T = TypeVar("T", bound="TopicWord")
+
+
+@_attrs_define
+class TopicWord:
+ """A word included in a topic
+
+ ``w`` and ``p`` are required; ``h`` defaults to UNSET.
+
+ Attributes:
+ w (str): Word surface form
+ p (float): Probability of the word in topic
+ h (Union[Unset, bool]): If word is highlighted
+ """
+
+ w: str
+ p: float
+ h: Union[Unset, bool] = UNSET
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a dict with the keys "w" and "p", plus "h" when it is not UNSET."""
+ w = self.w
+
+ p = self.p
+
+ h = self.h
+
+ field_dict: Dict[str, Any] = {}
+ field_dict.update(
+ {
+ "w": w,
+ "p": p,
+ }
+ )
+ if h is not UNSET:
+ field_dict["h"] = h
+
+ return field_dict
+
+ @classmethod
+ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
+ """Build an instance from a dict; "w" and "p" are required, "h" becomes UNSET when absent."""
+ # Work on a shallow copy so the pops below leave the caller's dict untouched.
+ d = src_dict.copy()
+ w = d.pop("w")
+
+ p = d.pop("p")
+
+ h = d.pop("h", UNSET)
+
+ topic_word = cls(
+ w=w,
+ p=p,
+ h=h,
+ )
+
+ return topic_word
diff --git a/impresso/api_models.py b/impresso/api_models.py
index 862a85b..1339a27 100644
--- a/impresso/api_models.py
+++ b/impresso/api_models.py
@@ -10,79 +10,45 @@
from typing_extensions import Annotated, Literal
-class AuthenticationCreateRequest(BaseModel):
- model_config = ConfigDict(
- extra='allow',
- )
- strategy: Literal['local', 'jwt-app']
- email: Optional[str] = None
- password: Optional[str] = None
- accessToken: Optional[str] = None
-
-
-class Authentication(BaseModel):
- strategy: Optional[str] = None
- payload: Optional[Mapping[str, Any]] = None
-
-
-class User(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- id: int
- username: str
- firstname: str
- lastname: str
- isStaff: bool
- isActive: bool
- isSuperuser: bool
- uid: str
-
-
-class AuthenticationCreateResponse(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- accessToken: str
- authentication: Authentication
- user: Annotated[User, Field(description='User details', title='User')]
+class Q(RootModel[str]):
+ # Root-model wrapper around a single string constrained to 2 to 6000 characters.
+ root: Annotated[str, Field(max_length=6000, min_length=2)]
-class CollectableItemsUpdatedResponse(BaseModel):
- totalAdded: Annotated[
- int, Field(description='Total number of items added to the collection')
- ]
- totalRemoved: Annotated[
- int, Field(description='Total number of items removed from the collection')
- ]
+class QItem(RootModel[str]):
+ # Root-model wrapper around a single string with the same 2 to 6000 character constraint as Q.
+ root: Annotated[str, Field(max_length=6000, min_length=2)]
-class Params(BaseModel):
+class Filter(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- id: Annotated[Optional[str], Field(None, description='The collection id')]
- status: Annotated[
- Optional[Literal['DEL']], Field(None, description='The status of the operation')
+ context: Optional[Literal['include', 'exclude']] = 'include'
+ op: Optional[Literal['AND', 'OR']] = 'OR'
+ type: Annotated[
+ str,
+ Field(
+ description="Possible values are in 'search.validators:eachFilterValidator.type.choices'"
+ ),
]
-
-
-class Task(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- task_id: Annotated[Optional[str], Field(None, description='The ID of the task')]
- creationDate: Annotated[
- Optional[str], Field(None, description='When task was created')
+ precision: Optional[Literal['fuzzy', 'soft', 'exact', 'partial']] = 'exact'
+ q: Optional[Union[Q, Sequence[QItem]]] = None
+ daterange: Annotated[
+ Optional[str],
+ Field(
+ None,
+ pattern='\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z TO \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z',
+ ),
]
+ uids: Optional[str] = None
+ uid: Optional[str] = None
-class CollectionsRemoveResponse(BaseModel):
+class FacetWithLabel(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- params: Params
- task: Annotated[Task, Field(description='Deletion task details')]
+ id: Annotated[str, Field(description='Unique identifier of the facet')]
+ label: Annotated[str, Field(description='Label of the facet')]
class Error(BaseModel):
@@ -107,37 +73,130 @@ class Error(BaseModel):
]
-class Q(RootModel[str]):
- root: Annotated[str, Field(max_length=500, min_length=2)]
+class ImpressoNerRequest(BaseModel):
+ # Request model for named entity recognition: `text` (1 to 3999 characters) plus an optional
+ # `method` that defaults to 'ner'. Fields not declared here are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ text: Annotated[
+ str,
+ Field(
+ description='Text to be processed for named entity recognition',
+ max_length=3999,
+ min_length=1,
+ ),
+ ]
+ method: Annotated[
+ Optional[Literal['ner', 'ner-nel', 'nel']],
+ Field(
+ 'ner',
+ description='NER method to be used: `ner` (default), `ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in [START] [END] tags).',
+ ),
+ ]
-class QItem(RootModel[str]):
- root: Annotated[str, Field(max_length=500, min_length=2)]
+class ImpressoImageEmbeddingRequest(BaseModel):
+ # Pydantic model of an image embedding request: the target embedding space ('image' or
+ # 'multimodal') and the base64-encoded image. Both fields are required; extra fields are rejected.
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ searchTarget: Annotated[
+ Literal['image', 'multimodal'],
+ Field(description='Which embedding space the embedding is going to be used in'),
+ ]
+ bytes: Annotated[
+ str,
+ Field(
+ description='Base64-encoded image bytes. JPG and PNG formats are supported.'
+ ),
+ ]
-class Filter(BaseModel):
+class ImpressoEmbeddingResponse(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- context: Optional[Literal['include', 'exclude']] = 'include'
- op: Optional[Literal['AND', 'OR']] = 'OR'
- type: Annotated[
+ embedding: Annotated[
str,
Field(
- description="Possible values are in 'search.validators:eachFilterValidator.type.choices'"
+ description='Embedding vector, base64-encoded with the model prefix. E.g. :'
),
]
- precision: Optional[Literal['fuzzy', 'soft', 'exact', 'partial']] = 'exact'
- q: Optional[Union[Q, Sequence[QItem]]] = None
- daterange: Annotated[
- Optional[str],
+
+
+class AddCollectableItemsFromFilters(BaseModel):
+ # Selects the items to add to a collection by a list of filters evaluated in a namespace
+ # ('search' or 'tr_passages'). Both fields are required; extra fields are rejected.
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ filters: Annotated[
+ Sequence[Filter],
+ Field(
+ description='Filters to apply when selecting items to add to the collection'
+ ),
+ ]
+ namespace: Annotated[
+ Literal['search', 'tr_passages'],
+ Field(
+ description='Namespace to use when selecting items to add to the collection'
+ ),
+ ]
+
+
+class CollectableItemsUpdatedResponse(BaseModel):
+ # Counts of items added to and removed from a collection; both are required.
+ # No model_config is set here, so pydantic's default handling of extra fields applies.
+ totalAdded: Annotated[
+ int, Field(description='Total number of items added to the collection')
+ ]
+ totalRemoved: Annotated[
+ int, Field(description='Total number of items removed from the collection')
+ ]
+
+
+class NewCollectionRequest(BaseModel):
+ # Collection fields: `name` is required (2 to 50 characters); `description` (at most 500
+ # characters) and `accessLevel` ('public' or 'private') default to None. Extra fields are rejected.
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ name: Annotated[str, Field(max_length=50, min_length=2)]
+ description: Annotated[Optional[str], Field(None, max_length=500)]
+ accessLevel: Annotated[
+ Optional[Literal['public', 'private']],
+ Field(None, description='Access level of the collection.'),
+ ]
+
+
+class Authentication(BaseModel):
+ # Optional `strategy` string and optional free-form `payload` mapping; both default to None.
+ strategy: Optional[str] = None
+ payload: Optional[Mapping[str, Any]] = None
+
+
+class User(BaseModel):
+ # User record; all eight fields are required and undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ id: int
+ username: str
+ firstname: str
+ lastname: str
+ isStaff: bool
+ isActive: bool
+ isSuperuser: bool
+ uid: str
+
+
+class AuthenticationCreateResponse(BaseModel):
+ # Response containing `accessToken`, the `authentication` details and the `user`;
+ # all three are required and undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ accessToken: str
+ authentication: Authentication
+ user: Annotated[User, Field(description='User details', title='User')]
+
+
+class AuthenticationCreateRequest(BaseModel):
+ # Authentication request: `strategy` is required ('local' or 'jwt-app'); `email`, `password`
+ # and `accessToken` default to None. Undeclared fields are accepted here (extra='allow').
+ model_config = ConfigDict(
+ extra='allow',
+ )
+ strategy: Literal['local', 'jwt-app']
+ email: Optional[str] = None
+ password: Optional[str] = None
+ accessToken: Optional[str] = None
class Offset(BaseModel):
@@ -257,54 +316,32 @@ class ImpressoNerEntity(BaseModel):
name: Annotated[Optional[str], Field(None, description='Name of the entity')]
-class ImpressoNerRequest(BaseModel):
+class Params(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- text: Annotated[
- str,
- Field(
- description='Text to be processed for named entity recognition',
- max_length=3999,
- min_length=1,
- ),
- ]
- method: Annotated[
- Optional[Literal['ner', 'ner-nel', 'nel']],
- Field(
- 'ner',
- description='NER method to be used: `ner` (default), `ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in [START] [END] tags).',
- ),
+ id: Annotated[Optional[str], Field(None, description='The collection id')]
+ status: Annotated[
+ Optional[Literal['DEL']], Field(None, description='The status of the operation')
]
-class ImpressoNerResponse(BaseModel):
+class Task(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- modelId: Annotated[
- str, Field(description='ID of the model used for the named entity recognition')
- ]
- text: Annotated[
- str, Field(description='Text processed for named entity recognition')
- ]
- timestamp: Annotated[
- AwareDatetime,
- Field(description='Timestamp of when named entity recognition was performed'),
+ task_id: Annotated[Optional[str], Field(None, description='The ID of the task')]
+ creationDate: Annotated[
+ Optional[str], Field(None, description='When task was created')
]
- entities: Sequence[ImpressoNerEntity]
-class NewCollectionRequest(BaseModel):
+class CollectionsRemoveResponse(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- name: Annotated[str, Field(max_length=50, min_length=2)]
- description: Annotated[Optional[str], Field(None, max_length=500)]
- accessLevel: Annotated[
- Optional[Literal['public', 'private']],
- Field(None, description='Access level of the collection.'),
- ]
+ params: Params
+ task: Annotated[Task, Field(description='Deletion task details')]
class UpdateCollectableItemsRequest(BaseModel):
@@ -318,37 +355,309 @@ class UpdateCollectableItemsRequest(BaseModel):
]
-class Pagination(BaseModel):
+class ImpressoTextEmbeddingRequest(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- total: Annotated[
- int, Field(description='The total number of items matching the query')
- ]
- limit: Annotated[
- int, Field(description='The number of items returned in this response')
- ]
- offset: Annotated[
- int,
- Field(
- description='Starting index of the items subset returned in this response'
- ),
+ searchTarget: Annotated[
+ Literal['multimodal', 'text'],
+ Field(description='Which embedding space the embedding is going to be used in'),
]
+ text: Annotated[str, Field(description='Text to be embedded', max_length=8000)]
-class BaseFind(BaseModel):
+class NamedEntity(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- data: Sequence
- pagination: Pagination
+ uid: Annotated[str, Field(description='Unique identifier of the entity')]
+ count: Annotated[
+ Optional[float],
+ Field(None, description='How many times it is mentioned in the text'),
+ ]
-class Collection(BaseModel):
+class Newspaper(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- uid: Annotated[str, Field(description='Unique identifier of the collection.')]
+ uid: Annotated[str, Field(description='The unique identifier of the newspaper.')]
+ title: Annotated[
+ Optional[str], Field(None, description='The title of the newspaper.')
+ ]
+ startYear: Annotated[
+ Optional[float],
+ Field(
+ None,
+ description='The year of the first available article in the newspaper.',
+ ge=0.0,
+ ),
+ ]
+ endYear: Annotated[
+ Optional[float],
+ Field(
+ None,
+ description='The year of the last available article in the newspaper.',
+ ge=0.0,
+ ),
+ ]
+ languageCodes: Annotated[
+ Optional[Sequence[str]],
+ Field(None, description='ISO 639-1 codes of languages used in the newspaper.'),
+ ]
+ totalArticles: Annotated[
+ Optional[float],
+ Field(None, description='Total number of articles in the newspaper.', ge=0.0),
+ ]
+ totalIssues: Annotated[
+ Optional[float],
+ Field(None, description='Total number of issues in the newspaper.', ge=0.0),
+ ]
+ totalPages: Annotated[
+ Optional[float],
+ Field(None, description='Total number of pages in the newspaper.', ge=0.0),
+ ]
+
+
+class Freeform(BaseModel):
+ # Model with no declared fields; extra='allow' lets arbitrary keys through.
+ # The `pass` below is redundant (the class body is not empty) but harmless.
+ pass
+ model_config = ConfigDict(
+ extra='allow',
+ )
+
+
+class NamedEntities(BaseModel):
+ # Linked entities mentioned in a content item, grouped by kind (locations, persons,
+ # organisations, news agencies). Every group defaults to None; extra fields are rejected.
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ locations: Annotated[
+ Optional[Sequence[NamedEntity]],
+ Field(
+ None, description='Linked location entities mentioned in the content item.'
+ ),
+ ]
+ persons: Annotated[
+ Optional[Sequence[NamedEntity]],
+ Field(
+ None, description='Linked person entities mentioned in the content item.'
+ ),
+ ]
+ organisations: Annotated[
+ Optional[Sequence[NamedEntity]],
+ Field(
+ None,
+ description='Linked organisation entities mentioned in the content item.',
+ ),
+ ]
+ newsAgencies: Annotated[
+ Optional[Sequence[NamedEntity]],
+ Field(
+ None,
+ description='Linked news agency entities mentioned in the content item.',
+ ),
+ ]
+
+
+class ExperimentInfo(BaseModel):
+ # Descriptive metadata of an experiment: required `id` and `name`, `description` defaulting
+ # to None. Undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ id: Annotated[str, Field(description='The unique identifier of the experiment.')]
+ name: Annotated[str, Field(description='The display name of the experiment.')]
+ description: Annotated[
+ Optional[str],
+ Field(None, description='A description of what the experiment does.'),
+ ]
+
+
+class Offset1(BaseModel):
+ # Start and end offsets of a passage within a content item; used as the type of
+ # TextReusePassage.offset. The unit of the offsets is not stated here.
+ # No model_config is set, so pydantic's default handling of extra fields applies.
+ start: Annotated[
+ int, Field(description='Start offset of the passage in the content item.')
+ ]
+ end: Annotated[
+ int, Field(description='End offset of the passage in the content item.')
+ ]
+
+
+class TextReusePassage(BaseModel):
+ # A text reuse passage: `uid` is required; the text content, the owning content item ID and
+ # the offsets default to None. Undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ uid: Annotated[str, Field(description='Unique ID of the text reuse passage.')]
+ content: Annotated[
+ Optional[str], Field(None, description='Textual content of the passage.')
+ ]
+ contentItemId: Annotated[
+ Optional[str],
+ Field(
+ None,
+ description='ID of the content item that the text reuse passage belongs to.',
+ ),
+ ]
+ offset: Annotated[
+ Optional[Offset1],
+ Field(
+ None,
+ description='Start and end offsets of the passage in the content item.',
+ ),
+ ]
+
+
+class SearchFacetBucket(BaseModel):
+ # One bucket of a search facet: a non-negative `count`, the bucket `value` (string or number)
+ # and a `label` that defaults to None. Undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ count: Annotated[int, Field(description='Number of items in the bucket', ge=0)]
+ value: Annotated[
+ Union[str, float, int], Field(description='Value that represents the bucket.')
+ ]
+ label: Annotated[
+ Optional[str], Field(None, description='Label of the value, if relevant.')
+ ]
+
+
+class VersionDetails(BaseModel):
+ # Holds the API version string; undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ version: Annotated[str, Field(description='Version of the API.')]
+
+
+class TimeCoverage(BaseModel):
+ # Publication-date span of a cluster: the dates of its earliest and latest content items.
+ # Both dates are required; undeclared fields are rejected (extra='forbid').
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ startDate: Annotated[
+ date,
+ Field(
+ description='Publication date of the earliest content item in the cluster.'
+ ),
+ ]
+ endDate: Annotated[
+ date,
+ Field(
+ description='Publication date of the latest content item in the cluster.'
+ ),
+ ]
+
+
+class TextReuseCluster(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ uid: Annotated[str, Field(description='Unique ID of the text reuse cluster.')]
+ lexicalOverlap: Annotated[
+ Optional[float],
+ Field(
+ None,
+ description='Overlap in percents between the passages in the cluster.',
+ ge=0.0,
+ le=100.0,
+ ),
+ ]
+ clusterSize: Annotated[
+ Optional[int],
+ Field(None, description='Number of passages in the cluster.', ge=0),
+ ]
+ textSample: Annotated[
+ Optional[str],
+ Field(
+ None,
+ description='Sample of a text from one of the passages in the cluster.',
+ ),
+ ]
+ timeCoverage: Annotated[
+ Optional[TimeCoverage], Field(None, description='Time coverage of the cluster.')
+ ]
+
+
+class PageNumber(RootModel[float]):
+ root: Annotated[float, Field(ge=1.0)]
+
+
+class Coordinates(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ latitude: Annotated[
+ Optional[float], Field(None, description='The latitude of the location')
+ ]
+ longitude: Annotated[
+ Optional[float], Field(None, description='The longitude of the location')
+ ]
+
+
+class WikidataLocation(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ id: Annotated[
+ str,
+ Field(
+ description='The Q Wikidata ID of the location (https://www.wikidata.org/wiki/Wikidata:Identifiers)'
+ ),
+ ]
+ type: Annotated[Literal['location'], Field(description='The type of the entity')]
+ labels: Annotated[
+ Optional[Mapping[str, Sequence[str]]],
+ Field(None, description='Labels of the location in different languages'),
+ ]
+ descriptions: Annotated[
+ Optional[Mapping[str, Sequence[str]]],
+ Field(None, description='Descriptions of the location in different languages'),
+ ]
+ coordinates: Optional[Coordinates] = None
+
+
+class TopicWord(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ w: Annotated[str, Field(description='Word surface form')]
+ p: Annotated[float, Field(description='Probability of the word in topic')]
+ h: Annotated[Optional[bool], Field(None, description='If word is highlighted')]
+
+
+class WikidataHuman(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ id: Annotated[
+ str,
+ Field(
+ description='The Q Wikidata ID of the person (https://www.wikidata.org/wiki/Wikidata:Identifiers)'
+ ),
+ ]
+ type: Annotated[Literal['human'], Field(description='The type of the entity')]
+ labels: Annotated[
+ Optional[Mapping[str, Sequence[str]]],
+ Field(None, description='Labels of the person in different languages'),
+ ]
+ descriptions: Annotated[
+ Optional[Mapping[str, Sequence[str]]],
+ Field(None, description='Descriptions of the person in different languages'),
+ ]
+ birthDate: Annotated[
+ Optional[AwareDatetime], Field(None, description='The birth date of the person')
+ ]
+ deathDate: Annotated[
+ Optional[AwareDatetime], Field(None, description='The death date of the person')
+ ]
+ birthPlace: Annotated[
+ Optional[WikidataLocation],
+ Field(None, description='The birth place of the person'),
+ ]
+ deathPlace: Annotated[
+ Optional[WikidataLocation],
+ Field(None, description='The death place of the person'),
+ ]
+
+
+class Collection(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ uid: Annotated[str, Field(description='Unique identifier of the collection.')]
title: Annotated[Optional[str], Field(None, description='Title of the collection.')]
description: Annotated[
Optional[str], Field(None, description='Description of the collection.')
@@ -371,22 +680,56 @@ class Collection(BaseModel):
]
-class EntityMention(BaseModel):
+class WordMatch(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- uid: Annotated[str, Field(description='Unique identifier of the entity')]
- relevance: Annotated[
- Optional[float],
- Field(None, description='Relevance of the entity in the document'),
+ id: Annotated[str, Field(description='Unique identifier for the word')]
+ languageCode: Annotated[str, Field(description='The language code of the word')]
+ word: Annotated[str, Field(description='The word')]
+
+
+class Name(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ langCode: Annotated[str, Field(description='ISO 639-1 language code.')]
+ name: Annotated[
+ str, Field(description='Name of the data provider in this language.')
]
-class Freeform(BaseModel):
- pass
+class DataProvider(BaseModel):
model_config = ConfigDict(
- extra='allow',
+ extra='forbid',
+ )
+ id: Annotated[str, Field(description='The unique identifier of the data provider.')]
+ name: Annotated[str, Field(description='The default name of the data provider.')]
+ names: Annotated[
+ Sequence[Name],
+ Field(description='Names of the data provider in different languages.'),
+ ]
+
+
+class ImageTypes(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
)
+ visualContent: Annotated[
+ Optional[str],
+ Field(None, description='Whether the content is an image or not.'),
+ ]
+ technique: Annotated[
+ Optional[str],
+ Field(None, description='Determines if the image is a photograph.'),
+ ]
+ communicationGoal: Annotated[
+ Optional[str],
+ Field(None, description='Purpose or communicative function of the image.'),
+ ]
+ visualContentType: Annotated[
+ Optional[str], Field(None, description='Classification of the visual content.')
+ ]
class MediaSourceRef(BaseModel):
@@ -427,6 +770,7 @@ class Image(BaseModel):
None, description='The page numbers of the issue that the image belongs to.'
),
]
+ imageTypes: Optional[ImageTypes] = None
mediaSourceRef: Annotated[
MediaSourceRef, Field(description='The media source of the image')
]
@@ -436,281 +780,222 @@ class Image(BaseModel):
description='The date of the image or the date of the issue that the image belongs to.'
),
]
-
-
-class Totals(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- articles: Annotated[
- Optional[int],
- Field(None, description='The number of articles in the media source.'),
- ]
- issues: Annotated[
- Optional[int],
- Field(None, description='The number of issues in the media source.'),
- ]
- pages: Annotated[
- Optional[int],
- Field(None, description='The number of pages in the media source.'),
- ]
-
-
-class Property(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- id: Annotated[str, Field(description='The unique identifier of the property.')]
- label: Annotated[str, Field(description='The name of the property.')]
- value: Annotated[str, Field(description='The value of the property.')]
-
-
-class MediaSource(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- uid: Annotated[str, Field(description='The unique identifier of the media source.')]
- type: Annotated[
- Literal['newspaper'], Field(description='The type of the media source.')
- ]
- name: Annotated[str, Field(description='A display name of the media source.')]
- languageCodes: Annotated[
- Sequence[str],
- Field(description='ISO 639-2 language codes this media source has content in.'),
- ]
- publishedPeriodYears: Annotated[
- Optional[Sequence[int]],
- Field(
- None,
- description='The range of years this media source has been published for. Impresso may not have data for all this period. Is not defined if there is no information.',
- max_length=2,
- min_length=2,
- ),
- ]
- availableDatesRange: Annotated[
- Optional[Sequence[AwareDatetime]],
+ embeddings: Annotated[
+ Optional[Sequence[str]],
Field(
None,
- description='The range of dates this media source has content items for. This represents the earliest and the latest dates of the contet items. Is not defined if there are no content items for this source.',
- max_length=2,
- min_length=2,
+ description='Precomputed embeddings for the image in the format: :.',
),
]
- totals: Totals
- properties: Optional[Sequence[Property]] = None
-
-
-class NamedEntity(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- uid: Annotated[str, Field(description='Unique identifier of the entity')]
- count: Annotated[
- Optional[float],
- Field(None, description='How many times it is mentioned in the text'),
- ]
-class Newspaper(BaseModel):
+class TopicMention(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- uid: Annotated[str, Field(description='The unique identifier of the newspaper.')]
- title: Annotated[
- Optional[str], Field(None, description='The title of the newspaper.')
- ]
- startYear: Annotated[
- Optional[float],
- Field(
- None,
- description='The year of the first available article in the newspaper.',
- ge=0.0,
- ),
- ]
- endYear: Annotated[
+ uid: Annotated[str, Field(description='Unique identifier of the topic.')]
+ relevance: Annotated[
Optional[float],
Field(
None,
- description='The year of the last available article in the newspaper.',
+ description='Relevance of the topic in the content item.',
ge=0.0,
+ le=1.0,
),
]
- languageCodes: Annotated[
- Optional[Sequence[str]],
- Field(None, description='ISO 639-1 codes of languages used in the newspaper.'),
- ]
- totalArticles: Annotated[
- Optional[float],
- Field(None, description='Total number of articles in the newspaper.', ge=0.0),
- ]
- totalIssues: Annotated[
- Optional[float],
- Field(None, description='Total number of issues in the newspaper.', ge=0.0),
- ]
- totalPages: Annotated[
- Optional[float],
- Field(None, description='Total number of pages in the newspaper.', ge=0.0),
- ]
-class SearchFacetBucket(BaseModel):
+class Pagination(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- count: Annotated[int, Field(description='Number of items in the bucket', ge=0)]
- value: Annotated[
- Union[str, float, int], Field(description='Value that represents the bucket.')
+ total: Annotated[
+ int, Field(description='The total number of items matching the query')
]
- label: Annotated[
- Optional[str], Field(None, description='Label of the value, if relevant.')
+ limit: Annotated[
+ int, Field(description='The number of items returned in this response')
+ ]
+ offset: Annotated[
+ int,
+ Field(
+ description='Starting index of the items subset returned in this response'
+ ),
]
-class TimeCoverage(BaseModel):
+class BaseFind(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- startDate: Annotated[
- date,
- Field(
- description='Publication date of the earliest content item in the cluster.'
- ),
- ]
- endDate: Annotated[
- date,
- Field(
- description='Publication date of the latest content item in the cluster.'
- ),
- ]
+ data: Sequence
+ pagination: Pagination
-class TextReuseCluster(BaseModel):
+class Totals(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- uid: Annotated[str, Field(description='Unique ID of the text reuse cluster.')]
- lexicalOverlap: Annotated[
- Optional[float],
- Field(
- None,
- description='Overlap in percents between the passages in the cluster.',
- ge=0.0,
- le=100.0,
- ),
- ]
- clusterSize: Annotated[
+ articles: Annotated[
Optional[int],
- Field(None, description='Number of passages in the cluster.', ge=0),
+ Field(None, description='The number of articles in the media source.'),
]
- textSample: Annotated[
- Optional[str],
- Field(
- None,
- description='Sample of a text from one of the passages in the cluster.',
- ),
+ issues: Annotated[
+ Optional[int],
+ Field(None, description='The number of issues in the media source.'),
]
- timeCoverage: Annotated[
- Optional[TimeCoverage], Field(None, description='Time coverage of the cluster.')
+ pages: Annotated[
+ Optional[int],
+ Field(None, description='The number of pages in the media source.'),
]
-class Offset1(BaseModel):
- start: Annotated[
- int, Field(description='Start offset of the passage in the content item.')
- ]
- end: Annotated[
- int, Field(description='End offset of the passage in the content item.')
- ]
+class Property(BaseModel):
+ model_config = ConfigDict(
+ extra='forbid',
+ )
+ id: Annotated[str, Field(description='The unique identifier of the property.')]
+ label: Annotated[str, Field(description='The name of the property.')]
+ value: Annotated[str, Field(description='The value of the property.')]
-class TextReusePassage(BaseModel):
+class MediaSource(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- uid: Annotated[str, Field(description='Unique ID of the text reuse passage.')]
- content: Annotated[
- Optional[str], Field(None, description='Textual content of the passage.')
+ uid: Annotated[str, Field(description='The unique identifier of the media source.')]
+ type: Annotated[
+ Literal['newspaper'], Field(description='The type of the media source.')
]
- contentItemId: Annotated[
- Optional[str],
+ name: Annotated[str, Field(description='A display name of the media source.')]
+ languageCodes: Annotated[
+ Sequence[str],
+ Field(description='ISO 639-2 language codes this media source has content in.'),
+ ]
+ publishedPeriodYears: Annotated[
+ Optional[Sequence[int]],
Field(
None,
- description='ID of the content item that the text reuse passage belongs to.',
+ description='The range of years this media source has been published for. Impresso may not have data for all this period. Is not defined if there is no information.',
+ max_length=2,
+ min_length=2,
),
]
- offset: Annotated[
- Optional[Offset1],
+ availableDatesRange: Annotated[
+ Optional[Sequence[AwareDatetime]],
Field(
None,
- description='Start and end offsets of the passage in the content item.',
+ description='The range of dates this media source has content items for. This represents the earliest and the latest dates of the contet items. Is not defined if there are no content items for this source.',
+ max_length=2,
+ min_length=2,
),
]
+ totals: Totals
+ properties: Optional[Sequence[Property]] = None
-class TopicMention(BaseModel):
+class EntityMention(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- uid: Annotated[str, Field(description='Unique identifier of the topic.')]
- relevance: Annotated[
- Optional[float],
+ surfaceForm: Annotated[
+ str, Field(description='The surface form (label) of the entity mention')
+ ]
+ mentionConfidence: Annotated[
+ float, Field(description='Confidence score of the entity mention')
+ ]
+ startOffset: Annotated[
+ Optional[int],
Field(
- None,
- description='Relevance of the topic in the content item.',
- ge=0.0,
- le=1.0,
+ None, description='Start offset of the entity mention in the content item'
),
]
+ endOffset: Annotated[
+ Optional[int],
+ Field(None, description='End offset of the entity mention in the content item'),
+ ]
-class VersionDetails(BaseModel):
+class ImpressoNerResponse(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- version: Annotated[str, Field(description='Version of the API.')]
+ modelId: Annotated[
+ str, Field(description='ID of the model used for the named entity recognition')
+ ]
+ text: Annotated[
+ str, Field(description='Text processed for named entity recognition')
+ ]
+ timestamp: Annotated[
+ AwareDatetime,
+ Field(description='Timestamp of when named entity recognition was performed'),
+ ]
+ entities: Sequence[ImpressoNerEntity]
-class Coordinates(BaseModel):
+class Topic(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- latitude: Annotated[
- Optional[float], Field(None, description='The latitude of the location')
+ uid: Annotated[str, Field(description='The unique identifier of the topic')]
+ language: Annotated[str, Field(description='The language code of the topic')]
+ contentItemsCount: Annotated[
+ Optional[float],
+ Field(None, description='Number of content items with this topic'),
]
- longitude: Annotated[
- Optional[float], Field(None, description='The longitude of the location')
+ words: Annotated[
+ Optional[Sequence[TopicWord]],
+ Field(None, description='Top N words associated with the topic'),
+ ]
+ model: Annotated[
+ Optional[str],
+ Field(None, description='ID of the model used to generate the topic'),
]
-class WikidataLocation(BaseModel):
+class EntityDetails(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- id: Annotated[
- str,
- Field(
- description='The Q Wikidata ID of the location (https://www.wikidata.org/wiki/Wikidata:Identifiers)'
- ),
+ uid: Annotated[str, Field(description='Unique identifier of the entity')]
+ label: Annotated[Optional[str], Field(None, description='Entity label')]
+ type: Optional[Literal['person', 'location', 'organisation', 'newsagency']] = None
+ wikidataId: Annotated[
+ Optional[str], Field(None, description='Wikidata identifier of the entity.')
]
- type: Annotated[Literal['location'], Field(description='The type of the entity')]
- labels: Annotated[
- Optional[Mapping[str, Sequence[str]]],
- Field(None, description='Labels of the location in different languages'),
+ totalMentions: Annotated[
+ Optional[int],
+ Field(None, description='Total number of mentions of the entity.'),
]
- descriptions: Annotated[
- Optional[Mapping[str, Sequence[str]]],
- Field(None, description='Descriptions of the location in different languages'),
+ totalContentItems: Annotated[
+ Optional[int],
+ Field(
+ None,
+ description='Total number of content items the entity is mentioned in.',
+ ),
]
- coordinates: Optional[Coordinates] = None
+ wikidataDetails: Optional[Union[WikidataHuman, WikidataLocation]] = None
-class WordMatch(BaseModel):
+class EntityMentions(BaseModel):
model_config = ConfigDict(
extra='forbid',
)
- id: Annotated[str, Field(description='Unique identifier for the word')]
- languageCode: Annotated[str, Field(description='The language code of the word')]
- word: Annotated[str, Field(description='The word')]
+ locations: Annotated[
+ Optional[Sequence[EntityMention]],
+ Field(None, description='Locations mentioned in the content item.'),
+ ]
+ persons: Annotated[
+ Optional[Sequence[EntityMention]],
+ Field(None, description='Persons mentioned in the content item.'),
+ ]
+ organisations: Annotated[
+ Optional[Sequence[EntityMention]],
+ Field(None, description='Organisations mentioned in the content item.'),
+ ]
+ newsAgencies: Annotated[
+ Optional[Sequence[EntityMention]],
+ Field(None, description='News agencies mentioned in the content item.'),
+ ]
class ContentItem(BaseModel):
@@ -745,36 +1030,19 @@ class ContentItem(BaseModel):
transcript: Annotated[
Optional[str], Field(None, description='Transcript of the content item.')
]
- locationEntities: Annotated[
- Optional[Sequence[NamedEntity]],
- Field(
- None, description='Linked location entities mentioned in the content item.'
- ),
- ]
- personEntities: Annotated[
- Optional[Sequence[NamedEntity]],
- Field(
- None, description='Linked person entities mentioned in the content item.'
- ),
- ]
- organisationEntities: Annotated[
- Optional[Sequence[NamedEntity]],
- Field(
- None,
- description='Linked organisation entities mentioned in the content item.',
- ),
+ entities: Optional[NamedEntities] = None
+ mentions: Optional[EntityMentions] = None
+ topics: Annotated[
+ Optional[Sequence[TopicMention]],
+ Field(None, description='Topics mentioned in the content item.'),
]
- newsAgenciesEntities: Annotated[
- Optional[Sequence[NamedEntity]],
+ embeddings: Annotated[
+ Optional[Sequence[str]],
Field(
None,
- description='Linked news agency entities mentioned in the content item.',
+ description='Precomputed embeddings for the content item in the format: :.',
),
]
- topics: Annotated[
- Optional[Sequence[TopicMention]],
- Field(None, description='Topics mentioned in the content item.'),
- ]
transcriptLength: Annotated[
Optional[float],
Field(None, description='The length of the transcript in characters.', ge=0.0),
@@ -828,62 +1096,36 @@ class ContentItem(BaseModel):
],
Field(None, description='The type of the media the content item belongs to.'),
]
-
-
-class WikidataHuman(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- id: Annotated[
- str,
+ hasOLR: Annotated[
+ Optional[bool],
+ Field(None, description='Whether the content item has OCR/OLR data available.'),
+ ]
+ ocrQualityScore: Annotated[
+ Optional[float],
Field(
- description='The Q Wikidata ID of the person (https://www.wikidata.org/wiki/Wikidata:Identifiers)'
+ None,
+ description='OCR quality score of the content item (0 - 1).',
+ ge=0.0,
+ le=1.0,
),
]
- type: Annotated[Literal['human'], Field(description='The type of the entity')]
- labels: Annotated[
- Optional[Mapping[str, Sequence[str]]],
- Field(None, description='Labels of the person in different languages'),
- ]
- descriptions: Annotated[
- Optional[Mapping[str, Sequence[str]]],
- Field(None, description='Descriptions of the person in different languages'),
- ]
- birthDate: Annotated[
- Optional[AwareDatetime], Field(None, description='The birth date of the person')
- ]
- deathDate: Annotated[
- Optional[AwareDatetime], Field(None, description='The death date of the person')
- ]
- birthPlace: Annotated[
- Optional[WikidataLocation],
- Field(None, description='The birth place of the person'),
- ]
- deathPlace: Annotated[
- Optional[WikidataLocation],
- Field(None, description='The death place of the person'),
- ]
-
-
-class EntityDetails(BaseModel):
- model_config = ConfigDict(
- extra='forbid',
- )
- uid: Annotated[str, Field(description='Unique identifier of the entity')]
- label: Annotated[Optional[str], Field(None, description='Entity label')]
- type: Optional[Literal['person', 'location', 'organisation', 'newsagency']] = None
- wikidataId: Annotated[
- Optional[str], Field(None, description='Wikidata identifier of the entity.')
+ relevanceScore: Annotated[
+ Optional[float],
+ Field(
+ None,
+ description='Relevance score of the content item (0 - 1).',
+ ge=0.0,
+ le=1.0,
+ ),
]
- totalMentions: Annotated[
- Optional[int],
- Field(None, description='Total number of mentions of the entity.'),
+ pageNumbers: Annotated[
+ Optional[Sequence[PageNumber]],
+ Field(None, description='Page numbers the content item appears on.'),
]
- totalContentItems: Annotated[
- Optional[int],
+ collectionUids: Annotated[
+ Optional[Sequence[str]],
Field(
None,
- description='Total number of content items the entity is mentioned in.',
+ description='Unique identifiers of collections the content item belongs to.',
),
]
- wikidataDetails: Optional[Union[WikidataHuman, WikidataLocation]] = None
diff --git a/impresso/client.py b/impresso/client.py
index 124e742..33d3979 100644
--- a/impresso/client.py
+++ b/impresso/client.py
@@ -56,6 +56,12 @@ def __init__(self, api_url: str, api_bearer_token: str):
"response": [_log_non_2xx],
}
},
+ timeout=httpx.Timeout(
+ connect=20.0,
+ read=300.0,
+ write=10.0,
+ pool=5.0,
+ ),
)
)
diff --git a/impresso/client_base.py b/impresso/client_base.py
index ac70e5e..952d514 100644
--- a/impresso/client_base.py
+++ b/impresso/client_base.py
@@ -1,11 +1,15 @@
from impresso.api_client.client import AuthenticatedClient
-from impresso.resources.content_items import ContentItemsResource
from impresso.resources.collections import CollectionsResource
+from impresso.resources.content_items import ContentItemsResource
+from impresso.resources.data_providers import DataProvidersResource
from impresso.resources.entities import EntitiesResource
+from impresso.resources.experiments import ExperimentsResource
+from impresso.resources.images import ImagesResource
from impresso.resources.media_sources import MediaSourcesResource
from impresso.resources.search import SearchResource
from impresso.resources.text_reuse import TextReuseDomain
from impresso.resources.tools import ToolsResource
+from impresso.resources.topics import TopicsResource
class ImpressoApiResourcesBase:
@@ -38,6 +42,22 @@ def collections(self) -> CollectionsResource:
def entities(self) -> EntitiesResource:
return EntitiesResource(self._api_client)
+ @property
+ def images(self) -> ImagesResource:
+ return ImagesResource(self._api_client)
+
@property
def tools(self) -> ToolsResource:
return ToolsResource(self._api_client)
+
+ @property
+ def experiments(self) -> ExperimentsResource:
+ return ExperimentsResource(self._api_client)
+
+ @property
+ def topics(self) -> TopicsResource:
+ return TopicsResource(self._api_client)
+
+ @property
+ def data_providers(self) -> DataProvidersResource:
+ return DataProvidersResource(self._api_client)
diff --git a/impresso/data_container.py b/impresso/data_container.py
index af292c9..147484c 100644
--- a/impresso/data_container.py
+++ b/impresso/data_container.py
@@ -31,7 +31,7 @@ def __init__(
self._web_app_search_result_url = web_app_search_result_url
def _repr_html_(self):
- df_repr = self.df.head(3).to_html(notebook=True)
+ df_repr = self.df.head(3)._repr_html_()
response_type = self.__class__.__name__.replace("DataContainer", "").replace(
"Container", ""
)
diff --git a/impresso/protobuf/query_pb2.py b/impresso/protobuf/query_pb2.py
index eb0613f..207fca5 100644
--- a/impresso/protobuf/query_pb2.py
+++ b/impresso/protobuf/query_pb2.py
@@ -1,12 +1,22 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
# source: query.proto
-# Protobuf Python Version: 5.26.1
+# Protobuf Python Version: 6.32.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+ _runtime_version.Domain.PUBLIC,
+ 6,
+ 32,
+ 1,
+ '',
+ 'query.proto'
+)
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
@@ -14,37 +24,39 @@
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bquery.proto\x12\x0eimpresso.query\"%\n\tDateRange\x12\x0c\n\x04\x66rom\x18\x01 \x01(\x12\x12\n\n\x02to\x18\x02 \x01(\x12\"\x89\x02\n\x06\x46ilter\x12.\n\x07\x63ontext\x18\x01 \x01(\x0e\x32\x1d.impresso.query.FilterContext\x12*\n\x02op\x18\x02 \x01(\x0e\x32\x1e.impresso.query.FilterOperator\x12(\n\x04type\x18\x03 \x01(\x0e\x32\x1a.impresso.query.FilterType\x12\x32\n\tprecision\x18\x04 \x01(\x0e\x32\x1f.impresso.query.FilterPrecision\x12\t\n\x01q\x18\x05 \x03(\t\x12,\n\tdaterange\x18\x06 \x01(\x0b\x32\x19.impresso.query.DateRange\x12\x0c\n\x04uids\x18\x07 \x03(\t\"d\n\x0bSearchQuery\x12\'\n\x07\x66ilters\x18\x01 \x03(\x0b\x32\x16.impresso.query.Filter\x12,\n\x08group_by\x18\x02 \x01(\x0e\x32\x1a.impresso.query.GroupValue\"\x86\x03\n\x1e\x43ollectionRecommenderParameter\x12R\n\x03key\x18\x01 \x01(\x0e\x32\x45.impresso.query.CollectionRecommenderParameter.RecommenderParameterId\x12\x14\n\x0cstring_value\x18\x02 \x01(\t\x12\x14\n\x0cnumber_value\x18\x03 \x01(\x11\x12\x12\n\nbool_value\x18\x04 \x01(\x08\"\xcf\x01\n\x16RecommenderParameterId\x12\x12\n\x0eID_UNSPECIFIED\x10\x00\x12\x11\n\rID_COUNT_TYPE\x10\x01\x12\x16\n\x12ID_MIN_OCCURRENCES\x10\x02\x12\x15\n\x11ID_NUMBER_TO_KEEP\x10\x03\x12\x1d\n\x19ID_REMOVE_FULLY_MENTIONED\x10\x04\x12\x1a\n\x16ID_NORMALIZE_MAX_SCORE\x10\x05\x12\r\n\tID_MARGIN\x10\x06\x12\x15\n\x11ID_SCALING_FACTOR\x10\x07\"\xc1\x02\n\x15\x43ollectionRecommender\x12\x43\n\x04type\x18\x01 \x01(\x0e\x32\x35.impresso.query.CollectionRecommender.RecommenderType\x12\x0e\n\x06weight\x18\x02 \x01(\x11\x12\x42\n\nparameters\x18\x03 \x03(\x0b\x32..impresso.query.CollectionRecommenderParameter\x12\x0f\n\x07\x65nabled\x18\x04 
\x01(\x08\"~\n\x0fRecommenderType\x12\x14\n\x10TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fTYPE_TIME_RANGE\x10\x01\x12\x11\n\rTYPE_ENTITIES\x10\x02\x12\x0f\n\x0bTYPE_TOPICS\x10\x03\x12\x1c\n\x18TYPE_TEXT_REUSE_CLUSTERS\x10\x04\"]\n\x1e\x43ollectionRecommendersSettings\x12;\n\x0crecommenders\x18\x01 \x03(\x0b\x32%.impresso.query.CollectionRecommender*R\n\rFilterContext\x12\x17\n\x13\x43ONTEXT_UNSPECIFIED\x10\x00\x12\x13\n\x0f\x43ONTEXT_INCLUDE\x10\x01\x12\x13\n\x0f\x43ONTEXT_EXCLUDE\x10\x02*M\n\x0e\x46ilterOperator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x10\n\x0cOPERATOR_AND\x10\x01\x12\x0f\n\x0bOPERATOR_OR\x10\x02*\xc0\x05\n\nFilterType\x12\x14\n\x10TYPE_UNSPECIFIED\x10\x00\x12\x0c\n\x08TYPE_UID\x10\x01\x12\x1a\n\x16TYPE_HAS_TEXT_CONTENTS\x10\x02\x12\x0e\n\nTYPE_TITLE\x10\x03\x12\x11\n\rTYPE_IS_FRONT\x10\x04\x12\r\n\tTYPE_PAGE\x10\x05\x12\x0e\n\nTYPE_ISSUE\x10\x06\x12\x0f\n\x0bTYPE_STRING\x10\x07\x12\x0f\n\x0bTYPE_ENTITY\x10\x08\x12\x12\n\x0eTYPE_NEWSPAPER\x10\t\x12\x12\n\x0eTYPE_DATERANGE\x10\n\x12\r\n\tTYPE_YEAR\x10\x0b\x12\x11\n\rTYPE_LANGUAGE\x10\x0c\x12\r\n\tTYPE_TYPE\x10\r\x12\x0e\n\nTYPE_REGEX\x10\x0e\x12\x10\n\x0cTYPE_MENTION\x10\x0f\x12\x0f\n\x0bTYPE_PERSON\x10\x10\x12\x11\n\rTYPE_LOCATION\x10\x11\x12\x0e\n\nTYPE_TOPIC\x10\x12\x12\x13\n\x0fTYPE_COLLECTION\x10\x13\x12\x14\n\x10TYPE_OCR_QUALITY\x10\x14\x12\x17\n\x13TYPE_CONTENT_LENGTH\x10\x15\x12\x10\n\x0cTYPE_COUNTRY\x10\x16\x12\x15\n\x11TYPE_ACCESS_RIGHT\x10\x17\x12\x10\n\x0cTYPE_PARTNER\x10\x18\x12\x0e\n\nTYPE_MONTH\x10\x19\x12 \n\x1cTYPE_TEXT_REUSE_CLUSTER_SIZE\x10\x1a\x12+\n\'TYPE_TEXT_REUSE_CLUSTER_LEXICAL_OVERLAP\x10\x1b\x12%\n!TYPE_TEXT_REUSE_CLUSTER_DAY_DELTA\x10\x1c\x12\x1b\n\x17TYPE_TEXT_REUSE_CLUSTER\x10\x1d\x12\x19\n\x15TYPE_MENTION_FUNCTION\x10\x1e\x12\x0c\n\x08TYPE_NAG\x10\x1f\x12\x14\n\x10TYPE_WIKIDATA_ID\x10 
*\x81\x01\n\x0f\x46ilterPrecision\x12\x19\n\x15PRECISION_UNSPECIFIED\x10\x00\x12\x13\n\x0fPRECISION_EXACT\x10\x01\x12\x15\n\x11PRECISION_PARTIAL\x10\x02\x12\x13\n\x0fPRECISION_FUZZY\x10\x03\x12\x12\n\x0ePRECISION_SOFT\x10\x04*A\n\nGroupValue\x12\x1a\n\x16GROUPVALUE_UNSPECIFIED\x10\x00\x12\x17\n\x13GROUPVALUE_ARTICLES\x10\x01\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bquery.proto\x12\x0eimpresso.query\"%\n\tDateRange\x12\x0c\n\x04\x66rom\x18\x01 \x01(\x12\x12\n\n\x02to\x18\x02 \x01(\x12\"\x8d\x02\n\x06\x46ilter\x12.\n\x07\x63ontext\x18\x01 \x01(\x0e\x32\x1d.impresso.query.FilterContext\x12*\n\x02op\x18\x02 \x01(\x0e\x32\x1e.impresso.query.FilterOperator\x12(\n\x04type\x18\x03 \x01(\x0e\x32\x1a.impresso.query.FilterType\x12\x32\n\tprecision\x18\x04 \x01(\x0e\x32\x1f.impresso.query.FilterPrecision\x12\t\n\x01q\x18\x05 \x03(\t\x12\x30\n\tdaterange\x18\x06 \x01(\x0b\x32\x19.impresso.query.DateRangeB\x02\x18\x01\x12\x0c\n\x04uids\x18\x07 \x03(\t\"d\n\x0bSearchQuery\x12\'\n\x07\x66ilters\x18\x01 \x03(\x0b\x32\x16.impresso.query.Filter\x12,\n\x08group_by\x18\x02 \x01(\x0e\x32\x1a.impresso.query.GroupValue\"\x86\x03\n\x1e\x43ollectionRecommenderParameter\x12R\n\x03key\x18\x01 \x01(\x0e\x32\x45.impresso.query.CollectionRecommenderParameter.RecommenderParameterId\x12\x14\n\x0cstring_value\x18\x02 \x01(\t\x12\x14\n\x0cnumber_value\x18\x03 \x01(\x11\x12\x12\n\nbool_value\x18\x04 \x01(\x08\"\xcf\x01\n\x16RecommenderParameterId\x12\x12\n\x0eID_UNSPECIFIED\x10\x00\x12\x11\n\rID_COUNT_TYPE\x10\x01\x12\x16\n\x12ID_MIN_OCCURRENCES\x10\x02\x12\x15\n\x11ID_NUMBER_TO_KEEP\x10\x03\x12\x1d\n\x19ID_REMOVE_FULLY_MENTIONED\x10\x04\x12\x1a\n\x16ID_NORMALIZE_MAX_SCORE\x10\x05\x12\r\n\tID_MARGIN\x10\x06\x12\x15\n\x11ID_SCALING_FACTOR\x10\x07\"\xc1\x02\n\x15\x43ollectionRecommender\x12\x43\n\x04type\x18\x01 \x01(\x0e\x32\x35.impresso.query.CollectionRecommender.RecommenderType\x12\x0e\n\x06weight\x18\x02 \x01(\x11\x12\x42\n\nparameters\x18\x03 \x03(\x0b\x32..impresso.query.CollectionRecommenderParameter\x12\x0f\n\x07\x65nabled\x18\x04 
\x01(\x08\"~\n\x0fRecommenderType\x12\x14\n\x10TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fTYPE_TIME_RANGE\x10\x01\x12\x11\n\rTYPE_ENTITIES\x10\x02\x12\x0f\n\x0bTYPE_TOPICS\x10\x03\x12\x1c\n\x18TYPE_TEXT_REUSE_CLUSTERS\x10\x04\"]\n\x1e\x43ollectionRecommendersSettings\x12;\n\x0crecommenders\x18\x01 \x03(\x0b\x32%.impresso.query.CollectionRecommender*R\n\rFilterContext\x12\x17\n\x13\x43ONTEXT_UNSPECIFIED\x10\x00\x12\x13\n\x0f\x43ONTEXT_INCLUDE\x10\x01\x12\x13\n\x0f\x43ONTEXT_EXCLUDE\x10\x02*M\n\x0e\x46ilterOperator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x10\n\x0cOPERATOR_AND\x10\x01\x12\x0f\n\x0bOPERATOR_OR\x10\x02*\xbc\x07\n\nFilterType\x12\x14\n\x10TYPE_UNSPECIFIED\x10\x00\x12\x0c\n\x08TYPE_UID\x10\x01\x12\x1a\n\x16TYPE_HAS_TEXT_CONTENTS\x10\x02\x12\x0e\n\nTYPE_TITLE\x10\x03\x12\x11\n\rTYPE_IS_FRONT\x10\x04\x12\r\n\tTYPE_PAGE\x10\x05\x12\x0e\n\nTYPE_ISSUE\x10\x06\x12\x0f\n\x0bTYPE_STRING\x10\x07\x12\x0f\n\x0bTYPE_ENTITY\x10\x08\x12\x12\n\x0eTYPE_NEWSPAPER\x10\t\x12\x12\n\x0eTYPE_DATERANGE\x10\n\x12\r\n\tTYPE_YEAR\x10\x0b\x12\x11\n\rTYPE_LANGUAGE\x10\x0c\x12\r\n\tTYPE_TYPE\x10\r\x12\x0e\n\nTYPE_REGEX\x10\x0e\x12\x10\n\x0cTYPE_MENTION\x10\x0f\x12\x0f\n\x0bTYPE_PERSON\x10\x10\x12\x11\n\rTYPE_LOCATION\x10\x11\x12\x0e\n\nTYPE_TOPIC\x10\x12\x12\x13\n\x0fTYPE_COLLECTION\x10\x13\x12\x14\n\x10TYPE_OCR_QUALITY\x10\x14\x12\x17\n\x13TYPE_CONTENT_LENGTH\x10\x15\x12\x10\n\x0cTYPE_COUNTRY\x10\x16\x12\x15\n\x11TYPE_ACCESS_RIGHT\x10\x17\x12\x10\n\x0cTYPE_PARTNER\x10\x18\x12\x0e\n\nTYPE_MONTH\x10\x19\x12 \n\x1cTYPE_TEXT_REUSE_CLUSTER_SIZE\x10\x1a\x12+\n\'TYPE_TEXT_REUSE_CLUSTER_LEXICAL_OVERLAP\x10\x1b\x12%\n!TYPE_TEXT_REUSE_CLUSTER_DAY_DELTA\x10\x1c\x12\x1b\n\x17TYPE_TEXT_REUSE_CLUSTER\x10\x1d\x12\x19\n\x15TYPE_MENTION_FUNCTION\x10\x1e\x12\x0c\n\x08TYPE_NAG\x10\x1f\x12\x14\n\x10TYPE_WIKIDATA_ID\x10 
\x12\x14\n\x10TYPE_DATA_DOMAIN\x10!\x12\x12\n\x0eTYPE_COPYRIGHT\x10\"\x12\x14\n\x10TYPE_SOURCE_TYPE\x10#\x12\x16\n\x12TYPE_SOURCE_MEDIUM\x10$\x12\x15\n\x11TYPE_ORGANISATION\x10%\x12\x12\n\x0eTYPE_EMBEDDING\x10&\x12\x1d\n\x19TYPE_IMAGE_VISUAL_CONTENT\x10\'\x12\x18\n\x14TYPE_IMAGE_TECHNIQUE\x10(\x12!\n\x1dTYPE_IMAGE_COMMUNICATION_GOAL\x10)\x12\x1b\n\x17TYPE_IMAGE_CONTENT_TYPE\x10**\x81\x01\n\x0f\x46ilterPrecision\x12\x19\n\x15PRECISION_UNSPECIFIED\x10\x00\x12\x13\n\x0fPRECISION_EXACT\x10\x01\x12\x15\n\x11PRECISION_PARTIAL\x10\x02\x12\x13\n\x0fPRECISION_FUZZY\x10\x03\x12\x12\n\x0ePRECISION_SOFT\x10\x04*A\n\nGroupValue\x12\x1a\n\x16GROUPVALUE_UNSPECIFIED\x10\x00\x12\x17\n\x13GROUPVALUE_ARTICLES\x10\x01\x62\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'query_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
DESCRIPTOR._loaded_options = None
- _globals['_FILTERCONTEXT']._serialized_start=1252
- _globals['_FILTERCONTEXT']._serialized_end=1334
- _globals['_FILTEROPERATOR']._serialized_start=1336
- _globals['_FILTEROPERATOR']._serialized_end=1413
- _globals['_FILTERTYPE']._serialized_start=1416
- _globals['_FILTERTYPE']._serialized_end=2120
- _globals['_FILTERPRECISION']._serialized_start=2123
- _globals['_FILTERPRECISION']._serialized_end=2252
- _globals['_GROUPVALUE']._serialized_start=2254
- _globals['_GROUPVALUE']._serialized_end=2319
+ _globals['_FILTER'].fields_by_name['daterange']._loaded_options = None
+ _globals['_FILTER'].fields_by_name['daterange']._serialized_options = b'\030\001'
+ _globals['_FILTERCONTEXT']._serialized_start=1256
+ _globals['_FILTERCONTEXT']._serialized_end=1338
+ _globals['_FILTEROPERATOR']._serialized_start=1340
+ _globals['_FILTEROPERATOR']._serialized_end=1417
+ _globals['_FILTERTYPE']._serialized_start=1420
+ _globals['_FILTERTYPE']._serialized_end=2376
+ _globals['_FILTERPRECISION']._serialized_start=2379
+ _globals['_FILTERPRECISION']._serialized_end=2508
+ _globals['_GROUPVALUE']._serialized_start=2510
+ _globals['_GROUPVALUE']._serialized_end=2575
_globals['_DATERANGE']._serialized_start=31
_globals['_DATERANGE']._serialized_end=68
_globals['_FILTER']._serialized_start=71
- _globals['_FILTER']._serialized_end=336
- _globals['_SEARCHQUERY']._serialized_start=338
- _globals['_SEARCHQUERY']._serialized_end=438
- _globals['_COLLECTIONRECOMMENDERPARAMETER']._serialized_start=441
- _globals['_COLLECTIONRECOMMENDERPARAMETER']._serialized_end=831
- _globals['_COLLECTIONRECOMMENDERPARAMETER_RECOMMENDERPARAMETERID']._serialized_start=624
- _globals['_COLLECTIONRECOMMENDERPARAMETER_RECOMMENDERPARAMETERID']._serialized_end=831
- _globals['_COLLECTIONRECOMMENDER']._serialized_start=834
- _globals['_COLLECTIONRECOMMENDER']._serialized_end=1155
- _globals['_COLLECTIONRECOMMENDER_RECOMMENDERTYPE']._serialized_start=1029
- _globals['_COLLECTIONRECOMMENDER_RECOMMENDERTYPE']._serialized_end=1155
- _globals['_COLLECTIONRECOMMENDERSSETTINGS']._serialized_start=1157
- _globals['_COLLECTIONRECOMMENDERSSETTINGS']._serialized_end=1250
+ _globals['_FILTER']._serialized_end=340
+ _globals['_SEARCHQUERY']._serialized_start=342
+ _globals['_SEARCHQUERY']._serialized_end=442
+ _globals['_COLLECTIONRECOMMENDERPARAMETER']._serialized_start=445
+ _globals['_COLLECTIONRECOMMENDERPARAMETER']._serialized_end=835
+ _globals['_COLLECTIONRECOMMENDERPARAMETER_RECOMMENDERPARAMETERID']._serialized_start=628
+ _globals['_COLLECTIONRECOMMENDERPARAMETER_RECOMMENDERPARAMETERID']._serialized_end=835
+ _globals['_COLLECTIONRECOMMENDER']._serialized_start=838
+ _globals['_COLLECTIONRECOMMENDER']._serialized_end=1159
+ _globals['_COLLECTIONRECOMMENDER_RECOMMENDERTYPE']._serialized_start=1033
+ _globals['_COLLECTIONRECOMMENDER_RECOMMENDERTYPE']._serialized_end=1159
+ _globals['_COLLECTIONRECOMMENDERSSETTINGS']._serialized_start=1161
+ _globals['_COLLECTIONRECOMMENDERSSETTINGS']._serialized_end=1254
# @@protoc_insertion_point(module_scope)
diff --git a/impresso/protobuf/query_pb2.pyi b/impresso/protobuf/query_pb2.pyi
index 1de5e3f..0a2bbc3 100644
--- a/impresso/protobuf/query_pb2.pyi
+++ b/impresso/protobuf/query_pb2.pyi
@@ -2,7 +2,8 @@ from google.protobuf.internal import containers as _containers
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
-from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
+from collections.abc import Iterable as _Iterable, Mapping as _Mapping
+from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
DESCRIPTOR: _descriptor.FileDescriptor
@@ -53,6 +54,16 @@ class FilterType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
TYPE_MENTION_FUNCTION: _ClassVar[FilterType]
TYPE_NAG: _ClassVar[FilterType]
TYPE_WIKIDATA_ID: _ClassVar[FilterType]
+ TYPE_DATA_DOMAIN: _ClassVar[FilterType]
+ TYPE_COPYRIGHT: _ClassVar[FilterType]
+ TYPE_SOURCE_TYPE: _ClassVar[FilterType]
+ TYPE_SOURCE_MEDIUM: _ClassVar[FilterType]
+ TYPE_ORGANISATION: _ClassVar[FilterType]
+ TYPE_EMBEDDING: _ClassVar[FilterType]
+ TYPE_IMAGE_VISUAL_CONTENT: _ClassVar[FilterType]
+ TYPE_IMAGE_TECHNIQUE: _ClassVar[FilterType]
+ TYPE_IMAGE_COMMUNICATION_GOAL: _ClassVar[FilterType]
+ TYPE_IMAGE_CONTENT_TYPE: _ClassVar[FilterType]
class FilterPrecision(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
@@ -105,6 +116,16 @@ TYPE_TEXT_REUSE_CLUSTER: FilterType
TYPE_MENTION_FUNCTION: FilterType
TYPE_NAG: FilterType
TYPE_WIKIDATA_ID: FilterType
+TYPE_DATA_DOMAIN: FilterType
+TYPE_COPYRIGHT: FilterType
+TYPE_SOURCE_TYPE: FilterType
+TYPE_SOURCE_MEDIUM: FilterType
+TYPE_ORGANISATION: FilterType
+TYPE_EMBEDDING: FilterType
+TYPE_IMAGE_VISUAL_CONTENT: FilterType
+TYPE_IMAGE_TECHNIQUE: FilterType
+TYPE_IMAGE_COMMUNICATION_GOAL: FilterType
+TYPE_IMAGE_CONTENT_TYPE: FilterType
PRECISION_UNSPECIFIED: FilterPrecision
PRECISION_EXACT: FilterPrecision
PRECISION_PARTIAL: FilterPrecision
@@ -174,7 +195,7 @@ class CollectionRecommenderParameter(_message.Message):
string_value: str
number_value: int
bool_value: bool
- def __init__(self, key: _Optional[_Union[CollectionRecommenderParameter.RecommenderParameterId, str]] = ..., string_value: _Optional[str] = ..., number_value: _Optional[int] = ..., bool_value: bool = ...) -> None: ...
+ def __init__(self, key: _Optional[_Union[CollectionRecommenderParameter.RecommenderParameterId, str]] = ..., string_value: _Optional[str] = ..., number_value: _Optional[int] = ..., bool_value: _Optional[bool] = ...) -> None: ...
class CollectionRecommender(_message.Message):
__slots__ = ("type", "weight", "parameters", "enabled")
@@ -198,7 +219,7 @@ class CollectionRecommender(_message.Message):
weight: int
parameters: _containers.RepeatedCompositeFieldContainer[CollectionRecommenderParameter]
enabled: bool
- def __init__(self, type: _Optional[_Union[CollectionRecommender.RecommenderType, str]] = ..., weight: _Optional[int] = ..., parameters: _Optional[_Iterable[_Union[CollectionRecommenderParameter, _Mapping]]] = ..., enabled: bool = ...) -> None: ...
+ def __init__(self, type: _Optional[_Union[CollectionRecommender.RecommenderType, str]] = ..., weight: _Optional[int] = ..., parameters: _Optional[_Iterable[_Union[CollectionRecommenderParameter, _Mapping]]] = ..., enabled: _Optional[bool] = ...) -> None: ...
class CollectionRecommendersSettings(_message.Message):
__slots__ = ("recommenders",)
diff --git a/impresso/resources/collections.py b/impresso/resources/collections.py
index bf83662..9d8c9b7 100644
--- a/impresso/resources/collections.py
+++ b/impresso/resources/collections.py
@@ -167,7 +167,14 @@ def find(
)
def get(self, id: str) -> GetCollectionContainer:
- """Get collection by ID."""
+ """Get collection by ID.
+
+ Args:
+ id: The ID of the collection to retrieve.
+
+ Returns:
+ GetCollectionContainer: Data container with the collection information.
+ """
result = get_collection.sync(
client=self._api_client,
@@ -230,7 +237,7 @@ def add_items(self, collection_id: str, item_ids: list[str]) -> None:
def remove_items(self, collection_id: str, item_ids: list[str]) -> None:
"""
- Add items to a collection by their IDs.
+ Remove items from a collection by their IDs.
**NOTE**: Items are not removed immediately.
This operation may take up to a few minutes
@@ -238,7 +245,7 @@ def remove_items(self, collection_id: str, item_ids: list[str]) -> None:
Args:
collection_id: ID of the collection.
- item_ids: IDs of the content items to add.
+ item_ids: IDs of the content items to remove.
"""
result = patch_collections_collection_id_items.sync(
client=self._api_client,
diff --git a/impresso/resources/content_items.py b/impresso/resources/content_items.py
index bb1d1d0..98289cf 100644
--- a/impresso/resources/content_items.py
+++ b/impresso/resources/content_items.py
@@ -81,12 +81,43 @@ def total(self) -> int:
class ContentItemsResource(Resource):
- """Get content items from the impresso database."""
+ """Get content items from the impresso database.
+
+ Examples:
+ Get a specific content item by its ID:
+ >>> item_id = "some-item-id" # Replace with a real ID
+ >>> item = content_items.get(item_id) # doctest: +SKIP
+ >>> print(item.df) # doctest: +SKIP
+
+ Get a content item with embeddings:
+ >>> item = content_items.get(item_id, include_embeddings=True) # doctest: +SKIP
+ >>> print(item.raw.get("embeddings")) # doctest: +SKIP
+
+ Get only the embeddings of a content item:
+ >>> embeddings = content_items.get_embeddings(item_id) # doctest: +SKIP
+ >>> print(embeddings) # doctest: +SKIP
+ """
name = "content_items"
- def get(self, id: str):
- result = get_content_item.sync(client=self._api_client, id=id)
+ def get(
+ self, id: str, include_embeddings: bool = False
+ ) -> ContentItemDataContainer:
+ """
+ Get a content item by its id.
+
+ Args:
+ id: The id of the content item.
+ include_embeddings: Whether to include embeddings in the response.
+
+ Returns:
+ ContentItemDataContainer: The content item data container.
+ """
+ result = get_content_item.sync(
+ client=self._api_client,
+ id=id,
+ include_embeddings=include_embeddings,
+ )
raise_for_error(result)
id_parts = id.split("-")
@@ -98,3 +129,17 @@ def get(self, id: str):
ContentItem,
f"{self._get_web_app_base_url()}/issue/{issue_id}/view?articleId={article_id}",
)
+
+    def get_embeddings(self, id: str) -> list[str]:
+        """
+        Get the embeddings of a content item by its id.
+
+        Fetches the item with `get(id, include_embeddings=True)` and reads the
+        "embeddings" entry of its raw payload.
+
+        Args:
+            id: The id of the content item.
+
+        Returns:
+            list[str]: The embeddings of the content item, or an empty list when the
+                item is falsy or its raw payload has no "embeddings" entry. Each
+                embedding is presumably a string in the canonical
+                `<model>:<encoded vector>` form.
+                NOTE(review): confirm the exact canonical form against the API docs.
+        """
+        item = self.get(id, include_embeddings=True)
+        return item.raw.get("embeddings", []) if item else []
diff --git a/impresso/resources/data_providers.py b/impresso/resources/data_providers.py
new file mode 100644
index 0000000..f995b79
--- /dev/null
+++ b/impresso/resources/data_providers.py
@@ -0,0 +1,194 @@
+from typing import Any, Callable, Iterator, cast
+
+from pandas import DataFrame, json_normalize
+
+from impresso.api_client.api.data_providers import (
+ find_data_providers,
+ get_data_provider,
+)
+from impresso.api_client.models.find_data_providers_base_find_response import (
+ FindDataProvidersBaseFindResponse,
+)
+from impresso.api_client.types import UNSET
+from impresso.api_models import BaseFind, Filter
+from impresso.data_container import DataContainer, iterate_pages
+from impresso.resources.base import Resource
+from impresso.structures import AND, OR
+from impresso.util.error import raise_for_error
+from impresso.util.filters import and_or_filter
+
+
+class FindDataProvidersSchema(BaseFind):
+    """Schema of the find-data-providers response; `data` holds the records as dicts."""
+
+    data: list[dict]
+
+
+class FindDataProvidersContainer(DataContainer):
+    """Container wrapping the response of a data providers `find` call."""
+
+    def __init__(
+        self,
+        data: FindDataProvidersBaseFindResponse,
+        pydantic_model: type[FindDataProvidersSchema],
+        fetch_method: Callable[..., "FindDataProvidersContainer"],
+        fetch_method_args: dict[str, Any],
+        web_app_search_result_url: str | None = None,
+    ):
+        super().__init__(data, pydantic_model, web_app_search_result_url)
+        # Remembered so that pages() can re-issue the same query for later pages.
+        self._fetch_method = fetch_method
+        self._fetch_method_args = fetch_method_args
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the result records as a dataframe indexed by `id` (empty if no records)."""
+        records = self._data.to_dict()["data"]
+        if not records:
+            return DataFrame()
+        return json_normalize(records).set_index("id")
+
+    def pages(self) -> Iterator["FindDataProvidersContainer"]:
+        """Yield this page first, then every following page of results."""
+        yield self
+        yield from iterate_pages(
+            self._fetch_method,
+            self._fetch_method_args,
+            self.offset,
+            self.limit,
+            self.total,
+        )
+
+
+class GetDataProviderContainer(DataContainer):
+    """Container wrapping the response of a data provider `get` call."""
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the provider as a one-row dataframe indexed by `id` (empty if no data)."""
+        record = self._data.to_dict()
+        if not record:
+            return DataFrame()
+        return json_normalize([record]).set_index("id")
+
+
+class DataProvidersResource(Resource):
+    """Search data providers in the Impresso database.
+
+    Data providers are partner institutions that provide content to Impresso,
+    such as libraries, archives, and media organizations.
+
+    Examples:
+        Find all data providers:
+        >>> results = data_providers.find()  # doctest: +SKIP
+        >>> print(results.df)  # doctest: +SKIP
+
+        Search data providers by name:
+        >>> results = data_providers.find(term="library")  # doctest: +SKIP
+        >>> print(results.df)  # doctest: +SKIP
+
+        Get a specific data provider by its ID:
+        >>> provider_id = "some-provider-id"  # Replace with a real ID
+        >>> provider = data_providers.get(provider_id)  # doctest: +SKIP
+        >>> print(provider.df)  # doctest: +SKIP
+    """
+
+    name = "data_providers"
+
+    def find(
+        self,
+        term: str | None = None,
+        provider_id: str | AND[str] | OR[str] | None = None,
+        limit: int | None = None,
+        offset: int | None = None,
+    ) -> FindDataProvidersContainer:
+        """
+        Search data providers in Impresso.
+
+        Data providers are partner institutions that provide content to Impresso,
+        such as libraries, archives, and media organizations.
+
+        Args:
+            term: Search term to find data providers by their names.
+            provider_id: Return only data provider with this ID.
+                NOTE(review): the filter built from this argument is not passed
+                to the API call below, so it currently does not restrict the
+                results; it only suppresses the web app URL.
+            limit: Number of results to return.
+            offset: Number of results to skip.
+
+        Returns:
+            FindDataProvidersContainer: Data container with a page of results of the search.
+        """
+
+        filters: list[Filter] = []
+        if provider_id is not None:
+            filters.extend(and_or_filter(provider_id, "id"))
+
+        # NOTE(review): `filters` is never used after this point; the protobuf
+        # conversion below is commented out and the API call takes no filters.
+        # filters_pb = filters_as_protobuf(filters or [])
+
+        # Arguments left as None are sent as UNSET rather than as explicit values.
+        result = find_data_providers.sync(
+            client=self._api_client,
+            term=term if term is not None else UNSET,
+            limit=limit if limit is not None else UNSET,
+            offset=offset if offset is not None else UNSET,
+        )
+        raise_for_error(result)
+        return FindDataProvidersContainer(
+            cast(FindDataProvidersBaseFindResponse, result),
+            FindDataProvidersSchema,
+            fetch_method=self.find,
+            # limit/offset are left out here; presumably iterate_pages supplies
+            # them for each following page — TODO confirm.
+            fetch_method_args={
+                "term": term,
+                "provider_id": provider_id,
+            },
+            # A web app URL is only provided when no provider_id was given.
+            web_app_search_result_url=(
+                _build_web_app_find_data_providers_url(
+                    base_url=self._get_web_app_base_url(),
+                    term=term,
+                )
+                if provider_id is None
+                else None
+            ),
+        )
+
+    def get(self, id: str) -> GetDataProviderContainer:
+        """Get data provider by ID.
+
+        Args:
+            id: The ID of the data provider to retrieve.
+
+        Returns:
+            GetDataProviderContainer: Data container with the data provider information.
+        """
+
+        result = get_data_provider.sync(
+            client=self._api_client,
+            id=id,
+        )
+        raise_for_error(result)
+        # NOTE(review): the *find* schema is passed as the model of a single
+        # provider response — confirm this is the intended pydantic model.
+        return GetDataProviderContainer(
+            result,
+            FindDataProvidersSchema,
+            web_app_search_result_url=_build_web_app_get_data_provider_url(
+                base_url=self._get_web_app_base_url(),
+                id=id,
+            ),
+        )
+
+
+def _build_web_app_find_data_providers_url(
+    base_url: str,
+    term: str | None = None,
+) -> str:
+    """Build the web app URL of the data providers search page.
+
+    Args:
+        base_url: Base URL of the web app, without a trailing slash.
+        term: Optional search term, sent as the `q` query parameter.
+
+    Returns:
+        str: `{base_url}/data-providers`, followed by `?` and the URL-encoded
+            query string when at least one parameter is not None.
+    """
+    # Local import keeps this helper self-contained.
+    from urllib.parse import quote, urlencode
+
+    query_params = {"q": term}
+    # Percent-encode values so that terms containing spaces, "&", "#", "=" etc.
+    # still produce a valid URL with a single `q` parameter.
+    query_string = urlencode(
+        {key: value for key, value in query_params.items() if value is not None},
+        quote_via=quote,
+    )
+    url = f"{base_url}/data-providers"
+    return f"{url}?{query_string}" if query_string else url
+
+
+def _build_web_app_get_data_provider_url(
+    base_url: str,
+    id: str,
+) -> str:
+    """Return the web app URL of the page of the data provider with the given ID."""
+    provider_path = f"data-providers/{id}"
+    return f"{base_url}/{provider_path}"
diff --git a/impresso/resources/entities.py b/impresso/resources/entities.py
index 2b56b88..e4f17aa 100644
--- a/impresso/resources/entities.py
+++ b/impresso/resources/entities.py
@@ -77,7 +77,26 @@ def df(self) -> DataFrame:
class EntitiesResource(Resource):
- """Search entities in the Impresso database."""
+ """Search entities in the Impresso database.
+
+ Examples:
+ Search for entities by name:
+ >>> results = entities.find(term="Napoleon") # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Filter entities by type:
+ >>> results = entities.find(term="Paris", entity_type="location") # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Get entity details with Wikidata resolution:
+ >>> results = entities.find(term="Napoleon", resolve=True) # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Get a specific entity by its ID:
+ >>> entity_id = "some-entity-id" # Replace with a real ID
+ >>> entity = entities.get(entity_id) # doctest: +SKIP
+ >>> print(entity.df) # doctest: +SKIP
+ """
name = "entities"
@@ -156,7 +175,14 @@ def find(
)
def get(self, id: str) -> GetEntityContainer:
- """Get entity by ID."""
+ """Get entity by ID.
+
+ Args:
+ id: The ID of the entity to retrieve.
+
+ Returns:
+ GetEntityContainer: Data container with the entity information.
+ """
result = get_entity.sync(
client=self._api_client,
diff --git a/impresso/resources/experiments.py b/impresso/resources/experiments.py
new file mode 100644
index 0000000..5cea173
--- /dev/null
+++ b/impresso/resources/experiments.py
@@ -0,0 +1,92 @@
+from typing import Any, Callable, cast
+
+from pandas import DataFrame, json_normalize
+
+from impresso.api_client.api.experiments import (
+ interact_with_experiment,
+ find_experiments,
+)
+from impresso.api_client.models.find_experiments_base_find_response import (
+ FindExperimentsBaseFindResponse,
+)
+from impresso.api_client.models.freeform import Freeform
+from impresso.api_client.models.interact_with_experiment_body import (
+ InteractWithExperimentBody,
+)
+from impresso.data_container import DataContainer
+from impresso.resources.base import Resource
+from impresso.util.error import raise_for_error
+
+from impresso.api_models import BaseFind, ExperimentInfo
+
+
+class FindExperimentsSchema(BaseFind):
+    """Schema of the find-experiments response; `data` lists `ExperimentInfo` items."""
+
+    data: list[ExperimentInfo]
+
+
+class FindExperimentsContainer(DataContainer):
+    """Container wrapping the response of an experiments `find` call."""
+
+    def __init__(
+        self,
+        data: FindExperimentsBaseFindResponse,
+        pydantic_model: type[FindExperimentsSchema],
+        fetch_method: Callable[..., "FindExperimentsContainer"],
+        fetch_method_args: dict[str, Any],
+        web_app_search_result_url: str | None = None,
+    ):
+        super().__init__(data, pydantic_model, web_app_search_result_url)
+        # Kept alongside the data: the callable and arguments that produced it.
+        self._fetch_method = fetch_method
+        self._fetch_method_args = fetch_method_args
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the experiments as a dataframe indexed by `id` (empty if none)."""
+        records = self._data.to_dict()["data"]
+        if not records:
+            return DataFrame()
+        return json_normalize(records).set_index("id")
+
+
+class ExperimentsResource(Resource):
+    """List and run the experiments exposed by the Impresso API."""
+
+    name = "experiments"
+
+    def find(self) -> FindExperimentsContainer:
+        """List every available experiment.
+
+        Returns:
+            FindExperimentsContainer: Data container with list of available experiments.
+        """
+        response = find_experiments.sync(client=self._api_client)
+        raise_for_error(response)
+
+        return FindExperimentsContainer(
+            cast(FindExperimentsBaseFindResponse, response),
+            FindExperimentsSchema,
+            fetch_method=self.find,
+            fetch_method_args={},
+        )
+
+    def execute(self, experiment_id: str, body: dict) -> dict:
+        """Run the experiment identified by `experiment_id`.
+
+        Args:
+            experiment_id: ID of the experiment to execute.
+            body: Request payload; converted with `InteractWithExperimentBody.from_dict`.
+
+        Returns:
+            dict: The API response converted to a dictionary.
+        """
+        request_body = InteractWithExperimentBody.from_dict(body)
+        response = interact_with_experiment.sync(
+            client=self._api_client,
+            id=experiment_id,
+            body=request_body,
+        )
+        raise_for_error(response)
+        return cast(Freeform, response).to_dict()
diff --git a/impresso/resources/images.py b/impresso/resources/images.py
new file mode 100644
index 0000000..5d8ff11
--- /dev/null
+++ b/impresso/resources/images.py
@@ -0,0 +1,313 @@
+from typing import Any, Callable, Iterator, cast
+
+from pandas import DataFrame, json_normalize
+
+from impresso.api_client.api.images import find_images
+from impresso.api_client.api.images import get_image
+from impresso.api_client.models.find_images_base_find_response import (
+ FindImagesBaseFindResponse,
+)
+from impresso.api_client.models.find_images_order_by import (
+ FindImagesOrderBy,
+ FindImagesOrderByLiteral,
+)
+from impresso.api_client.types import UNSET
+from impresso.api_models import Q, BaseFind, Filter, Image
+from impresso.data_container import DataContainer, iterate_pages
+from impresso.resources.base import DEFAULT_PAGE_SIZE, Resource
+from impresso.resources.tools import Embedding
+from impresso.structures import AND, OR, DateRange
+from impresso.util.error import raise_for_error
+from impresso.util.filters import and_or_filter, filters_as_protobuf
+from impresso.util.py import get_enum_from_literal
+import pandas as pd
+
+
+# The formatter function needs to be available
+def _path_to_image_html(path):
+    """Format an image URL as an HTML `<img>` tag for display in a DataFrame cell.
+
+    Args:
+        path: URL (or path) of the image to display.
+
+    Returns:
+        str: An `<img>` element whose `src` is the HTML-escaped `path`.
+    """
+    from html import escape
+
+    # Escape the value so characters such as '"' or "&" in the URL cannot break
+    # out of the `src` attribute.
+    # NOTE(review): the width attribute is an assumed thumbnail size — confirm.
+    return f'<img src="{escape(str(path), quote=True)}" width="100">'
+
+
+class ImageDataFrame(pd.DataFrame):
+    """DataFrame subclass whose HTML representation formats the `previewImage` column."""
+
+    @property
+    def _constructor(self):
+        # pandas uses this hook to build result frames, so derived frames stay
+        # ImageDataFrame instances.
+        return ImageDataFrame
+
+    def _repr_html_(self):
+        """
+        Render the frame as HTML for notebook front ends (Jupyter/Colab).
+
+        The frame is rendered through its Styler, with `previewImage` values
+        passed through `_path_to_image_html`.
+        """
+        column_formatters = {"previewImage": _path_to_image_html}
+        return self.style.format(column_formatters).to_html()
+
+
+def _as_image_df(df: pd.DataFrame) -> ImageDataFrame:
+    """Wrap `df` in an `ImageDataFrame` and arrange its preview columns.
+
+    When a `previewUrl` column exists, it is copied into a `previewImage` column.
+    Columns are then reordered so that `previewImage` is the second column and
+    `previewUrl` is the last one.
+
+    Args:
+        df: The dataframe to wrap.
+
+    Returns:
+        ImageDataFrame: The wrapped dataframe with reordered columns.
+    """
+    idf = ImageDataFrame(df)
+    cols = idf.columns.tolist()
+
+    if "previewUrl" in cols:
+        idf["previewImage"] = idf["previewUrl"]
+        cols = idf.columns.tolist()
+    # Rearrange columns to make previewImage second
+    if "previewImage" in cols:
+        cols.remove("previewImage")
+        cols.insert(1, "previewImage")
+    # Push the URL to the back
+    if "previewUrl" in cols:
+        cols.remove("previewUrl")
+        # append, not insert(-1, ...): insert(-1) would place the URL *before*
+        # the last column instead of at the very end.
+        cols.append("previewUrl")
+    return cast(ImageDataFrame, idf[cols])
+
+
+class FindImagesSchema(BaseFind):
+    """Schema of the find-images response; `data` lists `Image` items."""
+
+    data: list[Image]
+
+
+class FindImagesContainer(DataContainer):
+    """Container wrapping the response of an images `find` call."""
+
+    def __init__(
+        self,
+        data: FindImagesBaseFindResponse,
+        pydantic_model: type[FindImagesSchema],
+        fetch_method: Callable[..., "FindImagesContainer"],
+        fetch_method_args: dict[str, Any],
+        web_app_search_result_url: str | None = None,
+    ):
+        super().__init__(data, pydantic_model, web_app_search_result_url)
+        # Remembered so that pages() can re-issue the same query for later pages.
+        self._fetch_method = fetch_method
+        self._fetch_method_args = fetch_method_args
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the images as a dataframe indexed by `uid` (empty if no records)."""
+        records = self._data.to_dict()["data"]
+        if not records:
+            return DataFrame()
+
+        indexed = json_normalize(records).set_index("uid")
+        return _as_image_df(indexed)
+
+    def pages(self) -> Iterator["FindImagesContainer"]:
+        """Yield this page first, then every following page of results."""
+        yield self
+        yield from iterate_pages(
+            self._fetch_method,
+            self._fetch_method_args,
+            self.offset,
+            self.limit,
+            self.total,
+        )
+
+
+class GetImageContainer(DataContainer):
+    """Container wrapping the response of an image `get` call."""
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the image as a dataframe indexed by `uid` (empty if no data)."""
+        record = self._data.to_dict()
+        if not record:
+            return DataFrame()
+
+        indexed = json_normalize(record).set_index("uid")
+        return _as_image_df(indexed)
+
+
+class ImagesResource(Resource):
+    """Search images in Impresso.
+
+    Examples:
+        Search for images by keyword:
+        >>> results = images.find(term="war")  # doctest: +SKIP
+        >>> print(results.df)  # doctest: +SKIP
+
+        Filter images by date range and newspaper:
+        >>> from impresso import DateRange
+        >>> date_range = DateRange(start="1900-01-01", end="1910-12-31")
+        >>> results = images.find(media_id="GDL", date_range=date_range)  # doctest: +SKIP
+        >>> print(results.df)  # doctest: +SKIP
+
+        Search for front page images only:
+        >>> results = images.find(is_front=True)  # doctest: +SKIP
+        >>> print(results.df)  # doctest: +SKIP
+
+        Search images by visual similarity using embeddings:
+        >>> embedding = tools.embed_image("path/to/image.jpg", target="image")  # doctest: +SKIP
+        >>> similar_images = images.find(embedding=embedding)  # doctest: +SKIP
+        >>> print(similar_images.df)  # doctest: +SKIP
+
+        Get a specific image by its ID:
+        >>> image_id = "some-image-id"  # Replace with a real ID
+        >>> image = images.get(image_id)  # doctest: +SKIP
+        >>> print(image.df)  # doctest: +SKIP
+    """
+
+    name = "images"
+
+    def find(
+        self,
+        term: str | None = None,
+        media_id: str | AND[str] | OR[str] | None = None,
+        issue_id: str | AND[str] | OR[str] | None = None,
+        is_front: bool | None = None,
+        date_range: DateRange | None = None,
+        visual_content: str | AND[str] | OR[str] | None = None,
+        technique: str | AND[str] | OR[str] | None = None,
+        communication_goal: str | AND[str] | OR[str] | None = None,
+        content_type: str | AND[str] | OR[str] | None = None,
+        embedding: Embedding | None = None,
+        include_embeddings: bool = False,
+        order_by: FindImagesOrderByLiteral | None = None,
+        limit: int | None = None,
+        offset: int | None = None,
+    ) -> FindImagesContainer:
+        """Find images in Impresso.
+
+        Args:
+            term: The search term for text-based search.
+            media_id: Filter by newspaper media ID(s).
+            issue_id: Filter by issue ID(s).
+            is_front: When True, return front page images only. False and None
+                apply no front page filter.
+            date_range: Filter by date range.
+            visual_content: Filter by visual content category.
+            technique: Filter by image technique.
+            communication_goal: Filter by communication goal.
+            content_type: Filter by content type.
+            embedding: Embedding for similarity search. Use `tools.embed_image()` or
+                `tools.embed_text()` to generate embeddings from images or text.
+            include_embeddings: Whether to include image embeddings in the response.
+                Defaults to False.
+            order_by: Sort order for results.
+            limit: Maximum number of results to return per page. Defaults to
+                `DEFAULT_PAGE_SIZE`.
+            offset: Number of results to skip.
+
+        Returns:
+            FindImagesContainer: Data container with the first page of the search results.
+        """
+        page_limit = limit if limit is not None else DEFAULT_PAGE_SIZE
+        # The embedding filter value carries the page size as a ":<limit>" suffix.
+        embedding_with_limit = f"{embedding}:{page_limit}" if embedding else None
+
+        filters: list[Filter] = []
+        if media_id is not None:
+            filters.extend(and_or_filter(media_id, "newspaper"))
+        if issue_id is not None:
+            filters.extend(and_or_filter(issue_id, "issue"))
+        # Only an explicit True adds the filter: the filter carries no value, so
+        # adding it for is_front=False would wrongly restrict to front pages too.
+        if is_front:
+            filters.append(Filter(type="isFront", daterange=None))
+        if date_range is not None:
+            filters.append(
+                Filter(
+                    type="daterange",
+                    q=Q(DateRange._as_filter_value(date_range)),
+                    context="exclude" if date_range.inverted else "include",
+                    daterange=None,
+                )
+            )
+        if visual_content is not None:
+            filters.extend(and_or_filter(visual_content, "image_visual_content"))
+        if technique is not None:
+            filters.extend(and_or_filter(technique, "image_technique"))
+        if communication_goal is not None:
+            filters.extend(
+                and_or_filter(communication_goal, "image_communication_goal")
+            )
+        if content_type is not None:
+            filters.extend(and_or_filter(content_type, "image_content_type"))
+        if embedding_with_limit is not None:
+            filters.extend(and_or_filter(embedding_with_limit, "embedding"))
+
+        filters_pb = filters_as_protobuf(filters or [])
+
+        result = find_images.sync(
+            client=self._api_client,
+            term=term if term is not None else UNSET,
+            order_by=(
+                get_enum_from_literal(order_by, FindImagesOrderBy)
+                if order_by is not None
+                else UNSET
+            ),
+            limit=page_limit,
+            offset=offset if offset is not None else UNSET,
+            filters=filters_pb if filters_pb else UNSET,
+            include_embeddings=include_embeddings if include_embeddings else UNSET,
+        )
+        raise_for_error(result)
+
+        return FindImagesContainer(
+            cast(FindImagesBaseFindResponse, result),
+            FindImagesSchema,
+            fetch_method=self.find,
+            # Replay every search criterion when fetching further pages, otherwise
+            # pages after the first one would ignore the filters and the ordering.
+            # The raw embedding is passed: `find` appends the page size itself.
+            fetch_method_args={
+                "term": term,
+                "media_id": media_id,
+                "issue_id": issue_id,
+                "is_front": is_front,
+                "date_range": date_range,
+                "visual_content": visual_content,
+                "technique": technique,
+                "communication_goal": communication_goal,
+                "content_type": content_type,
+                "embedding": embedding,
+                "include_embeddings": include_embeddings,
+                "order_by": order_by,
+            },
+            web_app_search_result_url=_build_web_app_find_images_url(
+                base_url=self._get_web_app_base_url(),
+                term=term,
+            ),
+        )
+
+    def get(
+        self,
+        id: str,
+        include_embeddings: bool = False,
+    ) -> GetImageContainer:
+        """Get an image by its id.
+
+        Args:
+            id: The id of the image.
+            include_embeddings: Whether to include embeddings in the response.
+
+        Returns:
+            GetImageContainer: The image data container.
+        """
+        result = get_image.sync(
+            client=self._api_client,
+            id=id,
+            include_embeddings=include_embeddings,
+        )
+        raise_for_error(result)
+
+        # Everything before the last dash is used as the issue id, the last
+        # dash-separated part as the article id of the web app link.
+        issue_id, _, article_id = id.rpartition("-")
+
+        return GetImageContainer(
+            result,
+            Image,
+            f"{self._get_web_app_base_url()}/issue/{issue_id}/view?articleId={article_id}",
+        )
+
+    def get_embeddings(self, id: str) -> list[str]:
+        """Get the embeddings of an image by its id.
+
+        Args:
+            id: The id of the image.
+
+        Returns:
+            list[str]: The embeddings of the image if present, otherwise an empty
+                list. Every embedding is returned in the canonical form
+                `<model_label>:<base64_embedding_vector>`.
+        """
+        item = self.get(id, include_embeddings=True)
+        return item.raw.get("embeddings", []) if item else []
+
+
+def _build_web_app_find_images_url(
+    base_url: str,
+    term: str | None = None,
+) -> str:
+    """Build the web app URL showing the same image search.
+
+    Args:
+        base_url: Base URL of the web app, without a trailing slash.
+        term: Search term, or None when no term was used.
+
+    Returns:
+        The URL of the images page, with the term as a percent-encoded ``q``
+        query parameter when a term is given.
+    """
+    from urllib.parse import quote, urlencode
+
+    query_params = {
+        "q": term,
+    }
+    # Percent-encode the values so that spaces, "&", "=" or "#" inside the
+    # search term cannot break or truncate the query string.
+    query_string = urlencode(
+        {key: value for key, value in query_params.items() if value is not None},
+        quote_via=quote,
+    )
+    url = f"{base_url}/images"
+    return f"{url}?{query_string}" if query_string else url
diff --git a/impresso/resources/media_sources.py b/impresso/resources/media_sources.py
index 6eeba5d..d2a5968 100644
--- a/impresso/resources/media_sources.py
+++ b/impresso/resources/media_sources.py
@@ -62,7 +62,27 @@ def pages(self) -> Iterator["FindMediaSourcesContainer"]:
class MediaSourcesResource(Resource):
- """Search media sources in the Impresso database."""
+ """Search media sources in the Impresso database.
+
+ Media sources are newspapers and other publications available in Impresso.
+
+ Examples:
+ Find all media sources:
+ >>> results = media_sources.find() # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Search media sources by name:
+ >>> results = media_sources.find(term="Gazette") # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Filter media sources by type:
+ >>> results = media_sources.find(type="newspaper") # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Get media sources with detailed properties:
+ >>> results = media_sources.find(with_properties=True) # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+ """
name = "media_sources"
diff --git a/impresso/resources/search.py b/impresso/resources/search.py
index decd76e..9b648e0 100644
--- a/impresso/resources/search.py
+++ b/impresso/resources/search.py
@@ -7,6 +7,9 @@
from impresso.api_client.api.search import search
from impresso.api_client.api.search_facets import get_search_facet
+from impresso.api_client.models.content_item_copyright_status import (
+ ContentItemCopyrightStatusLiteral,
+)
from impresso.api_client.models.get_search_facet_id import (
GetSearchFacetId,
GetSearchFacetIdLiteral,
@@ -29,6 +32,7 @@
from impresso.api_models import ContentItem, BaseFind, Filter, Q, SearchFacetBucket
from impresso.data_container import DataContainer, iterate_pages
from impresso.resources.base import DEFAULT_PAGE_SIZE, Resource
+from impresso.resources.tools import Embedding
from impresso.structures import AND, OR, DateRange
from impresso.util.error import raise_for_error
from impresso.util.filters import and_or_filter, filters_as_protobuf
@@ -125,7 +129,32 @@ def pages(self) -> Iterator["FacetDataContainer"]:
class SearchResource(Resource):
- """Search content items in the impresso database."""
+ """Search content items in the impresso database.
+
+ Examples:
+ Search for articles containing a term:
+ >>> results = search.find(term="war") # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Filter articles by date range and newspaper:
+ >>> from impresso import DateRange
+ >>> date_range = DateRange(start="1900-01-01", end="1910-12-31")
+ >>> results = search.find(term="revolution", newspaper_id="GDL", date_range=date_range) # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Search for front page articles mentioning an entity:
+ >>> results = search.find(entity_id="aida-0001-54-Napoleon", front_page=True) # doctest: +SKIP
+ >>> print(results.df) # doctest: +SKIP
+
+ Search by semantic similarity using text embeddings:
+ >>> embedding = tools.embed_text("military conflict", target="text") # doctest: +SKIP
+ >>> similar_articles = search.find(embedding=embedding) # doctest: +SKIP
+ >>> print(similar_articles.df) # doctest: +SKIP
+
+ Get facets to analyze search results:
+ >>> newspaper_facets = search.facet(facet="newspaper", term="war") # doctest: +SKIP
+ >>> print(newspaper_facets.df) # doctest: +SKIP
+ """
name = "search"
@@ -147,38 +176,52 @@ def find(
collection_id: str | AND[str] | OR[str] | None = None,
country: str | AND[str] | OR[str] | None = None,
partner_id: str | AND[str] | OR[str] | None = None,
+ issue_id: str | OR[str] | None = None,
text_reuse_cluster_id: str | AND[str] | OR[str] | None = None,
+ embedding: Embedding | None = None,
+ copyright: (
+ ContentItemCopyrightStatusLiteral
+ | AND[ContentItemCopyrightStatusLiteral]
+ | OR[ContentItemCopyrightStatusLiteral]
+ | None
+ ) = None,
+ include_embeddings: bool = False,
) -> SearchDataContainer:
- """
- Search for content items in Impresso.
+ """Search for content items in Impresso.
Args:
- term: Search term.
- order_by: Order by aspect.
- limit: Number of results to return.
- offset: Number of results to skip.
-
- with_text_contents: Return only content items with text contents.
- title: Return only content items that have this term or all/any of the terms in the title.
+ term: Search term or combination of search terms.
+ order_by: Sort order for results.
+ limit: Maximum number of results to return per page. Defaults to 100.
+ offset: Number of results to skip for pagination.
+ with_text_contents: Return only content items with text contents. Defaults to False.
+ title: Filter by content items having this term or all/any of the terms in the title.
front_page: Return only content items that were on the front page.
- entity_id: Return only content items that mention this entity or all/any of the entities.
- date_range: Return only content items that were published in this date range.
- language: Return only content items that are in this language or all/any of the languages.
- Use 2-letter ISO language codes (e.g., 'en', 'de', 'fr').
- mention: Return only content items that mention an entity with this term or all/any
- of entities with the terms.
- topic_id: Return only content items that are about this topic or all/any of the topics.
- collection_id: Return only content items that are in this collection or all/any of the collections.
- country: Return only content items that are from this country or all/any of the countries.
- Use 2-letter ISO country codes (e.g., 'ch', 'de', 'lu').
- partner_id: Return only content items that are from this partner or all/any of the partners.
- text_reuse_cluster_id: Return only content items that are in this text reuse cluster
- or all/any of the clusters.
+ entity_id: Filter by content items mentioning this entity or all/any of the entities.
+ newspaper_id: Filter by newspaper ID(s).
+ date_range: Filter by publication date range.
+ language: Filter by content language or all/any of the languages.
+ Use 2-letter ISO language codes (e.g., 'en', 'de', 'fr').
+ mention: Filter by content items mentioning entities with this term or all/any of
+ entities with the terms.
+ topic_id: Filter by topic ID(s).
+ collection_id: Filter by collection ID(s).
+ country: Filter by country of publication. Use 2-letter ISO country codes
+ (e.g., 'ch', 'de', 'lu').
+ partner_id: Filter by partner institution ID(s).
+ text_reuse_cluster_id: Filter by text reuse cluster ID(s).
+ embedding: Text embedding for similarity search. Use `tools.embed_text()` to generate
+ embeddings from text.
+ copyright: Filter by copyright status.
+ include_embeddings: Whether to include embeddings in the response. Defaults to False.
Returns:
- SearchDataContainer: Data container with a page of results of the search.
+ SearchDataContainer: Data container with the first page of search results.
"""
+ page_limit = limit if limit is not None else DEFAULT_PAGE_SIZE
+ embedding_with_limit = f"{embedding}:{page_limit}" if embedding else None
+
filters = self._build_filters(
string=term,
with_text_contents=with_text_contents,
@@ -194,6 +237,9 @@ def find(
country=country,
partner_id=partner_id,
text_reuse_cluster_id=text_reuse_cluster_id,
+ embedding=embedding_with_limit,
+ copyright=copyright,
+ issue_id=issue_id,
)
filters_pb = filters_as_protobuf(filters or [])
@@ -207,8 +253,9 @@ def find(
else UNSET
),
filters=filters_pb if filters_pb else UNSET,
- limit=limit if limit is not None else DEFAULT_PAGE_SIZE,
+ limit=page_limit,
offset=offset if offset is not None else UNSET,
+ include_embeddings=include_embeddings if include_embeddings else UNSET,
)
raise_for_error(result)
return SearchDataContainer(
@@ -231,6 +278,7 @@ def find(
"country": country,
"partner_id": partner_id,
"text_reuse_cluster_id": text_reuse_cluster_id,
+ "embedding": embedding_with_limit,
},
web_app_search_result_url=_build_web_app_search_url(
f"{self._get_web_app_base_url()}/search",
@@ -262,36 +310,35 @@ def facet(
partner_id: str | AND[str] | OR[str] | None = None,
text_reuse_cluster_id: str | AND[str] | OR[str] | None = None,
) -> FacetDataContainer:
- """
- Get facets for a search query.
+ """Get facets for a search query.
Facets provide aggregated information about a specific dimension of search results,
such as counts of newspaper titles, languages, or topics.
Args:
facet: Type of facet to retrieve (e.g., 'newspaper', 'language', 'topic').
- term: Search term to filter facets.
- order_by: How to order facet results ('value' or 'count').
+ term: Search term or combination of terms to filter facets.
+ order_by: Sort order for facet results ('value' or 'count'). Defaults to 'value'.
limit: Maximum number of facet buckets to return.
- offset: Number of facet buckets to skip.
-
- with_text_contents: Filter for content items with text contents.
+ offset: Number of facet buckets to skip for pagination.
+ with_text_contents: Filter for content items with text contents. Defaults to False.
title: Filter by content items having this term or terms in the title.
front_page: Filter for content items that were on the front page.
entity_id: Filter by content items mentioning this entity or entities.
- newspaper_id: Filter by newspaper.
+ newspaper_id: Filter by newspaper ID(s).
date_range: Filter by publication date range.
- language: Filter by content language. Use 2-letter ISO language codes (e.g., 'en', 'de', 'fr').
+ language: Filter by content language. Use 2-letter ISO language codes
+ (e.g., 'en', 'de', 'fr').
mention: Filter by content items mentioning entities with these terms.
- topic_id: Filter by content items about this topic or topics.
- collection_id: Filter by collection.
- country: Filter by country of publication. Use 2-letter ISO country codes (e.g., 'ch', 'de', 'lu').
- partner_id: Filter by partner institution.
- text_reuse_cluster_id: Filter by text reuse cluster.
+ topic_id: Filter by topic ID(s).
+ collection_id: Filter by collection ID(s).
+ country: Filter by country of publication. Use 2-letter ISO country codes
+ (e.g., 'ch', 'de', 'lu').
+ partner_id: Filter by partner institution ID(s).
+ text_reuse_cluster_id: Filter by text reuse cluster ID(s).
Returns:
FacetDataContainer: Data container with facet results, including counts for each bucket.
- The container provides visualization capabilities through the ._get_preview_image_() method.
Examples:
>>> search = SearchResource(client)
@@ -396,7 +443,15 @@ def _build_filters(
collection_id: str | AND[str] | OR[str] | None = None,
country: str | AND[str] | OR[str] | None = None,
partner_id: str | AND[str] | OR[str] | None = None,
+ issue_id: str | OR[str] | None = None,
text_reuse_cluster_id: str | AND[str] | OR[str] | None = None,
+ embedding: Embedding | None = None,
+ copyright: (
+ ContentItemCopyrightStatusLiteral
+ | AND[ContentItemCopyrightStatusLiteral]
+ | OR[ContentItemCopyrightStatusLiteral]
+ | None
+ ) = None,
) -> list[Filter]:
filters: list[Filter] = []
if string:
@@ -434,6 +489,12 @@ def _build_filters(
filters.extend(and_or_filter(partner_id, "partner"))
if text_reuse_cluster_id is not None:
filters.extend(and_or_filter(text_reuse_cluster_id, "text_reuse_cluster"))
+ if embedding is not None:
+ filters.extend(and_or_filter(embedding, "embedding"))
+ if copyright is not None:
+ filters.extend(and_or_filter(str(copyright), "copyright"))
+ if issue_id is not None:
+ filters.extend(and_or_filter(issue_id, "issue"))
return filters
diff --git a/impresso/resources/tools.py b/impresso/resources/tools.py
index 91e4bd7..63a4c8c 100644
--- a/impresso/resources/tools.py
+++ b/impresso/resources/tools.py
@@ -1,15 +1,66 @@
+import base64
+from pathlib import Path
+from typing import Annotated, cast
+from urllib.parse import urlparse
+
+import httpx
from pandas import DataFrame, json_normalize
-from impresso.api_client.api.tools import perform_ner
+
+from impresso.api_client.api.tools import (
+ perform_image_embedding,
+ perform_ner,
+ perform_text_embedding,
+)
+from impresso.api_client.models.impresso_embedding_response import (
+ ImpressoEmbeddingResponse,
+)
+from impresso.api_client.models.impresso_image_embedding_request import (
+ ImpressoImageEmbeddingRequest,
+)
+from impresso.api_client.models.impresso_image_embedding_request_search_target import (
+ ImpressoImageEmbeddingRequestSearchTarget,
+ ImpressoImageEmbeddingRequestSearchTargetLiteral,
+)
from impresso.api_client.models.impresso_named_entity_recognition_request import (
ImpressoNamedEntityRecognitionRequest,
)
from impresso.api_client.models.impresso_named_entity_recognition_request_method import (
ImpressoNamedEntityRecognitionRequestMethod,
)
+from impresso.api_client.models.impresso_text_embedding_request import (
+ ImpressoTextEmbeddingRequest,
+)
+from impresso.api_client.models.impresso_text_embedding_request_search_target import (
+ ImpressoTextEmbeddingRequestSearchTarget,
+ ImpressoTextEmbeddingRequestSearchTargetLiteral,
+)
+from impresso.api_client.types import UNSET
from impresso.api_models import ImpressoNerResponse
from impresso.data_container import DataContainer
from impresso.resources.base import Resource
from impresso.util.error import raise_for_error
+from impresso.util.py import get_enum_from_literal
+
+# Alias for a ``str`` annotated as holding base64-encoded data.
+Base64Str = Annotated[str, "base64-encoded string"]
+# Alias for a ``str`` annotated as a base64-encoded embedding with a model prefix.
+Embedding = Annotated[str, "base64-encoded string with model prefix"]
+
+
+def is_url(string: str) -> bool:
+    """Tell whether *string* parses as an http(s) or ftp(s) URL with a host part.
+
+    Any error raised while parsing is treated as "not a URL".
+    """
+    accepted_schemes = ("http", "https", "ftp", "ftps")
+    try:
+        parts = urlparse(string)
+        has_accepted_scheme = parts.scheme in accepted_schemes
+        return has_accepted_scheme and bool(parts.netloc)
+    except Exception:
+        return False
+
+
+def is_file(string: str) -> bool:
+    """Tell whether *string* is the path of an existing regular file.
+
+    Any error raised while inspecting the path is treated as "not a file".
+    """
+    try:
+        candidate = Path(string)
+        if not candidate.exists():
+            return False
+        return candidate.is_file()
+    except Exception:
+        return False
class ImpressoNerSchema(ImpressoNerResponse):
@@ -51,7 +102,29 @@ def size(self) -> int:
class ToolsResource(Resource):
- """Various helper tools"""
+ """Various helper tools for text processing and embedding generation.
+
+ Examples:
+ Extract named entities from text:
+ >>> entities = tools.ner("Napoleon visited Paris in 1815.") # doctest: +SKIP
+ >>> print(entities.df) # doctest: +SKIP
+
+ Extract and link entities to Wikidata:
+ >>> entities = tools.ner_nel("Napoleon visited Paris in 1815.") # doctest: +SKIP
+ >>> print(entities.df) # doctest: +SKIP
+
+ Generate text embedding for semantic search:
+ >>> embedding = tools.embed_text("military conflict", target="text") # doctest: +SKIP
+ >>> results = search.find(embedding=embedding) # doctest: +SKIP
+
+ Generate image embedding from file:
+ >>> embedding = tools.embed_image("path/to/image.jpg", target="image") # doctest: +SKIP
+ >>> similar_images = images.find(embedding=embedding) # doctest: +SKIP
+
+ Generate image embedding from URL:
+ >>> embedding = tools.embed_image("https://example.com/image.jpg", target="image") # doctest: +SKIP
+ >>> similar_images = images.find(embedding=embedding) # doctest: +SKIP
+ """
name = "tools"
@@ -61,7 +134,7 @@ def ner(self, text: str) -> NerContainer:
This method is faster than `ner_nel` but does not provide any linking to external resources.
Args:
- text (str): Text to process
+ text: Text to process
Returns:
NerContainer: List of named entities
@@ -86,7 +159,7 @@ def ner_nel(self, text: str) -> NerContainer:
This method is slower than `ner` but provides linking to external resources.
Args:
- text (str): Text to process
+ text: Text to process
Returns:
NerContainer: List of named entities
@@ -111,7 +184,7 @@ def nel(self, text: str) -> NerContainer:
This method requires named entities to be enclosed in tags: [START]entity[END].
Args:
- text (str): Text to process
+ text: Text to process
Returns:
NerContainer: List of named entities
@@ -129,3 +202,87 @@ def nel(self, text: str) -> NerContainer:
ImpressoNerSchema,
web_app_search_result_url=None,
)
+
+    def embed_image(
+        self,
+        image: bytes | Base64Str | str,
+        target: ImpressoImageEmbeddingRequestSearchTargetLiteral,
+    ) -> Embedding:
+        """Embed an image into a vector space.
+
+        Args:
+            image: Image to embed. Can be raw bytes, a path to a file, a URL of an
+                image, or a base64-encoded string. The checks are applied in that
+                order; a string that is neither an existing file nor a URL is sent
+                as-is, assuming it is already base64-encoded.
+            target: Target collection to embed the image into. Currently, only "image"
+                is supported.
+
+        Returns:
+            Embedding: The image embedding as a base64 string prefixed with model tag.
+
+        Raises:
+            ValueError: If the target is not a valid search target or the response
+                has an unexpected format.
+            httpx.HTTPStatusError: If downloading the image from a URL fails.
+        """
+        image_as_base64: str
+        if isinstance(image, bytes):
+            image_as_base64 = base64.b64encode(image).decode("utf-8")
+        elif is_file(image):
+            with open(image, "rb") as file:
+                image_as_base64 = base64.b64encode(file.read()).decode("utf-8")
+        elif is_url(image):
+            # httpx does not follow redirects unless asked to; without this a
+            # redirected image URL would fail in raise_for_status().
+            response = httpx.get(image, follow_redirects=True)
+            response.raise_for_status()
+            image_as_base64 = base64.b64encode(response.content).decode("utf-8")
+        else:
+            image_as_base64 = image  # assume it's already a base64-encoded string
+
+        search_target = get_enum_from_literal(
+            target,
+            ImpressoImageEmbeddingRequestSearchTarget,
+        )
+        if search_target == UNSET:
+            raise ValueError(f"Invalid search target: {target}")
+
+        result = perform_image_embedding.sync(
+            client=self._api_client,
+            body=ImpressoImageEmbeddingRequest(
+                bytes_=image_as_base64,
+                search_target=cast(
+                    ImpressoImageEmbeddingRequestSearchTarget, search_target
+                ),
+            ),
+        )
+        raise_for_error(result)
+        if isinstance(result, ImpressoEmbeddingResponse):
+            return cast(Embedding, result.embedding)
+        raise ValueError("Unexpected response format")
+
+    def embed_text(
+        self,
+        text: str,
+        target: ImpressoTextEmbeddingRequestSearchTargetLiteral,
+    ) -> Embedding:
+        """Compute the embedding of a piece of text.
+
+        Args:
+            text: Text to embed.
+            target: Target collection to embed the text into.
+
+        Returns:
+            Embedding: The text embedding as a base64 string prefixed with model tag.
+
+        Raises:
+            ValueError: If the target is not a valid search target or the response
+                has an unexpected format.
+        """
+        resolved_target = get_enum_from_literal(
+            target,
+            ImpressoTextEmbeddingRequestSearchTarget,
+        )
+        if resolved_target == UNSET:
+            raise ValueError(f"Invalid search target: {target}")
+
+        response = perform_text_embedding.sync(
+            client=self._api_client,
+            body=ImpressoTextEmbeddingRequest(
+                text=text,
+                search_target=cast(
+                    ImpressoTextEmbeddingRequestSearchTarget, resolved_target
+                ),
+            ),
+        )
+        raise_for_error(response)
+        if not isinstance(response, ImpressoEmbeddingResponse):
+            raise ValueError("Unexpected response format")
+        return cast(Embedding, response.embedding)
diff --git a/impresso/resources/topics.py b/impresso/resources/topics.py
new file mode 100644
index 0000000..f0ee6ea
--- /dev/null
+++ b/impresso/resources/topics.py
@@ -0,0 +1,188 @@
+from typing import Any, Callable, Iterator, cast
+
+from pandas import DataFrame, json_normalize
+
+from impresso.api_client.api.topics import find_topics, get_topic
+from impresso.api_client.models.find_topics_base_find_response import (
+ FindTopicsBaseFindResponse,
+)
+from impresso.api_client.models.find_topics_order_by import (
+ FindTopicsOrderBy,
+ FindTopicsOrderByLiteral,
+)
+from impresso.api_client.types import UNSET
+from impresso.api_models import BaseFind
+from impresso.data_container import DataContainer, iterate_pages
+from impresso.resources.base import Resource
+from impresso.util.error import raise_for_error
+from impresso.util.py import get_enum_from_literal
+
+
+class FindTopicsSchema(BaseFind):
+    """Pydantic schema describing one page of a find-topics response."""
+
+    # Topics contained in this page, kept as plain dictionaries.
+    data: list[dict]
+
+
+class FindTopicsContainer(DataContainer):
+    """Container wrapping one page of topic search results."""
+
+    def __init__(
+        self,
+        data: FindTopicsBaseFindResponse,
+        pydantic_model: type[FindTopicsSchema],
+        fetch_method: Callable[..., "FindTopicsContainer"],
+        fetch_method_args: dict[str, Any],
+        web_app_search_result_url: str | None = None,
+    ):
+        """Store the response plus what is needed to fetch further pages.
+
+        Args:
+            data: Raw response of the find-topics call.
+            pydantic_model: Schema class handed to the base container.
+            fetch_method: Callable used by `pages()` to retrieve subsequent pages.
+            fetch_method_args: Keyword arguments replayed on every `fetch_method` call.
+            web_app_search_result_url: Link to the same search in the web app, if any.
+        """
+        super().__init__(data, pydantic_model, web_app_search_result_url)
+        self._fetch_method_args = fetch_method_args
+        self._fetch_method = fetch_method
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the page as a pandas dataframe indexed by ``uid``.
+
+        An empty dataframe is returned when the page holds no items.
+        """
+        records = self._data.to_dict()["data"]
+        if not len(records):
+            return DataFrame()
+        return json_normalize(records).set_index("uid")
+
+    def pages(self) -> Iterator["FindTopicsContainer"]:
+        """Yield this page first, then every following page of results."""
+        yield self
+        following_pages = iterate_pages(
+            self._fetch_method,
+            self._fetch_method_args,
+            self.offset,
+            self.limit,
+            self.total,
+        )
+        yield from following_pages
+
+
+class GetTopicContainer(DataContainer):
+    """Container wrapping a single topic returned by a get call."""
+
+    @property
+    def df(self) -> DataFrame:
+        """Return the topic as a one-row pandas dataframe indexed by ``uid``.
+
+        An empty dataframe is returned when the response carries no fields.
+        """
+        record = self._data.to_dict()
+        if not len(record):
+            return DataFrame()
+        return json_normalize([record]).set_index("uid")
+
+
+class TopicsResource(Resource):
+    """Search topics in the Impresso database.
+
+    Examples:
+        Search for topics containing specific words:
+        >>> results = topics.find(term="economy")  # doctest: +SKIP
+        >>> print(results.df)  # doctest: +SKIP
+
+        Get a specific topic by its ID:
+        >>> topic_id = "some-topic-id"  # Replace with a real ID
+        >>> topic = topics.get(topic_id)  # doctest: +SKIP
+        >>> print(topic.df)  # doctest: +SKIP
+
+        Iterate through all pages of topic search results:
+        >>> results = topics.find(term="war", limit=10)  # doctest: +SKIP
+        >>> for page in results.pages():  # doctest: +SKIP
+        ...     print(page.df)  # doctest: +SKIP
+    """
+
+    name = "topics"
+
+    def find(
+        self,
+        term: str | None = None,
+        order_by: FindTopicsOrderByLiteral | None = None,
+        limit: int | None = None,
+        offset: int | None = None,
+    ) -> FindTopicsContainer:
+        """Search topics in Impresso.
+
+        Args:
+            term: Search term to find topics by their words.
+            order_by: Field to order results by.
+            limit: Number of results to return. Left unset when None.
+            offset: Number of results to skip. Left unset when None.
+
+        Returns:
+            FindTopicsContainer: Data container with a page of results of the search.
+        """
+        response = find_topics.sync(
+            client=self._api_client,
+            q=UNSET if term is None else term,
+            order_by=(
+                UNSET
+                if order_by is None
+                else get_enum_from_literal(order_by, FindTopicsOrderBy)
+            ),
+            limit=UNSET if limit is None else limit,
+            offset=UNSET if offset is None else offset,
+            filters=UNSET,
+        )
+        raise_for_error(response)
+
+        # Arguments replayed by `pages()` when fetching the following pages.
+        replay_args = {
+            "term": term,
+            "order_by": order_by,
+        }
+        web_app_url = _build_web_app_find_topics_url(
+            base_url=self._get_web_app_base_url(),
+            term=term,
+        )
+        return FindTopicsContainer(
+            cast(FindTopicsBaseFindResponse, response),
+            FindTopicsSchema,
+            fetch_method=self.find,
+            fetch_method_args=replay_args,
+            web_app_search_result_url=web_app_url,
+        )
+
+    def get(self, id: str) -> GetTopicContainer:
+        """Get topic by ID.
+
+        Args:
+            id: The ID of the topic to retrieve.
+
+        Returns:
+            GetTopicContainer: Data container with the topic information.
+        """
+        response = get_topic.sync(
+            client=self._api_client,
+            id=id,
+        )
+        raise_for_error(response)
+
+        web_app_url = _build_web_app_get_topic_url(
+            base_url=self._get_web_app_base_url(),
+            id=id,
+        )
+        # NOTE(review): the find-response schema is passed for a single topic;
+        # presumably a dedicated topic model is intended here - confirm.
+        return GetTopicContainer(
+            response,
+            FindTopicsSchema,
+            web_app_search_result_url=web_app_url,
+        )
+
+
+def _build_web_app_find_topics_url(
+    base_url: str,
+    term: str | None = None,
+) -> str:
+    """Build the web app URL showing the same topic search.
+
+    Args:
+        base_url: Base URL of the web app, without a trailing slash.
+        term: Search term, or None when no term was used.
+
+    Returns:
+        The URL of the topics page, with the term as a percent-encoded ``q``
+        query parameter when a term is given.
+    """
+    from urllib.parse import quote, urlencode
+
+    query_params = {
+        "q": term,
+    }
+    # Percent-encode the values so that spaces, "&", "=" or "#" inside the
+    # search term cannot break or truncate the query string.
+    query_string = urlencode(
+        {key: value for key, value in query_params.items() if value is not None},
+        quote_via=quote,
+    )
+    url = f"{base_url}/topics"
+    return f"{url}?{query_string}" if query_string else url
+
+
+def _build_web_app_get_topic_url(
+    base_url: str,
+    id: str,
+) -> str:
+    """Return the web app URL of the page of the topic with the given id."""
+    return "{}/topics/{}".format(base_url, id)
diff --git a/impresso/util/embeddings.py b/impresso/util/embeddings.py
new file mode 100644
index 0000000..b6b332b
--- /dev/null
+++ b/impresso/util/embeddings.py
@@ -0,0 +1,76 @@
+import base64
+import struct
+from typing import List
+
+
+def embedding_to_vector(embedding: str) -> List[float]:
+    """
+    Convert a base64-encoded embedding string to an array of floats.
+
+    The embedding string is expected to be in the format:
+    <model_label>:<base64_embedding_vector>
+    where the base64-encoded part represents an array of little-endian
+    float32 values.
+
+    Args:
+        embedding: A string in the format "<model_label>:<base64_embedding_vector>"
+                  (e.g., "clip-ViT-B-32:AAAAAAAAgD8=")
+
+    Returns:
+        A list of float values representing the embedding vector
+
+    Raises:
+        ValueError: If the embedding string has no model prefix, if the vector
+            part is not valid base64, or if the decoded vector is not a whole
+            number of float32 values
+
+    Example:
+        >>> embedding_to_vector("clip-ViT-B-32:AAAAAAAAgD8=")
+        [0.0, 1.0]
+    """
+    if ":" not in embedding:
+        raise ValueError(
+            "Invalid embedding format. "
+            "Expected '<model_label>:<base64_embedding_vector>'"
+        )
+
+    # Split the model prefix from the base64-encoded vector
+    _, base64_vector = embedding.split(":", 1)
+
+    # Decode the base64 string to bytes (binascii.Error is a ValueError subclass)
+    try:
+        vector_bytes = base64.b64decode(base64_vector)
+    except ValueError as err:
+        raise ValueError("Invalid embedding format. Vector is not valid base64") from err
+
+    # Each float32 is 4 bytes. Reject truncated payloads explicitly so that the
+    # caller gets the documented ValueError instead of a struct.error.
+    num_floats, leftover_bytes = divmod(len(vector_bytes), 4)
+    if leftover_bytes:
+        raise ValueError(
+            "Invalid embedding format. Vector length "
+            f"({len(vector_bytes)} bytes) is not a multiple of 4"
+        )
+
+    # Unpack the bytes as little-endian float32 values
+    return list(struct.unpack(f"<{num_floats}f", vector_bytes))
+
+
+def vector_to_embedding(vector: List[float], model: str) -> str:
+    """
+    Encode a list of floats as an embedding string carrying a model prefix.
+
+    The values are packed as little-endian float32, base64-encoded, and
+    prefixed with the model identifier.
+
+    Args:
+        vector: A list of float values representing the embedding vector
+        model: The model identifier to use as prefix (e.g., "clip-ViT-B-32")
+
+    Returns:
+        A string in the format "<model_label>:<base64_embedding_vector>"
+
+    Example:
+        >>> vector_to_embedding([0.0, 1.0], "clip-ViT-B-32")
+        'clip-ViT-B-32:AAAAAAAAgD8='
+    """
+    float32_layout = struct.Struct(f"<{len(vector)}f")
+    payload = base64.b64encode(float32_layout.pack(*vector)).decode("ascii")
+    return f"{model}:{payload}"
diff --git a/mkdocs.yml b/mkdocs.yml
index afd8bb5..8f2d671 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -14,6 +14,7 @@ plugins:
show_signature: true
show_source: false
show_root_heading: true
+ heading_level: 3
docstring_style: google
docstring_section_style: table
annotations_path: brief
diff --git a/poetry.lock b/poetry.lock
index dac8eeb..0248302 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
[[package]]
name = "annotated-types"
@@ -1649,23 +1649,21 @@ testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "protobuf"
-version = "4.25.5"
+version = "6.32.1"
description = ""
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
groups = ["main"]
files = [
- {file = "protobuf-4.25.5-cp310-abi3-win32.whl", hash = "sha256:5e61fd921603f58d2f5acb2806a929b4675f8874ff5f330b7d6f7e2e784bbcd8"},
- {file = "protobuf-4.25.5-cp310-abi3-win_amd64.whl", hash = "sha256:4be0571adcbe712b282a330c6e89eae24281344429ae95c6d85e79e84780f5ea"},
- {file = "protobuf-4.25.5-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:b2fde3d805354df675ea4c7c6338c1aecd254dfc9925e88c6d31a2bcb97eb173"},
- {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:919ad92d9b0310070f8356c24b855c98df2b8bd207ebc1c0c6fcc9ab1e007f3d"},
- {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fe14e16c22be926d3abfcb500e60cab068baf10b542b8c858fa27e098123e331"},
- {file = "protobuf-4.25.5-cp38-cp38-win32.whl", hash = "sha256:98d8d8aa50de6a2747efd9cceba361c9034050ecce3e09136f90de37ddba66e1"},
- {file = "protobuf-4.25.5-cp38-cp38-win_amd64.whl", hash = "sha256:b0234dd5a03049e4ddd94b93400b67803c823cfc405689688f59b34e0742381a"},
- {file = "protobuf-4.25.5-cp39-cp39-win32.whl", hash = "sha256:abe32aad8561aa7cc94fc7ba4fdef646e576983edb94a73381b03c53728a626f"},
- {file = "protobuf-4.25.5-cp39-cp39-win_amd64.whl", hash = "sha256:7a183f592dc80aa7c8da7ad9e55091c4ffc9497b3054452d629bb85fa27c2a45"},
- {file = "protobuf-4.25.5-py3-none-any.whl", hash = "sha256:0aebecb809cae990f8129ada5ca273d9d670b76d9bfc9b1809f0a9c02b7dbf41"},
- {file = "protobuf-4.25.5.tar.gz", hash = "sha256:7f8249476b4a9473645db7f8ab42b02fe1488cbe5fb72fddd445e0665afd8584"},
+ {file = "protobuf-6.32.1-cp310-abi3-win32.whl", hash = "sha256:a8a32a84bc9f2aad712041b8b366190f71dde248926da517bde9e832e4412085"},
+ {file = "protobuf-6.32.1-cp310-abi3-win_amd64.whl", hash = "sha256:b00a7d8c25fa471f16bc8153d0e53d6c9e827f0953f3c09aaa4331c718cae5e1"},
+ {file = "protobuf-6.32.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8c7e6eb619ffdf105ee4ab76af5a68b60a9d0f66da3ea12d1640e6d8dab7281"},
+ {file = "protobuf-6.32.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:2f5b80a49e1eb7b86d85fcd23fe92df154b9730a725c3b38c4e43b9d77018bf4"},
+ {file = "protobuf-6.32.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:b1864818300c297265c83a4982fd3169f97122c299f56a56e2445c3698d34710"},
+ {file = "protobuf-6.32.1-cp39-cp39-win32.whl", hash = "sha256:68ff170bac18c8178f130d1ccb94700cf72852298e016a2443bdb9502279e5f1"},
+ {file = "protobuf-6.32.1-cp39-cp39-win_amd64.whl", hash = "sha256:d0975d0b2f3e6957111aa3935d08a0eb7e006b1505d825f862a1fffc8348e122"},
+ {file = "protobuf-6.32.1-py3-none-any.whl", hash = "sha256:2601b779fc7d32a866c6b4404f9d42a3f67c5b9f3f15b4db3cccabe06b95c346"},
+ {file = "protobuf-6.32.1.tar.gz", hash = "sha256:ee2469e4a021474ab9baafea6cd070e5bf27c7d29433504ddea1a4ee5850f68d"},
]
[[package]]
@@ -2186,7 +2184,7 @@ description = "C version of reader, parser and emitter for ruamel.yaml derived f
optional = false
python-versions = ">=3.9"
groups = ["dev"]
-markers = "platform_python_implementation == \"CPython\" and (python_version == \"3.12\" or python_version == \"3.11\" or python_version == \"3.10\")"
+markers = "platform_python_implementation == \"CPython\" and python_version < \"3.13\""
files = [
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:11f891336688faf5156a36293a9c362bdc7c88f03a8a027c2c1d8e0bcde998e5"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969"},
@@ -2467,4 +2465,4 @@ watchmedo = ["PyYAML (>=3.10)"]
[metadata]
lock-version = "2.1"
python-versions = "^3.10.0 || ^3.11.0 || ^3.12.0"
-content-hash = "abf1c8abba88930473be1b82b9c7487b5676966790a99a45432859a6fb488db9"
+content-hash = "8541b8e5da5a76cb52b4bf3977a78f19c673c229529c07336f509c3b3a2c684a"
diff --git a/pyproject.toml b/pyproject.toml
index fc63d51..d323be6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ name = "impresso"
packages = [{ include = "impresso", from = "." }]
readme = "README.md"
repository = "https://github.com/impresso/impresso-py"
-version = "0.9.13"
+version = "0.9.14"
[tool.poetry.urls]
Endpoint = "https://impresso-project.ch/public-api/v1"
@@ -41,7 +41,7 @@ httpx = "^0.27.0"
matplotlib = "^3.7.0"
pandas = "^2.1.0"
pandas-stubs = "^2.2.1.240316"
-protobuf = "^4.25.0"
+protobuf = "^6.32.1"
pydantic = "^2.6.4"
python = "^3.10.0 || ^3.11.0 || ^3.12.0"
python-dateutil = "^2.8.0"