diff --git a/.gitignore b/.gitignore
index f32e31af..3ac7d16c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
.idea/
.DS_Store
+node_modules/
diff --git a/api-reference/answers/create.mdx b/api-reference/answers/create.mdx
index a6d260eb..a3d2cbef 100644
--- a/api-reference/answers/create.mdx
+++ b/api-reference/answers/create.mdx
@@ -1,4 +1,9 @@
---
title: "Create Answer"
+description: "Create an AI-powered answer by searching the web and extracting information."
openapi: "POST /v1/answers"
---
+
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/answers/get.mdx b/api-reference/answers/get.mdx
index 087176a7..b8f4871d 100644
--- a/api-reference/answers/get.mdx
+++ b/api-reference/answers/get.mdx
@@ -1,4 +1,9 @@
---
title: "Get Answer"
+description: "Retrieve a previously created answer by its ID."
openapi: "GET /v1/answers/{answer_id}"
---
+
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/batches/create.mdx b/api-reference/batches/create.mdx
index 4b7ddc27..2508e839 100644
--- a/api-reference/batches/create.mdx
+++ b/api-reference/batches/create.mdx
@@ -3,3 +3,7 @@ title: 'Create Batch'
description: 'Starts a new batch. You receive an `id` that you can use to track the progress of the batch as shown [here](/api-reference/batches/info). Note: Processing time is constant regardless of batch size'
openapi: POST /v1/batches
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/batches/info.mdx b/api-reference/batches/info.mdx
index e0313911..0a974d49 100644
--- a/api-reference/batches/info.mdx
+++ b/api-reference/batches/info.mdx
@@ -3,3 +3,7 @@ title: 'Batch Info'
description: 'Retrieves the status and progress information about a batch. To retrieve the content for a batch, see [here](/api-reference/batches/items)'
openapi: GET /v1/batches/{batch_id}
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/batches/items.mdx b/api-reference/batches/items.mdx
index 583e6783..e3821564 100644
--- a/api-reference/batches/items.mdx
+++ b/api-reference/batches/items.mdx
@@ -3,3 +3,7 @@ title: 'Batch Items'
description: 'Retrieves the list of items processed for a batch. You can then use the `retrieve_id` to get the content with the Retrieve Endpoint'
openapi: GET /v1/batches/{batch_id}/items
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/batches/list.mdx b/api-reference/batches/list.mdx
index a2ad44e2..185390d7 100644
--- a/api-reference/batches/list.mdx
+++ b/api-reference/batches/list.mdx
@@ -1,5 +1,5 @@
---
-title: 'Batch Items'
-description: 'Fetches the list of items processed for a batch.'
-openapi: GET /v1/batches/{batch_id}/items
+title: 'List Batches'
+description: 'Fetches the list of recent batches.'
+openapi: GET /v1/batches/recent
---
diff --git a/api-reference/common/metadata.mdx b/api-reference/common/metadata.mdx
new file mode 100644
index 00000000..50fb2003
--- /dev/null
+++ b/api-reference/common/metadata.mdx
@@ -0,0 +1,52 @@
+---
+title: 'Metadata'
+sidebarTitle: 'Metadata'
+description: 'Attach custom key-value pairs to API requests'
+icon: 'tag'
+---
+
+Many endpoints accept a `metadata` parameter for storing additional information with your requests. Metadata is returned in responses and can be used for tracking, filtering, or storing context.
+
+## Usage
+
+```json
+{
+ "url_to_scrape": "https://example.com",
+ "metadata": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high"
+ }
+}
+```
+
+Metadata follows [Stripe's approach](https://stripe.com/docs/api/metadata) — simple, flexible, and consistent across all endpoints.
+
+---
+
+## Validation Rules
+
+| Constraint | Limit | Error Example |
+|------------|-------|---------------|
+| Maximum keys | 50 | `"Metadata can have a maximum of 50 keys. You provided 51 keys."` |
+| Key length | 40 characters | `"Metadata key \"my_very_long_key_name...\" exceeds 40 character limit."` |
+| Key format | No square brackets | `"Metadata key \"items[0]\" cannot contain square brackets ([ or ])."` |
+| Value length | 500 characters | `"Metadata value for key \"description\" exceeds 500 character limit (got 523 characters)."` |
+| Value type | Strings only | `"Metadata value for key \"count\" must be a string. Got object."` |
+
+
+Numbers and booleans are automatically converted to strings. Objects and arrays are rejected.
+
+
+---
+
+## Availability
+
+| Endpoint | Status |
+|----------|--------|
+| [Batches](/api-reference/batches/create) | Available |
+| [Crawls](/api-reference/crawls/create) | Available |
+| [Maps](/api-reference/maps/create) | Available |
+| [Scrapes](/api-reference/scrapes/create) | Coming Soon |
+| [Answers](/api-reference/answers/create) | Coming Soon |
+
diff --git a/api-reference/common/pagination.mdx b/api-reference/common/pagination.mdx
new file mode 100644
index 00000000..74623403
--- /dev/null
+++ b/api-reference/common/pagination.mdx
@@ -0,0 +1,127 @@
+---
+title: 'Pagination'
+sidebarTitle: 'Pagination'
+description: 'How to paginate through large result sets using cursor-based pagination'
+icon: 'arrow-right'
+---
+
+Many endpoints return large datasets that are paginated using a **cursor-based pagination** mechanism. This allows you to efficiently retrieve all results by making multiple requests.
+
+## How It Works
+
+Pagination uses two query parameters:
+
+- **`cursor`**: A token that indicates where to start fetching results. On the first request, **omit the `cursor` parameter**. For subsequent requests, use the `cursor` value from the previous response. See the [`cursor` parameter](/api-reference/batches/items#query-cursor) documentation for details.
+- **`limit`**: The maximum number of results to return per request (recommended: 10-50 for batches/crawls).
+
+When there are more results available, the response includes a `cursor` field. Continue making requests with the new `cursor` value until the `cursor` field is absent, indicating all results have been retrieved.
+
+---
+
+## Examples
+
+
+
+```python Python
+import requests
+
+API_URL = 'https://api.olostep.com/v1'
+API_KEY = 'YOUR_API_KEY'
+HEADERS = {'Authorization': f'Bearer {API_KEY}'}
+
+def get_batch_items(batch_id, cursor=None, limit=10):
+ params = {'limit': limit}
+ if cursor:
+ params['cursor'] = cursor
+ response = requests.get(
+ f'{API_URL}/batches/{batch_id}/items',
+ headers=HEADERS,
+ params=params
+ )
+ return response.json()
+
+# Paginate through all items
+cursor = None
+while True:
+ result = get_batch_items('batch_abc123', cursor=cursor, limit=10)
+
+ for item in result['items']:
+ print(f"Custom ID: {item['custom_id']}, URL: {item['url']}")
+
+ if 'cursor' not in result:
+ break
+
+ cursor = result['cursor']
+```
+
+```js Node.js
+const API_URL = 'https://api.olostep.com/v1';
+const API_KEY = 'YOUR_API_KEY';
+
+async function getBatchItems(batchId, cursor = null, limit = 10) {
+ const params = new URLSearchParams();
+ if (cursor !== null) params.append('cursor', cursor);
+ params.append('limit', limit);
+
+ const response = await fetch(
+ `${API_URL}/batches/${batchId}/items?${params}`,
+ { headers: { 'Authorization': `Bearer ${API_KEY}` } }
+ );
+ return response.json();
+}
+
+// Paginate through all items
+let cursor = null;
+while (true) {
+ const result = await getBatchItems('batch_abc123', cursor, 10);
+
+ result.items.forEach(item => {
+ console.log(`Custom ID: ${item.custom_id}, URL: ${item.url}`);
+ });
+
+ if (result.cursor === undefined) break;
+ cursor = result.cursor;
+}
+```
+
+```bash cURL
+# First request (omit cursor parameter)
+curl -G "https://api.olostep.com/v1/batches/batch_abc123/items" \
+ -H "Authorization: Bearer $OLOSTEP_API_KEY" \
+ --data-urlencode "limit=10"
+
+# Subsequent requests use the cursor from previous response
+curl -G "https://api.olostep.com/v1/batches/batch_abc123/items" \
+ -H "Authorization: Bearer $OLOSTEP_API_KEY" \
+ --data-urlencode "cursor=10" \
+ --data-urlencode "limit=10"
+```
+
+
+
+---
+
+## Best Practices
+
+1. **Omit `cursor` on first request**: For batches and crawls, omit the `cursor` parameter entirely on the first request. Only include it when continuing from a previous response.
+
+2. **Use appropriate limits**:
+ - Batches/Crawls: 10-50 items per request
+ - Maps: Handled automatically (up to 10MB per response)
+
+3. **Check for cursor**: Always check if a `cursor` field exists in the response before making the next request. If it's absent, you've retrieved all results.
+
+4. **Handle errors**: Implement retry logic for network errors, but don't retry with the same cursor if you've already processed those results.
+
+5. **Streaming**: For crawls, you can start paginating while the crawl is `in_progress` to stream results as they become available. See the [`cursor` parameter](/api-reference/crawls/pages#query-cursor) documentation for details.
+
+---
+
+## Availability
+
+| Endpoint | Cursor Type | Limit Parameter | Notes |
+|----------|-------------|-----------------|-------|
+| [Batches Items](/api-reference/batches/items) | Integer | Yes (10-50 recommended) | See [`cursor` parameter](/api-reference/batches/items#query-cursor) |
+| [Crawl Pages](/api-reference/crawls/pages) | Integer | Yes (10-50 recommended) | See [`cursor` parameter](/api-reference/crawls/pages#query-cursor) |
+| [Maps](/api-reference/maps/create) | String | No (automatic) | Auto-paginates at 10MB |
+
diff --git a/api-reference/crawls/create.mdx b/api-reference/crawls/create.mdx
index 6055f060..7e871ba1 100644
--- a/api-reference/crawls/create.mdx
+++ b/api-reference/crawls/create.mdx
@@ -3,3 +3,7 @@ title: 'Create Crawl'
description: 'Starts a new crawl. You receive a `id` to track the progress. The operation may take 1-10 mins depending upon the site and depth and pages parameters.'
openapi: POST /v1/crawls
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/crawls/info.mdx b/api-reference/crawls/info.mdx
index 6679c4bf..40f87fe6 100644
--- a/api-reference/crawls/info.mdx
+++ b/api-reference/crawls/info.mdx
@@ -3,3 +3,7 @@ title: 'Crawl Info'
description: 'Fetches information about a specific crawl.'
openapi: GET /v1/crawls/{crawl_id}
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/crawls/pages.mdx b/api-reference/crawls/pages.mdx
index 229a450d..b56fa146 100644
--- a/api-reference/crawls/pages.mdx
+++ b/api-reference/crawls/pages.mdx
@@ -3,3 +3,7 @@ title: 'Crawl Pages'
description: 'Fetches the list of pages for a specific crawl.'
openapi: GET /v1/crawls/{crawl_id}/pages
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/errors.mdx b/api-reference/errors.mdx
new file mode 100644
index 00000000..2f712c7b
--- /dev/null
+++ b/api-reference/errors.mdx
@@ -0,0 +1,540 @@
+---
+title: 'Errors'
+sidebarTitle: 'Overview'
+description: 'Error codes and responses returned by the Olostep API'
+---
+
+The Olostep API uses standard HTTP response codes and returns structured error responses following [RFC 7807](https://tools.ietf.org/html/rfc7807) (Problem Details for HTTP APIs).
+
+## Error Response Format
+
+All error responses follow this structure:
+
+```json
+{
+ "object": "error",
+ "id": "error_4z93xmi8vw",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "Request validation failed",
+ "created": 1704067200,
+ "metadata": {},
+ "errors": [
+ {
+ "loc": ["url_to_scrape"],
+ "msg": "Required",
+ "type": "invalid_type"
+ }
+ ]
+}
+```
+
+### Response Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `object` | string | Always `"error"` |
+| `id` | string | Unique error identifier (e.g., `error_4z93xmi8vw`) |
+| `code` | string | Machine-readable error code for programmatic handling |
+| `type` | string | URI reference identifying the problem type |
+| `status` | number | HTTP status code |
+| `title` | string | Short, human-readable summary of the problem type |
+| `detail` | string | Human-readable explanation of the specific error |
+| `created` | number | Unix timestamp when the error occurred |
+| `metadata` | object | Any user-provided metadata from the original request |
+| `errors` | array | Field-level validation errors (only for validation errors) |
+
+---
+
+## HTTP Status Codes
+
+### Client Errors (4xx)
+
+
+
+
+
+The request is syntactically broken or structurally invalid. This typically occurs when required parameters are missing or have invalid formats.
+
+**Error Codes:**
+- `validation_error` - Request validation failed (often includes field-level errors)
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "The provided URL is invalid. This often means there's a white space or no protocol (http/https) was passed along",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Common Causes:**
+- Missing required `url_to_scrape` parameter
+- Invalid URL format (missing `http://` or `https://` protocol)
+- Invalid JSON body
+- Invalid parameter values
+- Invalid metadata (see the [Metadata validation rules](/api-reference/common/metadata#validation-rules))
+
+
+
+
+
+Authentication credentials are missing or invalid.
+
+**Error Codes:**
+- `invalid_api_key` - The API key provided is not valid
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Common Causes:**
+- Missing `Authorization` header
+- Invalid or revoked API key
+- Malformed bearer token
+
+
+
+
+
+Valid credentials but a payment or credits issue prevents the request from being processed.
+
+**Error Codes:**
+- `credits_exhausted` - Account has no remaining credits
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/dashboard/",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Check your credit balance at [dashboard](https://olostep.com/dashboard)
+- Upgrade your plan if credits are exhausted
+
+
+
+
+
+The request was understood but refused due to access restrictions.
+
+**Error Codes:**
+- `approval_required` - Feature requires approval before use
+- `blacklisted_domain` - The target domain is blocked
+- `unsupported_file_type` - File type not supported for your account
+
+**Example Responses:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "blacklisted_domain",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "The domain you're trying to scrape is currently blacklisted. This limitation can be removed for your account. Reach out to info@olostep.com to remove this limitation.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "approval_required",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "You don't have access to this feature. Please reach out to info@olostep.com to get approved",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Contact support at info@olostep.com to request access
+- Some domains and features require prior approval
+
+
+
+
+
+The requested resource does not exist. The `detail` field specifies which resource was not found.
+
+**Error Codes:**
+- `batch_not_found` - No batch exists with the given ID
+- `scrape_not_found` - No scrape exists with the given ID
+- `parser_not_found` - The specified parser does not exist
+- `pursuit_file_not_found` - Batch data file not found (for retry operations)
+- `crawl_not_found` - No crawl exists with the given ID
+- `map_not_found` - No map exists with the given ID
+- `no_urls_found` - No URLs found during crawl
+- `sitemap_error` - Could not retrieve page URLs from sitemap
+- `file_not_found` - No file exists with the given ID
+- `schedule_not_found` - No schedule exists with the given ID
+- `agent_not_found` - No agent exists with the given ID
+- `answer_not_found` - No answer exists with the given ID
+
+**Example Responses:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "scrape_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Scrape not found",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "batch_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Batch not found",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Common Causes:**
+- Using an incorrect or expired resource ID
+- Resource has been deleted
+- Referencing a parser that doesn't exist
+
+
+
+
+
+The server understands the request but rejects its meaning due to business logic rules.
+
+**Error Codes:**
+- `invalid_parser_input` - Parser input validation failed
+- `idempotency_key_reuse` - Idempotency key was already used with different parameters
+
+**Example Responses:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "invalid_parser_input",
+ "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity",
+ "status": 422,
+ "title": "Unprocessable Entity",
+ "detail": "The parser object must contain an input object with 'profile_id' field",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "idempotency_key_reuse",
+ "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity",
+ "status": 422,
+ "title": "Unprocessable Entity",
+ "detail": "A request with this idempotency key was already made with different parameters. Idempotency keys must be unique per request.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- For parser input errors, check the required fields for the parser you're using
+- For idempotency key errors, use a new unique key for different request parameters
+
+
+
+
+
+The request conflicts with an in-progress or completed idempotent request. This occurs when using idempotency keys to ensure exactly-once processing.
+
+**Error Codes:**
+- `idempotency_key_in_progress` - A request with this idempotency key is currently being processed
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "idempotency_key_in_progress",
+ "type": "https://docs.olostep.com/api-reference/errors/idempotency_error",
+ "status": 409,
+ "title": "Idempotency Error",
+ "detail": "A request with this idempotency key is currently being processed. Please wait and retry.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Wait for the in-progress request to complete (1-5 seconds)
+- Retry the request with exponential backoff
+- Poll for the original request's result if you have the batch ID
+
+
+
+
+
+Rate limit exceeded. You're making too many requests in a given time period.
+
+**Error Codes:**
+- `rate_limit_exceeded` - Request rate limit has been exceeded
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "rate_limit_exceeded",
+ "type": "https://docs.olostep.com/api-reference/errors/too_many_requests",
+ "status": 429,
+ "title": "Too Many Requests",
+ "detail": "You've reached your rate limits for this API. Please try again later.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Implement exponential backoff in your requests
+- Use batch processing for multiple URLs
+- Contact support if you need higher rate limits
+
+
+
+
+
+### Server Errors (5xx)
+
+
+
+
+
+An unexpected error occurred on the server.
+
+**Error Codes:**
+- `internal_server_error` - Generic internal error
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Retry the request after a short delay
+- If the error persists, contact support
+
+
+
+
+
+The service is temporarily unavailable or at capacity.
+
+**Error Codes:**
+- `max_capacity_reached` - Service is at maximum capacity
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "max_capacity_reached",
+ "type": "https://docs.olostep.com/api-reference/errors/service_unavailable",
+ "status": 501,
+ "title": "Service Unavailable",
+ "detail": "We have currently reached our maximum capacity for this feature flag. Consider retrying in a few minutes.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Retry the request after a few minutes
+- Use exponential backoff
+
+
+
+
+
+The request timed out before completing.
+
+**Error Codes:**
+- `request_timeout` - Request exceeded the timeout limit
+
+**Example Response:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "request_timeout",
+ "type": "https://docs.olostep.com/api-reference/errors/gateway_timeout",
+ "status": 504,
+ "title": "Gateway Timeout",
+ "detail": "Your request timed out. Try increasing the timeout and making another request.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+**Resolution:**
+- Increase the timeout value in your request
+- For complex scrapes, consider using async patterns
+- Simplify the request (fewer actions, simpler targets)
+
+
+
+
+
+---
+
+## Error Code Reference
+
+Quick reference table of all error codes:
+
+| Code | HTTP Status | Description |
+|------|-------------|-------------|
+| `validation_error` | 400 | Request validation failed |
+| `invalid_api_key` | 401 | API key is invalid |
+| `credits_exhausted` | 402 | Account has no remaining credits |
+| `approval_required` | 403 | Feature requires approval |
+| `access_denied` | 403 | Access denied to this feature |
+| `blacklisted_domain` | 403 | Target domain is blocked |
+| `unsupported_file_type` | 403 | File type not supported |
+| `batch_not_found` | 404 | Batch ID not found |
+| `scrape_not_found` | 404 | Scrape ID not found |
+| `parser_not_found` | 404 | Parser not found |
+| `pursuit_file_not_found` | 404 | Batch data file not found |
+| `crawl_not_found` | 404 | Crawl ID not found |
+| `map_not_found` | 404 | Map ID not found |
+| `no_urls_found` | 404 | No URLs found during crawl |
+| `sitemap_error` | 404 | Could not retrieve page URLs from sitemap |
+| `file_not_found` | 404 | File ID not found |
+| `schedule_not_found` | 404 | Schedule ID not found |
+| `agent_not_found` | 404 | Agent ID not found |
+| `answer_not_found` | 404 | Answer ID not found |
+| `idempotency_key_in_progress` | 409 | Idempotency key request still processing (see [Idempotency Error](/api-reference/errors/idempotency_error)) |
+| `idempotency_key_reuse` | 422 | Idempotency key reused with different parameters |
+| `invalid_parser_input` | 422 | Parser input validation failed |
+| `rate_limit_exceeded` | 429 | Rate limit exceeded |
+| `internal_server_error` | 500 | Internal server error |
+| `max_capacity_reached` | 501 | Service at capacity |
+| `request_timeout` | 504 | Request timed out |
+
+---
+
+## Handling Errors
+
+### Best Practices
+
+1. **Always check the `code` field** for programmatic error handling
+2. **Display the `detail` field** to users for helpful error messages
+3. **Log the `id` field** for debugging and support tickets
+4. **Implement retry logic** for 409 (`idempotency_key_in_progress`), 429, 500, 501, and 504 errors with exponential backoff
+5. **Check the `errors` array** for field-level validation details on 400 errors
+
+### Example Error Handling
+
+
+
+```python Python
+from olostep import Olostep, Olostep_BaseError
+
+client = Olostep(api_key="your-api-key")
+
+try:
+ result = client.scrapes.create(url_to_scrape="https://example.com")
+except Olostep_BaseError as e:
+ print(f"Error: {type(e).__name__}")
+ print(f"Message: {e}")
+```
+
+```javascript Node.js
+import Olostep from 'olostep';
+
+const client = new Olostep({ apiKey: 'your-api-key' });
+
+try {
+ const result = await client.scrapes.create('https://example.com');
+} catch (error) {
+ console.error('Error:', error.code);
+ console.error('Message:', error.detail);
+}
+```
+
+```bash cURL
+curl -X POST https://api.olostep.com/v1/scrapes \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{"url_to_scrape": "https://example.com"}'
+
+# Check HTTP status code and parse error response
+```
+
+
+
+### SDK Error Hierarchies
+
+For detailed SDK-specific error handling, see:
+- [Python SDK Error Handling](/sdks/python#detailed-error-handling)
+- [Node.js SDK Documentation](/sdks/node-js)
+
diff --git a/api-reference/errors/bad_request.mdx b/api-reference/errors/bad_request.mdx
new file mode 100644
index 00000000..3720fe50
--- /dev/null
+++ b/api-reference/errors/bad_request.mdx
@@ -0,0 +1,90 @@
+---
+title: 'Bad Request'
+sidebarTitle: 'Bad Request'
+description: 'HTTP 400 - Request is syntactically broken or structurally invalid'
+---
+
+
+This page documents the `bad_request` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 400
+
+The request is syntactically broken or structurally invalid. This typically occurs when required parameters are missing or have invalid formats.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `validation_error` | Request validation failed (often includes field-level errors) |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "The provided URL is invalid. This often means there's a white space or no protocol (http/https) was passed along",
+ "created": 1704067200,
+ "metadata": {},
+ "errors": [
+ {
+ "loc": ["url_to_scrape"],
+ "msg": "Invalid URL. Must start with http:// or https://",
+ "type": "invalid_string"
+ }
+ ]
+}
+```
+
+## Common Causes
+
+- Missing required `url_to_scrape` parameter
+- Invalid URL format (missing `http://` or `https://` protocol)
+- Invalid JSON body structure
+- Invalid parameter values or types
+- Whitespace in URLs
+- Invalid metadata (see below)
+
+## Metadata Validation Errors
+
+When providing `metadata`, the following rules apply:
+
+| Constraint | Limit |
+|------------|-------|
+| Maximum keys | 50 |
+| Key length | 40 characters |
+| Key format | No square brackets (`[` or `]`) |
+| Value length | 500 characters |
+| Value type | Strings only (numbers/booleans auto-converted) |
+
+**Example metadata error:**
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "Metadata key \"my_very_long_key_name_that_exceeds...\" exceeds 40 character limit.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Resolution
+
+1. Ensure all required parameters are provided
+2. Verify URL format includes protocol (`http://` or `https://`)
+3. Check JSON body for syntax errors
+4. Review the `errors` array for field-specific issues
+5. For metadata errors, ensure there are at most 50 keys, keys are ≤40 chars and contain no square brackets, values are ≤500 chars, and all values are strings
+
diff --git a/api-reference/errors/forbidden.mdx b/api-reference/errors/forbidden.mdx
new file mode 100644
index 00000000..4b26ecdf
--- /dev/null
+++ b/api-reference/errors/forbidden.mdx
@@ -0,0 +1,87 @@
+---
+title: 'Forbidden'
+sidebarTitle: 'Forbidden'
+description: 'HTTP 403 - Request understood but refused due to access restrictions'
+---
+
+
+This page documents the `forbidden` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 403
+
+The request was understood but refused due to access restrictions. Your account may not have permission to access the requested resource or feature.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `approval_required` | Feature requires prior approval before use |
+| `blacklisted_domain` | The target domain is currently blocked |
+| `unsupported_file_type` | The file type is not supported for your account |
+
+## Example Responses
+
+### Blacklisted Domain
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "blacklisted_domain",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "The domain you're trying to scrape is currently blacklisted. This limitation can be removed for your account. Reach out to info@olostep.com to remove this limitation.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+### Approval Required
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "approval_required",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "You don't have access to this feature. Please reach out to info@olostep.com to get approved",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+### Unsupported File Type
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "unsupported_file_type",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "This type of request is not currently supported for all accounts. Reach out to info@olostep.com to have it enabled for your account",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Attempting to scrape a blacklisted domain
+- Trying to use a feature that requires approval (e.g., Instagram, LinkedIn, Reddit)
+- Accessing restricted file types (e.g., certain PDFs)
+- Using a parser that requires special access
+
+## Resolution
+
+1. Contact support at info@olostep.com to request access
+2. Provide details about your use case
+3. Wait for approval before retrying the request
+
diff --git a/api-reference/errors/gateway_timeout.mdx b/api-reference/errors/gateway_timeout.mdx
new file mode 100644
index 00000000..eb9e6be0
--- /dev/null
+++ b/api-reference/errors/gateway_timeout.mdx
@@ -0,0 +1,54 @@
+---
+title: 'Gateway Timeout'
+sidebarTitle: 'Gateway Timeout'
+description: 'HTTP 504 - The request timed out before completing'
+---
+
+
+This page documents the `gateway_timeout` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 504
+
+The request timed out before completing. This can happen with complex scrapes or slow-loading websites.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `request_timeout` | Request exceeded the timeout limit |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "request_timeout",
+ "type": "https://docs.olostep.com/api-reference/errors/gateway_timeout",
+ "status": 504,
+ "title": "Gateway Timeout",
+ "detail": "Your request timed out. Try increasing the timeout and making another request.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Complex page with many resources
+- Slow website response
+- Too many browser actions
+- Large file downloads
+- Geographic routing delays
+
+## Resolution
+
+1. **Increase timeout** - Extend the request timeout in your client
+2. **Simplify the request** - Reduce the number of actions
+3. **Use `wait_before_scraping`** - Give the page more time to load
+4. **Try a different country** - Geographic location can affect load times
+5. **Retry the request** - Timeouts are often transient
+
diff --git a/api-reference/errors/idempotency_error.mdx b/api-reference/errors/idempotency_error.mdx
new file mode 100644
index 00000000..c336b256
--- /dev/null
+++ b/api-reference/errors/idempotency_error.mdx
@@ -0,0 +1,56 @@
+---
+title: 'Idempotency Error'
+sidebarTitle: 'Idempotency Error'
+description: 'HTTP 409 - Request conflicts with an idempotent operation'
+---
+
+
+This page documents the `idempotency_error` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 409
+
+The request conflicts with an in-progress or completed idempotent request. This occurs when using idempotency keys to ensure exactly-once processing of requests.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `idempotency_key_in_progress` | A request with this idempotency key is currently being processed |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "idempotency_key_in_progress",
+ "type": "https://docs.olostep.com/api-reference/errors/idempotency_error",
+ "status": 409,
+ "title": "Idempotency Error",
+ "detail": "A request with this idempotency key is currently being processed. Please wait and retry.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Sending duplicate requests with the same idempotency key before the first request completes
+- Network timeouts causing client retries while the original request is still processing
+- Race conditions when multiple clients attempt the same operation
+
+## Resolution
+
+1. Wait for the in-progress request to complete (typically 1-5 seconds)
+2. Retry the request after a short delay with exponential backoff
+3. Poll for the result of the original request if you have the batch ID
+
+## Related Errors
+
+| Code | HTTP Status | Description |
+|------|-------------|-------------|
+| `idempotency_key_reuse` | 422 | Same key used with different parameters (see [Unprocessable Entity](/api-reference/errors/unprocessable_entity)) |
+
diff --git a/api-reference/errors/internal_error.mdx b/api-reference/errors/internal_error.mdx
new file mode 100644
index 00000000..5d976af8
--- /dev/null
+++ b/api-reference/errors/internal_error.mdx
@@ -0,0 +1,51 @@
+---
+title: 'Internal Server Error'
+sidebarTitle: 'Internal Error'
+description: 'HTTP 500 - An unexpected error occurred on the server'
+---
+
+
+This page documents the `internal_error` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 500
+
+An unexpected error occurred on the server. This is not caused by your request but by an issue on our end.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `internal_server_error` | Generic internal server error |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Resolution
+
+1. **Retry the request** - Many internal errors are transient
+2. **Implement exponential backoff** - Wait before retrying
+3. **Contact support** - If the error persists, contact support with the error `id`
+
+## Reporting Issues
+
+When reporting an internal error, please include:
+- The error `id` from the response
+- The timestamp of the request
+- The endpoint and parameters used (excluding sensitive data)
+
diff --git a/api-reference/errors/not_found.mdx b/api-reference/errors/not_found.mdx
new file mode 100644
index 00000000..600962bd
--- /dev/null
+++ b/api-reference/errors/not_found.mdx
@@ -0,0 +1,107 @@
+---
+title: 'Not Found'
+sidebarTitle: 'Not Found'
+description: 'HTTP 404 - The requested resource does not exist'
+---
+
+
+This page documents the `not_found` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 404
+
+The requested resource does not exist. The `detail` field specifies which resource was not found.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `not_found` | Generic resource not found |
+| `scrape_not_found` | No scrape exists with the given ID |
+| `parser_not_found` | The specified parser does not exist |
+
+
+The `detail` field in the response provides specifics about which resource was not found (e.g., "Scrape not found", "Batch not found", "Crawl not found", "Map not found").
+
+
+## Example Responses
+
+### Scrape Not Found
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "scrape_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Scrape not found",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+### Batch Not Found
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Batch not found",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+### Crawl Not Found
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Crawl not found",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+### Parser Not Found
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "parser_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "No parser with this name and/or version found",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Using an incorrect resource ID
+- Resource has expired or been deleted
+- Typo in the parser name or version
+- Referencing a resource from a different account
+
+## Resolution
+
+1. Verify the resource ID is correct
+2. Check that the resource exists and hasn't expired
+3. For parsers, verify the parser name and version are correct
+4. Review available parsers in the [Parsers documentation](/features/structured-content/parsers)
diff --git a/api-reference/errors/payment_required.mdx b/api-reference/errors/payment_required.mdx
new file mode 100644
index 00000000..032e3542
--- /dev/null
+++ b/api-reference/errors/payment_required.mdx
@@ -0,0 +1,70 @@
+---
+title: 'Payment Required'
+sidebarTitle: 'Payment Required'
+description: 'HTTP 402 - Valid credentials but payment/credits issue'
+---
+
+
+This page documents the `payment_required` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 402
+
+Valid credentials were provided but a payment or credits issue prevents the request from being processed.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `invalid_api_key` | API key is invalid or associated with an inactive account |
+| `credits_exhausted` | Account has consumed all available credits |
+
+## Example Responses
+
+### Credits Exhausted
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/dashboard/",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+### Invalid API Key
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Account has no remaining credits
+- Subscription has expired
+- Payment method issues
+
+## Resolution
+
+1. Check your credit balance in the [dashboard](https://olostep.com/dashboard)
+2. Upgrade your plan if credits are exhausted
+3. Update payment method if there are billing issues
+4. Contact support if you believe this is an error
+
diff --git a/api-reference/errors/service_unavailable.mdx b/api-reference/errors/service_unavailable.mdx
new file mode 100644
index 00000000..0dc9293b
--- /dev/null
+++ b/api-reference/errors/service_unavailable.mdx
@@ -0,0 +1,45 @@
+---
+title: 'Service Unavailable'
+sidebarTitle: 'Service Unavailable'
+description: 'HTTP 501 - The service is temporarily unavailable'
+---
+
+
+This page documents the `service_unavailable` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 501
+
+The service is temporarily unavailable or at maximum capacity. This is typically a temporary condition.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `max_capacity_reached` | Service is at maximum capacity |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "max_capacity_reached",
+ "type": "https://docs.olostep.com/api-reference/errors/service_unavailable",
+ "status": 501,
+ "title": "Service Unavailable",
+ "detail": "We have currently reached our maximum capacity for this feature flag. Consider retrying in a few minutes.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Resolution
+
+1. **Wait and retry** - Capacity issues are typically temporary
+2. **Implement exponential backoff** - Gradually increase wait times between retries
+3. **Use batch processing** - Reduce the number of concurrent requests
+4. **Contact support** - If you consistently hit capacity limits
+
diff --git a/api-reference/errors/too_many_requests.mdx b/api-reference/errors/too_many_requests.mdx
new file mode 100644
index 00000000..2b630c98
--- /dev/null
+++ b/api-reference/errors/too_many_requests.mdx
@@ -0,0 +1,68 @@
+---
+title: 'Too Many Requests'
+sidebarTitle: 'Too Many Requests'
+description: 'HTTP 429 - Rate limit exceeded'
+---
+
+
+This page documents the `too_many_requests` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 429
+
+Rate limit exceeded. You're making too many requests in a given time period.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `rate_limit_exceeded` | Request rate limit has been exceeded |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "rate_limit_exceeded",
+ "type": "https://docs.olostep.com/api-reference/errors/too_many_requests",
+ "status": 429,
+ "title": "Too Many Requests",
+ "detail": "You've reached your rate limits for this API. Please try again later.",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Rate Limits
+
+Rate limits vary by plan. Check your current limits in your [dashboard](https://olostep.com/dashboard).
+
+## Resolution
+
+1. **Implement exponential backoff** - Wait and retry with increasing delays
+2. **Use batch processing** - Process multiple URLs in a single batch request
+3. **Spread requests over time** - Avoid bursting all requests at once
+4. **Contact support** - Request higher rate limits if needed
+
+## Example: Exponential Backoff
+
+```python
+import time
+import random
+
+def request_with_backoff(make_request, max_retries=5):
+ for attempt in range(max_retries):
+ try:
+ return make_request()
+ except RateLimitError:
+ if attempt == max_retries - 1:
+ raise
+
+ # Exponential backoff with jitter
+ delay = (2 ** attempt) + random.uniform(0, 1)
+ time.sleep(delay)
+```
+
diff --git a/api-reference/errors/unauthorized.mdx b/api-reference/errors/unauthorized.mdx
new file mode 100644
index 00000000..26332224
--- /dev/null
+++ b/api-reference/errors/unauthorized.mdx
@@ -0,0 +1,52 @@
+---
+title: 'Unauthorized'
+sidebarTitle: 'Unauthorized'
+description: 'HTTP 401 - Authentication credentials are missing or invalid'
+---
+
+
+This page documents the `unauthorized` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 401
+
+Authentication credentials are missing or invalid. The API could not verify your identity.
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| `invalid_api_key` | The API key provided is not valid |
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Missing `Authorization` header
+- Invalid API key format
+- Revoked or expired API key
+- Malformed bearer token
+
+## Resolution
+
+1. Check your API key in the [dashboard](https://olostep.com/dashboard)
+2. Ensure the `Authorization` header uses the format: `Bearer YOUR_API_KEY`
+3. Verify there are no extra spaces or characters in your API key
+4. Generate a new API key if the current one is compromised
+
diff --git a/api-reference/errors/unprocessable_entity.mdx b/api-reference/errors/unprocessable_entity.mdx
new file mode 100644
index 00000000..0d456f20
--- /dev/null
+++ b/api-reference/errors/unprocessable_entity.mdx
@@ -0,0 +1,48 @@
+---
+title: 'Unprocessable Entity'
+sidebarTitle: 'Unprocessable Entity'
+description: 'HTTP 422 - Server understands the request but rejects its meaning'
+---
+
+
+This page documents the `unprocessable_entity` error type. For a complete overview of all error types, see [Errors](/api-reference/errors).
+
+
+## Overview
+
+**HTTP Status Code:** 422
+
+The server understands the request structure but rejects its meaning due to business logic rules. The request is syntactically correct but semantically invalid.
+
+## Error Codes
+
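+Codes vary with the business rule that was violated, for example `invalid_parser_input` (a parser is missing required input) and `idempotency_key_reuse` (an idempotency key was reused with different parameters).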
+
+## Example Response
+
+```json
+{
+ "object": "error",
+ "id": "error_abc123",
+ "code": "invalid_parser_input",
+ "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity",
+ "status": 422,
+ "title": "Unprocessable Entity",
+ "detail": "The parser object must contain an input object with 'profile_id' field",
+ "created": 1704067200,
+ "metadata": {}
+}
+```
+
+## Common Causes
+
+- Missing required input fields for a parser
+- Invalid combination of parameters
+- Business rule violations
+
+## Resolution
+
+1. Review the error `detail` for specific requirements
+2. Check the documentation for the feature you're using
+3. Ensure all required input fields are provided
+
diff --git a/api-reference/maps/create.mdx b/api-reference/maps/create.mdx
index 766f5146..db008aae 100644
--- a/api-reference/maps/create.mdx
+++ b/api-reference/maps/create.mdx
@@ -1,5 +1,9 @@
---
title: 'Create Map'
-description: 'This endpoint allows users to get all the urls on a certain website. It can take up to 120 seconds for complex websites. For large websites, results are paginated using cursor-based pagination'
+description: 'This endpoint returns all the URLs found on a website. It can take up to 120 seconds for complex websites. For large websites, results are paginated using cursor-based pagination.'
openapi: POST /v1/maps
---
+
+
+**Metadata** New — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/retrieve-dataset.mdx b/api-reference/retrieve-dataset.mdx
index 8641783b..1529894d 100644
--- a/api-reference/retrieve-dataset.mdx
+++ b/api-reference/retrieve-dataset.mdx
@@ -4,3 +4,6 @@ description: 'This is the endpoint to retrieve the content of a scraped page (HT
openapi: GET /olostep-p2p-dataset-API
---
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/retrieve.mdx b/api-reference/retrieve.mdx
index fcc1c103..190b6eba 100644
--- a/api-reference/retrieve.mdx
+++ b/api-reference/retrieve.mdx
@@ -4,3 +4,6 @@ description: 'Retrieve content of processed batches and crawls urls.'
openapi: GET /v1/retrieve
---
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/scrapes/create.mdx b/api-reference/scrapes/create.mdx
index 6237c0d4..f4d99551 100644
--- a/api-reference/scrapes/create.mdx
+++ b/api-reference/scrapes/create.mdx
@@ -3,3 +3,7 @@ title: 'Create Scrape'
description: '[Scrape](https://docs.olostep.com/features/scrapes/scrapes) a url with provided configuration and get content.'
openapi: POST /v1/scrapes
---
+
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/scrapes/get.mdx b/api-reference/scrapes/get.mdx
index f462625a..b7a220b9 100644
--- a/api-reference/scrapes/get.mdx
+++ b/api-reference/scrapes/get.mdx
@@ -3,3 +3,7 @@ title: 'Get Scrape'
description: 'Can be used to retrieve response for a scrape.'
openapi: GET /v1/scrapes/{scrape_id}
---
+
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/api-reference/start-agent.mdx b/api-reference/start-agent.mdx
index 0bd22731..134656a1 100644
--- a/api-reference/start-agent.mdx
+++ b/api-reference/start-agent.mdx
@@ -3,3 +3,7 @@ title: 'Start agent'
description: 'This is the endpoint to start the scraping agent (and optionally retrieve the scraped content using the expand parameters)'
openapi: GET /olostep-p2p-incomingAPI
---
+
+
+**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details.
+
diff --git a/docs.json b/docs.json
index 814d7e3c..e9fdde49 100644
--- a/docs.json
+++ b/docs.json
@@ -133,6 +133,31 @@
"pages": [
"api-reference/retrieve"
]
+ },
+ {
+ "group": "Common Objects",
+ "pages": [
+ "api-reference/common/metadata",
+ "api-reference/common/pagination",
+ {
+ "group": "Errors",
+ "icon": "triangle-exclamation",
+ "pages": [
+ "api-reference/errors",
+ "api-reference/errors/bad_request",
+ "api-reference/errors/unauthorized",
+ "api-reference/errors/payment_required",
+ "api-reference/errors/forbidden",
+ "api-reference/errors/not_found",
+ "api-reference/errors/unprocessable_entity",
+ "api-reference/errors/idempotency_error",
+ "api-reference/errors/too_many_requests",
+ "api-reference/errors/internal_error",
+ "api-reference/errors/service_unavailable",
+ "api-reference/errors/gateway_timeout"
+ ]
+ }
+ ]
}
]
}
@@ -190,4 +215,4 @@
"perplexity"
]
}
-}
\ No newline at end of file
+}
diff --git a/openapi/answers.json b/openapi/answers.json
index e3c16e41..f49841e1 100644
--- a/openapi/answers.json
+++ b/openapi/answers.json
@@ -15,6 +15,52 @@
"type": "http",
"scheme": "bearer"
}
+ },
+ "schemas": {
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ },
+ "Error": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": { "type": "string" },
+ "object": { "type": "string", "enum": ["error"] },
+ "code": { "type": "string" },
+ "type": { "type": "string", "format": "uri" },
+ "status": { "type": "integer" },
+ "title": { "type": "string" },
+ "detail": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "errors": {
+ "type": "array",
+ "description": "Optional array of additional error details",
+ "items": {}
+ }
+ },
+ "required": ["id", "object", "code", "type", "status", "title", "detail", "created"]
+ }
}
},
"paths": {
@@ -22,11 +68,7 @@
"post": {
"summary": "Create Answer",
"description": "The AI will perform actions like searching and browsing web pages to find the answer to the provided task.\nExecution time is 3-30s depending upon complexity. \n\n For longer tasks, use the agent endpoint instead. See [Agent feature](/features/agents/agents). See [Answers feature](/features/answers/answers).",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "security": [{ "Authorization": [] }],
"requestBody": {
"required": true,
"content": {
@@ -34,18 +76,10 @@
"schema": {
"type": "object",
"properties": {
- "task": {
- "type": "string",
- "description": "The task to be performed."
- },
- "json_format": {
- "type": "object",
- "description": "The desired output JSON object with empty values as a schema, or simply describe the data you want as a string."
- }
+ "task": { "type": "string", "description": "The task to be performed." },
+ "json_format": { "type": "object", "description": "The desired output JSON object with empty values as a schema, or simply describe the data you want as a string." }
},
- "required": [
- "task"
- ]
+ "required": ["task"]
}
}
}
@@ -58,30 +92,16 @@
"schema": {
"type": "object",
"properties": {
- "id": {
- "type": "string"
- },
- "object": {
- "type": "string"
- },
- "created": {
- "type": "integer"
- },
- "metadata": {
- "type": "object"
- },
- "task": {
- "type": "string"
- },
+ "id": { "type": "string" },
+ "object": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "task": { "type": "string" },
"result": {
"type": "object",
"properties": {
- "json_content": {
- "type": "string"
- },
- "json_hosted_url": {
- "type": "string"
- }
+ "json_content": { "type": "string" },
+ "json_hosted_url": { "type": "string" }
}
}
}
@@ -90,13 +110,118 @@
}
},
"400": {
- "description": "Bad Request (Missing Task)"
+ "description": "Bad request - missing or invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "task is required",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"402": {
- "description": "Invalid API Key"
+ "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "403": {
+ "description": "Forbidden - access denied. See [Forbidden](/api-reference/errors/forbidden) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "approval_required",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "You don't have access to this feature.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "429": {
+ "description": "Rate limit exceeded. See [Too Many Requests](/api-reference/errors/too_many_requests) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "rate_limit_exceeded",
+ "type": "https://docs.olostep.com/api-reference/errors/too_many_requests",
+ "status": 429,
+ "title": "Too Many Requests",
+ "detail": "You've reached your rate limits for this API.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal Server Error"
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
@@ -104,61 +229,36 @@
"/v1/answers/{answer_id}": {
"get": {
"summary": "Get Answer",
- "description": "This endpoint retrieves a previously completed answer by its ID.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "description": "Retrieve a previously completed answer by its ID.",
+ "security": [{ "Authorization": [] }],
"parameters": [
{
"name": "answer_id",
"in": "path",
"required": true,
- "description": "Unique identifier for the answer to be retrieved.",
- "schema": {
- "type": "string"
- }
+ "description": "Unique identifier for the answer.",
+ "schema": { "type": "string" }
}
],
"responses": {
"200": {
- "description": "Successful response with the answer object.",
+ "description": "Successful response with the answer.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
- "id": {
- "type": "string"
- },
- "object": {
- "type": "string"
- },
- "created": {
- "type": "integer"
- },
- "metadata": {
- "type": "object"
- },
- "task": {
- "type": "string"
- },
+ "id": { "type": "string" },
+ "object": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "task": { "type": "string" },
"result": {
"type": "object",
"properties": {
- "json_content": {
- "type": "string"
- },
- "json_hosted_url": {
- "type": "string"
- },
- "sources": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
+ "json_content": { "type": "string" },
+ "json_hosted_url": { "type": "string" },
+ "sources": { "type": "array", "items": { "type": "string" } }
}
}
}
@@ -166,17 +266,65 @@
}
}
},
- "402": {
- "description": "Invalid API Key"
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Answer Not Found"
+ "description": "Answer not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "answer_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Answer not found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal Server Error"
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
}
}
-}
\ No newline at end of file
+}
diff --git a/openapi/batches.json b/openapi/batches.json
index 73fbee4d..9d304c2c 100644
--- a/openapi/batches.json
+++ b/openapi/batches.json
@@ -9,15 +9,114 @@
"url": "https://api.olostep.com"
}
],
- "components": {
- "securitySchemes": {
- "Authorization": {
- "type": "http",
- "scheme": "bearer"
- }
- }
- },
"paths": {
+ "/v1/batches/recent": {
+ "get": {
+ "summary": "List recent batches",
+ "description": "Fetches the list of recent batches for the authenticated user.",
+ "security": [
+ {
+ "Authorization": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "default": 50,
+ "maximum": 100
+ },
+ "description": "Maximum number of batches to return (default 50, max 100)."
+ },
+ {
+ "name": "source",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string"
+ },
+ "description": "Filter by source (optional)."
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful response with list of recent batches.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "enum": ["list"]
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": { "type": "string" },
+ "object": { "type": "string" },
+ "status": { "type": "string" },
+ "created": { "type": "number" },
+ "total_urls": { "type": "number" },
+ "completed_urls": { "type": "number" }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/Error"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid. Try double-checking it or reaching out to info@olostep.com if you're facing issues.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "500": {
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/Error"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ }
+ }
+ }
+ },
"/v1/batches": {
"post": {
"summary": "Start a new batch",
@@ -86,17 +185,20 @@
"items": {
"type": "string"
},
- "description": "Filter extracted links using glob patterns with `include_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})."
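+ "description": "Glob patterns selecting which extracted links to include, e.g. \"*.pdf\" for file extensions, \"/blog/*\" for paths, or \"https://example.com/*\" for full URLs."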
},
"exclude_links": {
"type": "array",
"items": {
"type": "string"
},
- "description": "Filter extracted links using glob patterns with `exclude_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})."
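+ "description": "Glob patterns selecting which extracted links to exclude, e.g. \"*.pdf\" for file extensions, \"/blog/*\" for paths, or \"https://example.com/*\" for full URLs."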
}
},
"description": "Get all the links present on each page in the batch."
+ },
+ "metadata": {
+ "$ref": "#/components/schemas/Metadata"
}
},
"required": [
@@ -114,46 +216,152 @@
"schema": {
"type": "object",
"properties": {
- "id": {
- "type": "string",
- "description": "Batch ID"
- },
- "object": {
- "type": "string",
- "description": "The kind of object. \"batch\" for this endpoint."
- },
- "status": {
- "type": "string",
- "description": "`in_progress` or `completed`"
- },
- "created": {
- "type": "number",
- "description": "Created epoch"
- },
- "total_urls": {
- "type": "number",
- "description": "Count of URLs in the batch"
- },
- "completed_urls": {
- "type": "number",
- "description": "Count of completed URLs"
- },
- "parser": {
- "type": "string"
- },
- "country": {
- "type": "string"
- }
+ "id": { "type": "string", "description": "Batch ID" },
+ "object": { "type": "string", "description": "The kind of object. \"batch\" for this endpoint." },
+ "status": { "type": "string", "description": "`in_progress` or `completed`" },
+ "created": { "type": "number", "description": "Created epoch" },
+ "total_urls": { "type": "number", "description": "Count of URLs in the batch" },
+ "completed_urls": { "type": "number", "description": "Count of completed URLs" },
+ "parser": { "type": "string" },
+ "country": { "type": "string" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
}
}
}
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "The 'items' array is required and must contain at least one item.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid. Try double-checking it or reaching out to info@olostep.com if you're facing issues.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "402": {
+ "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/auth/",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "403": {
+ "description": "Forbidden - access denied to this feature. See [Forbidden](/api-reference/errors/forbidden) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "access_denied",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "You don't have access to this feature. Please reach out to info@olostep.com to get approved",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "409": {
+ "description": "Idempotency conflict - a request with this key is in progress. See [Idempotency Error](/api-reference/errors/idempotency_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "idempotency_key_in_progress",
+ "type": "https://docs.olostep.com/api-reference/errors/idempotency_error",
+ "status": 409,
+ "title": "Idempotency Error",
+ "detail": "A request with this idempotency key is currently being processed. Please wait and retry.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Unprocessable entity - business rule violation. See [Unprocessable Entity](/api-reference/errors/unprocessable_entity) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "idempotency_key_reuse",
+ "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity",
+ "status": 422,
+ "title": "Unprocessable Entity",
+ "detail": "A request with this idempotency key was already made with different parameters. Idempotency keys must be unique per request.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
@@ -172,9 +380,7 @@
"name": "batch_id",
"in": "path",
"required": true,
- "schema": {
- "type": "string"
- },
+ "schema": { "type": "string" },
"description": "The ID of the batch to retrieve information for."
}
],
@@ -186,49 +392,76 @@
"schema": {
"type": "object",
"properties": {
- "id": {
- "type": "string",
- "description": "Batch ID"
- },
- "object": {
- "type": "string",
- "description": "The kind of object. \"batch\" for this endpoint."
- },
- "status": {
- "type": "string",
- "description": "`in_progress` or `completed`"
- },
- "created": {
- "type": "number",
- "description": "Created epoch"
- },
- "total_urls": {
- "type": "number",
- "description": "Count of URLs in the batch"
- },
- "completed_urls": {
- "type": "number",
- "description": "Count of completed URLs"
- },
- "parser": {
- "type": "string"
- },
- "country": {
- "type": "string"
- }
+ "id": { "type": "string", "description": "Batch ID" },
+ "object": { "type": "string", "description": "The kind of object. \"batch\" for this endpoint." },
+ "status": { "type": "string", "description": "`in_progress` or `completed`" },
+ "created": { "type": "number", "description": "Created epoch" },
+ "total_urls": { "type": "number", "description": "Count of URLs in the batch" },
+ "completed_urls": { "type": "number", "description": "Count of completed URLs" },
+ "parser": { "type": "string" },
+ "country": { "type": "string" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
}
}
}
}
},
- "400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Batch not found for the provided ID."
+ "description": "Batch not found for the provided ID. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "batch_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Batch not found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
@@ -247,10 +480,8 @@
"name": "batch_id",
"in": "path",
"required": true,
- "schema": {
- "type": "string"
- },
- "description": "The ID of the batch to retrieve the list of URLs and retrieve_id for."
+ "schema": { "type": "string" },
+ "description": "The ID of the batch to retrieve items for."
},
{
"name": "status",
@@ -258,10 +489,7 @@
"required": false,
"schema": {
"type": "string",
- "enum": [
- "completed",
- "failed"
- ]
+ "enum": ["completed", "failed"]
},
"description": "Status of the URLs to retrieve (completed or failed)."
},
@@ -269,118 +497,179 @@
"name": "cursor",
"in": "query",
"required": false,
- "schema": {
- "type": "integer"
- },
- "description": "Optional integer representing the index to start fetching content from. Useful to paginate until all URLs are fetched. \n\n Start with 0, then provide `response['cursor']` value of the last request."
+ "schema": { "type": "integer" },
+ "description": "Pagination cursor. Omit this parameter on the first request. For subsequent requests, use the `cursor` value from the previous response. See [Pagination](/api-reference/common/pagination) for details."
},
{
"name": "limit",
"in": "query",
"required": false,
- "schema": {
- "type": "integer"
- },
- "description": "Optional integer to limit the number of results returned. Recommended 10-50 results at a time. Paginated using *cursor*.\n\nMaximum 10MB of content can be fetched in a single request."
- },
- {
- "name": "formats",
- "in": "query",
- "required": false,
- "schema": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "html",
- "markdown",
- "json"
- ]
- }
- },
- "description": "**Deprecated:** Use `/retrieve` endpoint with `retrieve_id`.\n\nArray of formats to fetch (e.g., [\"html\", \"markdown\"])."
+ "schema": { "type": "integer" },
+ "description": "Number of results to return (recommended 10-50)."
}
],
"responses": {
"200": {
- "description": "Successful response with the list of URLs.",
+ "description": "Successful response with batch items.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
- "batch_id": {
- "type": "string",
- "description": "Batch ID"
- },
- "object": {
- "type": "string",
- "description": "The kind of object. \"batch\" for this endpoint."
- },
- "status": {
- "type": "string",
- "description": "`in_progress` or `completed`"
- },
+ "batch_id": { "type": "string" },
+ "object": { "type": "string" },
+ "status": { "type": "string" },
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
- "custom_id": {
- "type": "string"
- },
- "retrieve_id": {
- "type": "string",
- "description": "To fetch content from the `/retrieve` API"
- },
- "url": {
- "type": "string"
- },
- "html_content": {
- "type": "string",
- "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`."
- },
- "markdown_content": {
- "type": "string",
- "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`."
- },
- "json_content": {
- "type": "string",
- "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`."
- },
- "links_on_page": {
- "type": "array",
- "description": "All links on that page`.",
- "items": {
- "type": "string"
- }
- }
+ "custom_id": { "type": "string" },
+ "retrieve_id": { "type": "string" },
+ "url": { "type": "string" },
+ "links_on_page": { "type": "array", "items": { "type": "string" } }
}
}
},
- "items_count": {
- "type": "integer"
- },
- "cursor": {
- "type": "integer",
- "description": "To be passed in the query in next request to get the next items."
- }
+ "items_count": { "type": "integer" },
+ "cursor": { "type": "integer", "description": "Pagination cursor. Present when there are more results available. Use this value in the next request's `cursor` parameter. See [Pagination](/api-reference/common/pagination) for details." }
}
}
}
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request due to incorrect parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "Status must be either 'completed' or 'failed'",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Batch not found for the provided ID."
+ "description": "Batch not found for the provided ID. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "batch_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Batch not found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
}
+ },
+ "components": {
+ "securitySchemes": {
+ "Authorization": {
+ "type": "http",
+ "scheme": "bearer"
+ }
+ },
+ "schemas": {
+ "Error": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": { "type": "string", "description": "Unique error identifier" },
+ "object": { "type": "string", "enum": ["error"], "description": "Always 'error'" },
+ "code": { "type": "string", "description": "Machine-readable error code" },
+ "type": { "type": "string", "format": "uri", "description": "URI reference identifying the problem type" },
+ "status": { "type": "integer", "description": "HTTP status code" },
+ "title": { "type": "string", "description": "Short, human-readable summary" },
+ "detail": { "type": "string", "description": "Human-readable explanation" },
+ "created": { "type": "integer", "description": "Unix timestamp" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "errors": {
+ "type": "array",
+ "description": "Optional array of additional error details",
+ "items": {}
+ }
+ },
+ "required": ["id", "object", "code", "type", "status", "title", "detail", "created"]
+ },
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ }
+ }
}
-}
\ No newline at end of file
+}
diff --git a/openapi/builder.json b/openapi/builder.json
index 66957702..6f043336 100644
--- a/openapi/builder.json
+++ b/openapi/builder.json
@@ -5,341 +5,254 @@
"version": "1.0.0"
},
"servers": [
- {
- "url": "https://agent.olostep.com"
- },
- {
- "url": "https://dataset.olostep.com"
- }
+ { "url": "https://agent.olostep.com" },
+ { "url": "https://dataset.olostep.com" }
],
"components": {
"securitySchemes": {
- "Authorization": {
- "type": "http",
- "scheme": "bearer"
+ "Authorization": { "type": "http", "scheme": "bearer" }
+ },
+ "schemas": {
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ },
+ "Error": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": { "type": "string" },
+ "object": { "type": "string", "enum": ["error"] },
+ "code": { "type": "string" },
+ "type": { "type": "string", "format": "uri" },
+ "status": { "type": "integer" },
+ "title": { "type": "string" },
+ "detail": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "errors": {
+ "type": "array",
+ "description": "Optional array of additional error details",
+ "items": {}
+ }
+ },
+ "required": ["id", "object", "code", "type", "status", "title", "detail", "created"]
}
}
},
"paths": {
"/olostep-p2p-incomingAPI": {
"get": {
- "summary": "Initiate web page scraping with various configurations",
- "description": "This endpoint allows users to scrape web pages with various options such as setting timeouts, wait time before scraping, data format preferences, and more.",
- "servers": [
- {
- "url": "https://agent.olostep.com"
- }
- ],
- "security": [
- {
- "Authorization": []
- }
- ],
+ "summary": "Start Agent (Legacy)",
+ "description": "Initiate web page scraping with various configurations.",
+ "servers": [{ "url": "https://agent.olostep.com" }],
+ "security": [{ "Authorization": [] }],
"parameters": [
- {
- "name": "url",
- "in": "query",
- "required": true,
- "description": "The URL to start scraping from.",
- "schema": {
- "type": "string",
- "format": "uri"
- }
- },
- {
- "name": "timeout",
- "in": "query",
- "required": false,
- "description": "Timeout in seconds for the scraping process, with a maximum of 620 seconds.",
- "schema": {
- "type": "integer",
- "default": 40
- }
- },
- {
- "name": "waitBeforeScraping",
- "in": "query",
- "required": false,
- "description": "Time to wait in seconds before starting the scraping, up to 500 seconds.",
- "schema": {
- "type": "integer",
- "default": 3
- }
- },
- {
- "name": "saveHtml",
- "in": "query",
- "required": false,
- "description": "Option to save the scraped content as HTML.",
- "schema": {
- "type": "boolean",
- "default": true
- }
- },
- {
- "name": "saveMarkdown",
- "in": "query",
- "required": false,
- "description": "Option to save the scraped content as Markdown.",
- "schema": {
- "type": "boolean",
- "default": true
- }
- },
- {
- "name": "removeCSSselectors",
- "in": "query",
- "required": false,
- "description": "Option to remove certain CSS selectors from the content. Optionally, you can also pass a JSON stringified array of specific selectors you want to remove. The CSS selectors removed when this option is set to `default` are `['nav','footer','script','style','noscript','svg',[role=alert],[role=banner],[role=dialog],[role=alertdialog],[role=region][aria-label*=skip i],[aria-modal=true]]`",
- "schema": {
- "type": "string",
- "default": "default",
- "enum": ["default", "none", "JSON stringified array of CSS selectors"]
- }
- },
- {
- "name": "htmlTransformer",
- "in": "query",
- "required": false,
- "description": "Specify the HTML transformer to use, if any. Postlight's Mercury Parser library is used to remove ads and other unwanted content from the scraped content.",
- "schema": {
- "type": "string",
- "default": "none",
- "enum": ["none", "postlightParser"]
- }
- },
- {
- "name": "removeImages",
- "in": "query",
- "required": false,
- "description": "Option to remove images from the scraped content.",
- "schema": {
- "type": "boolean",
- "default": true
- }
- },
- {
- "name": "expandMarkdown",
- "in": "query",
- "required": false,
- "description": "If true, the markdown content is returned in the markdown_content field.",
- "schema": {
- "type": "boolean",
- "default": false
- }
- },
- {
- "name": "expandHtml",
- "in": "query",
- "required": false,
- "description": "If true, the HTML content is returned in the html_content field.",
- "schema": {
- "type": "boolean",
- "default": false
- }
- },
- {
- "name": "actions",
- "in": "query",
- "required": false,
- "description": "Actions to perform on the page before getting the content",
- "schema": {
- "type": "array",
- "items": {
- "type": "object",
- "discriminator": {
- "propertyName": "type"
- },
- "oneOf": [
- {
- "type": "object",
- "title": "Wait",
- "required": [
- "type",
- "milliseconds"
- ],
- "properties": {
- "type": {
- "type": "string",
- "enum": [
- "wait"
- ],
- "description": "Wait for a specified amount of milliseconds"
- },
- "milliseconds": {
- "type": "integer",
- "minimum": 0,
- "description": "Time to wait in milliseconds"
- }
- }
- },
- {
- "type": "object",
- "title": "Click",
- "required": [
- "type",
- "selector"
- ],
- "properties": {
- "type": {
- "type": "string",
- "enum": [
- "click"
- ],
- "description": "Click on an element"
- },
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to click"
- }
- }
- },
- {
- "type": "object",
- "title": "Fill Input",
- "required": [
- "type",
- "selector",
- "value"
- ],
- "properties": {
- "type": {
- "type": "string",
- "enum": [
- "fill_input"
- ],
- "description": "Fill an input element with a value"
- },
- "selector": {
- "type": "string",
- "description": "CSS selector for the input element"
- },
- "value": {
- "type": "string",
- "description": "Text to enter into the input"
- }
- }
- },
- {
- "type": "object",
- "title": "Scroll",
- "required": [
- "type",
- "direction",
- "amount"
- ],
- "properties": {
- "type": {
- "type": "string",
- "enum": [
- "scroll"
- ],
- "description": "Scroll the page"
- },
- "direction": {
- "type": "string",
- "enum": [
- "up",
- "down",
- "left",
- "right"
- ],
- "description": "Direction to scroll"
- },
- "amount": {
- "type": "number",
- "description": "Amount to scroll in pixels"
- }
- }
- }
- ]
- }
- }
- }
+ { "name": "url", "in": "query", "required": true, "schema": { "type": "string", "format": "uri" }, "description": "The URL to scrape." },
+ { "name": "timeout", "in": "query", "required": false, "schema": { "type": "integer", "default": 40 }, "description": "Timeout in seconds (max 620)." },
+ { "name": "waitBeforeScraping", "in": "query", "required": false, "schema": { "type": "integer", "default": 3 }, "description": "Seconds to wait before scraping starts (max 500)." },
+ { "name": "saveHtml", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Save as HTML." },
+ { "name": "saveMarkdown", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Save as Markdown." },
+ { "name": "removeCSSselectors", "in": "query", "required": false, "schema": { "type": "string", "default": "default" }, "description": "CSS selectors to remove: `default`, `none`, or a JSON-stringified array of selectors." },
+ { "name": "htmlTransformer", "in": "query", "required": false, "schema": { "type": "string", "default": "none", "enum": ["none", "postlightParser"] }, "description": "HTML transformer." },
+ { "name": "removeImages", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Remove images." },
+ { "name": "expandMarkdown", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Return markdown content inline." },
+ { "name": "expandHtml", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Return HTML content inline." }
],
"responses": {
"200": {
- "description": "Successful response with the requested data."
+ "description": "Successful response with scraped data.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
+ }
+ }
+ }
+ }
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request - missing or invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "url parameter is required",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"401": {
- "description": "Unauthorized access due to missing or invalid API token."
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
},
"/olostep-p2p-dataset-API": {
"get": {
- "servers": [
- {
- "url": "https://dataset.olostep.com"
- }
- ],
- "summary": "Retrieve dataset information",
- "description": "Allows users to retrieve dataset information in Markdown and/or HTML format based on the provided dataset ID.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "servers": [{ "url": "https://dataset.olostep.com" }],
+ "summary": "Retrieve Dataset (Legacy)",
+ "description": "Retrieve dataset information by ID.",
+ "security": [{ "Authorization": [] }],
"parameters": [
- {
- "name": "datasetId",
- "in": "query",
- "required": true,
- "description": "Unique identifier for the dataset to be retrieved.",
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "retrieveMarkdown",
- "in": "query",
- "required": false,
- "description": "Option to retrieve the dataset in Markdown format.",
- "schema": {
- "type": "boolean",
- "default": true
- }
- },
- {
- "name": "retrieveHtml",
- "in": "query",
- "required": false,
- "description": "Option to retrieve the dataset in HTML format.",
- "schema": {
- "type": "boolean",
- "default": false
- }
- },
- {
- "name": "fastLane",
- "in": "query",
- "required": false,
- "description": "Experimental option to speed up the scraping process",
- "schema": {
- "type": "boolean",
- "default": false
- }
- }
+ { "name": "datasetId", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Dataset ID." },
+ { "name": "retrieveMarkdown", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Retrieve as Markdown." },
+ { "name": "retrieveHtml", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Retrieve as HTML." },
+ { "name": "fastLane", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Experimental speed optimization." }
],
"responses": {
"200": {
- "description": "Successful response with the dataset information."
+ "description": "Successful response with dataset information.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
+ }
+ }
+ }
+ }
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request - missing or invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "datasetId is required",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Dataset not found for the provided ID."
+ "description": "Dataset not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "dataset_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Dataset not found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
diff --git a/openapi/crawls.json b/openapi/crawls.json
index e58af42b..37716952 100644
--- a/openapi/crawls.json
+++ b/openapi/crawls.json
@@ -15,6 +15,52 @@
"type": "http",
"scheme": "bearer"
}
+ },
+ "schemas": {
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ },
+ "Error": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": { "type": "string" },
+ "object": { "type": "string", "enum": ["error"] },
+ "code": { "type": "string" },
+ "type": { "type": "string", "format": "uri" },
+ "status": { "type": "integer" },
+ "title": { "type": "string" },
+ "detail": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "errors": {
+ "type": "array",
+ "description": "Optional array of additional error details",
+ "items": {}
+ }
+ },
+ "required": ["id", "object", "code", "type", "status", "title", "detail", "created"]
+ }
}
},
"paths": {
@@ -22,11 +68,7 @@
"post": {
"summary": "Start a new crawl",
"description": "Initiates a new crawl process with the specified parameters.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "security": [{ "Authorization": [] }],
"requestBody": {
"required": true,
"content": {
@@ -34,66 +76,21 @@
"schema": {
"type": "object",
"properties": {
- "start_url": {
- "type": "string",
- "description": "The starting point of the crawl."
- },
- "include_urls": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "URL path patterns to include in the crawl using glob syntax.\n\nDefaults to `/**` which includes all URLs. Use patterns like `/blog/**` to crawl specific sections (e.g., only blog pages), `/products/*.html` for product pages, or multiple patterns for different sections. Supports standard glob features like * (any characters) and ** (recursive matching)."
- },
- "exclude_urls": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "URL path names in glob pattern to exclude. For example: `/careers/**`. Excluded URLs will supersede included URLs."
- },
- "max_pages": {
- "type": "number",
- "description": "Maximum number of pages to crawl. Recommended for most use cases like crawling an entire website."
- },
- "max_depth": {
- "type": "number",
- "description": "Maximum depth of the crawl. Useful to extract only up to n-degree of links."
- },
- "include_external": {
- "type": "boolean",
- "description": "Crawl first-degree external links."
- },
- "include_subdomain": {
- "type": "boolean",
- "description": "Include subdomains of the website. `false` by default."
- },
- "search_query": {
- "type": "string",
- "description": "An optional search query to find specific links and also sort the results by relevance."
- },
- "top_n": {
- "type": "number",
- "description": "An optional number to only crawl the top N most relevant links on every page as per search query."
- },
- "webhook_url": {
- "type": "string",
- "description": "An optional POST request endpoint called when this crawl is completed. The body of the request will be same as the response of this [`v1/crawls/{crawl_id}`](./info#response-created) endpoint."
- },
- "timeout": {
- "type": "number",
- "description": "End the crawl after n seconds with the pages completed until then. May take ~10s extra from provided timeout."
- },
- "follow_robots_txt": {
- "type": "boolean",
- "description": "Whether to respect robots.txt rules. If set to `false`, the crawler will scrape the website regardless of robots.txt disallow directives. `true` by default.",
- "default": true
- }
+ "start_url": { "type": "string", "description": "The starting point of the crawl." },
+ "include_urls": { "type": "array", "items": { "type": "string" }, "description": "URL path patterns to include." },
+ "exclude_urls": { "type": "array", "items": { "type": "string" }, "description": "URL path patterns to exclude." },
+ "max_pages": { "type": "number", "description": "Maximum number of pages to crawl." },
+ "max_depth": { "type": "number", "description": "Maximum depth of the crawl." },
+ "include_external": { "type": "boolean", "description": "Crawl first-degree external links." },
+ "include_subdomain": { "type": "boolean", "description": "Include subdomains of the website. `false` by default." },
+ "search_query": { "type": "string", "description": "Search query to find specific links." },
+ "top_n": { "type": "number", "description": "Only crawl top N relevant links." },
+ "webhook_url": { "type": "string", "description": "Webhook URL called on completion." },
+ "timeout": { "type": "number", "description": "End crawl after n seconds." },
+ "follow_robots_txt": { "type": "boolean", "default": true, "description": "Respect robots.txt rules." },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
},
- "required": [
- "start_url",
- "max_pages"
- ]
+ "required": ["start_url", "max_pages"]
}
}
}
@@ -106,80 +103,114 @@
"schema": {
"type": "object",
"properties": {
- "id": {
- "type": "string",
- "description": "Crawl ID"
- },
- "object": {
- "type": "string",
- "description": "The kind of object. \"crawl\" for this endpoint."
- },
- "status": {
- "type": "string",
- "description": "`in_progress` or `completed`"
- },
- "created": {
- "type": "number",
- "description": "Created time in epoch"
- },
- "start_date": {
- "type": "string",
- "description": "Created time in date"
- },
- "start_url": {
- "type": "string"
- },
- "max_pages": {
- "type": "number"
- },
- "max_depth": {
- "type": "number"
- },
- "exclude_urls": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "include_urls": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "include_external": {
- "type": "boolean"
- },
- "search_query": {
- "type": "string"
- },
- "top_n": {
- "type": "number"
- },
- "current_depth": {
- "type": "number",
- "description": "The current depth of the crawl process."
- },
- "pages_count": {
- "type": "number",
- "description": "Count of pages crawled"
- },
- "webhook_url": {
- "type": "string"
- },
- "follow_robots_txt": {
- "type": "boolean"
- }
+ "id": { "type": "string", "description": "Crawl ID" },
+ "object": { "type": "string" },
+ "status": { "type": "string" },
+ "created": { "type": "number" },
+ "start_url": { "type": "string" },
+ "max_pages": { "type": "number" },
+ "max_depth": { "type": "number" },
+ "pages_count": { "type": "number" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
}
}
}
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "start_url is required",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "402": {
+ "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "403": {
+ "description": "Forbidden - access denied. See [Forbidden](/api-reference/errors/forbidden) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "approval_required",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "You don't have access to this feature.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
@@ -187,21 +218,15 @@
"/v1/crawls/{crawl_id}": {
"get": {
"summary": "Retrieve crawl information",
- "description": "Fetches information about a specific crawl using its `crawl_id`.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "description": "Fetches information about a specific crawl.",
+ "security": [{ "Authorization": [] }],
"parameters": [
{
"name": "crawl_id",
"in": "path",
"required": true,
- "schema": {
- "type": "string"
- },
- "description": "The ID of the crawl to retrieve information for."
+ "schema": { "type": "string" },
+ "description": "The ID of the crawl."
}
],
"responses": {
@@ -212,241 +237,211 @@
"schema": {
"type": "object",
"properties": {
- "id": {
- "type": "string",
- "description": "Crawl ID"
- },
- "object": {
- "type": "string",
- "description": "The kind of object. \"crawl\" for this endpoint."
- },
- "status": {
- "type": "string",
- "description": "`in_progress` or `completed`"
- },
- "created": {
- "type": "number",
- "description": "Created time in epoch"
- },
- "start_date": {
- "type": "string",
- "description": "Created time in date"
- },
- "start_url": {
- "type": "string"
- },
- "max_pages": {
- "type": "number"
- },
- "max_depth": {
- "type": "number"
- },
- "exclude_urls": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "include_urls": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "include_external": {
- "type": "boolean"
- },
- "search_query": {
- "type": "string"
- },
- "top_n": {
- "type": "number"
- },
- "current_depth": {
- "type": "number",
- "description": "The current depth of the crawl process."
- },
- "pages_count": {
- "type": "number",
- "description": "Count of pages crawled"
- },
- "webhook_url": {
- "type": "string"
- },
- "follow_robots_txt": {
- "type": "boolean"
- }
+ "id": { "type": "string" },
+ "object": { "type": "string" },
+ "status": { "type": "string" },
+ "created": { "type": "number" },
+ "start_url": { "type": "string" },
+ "max_pages": { "type": "number" },
+ "pages_count": { "type": "number" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
}
}
}
}
},
- "400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Crawl not found for the provided ID."
+ "description": "Crawl not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "crawl_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Crawl not found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
},
"/v1/crawls/{crawl_id}/pages": {
"get": {
- "summary": "Retrieve list of crawled pages optionally with content",
- "description": "Fetches the list of crawled pages and content that have been processed for a specific crawl ID.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "summary": "Retrieve crawled pages",
+ "description": "Fetches the list of crawled pages for a specific crawl.",
+ "security": [{ "Authorization": [] }],
"parameters": [
{
"name": "crawl_id",
"in": "path",
"required": true,
- "schema": {
- "type": "string"
- },
- "description": "The ID of the crawl to retrieve the list of URLs for."
+ "schema": { "type": "string" },
+ "description": "The ID of the crawl."
},
{
"name": "cursor",
"in": "query",
"required": false,
- "schema": {
- "type": "integer"
- },
- "description": "Optional integer representing the index to start fetching content from. Useful to paginate until all URLs are fetched. Start with 0, then provide `response['cursor']` value of the last request."
+ "schema": { "type": "integer" },
+ "description": "Pagination cursor. Omit this parameter on the first request. For subsequent requests, use the `cursor` value from the previous response. See [Pagination](/api-reference/common/pagination) for details."
},
{
"name": "limit",
"in": "query",
"required": false,
- "schema": {
- "type": "integer"
- },
- "description": "Optional integer to limit the number of results returned. Recommended 10-50 results at a time. Paginated using *cursor*. Maximum 10MB of content can be fetched in a single request."
- },
- {
- "name": "search_query",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string"
- },
- "description": "An optional search query to sort the results by relevance. Uses the original search_query by default if provided."
- },
- {
- "name": "formats",
- "in": "query",
- "required": false,
- "schema": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "html",
- "markdown"
- ]
- }
- },
- "description": "**Deprecated:** Use `/retrieve` endpoint with `retrieve_id`.\n\nArray of formats to fetch (e.g., [\"html\", \"markdown\"])."
+ "schema": { "type": "integer" },
+ "description": "Number of results to return."
}
],
"responses": {
"200": {
- "description": "Successful response with the list of URLs.",
+ "description": "Successful response with crawled pages.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
- "crawl_id": {
- "type": "string",
- "description": "Crawl ID"
- },
- "object": {
- "type": "string",
- "description": "The kind of object. \"crawl\" for this endpoint."
- },
- "status": {
- "type": "string",
- "description": "`in_progress` or `completed`"
- },
- "search_query": {
- "type": "string"
- },
- "pages_count": {
- "type": "number"
- },
+ "crawl_id": { "type": "string" },
+ "object": { "type": "string" },
+ "status": { "type": "string" },
+ "pages_count": { "type": "number" },
"pages": {
"type": "array",
"items": {
"type": "object",
"properties": {
- "id": {
- "type": "string"
- },
- "retrieve_id": {
- "type": "string",
- "description": "To fetch content from the `/retrieve` endpoint"
- },
- "url": {
- "type": "string"
- },
- "is_external": {
- "type": "boolean"
- },
- "html_content": {
- "type": "string",
- "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`."
- },
- "markdown_content": {
- "type": "string",
- "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`."
- }
+ "id": { "type": "string" },
+ "retrieve_id": { "type": "string" },
+ "url": { "type": "string" }
}
}
},
- "metadata": {
- "type": "object",
- "properties": {
- "external_urls": {
- "type": "array",
- "description": "External URLs that were found during crawl",
- "items": {
- "type": "string"
- }
- },
- "failed_urls": {
- "type": "array",
- "description": "URLs that were found but couldn't be scraped",
- "items": {
- "type": "string"
- }
- }
- }
- },
- "cursor": {
- "type": "integer",
- "description": "To be passed in the query in next request to get the next items."
- }
+ "cursor": { "type": "integer", "description": "Pagination cursor. Present when there are more results available. Use this value in the next request's `cursor` parameter. See [Pagination](/api-reference/common/pagination) for details." }
}
}
}
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "Invalid parameter format",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Crawl not found for the provided ID."
+ "description": "Crawl not found or no pages found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "crawl_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Crawl not found or no pages found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
diff --git a/openapi/maps.json b/openapi/maps.json
index c6df78da..3bd45603 100644
--- a/openapi/maps.json
+++ b/openapi/maps.json
@@ -15,18 +15,60 @@
"type": "http",
"scheme": "bearer"
}
+ },
+ "schemas": {
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ },
+ "Error": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": { "type": "string" },
+ "object": { "type": "string", "enum": ["error"] },
+ "code": { "type": "string" },
+ "type": { "type": "string", "format": "uri" },
+ "status": { "type": "integer" },
+ "title": { "type": "string" },
+ "detail": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "errors": {
+ "type": "array",
+ "description": "Optional array of additional error details",
+ "items": {}
+ }
+ },
+ "required": ["id", "object", "code", "type", "status", "title", "detail", "created"]
+ }
}
},
"paths": {
"/v1/maps": {
"post": {
- "summary": "Get all the urls on a certain website",
- "description": "This endpoint allows users to get all the urls on a certain website. It can take up to 120 seconds for complex websites. For large websites, results are paginated using cursor-based pagination.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "summary": "Get all the URLs on a website",
+ "description": "Get all URLs from a website's sitemap. Can take up to 120 seconds for complex websites. Results are paginated using cursor-based pagination.",
+ "security": [{ "Authorization": [] }],
"requestBody": {
"required": true,
"content": {
@@ -34,147 +76,163 @@
"schema": {
"type": "object",
"properties": {
- "url": {
- "type": "string",
- "format": "uri",
- "description": "The URL of the website for which you want the links"
- },
- "search_query": {
- "type": "string",
- "description": "An optional search query to sort the links by search relevance."
- },
- "top_n": {
- "type": "number",
- "description": "An optional number to limit to only top n links for a search query."
- },
- "include_subdomain": {
- "type": "boolean",
- "description": "Include subdomains of the given URL. `true` by default."
- },
- "include_urls": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "URL path patterns to include using glob syntax. For example: `/blog/**` to only include blog URLs. Only URLs matching these patterns will be returned."
- },
- "exclude_urls": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "URL path patterns to exclude using glob syntax. For example: `/careers/**`. Excluded URLs will supersede included URLs."
- },
- "cursor": {
- "type": "string",
- "description": "OPTIONAL: Pagination cursor from a previous response. When provided, returns the next set of URLs from where the previous request left off due to response size limit."
- }
+ "url": { "type": "string", "format": "uri", "description": "The website URL" },
+ "search_query": { "type": "string", "description": "Sort links by search relevance" },
+ "top_n": { "type": "number", "description": "Limit to top n links" },
+ "include_subdomain": { "type": "boolean", "description": "Include subdomains (default true)" },
+ "include_urls": { "type": "array", "items": { "type": "string" }, "description": "URL patterns to include (glob)" },
+ "exclude_urls": { "type": "array", "items": { "type": "string" }, "description": "URL patterns to exclude (glob)" },
+ "cursor": { "type": "string", "description": "Pagination cursor. Omit this parameter on the first request. For subsequent requests, use the `cursor` value from the previous response. See [Pagination](/api-reference/common/pagination) for details." },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
},
"required": ["url"]
- },
- "examples": {
- "basic": {
- "value": {
- "url": "https://docs.olostep.com"
- }
- },
- "withFilters": {
- "value": {
- "url": "https://docs.olostep.com",
- "include_urls": ["/api-reference/**"],
- "exclude_urls": ["/api-reference/deprecated/**"]
- }
- },
- "withCursor": {
- "value": {
- "cursor": "abc123_xyz456"
- }
- }
}
}
}
},
"responses": {
"200": {
- "description": "Successful response with URLs found on the page",
+ "description": "Successful response with URLs.",
"content": {
"application/json": {
"schema": {
"type": "object",
- "required": ["urls_count", "urls"],
"properties": {
- "id": {
- "type": "string",
- "description": "Unique identifier for this map"
- },
- "urls_count": {
- "type": "integer",
- "description": "Number of URLs in the current response"
- },
- "urls": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "Array of URLs found on the page"
- },
- "cursor": {
- "type": "string",
- "description": "Pagination cursor to retrieve the next set of URLs limited due to 10MB size limit. If null or not present, all URLs have been retrieved."
- }
+ "id": { "type": "string" },
+ "urls_count": { "type": "integer" },
+ "urls": { "type": "array", "items": { "type": "string" } },
+ "cursor": { "type": "string", "nullable": true, "description": "Pagination cursor. Present when there are more results available. Use this value in the next request's `cursor` parameter. See [Pagination](/api-reference/common/pagination) for details." },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
}
},
- "examples": {
- "basicResponse": {
- "value": {
- "id": "map_abc123",
- "urls_count": 22,
- "urls": [
- "https://docs.olostep.com/api-reference/batches/create",
- "https://docs.olostep.com/api-reference/batches/info",
- "https://docs.olostep.com/api-reference/batches/items"
- ]
- }
- },
- "cursorResponse": {
- "value": {
- "id": "map_abc123",
- "urls_count": 15,
- "urls": [
- "https://docs.olostep.com/api-reference/crawls/create",
- "https://docs.olostep.com/api-reference/crawls/info",
- "https://docs.olostep.com/api-reference/crawls/pages"
- ],
- "cursor": "abc123_def789"
- }
- },
- "allRetrievedResponse": {
- "value": {
- "id": "map_abc123",
- "urls_count": 10,
- "urls": [
- "https://docs.olostep.com/features/maps/maps",
- "https://docs.olostep.com/get-started/authentication",
- "https://docs.olostep.com/get-started/welcome"
- ],
- "cursor": null
- }
- }
+ "example": {
+ "id": "map_abc123",
+ "urls_count": 2,
+ "urls": [
+ "https://docs.olostep.com/api-reference/batches/create",
+ "https://docs.olostep.com/api-reference/batches/info"
+ ],
+ "cursor": null
}
}
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request - invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "url is required or invalid cursor format",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"401": {
- "description": "Unauthorized access due to missing or invalid API token."
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "402": {
+ "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "403": {
+ "description": "Forbidden - access denied. See [Forbidden](/api-reference/errors/forbidden) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "approval_required",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "You don't have access to this feature.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "404": {
+ "description": "Resource not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "map_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Map not found or could not retrieve page URLs from the sitemap",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
}
}
-}
\ No newline at end of file
+}
diff --git a/openapi/scrapes.json b/openapi/scrapes.json
index 82ca60fe..58fabf57 100644
--- a/openapi/scrapes.json
+++ b/openapi/scrapes.json
@@ -15,6 +15,103 @@
"type": "http",
"scheme": "bearer"
}
+ },
+ "schemas": {
+ "ErrorResponse": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique error identifier (e.g., error_4z93xmi8vw)"
+ },
+ "object": {
+ "type": "string",
+ "enum": ["error"],
+ "description": "Always 'error'"
+ },
+ "code": {
+ "type": "string",
+ "description": "Machine-readable error code for programmatic handling"
+ },
+ "type": {
+ "type": "string",
+ "format": "uri",
+ "description": "URI reference identifying the problem type"
+ },
+ "status": {
+ "type": "integer",
+ "description": "HTTP status code"
+ },
+ "title": {
+ "type": "string",
+ "description": "Short, human-readable summary of the problem type"
+ },
+ "detail": {
+ "type": "string",
+ "description": "Human-readable explanation of the specific error"
+ },
+ "created": {
+ "type": "integer",
+ "description": "Unix timestamp when the error occurred"
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Any user-provided metadata from the original request"
+ },
+ "errors": {
+ "type": "array",
+ "description": "Field-level validation errors (only for validation errors)",
+ "items": {
+ "type": "object",
+ "properties": {
+ "loc": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ { "type": "string" },
+ { "type": "integer" }
+ ]
+ },
+ "description": "Path to the field with the error"
+ },
+ "msg": {
+ "type": "string",
+ "description": "Error message"
+ },
+ "type": {
+ "type": "string",
+ "description": "Error type code"
+ }
+ }
+ }
+ }
+ },
+ "required": ["object", "id", "code", "type", "status", "title", "detail", "created"]
+ },
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Validation rules (modeled on Stripe's metadata): at most 50 keys; each key is at most 40 characters and must not contain square brackets; each value is at most 500 characters; all values are stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ }
}
},
"paths": {
@@ -243,14 +340,14 @@
"items": {
"type": "string"
},
- "description": "Filter extracted links using glob patterns with `include_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})."
+ "description": "Keep only the extracted links that match at least one of these glob patterns (e.g. \"*.pdf\", \"/blog/*\", \"https://example.com/*\")."
},
"exclude_links": {
"type": "array",
"items": {
"type": "string"
},
- "description": "Filter extracted links using glob patterns with `exclude_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})."
+ "description": "Drop the extracted links that match any of these glob patterns (e.g. \"*.pdf\", \"/blog/*\", \"https://example.com/*\")."
}
},
"description": "With this option, you can get all the links present on the page you scrape."
@@ -288,8 +385,7 @@
}
},
"metadata": {
- "type": "object",
- "description": "User-defined metadata. Not supported yet"
+ "$ref": "#/components/schemas/Metadata"
}
},
"required": [
@@ -320,8 +416,7 @@
"description": "Created epoch"
},
"metadata": {
- "type": "object",
- "description": "User-defined metadata."
+ "$ref": "#/components/schemas/Metadata"
},
"url_to_scrape": {
"type": "string",
@@ -380,16 +475,193 @@
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "The provided URL is invalid. This often means there's a white space or no protocol (http/https) was passed along",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"402": {
- "description": "Payment required due to invalid API key."
+ "description": "Payment required due to invalid API key or exhausted credits. See [Payment Required](/api-reference/errors/payment_required) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "credits_exhausted",
+ "type": "https://docs.olostep.com/api-reference/errors/payment_required",
+ "status": 402,
+ "title": "Payment Required",
+ "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/dashboard/",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "403": {
+ "description": "Access denied due to restrictions (blacklisted domain, approval required, unsupported file type). See [Forbidden](/api-reference/errors/forbidden) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "blacklisted_domain",
+ "type": "https://docs.olostep.com/api-reference/errors/forbidden",
+ "status": 403,
+ "title": "Forbidden",
+ "detail": "The domain you're trying to scrape is currently blacklisted. This limitation can be removed for your account. Reach out to info@olostep.com to remove this limitation.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Not found error when the requested scrape is not found."
+ "description": "Resource not found. The `detail` field specifies which resource was not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "parser_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "No parser with this name and/or version found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "429": {
+ "description": "Rate limit exceeded. See [Too Many Requests](/api-reference/errors/too_many_requests) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "rate_limit_exceeded",
+ "type": "https://docs.olostep.com/api-reference/errors/too_many_requests",
+ "status": 429,
+ "title": "Too Many Requests",
+ "detail": "You've reached your rate limits for this API. Please try again later.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "501": {
+ "description": "Service unavailable or at capacity. See [Service Unavailable](/api-reference/errors/service_unavailable) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "max_capacity_reached",
+ "type": "https://docs.olostep.com/api-reference/errors/service_unavailable",
+ "status": 501,
+ "title": "Service Unavailable",
+ "detail": "We have currently reached our maximum capacity for this feature flag. Consider retrying in a few minutes.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "504": {
+ "description": "Request timed out. See [Gateway Timeout](/api-reference/errors/gateway_timeout) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "request_timeout",
+ "type": "https://docs.olostep.com/api-reference/errors/gateway_timeout",
+ "status": 504,
+ "title": "Gateway Timeout",
+ "detail": "Your request timed out. Try increasing the timeout and making another request.",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
@@ -435,8 +707,7 @@
"description": "Created epoch"
},
"metadata": {
- "type": "object",
- "description": "User-defined metadata."
+ "$ref": "#/components/schemas/Metadata"
},
"url_to_scrape": {
"type": "string",
@@ -493,16 +764,88 @@
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "Invalid scrape ID format",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
- "402": {
- "description": "Payment required due to invalid API key."
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Not found error when the requested scrape is not found."
+ "description": "Resource not found. The `detail` field specifies which resource was not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "scrape_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Scrape not found",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": {
+ "$ref": "#/components/schemas/ErrorResponse"
+ },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
diff --git a/openapi/utility.json b/openapi/utility.json
index 87b9135a..dd35bb1c 100644
--- a/openapi/utility.json
+++ b/openapi/utility.json
@@ -15,27 +15,67 @@
"type": "http",
"scheme": "bearer"
}
+ },
+ "schemas": {
+ "Metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for storing additional information about an object. Validation rules (modeled on Stripe's metadata): at most 50 keys; each key is at most 40 characters and must not contain square brackets; each value is at most 500 characters; all values are stored as strings.",
+ "additionalProperties": {
+ "type": "string",
+ "maxLength": 500,
+ "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings."
+ },
+ "maxProperties": 50,
+ "example": {
+ "order_id": "12345",
+ "customer_name": "John Doe",
+ "priority": "high",
+ "processed": "true"
+ },
+ "x-validation-rules": {
+ "max_keys": 50,
+ "key_max_length": 40,
+ "key_forbidden_chars": ["[", "]"],
+ "value_max_length": 500,
+ "value_types": ["string", "number (coerced)", "boolean (coerced)"]
+ }
+ },
+ "Error": {
+ "type": "object",
+ "description": "RFC 7807 Problem Details error response",
+ "properties": {
+ "id": { "type": "string" },
+ "object": { "type": "string", "enum": ["error"] },
+ "code": { "type": "string" },
+ "type": { "type": "string", "format": "uri" },
+ "status": { "type": "integer" },
+ "title": { "type": "string" },
+ "detail": { "type": "string" },
+ "created": { "type": "integer" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" },
+ "errors": {
+ "type": "array",
+ "description": "Optional array of additional error details",
+ "items": {}
+ }
+ },
+ "required": ["id", "object", "code", "type", "status", "title", "detail", "created"]
+ }
}
},
"paths": {
"/v1/retrieve": {
"get": {
"summary": "Retrieve page content",
- "description": "Fetches the content of a crawled page using its `retrieve_id`.",
- "security": [
- {
- "Authorization": []
- }
- ],
+ "description": "Fetches the content of a crawled/scraped/batched page using its `retrieve_id`.",
+ "security": [{ "Authorization": [] }],
"parameters": [
{
"name": "retrieve_id",
"in": "query",
"required": true,
- "schema": {
- "type": "string"
- },
- "description": "The ID of the page content to retrieve. Available in the response of `/v1/crawls/{crawl_id}/pages`, `/v1/scrapes/{scrape_id}` or `/v1/batches/{batch_id}/items` endpoints"
+ "schema": { "type": "string" },
+ "description": "The ID of the page content to retrieve."
},
{
"name": "formats",
@@ -43,16 +83,9 @@
"required": false,
"schema": {
"type": "array",
- "items": {
- "type": "string",
- "enum": [
- "html",
- "markdown",
- "json"
- ]
- }
+ "items": { "type": "string", "enum": ["html", "markdown", "json"] }
},
- "description": "Optional array to retrieve only specific formats in production. If not provided, all formats will be returned."
+ "description": "Optional array to retrieve only specific formats."
}
],
"responses": {
@@ -63,50 +96,97 @@
"schema": {
"type": "object",
"properties": {
- "html_content": {
- "type": "string",
- "description": "HTML content of the page, if requested and available."
- },
- "markdown_content": {
- "type": "string",
- "description": "Markdown content of the page, if requested and available."
- },
- "json_content": {
- "type": "string",
- "description": "JSON content of the page returned from parsers, if requested and available."
- },
- "html_hosted_url": {
- "type": "string",
- "description": "S3 bucket URL of html. Expires in 7 days."
- },
- "markdown_hosted_url": {
- "type": "string",
- "description": "S3 bucket URL of markdown. Expires in 7 days."
- },
- "json_hosted_url": {
- "type": "string",
- "description": "S3 bucket URL of json. Expires in 7 days."
- },
- "size_exceeded": {
- "type": "boolean",
- "description": "If size of content objects exceeds the 6MB limit. If true, use hosted S3 urls to get content."
- }
+ "html_content": { "type": "string" },
+ "markdown_content": { "type": "string" },
+ "json_content": { "type": "string" },
+ "html_hosted_url": { "type": "string" },
+ "markdown_hosted_url": { "type": "string" },
+ "json_hosted_url": { "type": "string" },
+ "size_exceeded": { "type": "boolean" },
+ "metadata": { "$ref": "#/components/schemas/Metadata" }
}
}
}
}
},
"400": {
- "description": "Bad request due to incorrect or missing parameters."
+ "description": "Bad request - missing or invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "validation_error",
+ "type": "https://docs.olostep.com/api-reference/errors/bad_request",
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "retrieve_id is required",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
+ },
+ "401": {
+ "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "invalid_api_key",
+ "type": "https://docs.olostep.com/api-reference/errors/unauthorized",
+ "status": 401,
+ "title": "Unauthorized",
+ "detail": "Your API key is invalid",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"404": {
- "description": "Content not found for the provided `retrieve_id`."
+ "description": "Content not found. See [Not Found](/api-reference/errors/not_found) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "content_not_found",
+ "type": "https://docs.olostep.com/api-reference/errors/not_found",
+ "status": 404,
+ "title": "Not Found",
+ "detail": "Content not found for the provided retrieve_id",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
},
"500": {
- "description": "Internal server error."
+ "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.",
+ "content": {
+ "application/problem+json": {
+ "schema": { "$ref": "#/components/schemas/Error" },
+ "example": {
+ "id": "error_abc123",
+ "object": "error",
+ "code": "internal_server_error",
+ "type": "https://docs.olostep.com/api-reference/errors/internal_error",
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred",
+ "created": 1704067200,
+ "metadata": {}
+ }
+ }
+ }
}
}
}
}
}
-}
\ No newline at end of file
+}