diff --git a/.gitignore b/.gitignore index f32e31af..3ac7d16c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .idea/ .DS_Store +node_modules/ diff --git a/api-reference/answers/create.mdx b/api-reference/answers/create.mdx index a6d260eb..a3d2cbef 100644 --- a/api-reference/answers/create.mdx +++ b/api-reference/answers/create.mdx @@ -1,4 +1,9 @@ --- title: "Create Answer" +description: "Create an AI-powered answer by searching the web and extracting information." openapi: "POST /v1/answers" --- + + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/answers/get.mdx b/api-reference/answers/get.mdx index 087176a7..b8f4871d 100644 --- a/api-reference/answers/get.mdx +++ b/api-reference/answers/get.mdx @@ -1,4 +1,9 @@ --- title: "Get Answer" +description: "Retrieve a previously created answer by its ID." openapi: "GET /v1/answers/{answer_id}" --- + + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/batches/create.mdx b/api-reference/batches/create.mdx index 4b7ddc27..2508e839 100644 --- a/api-reference/batches/create.mdx +++ b/api-reference/batches/create.mdx @@ -3,3 +3,7 @@ title: 'Create Batch' description: 'Starts a new batch. You receive an `id` that you can use to track the progress of the batch as shown [here](/api-reference/batches/info). Note: Processing time is constant regardless of batch size' openapi: POST /v1/batches --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/batches/info.mdx b/api-reference/batches/info.mdx index e0313911..0a974d49 100644 --- a/api-reference/batches/info.mdx +++ b/api-reference/batches/info.mdx @@ -3,3 +3,7 @@ title: 'Batch Info' description: 'Retrieves the status and progress information about a batch. 
To retrieve the content for a batch, see [here](/api-reference/batches/items)' openapi: GET /v1/batches/{batch_id} --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/batches/items.mdx b/api-reference/batches/items.mdx index 583e6783..e3821564 100644 --- a/api-reference/batches/items.mdx +++ b/api-reference/batches/items.mdx @@ -3,3 +3,7 @@ title: 'Batch Items' description: 'Retrieves the list of items processed for a batch. You can then use the `retrieve_id` to get the content with the Retrieve Endpoint' openapi: GET /v1/batches/{batch_id}/items --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/batches/list.mdx b/api-reference/batches/list.mdx index a2ad44e2..185390d7 100644 --- a/api-reference/batches/list.mdx +++ b/api-reference/batches/list.mdx @@ -1,5 +1,5 @@ --- -title: 'Batch Items' -description: 'Fetches the list of items processed for a batch.' -openapi: GET /v1/batches/{batch_id}/items ---- +title: 'List Batches' +description: 'Fetches the list of recent batches.' +openapi: GET /v1/batches/recent +--- diff --git a/api-reference/common/metadata.mdx b/api-reference/common/metadata.mdx new file mode 100644 index 00000000..50fb2003 --- /dev/null +++ b/api-reference/common/metadata.mdx @@ -0,0 +1,52 @@ +--- +title: 'Metadata' +sidebarTitle: 'Metadata' +description: 'Attach custom key-value pairs to API requests' +icon: 'tag' +--- + +Many endpoints accept a `metadata` parameter for storing additional information with your requests. Metadata is returned in responses and can be used for tracking, filtering, or storing context. + +## Usage + +```json +{ + "url_to_scrape": "https://example.com", + "metadata": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high" + } +} +``` + +Metadata follows [Stripe's approach](https://stripe.com/docs/api/metadata) — simple, flexible, and consistent across all endpoints. 
+ +--- + +## Validation Rules + +| Constraint | Limit | Error Example | +|------------|-------|---------------| +| Maximum keys | 50 | `"Metadata can have a maximum of 50 keys. You provided 51 keys."` | +| Key length | 40 characters | `"Metadata key \"my_very_long_key_name...\" exceeds 40 character limit."` | +| Key format | No square brackets | `"Metadata key \"items[0]\" cannot contain square brackets ([ or ])."` | +| Value length | 500 characters | `"Metadata value for key \"description\" exceeds 500 character limit (got 523 characters)."` | +| Value type | Strings only | `"Metadata value for key \"count\" must be a string. Got object."` | + + +Numbers and booleans are automatically converted to strings. Objects and arrays are rejected. + + +--- + +## Availability + +| Endpoint | Status | +|----------|--------| +| [Batches](/api-reference/batches/create) | Available | +| [Crawls](/api-reference/crawls/create) | Available | +| [Maps](/api-reference/maps/create) | Available | +| [Scrapes](/api-reference/scrapes/create) | Coming Soon | +| [Answers](/api-reference/answers/create) | Coming Soon | + diff --git a/api-reference/common/pagination.mdx b/api-reference/common/pagination.mdx new file mode 100644 index 00000000..74623403 --- /dev/null +++ b/api-reference/common/pagination.mdx @@ -0,0 +1,127 @@ +--- +title: 'Pagination' +sidebarTitle: 'Pagination' +description: 'How to paginate through large result sets using cursor-based pagination' +icon: 'arrow-right' +--- + +Many endpoints return large datasets that are paginated using a **cursor-based pagination** mechanism. This allows you to efficiently retrieve all results by making multiple requests. + +## How It Works + +Pagination uses two query parameters: + +- **`cursor`**: A token that indicates where to start fetching results. On the first request, **omit the `cursor` parameter**. For subsequent requests, use the `cursor` value from the previous response. 
See the [`cursor` parameter](/api-reference/batches/items#query-cursor) documentation for details. +- **`limit`**: The maximum number of results to return per request (recommended: 10-50 for batches/crawls). + +When there are more results available, the response includes a `cursor` field. Continue making requests with the new `cursor` value until the `cursor` field is absent, indicating all results have been retrieved. + +--- + +## Examples + + + +```python Python +import requests + +API_URL = 'https://api.olostep.com/v1' +API_KEY = '' +HEADERS = {'Authorization': f'Bearer {API_KEY}'} + +def get_batch_items(batch_id, cursor=None, limit=10): + params = {'limit': limit} + if cursor: + params['cursor'] = cursor + response = requests.get( + f'{API_URL}/batches/{batch_id}/items', + headers=HEADERS, + params=params + ) + return response.json() + +# Paginate through all items +cursor = None +while True: + result = get_batch_items('batch_abc123', cursor=cursor, limit=10) + + for item in result['items']: + print(f"Custom ID: {item['custom_id']}, URL: {item['url']}") + + if 'cursor' not in result: + break + + cursor = result['cursor'] +``` + +```js Node.js +const API_URL = 'https://api.olostep.com/v1'; +const API_KEY = ''; + +async function getBatchItems(batchId, cursor = null, limit = 10) { + const params = new URLSearchParams(); + if (cursor !== null) params.append('cursor', cursor); + params.append('limit', limit); + + const response = await fetch( + `${API_URL}/batches/${batchId}/items?${params}`, + { headers: { 'Authorization': `Bearer ${API_KEY}` } } + ); + return response.json(); +} + +// Paginate through all items +let cursor = null; +while (true) { + const result = await getBatchItems('batch_abc123', cursor, 10); + + result.items.forEach(item => { + console.log(`Custom ID: ${item.custom_id}, URL: ${item.url}`); + }); + + if (result.cursor === undefined) break; + cursor = result.cursor; +} +``` + +```bash cURL +# First request (omit cursor parameter) +curl -G 
"https://api.olostep.com/v1/batches/batch_abc123/items" \ + -H "Authorization: Bearer $OLOSTEP_API_KEY" \ + --data-urlencode "limit=10" + +# Subsequent requests use the cursor from previous response +curl -G "https://api.olostep.com/v1/batches/batch_abc123/items" \ + -H "Authorization: Bearer $OLOSTEP_API_KEY" \ + --data-urlencode "cursor=10" \ + --data-urlencode "limit=10" +``` + + + +--- + +## Best Practices + +1. **Omit `cursor` on first request**: For batches and crawls, omit the `cursor` parameter entirely on the first request. Only include it when continuing from a previous response. + +2. **Use appropriate limits**: + - Batches/Crawls: 10-50 items per request + - Maps: Handled automatically (up to 10MB per response) + +3. **Check for cursor**: Always check if a `cursor` field exists in the response before making the next request. If it's absent, you've retrieved all results. + +4. **Handle errors**: Implement retry logic for network errors, but don't retry with the same cursor if you've already processed those results. + +5. **Streaming**: For crawls, you can start paginating while the crawl is `in_progress` to stream results as they become available. See the [`cursor` parameter](/api-reference/crawls/pages#query-cursor) documentation for details. 
+ +--- + +## Availability + +| Endpoint | Cursor Type | Limit Parameter | Notes | +|----------|-------------|-----------------|-------| +| [Batches Items](/api-reference/batches/items) | Integer | Yes (10-50 recommended) | See [`cursor` parameter](/api-reference/batches/items#query-cursor) | +| [Crawl Pages](/api-reference/crawls/pages) | Integer | Yes (10-50 recommended) | See [`cursor` parameter](/api-reference/crawls/pages#query-cursor) | +| [Maps](/api-reference/maps/create) | String | No (automatic) | Auto-paginates at 10MB | + diff --git a/api-reference/crawls/create.mdx b/api-reference/crawls/create.mdx index 6055f060..7e871ba1 100644 --- a/api-reference/crawls/create.mdx +++ b/api-reference/crawls/create.mdx @@ -3,3 +3,7 @@ title: 'Create Crawl' description: 'Starts a new crawl. You receive a `id` to track the progress. The operation may take 1-10 mins depending upon the site and depth and pages parameters.' openapi: POST /v1/crawls --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/crawls/info.mdx b/api-reference/crawls/info.mdx index 6679c4bf..40f87fe6 100644 --- a/api-reference/crawls/info.mdx +++ b/api-reference/crawls/info.mdx @@ -3,3 +3,7 @@ title: 'Crawl Info' description: 'Fetches information about a specific crawl.' openapi: GET /v1/crawls/{crawl_id} --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/crawls/pages.mdx b/api-reference/crawls/pages.mdx index 229a450d..b56fa146 100644 --- a/api-reference/crawls/pages.mdx +++ b/api-reference/crawls/pages.mdx @@ -3,3 +3,7 @@ title: 'Crawl Pages' description: 'Fetches the list of pages for a specific crawl.' openapi: GET /v1/crawls/{crawl_id}/pages --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. 
+ diff --git a/api-reference/errors.mdx b/api-reference/errors.mdx new file mode 100644 index 00000000..2f712c7b --- /dev/null +++ b/api-reference/errors.mdx @@ -0,0 +1,540 @@ +--- +title: 'Errors' +sidebarTitle: 'Overview' +description: 'Error codes and responses returned by the Olostep API' +--- + +The Olostep API uses standard HTTP response codes and returns structured error responses following [RFC 7807](https://tools.ietf.org/html/rfc7807) (Problem Details for HTTP APIs). + +## Error Response Format + +All error responses follow this structure: + +```json +{ + "object": "error", + "id": "error_4z93xmi8vw", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "Request validation failed", + "created": 1704067200, + "metadata": {}, + "errors": [ + { + "loc": ["url_to_scrape"], + "msg": "Required", + "type": "invalid_type" + } + ] +} +``` + +### Response Fields + +| Field | Type | Description | +|-------|------|-------------| +| `object` | string | Always `"error"` | +| `id` | string | Unique error identifier (e.g., `error_4z93xmi8vw`) | +| `code` | string | Machine-readable error code for programmatic handling | +| `type` | string | URI reference identifying the problem type | +| `status` | number | HTTP status code | +| `title` | string | Short, human-readable summary of the problem type | +| `detail` | string | Human-readable explanation of the specific error | +| `created` | number | Unix timestamp when the error occurred | +| `metadata` | object | Any user-provided metadata from the original request | +| `errors` | array | Field-level validation errors (only for validation errors) | + +--- + +## HTTP Status Codes + +### Client Errors (4xx) + + + + + +The request is syntactically broken or structurally invalid. This typically occurs when required parameters are missing or have invalid formats. 
+ +**Error Codes:** +- `validation_error` - Request validation failed (often includes field-level errors) + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "The provided URL is invalid. This often means there's a white space or no protocol (http/https) was passed along", + "created": 1704067200, + "metadata": {} +} +``` + +**Common Causes:** +- Missing required `url_to_scrape` parameter +- Invalid URL format (missing `http://` or `https://` protocol) +- Invalid JSON body +- Invalid parameter values +- Invalid metadata (see [Metadata Validation Errors](/api-reference/errors/bad_request#metadata-validation-errors)) + + + + + +Authentication credentials are missing or invalid. + +**Error Codes:** +- `invalid_api_key` - The API key provided is not valid + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} +} +``` + +**Common Causes:** +- Missing `Authorization` header +- Invalid or revoked API key +- Malformed bearer token + + + + + +Valid credentials but a payment or credits issue prevents the request from being processed. + +**Error Codes:** +- `credits_exhausted` - Account has no remaining credits + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits. 
Please upgrade your plan from the dashboard: https://www.olostep.com/dashboard/", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Check your credit balance at [dashboard](https://olostep.com/dashboard) +- Upgrade your plan if credits are exhausted + + + + + +The request was understood but refused due to access restrictions. + +**Error Codes:** +- `approval_required` - Feature requires approval before use +- `blacklisted_domain` - The target domain is blocked +- `unsupported_file_type` - File type not supported for your account + +**Example Responses:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "blacklisted_domain", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "The domain you're trying to scrape is currently blacklisted. This limitation can be removed for your account. Reach out to info@olostep.com to remove this limitation.", + "created": 1704067200, + "metadata": {} +} +``` + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "approval_required", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "You don't have access to this feature. Please reach out to info@olostep.com to get approved", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Contact support at info@olostep.com to request access +- Some domains and features require prior approval + + + + + +The requested resource does not exist. The `detail` field specifies which resource was not found. 
+ +**Error Codes:** +- `batch_not_found` - No batch exists with the given ID +- `scrape_not_found` - No scrape exists with the given ID +- `parser_not_found` - The specified parser does not exist +- `pursuit_file_not_found` - Batch data file not found (for retry operations) +- `crawl_not_found` - No crawl exists with the given ID +- `map_not_found` - No map exists with the given ID +- `no_urls_found` - No URLs found during crawl +- `sitemap_error` - Could not retrieve page URLs from sitemap +- `file_not_found` - No file exists with the given ID +- `schedule_not_found` - No schedule exists with the given ID +- `agent_not_found` - No agent exists with the given ID +- `answer_not_found` - No answer exists with the given ID + +**Example Responses:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "scrape_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Scrape not found", + "created": 1704067200, + "metadata": {} +} +``` + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "batch_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Batch not found", + "created": 1704067200, + "metadata": {} +} +``` + +**Common Causes:** +- Using an incorrect or expired resource ID +- Resource has been deleted +- Referencing a parser that doesn't exist + + + + + +The server understands the request but rejects its meaning due to business logic rules. 
+ +**Error Codes:** +- `invalid_parser_input` - Parser input validation failed +- `idempotency_key_reuse` - Idempotency key was already used with different parameters + +**Example Responses:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "invalid_parser_input", + "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity", + "status": 422, + "title": "Unprocessable Entity", + "detail": "The parser object must contain an input object with 'profile_id' field", + "created": 1704067200, + "metadata": {} +} +``` + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "idempotency_key_reuse", + "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity", + "status": 422, + "title": "Unprocessable Entity", + "detail": "A request with this idempotency key was already made with different parameters. Idempotency keys must be unique per request.", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- For parser input errors, check the required fields for the parser you're using +- For idempotency key errors, use a new unique key for different request parameters + + + + + +The request conflicts with an in-progress or completed idempotent request. This occurs when using idempotency keys to ensure exactly-once processing. + +**Error Codes:** +- `idempotency_key_in_progress` - A request with this idempotency key is currently being processed + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "idempotency_key_in_progress", + "type": "https://docs.olostep.com/api-reference/errors/idempotency_error", + "status": 409, + "title": "Idempotency Error", + "detail": "A request with this idempotency key is currently being processed. 
Please wait and retry.", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Wait for the in-progress request to complete (1-5 seconds) +- Retry the request with exponential backoff +- Poll for the original request's result if you have the batch ID + + + + + +Rate limit exceeded. You're making too many requests in a given time period. + +**Error Codes:** +- `rate_limit_exceeded` - Request rate limit has been exceeded + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "rate_limit_exceeded", + "type": "https://docs.olostep.com/api-reference/errors/too_many_requests", + "status": 429, + "title": "Too Many Requests", + "detail": "You've reached your rate limits for this API. Please try again later.", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Implement exponential backoff in your requests +- Use batch processing for multiple URLs +- Contact support if you need higher rate limits + + + + + +### Server Errors (5xx) + + + + + +An unexpected error occurred on the server. + +**Error Codes:** +- `internal_server_error` - Generic internal error + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Retry the request after a short delay +- If the error persists, contact support + + + + + +The service is temporarily unavailable or at capacity. 
+ +**Error Codes:** +- `max_capacity_reached` - Service is at maximum capacity + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "max_capacity_reached", + "type": "https://docs.olostep.com/api-reference/errors/service_unavailable", + "status": 501, + "title": "Service Unavailable", + "detail": "We have currently reached our maximum capacity for this feature flag. Consider retrying in a few minutes.", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Retry the request after a few minutes +- Use exponential backoff + + + + + +The request timed out before completing. + +**Error Codes:** +- `request_timeout` - Request exceeded the timeout limit + +**Example Response:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "request_timeout", + "type": "https://docs.olostep.com/api-reference/errors/gateway_timeout", + "status": 504, + "title": "Gateway Timeout", + "detail": "Your request timed out. Try increasing the timeout and making another request.", + "created": 1704067200, + "metadata": {} +} +``` + +**Resolution:** +- Increase the timeout value in your request +- For complex scrapes, consider using async patterns +- Simplify the request (fewer actions, simpler targets) + + + + + +--- + +## Error Code Reference + +Quick reference table of all error codes: + +| Code | HTTP Status | Description | +|------|-------------|-------------| +| `validation_error` | 400 | Request validation failed | +| `invalid_api_key` | 401 | API key is invalid | +| `credits_exhausted` | 402 | Account has no remaining credits | +| `approval_required` | 403 | Feature requires approval | +| `access_denied` | 403 | Access denied to this feature | +| `blacklisted_domain` | 403 | Target domain is blocked | +| `unsupported_file_type` | 403 | File type not supported | +| `batch_not_found` | 404 | Batch ID not found | +| `scrape_not_found` | 404 | Scrape ID not found | +| `parser_not_found` | 404 | Parser not found | 
+| `pursuit_file_not_found` | 404 | Batch data file not found | +| `crawl_not_found` | 404 | Crawl ID not found | +| `map_not_found` | 404 | Map ID not found | +| `no_urls_found` | 404 | No URLs found during crawl | +| `sitemap_error` | 404 | Could not retrieve page URLs from sitemap | +| `file_not_found` | 404 | File ID not found | +| `schedule_not_found` | 404 | Schedule ID not found | +| `agent_not_found` | 404 | Agent ID not found | +| `answer_not_found` | 404 | Answer ID not found | +| `idempotency_key_in_progress` | 409 | Idempotency key request still processing (see [Idempotency Error](#idempotency_error)) | +| `idempotency_key_reuse` | 422 | Idempotency key reused with different parameters | +| `invalid_parser_input` | 422 | Parser input validation failed | +| `rate_limit_exceeded` | 429 | Rate limit exceeded | +| `internal_server_error` | 500 | Internal server error | +| `max_capacity_reached` | 501 | Service at capacity | +| `request_timeout` | 504 | Request timed out | + +--- + +## Handling Errors + +### Best Practices + +1. **Always check the `code` field** for programmatic error handling +2. **Display the `detail` field** to users for helpful error messages +3. **Log the `id` field** for debugging and support tickets +4. **Implement retry logic** for 429, 500, 501, and 504 errors with exponential backoff +5. 
**Check the `errors` array** for field-level validation details on 400 errors + +### Example Error Handling + + + +```python Python +from olostep import Olostep, Olostep_BaseError + +client = Olostep(api_key="your-api-key") + +try: + result = client.scrapes.create(url_to_scrape="https://example.com") +except Olostep_BaseError as e: + print(f"Error: {type(e).__name__}") + print(f"Message: {e}") +``` + +```javascript Node.js +import Olostep from 'olostep'; + +const client = new Olostep({ apiKey: 'your-api-key' }); + +try { + const result = await client.scrapes.create('https://example.com'); +} catch (error) { + console.error('Error:', error.code); + console.error('Message:', error.detail); +} +``` + +```bash cURL +curl -X POST https://api.olostep.com/v1/scrapes \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"url_to_scrape": "https://example.com"}' + +# Check HTTP status code and parse error response +``` + + + +### SDK Error Hierarchies + +For detailed SDK-specific error handling, see: +- [Python SDK Error Handling](/sdks/python#detailed-error-handling) +- [Node.js SDK Documentation](/sdks/node-js) + diff --git a/api-reference/errors/bad_request.mdx b/api-reference/errors/bad_request.mdx new file mode 100644 index 00000000..3720fe50 --- /dev/null +++ b/api-reference/errors/bad_request.mdx @@ -0,0 +1,90 @@ +--- +title: 'Bad Request' +sidebarTitle: 'Bad Request' +description: 'HTTP 400 - Request is syntactically broken or structurally invalid' +--- + + +This page documents the `bad_request` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 400 + +The request is syntactically broken or structurally invalid. This typically occurs when required parameters are missing or have invalid formats. 
+ +## Error Codes + +| Code | Description | +|------|-------------| +| `validation_error` | Request validation failed (often includes field-level errors) | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "The provided URL is invalid. This often means there's a white space or no protocol (http/https) was passed along", + "created": 1704067200, + "metadata": {}, + "errors": [ + { + "loc": ["url_to_scrape"], + "msg": "Invalid URL. Must start with http:// or https://", + "type": "invalid_string" + } + ] +} +``` + +## Common Causes + +- Missing required `url_to_scrape` parameter +- Invalid URL format (missing `http://` or `https://` protocol) +- Invalid JSON body structure +- Invalid parameter values or types +- Whitespace in URLs +- Invalid metadata (see below) + +## Metadata Validation Errors + +When providing `metadata`, the following rules apply: + +| Constraint | Limit | +|------------|-------| +| Maximum keys | 50 | +| Key length | 40 characters | +| Key format | No square brackets (`[` or `]`) | +| Value length | 500 characters | +| Value type | Strings only (numbers/booleans auto-converted) | + +**Example metadata error:** + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "Metadata key \"my_very_long_key_name_that_exceeds...\" exceeds 40 character limit.", + "created": 1704067200, + "metadata": {} +} +``` + +## Resolution + +1. Ensure all required parameters are provided +2. Verify URL format includes protocol (`http://` or `https://`) +3. Check JSON body for syntax errors +4. Review the `errors` array for field-specific issues +5. 
For metadata errors, ensure keys are ≤40 chars, values are ≤500 chars, and all values are strings + diff --git a/api-reference/errors/forbidden.mdx b/api-reference/errors/forbidden.mdx new file mode 100644 index 00000000..4b26ecdf --- /dev/null +++ b/api-reference/errors/forbidden.mdx @@ -0,0 +1,87 @@ +--- +title: 'Forbidden' +sidebarTitle: 'Forbidden' +description: 'HTTP 403 - Request understood but refused due to access restrictions' +--- + + +This page documents the `forbidden` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 403 + +The request was understood but refused due to access restrictions. Your account may not have permission to access the requested resource or feature. + +## Error Codes + +| Code | Description | +|------|-------------| +| `approval_required` | Feature requires prior approval before use | +| `blacklisted_domain` | The target domain is currently blocked | +| `unsupported_file_type` | The file type is not supported for your account | + +## Example Responses + +### Blacklisted Domain + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "blacklisted_domain", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "The domain you're trying to scrape is currently blacklisted. This limitation can be removed for your account. Reach out to info@olostep.com to remove this limitation.", + "created": 1704067200, + "metadata": {} +} +``` + +### Approval Required + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "approval_required", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "You don't have access to this feature. 
Please reach out to info@olostep.com to get approved", + "created": 1704067200, + "metadata": {} +} +``` + +### Unsupported File Type + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "unsupported_file_type", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "This type of request is not currently supported for all accounts. Reach out to info@olostep.com to have it enabled for your account", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Attempting to scrape a blacklisted domain +- Trying to use a feature that requires approval (e.g., Instagram, LinkedIn, Reddit) +- Accessing restricted file types (e.g., certain PDFs) +- Using a parser that requires special access + +## Resolution + +1. Contact support at info@olostep.com to request access +2. Provide details about your use case +3. Wait for approval before retrying the request + diff --git a/api-reference/errors/gateway_timeout.mdx b/api-reference/errors/gateway_timeout.mdx new file mode 100644 index 00000000..eb9e6be0 --- /dev/null +++ b/api-reference/errors/gateway_timeout.mdx @@ -0,0 +1,54 @@ +--- +title: 'Gateway Timeout' +sidebarTitle: 'Gateway Timeout' +description: 'HTTP 504 - The request timed out before completing' +--- + + +This page documents the `gateway_timeout` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 504 + +The request timed out before completing. This can happen with complex scrapes or slow-loading websites. 
+ +## Error Codes + +| Code | Description | +|------|-------------| +| `request_timeout` | Request exceeded the timeout limit | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "request_timeout", + "type": "https://docs.olostep.com/api-reference/errors/gateway_timeout", + "status": 504, + "title": "Gateway Timeout", + "detail": "Your request timed out. Try increasing the timeout and making another request.", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Complex page with many resources +- Slow website response +- Too many browser actions +- Large file downloads +- Geographic routing delays + +## Resolution + +1. **Increase timeout** - Extend the request timeout in your client +2. **Simplify the request** - Reduce the number of actions +3. **Use wait_before_scraping** - Give the page more time to load +4. **Try a different country** - Geographic location can affect load times +5. **Retry the request** - Timeouts are often transient + diff --git a/api-reference/errors/idempotency_error.mdx b/api-reference/errors/idempotency_error.mdx new file mode 100644 index 00000000..c336b256 --- /dev/null +++ b/api-reference/errors/idempotency_error.mdx @@ -0,0 +1,56 @@ +--- +title: 'Idempotency Error' +sidebarTitle: 'Idempotency Error' +description: 'HTTP 409 - Request conflicts with an idempotent operation' +--- + + +This page documents the `idempotency_error` type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 409 + +The request conflicts with an in-progress or completed idempotent request. This occurs when using idempotency keys to ensure exactly-once processing of requests. 
+ +## Error Codes + +| Code | Description | +|------|-------------| +| `idempotency_key_in_progress` | A request with this idempotency key is currently being processed | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "idempotency_key_in_progress", + "type": "https://docs.olostep.com/api-reference/errors/idempotency_error", + "status": 409, + "title": "Idempotency Error", + "detail": "A request with this idempotency key is currently being processed. Please wait and retry.", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Sending duplicate requests with the same idempotency key before the first request completes +- Network timeouts causing client retries while the original request is still processing +- Race conditions when multiple clients attempt the same operation + +## Resolution + +1. Wait for the in-progress request to complete (typically 1-5 seconds) +2. Retry the request after a short delay with exponential backoff +3. Poll for the result of the original request if you have the batch ID + +## Related Errors + +| Code | HTTP Status | Description | +|------|-------------|-------------| +| `idempotency_key_reuse` | 422 | Same key used with different parameters (see [Unprocessable Entity](/api-reference/errors/unprocessable_entity)) | + diff --git a/api-reference/errors/internal_error.mdx b/api-reference/errors/internal_error.mdx new file mode 100644 index 00000000..5d976af8 --- /dev/null +++ b/api-reference/errors/internal_error.mdx @@ -0,0 +1,51 @@ +--- +title: 'Internal Server Error' +sidebarTitle: 'Internal Error' +description: 'HTTP 500 - An unexpected error occurred on the server' +--- + + +This page documents the `internal_error` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 500 + +An unexpected error occurred on the server. This is not caused by your request but by an issue on our end. 
+ +## Error Codes + +| Code | Description | +|------|-------------| +| `internal_server_error` | Generic internal server error | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} +} +``` + +## Resolution + +1. **Retry the request** - Many internal errors are transient +2. **Implement exponential backoff** - Wait before retrying +3. **Contact support** - If the error persists, contact support with the error `id` + +## Reporting Issues + +When reporting an internal error, please include: +- The error `id` from the response +- The timestamp of the request +- The endpoint and parameters used (excluding sensitive data) + diff --git a/api-reference/errors/not_found.mdx b/api-reference/errors/not_found.mdx new file mode 100644 index 00000000..600962bd --- /dev/null +++ b/api-reference/errors/not_found.mdx @@ -0,0 +1,107 @@ +--- +title: 'Not Found' +sidebarTitle: 'Not Found' +description: 'HTTP 404 - The requested resource does not exist' +--- + + +This page documents the `not_found` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 404 + +The requested resource does not exist. The `detail` field specifies which resource was not found. + +## Error Codes + +| Code | Description | +|------|-------------| +| `not_found` | Generic resource not found | +| `scrape_not_found` | No scrape exists with the given ID | +| `parser_not_found` | The specified parser does not exist | + + +The `detail` field in the response provides specifics about which resource was not found (e.g., "Scrape not found", "Batch not found", "Crawl not found", "Map not found"). 
+ + +## Example Responses + +### Scrape Not Found + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "scrape_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Scrape not found", + "created": 1704067200, + "metadata": {} +} +``` + +### Batch Not Found + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Batch not found", + "created": 1704067200, + "metadata": {} +} +``` + +### Crawl Not Found + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Crawl not found", + "created": 1704067200, + "metadata": {} +} +``` + +### Parser Not Found + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "parser_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "No parser with this name and/or version found", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Using an incorrect resource ID +- Resource has expired or been deleted +- Typo in the parser name or version +- Referencing a resource from a different account + +## Resolution + +1. Verify the resource ID is correct +2. Check that the resource exists and hasn't expired +3. For parsers, verify the parser name and version are correct +4. 
Review available parsers in the [Parsers documentation](/features/structured-content/parsers) diff --git a/api-reference/errors/payment_required.mdx b/api-reference/errors/payment_required.mdx new file mode 100644 index 00000000..032e3542 --- /dev/null +++ b/api-reference/errors/payment_required.mdx @@ -0,0 +1,70 @@ +--- +title: 'Payment Required' +sidebarTitle: 'Payment Required' +description: 'HTTP 402 - Valid credentials but payment/credits issue' +--- + + +This page documents the `payment_required` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 402 + +Valid credentials were provided but a payment or credits issue prevents the request from being processed. + +## Error Codes + +| Code | Description | +|------|-------------| +| `invalid_api_key` | API key is invalid or associated with an inactive account | +| `credits_exhausted` | Account has consumed all available credits | + +## Example Responses + +### Credits Exhausted + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/dashboard/", + "created": 1704067200, + "metadata": {} +} +``` + +### Invalid API Key + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Account has no remaining credits +- Subscription has expired +- Payment method issues + +## Resolution + +1. Check your credit balance at [dashboard](https://olostep.com/dashboard) +2. 
Upgrade your plan if credits are exhausted +3. Update payment method if there are billing issues +4. Contact support if you believe this is an error + diff --git a/api-reference/errors/service_unavailable.mdx b/api-reference/errors/service_unavailable.mdx new file mode 100644 index 00000000..0dc9293b --- /dev/null +++ b/api-reference/errors/service_unavailable.mdx @@ -0,0 +1,45 @@ +--- +title: 'Service Unavailable' +sidebarTitle: 'Service Unavailable' +description: 'HTTP 501 - The service is temporarily unavailable' +--- + + +This page documents the `service_unavailable` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 501 + +The service is temporarily unavailable or at maximum capacity. This is typically a temporary condition. + +## Error Codes + +| Code | Description | +|------|-------------| +| `max_capacity_reached` | Service is at maximum capacity | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "max_capacity_reached", + "type": "https://docs.olostep.com/api-reference/errors/service_unavailable", + "status": 501, + "title": "Service Unavailable", + "detail": "We have currently reached our maximum capacity for this feature flag. Consider retrying in a few minutes.", + "created": 1704067200, + "metadata": {} +} +``` + +## Resolution + +1. **Wait and retry** - Capacity issues are typically temporary +2. **Implement exponential backoff** - Gradually increase wait times between retries +3. **Use batch processing** - Reduce the number of concurrent requests +4. 
**Contact support** - If you consistently hit capacity limits + diff --git a/api-reference/errors/too_many_requests.mdx b/api-reference/errors/too_many_requests.mdx new file mode 100644 index 00000000..2b630c98 --- /dev/null +++ b/api-reference/errors/too_many_requests.mdx @@ -0,0 +1,68 @@ +--- +title: 'Too Many Requests' +sidebarTitle: 'Too Many Requests' +description: 'HTTP 429 - Rate limit exceeded' +--- + + +This page documents the `too_many_requests` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 429 + +Rate limit exceeded. You're making too many requests in a given time period. + +## Error Codes + +| Code | Description | +|------|-------------| +| `rate_limit_exceeded` | Request rate limit has been exceeded | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "rate_limit_exceeded", + "type": "https://docs.olostep.com/api-reference/errors/too_many_requests", + "status": 429, + "title": "Too Many Requests", + "detail": "You've reached your rate limits for this API. Please try again later.", + "created": 1704067200, + "metadata": {} +} +``` + +## Rate Limits + +Rate limits vary by plan. Check your current limits in your [dashboard](https://olostep.com/dashboard). + +## Resolution + +1. **Implement exponential backoff** - Wait and retry with increasing delays +2. **Use batch processing** - Process multiple URLs in a single batch request +3. **Spread requests over time** - Avoid bursting all requests at once +4. 
**Contact support** - Request higher rate limits if needed + +## Example: Exponential Backoff + +```python +import time +import random + +def request_with_backoff(make_request, max_retries=5): + for attempt in range(max_retries): + try: + return make_request() + except RateLimitError: + if attempt == max_retries - 1: + raise + + # Exponential backoff with jitter + delay = (2 ** attempt) + random.uniform(0, 1) + time.sleep(delay) +``` + diff --git a/api-reference/errors/unauthorized.mdx b/api-reference/errors/unauthorized.mdx new file mode 100644 index 00000000..26332224 --- /dev/null +++ b/api-reference/errors/unauthorized.mdx @@ -0,0 +1,52 @@ +--- +title: 'Unauthorized' +sidebarTitle: 'Unauthorized' +description: 'HTTP 401 - Authentication credentials are missing or invalid' +--- + + +This page documents the `unauthorized` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 401 + +Authentication credentials are missing or invalid. The API could not verify your identity. + +## Error Codes + +| Code | Description | +|------|-------------| +| `invalid_api_key` | The API key provided is not valid | + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Missing `Authorization` header +- Invalid API key format +- Revoked or expired API key +- Malformed bearer token + +## Resolution + +1. Check your API key at [dashboard](https://olostep.com/dashboard) +2. Ensure the `Authorization` header uses the format: `Bearer YOUR_API_KEY` +3. Verify there are no extra spaces or characters in your API key +4. 
Generate a new API key if the current one is compromised + diff --git a/api-reference/errors/unprocessable_entity.mdx b/api-reference/errors/unprocessable_entity.mdx new file mode 100644 index 00000000..0d456f20 --- /dev/null +++ b/api-reference/errors/unprocessable_entity.mdx @@ -0,0 +1,48 @@ +--- +title: 'Unprocessable Entity' +sidebarTitle: 'Unprocessable Entity' +description: 'HTTP 422 - Server understands the request but rejects its meaning' +--- + + +This page documents the `unprocessable_entity` error type. For a complete overview of all error types, see [Errors](/api-reference/errors). + + +## Overview + +**HTTP Status Code:** 422 + +The server understands the request structure but rejects its meaning due to business logic rules. The request is syntactically correct but semantically invalid. + +## Error Codes + +Various codes depending on the specific business rule violation. + +## Example Response + +```json +{ + "object": "error", + "id": "error_abc123", + "code": "invalid_parser_input", + "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity", + "status": 422, + "title": "Unprocessable Entity", + "detail": "The parser object must contain an input object with 'profile_id' field", + "created": 1704067200, + "metadata": {} +} +``` + +## Common Causes + +- Missing required input fields for a parser +- Invalid combination of parameters +- Business rule violations + +## Resolution + +1. Review the error `detail` for specific requirements +2. Check the documentation for the feature you're using +3. Ensure all required input fields are provided + diff --git a/api-reference/maps/create.mdx b/api-reference/maps/create.mdx index 766f5146..db008aae 100644 --- a/api-reference/maps/create.mdx +++ b/api-reference/maps/create.mdx @@ -1,5 +1,9 @@ --- title: 'Create Map' -description: 'This endpoint allows users to get all the urls on a certain website. It can take up to 120 seconds for complex websites. 
For large websites, results are paginated using cursor-based pagination' +description: 'This endpoint allows users to get all the URLs on a certain website. It can take up to 120 seconds for complex websites. For large websites, results are paginated using cursor-based pagination' openapi: POST /v1/maps --- + + +**Metadata** New — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/retrieve-dataset.mdx b/api-reference/retrieve-dataset.mdx index 8641783b..1529894d 100644 --- a/api-reference/retrieve-dataset.mdx +++ b/api-reference/retrieve-dataset.mdx @@ -4,3 +4,6 @@ description: 'This is the endpoint to retrieve the content of a scraped page (HT openapi: GET /olostep-p2p-dataset-API --- + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/retrieve.mdx b/api-reference/retrieve.mdx index fcc1c103..190b6eba 100644 --- a/api-reference/retrieve.mdx +++ b/api-reference/retrieve.mdx @@ -4,3 +4,6 @@ description: 'Retrieve content of processed batches and crawls urls.' openapi: GET /v1/retrieve --- + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/scrapes/create.mdx b/api-reference/scrapes/create.mdx index 6237c0d4..f4d99551 100644 --- a/api-reference/scrapes/create.mdx +++ b/api-reference/scrapes/create.mdx @@ -3,3 +3,7 @@ title: 'Create Scrape' description: '[Scrape](https://docs.olostep.com/features/scrapes/scrapes) a url with provided configuration and get content.' openapi: POST /v1/scrapes --- + + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/scrapes/get.mdx b/api-reference/scrapes/get.mdx index f462625a..b7a220b9 100644 --- a/api-reference/scrapes/get.mdx +++ b/api-reference/scrapes/get.mdx @@ -3,3 +3,7 @@ title: 'Get Scrape' description: 'Can be used to retrieve response for a scrape.' 
openapi: GET /v1/scrapes/{scrape_id} --- + + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/api-reference/start-agent.mdx b/api-reference/start-agent.mdx index 0bd22731..134656a1 100644 --- a/api-reference/start-agent.mdx +++ b/api-reference/start-agent.mdx @@ -3,3 +3,7 @@ title: 'Start agent' description: 'This is the endpoint to start the scraping agent (and optionally retrieve the scraped content using the expand parameters)' openapi: GET /olostep-p2p-incomingAPI --- + + +**Metadata** Coming Soon — See [Metadata](/api-reference/common/metadata) for details. + diff --git a/docs.json b/docs.json index 814d7e3c..e9fdde49 100644 --- a/docs.json +++ b/docs.json @@ -133,6 +133,31 @@ "pages": [ "api-reference/retrieve" ] + }, + { + "group": "Common Objects", + "pages": [ + "api-reference/common/metadata", + "api-reference/common/pagination", + { + "group": "Errors", + "icon": "triangle-exclamation", + "pages": [ + "api-reference/errors", + "api-reference/errors/bad_request", + "api-reference/errors/unauthorized", + "api-reference/errors/payment_required", + "api-reference/errors/forbidden", + "api-reference/errors/not_found", + "api-reference/errors/unprocessable_entity", + "api-reference/errors/idempotency_error", + "api-reference/errors/too_many_requests", + "api-reference/errors/internal_error", + "api-reference/errors/service_unavailable", + "api-reference/errors/gateway_timeout" + ] + } + ] } ] } @@ -190,4 +215,4 @@ "perplexity" ] } -} \ No newline at end of file +} diff --git a/openapi/answers.json b/openapi/answers.json index e3c16e41..f49841e1 100644 --- a/openapi/answers.json +++ b/openapi/answers.json @@ -15,6 +15,52 @@ "type": "http", "scheme": "bearer" } + }, + "schemas": { + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. 
Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." + }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + }, + "Error": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { "type": "string" }, + "object": { "type": "string", "enum": ["error"] }, + "code": { "type": "string" }, + "type": { "type": "string", "format": "uri" }, + "status": { "type": "integer" }, + "title": { "type": "string" }, + "detail": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "errors": { + "type": "array", + "description": "Optional array of additional error details", + "items": {} + } + }, + "required": ["id", "object", "code", "type", "status", "title", "detail", "created"] + } } }, "paths": { @@ -22,11 +68,7 @@ "post": { "summary": "Create Answer", "description": "The AI will perform actions like searching and browsing web pages to find the answer to the provided task.\nExecution time is 3-30s depending upon complexity. \n\n For longer tasks, use the agent endpoint instead. See [Agent feature](/features/agents/agents). 
See [Answers feature](/features/answers/answers).", - "security": [ - { - "Authorization": [] - } - ], + "security": [{ "Authorization": [] }], "requestBody": { "required": true, "content": { @@ -34,18 +76,10 @@ "schema": { "type": "object", "properties": { - "task": { - "type": "string", - "description": "The task to be performed." - }, - "json_format": { - "type": "object", - "description": "The desired output JSON object with empty values as a schema, or simply describe the data you want as a string." - } + "task": { "type": "string", "description": "The task to be performed." }, + "json_format": { "type": "object", "description": "Desired output JSON schema." } }, - "required": [ - "task" - ] + "required": ["task"] } } } @@ -58,30 +92,16 @@ "schema": { "type": "object", "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string" - }, - "created": { - "type": "integer" - }, - "metadata": { - "type": "object" - }, - "task": { - "type": "string" - }, + "id": { "type": "string" }, + "object": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "task": { "type": "string" }, "result": { "type": "object", "properties": { - "json_content": { - "type": "string" - }, - "json_hosted_url": { - "type": "string" - } + "json_content": { "type": "string" }, + "json_hosted_url": { "type": "string" } } } } @@ -90,13 +110,118 @@ } }, "400": { - "description": "Bad Request (Missing Task)" + "description": "Bad request - missing or invalid parameters. 
See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "task is required", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "402": { - "description": "Invalid API Key" + "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "403": { + "description": "Forbidden - access denied. 
See [Forbidden](/api-reference/errors/forbidden) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "approval_required", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "You don't have access to this feature.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "429": { + "description": "Rate limit exceeded. See [Too Many Requests](/api-reference/errors/too_many_requests) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "rate_limit_exceeded", + "type": "https://docs.olostep.com/api-reference/errors/too_many_requests", + "status": 429, + "title": "Too Many Requests", + "detail": "You've reached your rate limits for this API.", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal Server Error" + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } @@ -104,61 +229,36 @@ "/v1/answers/{answer_id}": { "get": { "summary": "Get Answer", - "description": "This endpoint retrieves a previously completed answer by its ID.", - "security": [ - { - "Authorization": [] - } - ], + "description": "Retrieve a previously completed answer by its ID.", + "security": [{ "Authorization": [] }], "parameters": [ { "name": "answer_id", "in": "path", "required": true, - "description": "Unique identifier for the answer to be retrieved.", - "schema": { - "type": "string" - } + "description": "Unique identifier for the answer.", + "schema": { "type": "string" } } ], "responses": { "200": { - "description": "Successful response with the answer object.", + "description": "Successful response with the answer.", "content": { "application/json": { "schema": { "type": "object", "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string" - }, - "created": { - "type": "integer" - }, - "metadata": { - "type": "object" - }, - "task": { - "type": "string" - }, + "id": { "type": "string" }, + "object": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "task": { "type": "string" }, "result": { "type": "object", "properties": { - "json_content": { - "type": "string" - }, - "json_hosted_url": { - "type": "string" - }, - "sources": { - "type": "array", - "items": { - "type": "string" - } - } + "json_content": { "type": "string" }, + "json_hosted_url": { "type": "string" 
}, + "sources": { "type": "array", "items": { "type": "string" } } } } } @@ -166,17 +266,65 @@ } } }, - "402": { - "description": "Invalid API Key" + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Answer Not Found" + "description": "Answer not found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "answer_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Answer not found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal Server Error" + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } } } -} \ No newline at end of file +} diff --git a/openapi/batches.json b/openapi/batches.json index 73fbee4d..9d304c2c 100644 --- a/openapi/batches.json +++ b/openapi/batches.json @@ -9,15 +9,114 @@ "url": "https://api.olostep.com" } ], - "components": { - "securitySchemes": { - "Authorization": { - "type": "http", - "scheme": "bearer" - } - } - }, "paths": { + "/v1/batches/recent": { + "get": { + "summary": "List recent batches", + "description": "Fetches the list of recent batches for the authenticated user.", + "security": [ + { + "Authorization": [] + } + ], + "parameters": [ + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "default": 50, + "maximum": 100 + }, + "description": "Maximum number of batches to return (default 50, max 100)." + }, + { + "name": "source", + "in": "query", + "required": false, + "schema": { + "type": "string" + }, + "description": "Filter by source (optional)." 
+ } + ], + "responses": { + "200": { + "description": "Successful response with list of recent batches.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string", + "enum": ["list"] + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { "type": "string" }, + "object": { "type": "string" }, + "status": { "type": "string" }, + "created": { "type": "number" }, + "total_urls": { "type": "number" }, + "completed_urls": { "type": "number" } + } + } + } + } + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid. Try double-checking it or reaching out to info@olostep.com if you're facing issues.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "500": { + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } + } + } + } + }, "/v1/batches": { "post": { "summary": "Start a new batch", @@ -86,17 +185,20 @@ "items": { "type": "string" }, - "description": "Filter extracted links using glob patterns with `include_links`. 
Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})." + "description": "Filter extracted links using glob patterns." }, "exclude_links": { "type": "array", "items": { "type": "string" }, - "description": "Filter extracted links using glob patterns with `exclude_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})." + "description": "Filter extracted links using glob patterns." } }, "description": "Get all the links present on each page in the batch." + }, + "metadata": { + "$ref": "#/components/schemas/Metadata" } }, "required": [ @@ -114,46 +216,152 @@ "schema": { "type": "object", "properties": { - "id": { - "type": "string", - "description": "Batch ID" - }, - "object": { - "type": "string", - "description": "The kind of object. \"batch\" for this endpoint." - }, - "status": { - "type": "string", - "description": "`in_progress` or `completed`" - }, - "created": { - "type": "number", - "description": "Created epoch" - }, - "total_urls": { - "type": "number", - "description": "Count of URLs in the batch" - }, - "completed_urls": { - "type": "number", - "description": "Count of completed URLs" - }, - "parser": { - "type": "string" - }, - "country": { - "type": "string" - } + "id": { "type": "string", "description": "Batch ID" }, + "object": { "type": "string", "description": "The kind of object. \"batch\" for this endpoint." 
}, + "status": { "type": "string", "description": "`in_progress` or `completed`" }, + "created": { "type": "number", "description": "Created epoch" }, + "total_urls": { "type": "number", "description": "Count of URLs in the batch" }, + "completed_urls": { "type": "number", "description": "Count of completed URLs" }, + "parser": { "type": "string" }, + "country": { "type": "string" }, + "metadata": { "$ref": "#/components/schemas/Metadata" } } } } } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "The 'items' array is required and must contain at least one item.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid. Try double-checking it or reaching out to info@olostep.com if you're facing issues.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "402": { + "description": "Payment required - credits exhausted. 
See [Payment Required](/api-reference/errors/payment_required) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/auth/", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "403": { + "description": "Forbidden - access denied to this feature. See [Forbidden](/api-reference/errors/forbidden) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "access_denied", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "You don't have access to this feature. Please reach out to info@olostep.com to get approved", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "409": { + "description": "Idempotency conflict - a request with this key is in progress. See [Idempotency Error](/api-reference/errors/idempotency_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "idempotency_key_in_progress", + "type": "https://docs.olostep.com/api-reference/errors/idempotency_error", + "status": 409, + "title": "Idempotency Error", + "detail": "A request with this idempotency key is currently being processed. Please wait and retry.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "422": { + "description": "Unprocessable entity - business rule violation. 
See [Unprocessable Entity](/api-reference/errors/unprocessable_entity) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "idempotency_key_reuse", + "type": "https://docs.olostep.com/api-reference/errors/unprocessable_entity", + "status": 422, + "title": "Unprocessable Entity", + "detail": "A request with this idempotency key was already made with different parameters. Idempotency keys must be unique per request.", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } @@ -172,9 +380,7 @@ "name": "batch_id", "in": "path", "required": true, - "schema": { - "type": "string" - }, + "schema": { "type": "string" }, "description": "The ID of the batch to retrieve information for." } ], @@ -186,49 +392,76 @@ "schema": { "type": "object", "properties": { - "id": { - "type": "string", - "description": "Batch ID" - }, - "object": { - "type": "string", - "description": "The kind of object. \"batch\" for this endpoint." 
- }, - "status": { - "type": "string", - "description": "`in_progress` or `completed`" - }, - "created": { - "type": "number", - "description": "Created epoch" - }, - "total_urls": { - "type": "number", - "description": "Count of URLs in the batch" - }, - "completed_urls": { - "type": "number", - "description": "Count of completed URLs" - }, - "parser": { - "type": "string" - }, - "country": { - "type": "string" - } + "id": { "type": "string", "description": "Batch ID" }, + "object": { "type": "string", "description": "The kind of object. \"batch\" for this endpoint." }, + "status": { "type": "string", "description": "`in_progress` or `completed`" }, + "created": { "type": "number", "description": "Created epoch" }, + "total_urls": { "type": "number", "description": "Count of URLs in the batch" }, + "completed_urls": { "type": "number", "description": "Count of completed URLs" }, + "parser": { "type": "string" }, + "country": { "type": "string" }, + "metadata": { "$ref": "#/components/schemas/Metadata" } } } } } }, - "400": { - "description": "Bad request due to incorrect or missing parameters." + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Batch not found for the provided ID." + "description": "Batch not found for the provided ID. 
See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "batch_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Batch not found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } @@ -247,10 +480,8 @@ "name": "batch_id", "in": "path", "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the batch to retrieve the list of URLs and retrieve_id for." + "schema": { "type": "string" }, + "description": "The ID of the batch to retrieve items for." }, { "name": "status", @@ -258,10 +489,7 @@ "required": false, "schema": { "type": "string", - "enum": [ - "completed", - "failed" - ] + "enum": ["completed", "failed"] }, "description": "Status of the URLs to retrieve (completed or failed)." }, @@ -269,118 +497,179 @@ "name": "cursor", "in": "query", "required": false, - "schema": { - "type": "integer" - }, - "description": "Optional integer representing the index to start fetching content from. Useful to paginate until all URLs are fetched. \n\n Start with 0, then provide `response['cursor']` value of the last request." + "schema": { "type": "integer" }, + "description": "Pagination cursor. 
Omit this parameter on the first request. For subsequent requests, use the `cursor` value from the previous response. See [Pagination](/api-reference/common/pagination) for details." }, { "name": "limit", "in": "query", "required": false, - "schema": { - "type": "integer" - }, - "description": "Optional integer to limit the number of results returned. Recommended 10-50 results at a time. Paginated using *cursor*.\n\nMaximum 10MB of content can be fetched in a single request." - }, - { - "name": "formats", - "in": "query", - "required": false, - "schema": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "html", - "markdown", - "json" - ] - } - }, - "description": "**Deprecated:** Use `/retrieve` endpoint with `retrieve_id`.\n\nArray of formats to fetch (e.g., [\"html\", \"markdown\"])." + "schema": { "type": "integer" }, + "description": "Number of results to return (recommended 10-50)." } ], "responses": { "200": { - "description": "Successful response with the list of URLs.", + "description": "Successful response with batch items.", "content": { "application/json": { "schema": { "type": "object", "properties": { - "batch_id": { - "type": "string", - "description": "Batch ID" - }, - "object": { - "type": "string", - "description": "The kind of object. \"batch\" for this endpoint." - }, - "status": { - "type": "string", - "description": "`in_progress` or `completed`" - }, + "batch_id": { "type": "string" }, + "object": { "type": "string" }, + "status": { "type": "string" }, "items": { "type": "array", "items": { "type": "object", "properties": { - "custom_id": { - "type": "string" - }, - "retrieve_id": { - "type": "string", - "description": "To fetch content from the `/retrieve` API" - }, - "url": { - "type": "string" - }, - "html_content": { - "type": "string", - "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`." 
- }, - "markdown_content": { - "type": "string", - "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`." - }, - "json_content": { - "type": "string", - "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`." - }, - "links_on_page": { - "type": "array", - "description": "All links on that page`.", - "items": { - "type": "string" - } - } + "custom_id": { "type": "string" }, + "retrieve_id": { "type": "string" }, + "url": { "type": "string" }, + "links_on_page": { "type": "array", "items": { "type": "string" } } } } }, - "items_count": { - "type": "integer" - }, - "cursor": { - "type": "integer", - "description": "To be passed in the query in next request to get the next items." - } + "items_count": { "type": "integer" }, + "cursor": { "type": "integer", "description": "Pagination cursor. Present when there are more results available. Use this value in the next request's `cursor` parameter. See [Pagination](/api-reference/common/pagination) for details." } } } } } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request due to incorrect parameters. See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "Status must be either 'completed' or 'failed'", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. 
See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Batch not found for the provided ID." + "description": "Batch not found for the provided ID. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "batch_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Batch not found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } } + }, + "components": { + "securitySchemes": { + "Authorization": { + "type": "http", + "scheme": "bearer" + } + }, + "schemas": { + "Error": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { "type": "string", "description": "Unique error identifier" }, + "object": { "type": "string", "enum": ["error"], "description": "Always 'error'" }, + "code": { "type": "string", "description": "Machine-readable error code" }, + "type": { "type": "string", "format": "uri", "description": "URI reference identifying the problem type" }, + "status": { "type": "integer", "description": "HTTP status code" }, + "title": { "type": "string", "description": "Short, human-readable summary" }, + "detail": { "type": "string", "description": "Human-readable explanation" }, + "created": { "type": "integer", "description": "Unix timestamp" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "errors": { + "type": "array", + "description": "Optional array of additional error details", + "items": {} + } + }, + "required": ["id", "object", "code", "type", "status", "title", "detail", "created"] + }, + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. 
Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." + }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + } + } } -} \ No newline at end of file +} diff --git a/openapi/builder.json b/openapi/builder.json index 66957702..6f043336 100644 --- a/openapi/builder.json +++ b/openapi/builder.json @@ -5,341 +5,254 @@ "version": "1.0.0" }, "servers": [ - { - "url": "https://agent.olostep.com" - }, - { - "url": "https://dataset.olostep.com" - } + { "url": "https://agent.olostep.com" }, + { "url": "https://dataset.olostep.com" } ], "components": { "securitySchemes": { - "Authorization": { - "type": "http", - "scheme": "bearer" + "Authorization": { "type": "http", "scheme": "bearer" } + }, + "schemas": { + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." 
+ }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + }, + "Error": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { "type": "string" }, + "object": { "type": "string", "enum": ["error"] }, + "code": { "type": "string" }, + "type": { "type": "string", "format": "uri" }, + "status": { "type": "integer" }, + "title": { "type": "string" }, + "detail": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "errors": { + "type": "array", + "description": "Optional array of additional error details", + "items": {} + } + }, + "required": ["id", "object", "code", "type", "status", "title", "detail", "created"] } } }, "paths": { "/olostep-p2p-incomingAPI": { "get": { - "summary": "Initiate web page scraping with various configurations", - "description": "This endpoint allows users to scrape web pages with various options such as setting timeouts, wait time before scraping, data format preferences, and more.", - "servers": [ - { - "url": "https://agent.olostep.com" - } - ], - "security": [ - { - "Authorization": [] - } - ], + "summary": "Start Agent (Legacy)", + "description": "Initiate web page scraping with various configurations.", + "servers": [{ "url": "https://agent.olostep.com" }], + "security": [{ "Authorization": [] }], "parameters": [ - { - "name": "url", - "in": "query", - "required": true, - "description": "The URL to start scraping from.", - "schema": { - "type": "string", - "format": "uri" - } - }, - { - "name": "timeout", - "in": "query", - "required": false, - "description": "Timeout in seconds for the scraping process, with a maximum of 620 
seconds.", - "schema": { - "type": "integer", - "default": 40 - } - }, - { - "name": "waitBeforeScraping", - "in": "query", - "required": false, - "description": "Time to wait in seconds before starting the scraping, up to 500 seconds.", - "schema": { - "type": "integer", - "default": 3 - } - }, - { - "name": "saveHtml", - "in": "query", - "required": false, - "description": "Option to save the scraped content as HTML.", - "schema": { - "type": "boolean", - "default": true - } - }, - { - "name": "saveMarkdown", - "in": "query", - "required": false, - "description": "Option to save the scraped content as Markdown.", - "schema": { - "type": "boolean", - "default": true - } - }, - { - "name": "removeCSSselectors", - "in": "query", - "required": false, - "description": "Option to remove certain CSS selectors from the content. Optionally, you can also pass a JSON stringified array of specific selectors you want to remove. The CSS selectors removed when this option is set to `default` are `['nav','footer','script','style','noscript','svg',[role=alert],[role=banner],[role=dialog],[role=alertdialog],[role=region][aria-label*=skip i],[aria-modal=true]]`", - "schema": { - "type": "string", - "default": "default", - "enum": ["default", "none", "JSON stringified array of CSS selectors"] - } - }, - { - "name": "htmlTransformer", - "in": "query", - "required": false, - "description": "Specify the HTML transformer to use, if any. 
Postlight's Mercury Parser library is used to remove ads and other unwanted content from the scraped content.", - "schema": { - "type": "string", - "default": "none", - "enum": ["none", "postlightParser"] - } - }, - { - "name": "removeImages", - "in": "query", - "required": false, - "description": "Option to remove images from the scraped content.", - "schema": { - "type": "boolean", - "default": true - } - }, - { - "name": "expandMarkdown", - "in": "query", - "required": false, - "description": "If true, the markdown content is returned in the markdown_content field.", - "schema": { - "type": "boolean", - "default": false - } - }, - { - "name": "expandHtml", - "in": "query", - "required": false, - "description": "If true, the HTML content is returned in the html_content field.", - "schema": { - "type": "boolean", - "default": false - } - }, - { - "name": "actions", - "in": "query", - "required": false, - "description": "Actions to perform on the page before getting the content", - "schema": { - "type": "array", - "items": { - "type": "object", - "discriminator": { - "propertyName": "type" - }, - "oneOf": [ - { - "type": "object", - "title": "Wait", - "required": [ - "type", - "milliseconds" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "wait" - ], - "description": "Wait for a specified amount of milliseconds" - }, - "milliseconds": { - "type": "integer", - "minimum": 0, - "description": "Time to wait in milliseconds" - } - } - }, - { - "type": "object", - "title": "Click", - "required": [ - "type", - "selector" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "click" - ], - "description": "Click on an element" - }, - "selector": { - "type": "string", - "description": "CSS selector for the element to click" - } - } - }, - { - "type": "object", - "title": "Fill Input", - "required": [ - "type", - "selector", - "value" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "fill_input" - ], - "description": 
"Fill an input element with a value" - }, - "selector": { - "type": "string", - "description": "CSS selector for the input element" - }, - "value": { - "type": "string", - "description": "Text to enter into the input" - } - } - }, - { - "type": "object", - "title": "Scroll", - "required": [ - "type", - "direction", - "amount" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "scroll" - ], - "description": "Scroll the page" - }, - "direction": { - "type": "string", - "enum": [ - "up", - "down", - "left", - "right" - ], - "description": "Direction to scroll" - }, - "amount": { - "type": "number", - "description": "Amount to scroll in pixels" - } - } - } - ] - } - } - } + { "name": "url", "in": "query", "required": true, "schema": { "type": "string", "format": "uri" }, "description": "The URL to scrape." }, + { "name": "timeout", "in": "query", "required": false, "schema": { "type": "integer", "default": 40 }, "description": "Timeout in seconds (max 620)." }, + { "name": "waitBeforeScraping", "in": "query", "required": false, "schema": { "type": "integer", "default": 3 }, "description": "Wait time before scraping." }, + { "name": "saveHtml", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Save as HTML." }, + { "name": "saveMarkdown", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Save as Markdown." }, + { "name": "removeCSSselectors", "in": "query", "required": false, "schema": { "type": "string", "default": "default" }, "description": "CSS selectors to remove." }, + { "name": "htmlTransformer", "in": "query", "required": false, "schema": { "type": "string", "default": "none", "enum": ["none", "postlightParser"] }, "description": "HTML transformer." }, + { "name": "removeImages", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Remove images." 
}, + { "name": "expandMarkdown", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Return markdown content inline." }, + { "name": "expandHtml", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Return HTML content inline." } ], "responses": { "200": { - "description": "Successful response with the requested data." + "description": "Successful response with scraped data.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "metadata": { "$ref": "#/components/schemas/Metadata" } + } + } + } + } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request - missing or invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "url parameter is required", + "created": 1704067200, + "metadata": {} + } + } + } }, "401": { - "description": "Unauthorized access due to missing or invalid API token." + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } }, "/olostep-p2p-dataset-API": { "get": { - "servers": [ - { - "url": "https://dataset.olostep.com" - } - ], - "summary": "Retrieve dataset information", - "description": "Allows users to retrieve dataset information in Markdown and/or HTML format based on the provided dataset ID.", - "security": [ - { - "Authorization": [] - } - ], + "servers": [{ "url": "https://dataset.olostep.com" }], + "summary": "Retrieve Dataset (Legacy)", + "description": "Retrieve dataset information by ID.", + "security": [{ "Authorization": [] }], "parameters": [ - { - "name": "datasetId", - "in": "query", - "required": true, - "description": "Unique identifier for the dataset to be retrieved.", - "schema": { - "type": "string" - } - }, - { - "name": "retrieveMarkdown", - "in": "query", - "required": false, - "description": "Option to retrieve the dataset in Markdown format.", - "schema": { - "type": "boolean", - "default": true - } - }, - { - "name": "retrieveHtml", - "in": "query", - "required": false, - "description": "Option to retrieve the dataset in HTML format.", - "schema": { - "type": "boolean", - "default": false - } - }, - { - "name": "fastLane", - "in": "query", - "required": false, - "description": "Experimental option to speed up the scraping process", - "schema": { - "type": "boolean", - "default": false - } - } + { "name": "datasetId", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Dataset ID." 
}, + { "name": "retrieveMarkdown", "in": "query", "required": false, "schema": { "type": "boolean", "default": true }, "description": "Retrieve as Markdown." }, + { "name": "retrieveHtml", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Retrieve as HTML." }, + { "name": "fastLane", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "Experimental speed optimization." } ], "responses": { "200": { - "description": "Successful response with the dataset information." + "description": "Successful response with dataset information.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "metadata": { "$ref": "#/components/schemas/Metadata" } + } + } + } + } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request - missing or invalid parameters. See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "datasetId is required", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. 
See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Dataset not found for the provided ID." + "description": "Dataset not found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "dataset_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Dataset not found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } diff --git a/openapi/crawls.json b/openapi/crawls.json index e58af42b..37716952 100644 --- a/openapi/crawls.json +++ b/openapi/crawls.json @@ -15,6 +15,52 @@ "type": "http", "scheme": "bearer" } + }, + "schemas": { + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. 
Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." + }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + }, + "Error": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { "type": "string" }, + "object": { "type": "string", "enum": ["error"] }, + "code": { "type": "string" }, + "type": { "type": "string", "format": "uri" }, + "status": { "type": "integer" }, + "title": { "type": "string" }, + "detail": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "errors": { + "type": "array", + "description": "Optional array of additional error details", + "items": {} + } + }, + "required": ["id", "object", "code", "type", "status", "title", "detail", "created"] + } } }, "paths": { @@ -22,11 +68,7 @@ "post": { "summary": "Start a new crawl", "description": "Initiates a new crawl process with the specified parameters.", - "security": [ - { - "Authorization": [] - } - ], + "security": [{ "Authorization": [] }], "requestBody": { "required": true, "content": { @@ -34,66 +76,21 @@ "schema": { "type": "object", "properties": { - "start_url": { - "type": "string", - "description": "The starting point of the crawl." 
- }, - "include_urls": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL path patterns to include in the crawl using glob syntax.\n\nDefaults to `/**` which includes all URLs. Use patterns like `/blog/**` to crawl specific sections (e.g., only blog pages), `/products/*.html` for product pages, or multiple patterns for different sections. Supports standard glob features like * (any characters) and ** (recursive matching)." - }, - "exclude_urls": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL path names in glob pattern to exclude. For example: `/careers/**`. Excluded URLs will supersede included URLs." - }, - "max_pages": { - "type": "number", - "description": "Maximum number of pages to crawl. Recommended for most use cases like crawling an entire website." - }, - "max_depth": { - "type": "number", - "description": "Maximum depth of the crawl. Useful to extract only up to n-degree of links." - }, - "include_external": { - "type": "boolean", - "description": "Crawl first-degree external links." - }, - "include_subdomain": { - "type": "boolean", - "description": "Include subdomains of the website. `false` by default." - }, - "search_query": { - "type": "string", - "description": "An optional search query to find specific links and also sort the results by relevance." - }, - "top_n": { - "type": "number", - "description": "An optional number to only crawl the top N most relevant links on every page as per search query." - }, - "webhook_url": { - "type": "string", - "description": "An optional POST request endpoint called when this crawl is completed. The body of the request will be same as the response of this [`v1/crawls/{crawl_id}`](./info#response-created) endpoint." - }, - "timeout": { - "type": "number", - "description": "End the crawl after n seconds with the pages completed until then. May take ~10s extra from provided timeout." 
- }, - "follow_robots_txt": { - "type": "boolean", - "description": "Whether to respect robots.txt rules. If set to `false`, the crawler will scrape the website regardless of robots.txt disallow directives. `true` by default.", - "default": true - } + "start_url": { "type": "string", "description": "The starting point of the crawl." }, + "include_urls": { "type": "array", "items": { "type": "string" }, "description": "URL path patterns to include." }, + "exclude_urls": { "type": "array", "items": { "type": "string" }, "description": "URL path patterns to exclude." }, + "max_pages": { "type": "number", "description": "Maximum number of pages to crawl." }, + "max_depth": { "type": "number", "description": "Maximum depth of the crawl." }, + "include_external": { "type": "boolean", "description": "Crawl first-degree external links." }, + "include_subdomain": { "type": "boolean", "description": "Include subdomains." }, + "search_query": { "type": "string", "description": "Search query to find specific links." }, + "top_n": { "type": "number", "description": "Only crawl top N relevant links." }, + "webhook_url": { "type": "string", "description": "Webhook URL called on completion." }, + "timeout": { "type": "number", "description": "End crawl after n seconds." }, + "follow_robots_txt": { "type": "boolean", "default": true, "description": "Respect robots.txt rules." }, + "metadata": { "$ref": "#/components/schemas/Metadata" } }, - "required": [ - "start_url", - "max_pages" - ] + "required": ["start_url", "max_pages"] } } } @@ -106,80 +103,114 @@ "schema": { "type": "object", "properties": { - "id": { - "type": "string", - "description": "Crawl ID" - }, - "object": { - "type": "string", - "description": "The kind of object. \"crawl\" for this endpoint." 
- }, - "status": { - "type": "string", - "description": "`in_progress` or `completed`" - }, - "created": { - "type": "number", - "description": "Created time in epoch" - }, - "start_date": { - "type": "string", - "description": "Created time in date" - }, - "start_url": { - "type": "string" - }, - "max_pages": { - "type": "number" - }, - "max_depth": { - "type": "number" - }, - "exclude_urls": { - "type": "array", - "items": { - "type": "string" - } - }, - "include_urls": { - "type": "array", - "items": { - "type": "string" - } - }, - "include_external": { - "type": "boolean" - }, - "search_query": { - "type": "string" - }, - "top_n": { - "type": "number" - }, - "current_depth": { - "type": "number", - "description": "The current depth of the crawl process." - }, - "pages_count": { - "type": "number", - "description": "Count of pages crawled" - }, - "webhook_url": { - "type": "string" - }, - "follow_robots_txt": { - "type": "boolean" - } + "id": { "type": "string", "description": "Crawl ID" }, + "object": { "type": "string" }, + "status": { "type": "string" }, + "created": { "type": "number" }, + "start_url": { "type": "string" }, + "max_pages": { "type": "number" }, + "max_depth": { "type": "number" }, + "pages_count": { "type": "number" }, + "metadata": { "$ref": "#/components/schemas/Metadata" } } } } } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request due to incorrect or missing parameters. 
See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "start_url is required", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "402": { + "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "403": { + "description": "Forbidden - access denied. 
See [Forbidden](/api-reference/errors/forbidden) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "approval_required", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "You don't have access to this feature.", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } @@ -187,21 +218,15 @@ "/v1/crawls/{crawl_id}": { "get": { "summary": "Retrieve crawl information", - "description": "Fetches information about a specific crawl using its `crawl_id`.", - "security": [ - { - "Authorization": [] - } - ], + "description": "Fetches information about a specific crawl.", + "security": [{ "Authorization": [] }], "parameters": [ { "name": "crawl_id", "in": "path", "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the crawl to retrieve information for." + "schema": { "type": "string" }, + "description": "The ID of the crawl." } ], "responses": { @@ -212,241 +237,211 @@ "schema": { "type": "object", "properties": { - "id": { - "type": "string", - "description": "Crawl ID" - }, - "object": { - "type": "string", - "description": "The kind of object. \"crawl\" for this endpoint." 
- }, - "status": { - "type": "string", - "description": "`in_progress` or `completed`" - }, - "created": { - "type": "number", - "description": "Created time in epoch" - }, - "start_date": { - "type": "string", - "description": "Created time in date" - }, - "start_url": { - "type": "string" - }, - "max_pages": { - "type": "number" - }, - "max_depth": { - "type": "number" - }, - "exclude_urls": { - "type": "array", - "items": { - "type": "string" - } - }, - "include_urls": { - "type": "array", - "items": { - "type": "string" - } - }, - "include_external": { - "type": "boolean" - }, - "search_query": { - "type": "string" - }, - "top_n": { - "type": "number" - }, - "current_depth": { - "type": "number", - "description": "The current depth of the crawl process." - }, - "pages_count": { - "type": "number", - "description": "Count of pages crawled" - }, - "webhook_url": { - "type": "string" - }, - "follow_robots_txt": { - "type": "boolean" - } + "id": { "type": "string" }, + "object": { "type": "string" }, + "status": { "type": "string" }, + "created": { "type": "number" }, + "start_url": { "type": "string" }, + "max_pages": { "type": "number" }, + "pages_count": { "type": "number" }, + "metadata": { "$ref": "#/components/schemas/Metadata" } } } } } }, - "400": { - "description": "Bad request due to incorrect or missing parameters." + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Crawl not found for the provided ID." 
+ "description": "Crawl not found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "crawl_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Crawl not found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } }, "/v1/crawls/{crawl_id}/pages": { "get": { - "summary": "Retrieve list of crawled pages optionally with content", - "description": "Fetches the list of crawled pages and content that have been processed for a specific crawl ID.", - "security": [ - { - "Authorization": [] - } - ], + "summary": "Retrieve crawled pages", + "description": "Fetches the list of crawled pages for a specific crawl.", + "security": [{ "Authorization": [] }], "parameters": [ { "name": "crawl_id", "in": "path", "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the crawl to retrieve the list of URLs for." + "schema": { "type": "string" }, + "description": "The ID of the crawl." }, { "name": "cursor", "in": "query", "required": false, - "schema": { - "type": "integer" - }, - "description": "Optional integer representing the index to start fetching content from. 
Useful to paginate until all URLs are fetched. Start with 0, then provide `response['cursor']` value of the last request." + "schema": { "type": "integer" }, + "description": "Pagination cursor. Omit this parameter on the first request. For subsequent requests, use the `cursor` value from the previous response. See [Pagination](/api-reference/common/pagination) for details." }, { "name": "limit", "in": "query", "required": false, - "schema": { - "type": "integer" - }, - "description": "Optional integer to limit the number of results returned. Recommended 10-50 results at a time. Paginated using *cursor*. Maximum 10MB of content can be fetched in a single request." - }, - { - "name": "search_query", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "An optional search query to sort the results by relevance. Uses the original search_query by default if provided." - }, - { - "name": "formats", - "in": "query", - "required": false, - "schema": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "html", - "markdown" - ] - } - }, - "description": "**Deprecated:** Use `/retrieve` endpoint with `retrieve_id`.\n\nArray of formats to fetch (e.g., [\"html\", \"markdown\"])." + "schema": { "type": "integer" }, + "description": "Number of results to return." } ], "responses": { "200": { - "description": "Successful response with the list of URLs.", + "description": "Successful response with crawled pages.", "content": { "application/json": { "schema": { "type": "object", "properties": { - "crawl_id": { - "type": "string", - "description": "Crawl ID" - }, - "object": { - "type": "string", - "description": "The kind of object. \"crawl\" for this endpoint." 
- }, - "status": { - "type": "string", - "description": "`in_progress` or `completed`" - }, - "search_query": { - "type": "string" - }, - "pages_count": { - "type": "number" - }, + "crawl_id": { "type": "string" }, + "object": { "type": "string" }, + "status": { "type": "string" }, + "pages_count": { "type": "number" }, "pages": { "type": "array", "items": { "type": "object", "properties": { - "id": { - "type": "string" - }, - "retrieve_id": { - "type": "string", - "description": "To fetch content from the `/retrieve` endpoint" - }, - "url": { - "type": "string" - }, - "is_external": { - "type": "boolean" - }, - "html_content": { - "type": "string", - "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`." - }, - "markdown_content": { - "type": "string", - "description": "Deprecated: Use `/retrieve` endpoint with `retrieve_id`." - } + "id": { "type": "string" }, + "retrieve_id": { "type": "string" }, + "url": { "type": "string" } } } }, - "metadata": { - "type": "object", - "properties": { - "external_urls": { - "type": "array", - "description": "External URLs that were found during crawl", - "items": { - "type": "string" - } - }, - "failed_urls": { - "type": "array", - "description": "URLs that were found but couldn't be scraped", - "items": { - "type": "string" - } - } - } - }, - "cursor": { - "type": "integer", - "description": "To be passed in the query in next request to get the next items." - } + "cursor": { "type": "integer", "description": "Pagination cursor. Present when there are more results available. Use this value in the next request's `cursor` parameter. See [Pagination](/api-reference/common/pagination) for details." } } } } } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request due to incorrect or missing parameters. 
See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "Invalid parameter format", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Crawl not found for the provided ID." + "description": "Crawl not found or no pages found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "crawl_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Crawl not found or no pages found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } diff --git a/openapi/maps.json b/openapi/maps.json index c6df78da..3bd45603 100644 --- a/openapi/maps.json +++ b/openapi/maps.json @@ -15,18 +15,60 @@ "type": "http", "scheme": "bearer" } + }, + "schemas": { + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." 
+ }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + }, + "Error": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { "type": "string" }, + "object": { "type": "string", "enum": ["error"] }, + "code": { "type": "string" }, + "type": { "type": "string", "format": "uri" }, + "status": { "type": "integer" }, + "title": { "type": "string" }, + "detail": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "errors": { + "type": "array", + "description": "Optional array of additional error details", + "items": {} + } + }, + "required": ["id", "object", "code", "type", "status", "title", "detail", "created"] + } } }, "paths": { "/v1/maps": { "post": { - "summary": "Get all the urls on a certain website", - "description": "This endpoint allows users to get all the urls on a certain website. It can take up to 120 seconds for complex websites. For large websites, results are paginated using cursor-based pagination.", - "security": [ - { - "Authorization": [] - } - ], + "summary": "Get all the URLs on a website", + "description": "Get all URLs from a website's sitemap. Can take up to 120 seconds for complex websites. 
Results are paginated using cursor-based pagination.", + "security": [{ "Authorization": [] }], "requestBody": { "required": true, "content": { @@ -34,147 +76,163 @@ "schema": { "type": "object", "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "The URL of the website for which you want the links" - }, - "search_query": { - "type": "string", - "description": "An optional search query to sort the links by search relevance." - }, - "top_n": { - "type": "number", - "description": "An optional number to limit to only top n links for a search query." - }, - "include_subdomain": { - "type": "boolean", - "description": "Include subdomains of the given URL. `true` by default." - }, - "include_urls": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL path patterns to include using glob syntax. For example: `/blog/**` to only include blog URLs. Only URLs matching these patterns will be returned." - }, - "exclude_urls": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL path patterns to exclude using glob syntax. For example: `/careers/**`. Excluded URLs will supersede included URLs." - }, - "cursor": { - "type": "string", - "description": "OPTIONAL: Pagination cursor from a previous response. When provided, returns the next set of URLs from where the previous request left off due to response size limit." 
- } + "url": { "type": "string", "format": "uri", "description": "The website URL" }, + "search_query": { "type": "string", "description": "Sort links by search relevance" }, + "top_n": { "type": "number", "description": "Limit to top n links" }, + "include_subdomain": { "type": "boolean", "description": "Include subdomains (default true)" }, + "include_urls": { "type": "array", "items": { "type": "string" }, "description": "URL patterns to include (glob)" }, + "exclude_urls": { "type": "array", "items": { "type": "string" }, "description": "URL patterns to exclude (glob)" }, + "cursor": { "type": "string", "description": "Pagination cursor. Omit this parameter on the first request. For subsequent requests, use the `cursor` value from the previous response. See [Pagination](/api-reference/common/pagination) for details." }, + "metadata": { "$ref": "#/components/schemas/Metadata" } }, "required": ["url"] - }, - "examples": { - "basic": { - "value": { - "url": "https://docs.olostep.com" - } - }, - "withFilters": { - "value": { - "url": "https://docs.olostep.com", - "include_urls": ["/api-reference/**"], - "exclude_urls": ["/api-reference/deprecated/**"] - } - }, - "withCursor": { - "value": { - "cursor": "abc123_xyz456" - } - } } } } }, "responses": { "200": { - "description": "Successful response with URLs found on the page", + "description": "Successful response with URLs.", "content": { "application/json": { "schema": { "type": "object", - "required": ["urls_count", "urls"], "properties": { - "id": { - "type": "string", - "description": "Unique identifier for this map" - }, - "urls_count": { - "type": "integer", - "description": "Number of URLs in the current response" - }, - "urls": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of URLs found on the page" - }, - "cursor": { - "type": "string", - "description": "Pagination cursor to retrieve the next set of URLs limited due to 10MB size limit. 
If null or not present, all URLs have been retrieved." - } + "id": { "type": "string" }, + "urls_count": { "type": "integer" }, + "urls": { "type": "array", "items": { "type": "string" } }, + "cursor": { "type": "string", "nullable": true, "description": "Pagination cursor. Present when there are more results available. Use this value in the next request's `cursor` parameter. See [Pagination](/api-reference/common/pagination) for details." }, + "metadata": { "$ref": "#/components/schemas/Metadata" } } }, - "examples": { - "basicResponse": { - "value": { - "id": "map_abc123", - "urls_count": 22, - "urls": [ - "https://docs.olostep.com/api-reference/batches/create", - "https://docs.olostep.com/api-reference/batches/info", - "https://docs.olostep.com/api-reference/batches/items" - ] - } - }, - "cursorResponse": { - "value": { - "id": "map_abc123", - "urls_count": 15, - "urls": [ - "https://docs.olostep.com/api-reference/crawls/create", - "https://docs.olostep.com/api-reference/crawls/info", - "https://docs.olostep.com/api-reference/crawls/pages" - ], - "cursor": "abc123_def789" - } - }, - "allRetrievedResponse": { - "value": { - "id": "map_abc123", - "urls_count": 10, - "urls": [ - "https://docs.olostep.com/features/maps/maps", - "https://docs.olostep.com/get-started/authentication", - "https://docs.olostep.com/get-started/welcome" - ], - "cursor": null - } - } + "example": { + "id": "map_abc123", + "urls_count": 22, + "urls": [ + "https://docs.olostep.com/api-reference/batches/create", + "https://docs.olostep.com/api-reference/batches/info" + ], + "cursor": null } } } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request - invalid parameters. 
See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "url is required or invalid cursor format", + "created": 1704067200, + "metadata": {} + } + } + } }, "401": { - "description": "Unauthorized access due to missing or invalid API token." + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "402": { + "description": "Payment required - credits exhausted. See [Payment Required](/api-reference/errors/payment_required) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "403": { + "description": "Forbidden - access denied. 
See [Forbidden](/api-reference/errors/forbidden) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "approval_required", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "You don't have access to this feature.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "404": { + "description": "Resource not found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "map_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Map not found or could not retrieve page URLs from the sitemap", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } } } -} \ No newline at end of file +} diff --git a/openapi/scrapes.json b/openapi/scrapes.json index 82ca60fe..58fabf57 100644 --- a/openapi/scrapes.json +++ b/openapi/scrapes.json @@ -15,6 +15,103 @@ "type": "http", "scheme": "bearer" } + }, + "schemas": { + "ErrorResponse": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { + "type": "string", + "description": "Unique error identifier (e.g., error_4z93xmi8vw)" + }, + "object": { + "type": "string", + "enum": ["error"], + "description": "Always 'error'" + }, + "code": { + "type": "string", + "description": "Machine-readable error code for programmatic handling" + }, + "type": { + "type": "string", + "format": "uri", + "description": "URI reference identifying the problem type" + }, + "status": { + "type": "integer", + "description": "HTTP status code" + }, + "title": { + "type": "string", + "description": "Short, human-readable summary of the problem type" + }, + "detail": { + "type": "string", + "description": "Human-readable explanation of the specific error" + }, + "created": { + "type": "integer", + "description": "Unix timestamp when the error occurred" + }, + "metadata": { + "type": "object", + "description": "Any user-provided metadata from the original request" + }, + "errors": { + "type": "array", + "description": "Field-level validation errors (only for validation errors)", + "items": { + "type": "object", + "properties": { + "loc": { + "type": "array", + 
"items": { + "oneOf": [ + { "type": "string" }, + { "type": "integer" } + ] + }, + "description": "Path to the field with the error" + }, + "msg": { + "type": "string", + "description": "Error message" + }, + "type": { + "type": "string", + "description": "Error type code" + } + } + } + } + }, + "required": ["object", "id", "code", "type", "status", "title", "detail", "created"] + }, + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." + }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + } } }, "paths": { @@ -243,14 +340,14 @@ "items": { "type": "string" }, - "description": "Filter extracted links using glob patterns with `include_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})." + "description": "Filter extracted links using glob patterns." }, "exclude_links": { "type": "array", "items": { "type": "string" }, - "description": "Filter extracted links using glob patterns with `exclude_links`. Use patterns like \"*.pdf\" to match file extensions, \"/blog/*\" for specific paths, or full URLs like \"https://example.com/*\". 
Supports wildcards (*), character classes ([a-z]), and alternation ({pattern1,pattern2})." + "description": "Filter extracted links using glob patterns." } }, "description": "With this option, you can get all the links present on the page you scrape." @@ -288,8 +385,7 @@ } }, "metadata": { - "type": "object", - "description": "User-defined metadata. Not supported yet" + "$ref": "#/components/schemas/Metadata" } }, "required": [ @@ -320,8 +416,7 @@ "description": "Created epoch" }, "metadata": { - "type": "object", - "description": "User-defined metadata." + "$ref": "#/components/schemas/Metadata" }, "url_to_scrape": { "type": "string", @@ -380,16 +475,193 @@ } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "The provided URL is invalid. This often means there's a white space or no protocol (http/https) was passed along", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. 
See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "402": { - "description": "Payment required due to invalid API key." + "description": "Payment required due to invalid API key or exhausted credits. See [Payment Required](/api-reference/errors/payment_required) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "credits_exhausted", + "type": "https://docs.olostep.com/api-reference/errors/payment_required", + "status": 402, + "title": "Payment Required", + "detail": "You have consumed all available credits. Please upgrade your plan from the dashboard: https://www.olostep.com/dashboard/", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "403": { + "description": "Access denied due to restrictions (blacklisted domain, approval required, unsupported file type). See [Forbidden](/api-reference/errors/forbidden) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "blacklisted_domain", + "type": "https://docs.olostep.com/api-reference/errors/forbidden", + "status": 403, + "title": "Forbidden", + "detail": "The domain you're trying to scrape is currently blacklisted. This limitation can be removed for your account. 
Reach out to info@olostep.com to remove this limitation.", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Not found error when the requested scrape is not found." + "description": "Resource not found. The `detail` field specifies which resource was not found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "parser_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "No parser with this name and/or version found", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "429": { + "description": "Rate limit exceeded. See [Too Many Requests](/api-reference/errors/too_many_requests) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "rate_limit_exceeded", + "type": "https://docs.olostep.com/api-reference/errors/too_many_requests", + "status": 429, + "title": "Too Many Requests", + "detail": "You've reached your rate limits for this API. Please try again later.", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "501": { + "description": "Service unavailable or at capacity. See [Service Unavailable](/api-reference/errors/service_unavailable) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "max_capacity_reached", + "type": "https://docs.olostep.com/api-reference/errors/service_unavailable", + "status": 501, + "title": "Service Unavailable", + "detail": "We have currently reached our maximum capacity for this feature flag. Consider retrying in a few minutes.", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "504": { + "description": "Request timed out. See [Gateway Timeout](/api-reference/errors/gateway_timeout) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "request_timeout", + "type": "https://docs.olostep.com/api-reference/errors/gateway_timeout", + "status": 504, + "title": "Gateway Timeout", + "detail": "Your request timed out. Try increasing the timeout and making another request.", + "created": 1704067200, + "metadata": {} + } + } + } } } } @@ -435,8 +707,7 @@ "description": "Created epoch" }, "metadata": { - "type": "object", - "description": "User-defined metadata." 
+ "$ref": "#/components/schemas/Metadata" }, "url_to_scrape": { "type": "string", @@ -493,16 +764,88 @@ } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request due to incorrect or missing parameters. See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "Invalid scrape ID format", + "created": 1704067200, + "metadata": {} + } + } + } }, - "402": { - "description": "Payment required due to invalid API key." + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Not found error when the requested scrape is not found." + "description": "Resource not found. The `detail` field specifies which resource was not found. 
See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "scrape_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Scrape not found", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } diff --git a/openapi/utility.json b/openapi/utility.json index 87b9135a..dd35bb1c 100644 --- a/openapi/utility.json +++ b/openapi/utility.json @@ -15,27 +15,67 @@ "type": "http", "scheme": "bearer" } + }, + "schemas": { + "Metadata": { + "type": "object", + "description": "Set of key-value pairs for storing additional information about an object. Follows Stripe's approach with validation rules: max 50 keys, key max 40 characters (no square brackets), value max 500 characters, all values stored as strings.", + "additionalProperties": { + "type": "string", + "maxLength": 500, + "description": "Metadata value (max 500 characters). Numbers and booleans are automatically converted to strings." 
+ }, + "maxProperties": 50, + "example": { + "order_id": "12345", + "customer_name": "John Doe", + "priority": "high", + "processed": "true" + }, + "x-validation-rules": { + "max_keys": 50, + "key_max_length": 40, + "key_forbidden_chars": ["[", "]"], + "value_max_length": 500, + "value_types": ["string", "number (coerced)", "boolean (coerced)"] + } + }, + "Error": { + "type": "object", + "description": "RFC 7807 Problem Details error response", + "properties": { + "id": { "type": "string" }, + "object": { "type": "string", "enum": ["error"] }, + "code": { "type": "string" }, + "type": { "type": "string", "format": "uri" }, + "status": { "type": "integer" }, + "title": { "type": "string" }, + "detail": { "type": "string" }, + "created": { "type": "integer" }, + "metadata": { "$ref": "#/components/schemas/Metadata" }, + "errors": { + "type": "array", + "description": "Optional array of additional error details", + "items": {} + } + }, + "required": ["id", "object", "code", "type", "status", "title", "detail", "created"] + } } }, "paths": { "/v1/retrieve": { "get": { "summary": "Retrieve page content", - "description": "Fetches the content of a crawled page using its `retrieve_id`.", - "security": [ - { - "Authorization": [] - } - ], + "description": "Fetches the content of a crawled/scraped/batched page using its `retrieve_id`.", + "security": [{ "Authorization": [] }], "parameters": [ { "name": "retrieve_id", "in": "query", "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the page content to retrieve. Available in the response of `/v1/crawls/{crawl_id}/pages`, `/v1/scrapes/{scrape_id}` or `/v1/batches/{batch_id}/items` endpoints" + "schema": { "type": "string" }, + "description": "The ID of the page content to retrieve." 
}, { "name": "formats", @@ -43,16 +83,9 @@ "required": false, "schema": { "type": "array", - "items": { - "type": "string", - "enum": [ - "html", - "markdown", - "json" - ] - } + "items": { "type": "string", "enum": ["html", "markdown", "json"] } }, - "description": "Optional array to retrieve only specific formats in production. If not provided, all formats will be returned." + "description": "Optional array to retrieve only specific formats." } ], "responses": { @@ -63,50 +96,97 @@ "schema": { "type": "object", "properties": { - "html_content": { - "type": "string", - "description": "HTML content of the page, if requested and available." - }, - "markdown_content": { - "type": "string", - "description": "Markdown content of the page, if requested and available." - }, - "json_content": { - "type": "string", - "description": "JSON content of the page returned from parsers, if requested and available." - }, - "html_hosted_url": { - "type": "string", - "description": "S3 bucket URL of html. Expires in 7 days." - }, - "markdown_hosted_url": { - "type": "string", - "description": "S3 bucket URL of markdown. Expires in 7 days." - }, - "json_hosted_url": { - "type": "string", - "description": "S3 bucket URL of json. Expires in 7 days." - }, - "size_exceeded": { - "type": "boolean", - "description": "If size of content objects exceeds the 6MB limit. If true, use hosted S3 urls to get content." - } + "html_content": { "type": "string" }, + "markdown_content": { "type": "string" }, + "json_content": { "type": "string" }, + "html_hosted_url": { "type": "string" }, + "markdown_hosted_url": { "type": "string" }, + "json_hosted_url": { "type": "string" }, + "size_exceeded": { "type": "boolean" }, + "metadata": { "$ref": "#/components/schemas/Metadata" } } } } } }, "400": { - "description": "Bad request due to incorrect or missing parameters." + "description": "Bad request - missing or invalid parameters. 
See [Bad Request](/api-reference/errors/bad_request) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "validation_error", + "type": "https://docs.olostep.com/api-reference/errors/bad_request", + "status": 400, + "title": "Bad Request", + "detail": "retrieve_id is required", + "created": 1704067200, + "metadata": {} + } + } + } + }, + "401": { + "description": "Authentication credentials are missing or invalid. See [Unauthorized](/api-reference/errors/unauthorized) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "invalid_api_key", + "type": "https://docs.olostep.com/api-reference/errors/unauthorized", + "status": 401, + "title": "Unauthorized", + "detail": "Your API key is invalid", + "created": 1704067200, + "metadata": {} + } + } + } }, "404": { - "description": "Content not found for the provided `retrieve_id`." + "description": "Content not found. See [Not Found](/api-reference/errors/not_found) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "content_not_found", + "type": "https://docs.olostep.com/api-reference/errors/not_found", + "status": 404, + "title": "Not Found", + "detail": "Content not found for the provided retrieve_id", + "created": 1704067200, + "metadata": {} + } + } + } }, "500": { - "description": "Internal server error." + "description": "Internal server error. 
See [Internal Error](/api-reference/errors/internal_error) for details.", + "content": { + "application/problem+json": { + "schema": { "$ref": "#/components/schemas/Error" }, + "example": { + "id": "error_abc123", + "object": "error", + "code": "internal_server_error", + "type": "https://docs.olostep.com/api-reference/errors/internal_error", + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred", + "created": 1704067200, + "metadata": {} + } + } + } } } } } } -} \ No newline at end of file +}