Complete API reference for ScrapeMaster RESTful API.
Base URL: http://localhost/api/v1
All API endpoints (except /auth/register and /auth/login) require JWT authentication.
POST /auth/login
{
"username": "your_username",
"password": "your_password"
}
Response:
{
"access_token": "eyJ0eXAiOiJKV1QiLCJhbGc...",
"token_type": "bearer"
}
Include the token in the Authorization header:
Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGc...
POST /jobs
Create a new scraping job.
{
"url": "https://example.com/products",
"engine": "auto",
"priority": "normal",
"selectors": {
"title": "h1.product-title::text",
"price": "span.price::text"
},
"config": {
"max_pages": 10,
"follow_links": true
}
}
Response: 201 Created
{
"id": "abc123-def456",
"url": "https://example.com/products",
"status": "pending",
"created_at": "2024-01-15T10:30:00Z"
}
GET /jobs?skip=0&limit=50&status=pending
Parameters:
- skip (int): Number of jobs to skip (default: 0)
- limit (int): Max jobs to return (default: 50, max: 100)
- status (string): Filter by status (optional)
Response: 200 OK
[
{
"id": "abc123",
"url": "https://example.com",
"status": "completed",
"progress": 100,
"items_scraped": 150,
"created_at": "2024-01-15T10:30:00Z"
}
]
GET /jobs/{job_id}
Response: 200 OK
{
"id": "abc123",
"url": "https://example.com",
"engine": "scrapy",
"status": "running",
"progress": 45.5,
"items_scraped": 68,
"items_failed": 2,
"created_at": "2024-01-15T10:30:00Z",
"started_at": "2024-01-15T10:31:00Z"
}
DELETE /jobs/{job_id}
Response: 204 No Content
GET /results/{job_id}?skip=0&limit=100
Parameters:
- skip (int): Number of results to skip
- limit (int): Max results to return (default: 100, max: 1000)
Response: 200 OK
{
"job_id": "abc123",
"total": 150,
"skip": 0,
"limit": 100,
"results": [
{
"job_id": "abc123",
"url": "https://example.com/product/1",
"data": {
"title": "Product Name",
"price": "$99.99"
},
"scraped_at": "2024-01-15T10:32:00Z"
}
]
}
GET /results/{job_id}/export?format=json
Parameters:
- format (string): Export format (json or csv)
Response:
- JSON: 200 OK with JSON data
- CSV: 200 OK with CSV file download
GET /monitoring/stats
Response: 200 OK
{
"status_counts": {
"pending": 5,
"running": 2,
"completed": 145,
"failed": 3,
"cancelled": 1
},
"total_items_scraped": 15420,
"jobs_last_24h": 23,
"success_rate": 97.96,
"total_jobs": 156
}
GET /monitoring/proxies
Response: 200 OK
{
"total_proxies": 10,
"by_status": {
"active": {
"count": 8,
"avg_success_rate": 95.5
},
"banned": {
"count": 2,
"avg_success_rate": 15.2
}
}
}
GET /monitoring/system
Response: 200 OK
{
"redis": "healthy",
"mongodb": "healthy",
"api": "healthy",
"timestamp": "2024-01-15T10:45:00Z"
}
{
"detail": "Invalid request data",
"type": "validation_error"
}
{
"detail": "Could not validate credentials"
}
{
"detail": "Job not found"
}
{
"detail": "Internal server error",
"type": "internal_error"
}
API requests are limited to 100 requests per hour per user by default.
Headers:
X-RateLimit-Limit: 100
X-RateLimit-Remaining: 95
X-RateLimit-Reset: 1642251600
Register webhooks to receive notifications when jobs complete:
{
"webhook_url": "https://your-app.com/webhook",
"events": ["job.completed", "job.failed"]
}
import requests
# Login
response = requests.post('http://localhost/api/v1/auth/login', data={
'username': 'admin',
'password': 'password'
})
token = response.json()['access_token']
# Create job
headers = {'Authorization': f'Bearer {token}'}
job_data = {
'url': 'https://example.com',
'engine': 'auto',
'selectors': {
'title': 'h1::text',
'price': '.price::text'
}
}
response = requests.post('http://localhost/api/v1/jobs', json=job_data, headers=headers)
job = response.json()
# Get results
results = requests.get(f'http://localhost/api/v1/results/{job["id"]}', headers=headers)
print(results.json())
// Login
const loginResponse = await fetch('http://localhost/api/v1/auth/login', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
username: 'admin',
password: 'password'
})
});
const { access_token } = await loginResponse.json();
// Create job
const jobResponse = await fetch('http://localhost/api/v1/jobs', {
method: 'POST',
headers: {
'Authorization': `Bearer ${access_token}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
url: 'https://example.com',
engine: 'auto',
selectors: {
title: 'h1::text',
price: '.price::text'
}
})
});
const job = await jobResponse.json();
console.log('Job created:', job.id);
Full OpenAPI (Swagger) documentation is available at:
- Swagger UI: http://localhost/api/docs
- ReDoc: http://localhost/api/redoc
- JSON: http://localhost/api/openapi.json