Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 1.5.0 (2025-12-29)

- feat: add metrics router for Prometheus
- feat: add job routes for admin query
- fix: enhance job handling and logging in crawler

## 1.4.7 (2025-12-15)

- chore: update deps
Expand Down
12 changes: 6 additions & 6 deletions blocklets/snap-kit/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ import path from 'path';
import env from './libs/env';
import { logger } from './libs/logger';
import routes from './routes';
import adminRoutes from './routes/admin';
import metricsRoutes from './routes/metrics';

const { name, version } = require('../../package.json');

dotenv.config();

logger.debug('preferences', env.preferences);

export const app = express();
createLogger.setupAccessLogger(app);

Expand All @@ -26,10 +26,10 @@ app.use(express.json({ limit: '1 mb' }));
app.use(express.urlencoded({ extended: true, limit: '1 mb' }));
app.use(cors());

const router = express.Router();
router.use('/api', routes);
app.use('/api/admin', adminRoutes);
app.use('/api/metrics', metricsRoutes);
app.use('/api', routes);

app.use(router);
app.use('/data', express.static(path.join(env.dataDir, 'data'), { maxAge: '365d', index: false }));

// const isProduction = process.env.NODE_ENV === 'production' || process.env.ABT_NODE_SERVICE_ENV === 'production';
Expand Down Expand Up @@ -61,7 +61,7 @@ export const server = app.listen(port, async (err?: any) => {

try {
await initCrawler({
concurrency: Math.max(1, env.preferences.concurrency || 0),
concurrency: Math.max(1, env.preferences.crawlConcurrency || 0),
siteCron: {
enabled: !!env.preferences.cronEnabled,
immediate: !!env.preferences.cronImmediate,
Expand Down
69 changes: 69 additions & 0 deletions blocklets/snap-kit/api/src/routes/admin.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { Job } from '@arcblock/crawler';
import { Joi } from '@arcblock/validator';
import { auth, session } from '@blocklet/sdk/lib/middlewares';
import { Router } from 'express';

import { logger } from '../libs/logger';

const router = Router();

/**
 * Admin API: job queue statistics.
 *
 * Runs the `session` middleware (access keys enabled) followed by `auth`
 * configured with the `admin` and `owner` roles, then responds with
 * `{ code: 'ok', data }`, where `data` is whatever `Job.stats()` resolves to.
 * The same value is written to the info log.
 */
router.get(
  '/jobs/stats',
  session({ accessKey: true }),
  auth({ roles: ['admin', 'owner'] }),
  async (_req, res) => {
    const stats = await Job.stats();

    logger.info('GET /admin/jobs/stats', stats);

    return res.json({ code: 'ok', data: stats });
  },
);

/**
 * Admin API: paginated job listing.
 *
 * Query parameters (validated by `jobsSchema`):
 * - `page`     integer >= 1, defaults to 1
 * - `pageSize` integer between 1 and 100, defaults to 20
 * - `queue`    optional string, forwarded to `Job.paginate` with the rest
 *
 * Responds with `{ code: 'ok', data }`, where `data` is the value resolved by
 * `Job.paginate(params)`. The validated params and `data.total` are logged.
 */
const jobsSchema = Joi.object({
  page: Joi.number().integer().min(1).default(1),
  pageSize: Joi.number().integer().min(1).max(100).default(20),
  queue: Joi.string(),
});

router.get(
  '/jobs',
  session({ accessKey: true }),
  auth({ roles: ['admin', 'owner'] }),
  async (req, res) => {
    const params = await jobsSchema.validateAsync(req.query);
    const page = await Job.paginate(params);

    logger.info('GET /admin/jobs', { params, total: page.total });

    return res.json({ code: 'ok', data: page });
  },
);

/**
 * Admin API: delete jobs, either a whole queue or an explicit list of ids.
 *
 * Request body (validated by `deleteJobsSchema`): at least one of
 * - `queue` string, handled with `Job.deleteByQueue`
 * - `ids`   array of strings, handled with `Job.deleteByIds`
 *
 * When both keys are present, `queue` takes precedence and `ids` is not used.
 * Responds with `{ code: 'ok', data }`, where `data` is the value resolved by
 * the delete call that ran.
 */
const deleteJobsSchema = Joi.object({
  queue: Joi.string(),
  ids: Joi.array().items(Joi.string()),
}).or('queue', 'ids');

router.delete(
  '/jobs',
  session({ accessKey: true }),
  auth({ roles: ['admin', 'owner'] }),
  async (req, res) => {
    const { queue, ids } = await deleteJobsSchema.validateAsync(req.body);

    // Queue deletion wins over id deletion when both are supplied.
    if (queue) {
      const removed = await Job.deleteByQueue(queue);
      logger.info('DELETE /admin/jobs by queue', { queue, ...removed });
      return res.json({ code: 'ok', data: removed });
    }

    const removed = await Job.deleteByIds(ids);
    logger.info('DELETE /admin/jobs by ids', { count: ids.length, ...removed });
    return res.json({ code: 'ok', data: removed });
  },
);

export default router;
23 changes: 10 additions & 13 deletions blocklets/snap-kit/api/src/routes/index.ts
Original file line number Diff line number Diff line change
@@ -1,29 +1,26 @@
import { crawlCode, crawlUrl, getLatestSnapshot, getSnapshot } from '@arcblock/crawler';
import { Joi } from '@arcblock/validator';
import { auth, session } from '@blocklet/sdk/lib/middlewares';
import { Router } from 'express';
import qs from 'querystring';

import { logger } from '../libs/logger';

const { session, auth } = require('@blocklet/sdk/lib/middlewares');

const router = Router();

router.use(session({ accessKey: true }));

/**
* Crawl page html
*/
const crawlSchema = Joi.object({
url: Joi.string().uri().required(),
headers: Joi.object().pattern(Joi.string(), Joi.string()).max(30),
timeout: Joi.number().integer().min(10).max(120).default(120),
timeout: Joi.number().integer().min(10).max(120).default(60),
waitTime: Joi.number().integer().min(0).max(120).default(0),
cookies: Joi.array().items(Joi.object({ name: Joi.string().required(), value: Joi.string().required() })),
localStorage: Joi.array().items(Joi.object({ key: Joi.string().required(), value: Joi.string().required() })),
sync: Joi.boolean().default(false),
});
router.post('/crawl', auth({ methods: ['accessKey'] }), async (req, res) => {
router.post('/crawl', session({ accessKey: true }), auth({ methods: ['accessKey'] }), async (req, res) => {
const params = await crawlSchema.validateAsync(req.body);

res.setTimeout(params.timeout * 1000, () => {
Expand Down Expand Up @@ -71,7 +68,7 @@ const crawlGetSchema = Joi.object({
url: Joi.string().uri(),
}).or('jobId', 'url');

router.get('/crawl', auth({ methods: ['accessKey'] }), async (req, res) => {
router.get('/crawl', session({ accessKey: true }), auth({ methods: ['accessKey'] }), async (req, res) => {
const params = await crawlGetSchema.validateAsync(req.query);
const snapshot = params.jobId ? await getSnapshot(params.jobId) : await getLatestSnapshot(params.url);

Expand All @@ -94,15 +91,15 @@ const snapSchema = Joi.object({
height: Joi.number().integer().min(500).default(900),
quality: Joi.number().integer().min(1).max(100).default(80),
format: Joi.string().valid('png', 'jpeg', 'webp').default('webp'),
timeout: Joi.number().integer().min(0).max(120).default(120),
timeout: Joi.number().integer().min(0).max(120).default(60),
waitTime: Joi.number().integer().min(0).max(120).default(0),
fullPage: Joi.boolean().default(false),
headers: Joi.object().pattern(Joi.string(), Joi.string()).max(30),
cookies: Joi.array().items(Joi.object({ name: Joi.string().required(), value: Joi.string().required() })),
localStorage: Joi.array().items(Joi.object({ key: Joi.string().required(), value: Joi.string().required() })),
sync: Joi.boolean().default(false),
});
router.post('/snap', auth({ methods: ['accessKey'] }), async (req, res) => {
router.post('/snap', session({ accessKey: true }), auth({ methods: ['accessKey'] }), async (req, res) => {
const params = await snapSchema.validateAsync(req.body);

res.setTimeout(params.timeout * 1000, () => {
Expand Down Expand Up @@ -148,7 +145,7 @@ router.post('/snap', auth({ methods: ['accessKey'] }), async (req, res) => {
const snapGetSchema = Joi.object({
jobId: Joi.string().required(),
});
router.get('/snap', auth({ methods: ['accessKey'] }), async (req, res) => {
router.get('/snap', session({ accessKey: true }), auth({ methods: ['accessKey'] }), async (req, res) => {
const params = await snapGetSchema.validateAsync(req.query);
const snapshot = await getSnapshot(params.jobId);

Expand Down Expand Up @@ -187,10 +184,10 @@ const carbonSchema = Joi.object({
code: Joi.string().required(),
format: Joi.string().valid('png', 'jpeg', 'webp').default('png'),
sync: Joi.boolean().default(false),
timeout: Joi.number().integer().min(0).max(120).default(120),
timeout: Joi.number().integer().min(0).max(120).default(60),
});

router.post('/carbon', auth({ methods: ['accessKey'] }), async (req, res) => {
router.post('/carbon', session({ accessKey: true }), auth({ methods: ['accessKey'] }), async (req, res) => {
const params = await carbonSchema.validateAsync(req.body);
const { sync, timeout, ...carbonParams } = params;

Expand Down Expand Up @@ -235,7 +232,7 @@ router.post('/carbon', auth({ methods: ['accessKey'] }), async (req, res) => {
const carbonGetSchema = Joi.object({
jobId: Joi.string().required(),
});
router.get('/carbon', auth({ methods: ['accessKey'] }), async (req, res) => {
router.get('/carbon', session({ accessKey: true }), auth({ methods: ['accessKey'] }), async (req, res) => {
const params = await carbonGetSchema.validateAsync(req.query);
const snapshot = await getSnapshot(params.jobId);

Expand Down
12 changes: 12 additions & 0 deletions blocklets/snap-kit/api/src/routes/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { getContentType, getMetrics } from '@arcblock/crawler';
import { Router } from 'express';

const router = Router();

/**
 * Metrics endpoint: responds with the payload resolved by `getMetrics()`,
 * using the value of `getContentType()` as the `Content-Type` header.
 * No session or auth middleware is attached in this router.
 */
router.get('/', async (_req, res) => {
  const payload = await getMetrics();

  res.set('Content-Type', getContentType()).end(payload);
});

export default router;
2 changes: 1 addition & 1 deletion blocklets/snap-kit/blocklet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ repository:
type: git
url: git+https://github.com/blocklet/create-blocklet.git
specVersion: 1.2.8
version: 1.4.7
version: 1.5.0
logo: logo.png
files:
- dist
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "crawler",
"private": true,
"version": "1.4.7",
"version": "1.5.0",
"scripts": {
"dev": "pnpm run --filter @arcblock/crawler dev & pnpm run --filter @arcblock/crawler-middleware dev & pnpm run --filter @blocklet/snap-kit dev",
"build:packages": "pnpm -r build",
Expand Down
3 changes: 2 additions & 1 deletion packages/crawler/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@arcblock/crawler",
"version": "1.4.7",
"version": "1.5.0",
"main": "lib/cjs/index.js",
"module": "lib/esm/index.js",
"types": "lib/cjs/index.d.ts",
Expand Down Expand Up @@ -69,6 +69,7 @@
"lodash": "^4.17.21",
"lru-cache": "^10.4.3",
"p-map": "^7.0.3",
"prom-client": "^15.1.3",
"robots-parser": "^3.0.1",
"sitemap": "^7.1.2",
"sqlite3": "^5.1.7",
Expand Down
Loading
Loading