Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions src/clis/baidu-scholar/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { cli, Strategy } from '../../registry.js';

/**
 * `baidu-scholar search <query>`: opens the Baidu Scholar result page for the
 * query and scrapes up to `limit` rows from the `.result` elements.
 *
 * Row fields: rank, title, authors, journal, year, cited, url.
 * Resolves to an empty array when the page script does not return an array.
 */
cli({
  site: 'baidu-scholar',
  name: 'search',
  description: '百度学术搜索',
  domain: 'xueshu.baidu.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: '搜索关键词' },
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'authors', 'journal', 'year', 'cited', 'url'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    // Coerce before clamping: a non-numeric limit would otherwise become NaN,
    // and `results.length >= NaN` is never true, which disables the cap.
    const requested = Math.trunc(Number(kwargs.limit)) || 10;
    const limit = Math.max(1, Math.min(requested, 20));
    const query = encodeURIComponent(kwargs.query);
    await page.goto(`https://xueshu.baidu.com/s?wd=${query}&pn=0&tn=SE_baiduxueshu_c1gjeupa`);
    await page.wait(5);

    // The script below runs inside the page. Backslashes are doubled so that
    // the regex/unicode escapes survive the outer template literal.
    const data = await page.evaluate(`
      (async () => {
        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
        /* Resolve relative and protocol-relative hrefs; keep the raw value if it cannot be parsed. */
        const toAbsolute = href => {
          if (!href || /^https?:/i.test(href)) return href;
          try { return new URL(href, 'https://xueshu.baidu.com/').href; } catch (e) { return href; }
        };
        /* Poll (20 x 500 ms) until at least one result card is present. */
        for (let i = 0; i < 20; i++) {
          if (document.querySelectorAll('.result').length > 0) break;
          await new Promise(r => setTimeout(r, 500));
        }
        const results = [];
        for (const el of document.querySelectorAll('.result')) {
          const titleEl = el.querySelector('h3 a, .paper-title a, .t a');
          const title = normalize(titleEl?.textContent);
          if (!title) continue;
          const url = toAbsolute(titleEl?.getAttribute('href') || '');
          const infoEl = el.querySelector('.paper-info');
          const infoText = normalize(infoEl?.textContent);
          const spans = infoEl ? Array.from(infoEl.querySelectorAll('span')) : [];
          let journal = '';
          let year = '';
          let cited = '0';
          const authParts = [];
          for (const sp of spans) {
            const t = normalize(sp.textContent);
            /* Skip empty spans and bare separators (ASCII or full-width comma). */
            if (!t || t === ',' || t === '\\uFF0C') continue;
            if (t.startsWith('《')) { journal = t.replace(/[《》]/g, ''); continue; }
            if (t.match(/^被引量[:\\uFF1A]/)) { cited = t.match(/(\\d+)/)?.[1] || '0'; continue; }
            if (t.match(/^-\\s*(\\d{4})/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; }
            if (t.match(/^\\d{4}年?$/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; }
            /* Anything seen before the journal span is treated as an author name. */
            if (!journal && !t.match(/^被引/) && !t.match(/^-/)) authParts.push(t);
          }
          const authors = authParts.join(', ').slice(0, 80);
          /* Fall back to the flattened info text when the spans gave nothing. */
          if (!year) { const m = infoText.match(/(19|20)\\d{2}/); year = m?.[0] || ''; }
          if (!cited || cited === '0') {
            const m = infoText.match(/被引量[:\\uFF1A]\\s*(\\d+)/);
            cited = m?.[1] || '0';
          }
          results.push({ rank: results.length + 1, title, authors, journal, year, cited, url });
          if (results.length >= ${limit}) break;
        }
        return results;
      })()
    `);
    return Array.isArray(data) ? data : [];
  },
});
55 changes: 55 additions & 0 deletions src/clis/google-scholar/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { cli, Strategy } from '../../registry.js';

/**
 * `google-scholar search <query>`: opens the Google Scholar result page for
 * the query and scrapes up to `limit` rows.
 *
 * Row fields: rank, title, authors, source, year, cited, url.
 * Resolves to an empty array when the page script does not return an array.
 */
cli({
  site: 'google-scholar',
  name: 'search',
  description: 'Google Scholar 学术搜索',
  domain: 'scholar.google.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: '搜索关键词' },
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'authors', 'source', 'year', 'cited', 'url'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    // Coerce before clamping: a non-numeric limit would otherwise become NaN,
    // and `results.length >= NaN` is never true, which disables the cap.
    const requested = Math.trunc(Number(kwargs.limit)) || 10;
    const limit = Math.max(1, Math.min(requested, 20));
    const query = encodeURIComponent(kwargs.query);
    await page.goto(`https://scholar.google.com/scholar?q=${query}&hl=zh-CN`);
    await page.wait(3);

    // The script below runs inside the page. Backslashes are doubled so that
    // the regex escapes survive the outer template literal.
    const data = await page.evaluate(`
      (() => {
        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
        const results = [];
        /* The selector matches both an outer card and the .gs_ri inside it, so the
           same paper can be visited twice; remember which containers were handled. */
        const seen = new Set();
        for (const el of document.querySelectorAll('.gs_r.gs_or.gs_scl, .gs_ri')) {
          const container = el.querySelector('.gs_ri') || el;
          if (seen.has(container)) continue;
          seen.add(container);
          const titleEl = container.querySelector('.gs_rt a, h3 a');
          const title = normalize(titleEl?.textContent);
          if (!title) continue;
          const url = titleEl?.getAttribute('href') || '';
          /* The .gs_a line is split on ' - ': first part authors, second part "source, year". */
          const infoLine = normalize(container.querySelector('.gs_a')?.textContent);
          const parts = infoLine.split(' - ');
          const authors = (parts[0] || '').trim();
          const sourceParts = (parts[1] || '').split(',');
          /* Drop the trailing comma-separated piece; if nothing is left, use the first piece. */
          const source = sourceParts.slice(0, -1).join(',').trim() || sourceParts[0]?.trim() || '';
          const yearMatch = infoLine.match(/(19|20)\\d{2}/);
          const citedEl = container.querySelector('.gs_fl a[href*="cites"]');
          const citedMatch = normalize(citedEl?.textContent).match(/(\\d+)/);
          results.push({
            rank: results.length + 1,
            title,
            authors: authors.slice(0, 80),
            source: source.slice(0, 60),
            year: yearMatch?.[0] || '',
            cited: citedMatch?.[1] || '0',
            url,
          });
          if (results.length >= ${limit}) break;
        }
        return results;
      })()
    `);
    return Array.isArray(data) ? data : [];
  },
});
21 changes: 21 additions & 0 deletions src/clis/gov-law/recent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { cli, Strategy } from '../../registry.js';
import { navigateViaVueRouter, extractLawResults } from './shared.js';

/**
 * `gov-law recent`: navigates to the flk.npc.gov.cn search route with an empty
 * query and returns up to `limit` rows scraped by `extractLawResults`.
 */
cli({
  site: 'gov-law',
  name: 'recent',
  description: '最新法律法规',
  domain: 'flk.npc.gov.cn',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'status', 'publish_date', 'type', 'department'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    // Coerce before clamping so that a non-numeric or negative limit is never
    // forwarded to the extractor as NaN or a negative number.
    const requested = Math.trunc(Number(kwargs.limit)) || 10;
    const limit = Math.max(1, Math.min(requested, 20));
    await navigateViaVueRouter(page, {});
    return extractLawResults(page, limit);
  },
});
41 changes: 41 additions & 0 deletions src/clis/gov-law/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { cli, Strategy } from '../../registry.js';
import { navigateViaVueRouter, extractLawResults } from './shared.js';

/**
 * `gov-law search <query>`: navigates to the flk.npc.gov.cn search route with
 * `searchWord` set to the query, makes sure the search box carries the query,
 * and returns up to `limit` rows scraped by `extractLawResults`.
 */
cli({
  site: 'gov-law',
  name: 'search',
  description: '国家法律法规数据库搜索',
  domain: 'flk.npc.gov.cn',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: '搜索关键词' },
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'status', 'publish_date', 'type', 'department'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    // Coerce before clamping so that a non-numeric or negative limit is never
    // forwarded to the extractor as NaN or a negative number.
    const requested = Math.trunc(Number(kwargs.limit)) || 10;
    const limit = Math.max(1, Math.min(requested, 20));
    await navigateViaVueRouter(page, { searchWord: kwargs.query });

    // JSON.stringify turns the query into a safely quoted JS string literal
    // for embedding in the page script.
    const query = JSON.stringify(kwargs.query);

    // If the first `.el-input__inner` is still empty, write the query through
    // the native value setter and fire input/change so the framework notices,
    // then simulate pressing Enter to submit the search.
    await page.evaluate(`
      (async () => {
        const input = document.querySelector('.el-input__inner');
        if (input && !input.value) {
          const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set;
          setter.call(input, ${query});
          input.dispatchEvent(new Event('input', { bubbles: true }));
          input.dispatchEvent(new Event('change', { bubbles: true }));
          await new Promise(r => setTimeout(r, 300));
          input.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', keyCode: 13, bubbles: true }));
          input.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', keyCode: 13, bubbles: true }));
        }
      })()
    `);
    await page.wait(3);

    return extractLawResults(page, limit);
  },
});
64 changes: 64 additions & 0 deletions src/clis/gov-law/shared.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { CliError } from '../../errors.js';
import type { IPage } from '../../types.js';

/**
 * Open the flk.npc.gov.cn index page, then push the `/search` route (with the
 * given query parameters) through the router found at
 * `#app.__vue_app__.config.globalProperties.$router`.
 *
 * @param page  Browser page used for navigation and script evaluation.
 * @param query Key/value pairs forwarded as the route's `query`.
 * @throws CliError (`FRAMEWORK_CHANGED`) when no router can be found on the page.
 */
export async function navigateViaVueRouter(
  page: IPage,
  query: Record<string, string>,
): Promise<void> {
  await page.goto('https://flk.npc.gov.cn/index.html');
  await page.wait(4);

  // The query is serialised to JSON so it can be embedded as an object literal
  // in the script that runs inside the page.
  const serializedQuery = JSON.stringify(query);
  const pushRouteScript = `
(async () => {
const app = document.querySelector('#app');
const router = app?.__vue_app__?.config?.globalProperties?.$router;
if (!router) return false;
await router.push({path: '/search', query: ${serializedQuery}});
return true;
})()
`;
  const pushed = await page.evaluate(pushRouteScript);

  if (pushed) {
    // Fixed pause after the route change before callers start scraping.
    await page.wait(5);
    return;
  }

  throw new CliError(
    'FRAMEWORK_CHANGED',
    'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.',
    'Please report this issue so the adapter can be updated.',
  );
}

/**
 * Scrape law/regulation rows from the search results currently shown in `page`.
 *
 * The page script polls (40 x 500 ms) for `.result-item` elements, then reads
 * title, status, publish date, type and department from each item, skipping
 * items without a title and stopping once `limit` rows were collected.
 *
 * @param page  Browser page whose DOM is scraped.
 * @param limit Maximum number of rows to return.
 * @returns The scraped rows, or an empty array if the script returned a non-array.
 */
export async function extractLawResults(page: IPage, limit: number): Promise<any[]> {
  // The script below runs inside the page. Backslashes are doubled so that the
  // regex/unicode escapes survive the outer template literal.
  const data = await page.evaluate(`
    (async () => {
      const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
      for (let i = 0; i < 40; i++) {
        if (document.querySelectorAll('.result-item').length > 0) break;
        await new Promise(r => setTimeout(r, 500));
      }
      const results = [];
      const items = document.querySelectorAll('.result-item');
      for (const el of items) {
        const title = normalize(el.querySelector('.title-content')?.textContent);
        if (!title) continue;
        const statusEl = el.querySelector('[class*="status"]');
        const status = normalize(statusEl?.textContent);
        /* Strip the date label whether it ends in an ASCII or a full-width colon. */
        const pubDate = normalize(el.querySelector('.publish-time')?.textContent)
          .replace(/^公布日期[:\\uFF1A]\\s*/, '');
        const type = normalize(el.querySelector('.type')?.textContent);
        const department = normalize(el.querySelector('.department')?.textContent);
        results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department });
        if (results.length >= ${limit}) break;
      }
      return results;
    })()
  `);
  return Array.isArray(data) ? data : [];
}
50 changes: 50 additions & 0 deletions src/clis/gov-policy/recent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { cli, Strategy } from '../../registry.js';

/**
 * `gov-policy recent`: opens the www.gov.cn "latest policies" listing and
 * scrapes up to `limit` rows from its list items.
 *
 * Row fields: rank, title, date, source, url.
 * Resolves to an empty array when the page script does not return an array.
 */
cli({
  site: 'gov-policy',
  name: 'recent',
  description: '国务院最新政策文件',
  domain: 'www.gov.cn',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'date', 'source', 'url'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    // Coerce before clamping: a non-numeric limit would otherwise become NaN,
    // and `results.length >= NaN` is never true, which disables the cap.
    const requested = Math.trunc(Number(kwargs.limit)) || 10;
    const limit = Math.max(1, Math.min(requested, 20));
    await page.goto('https://www.gov.cn/zhengce/zuixin/index.htm');
    await page.wait(4);

    // The script below runs inside the page. Backslashes are doubled so that
    // the regex escapes survive the outer template literal.
    const data = await page.evaluate(`
      (async () => {
        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
        const itemSelector = '.news_box li, .list li, .list_item, .news-list li';
        /* Resolve relative hrefs (./x, ../x, /x, //host/x) against the page;
           keep the raw value if it cannot be parsed. */
        const toAbsolute = href => {
          if (!href || /^https?:/i.test(href)) return href;
          try { return new URL(href, document.baseURI || 'https://www.gov.cn/').href; } catch (e) { return href; }
        };
        /* Poll (20 x 500 ms) until a list item shows up. */
        for (let i = 0; i < 20; i++) {
          if (document.querySelector(itemSelector)) break;
          await new Promise(r => setTimeout(r, 500));
        }
        const results = [];
        for (const el of document.querySelectorAll(itemSelector)) {
          const titleEl = el.querySelector('a');
          const title = normalize(titleEl?.textContent);
          /* Titles shorter than four characters are skipped. */
          if (!title || title.length < 4) continue;
          const url = toAbsolute(titleEl?.getAttribute('href') || '');
          const dateMatch = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/);
          const source = normalize(el.querySelector('.source, .from')?.textContent);
          results.push({
            rank: results.length + 1,
            title,
            date: dateMatch?.[1] || '',
            source,
            url,
          });
          if (results.length >= ${limit}) break;
        }
        return results;
      })()
    `);
    return Array.isArray(data) ? data : [];
  },
});
48 changes: 48 additions & 0 deletions src/clis/gov-policy/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { cli, Strategy } from '../../registry.js';

/**
 * `gov-policy search <query>`: opens the sousuo.www.gov.cn search page for the
 * query and scrapes up to `limit` rows from the result items.
 *
 * Row fields: rank, title, description, date, url.
 * Resolves to an empty array when the page script does not return an array.
 */
cli({
  site: 'gov-policy',
  name: 'search',
  description: '中国政府网政策文件搜索',
  domain: 'sousuo.www.gov.cn',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: '搜索关键词' },
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'description', 'date', 'url'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    // Coerce before clamping: a non-numeric limit would otherwise become NaN,
    // and `results.length >= NaN` is never true, which disables the cap.
    const requested = Math.trunc(Number(kwargs.limit)) || 10;
    const limit = Math.max(1, Math.min(requested, 20));
    const query = encodeURIComponent(kwargs.query);
    // dataTypeId=107 is the policy library search
    await page.goto(`https://sousuo.www.gov.cn/sousuo/search.shtml?code=17da70961a7&dataTypeId=107&searchWord=${query}`);
    await page.wait(5);

    // The script below runs inside the page. Backslashes are doubled so that
    // the regex escapes survive the outer template literal.
    const data = await page.evaluate(`
      (async () => {
        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
        const itemSelector = '.basic_result_content .item, .js_basic_result_content .item';
        /* Resolve rooted and protocol-relative hrefs against www.gov.cn;
           keep the raw value if it cannot be parsed. */
        const toAbsolute = href => {
          if (!href || /^https?:/i.test(href)) return href;
          try { return new URL(href, 'https://www.gov.cn/').href; } catch (e) { return href; }
        };
        /* Poll (30 x 500 ms) until at least one result item is present. */
        for (let i = 0; i < 30; i++) {
          if (document.querySelectorAll(itemSelector).length > 0) break;
          await new Promise(r => setTimeout(r, 500));
        }
        const results = [];
        for (const el of document.querySelectorAll(itemSelector)) {
          const titleEl = el.querySelector('a.title, .title a, a.log-anchor');
          /* Remove anything in the title text that looks like an HTML tag. */
          const title = normalize(titleEl?.textContent).replace(/<[^>]+>/g, '');
          if (!title || title.length < 4) continue;
          const url = toAbsolute(titleEl?.getAttribute('href') || '');
          const desc = normalize(el.querySelector('.description')?.textContent).slice(0, 120);
          const dateMatch = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/);
          results.push({ rank: results.length + 1, title, description: desc, date: dateMatch?.[1] || '', url });
          if (results.length >= ${limit}) break;
        }
        return results;
      })()
    `);
    return Array.isArray(data) ? data : [];
  },
});
Loading