diff --git a/src/clis/baidu-scholar/search.ts b/src/clis/baidu-scholar/search.ts
new file mode 100644
index 00000000..b8e4e8fc
--- /dev/null
+++ b/src/clis/baidu-scholar/search.ts
@@ -0,0 +1,76 @@
+import { cli, Strategy } from '../../registry.js';
+
+/**
+ * `baidu-scholar search <query>`: loads the Baidu Scholar results page in the
+ * browser and scrapes title, authors, journal, year, citation count and URL
+ * from each `.result` node. The row count is capped at 20.
+ */
+cli({
+  site: 'baidu-scholar',
+  name: 'search',
+  description: '百度学术搜索',
+  domain: 'xueshu.baidu.com',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'query', positional: true, required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'authors', 'journal', 'year', 'cited', 'url'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    const query = encodeURIComponent(kwargs.query);
+    await page.goto(`https://xueshu.baidu.com/s?wd=${query}&pn=0&tn=SE_baiduxueshu_c1gjeupa`);
+    await page.wait(5);
+    // The script below is evaluated in the page; a doubled backslash becomes a single one there.
+    const data = await page.evaluate(`
+      (async () => {
+        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
+        // Resolve relative and protocol-relative hrefs the way the browser would.
+        const absolutize = href => {
+          if (!href) return '';
+          try { return new URL(href, document.baseURI).href; } catch { return href; }
+        };
+        // Poll up to ~10s (20 x 500ms) for the result list to render.
+        for (let i = 0; i < 20; i++) {
+          if (document.querySelectorAll('.result').length > 0) break;
+          await new Promise(r => setTimeout(r, 500));
+        }
+        const results = [];
+        for (const el of document.querySelectorAll('.result')) {
+          const titleEl = el.querySelector('h3 a, .paper-title a, .t a');
+          const title = normalize(titleEl?.textContent);
+          if (!title) continue;
+          const url = absolutize(titleEl?.getAttribute('href'));
+          const infoEl = el.querySelector('.paper-info');
+          const infoText = normalize(infoEl?.textContent);
+          const spans = infoEl ? Array.from(infoEl.querySelectorAll('span')) : [];
+          let journal = '';
+          let year = '';
+          let cited = '0';
+          const authParts = [];
+          for (const sp of spans) {
+            const t = normalize(sp.textContent);
+            // Skip empty spans and bare separators (ASCII or full-width comma).
+            if (!t || t === ',' || t === ',') continue;
+            if (t.startsWith('《')) { journal = t.replace(/[《》]/g, ''); continue; }
+            if (t.match(/^被引量[::]/)) { cited = t.match(/(\\d+)/)?.[1] || '0'; continue; }
+            if (t.match(/^-\\s*(\\d{4})/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; }
+            if (t.match(/^\\d{4}年?$/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; }
+            // Spans seen before the journal that are not metadata count as authors.
+            if (!journal && !t.match(/^被引/) && !t.match(/^-/)) authParts.push(t);
+          }
+          const authors = authParts.join(', ').slice(0, 80);
+          // Fall back to the flattened info text when the per-span pass found nothing.
+          if (!year) { const m = infoText.match(/(19|20)\\d{2}/); year = m?.[0] || ''; }
+          if (cited === '0') { const m = infoText.match(/被引量[::]\\s*(\\d+)/); cited = m?.[1] || '0'; }
+          results.push({ rank: results.length + 1, title, authors, journal, year, cited, url });
+          if (results.length >= ${limit}) break;
+        }
+        return results;
+      })()
+    `);
+    return Array.isArray(data) ? data : [];
+  },
+});
diff --git a/src/clis/google-scholar/search.ts b/src/clis/google-scholar/search.ts
new file mode 100644
index 00000000..c6af22fd
--- /dev/null
+++ b/src/clis/google-scholar/search.ts
@@ -0,0 +1,67 @@
+import { cli, Strategy } from '../../registry.js';
+
+/**
+ * `google-scholar search <query>`: loads the Google Scholar results page and
+ * scrapes title, authors, source, year, citation count and URL per result.
+ * The row count is capped at 20.
+ */
+cli({
+  site: 'google-scholar',
+  name: 'search',
+  description: 'Google Scholar 学术搜索',
+  domain: 'scholar.google.com',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'query', positional: true, required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'authors', 'source', 'year', 'cited', 'url'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    const query = encodeURIComponent(kwargs.query);
+    await page.goto(`https://scholar.google.com/scholar?q=${query}&hl=zh-CN`);
+    await page.wait(3);
+    const data = await page.evaluate(`
+      (() => {
+        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
+        const results = [];
+        // The selector can match a result wrapper and the .gs_ri inside it; track
+        // resolved containers so each paper is emitted only once.
+        const seen = new Set();
+        for (const el of document.querySelectorAll('.gs_r.gs_or.gs_scl, .gs_ri')) {
+          const container = el.querySelector('.gs_ri') || el;
+          if (seen.has(container)) continue;
+          seen.add(container);
+          const titleEl = container.querySelector('.gs_rt a, h3 a');
+          const title = normalize(titleEl?.textContent);
+          if (!title) continue;
+          const url = titleEl?.getAttribute('href') || '';
+          // The .gs_a line is parsed as: authors - source[, last piece] - rest.
+          const infoLine = normalize(container.querySelector('.gs_a')?.textContent);
+          const parts = infoLine.split(' - ');
+          const authors = (parts[0] || '').trim();
+          const sourceParts = (parts[1] || '').split(',');
+          // Drop the last comma-separated piece; keep the first piece if that leaves nothing.
+          const source = sourceParts.slice(0, -1).join(',').trim() || sourceParts[0]?.trim() || '';
+          const yearMatch = infoLine.match(/(19|20)\\d{2}/);
+          const citedEl = container.querySelector('.gs_fl a[href*="cites"]');
+          const citedMatch = normalize(citedEl?.textContent).match(/(\\d+)/);
+          results.push({
+            rank: results.length + 1,
+            title,
+            authors: authors.slice(0, 80),
+            source: source.slice(0, 60),
+            year: yearMatch?.[0] || '',
+            cited: citedMatch?.[1] || '0',
+            url,
+          });
+          if (results.length >= ${limit}) break;
+        }
+        return results;
+      })()
+    `);
+    return Array.isArray(data) ? data : [];
+  },
+});
diff --git a/src/clis/gov-law/recent.ts b/src/clis/gov-law/recent.ts
new file mode 100644
index 00000000..639efd69
--- /dev/null
+++ b/src/clis/gov-law/recent.ts
@@ -0,0 +1,25 @@
+import { cli, Strategy } from '../../registry.js';
+import { navigateViaVueRouter, extractLawResults } from './shared.js';
+
+/**
+ * `gov-law recent`: opens the search route of flk.npc.gov.cn without a keyword
+ * and returns the first rows of the listing. The row count is capped at 20.
+ */
+cli({
+  site: 'gov-law',
+  name: 'recent',
+  description: '最新法律法规',
+  domain: 'flk.npc.gov.cn',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'status', 'publish_date', 'type', 'department'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    await navigateViaVueRouter(page, {});
+    return extractLawResults(page, limit);
+  },
+});
diff --git a/src/clis/gov-law/search.ts b/src/clis/gov-law/search.ts
new file mode 100644
index 00000000..c9193e45
--- /dev/null
+++ b/src/clis/gov-law/search.ts
@@ -0,0 +1,47 @@
+import { cli, Strategy } from '../../registry.js';
+import { navigateViaVueRouter, extractLawResults } from './shared.js';
+
+/**
+ * `gov-law search <query>`: routes to the flk.npc.gov.cn search page with the
+ * keyword; if the search box is still empty it types the keyword and presses
+ * Enter in the page. The row count is capped at 20.
+ */
+cli({
+  site: 'gov-law',
+  name: 'search',
+  description: '国家法律法规数据库搜索',
+  domain: 'flk.npc.gov.cn',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'query', positional: true, required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'status', 'publish_date', 'type', 'department'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    await navigateViaVueRouter(page, { searchWord: kwargs.query });
+
+    // Assign through the native value setter and fire input/change so the
+    // framework-bound input registers the text, then simulate Enter.
+    const query = JSON.stringify(kwargs.query);
+    await page.evaluate(`
+      (async () => {
+        const input = document.querySelector('.el-input__inner');
+        if (input && !input.value) {
+          const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set;
+          setter.call(input, ${query});
+          input.dispatchEvent(new Event('input', { bubbles: true }));
+          input.dispatchEvent(new Event('change', { bubbles: true }));
+          await new Promise(r => setTimeout(r, 300));
+          input.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', keyCode: 13, bubbles: true }));
+          input.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', keyCode: 13, bubbles: true }));
+        }
+      })()
+    `);
+    await page.wait(3);
+
+    return extractLawResults(page, limit);
+  },
+});
diff --git a/src/clis/gov-law/shared.ts b/src/clis/gov-law/shared.ts
new file mode 100644
index 00000000..c77704c9
--- /dev/null
+++ b/src/clis/gov-law/shared.ts
@@ -0,0 +1,74 @@
+import { CliError } from '../../errors.js';
+import type { IPage } from '../../types.js';
+
+/**
+ * Navigate to flk.npc.gov.cn and use Vue Router to reach the search page.
+ *
+ * @param page  Browser page used for navigation and script evaluation.
+ * @param query Query parameters passed to the router for the `/search` route.
+ * @throws CliError `FRAMEWORK_CHANGED` when no router is found on `#app`
+ *   (site restructured).
+ */
+export async function navigateViaVueRouter(
+  page: IPage,
+  query: Record<string, string>,
+): Promise<void> {
+  await page.goto('https://flk.npc.gov.cn/index.html');
+  await page.wait(4);
+
+  const routerAvailable = await page.evaluate(`
+    (async () => {
+      const app = document.querySelector('#app');
+      const router = app?.__vue_app__?.config?.globalProperties?.$router;
+      if (!router) return false;
+      await router.push({path: '/search', query: ${JSON.stringify(query)}});
+      return true;
+    })()
+  `);
+
+  if (!routerAvailable) {
+    throw new CliError(
+      'FRAMEWORK_CHANGED',
+      'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.',
+      'Please report this issue so the adapter can be updated.',
+    );
+  }
+
+  await page.wait(5);
+}
+
+/**
+ * Extract law/regulation items from the search results page.
+ *
+ * @param page  Browser page already showing the result list.
+ * @param limit Maximum number of rows to return.
+ * @returns Rows with rank, title, status, publish_date, type and department;
+ *   an empty array when the page script does not return an array.
+ */
+export async function extractLawResults(page: IPage, limit: number): Promise<any[]> {
+  const data = await page.evaluate(`
+    (async () => {
+      const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
+      // Poll up to ~20s (40 x 500ms) for result items to render.
+      for (let i = 0; i < 40; i++) {
+        if (document.querySelectorAll('.result-item').length > 0) break;
+        await new Promise(r => setTimeout(r, 500));
+      }
+      const results = [];
+      const items = document.querySelectorAll('.result-item');
+      for (const el of items) {
+        const title = normalize(el.querySelector('.title-content')?.textContent);
+        if (!title) continue;
+        const statusEl = el.querySelector('[class*="status"]');
+        const status = normalize(statusEl?.textContent);
+        const pubDate = normalize(el.querySelector('.publish-time')?.textContent).replace(/^公布日期[::]\\s*/, '');
+        const type = normalize(el.querySelector('.type')?.textContent);
+        const department = normalize(el.querySelector('.department')?.textContent);
+        results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department });
+        if (results.length >= ${limit}) break;
+      }
+      return results;
+    })()
+  `);
+  return Array.isArray(data) ? data : [];
+}
diff --git a/src/clis/gov-policy/recent.ts b/src/clis/gov-policy/recent.ts
new file mode 100644
index 00000000..f57171eb
--- /dev/null
+++ b/src/clis/gov-policy/recent.ts
@@ -0,0 +1,60 @@
+import { cli, Strategy } from '../../registry.js';
+
+/**
+ * `gov-policy recent`: loads the www.gov.cn latest-policy listing page and
+ * scrapes title, date, source and URL from each list item. The row count is
+ * capped at 20.
+ */
+cli({
+  site: 'gov-policy',
+  name: 'recent',
+  description: '国务院最新政策文件',
+  domain: 'www.gov.cn',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'date', 'source', 'url'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    await page.goto('https://www.gov.cn/zhengce/zuixin/index.htm');
+    await page.wait(4);
+    const data = await page.evaluate(`
+      (async () => {
+        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
+        // Resolve relative and protocol-relative hrefs the way the browser would.
+        const absolutize = href => {
+          if (!href) return '';
+          try { return new URL(href, document.baseURI).href; } catch { return href; }
+        };
+        // Poll up to ~10s (20 x 500ms) for any known list layout to render.
+        for (let i = 0; i < 20; i++) {
+          if (document.querySelector('.news_box li, .list li, .list_item, .news-list li')) break;
+          await new Promise(r => setTimeout(r, 500));
+        }
+        const results = [];
+        const items = document.querySelectorAll('.news_box li, .list li, .list_item, .news-list li');
+        for (const el of items) {
+          const titleEl = el.querySelector('a');
+          const title = normalize(titleEl?.textContent);
+          if (!title || title.length < 4) continue;
+          const url = absolutize(titleEl?.getAttribute('href'));
+          const dateMatch = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/);
+          const source = normalize(el.querySelector('.source, .from')?.textContent);
+          results.push({
+            rank: results.length + 1,
+            title,
+            date: dateMatch?.[1] || '',
+            source,
+            url,
+          });
+          if (results.length >= ${limit}) break;
+        }
+        return results;
+      })()
+    `);
+    return Array.isArray(data) ? data : [];
+  },
+});
diff --git a/src/clis/gov-policy/search.ts b/src/clis/gov-policy/search.ts
new file mode 100644
index 00000000..fdb02f44
--- /dev/null
+++ b/src/clis/gov-policy/search.ts
@@ -0,0 +1,58 @@
+import { cli, Strategy } from '../../registry.js';
+
+/**
+ * `gov-policy search <query>`: loads the sousuo.www.gov.cn search page for the
+ * keyword and scrapes title, description, date and URL from each result item.
+ * The row count is capped at 20.
+ */
+cli({
+  site: 'gov-policy',
+  name: 'search',
+  description: '中国政府网政策文件搜索',
+  domain: 'sousuo.www.gov.cn',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'query', positional: true, required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'description', 'date', 'url'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    const query = encodeURIComponent(kwargs.query);
+    // dataTypeId=107 is the policy library search
+    await page.goto(`https://sousuo.www.gov.cn/sousuo/search.shtml?code=17da70961a7&dataTypeId=107&searchWord=${query}`);
+    await page.wait(5);
+
+    const data = await page.evaluate(`
+      (async () => {
+        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
+        // Resolve non-absolute hrefs against the www.gov.cn origin, including protocol-relative ones.
+        const absolutize = href => {
+          if (!href) return '';
+          try { return new URL(href, 'https://www.gov.cn').href; } catch { return href; }
+        };
+        // Poll up to ~15s (30 x 500ms) for result items to render.
+        for (let i = 0; i < 30; i++) {
+          if (document.querySelectorAll('.basic_result_content .item, .js_basic_result_content .item').length > 0) break;
+          await new Promise(r => setTimeout(r, 500));
+        }
+        const results = [];
+        const items = document.querySelectorAll('.basic_result_content .item, .js_basic_result_content .item');
+        for (const el of items) {
+          const titleEl = el.querySelector('a.title, .title a, a.log-anchor');
+          const title = normalize(titleEl?.textContent).replace(/<[^>]+>/g, '');
+          if (!title || title.length < 4) continue;
+          const url = absolutize(titleEl?.getAttribute('href'));
+          const desc = normalize(el.querySelector('.description')?.textContent).slice(0, 120);
+          const dateMatch = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/);
+          results.push({ rank: results.length + 1, title, description: desc, date: dateMatch?.[1] || '', url });
+          if (results.length >= ${limit}) break;
+        }
+        return results;
+      })()
+    `);
+    return Array.isArray(data) ? data : [];
+  },
+});
diff --git a/src/clis/wanfang/search.ts b/src/clis/wanfang/search.ts
new file mode 100644
index 00000000..2fa7e686
--- /dev/null
+++ b/src/clis/wanfang/search.ts
@@ -0,0 +1,78 @@
+import { cli, Strategy } from '../../registry.js';
+
+/**
+ * `wanfang search <query>`: loads the Wanfang paper search page and scrapes
+ * title, authors, source, year, type, citation count and URL for each
+ * `span.title` found. The row count is capped at 20.
+ */
+cli({
+  site: 'wanfang',
+  name: 'search',
+  description: '万方数据论文搜索',
+  domain: 's.wanfangdata.com.cn',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'query', positional: true, required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
+  ],
+  columns: ['rank', 'title', 'authors', 'source', 'year', 'type', 'cited', 'url'],
+  navigateBefore: false,
+  func: async (page, kwargs) => {
+    const limit = Math.min(kwargs.limit || 10, 20);
+    const query = encodeURIComponent(kwargs.query);
+    await page.goto(`https://s.wanfangdata.com.cn/paper?q=${query}`);
+    await page.wait(5);
+
+    const data = await page.evaluate(`
+      (async () => {
+        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
+        // Poll up to ~15s (30 x 500ms) for titles to render.
+        for (let i = 0; i < 30; i++) {
+          if (document.querySelectorAll('span.title').length > 0) break;
+          await new Promise(r => setTimeout(r, 500));
+        }
+        const titleSpans = document.querySelectorAll('span.title');
+        const results = [];
+        for (const titleSpan of titleSpans) {
+          const title = normalize(titleSpan.textContent);
+          if (!title || title.length < 3) continue;
+          // Walk up (at most 6 levels, never into BODY) to the nearest ancestor that
+          // also holds a span.authors; that ancestor is treated as the result container.
+          let container = titleSpan.parentElement;
+          for (let i = 0; i < 6; i++) {
+            if (!container.parentElement || container.parentElement.tagName === 'BODY') break;
+            if (container.querySelectorAll('span.authors').length >= 1) break;
+            container = container.parentElement;
+          }
+          const idEl = container.querySelector('span.title-id-hidden');
+          const id = normalize(idEl?.textContent);
+          const url = id ? 'https://d.wanfangdata.com.cn/' + id : '';
+
+          const authorEls = container.querySelectorAll('span.authors');
+          const authors = Array.from(authorEls).map(a => normalize(a.textContent)).filter(Boolean).join(', ').slice(0, 80);
+
+          const typeEl = container.querySelector('span.essay-type');
+          const type = normalize(typeEl?.textContent);
+
+          const periodicalEl = container.querySelector('span.periodical, span.source');
+          const source = normalize(periodicalEl?.textContent);
+
+          const yearEl = container.querySelector('span.year, span.date');
+          let year = normalize(yearEl?.textContent);
+          // No dedicated year element: take the first 19xx/20xx in the container text.
+          if (!year) year = (container.textContent || '').match(/(19|20)\\d{2}/)?.[0] || '';
+
+          const citedEl = container.querySelector('.stat-item.quote, [class*="quote"]');
+          const citedMatch = normalize(citedEl?.textContent).match(/(\\d+)/);
+          const cited = citedMatch?.[1] || '0';
+
+          results.push({ rank: results.length + 1, title, authors, source, year, type, cited, url });
+          if (results.length >= ${limit}) break;
+        }
+        return results;
+      })()
+    `);
+    return Array.isArray(data) ? data : [];
+  },
+});
diff --git a/tests/e2e/browser-public.test.ts b/tests/e2e/browser-public.test.ts
index 1ecdac4b..df7cb411 100644
--- a/tests/e2e/browser-public.test.ts
+++ b/tests/e2e/browser-public.test.ts
@@ -202,4 +202,70 @@ describe('browser public-data commands E2E', () => {
     const data = await tryBrowserCommand(['yahoo-finance', 'quote', '--symbol', 'AAPL', '-f', 'json']);
     expectDataOrSkip(data, 'yahoo-finance quote');
   }, 60_000);
+
+  // ── baidu-scholar (browser: true, strategy: public) ──
+  it('baidu-scholar search returns papers', async () => {
+    const data = await tryBrowserCommand(['baidu-scholar', 'search', '深度学习', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'baidu-scholar search');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+      expect(data[0]).toHaveProperty('rank');
+    }
+  }, 60_000);
+
+  // ── google-scholar (browser: true, strategy: public) ──
+  it('google-scholar search returns papers', async () => {
+    const data = await tryBrowserCommand(['google-scholar', 'search', 'quantum computing', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'google-scholar search');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+      expect(data[0]).toHaveProperty('authors');
+    }
+  }, 60_000);
+
+  // ── wanfang (browser: true, strategy: public) ──
+  it('wanfang search returns papers', async () => {
+    const data = await tryBrowserCommand(['wanfang', 'search', '人工智能', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'wanfang search');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+      expect(data[0]).toHaveProperty('rank');
+    }
+  }, 60_000);
+
+  // ── gov-law (browser: true, strategy: public, Vue SPA) ──
+  it('gov-law recent returns laws', async () => {
+    const data = await tryBrowserCommand(['gov-law', 'recent', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'gov-law recent');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+      expect(data[0]).toHaveProperty('publish_date');
+    }
+  }, 60_000);
+
+  it('gov-law search returns results', async () => {
+    const data = await tryBrowserCommand(['gov-law', 'search', '数据安全', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'gov-law search');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+    }
+  }, 60_000);
+
+  // ── gov-policy (browser: true, strategy: public) ──
+  it('gov-policy recent returns policies', async () => {
+    const data = await tryBrowserCommand(['gov-policy', 'recent', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'gov-policy recent');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+      expect(data[0]).toHaveProperty('date');
+    }
+  }, 60_000);
+
+  it('gov-policy search returns results', async () => {
+    const data = await tryBrowserCommand(['gov-policy', 'search', '数据安全', '--limit', '3', '-f', 'json']);
+    expectDataOrSkip(data, 'gov-policy search');
+    if (data) {
+      expect(data[0]).toHaveProperty('title');
+    }
+  }, 60_000);
 });