From db773131309455760a405ad0c690bb42b5d89521 Mon Sep 17 00:00:00 2001 From: Mu Qiao Date: Sun, 22 Mar 2026 10:51:28 +0100 Subject: [PATCH 1/4] feat: add Chinese academic & policy database adapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 7 new adapters for Chinese academic and government databases: Academic: - baidu-scholar/search: 百度学术论文搜索 (cookie + DOM extraction) - wanfang/search: 万方数据论文搜索 (cookie + DOM extraction) - google-scholar/search: Google Scholar 学术搜索 (cookie + DOM extraction) Policy & Law: - gov-law/search: 国家法律法规数据库搜索 (cookie + Vue Router injection) - gov-law/recent: 最新法律法规 (cookie + Vue Router) - gov-policy/search: 中国政府网政策文件搜索 (cookie + DOM extraction) - gov-policy/recent: 国务院最新政策文件 (cookie + DOM extraction) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/clis/baidu-scholar/search.ts | 60 ++++++++++++++++++++++ src/clis/google-scholar/search.ts | 54 ++++++++++++++++++++ src/clis/gov-law/recent.ts | 56 +++++++++++++++++++++ src/clis/gov-law/search.ts | 82 +++++++++++++++++++++++++++++++ src/clis/gov-policy/recent.ts | 48 ++++++++++++++++++ src/clis/gov-policy/search.ts | 47 ++++++++++++++++++ src/clis/wanfang/search.ts | 75 ++++++++++++++++++++++++++++ 7 files changed, 422 insertions(+) create mode 100644 src/clis/baidu-scholar/search.ts create mode 100644 src/clis/google-scholar/search.ts create mode 100644 src/clis/gov-law/recent.ts create mode 100644 src/clis/gov-law/search.ts create mode 100644 src/clis/gov-policy/recent.ts create mode 100644 src/clis/gov-policy/search.ts create mode 100644 src/clis/wanfang/search.ts diff --git a/src/clis/baidu-scholar/search.ts b/src/clis/baidu-scholar/search.ts new file mode 100644 index 0000000..16b1518 --- /dev/null +++ b/src/clis/baidu-scholar/search.ts @@ -0,0 +1,60 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'baidu-scholar', + name: 'search', + description: '百度学术搜索', + domain: 'xueshu.baidu.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'authors', 'journal', 'year', 'cited', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + const query = encodeURIComponent(kwargs.query); + await page.goto(`https://xueshu.baidu.com/s?wd=${query}&pn=0&tn=SE_baiduxueshu_c1gjeupa`); + await page.wait(5); + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 20; i++) { + if (document.querySelectorAll('.result').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const results = []; + for (const el of document.querySelectorAll('.result')) { + const titleEl = el.querySelector('h3 a, .paper-title a, .t a'); + const title = normalize(titleEl?.textContent); + if (!title) continue; + let url = titleEl?.getAttribute('href') || ''; + if (url && !url.startsWith('http')) url = 'https://xueshu.baidu.com' + url; + const infoEl = el.querySelector('.paper-info'); + const authorEls = infoEl?.querySelectorAll('span.authors, span') || []; + let authors = '', journal = '', year = '', cited = '0'; + const infoText = normalize(infoEl?.textContent); + const spans = infoEl ? Array.from(infoEl.querySelectorAll('span')) : []; + const authParts = []; + for (const sp of spans) { + const t = normalize(sp.textContent); + if (!t || t === ',' || t === ',') continue; + if (t.startsWith('《') || t.startsWith('《')) { journal = t.replace(/[《》]/g, ''); continue; } + if (t.match(/^被引量[::]/)) { cited = t.match(/(\\d+)/)?.[1] || '0'; continue; } + if (t.match(/^-\\s*(\\d{4})/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; } + if (t.match(/^\\d{4}年?$/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; } + if (!journal && !t.match(/^被引/) && !t.match(/^-/)) authParts.push(t); + } + authors = authParts.join(', ').slice(0, 80); + if (!year) { const m = infoText.match(/(19|20)\\d{2}/); year = m?.[0] || ''; } + if (!cited || cited === '0') { const m = infoText.match(/被引量[::]\\s*(\\d+)/); cited = m?.[1] || '0'; } + results.push({ rank: results.length + 1, title, authors, journal, year, cited, url }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/google-scholar/search.ts b/src/clis/google-scholar/search.ts new file mode 100644 index 0000000..e496c20 --- /dev/null +++ b/src/clis/google-scholar/search.ts @@ -0,0 +1,54 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'google-scholar', + name: 'search', + description: 'Google Scholar 学术搜索', + domain: 'scholar.google.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'authors', 'source', 'year', 'cited', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + const query = encodeURIComponent(kwargs.query); + await page.goto(`https://scholar.google.com/scholar?q=${query}&hl=zh-CN`); + await page.wait(3); + const data = await page.evaluate(` + (() => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const results = []; + for (const el of document.querySelectorAll('.gs_r.gs_or.gs_scl, .gs_ri')) { + const container = el.querySelector('.gs_ri') || el; + const titleEl = container.querySelector('.gs_rt a, h3 a'); + const title = normalize(titleEl?.textContent); + if (!title) continue; + const url = titleEl?.getAttribute('href') || ''; + const infoLine = normalize(container.querySelector('.gs_a')?.textContent); + const parts = infoLine.split(' - '); + const authors = (parts[0] || '').trim(); + const sourceParts = (parts[1] || '').split(','); + const source = sourceParts.slice(0, -1).join(',').trim() || sourceParts[0]?.trim() || ''; + const yearMatch = infoLine.match(/(19|20)\\d{2}/); + const citedEl = container.querySelector('.gs_fl a[href*="cites"]'); + const citedMatch = normalize(citedEl?.textContent).match(/(\\d+)/); + results.push({ + rank: results.length + 1, + title, + authors: authors.slice(0, 80), + source: source.slice(0, 60), + year: yearMatch?.[0] || '', + cited: citedMatch?.[1] || '0', + url, + }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/gov-law/recent.ts b/src/clis/gov-law/recent.ts new file mode 100644 index 0000000..014a47b --- /dev/null +++ b/src/clis/gov-law/recent.ts @@ -0,0 +1,56 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'gov-law', + name: 'recent', + description: '最新法律法规', + domain: 'flk.npc.gov.cn', + strategy: Strategy.COOKIE, + args: [ + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'status', 'publish_date', 'type', 'department'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + // Navigate to index, then use Vue Router to search page (shows all, sorted by date) + await page.goto('https://flk.npc.gov.cn/index.html'); + await page.wait(4); + + await page.evaluate(` + (async () => { + const app = document.querySelector('#app'); + const router = app?.__vue_app__?.config?.globalProperties?.$router; + if (router) { + await router.push({path: '/search', query: {}}); + } + })() + `); + await page.wait(4); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 30; i++) { + if (document.querySelectorAll('.result-item').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const results = []; + const items = document.querySelectorAll('.result-item'); + for (const el of items) { + const title = normalize(el.querySelector('.title-content')?.textContent); + if (!title) continue; + const statusEl = el.querySelector('[class*="status"]'); + const status = normalize(statusEl?.textContent); + const pubDate = normalize(el.querySelector('.publish-time')?.textContent).replace(/^公布日期[::]\\s*/, ''); + const type = normalize(el.querySelector('.type')?.textContent); + const department = normalize(el.querySelector('.department')?.textContent); + results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/gov-law/search.ts b/src/clis/gov-law/search.ts new file mode 100644 index 0000000..1db5d45 --- /dev/null +++ b/src/clis/gov-law/search.ts @@ -0,0 +1,82 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'gov-law', + name: 'search', + description: '国家法律法规数据库搜索', + domain: 'flk.npc.gov.cn', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'status', 'publish_date', 'type', 'department'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + await page.goto('https://flk.npc.gov.cn/index.html'); + await page.wait(4); + + // Set search input value via Vue reactivity, then trigger search via Vue Router + const query = JSON.stringify(kwargs.query); + await page.evaluate(` + (async () => { + // Set input value to trigger Vue's v-model binding + const input = document.querySelector('.el-input__inner'); + if (input) { + const nativeSetter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; + nativeSetter.call(input, ${query}); + input.dispatchEvent(new Event('input', { bubbles: true })); + input.dispatchEvent(new Event('change', { bubbles: true })); + } + // Wait for Vue to process the input + await new Promise(r => setTimeout(r, 500)); + // Navigate via Vue Router with searchWord + const app = document.querySelector('#app'); + const router = app?.__vue_app__?.config?.globalProperties?.$router; + if (router) { + await router.push({path: '/search', query: {searchWord: ${query}}}); + } + // After navigation, set the search input again on the search page + await new Promise(r => setTimeout(r, 1000)); + const searchInput = document.querySelector('.el-input__inner'); + if (searchInput && !searchInput.value) { + const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; + setter.call(searchInput, ${query}); + searchInput.dispatchEvent(new Event('input', { bubbles: true })); + searchInput.dispatchEvent(new Event('change', { bubbles: true })); + await new Promise(r => setTimeout(r, 300)); + // Trigger Enter key to execute search + searchInput.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', keyCode: 13, bubbles: true })); + searchInput.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', keyCode: 13, bubbles: true })); + } + })() + `); + await page.wait(5); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 40; i++) { + if (document.querySelectorAll('.result-item').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const results = []; + const items = document.querySelectorAll('.result-item'); + for (const el of items) { + const title = normalize(el.querySelector('.title-content')?.textContent); + if (!title) continue; + const statusEl = el.querySelector('[class*="status"]'); + const status = normalize(statusEl?.textContent); + const pubDate = normalize(el.querySelector('.publish-time')?.textContent).replace(/^公布日期[::]\\s*/, ''); + const type = normalize(el.querySelector('.type')?.textContent); + const department = normalize(el.querySelector('.department')?.textContent); + results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/gov-policy/recent.ts b/src/clis/gov-policy/recent.ts new file mode 100644 index 0000000..cf4f582 --- /dev/null +++ b/src/clis/gov-policy/recent.ts @@ -0,0 +1,48 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'gov-policy', + name: 'recent', + description: '国务院最新政策文件', + domain: 'www.gov.cn', + strategy: Strategy.COOKIE, + args: [ + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'date', 'source', 'url'], + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + await page.goto('https://www.gov.cn/zhengce/zuixin/index.htm'); + await page.wait(4); + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 20; i++) { + if (document.querySelector('.news_box li, .list li, .list_item, .news-list li')) break; + await new Promise(r => setTimeout(r, 500)); + } + const results = []; + const items = document.querySelectorAll('.news_box li, .list li, .list_item, .news-list li'); + for (const el of items) { + const titleEl = el.querySelector('a'); + const title = normalize(titleEl?.textContent); + if (!title || title.length < 4) continue; + let url = titleEl?.getAttribute('href') || ''; + if (url && !url.startsWith('http')) url = 'https://www.gov.cn' + url; + const dateMatch = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/); + const source = normalize(el.querySelector('.source, .from')?.textContent); + results.push({ + rank: results.length + 1, + title, + date: dateMatch?.[1] || '', + source, + url, + }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/gov-policy/search.ts b/src/clis/gov-policy/search.ts new file mode 100644 index 0000000..b02b7ba --- /dev/null +++ b/src/clis/gov-policy/search.ts @@ -0,0 +1,47 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'gov-policy', + name: 'search', + description: '中国政府网政策文件搜索', + domain: 'sousuo.www.gov.cn', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'description', 'date', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + const query = encodeURIComponent(kwargs.query); + // dataTypeId=107 is the policy library search + await page.goto(`https://sousuo.www.gov.cn/sousuo/search.shtml?code=17da70961a7&dataTypeId=107&searchWord=${query}`); + await page.wait(5); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 30; i++) { + if (document.querySelectorAll('.basic_result_content .item, .js_basic_result_content .item').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const results = []; + const items = document.querySelectorAll('.basic_result_content .item, .js_basic_result_content .item'); + for (const el of items) { + const titleEl = el.querySelector('a.title, .title a, a.log-anchor'); + let title = normalize(titleEl?.textContent).replace(/<[^>]+>/g, ''); + if (!title || title.length < 4) continue; + let url = titleEl?.getAttribute('href') || ''; + if (url && !url.startsWith('http')) url = 'https://www.gov.cn' + url; + const desc = normalize(el.querySelector('.description')?.textContent).slice(0, 120); + const dateMatch = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/); + results.push({ rank: results.length + 1, title, description: desc, date: dateMatch?.[1] || '', url }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/wanfang/search.ts b/src/clis/wanfang/search.ts new file mode 100644 index 0000000..79e7be7 --- /dev/null +++ b/src/clis/wanfang/search.ts @@ -0,0 +1,75 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'wanfang', + name: 'search', + description: '万方数据论文搜索', + domain: 's.wanfangdata.com.cn', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'authors', 'source', 'year', 'type', 'cited', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + const query = encodeURIComponent(kwargs.query); + await page.goto(`https://s.wanfangdata.com.cn/paper?q=${query}`); + await page.wait(5); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 30; i++) { + if (document.querySelectorAll('span.title').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const titleSpans = document.querySelectorAll('span.title'); + const results = []; + for (const titleSpan of titleSpans) { + const title = normalize(titleSpan.textContent); + if (!title || title.length < 3) continue; + // Walk up to find the result container (div.detail-list-wrap or similar) + let container = titleSpan.parentElement; + for (let i = 0; i < 6; i++) { + if (!container.parentElement || container.parentElement.tagName === 'BODY') break; + // Check if this container has exactly one span.title + if (container.querySelectorAll('span.title').length >= 1 && container.querySelectorAll('span.authors').length >= 1) break; + container = container.parentElement; + } + const idEl = container.querySelector('span.title-id-hidden'); + const id = normalize(idEl?.textContent); + let url = ''; + if (id) url = 'https://d.wanfangdata.com.cn/' + id; + + const authorEls = container.querySelectorAll('span.authors'); + const authors = Array.from(authorEls).map(a => normalize(a.textContent)).filter(Boolean).join(', ').slice(0, 80); + + const typeEl = container.querySelector('span.essay-type'); + const type = normalize(typeEl?.textContent); + + const periodicalEl = container.querySelector('span.periodical, span.source'); + const source = normalize(periodicalEl?.textContent); + + const yearEl = container.querySelector('span.year, span.date'); + let year = normalize(yearEl?.textContent); + if (!year) { + const allText = container.textContent || ''; + const ym = allText.match(/(19|20)\\d{2}/); + year = ym?.[0] || ''; + } + + const citedEl = container.querySelector('.stat-item.quote, [class*="quote"]'); + const citedMatch = normalize(citedEl?.textContent).match(/(\\d+)/); + const cited = citedMatch?.[1] || '0'; + + results.push({ rank: results.length + 1, title, authors, source, year, type, cited, url }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); From 7381eeabec3b60f12dfd343ef77590771c374c1d Mon Sep 17 00:00:00 2001 From: Mu Qiao Date: Sun, 22 Mar 2026 16:13:07 +0100 Subject: [PATCH 2/4] fix: address review feedback on academic/policy adapters - Change Strategy.COOKIE to Strategy.PUBLIC + browser:true for all adapters (these sites serve public data without login) - Add navigateBefore:false to gov-policy/recent.ts (was missing, causing double navigation) - Fix duplicate condition in baidu-scholar/search.ts (both sides of || were identical U+300A) - Add Vue Router null guards with CliError to gov-law/search.ts and gov-law/recent.ts for graceful failure if site restructures Co-Authored-By: Claude Opus 4.6 (1M context) --- src/clis/baidu-scholar/search.ts | 5 +++-- src/clis/google-scholar/search.ts | 3 ++- src/clis/gov-law/recent.ts | 18 ++++++++++++++---- src/clis/gov-law/search.ts | 19 +++++++++++++++---- src/clis/gov-policy/recent.ts | 4 +++- src/clis/gov-policy/search.ts | 3 ++- src/clis/wanfang/search.ts | 3 ++- 7 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/clis/baidu-scholar/search.ts b/src/clis/baidu-scholar/search.ts index 16b1518..b8e4e8f 100644 --- a/src/clis/baidu-scholar/search.ts +++ b/src/clis/baidu-scholar/search.ts @@ -5,7 +5,8 @@ cli({ name: 'search', description: '百度学术搜索', domain: 'xueshu.baidu.com', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'query', positional: true, required: true, help: '搜索关键词' }, { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, @@ -40,7 +41,7 @@ cli({ for (const sp of spans) { const t = normalize(sp.textContent); if (!t || t === ',' || t === ',') continue; - if (t.startsWith('《') || t.startsWith('《')) { journal = t.replace(/[《》]/g, ''); continue; } + if (t.startsWith('《')) { journal = t.replace(/[《》]/g, ''); continue; } if (t.match(/^被引量[::]/)) { cited = t.match(/(\\d+)/)?.[1] || '0'; continue; } if (t.match(/^-\\s*(\\d{4})/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; } if (t.match(/^\\d{4}年?$/)) { year = t.match(/(\\d{4})/)?.[1] || ''; continue; } diff --git a/src/clis/google-scholar/search.ts b/src/clis/google-scholar/search.ts index e496c20..c6af22f 100644 --- a/src/clis/google-scholar/search.ts +++ b/src/clis/google-scholar/search.ts @@ -5,7 +5,8 @@ cli({ name: 'search', description: 'Google Scholar 学术搜索', domain: 'scholar.google.com', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'query', positional: true, required: true, help: '搜索关键词' }, { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, diff --git a/src/clis/gov-law/recent.ts b/src/clis/gov-law/recent.ts index 014a47b..c28091c 100644 --- a/src/clis/gov-law/recent.ts +++ b/src/clis/gov-law/recent.ts @@ -1,11 +1,13 @@ import { cli, Strategy } from '../../registry.js'; +import { CliError } from '../../errors.js'; cli({ site: 'gov-law', name: 'recent', description: '最新法律法规', domain: 'flk.npc.gov.cn', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, ], @@ -21,13 +23,21 @@ cli({ (async () => { const app = document.querySelector('#app'); const router = app?.__vue_app__?.config?.globalProperties?.$router; - if (router) { - await router.push({path: '/search', query: {}}); - } + if (!router) return 'no_router'; + await router.push({path: '/search', query: {}}); })() `); await page.wait(4); + const navResult = await page.evaluate(`location.href`); + if (typeof navResult === 'string' && !navResult.includes('/search')) { + throw new CliError( + 'FRAMEWORK_CHANGED', + 'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.', + 'Please report this issue so the adapter can be updated.', + ); + } + const data = await page.evaluate(` (async () => { const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); diff --git a/src/clis/gov-law/search.ts b/src/clis/gov-law/search.ts index 1db5d45..3aca246 100644 --- a/src/clis/gov-law/search.ts +++ b/src/clis/gov-law/search.ts @@ -1,11 +1,13 @@ import { cli, Strategy } from '../../registry.js'; +import { CliError } from '../../errors.js'; cli({ site: 'gov-law', name: 'search', description: '国家法律法规数据库搜索', domain: 'flk.npc.gov.cn', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'query', positional: true, required: true, help: '搜索关键词' }, { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, @@ -34,9 +36,8 @@ cli({ // Navigate via Vue Router with searchWord const app = document.querySelector('#app'); const router = app?.__vue_app__?.config?.globalProperties?.$router; - if (router) { - await router.push({path: '/search', query: {searchWord: ${query}}}); - } + if (!router) return 'no_router'; + await router.push({path: '/search', query: {searchWord: ${query}}}); // After navigation, set the search input again on the search page await new Promise(r => setTimeout(r, 1000)); const searchInput = document.querySelector('.el-input__inner'); @@ -54,6 +55,16 @@ cli({ `); await page.wait(5); + // Check if Vue Router was available + const navResult = await page.evaluate(`location.href`); + if (typeof navResult === 'string' && !navResult.includes('/search')) { + throw new CliError( + 'FRAMEWORK_CHANGED', + 'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.', + 'Please report this issue so the adapter can be updated.', + ); + } + const data = await page.evaluate(` (async () => { const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); diff --git a/src/clis/gov-policy/recent.ts b/src/clis/gov-policy/recent.ts index cf4f582..f57171e 100644 --- a/src/clis/gov-policy/recent.ts +++ b/src/clis/gov-policy/recent.ts @@ -5,11 +5,13 @@ cli({ name: 'recent', description: '国务院最新政策文件', domain: 'www.gov.cn', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, ], columns: ['rank', 'title', 'date', 'source', 'url'], + navigateBefore: false, func: async (page, kwargs) => { const limit = Math.min(kwargs.limit || 10, 20); await page.goto('https://www.gov.cn/zhengce/zuixin/index.htm'); diff --git a/src/clis/gov-policy/search.ts b/src/clis/gov-policy/search.ts index b02b7ba..fdb02f4 100644 --- a/src/clis/gov-policy/search.ts +++ b/src/clis/gov-policy/search.ts @@ -5,7 +5,8 @@ cli({ name: 'search', description: '中国政府网政策文件搜索', domain: 'sousuo.www.gov.cn', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'query', positional: true, required: true, help: '搜索关键词' }, { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, diff --git a/src/clis/wanfang/search.ts b/src/clis/wanfang/search.ts index 79e7be7..2fa7e68 100644 --- a/src/clis/wanfang/search.ts +++ b/src/clis/wanfang/search.ts @@ -5,7 +5,8 @@ cli({ name: 'search', description: '万方数据论文搜索', domain: 's.wanfangdata.com.cn', - strategy: Strategy.COOKIE, + strategy: Strategy.PUBLIC, + browser: true, args: [ { name: 'query', positional: true, required: true, help: '搜索关键词' }, { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, From 8c2701759890d8ef0897c96e75614b0db47c91ee Mon Sep 17 00:00:00 2001 From: Mu Qiao Date: Sun, 22 Mar 2026 16:15:35 +0100 Subject: [PATCH 3/4] test: add E2E tests for academic and policy adapters Add 7 browser-public E2E tests covering all new adapters: - baidu-scholar/search - google-scholar/search - wanfang/search - gov-law/recent, gov-law/search - gov-policy/recent, gov-policy/search Tests use tryBrowserCommand + expectDataOrSkip pattern (warn+pass on geo-blocking/bot-detection, per TESTING.md conventions). Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/e2e/browser-public.test.ts | 66 ++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/e2e/browser-public.test.ts b/tests/e2e/browser-public.test.ts index 1ecdac4..df7cb41 100644 --- a/tests/e2e/browser-public.test.ts +++ b/tests/e2e/browser-public.test.ts @@ -202,4 +202,70 @@ describe('browser public-data commands E2E', () => { const data = await tryBrowserCommand(['yahoo-finance', 'quote', '--symbol', 'AAPL', '-f', 'json']); expectDataOrSkip(data, 'yahoo-finance quote'); }, 60_000); + + // ── baidu-scholar (browser: true, strategy: public) ── + it('baidu-scholar search returns papers', async () => { + const data = await tryBrowserCommand(['baidu-scholar', 'search', '深度学习', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'baidu-scholar search'); + if (data) { + expect(data[0]).toHaveProperty('title'); + expect(data[0]).toHaveProperty('rank'); + } + }, 60_000); + + // ── google-scholar (browser: true, strategy: public) ── + it('google-scholar search returns papers', async () => { + const data = await tryBrowserCommand(['google-scholar', 'search', 'quantum computing', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'google-scholar search'); + if (data) { + expect(data[0]).toHaveProperty('title'); + expect(data[0]).toHaveProperty('authors'); + } + }, 60_000); + + // ── wanfang (browser: true, strategy: public) ── + it('wanfang search returns papers', async () => { + const data = await tryBrowserCommand(['wanfang', 'search', '人工智能', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'wanfang search'); + if (data) { + expect(data[0]).toHaveProperty('title'); + expect(data[0]).toHaveProperty('rank'); + } + }, 60_000); + + // ── gov-law (browser: true, strategy: public, Vue SPA) ── + it('gov-law recent returns laws', async () => { + const data = await tryBrowserCommand(['gov-law', 'recent', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'gov-law recent'); + if (data) { + expect(data[0]).toHaveProperty('title'); + expect(data[0]).toHaveProperty('publish_date'); + } + }, 60_000); + + it('gov-law search returns results', async () => { + const data = await tryBrowserCommand(['gov-law', 'search', '数据安全', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'gov-law search'); + if (data) { + expect(data[0]).toHaveProperty('title'); + } + }, 60_000); + + // ── gov-policy (browser: true, strategy: public) ── + it('gov-policy recent returns policies', async () => { + const data = await tryBrowserCommand(['gov-policy', 'recent', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'gov-policy recent'); + if (data) { + expect(data[0]).toHaveProperty('title'); + expect(data[0]).toHaveProperty('date'); + } + }, 60_000); + + it('gov-policy search returns results', async () => { + const data = await tryBrowserCommand(['gov-policy', 'search', '数据安全', '--limit', '3', '-f', 'json']); + expectDataOrSkip(data, 'gov-policy search'); + if (data) { + expect(data[0]).toHaveProperty('title'); + } + }, 60_000); }); From e2b85d7d904ebd0e216c6f62d10dad7163c08538 Mon Sep 17 00:00:00 2001 From: Mu Qiao Date: Sun, 22 Mar 2026 16:39:33 +0100 Subject: [PATCH 4/4] refactor(gov-law): extract shared helper, address review nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract navigateViaVueRouter() and extractLawResults() into gov-law/shared.ts — eliminates ~15 lines of duplication - CliError is now thrown directly in shared helper (no unconsumed 'no_router' return value) - search.ts and recent.ts simplified to ~20 lines each Co-Authored-By: Claude Opus 4.6 (1M context) --- src/clis/gov-law/recent.ts | 51 ++------------------------- src/clis/gov-law/search.ts | 72 ++++++-------------------------------- src/clis/gov-law/shared.ts | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 110 deletions(-) create mode 100644 src/clis/gov-law/shared.ts diff --git a/src/clis/gov-law/recent.ts b/src/clis/gov-law/recent.ts index c28091c..639efd6 100644 --- a/src/clis/gov-law/recent.ts +++ b/src/clis/gov-law/recent.ts @@ -1,5 +1,5 @@ import { cli, Strategy } from '../../registry.js'; -import { CliError } from '../../errors.js'; +import { navigateViaVueRouter, extractLawResults } from './shared.js'; cli({ site: 'gov-law', @@ -15,52 +15,7 @@ cli({ navigateBefore: false, func: async (page, kwargs) => { const limit = Math.min(kwargs.limit || 10, 20); - // Navigate to index, then use Vue Router to search page (shows all, sorted by date) - await page.goto('https://flk.npc.gov.cn/index.html'); - await page.wait(4); - - await page.evaluate(` - (async () => { - const app = document.querySelector('#app'); - const router = app?.__vue_app__?.config?.globalProperties?.$router; - if (!router) return 'no_router'; - await router.push({path: '/search', query: {}}); - })() - `); - await page.wait(4); - - const navResult = await page.evaluate(`location.href`); - if (typeof navResult === 'string' && !navResult.includes('/search')) { - throw new CliError( - 'FRAMEWORK_CHANGED', - 'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.', - 'Please report this issue so the adapter can be updated.', - ); - } - - const data = await page.evaluate(` - (async () => { - const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); - for (let i = 0; i < 30; i++) { - if (document.querySelectorAll('.result-item').length > 0) break; - await new Promise(r => setTimeout(r, 500)); - } - const results = []; - const items = document.querySelectorAll('.result-item'); - for (const el of items) { - const title = normalize(el.querySelector('.title-content')?.textContent); - if (!title) continue; - const statusEl = el.querySelector('[class*="status"]'); - const status = normalize(statusEl?.textContent); - const pubDate = normalize(el.querySelector('.publish-time')?.textContent).replace(/^公布日期[::]\\s*/, ''); - const type = normalize(el.querySelector('.type')?.textContent); - const department = normalize(el.querySelector('.department')?.textContent); - results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department }); - if (results.length >= ${limit}) break; - } - return results; - })() - `); - return Array.isArray(data) ? data : []; + await navigateViaVueRouter(page, {}); + return extractLawResults(page, limit); }, }); diff --git a/src/clis/gov-law/search.ts b/src/clis/gov-law/search.ts index 3aca246..c9193e4 100644 --- a/src/clis/gov-law/search.ts +++ b/src/clis/gov-law/search.ts @@ -1,5 +1,5 @@ import { cli, Strategy } from '../../registry.js'; -import { CliError } from '../../errors.js'; +import { navigateViaVueRouter, extractLawResults } from './shared.js'; cli({ site: 'gov-law', @@ -16,78 +16,26 @@ cli({ navigateBefore: false, func: async (page, kwargs) => { const limit = Math.min(kwargs.limit || 10, 20); - await page.goto('https://flk.npc.gov.cn/index.html'); - await page.wait(4); + await navigateViaVueRouter(page, { searchWord: kwargs.query }); - // Set search input value via Vue reactivity, then trigger search via Vue Router + // Set search input for Vue reactivity const query = JSON.stringify(kwargs.query); await page.evaluate(` (async () => { - // Set input value to trigger Vue's v-model binding const input = document.querySelector('.el-input__inner'); - if (input) { - const nativeSetter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; - nativeSetter.call(input, ${query}); + if (input && !input.value) { + const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; + setter.call(input, ${query}); input.dispatchEvent(new Event('input', { bubbles: true })); input.dispatchEvent(new Event('change', { bubbles: true })); - } - // Wait for Vue to process the input - await new Promise(r => setTimeout(r, 500)); - // Navigate via Vue Router with searchWord - const app = document.querySelector('#app'); - const router = app?.__vue_app__?.config?.globalProperties?.$router; - if (!router) return 'no_router'; - await router.push({path: '/search', query: {searchWord: ${query}}}); - // After navigation, set the search input again on the search page - await new Promise(r => setTimeout(r, 1000)); - const searchInput = document.querySelector('.el-input__inner'); - if (searchInput && !searchInput.value) { - const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; - setter.call(searchInput, ${query}); - searchInput.dispatchEvent(new Event('input', { bubbles: true })); - searchInput.dispatchEvent(new Event('change', { bubbles: true })); await new Promise(r => setTimeout(r, 300)); - // Trigger Enter key to execute search - searchInput.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', keyCode: 13, bubbles: true })); - searchInput.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', keyCode: 13, bubbles: true })); + input.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', keyCode: 13, bubbles: true })); + input.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', keyCode: 13, bubbles: true })); } })() `); - await page.wait(5); + await page.wait(3); - // Check if Vue Router was available - const navResult = await page.evaluate(`location.href`); - if (typeof navResult === 'string' && !navResult.includes('/search')) { - throw new CliError( - 'FRAMEWORK_CHANGED', - 'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.', - 'Please report this issue so the adapter can be updated.', - ); - } - - const data = await page.evaluate(` - (async () => { - const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); - for (let i = 0; i < 40; i++) { - if (document.querySelectorAll('.result-item').length > 0) break; - await new Promise(r => setTimeout(r, 500)); - } - const results = []; - const items = document.querySelectorAll('.result-item'); - for (const el of items) { - const title = normalize(el.querySelector('.title-content')?.textContent); - if (!title) continue; - const statusEl = el.querySelector('[class*="status"]'); - const status = normalize(statusEl?.textContent); - const pubDate = normalize(el.querySelector('.publish-time')?.textContent).replace(/^公布日期[::]\\s*/, ''); - const type = normalize(el.querySelector('.type')?.textContent); - const department = normalize(el.querySelector('.department')?.textContent); - results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department }); - if (results.length >= ${limit}) break; - } - return results; - })() - `); - return Array.isArray(data) ? data : []; + return extractLawResults(page, limit); }, }); diff --git a/src/clis/gov-law/shared.ts b/src/clis/gov-law/shared.ts new file mode 100644 index 0000000..c77704c --- /dev/null +++ b/src/clis/gov-law/shared.ts @@ -0,0 +1,64 @@ +import { CliError } from '../../errors.js'; +import type { IPage } from '../../types.js'; + +/** + * Navigate to flk.npc.gov.cn and use Vue Router to reach the target page. + * Throws CliError if Vue Router is unavailable (site restructured). + */ +export async function navigateViaVueRouter( + page: IPage, + query: Record, +): Promise { + await page.goto('https://flk.npc.gov.cn/index.html'); + await page.wait(4); + + const routerAvailable = await page.evaluate(` + (async () => { + const app = document.querySelector('#app'); + const router = app?.__vue_app__?.config?.globalProperties?.$router; + if (!router) return false; + await router.push({path: '/search', query: ${JSON.stringify(query)}}); + return true; + })() + `); + + if (!routerAvailable) { + throw new CliError( + 'FRAMEWORK_CHANGED', + 'Could not access Vue Router on flk.npc.gov.cn — the site may have been restructured.', + 'Please report this issue so the adapter can be updated.', + ); + } + + await page.wait(5); +} + +/** + * Extract law/regulation items from the search results page. + */ +export async function extractLawResults(page: IPage, limit: number): Promise { + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 40; i++) { + if (document.querySelectorAll('.result-item').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const results = []; + const items = document.querySelectorAll('.result-item'); + for (const el of items) { + const title = normalize(el.querySelector('.title-content')?.textContent); + if (!title) continue; + const statusEl = el.querySelector('[class*="status"]'); + const status = normalize(statusEl?.textContent); + const pubDate = normalize(el.querySelector('.publish-time')?.textContent).replace(/^公布日期[::]\\s*/, ''); + const type = normalize(el.querySelector('.type')?.textContent); + const department = normalize(el.querySelector('.department')?.textContent); + results.push({ rank: results.length + 1, title, status, publish_date: pubDate, type, department }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; +}