From d1420d0124a7dd1d799dcb7ab620236388d87bce Mon Sep 17 00:00:00 2001 From: Mu Qiao <muqiao@Mus-Mac-mini.local> Date: Sun, 22 Mar 2026 12:29:11 +0100 Subject: [PATCH 1/5] feat(jd,taobao,cnki): add JD, Taobao, and CNKI adapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JD (京东) — full shopping workflow: - jd/search: product search with price, sales, shop, SKU - jd/detail: product detail with ratings, review tags, shop info - jd/reviews: user review extraction - jd/add-cart: add to cart via gate.action API - jd/cart: view cart contents via JD cart API Taobao (淘宝) — search with clean field extraction: - taobao/search: uses CSS class prefix matching (title--, priceInt--, realSales--, shopName--, procity--) to cleanly extract structured data from Taobao's obfuscated DOM CNKI (知网) — overseas portal: - cnki/search: uses oversea.cnki.net to avoid domestic access restrictions, extracts results from search table DOM Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- src/clis/cnki/search.ts | 62 ++++++++++++++++++++++++++ src/clis/jd/add-cart.ts | 64 +++++++++++++++++++++++++++ src/clis/jd/cart.ts | 76 ++++++++++++++++++++++++++++++++ src/clis/jd/detail.ts | 67 ++++++++++++++++++++++++++++ src/clis/jd/reviews.ts | 64 +++++++++++++++++++++++++++ src/clis/jd/search.ts | 71 ++++++++++++++++++++++++++++++ src/clis/taobao/search.ts | 92 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 496 insertions(+) create mode 100644 src/clis/cnki/search.ts create mode 100644 src/clis/jd/add-cart.ts create mode 100644 src/clis/jd/cart.ts create mode 100644 src/clis/jd/detail.ts create mode 100644 src/clis/jd/reviews.ts create mode 100644 src/clis/jd/search.ts create mode 100644 src/clis/taobao/search.ts diff --git a/src/clis/cnki/search.ts b/src/clis/cnki/search.ts new file mode 100644 index 0000000..4fe7501 --- /dev/null +++ b/src/clis/cnki/search.ts @@ -0,0 +1,62 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'cnki', + name: 'search', 
description: '中国知网论文搜索(海外版)', + domain: 'oversea.cnki.net', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' }, + ], + columns: ['rank', 'title', 'authors', 'journal', 'date', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + const query = encodeURIComponent(kwargs.query); + + await page.goto(`https://oversea.cnki.net/kns/search?dbcode=CFLS&kw=${query}&korder=SU`); + await page.wait(8); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 40; i++) { + if (document.querySelector('.result-table-list tbody tr, #gridTable tbody tr')) break; + await new Promise(r => setTimeout(r, 500)); + } + const rows = document.querySelectorAll('.result-table-list tbody tr, #gridTable tbody tr'); + const results = []; + for (const row of rows) { + // CNKI table columns: checkbox | seq | title | authors | journal | date | source_db + const tds = row.querySelectorAll('td'); + if (tds.length < 5) continue; + + // Find the title — it's in td.name or the td with an <a> linking to article + const nameCell = row.querySelector('td.name') || tds[2]; + const titleEl = nameCell?.querySelector('a'); + const title = normalize(titleEl?.textContent).replace(/免费$/, ''); + if (!title) continue; + + let url = titleEl?.getAttribute('href') || ''; + if (url && !url.startsWith('http')) url = 'https://oversea.cnki.net' + url; + + // Authors and journal: find by class or positional + const authorCell = row.querySelector('td.author') || tds[3]; + const journalCell = row.querySelector('td.source') || tds[4]; + const dateCell = row.querySelector('td.date') || tds[5]; + + const authors = normalize(authorCell?.textContent); + const journal = normalize(journalCell?.textContent); + const date = normalize(dateCell?.textContent); + + 
results.push({ rank: results.length + 1, title, authors, journal, date, url }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/jd/add-cart.ts b/src/clis/jd/add-cart.ts new file mode 100644 index 0000000..b4aa26b --- /dev/null +++ b/src/clis/jd/add-cart.ts @@ -0,0 +1,64 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'jd', + name: 'add-cart', + description: '京东加入购物车', + domain: 'item.jd.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'sku', positional: true, required: true, help: '商品 SKU ID' }, + { name: 'num', type: 'int', default: 1, help: '数量' }, + ], + columns: ['status', 'title', 'price', 'sku'], + navigateBefore: false, + func: async (page, kwargs) => { + const sku = kwargs.sku; + const num = kwargs.num || 1; + + await page.goto(`https://item.jd.com/${sku}.html`); + await page.wait(4); + + // Get product info + const info = await page.evaluate(` + (() => { + const text = document.body?.innerText || ''; + const titleMatch = document.title.match(/^【[^】]*】(.+?)【/); + const title = titleMatch ? titleMatch[1].trim() : document.title.split('-')[0].trim(); + const priceMatch = text.match(/¥([\\d,.]+)/); + const price = priceMatch ? '¥' + priceMatch[1] : ''; + return { title, price }; + })() + `); + + // Navigate to cart domain and use gate.action to add item + await page.goto(`https://cart.jd.com/gate.action?pid=${sku}&pcount=${num}&ptype=1`); + await page.wait(4); + + const result = await page.evaluate(` + (() => { + const url = location.href; + const text = document.body?.innerText || ''; + if (text.includes('已成功加入') || text.includes('商品已成功') || url.includes('addtocart')) { + return 'success'; + } + if (text.includes('请登录') || text.includes('login') || url.includes('login')) { + return 'login_required'; + } + return 'page:' + url.substring(0, 60) + ' | ' + text.substring(0, 100); + })() + `); + + let status = '? 
未知'; + if (result === 'success') status = '✓ 已加入购物车'; + else if (result === 'login_required') status = '✗ 需要登录京东'; + else status = '? ' + result; + + return [{ + status, + title: (info?.title || '').slice(0, 80), + price: info?.price || '', + sku, + }]; + }, +}); diff --git a/src/clis/jd/cart.ts b/src/clis/jd/cart.ts new file mode 100644 index 0000000..6b299e0 --- /dev/null +++ b/src/clis/jd/cart.ts @@ -0,0 +1,76 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'jd', + name: 'cart', + description: '查看京东购物车', + domain: 'cart.jd.com', + strategy: Strategy.COOKIE, + args: [], + columns: ['index', 'title', 'price', 'quantity', 'sku'], + navigateBefore: false, + func: async (page) => { + await page.goto('https://cart.jd.com/cart_index'); + await page.wait(5); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 20; i++) { + if (document.body?.innerText?.length > 500) break; + await new Promise(r => setTimeout(r, 500)); + } + const text = document.body?.innerText || ''; + + // Try API approach: fetch cart data via JD's cart API + try { + const resp = await fetch('https://api.m.jd.com/api?appid=JDC_mall_cart&functionId=pcCart_jc_getCurrentCart&body=%7B%22serInfo%22%3A%7B%22area%22%3A%2222_1930_50948_52157%22%7D%7D', { + credentials: 'include', + headers: { 'referer': 'https://cart.jd.com/' }, + }); + const json = await resp.json(); + const cartData = json?.resultData?.cartInfo?.vendors || []; + const items = []; + for (const vendor of cartData) { + const sorted = vendor.sorted || []; + for (const item of sorted) { + const product = item.item || item; + if (!product.Id && !product.skuId) continue; + items.push({ + index: items.length + 1, + title: normalize(product.name || product.Name || '').slice(0, 80), + price: product.price ? 
'¥' + product.price : '', + quantity: String(product.num || product.Num || 1), + sku: String(product.Id || product.skuId || ''), + }); + } + } + if (items.length > 0) return items; + } catch {} + + // Fallback: parse from page text + const lines = text.split('\\n').map(l => l.trim()).filter(Boolean); + const items = []; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const priceMatch = line.match(/¥([\\d,.]+)/); + if (priceMatch && i > 0) { + // Previous line might be the product title + const title = lines[i-1]; + if (title && title.length > 5 && title.length < 200 && !title.startsWith('¥')) { + items.push({ + index: items.length + 1, + title: title.slice(0, 80), + price: '¥' + priceMatch[1], + quantity: '', + sku: '', + }); + } + } + } + return items; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/jd/detail.ts b/src/clis/jd/detail.ts new file mode 100644 index 0000000..0261234 --- /dev/null +++ b/src/clis/jd/detail.ts @@ -0,0 +1,67 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'jd', + name: 'detail', + description: '京东商品详情', + domain: 'item.jd.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'sku', positional: true, required: true, help: '商品 SKU ID' }, + ], + columns: ['field', 'value'], + navigateBefore: false, + func: async (page, kwargs) => { + await page.goto(`https://item.jd.com/${kwargs.sku}.html`); + await page.wait(5); + + const data = await page.evaluate(` + (() => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const text = document.body?.innerText || ''; + + // Title from <title> tag + const titleMatch = document.title.match(/^【[^】]*】(.+?)【/); + const title = titleMatch ? titleMatch[1].trim() : normalize(document.title.split('【')[0]); + + // Price + const priceMatch = text.match(/¥([\\d,.]+)/); + const price = priceMatch ? 
'¥' + priceMatch[1] : ''; + + // Rating summary - find "超XX%买家赞不绝口" or similar + const ratingMatch = text.match(/(超\\d+%[^\\n]{2,20})/); + const rating = ratingMatch ? ratingMatch[1] : ''; + + // Total reviews + const reviewMatch = text.match(/买家评价\\(([\\d万+]+)\\)/); + const reviews = reviewMatch ? reviewMatch[1] : ''; + + // Shop + const shopMatch = text.match(/(\\S{2,15}(?:京东自营旗舰店|旗舰店|专卖店|自营店))/); + const shop = shopMatch ? shopMatch[1] : ''; + + // Tags - extract "触感超舒适 163" patterns + const tagPattern = /([\u4e00-\u9fa5]{2,8})\\s+(\\d+)/g; + const tags = []; + let m; + const tagSection = text.substring(text.indexOf('买家评价'), text.indexOf('买家评价') + 500); + while ((m = tagPattern.exec(tagSection)) && tags.length < 6) { + if (parseInt(m[2]) > 1) tags.push(m[1] + '(' + m[2] + ')'); + } + + const results = [ + { field: '商品名称', value: title }, + { field: '价格', value: price }, + { field: 'SKU', value: '${kwargs.sku}' }, + { field: '店铺', value: shop }, + { field: '评价数量', value: reviews }, + { field: '好评率', value: rating }, + { field: '评价标签', value: tags.join(' | ') }, + { field: '链接', value: location.href }, + ]; + return results.filter(r => r.value); + })() + `); + return Array.isArray(data) ? 
data : []; + }, +}); diff --git a/src/clis/jd/reviews.ts b/src/clis/jd/reviews.ts new file mode 100644 index 0000000..559d6b5 --- /dev/null +++ b/src/clis/jd/reviews.ts @@ -0,0 +1,64 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'jd', + name: 'reviews', + description: '京东商品评价', + domain: 'item.jd.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'sku', positional: true, required: true, help: '商品 SKU ID' }, + { name: 'limit', type: 'int', default: 10, help: '返回评价数量 (max 20)' }, + ], + columns: ['rank', 'user', 'content', 'date'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + await page.goto(`https://item.jd.com/${kwargs.sku}.html`); + await page.wait(5); + // Scroll to load reviews section + await page.autoScroll({ times: 2, delayMs: 1500 }); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const text = document.body?.innerText || ''; + + // JD new version: reviews are inline in page text + // Pattern: username \\n review_text \\n [date or next username] + // Find the review section after "买家评价" + const reviewStart = text.indexOf('买家评价'); + const reviewEnd = text.indexOf('全部评价'); + if (reviewStart < 0) return []; + + const reviewSection = text.substring(reviewStart, reviewEnd > reviewStart ? 
reviewEnd : reviewStart + 3000); + const lines = reviewSection.split('\\n').map(l => l.trim()).filter(Boolean); + + const results = []; + // Skip header lines, look for user-review pairs + // Users are like "c***4", "3***a", "A***7" or "jd_xxx" + // JD usernames contain * (masked), like "c***4", "3***a", "jd_xxx" + const userPattern = /^[a-zA-Z0-9*_]{3,15}$/; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (userPattern.test(line) && line.includes('*') && i + 1 < lines.length) { + const user = line; + const content = lines[i + 1]; + // Skip if content looks like a header/tag + if (content.length < 5 || content.match(/^(全部评价|问大家|查看更多)/)) continue; + results.push({ + rank: results.length + 1, + user, + content: content.slice(0, 150), + date: '', + }); + i++; // skip the content line + if (results.length >= ${limit}) break; + } + } + return results; + })() + `); + return Array.isArray(data) ? data : []; + }, +}); diff --git a/src/clis/jd/search.ts b/src/clis/jd/search.ts new file mode 100644 index 0000000..58e1716 --- /dev/null +++ b/src/clis/jd/search.ts @@ -0,0 +1,71 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'jd', + name: 'search', + description: '京东商品搜索', + domain: 'search.jd.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 30)' }, + ], + columns: ['rank', 'title', 'price', 'shop', 'sku', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 30); + const query = encodeURIComponent(kwargs.query); + await page.goto(`https://search.jd.com/Search?keyword=${query}&enc=utf-8`); + await page.wait(5); + await page.autoScroll({ times: 2, delayMs: 1500 }); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + for (let i = 0; i < 20; i++) { + if 
(document.querySelectorAll('div[data-sku]').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + const items = document.querySelectorAll('div[data-sku]'); + const results = []; + for (const el of items) { + const sku = el.getAttribute('data-sku') || ''; + if (!sku) continue; + const text = normalize(el.textContent); + if (text.length < 10) continue; + + // Parse product text: pattern is usually [badge?] title price shop ... + // Price: first ¥ number + const priceMatch = text.match(/¥([\\d,.]+)/); + const price = priceMatch ? '¥' + priceMatch[1] : ''; + + // Title: text before the price, strip leading badges like "海外无货" "京东超市" + let title = ''; + if (priceMatch) { + const beforePrice = text.substring(0, text.indexOf('¥')); + title = beforePrice.replace(/^(海外无货|京东超市|自营|秒杀|新品|预售|PLUS)/, '').trim(); + } + if (!title || title.length < 4) continue; + + // Shop: look for "旗舰店" / "专卖店" / "自营" patterns + let shop = ''; + const shopMatch = text.match(/(\\S{2,15}(?:旗舰店|专卖店|自营店|官方旗舰店|京东自营旗舰店|京东自营))/); + if (shopMatch) shop = shopMatch[1]; + + const url = 'https://item.jd.com/' + sku + '.html'; + results.push({ + rank: results.length + 1, + title: title.slice(0, 80), + price, + shop, + sku, + url, + }); + if (results.length >= ${limit}) break; + } + return results; + })() + `); + return Array.isArray(data) ? 
data : []; + }, +}); diff --git a/src/clis/taobao/search.ts b/src/clis/taobao/search.ts new file mode 100644 index 0000000..26cc05d --- /dev/null +++ b/src/clis/taobao/search.ts @@ -0,0 +1,92 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'taobao', + name: 'search', + description: '淘宝商品搜索', + domain: 's.taobao.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'query', positional: true, required: true, help: '搜索关键词' }, + { name: 'sort', default: 'default', choices: ['default', 'sale', 'price'], help: '排序 (default/sale销量/price价格)' }, + { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 40)' }, + ], + columns: ['rank', 'title', 'price', 'sales', 'shop', 'location', 'url'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 40); + const query = encodeURIComponent(kwargs.query); + const sortMap: Record<string, string> = { default: '', sale: '&sort=sale-desc', price: '&sort=price-asc' }; + const sortParam = sortMap[kwargs.sort] || ''; + + await page.goto('https://www.taobao.com'); + await page.wait(2); + await page.evaluate(`location.href = 'https://s.taobao.com/search?q=${query}${sortParam}'`); + await page.wait(8); + await page.autoScroll({ times: 3, delayMs: 2000 }); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + + // Check login + const bodyText = document.body?.innerText || ''; + if (bodyText.length < 1000 && bodyText.includes('请登录')) { + return [{rank:0, title:'[未登录] 请在自动化窗口中登录淘宝', price:'', sales:'', shop:'', location:'', url:''}]; + } + + // Wait for cards + for (let i = 0; i < 30; i++) { + if (document.querySelectorAll('[class*="doubleCard--"]').length > 3) break; + await new Promise(r => setTimeout(r, 500)); + } + + const cards = document.querySelectorAll('[class*="doubleCard--"]'); + const results = []; + const seenTitles = new Set(); + + for (const card of cards) { + // Title + const titleEl 
= card.querySelector('[class*="title--"]'); + const title = titleEl ? normalize(titleEl.textContent) : ''; + if (!title || title.length < 3 || seenTitles.has(title)) continue; + seenTitles.add(title); + + // Price: integer + optional decimal + const intEl = card.querySelector('[class*="priceInt--"]'); + const floatEl = card.querySelector('[class*="priceFloat--"]'); + let price = ''; + if (intEl) { + price = '¥' + normalize(intEl.textContent) + (floatEl ? normalize(floatEl.textContent) : ''); + } + + // Sales + const salesEl = card.querySelector('[class*="realSales--"]'); + const sales = salesEl ? normalize(salesEl.textContent) : ''; + + // Shop name (strip leading "X年老店" prefix) + const shopEl = card.querySelector('[class*="shopName--"]'); + let shop = shopEl ? normalize(shopEl.textContent) : ''; + shop = shop.replace(/^\\d+年老店/, '').replace(/^回头客[\\d万]+/, ''); + + // Location + const locEls = card.querySelectorAll('[class*="procity--"]'); + const location = Array.from(locEls).map(el => normalize(el.textContent)).join(''); + + // URL: first <a> in card (simba tracking link, redirects to product) + const linkEl = card.querySelector('a[href*="simba"], a[href*="taobao.com"], a[href*="tmall.com"]'); + const url = linkEl ? linkEl.getAttribute('href')?.substring(0, 120) || '' : ''; + + results.push({ rank: results.length + 1, title: title.slice(0, 80), price, sales, shop, location, url }); + if (results.length >= ${limit}) break; + } + + if (results.length === 0) { + return [{rank:0, title:'[无结果] cards=' + cards.length, price:'', sales:'', shop:'', location:'', url: location.href}]; + } + return results; + })() + `); + return Array.isArray(data) ? 
data : []; + }, +}); From 8f7e89f69ced4984a21b5bb3bc5a8188d1955dc6 Mon Sep 17 00:00:00 2001 From: Mu Qiao <muqiao@Mus-Mac-mini.local> Date: Sun, 22 Mar 2026 12:38:14 +0100 Subject: [PATCH 2/5] feat(taobao): add detail, reviews, add-cart, cart adapters Complete the Taobao shopping workflow to match JD feature parity: - taobao/detail: product info with title, price, shop, location - taobao/reviews: user review extraction from product page - taobao/add-cart: add to cart via button click automation - taobao/cart: view cart contents Also update taobao/search to extract item_id from data-spm-act-id attribute and generate proper item.taobao.com URLs. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- src/clis/taobao/add-cart.ts | 77 +++++++++++++++++++++++++++++++ src/clis/taobao/cart.ts | 90 +++++++++++++++++++++++++++++++++++++ src/clis/taobao/detail.ts | 67 +++++++++++++++++++++++++++ src/clis/taobao/reviews.ts | 76 +++++++++++++++++++++++++++++++ src/clis/taobao/search.ts | 17 ++++--- 5 files changed, 322 insertions(+), 5 deletions(-) create mode 100644 src/clis/taobao/add-cart.ts create mode 100644 src/clis/taobao/cart.ts create mode 100644 src/clis/taobao/detail.ts create mode 100644 src/clis/taobao/reviews.ts diff --git a/src/clis/taobao/add-cart.ts b/src/clis/taobao/add-cart.ts new file mode 100644 index 0000000..b7533e8 --- /dev/null +++ b/src/clis/taobao/add-cart.ts @@ -0,0 +1,77 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'taobao', + name: 'add-cart', + description: '淘宝加入购物车', + domain: 'item.taobao.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'id', positional: true, required: true, help: '商品 ID' }, + ], + columns: ['status', 'title', 'price', 'item_id'], + navigateBefore: false, + func: async (page, kwargs) => { + const itemId = kwargs.id; + await page.goto(`https://item.taobao.com/item.htm?id=${itemId}`); + await page.wait(5); + + // Get product info + const info = await page.evaluate(` + (() => { + 
const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const titleEl = document.querySelector('[class*="mainTitle--"], [class*="ItemHeader--"], h1'); + const title = titleEl ? normalize(titleEl.textContent) : document.title.split('-')[0].trim(); + const priceEl = document.querySelector('[class*="priceText--"], [class*="Price--"]'); + const price = priceEl ? '¥' + normalize(priceEl.textContent).replace(/[¥¥]/g, '') : ''; + return { title: title.slice(0, 80), price }; + })() + `); + + // Click add-to-cart button + await page.evaluate(` + (() => { + // Find add-to-cart button by text content + const buttons = document.querySelectorAll('button, [role="button"], a, div[class*="btn"], span[class*="btn"]'); + for (const btn of buttons) { + const t = (btn.textContent || '').trim(); + if (t === '加入购物车' || t === '加入 购物车' || t.includes('加入购物车')) { + btn.click(); + return 'clicked'; + } + } + return 'btn_not_found'; + })() + `); + await page.wait(3); + + // Check result + const result = await page.evaluate(` + (() => { + const text = document.body?.innerText || ''; + if (text.includes('已加入购物车') || text.includes('商品已成功') || text.includes('去购物车')) { + return 'success'; + } + if (text.includes('请选择') || text.includes('请先选择')) { + return 'need_spec'; + } + if (text.includes('请登录') || text.includes('login')) { + return 'login_required'; + } + return 'unknown'; + })() + `); + + let status = '? 
未知'; + if (result === 'success') status = '✓ 已加入购物车'; + else if (result === 'need_spec') status = '✗ 需要先选择规格(请在浏览器中操作)'; + else if (result === 'login_required') status = '✗ 需要登录'; + + return [{ + status, + title: info?.title || '', + price: info?.price || '', + item_id: itemId, + }]; + }, +}); diff --git a/src/clis/taobao/cart.ts b/src/clis/taobao/cart.ts new file mode 100644 index 0000000..b9094d5 --- /dev/null +++ b/src/clis/taobao/cart.ts @@ -0,0 +1,90 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'taobao', + name: 'cart', + description: '查看淘宝购物车', + domain: 'cart.taobao.com', + strategy: Strategy.COOKIE, + args: [], + columns: ['index', 'title', 'price', 'quantity', 'shop', 'url'], + navigateBefore: false, + func: async (page) => { + await page.goto('https://cart.taobao.com/cart.htm'); + await page.wait(6); + await page.autoScroll({ times: 1, delayMs: 1000 }); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + + for (let i = 0; i < 20; i++) { + if (document.body?.innerText?.length > 500) break; + await new Promise(r => setTimeout(r, 500)); + } + + const results = []; + + // Strategy 1: Find cart items by class prefix patterns + const items = document.querySelectorAll('[class*="order--"], [class*="item--"], [class*="cartItem--"]'); + const seen = new Set(); + for (const item of items) { + const titleEl = item.querySelector('[class*="itemTitle--"], [class*="title--"] a, a[href*="item.htm"]'); + const title = titleEl ? normalize(titleEl.textContent) : ''; + if (!title || title.length < 3 || seen.has(title)) continue; + seen.add(title); + + const priceEl = item.querySelector('[class*="price--"], [class*="Price--"]'); + const price = priceEl ? normalize(priceEl.textContent) : ''; + + const qtyEl = item.querySelector('[class*="quantity--"] input, [class*="amount--"] input, input[type="text"]'); + const quantity = qtyEl ? 
qtyEl.value || '1' : '1'; + + const shopEl = item.querySelector('[class*="shopName--"], [class*="shop--"] a'); + const shop = shopEl ? normalize(shopEl.textContent) : ''; + + const linkEl = item.querySelector('a[href*="item.htm"]'); + let url = linkEl ? linkEl.getAttribute('href') || '' : ''; + if (url.startsWith('//')) url = 'https:' + url; + + results.push({ + index: results.length + 1, + title: title.slice(0, 80), + price, + quantity, + shop, + url: url.split('&')[0], + }); + } + + // Strategy 2: parse from text if DOM failed + if (results.length === 0) { + const text = document.body?.innerText || ''; + if (text.includes('购物车') && text.length > 200) { + const lines = text.split('\\n').map(l => l.trim()).filter(Boolean); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line.length > 10 && line.length < 150 && !line.match(/^(购物车|全选|结算|商品|合计|¥)/) && lines[i+1]?.includes('¥')) { + results.push({ + index: results.length + 1, + title: line.slice(0, 80), + price: (lines[i+1].match(/¥[\\d.]+/) || [''])[0], + quantity: '1', + shop: '', + url: '', + }); + } + } + } + } + + if (results.length === 0 && document.body?.innerText?.includes('登录')) { + return [{index:0, title:'[需要登录] 请在自动化窗口中登录淘宝', price:'', quantity:'', shop:'', url:''}]; + } + + return results; + })() + `); + return Array.isArray(data) ? 
data : []; + }, +}); diff --git a/src/clis/taobao/detail.ts b/src/clis/taobao/detail.ts new file mode 100644 index 0000000..da8908d --- /dev/null +++ b/src/clis/taobao/detail.ts @@ -0,0 +1,67 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'taobao', + name: 'detail', + description: '淘宝商品详情', + domain: 'item.taobao.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'id', positional: true, required: true, help: '商品 ID' }, + ], + columns: ['field', 'value'], + navigateBefore: false, + func: async (page, kwargs) => { + await page.goto(`https://item.taobao.com/item.htm?id=${kwargs.id}`); + await page.wait(6); + await page.autoScroll({ times: 1, delayMs: 1000 }); + + const data = await page.evaluate(` + (() => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const text = document.body?.innerText || ''; + const results = []; + + // Title + const titleEl = document.querySelector('[class*="mainTitle--"], [class*="ItemHeader--"], h1, .tb-main-title'); + const title = titleEl ? normalize(titleEl.textContent) : document.title.split('-')[0].trim(); + results.push({ field: '商品名称', value: title.slice(0, 100) }); + + // Price: find the main price number + const priceMatch = text.match(/¥\s*(\d+(?:\.\d{1,2})?)/); + const price = priceMatch ? 
'¥' + priceMatch[1] : ''; + if (price) results.push({ field: '价格', value: price }); + + // Sales / reviews + const salesMatch = text.match(/(\\d+万?\\+?)\\s*人付款/) || text.match(/月销\\s*(\\d+万?\\+?)/); + if (salesMatch) results.push({ field: '销量', value: salesMatch[0] }); + + const reviewMatch = text.match(/累计评价\\s*(\\d+万?\\+?)/) || text.match(/(\\d+万?\\+?)\\s*条评价/); + if (reviewMatch) results.push({ field: '评价数', value: reviewMatch[1] || reviewMatch[0] }); + + // Rating + const ratingMatch = text.match(/(\\d+\\.?\\d*)\\s*分/) || text.match(/描述\\s*(\\d+\\.\\d+)/); + if (ratingMatch) results.push({ field: '评分', value: ratingMatch[0] }); + + // Shop: use class prefix matching, exclude nav links + const shopEl = document.querySelector('[class*="shopName--"] a, [class*="ShopHeader--"] a, [class*="seller--"] a'); + let shop = shopEl ? normalize(shopEl.textContent) : ''; + if (!shop || shop.length < 2 || shop.includes('免费') || shop.includes('登录')) { + const shopMatch = text.match(/([\u4e00-\u9fa5A-Za-z]{2,15}(?:旗舰店|专卖店|企业店|专营店))/); + shop = shopMatch ? shopMatch[1] : ''; + } + if (shop && shop.length > 1 && shop.length < 30) results.push({ field: '店铺', value: shop }); + + // Location + const locMatch = text.match(/发货地[::]*\\s*([\u4e00-\u9fa5]{2,10})/); + if (locMatch) results.push({ field: '发货地', value: locMatch[1] }); + + results.push({ field: 'ID', value: '${kwargs.id}' }); + results.push({ field: '链接', value: location.href.split('?')[0] + '?id=${kwargs.id}' }); + + return results; + })() + `); + return Array.isArray(data) ? 
data : []; + }, +}); diff --git a/src/clis/taobao/reviews.ts b/src/clis/taobao/reviews.ts new file mode 100644 index 0000000..253cfc9 --- /dev/null +++ b/src/clis/taobao/reviews.ts @@ -0,0 +1,76 @@ +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'taobao', + name: 'reviews', + description: '淘宝商品评价', + domain: 'item.taobao.com', + strategy: Strategy.COOKIE, + args: [ + { name: 'id', positional: true, required: true, help: '商品 ID' }, + { name: 'limit', type: 'int', default: 10, help: '返回评价数量 (max 20)' }, + ], + columns: ['rank', 'user', 'content', 'date', 'spec'], + navigateBefore: false, + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 10, 20); + await page.goto(`https://item.taobao.com/item.htm?id=${kwargs.id}`); + await page.wait(6); + await page.autoScroll({ times: 3, delayMs: 2000 }); + + const data = await page.evaluate(` + (async () => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + + // Wait for reviews to load + for (let i = 0; i < 30; i++) { + if (document.querySelectorAll('[class*="comment--"], [class*="Comment--"], [class*="review--"], .rate-grid').length > 0) break; + await new Promise(r => setTimeout(r, 500)); + } + + const results = []; + + // Strategy 1: Find comment elements by class prefix + const commentEls = document.querySelectorAll('[class*="comment--"], [class*="Comment--"], [class*="rateContent--"]'); + if (commentEls.length > 0) { + for (const el of commentEls) { + const content = normalize(el.textContent); + if (content.length < 5 || content.length > 500) continue; + // Find user name nearby + const parent = el.closest('[class*="rateItem--"], [class*="item--"]') || el.parentElement?.parentElement; + const userEl = parent?.querySelector('[class*="userName--"], [class*="user--"]'); + const user = userEl ? normalize(userEl.textContent) : ''; + const dateEl = parent?.querySelector('[class*="date--"], [class*="time--"]'); + const date = dateEl ? 
normalize(dateEl.textContent) : ''; + const specEl = parent?.querySelector('[class*="sku--"], [class*="spec--"]'); + const spec = specEl ? normalize(specEl.textContent) : ''; + + results.push({ rank: results.length + 1, user, content: content.slice(0, 150), date, spec }); + if (results.length >= ${limit}) break; + } + } + + // Strategy 2: parse from page text if DOM extraction failed + if (results.length === 0) { + const text = document.body?.innerText || ''; + // Look for review section + const reviewIdx = text.search(/评价|评论|买家秀/); + if (reviewIdx > 0) { + const section = text.substring(reviewIdx, reviewIdx + 3000); + const lines = section.split('\\n').map(l => l.trim()).filter(l => l.length > 10 && l.length < 300); + for (const line of lines) { + // Skip headers and navigation + if (line.match(/^(评价|评论|买家秀|全部|好评|中评|差评|有图|追评)/)) continue; + if (line.match(/^\\d+$/)) continue; + results.push({ rank: results.length + 1, user: '', content: line.slice(0, 150), date: '', spec: '' }); + if (results.length >= ${limit}) break; + } + } + } + + return results; + })() + `); + return Array.isArray(data) ? 
data : []; + }, +}); diff --git a/src/clis/taobao/search.ts b/src/clis/taobao/search.ts index 26cc05d..f3dfd19 100644 --- a/src/clis/taobao/search.ts +++ b/src/clis/taobao/search.ts @@ -11,7 +11,7 @@ cli({ { name: 'sort', default: 'default', choices: ['default', 'sale', 'price'], help: '排序 (default/sale销量/price价格)' }, { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 40)' }, ], - columns: ['rank', 'title', 'price', 'sales', 'shop', 'location', 'url'], + columns: ['rank', 'title', 'price', 'sales', 'shop', 'location', 'item_id', 'url'], navigateBefore: false, func: async (page, kwargs) => { const limit = Math.min(kwargs.limit || 10, 40); @@ -73,11 +73,18 @@ cli({ const locEls = card.querySelectorAll('[class*="procity--"]'); const location = Array.from(locEls).map(el => normalize(el.textContent)).join(''); - // URL: first <a> in card (simba tracking link, redirects to product) - const linkEl = card.querySelector('a[href*="simba"], a[href*="taobao.com"], a[href*="tmall.com"]'); - const url = linkEl ? linkEl.getAttribute('href')?.substring(0, 120) || '' : ''; + // Item ID from data-spm-act-id on parent wrapper + let itemId = ''; + let wrapper = card.parentElement; + for (let i = 0; i < 3 && wrapper; i++) { + const spmId = wrapper.getAttribute('data-spm-act-id'); + if (spmId && /^\\d{10,}$/.test(spmId)) { itemId = spmId; break; } + wrapper = wrapper.parentElement; + } + + const url = itemId ? 
'https://item.taobao.com/item.htm?id=' + itemId : ''; - results.push({ rank: results.length + 1, title: title.slice(0, 80), price, sales, shop, location, url }); + results.push({ rank: results.length + 1, title: title.slice(0, 80), price, sales, shop, location, item_id: itemId, url }); if (results.length >= ${limit}) break; } From ce822cbfc44a114d19c10d07aa1446b792eca96c Mon Sep 17 00:00:00 2001 From: Mu Qiao <muqiao@Mus-Mac-mini.local> Date: Sun, 22 Mar 2026 12:48:39 +0100 Subject: [PATCH 3/5] fix(taobao): fix detail, cart, add-cart; improve reviews MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - detail: fix price extraction (regex from text), shop name matching, add spec listing - cart: rewrite using text section parsing (split by "移入收藏"), extract price from split ¥/digits lines - add-cart: fix navigation (go via taobao.com for session cookies) - reviews: use tmall/taobao rate API (currently returns empty as API requires MTOP signing — documented limitation) - search: extract item_id from data-spm-act-id, generate proper URLs Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- src/clis/taobao/add-cart.ts | 10 +-- src/clis/taobao/cart.ts | 117 ++++++++++++++++++++---------------- src/clis/taobao/detail.ts | 61 +++++++++++-------- src/clis/taobao/reviews.ts | 92 ++++++++++++++-------------- 4 files changed, 153 insertions(+), 127 deletions(-) diff --git a/src/clis/taobao/add-cart.ts b/src/clis/taobao/add-cart.ts index b7533e8..bff12f0 100644 --- a/src/clis/taobao/add-cart.ts +++ b/src/clis/taobao/add-cart.ts @@ -13,8 +13,10 @@ cli({ navigateBefore: false, func: async (page, kwargs) => { const itemId = kwargs.id; - await page.goto(`https://item.taobao.com/item.htm?id=${itemId}`); - await page.wait(5); + await page.goto('https://www.taobao.com'); + await page.wait(2); + await page.evaluate(`location.href = 'https://item.taobao.com/item.htm?id=${itemId}'`); + await page.wait(6); // Get product info const 
info = await page.evaluate(` @@ -62,9 +64,9 @@ cli({ })() `); - let status = '? 未知'; + let status = '? 未确认(可能需要选规格)'; if (result === 'success') status = '✓ 已加入购物车'; - else if (result === 'need_spec') status = '✗ 需要先选择规格(请在浏览器中操作)'; + else if (result === 'need_spec') status = '✗ 需要先选择规格'; else if (result === 'login_required') status = '✗ 需要登录'; return [{ diff --git a/src/clis/taobao/cart.ts b/src/clis/taobao/cart.ts index b9094d5..e9bc21a 100644 --- a/src/clis/taobao/cart.ts +++ b/src/clis/taobao/cart.ts @@ -6,82 +6,93 @@ cli({ description: '查看淘宝购物车', domain: 'cart.taobao.com', strategy: Strategy.COOKIE, - args: [], - columns: ['index', 'title', 'price', 'quantity', 'shop', 'url'], + args: [ + { name: 'limit', type: 'int', default: 20, help: '返回数量 (max 50)' }, + ], + columns: ['index', 'title', 'price', 'spec', 'shop'], navigateBefore: false, - func: async (page) => { - await page.goto('https://cart.taobao.com/cart.htm'); + func: async (page, kwargs) => { + const limit = Math.min(kwargs.limit || 20, 50); + await page.goto('https://www.taobao.com'); + await page.wait(2); + await page.evaluate(`location.href = 'https://cart.taobao.com/cart.htm'`); await page.wait(6); - await page.autoScroll({ times: 1, delayMs: 1000 }); + await page.autoScroll({ times: 3, delayMs: 1500 }); const data = await page.evaluate(` (async () => { const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const text = document.body?.innerText || ''; - for (let i = 0; i < 20; i++) { - if (document.body?.innerText?.length > 500) break; - await new Promise(r => setTimeout(r, 500)); + if (text.length < 500 || text.includes('请登录')) { + return [{index:0, title:'[需要登录] 请在自动化窗口中登录淘宝', price:'', spec:'', shop:''}]; } + // Parse cart from text: each item ends with "移入收藏 删除" + // Split text by "移入收藏" delimiter + const sections = text.split(/移入收藏/); const results = []; - // Strategy 1: Find cart items by class prefix patterns - const items = document.querySelectorAll('[class*="order--"], 
[class*="item--"], [class*="cartItem--"]'); - const seen = new Set(); - for (const item of items) { - const titleEl = item.querySelector('[class*="itemTitle--"], [class*="title--"] a, a[href*="item.htm"]'); - const title = titleEl ? normalize(titleEl.textContent) : ''; - if (!title || title.length < 3 || seen.has(title)) continue; - seen.add(title); + for (const section of sections) { + const lines = section.split('\\n').map(l => l.trim()).filter(Boolean); + if (lines.length < 3) continue; - const priceEl = item.querySelector('[class*="price--"], [class*="Price--"]'); - const price = priceEl ? normalize(priceEl.textContent) : ''; + // Find the product title: longest line that looks like a product name + let title = ''; + let titleIdx = -1; + for (let i = 0; i < lines.length; i++) { + const l = lines[i]; + if (l.length > 15 && l.length < 200 && !l.match(/^(删除|全选|全部商品|合计|结算|找同款|退货|¥|¥|\\d+$|颜色|尺码|规格|套餐|主板|运行)/)) { + if (l.length > title.length) { + title = l; + titleIdx = i; + } + } + } + if (!title) continue; - const qtyEl = item.querySelector('[class*="quantity--"] input, [class*="amount--"] input, input[type="text"]'); - const quantity = qtyEl ? qtyEl.value || '1' : '1'; + // Price: find ¥ followed by digits (may be split across lines) + let price = ''; + for (let i = 0; i < lines.length; i++) { + if (lines[i] === '¥' || lines[i] === '¥') { + // Next lines contain the price digits + let p = ''; + for (let j = i + 1; j < Math.min(i + 4, lines.length); j++) { + if (lines[j].match(/^[\\d,.]+$/)) { p += lines[j]; } + else if (lines[j] === '.') { p += '.'; } + else break; + } + if (p) { price = '¥' + p; break; } + } + } - const shopEl = item.querySelector('[class*="shopName--"], [class*="shop--"] a'); - const shop = shopEl ? 
normalize(shopEl.textContent) : ''; + // Spec: lines starting with 颜色/尺码/规格/套餐 + let spec = ''; + for (const l of lines) { + if (l.match(/^(颜色分类|尺码|规格|套餐|主板|运行)[::]/)) { + spec = l.slice(0, 40); + break; + } + } - const linkEl = item.querySelector('a[href*="item.htm"]'); - let url = linkEl ? linkEl.getAttribute('href') || '' : ''; - if (url.startsWith('//')) url = 'https:' + url; + // Shop: check the line before title or look for shop patterns + let shop = ''; + if (titleIdx > 0) { + const prev = lines[titleIdx - 1]; + if (prev && prev.length > 2 && prev.length < 30 && !prev.match(/^(删除|\\d|¥|¥|券|退|满|超)/)) { + shop = prev; + } + } results.push({ index: results.length + 1, title: title.slice(0, 80), price, - quantity, + spec, shop, - url: url.split('&')[0], }); + if (results.length >= ${limit}) break; } - - // Strategy 2: parse from text if DOM failed - if (results.length === 0) { - const text = document.body?.innerText || ''; - if (text.includes('购物车') && text.length > 200) { - const lines = text.split('\\n').map(l => l.trim()).filter(Boolean); - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - if (line.length > 10 && line.length < 150 && !line.match(/^(购物车|全选|结算|商品|合计|¥)/) && lines[i+1]?.includes('¥')) { - results.push({ - index: results.length + 1, - title: line.slice(0, 80), - price: (lines[i+1].match(/¥[\\d.]+/) || [''])[0], - quantity: '1', - shop: '', - url: '', - }); - } - } - } - } - - if (results.length === 0 && document.body?.innerText?.includes('登录')) { - return [{index:0, title:'[需要登录] 请在自动化窗口中登录淘宝', price:'', quantity:'', shop:'', url:''}]; - } - return results; })() `); diff --git a/src/clis/taobao/detail.ts b/src/clis/taobao/detail.ts index da8908d..5b4aa4e 100644 --- a/src/clis/taobao/detail.ts +++ b/src/clis/taobao/detail.ts @@ -12,9 +12,10 @@ cli({ columns: ['field', 'value'], navigateBefore: false, func: async (page, kwargs) => { - await page.goto(`https://item.taobao.com/item.htm?id=${kwargs.id}`); + await 
page.goto('https://www.taobao.com'); + await page.wait(2); + await page.evaluate(`location.href = 'https://item.taobao.com/item.htm?id=${kwargs.id}'`); await page.wait(6); - await page.autoScroll({ times: 1, delayMs: 1000 }); const data = await page.evaluate(` (() => { @@ -23,41 +24,53 @@ cli({ const results = []; // Title - const titleEl = document.querySelector('[class*="mainTitle--"], [class*="ItemHeader--"], h1, .tb-main-title'); - const title = titleEl ? normalize(titleEl.textContent) : document.title.split('-')[0].trim(); + const titleEl = document.querySelector('[class*="mainTitle--"]'); + const title = titleEl ? normalize(titleEl.textContent) : document.title.split('-')[0].replace(/^【[^】]+】/, '').trim(); results.push({ field: '商品名称', value: title.slice(0, 100) }); - // Price: find the main price number - const priceMatch = text.match(/¥\s*(\d+(?:\.\d{1,2})?)/); - const price = priceMatch ? '¥' + priceMatch[1] : ''; - if (price) results.push({ field: '价格', value: price }); + // Price: find ¥ or ¥ followed by digits in text (they may be split by newlines) + const pricePattern = /[¥¥]\\s*(\\d+(?:\\.\\d{1,2})?)/g; + const prices = []; + let m; + while ((m = pricePattern.exec(text)) && prices.length < 3) { + const p = parseFloat(m[1]); + if (p > 0.1 && p < 100000) prices.push(p); + } + if (prices.length > 0) { + const minPrice = Math.min(...prices); + results.push({ field: '价格', value: '¥' + minPrice }); + } - // Sales / reviews + // Sales const salesMatch = text.match(/(\\d+万?\\+?)\\s*人付款/) || text.match(/月销\\s*(\\d+万?\\+?)/); if (salesMatch) results.push({ field: '销量', value: salesMatch[0] }); - const reviewMatch = text.match(/累计评价\\s*(\\d+万?\\+?)/) || text.match(/(\\d+万?\\+?)\\s*条评价/); - if (reviewMatch) results.push({ field: '评价数', value: reviewMatch[1] || reviewMatch[0] }); + // Reviews + const reviewMatch = text.match(/累计评价\\s*(\\d+万?\\+?)/) || text.match(/评价[((]\\s*(\\d+万?\\+?)/); + if (reviewMatch) results.push({ field: '评价数', value: reviewMatch[1] }); - 
// Rating - const ratingMatch = text.match(/(\\d+\\.?\\d*)\\s*分/) || text.match(/描述\\s*(\\d+\\.\\d+)/); - if (ratingMatch) results.push({ field: '评分', value: ratingMatch[0] }); + // Shop rating + const ratingMatch = text.match(/(\\d+\\.\\d)\\s*(?:分|描述|物流|服务)/); + if (ratingMatch) results.push({ field: '店铺评分', value: ratingMatch[0] }); - // Shop: use class prefix matching, exclude nav links - const shopEl = document.querySelector('[class*="shopName--"] a, [class*="ShopHeader--"] a, [class*="seller--"] a'); - let shop = shopEl ? normalize(shopEl.textContent) : ''; - if (!shop || shop.length < 2 || shop.includes('免费') || shop.includes('登录')) { - const shopMatch = text.match(/([\u4e00-\u9fa5A-Za-z]{2,15}(?:旗舰店|专卖店|企业店|专营店))/); - shop = shopMatch ? shopMatch[1] : ''; - } - if (shop && shop.length > 1 && shop.length < 30) results.push({ field: '店铺', value: shop }); + // Shop name + const shopMatch = text.match(/([\u4e00-\u9fa5A-Za-z0-9]{2,15}(?:旗舰店|专卖店|企业店|专营店))/); + if (shopMatch) results.push({ field: '店铺', value: shopMatch[1] }); // Location - const locMatch = text.match(/发货地[::]*\\s*([\u4e00-\u9fa5]{2,10})/); + const locMatch = text.match(/发货地[::]*\\s*([\u4e00-\u9fa5]{2,10})/) || text.match(/([\u4e00-\u9fa5]{2,4}(?:省|市))\\s*发货/); if (locMatch) results.push({ field: '发货地', value: locMatch[1] }); + // Specs available + const specMatch = text.match(/颜色分类/); + if (specMatch) { + const specSection = text.substring(text.indexOf('颜色分类'), text.indexOf('颜色分类') + 200); + const specs = specSection.split('\\n').filter(l => l.trim().length > 2 && l.trim().length < 50).slice(0, 5); + if (specs.length) results.push({ field: '可选规格', value: specs.join(' | ') }); + } + results.push({ field: 'ID', value: '${kwargs.id}' }); - results.push({ field: '链接', value: location.href.split('?')[0] + '?id=${kwargs.id}' }); + results.push({ field: '链接', value: location.href.split('&')[0] }); return results; })() diff --git a/src/clis/taobao/reviews.ts b/src/clis/taobao/reviews.ts index 
253cfc9..b5f0c88 100644 --- a/src/clis/taobao/reviews.ts +++ b/src/clis/taobao/reviews.ts @@ -14,61 +14,61 @@ cli({ navigateBefore: false, func: async (page, kwargs) => { const limit = Math.min(kwargs.limit || 10, 20); - await page.goto(`https://item.taobao.com/item.htm?id=${kwargs.id}`); - await page.wait(6); - await page.autoScroll({ times: 3, delayMs: 2000 }); + // Navigate to product page first (to get cookies/session) + await page.goto('https://www.taobao.com'); + await page.wait(2); + await page.evaluate(`location.href = 'https://item.taobao.com/item.htm?id=${kwargs.id}'`); + await page.wait(5); + // Try to fetch reviews via the rate API const data = await page.evaluate(` (async () => { const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); - // Wait for reviews to load - for (let i = 0; i < 30; i++) { - if (document.querySelectorAll('[class*="comment--"], [class*="Comment--"], [class*="review--"], .rate-grid').length > 0) break; - await new Promise(r => setTimeout(r, 500)); - } - - const results = []; - - // Strategy 1: Find comment elements by class prefix - const commentEls = document.querySelectorAll('[class*="comment--"], [class*="Comment--"], [class*="rateContent--"]'); - if (commentEls.length > 0) { - for (const el of commentEls) { - const content = normalize(el.textContent); - if (content.length < 5 || content.length > 500) continue; - // Find user name nearby - const parent = el.closest('[class*="rateItem--"], [class*="item--"]') || el.parentElement?.parentElement; - const userEl = parent?.querySelector('[class*="userName--"], [class*="user--"]'); - const user = userEl ? normalize(userEl.textContent) : ''; - const dateEl = parent?.querySelector('[class*="date--"], [class*="time--"]'); - const date = dateEl ? normalize(dateEl.textContent) : ''; - const specEl = parent?.querySelector('[class*="sku--"], [class*="spec--"]'); - const spec = specEl ? 
normalize(specEl.textContent) : ''; - - results.push({ rank: results.length + 1, user, content: content.slice(0, 150), date, spec }); - if (results.length >= ${limit}) break; + // Try MTOP rate list API + try { + const resp = await fetch( + 'https://rate.tmall.com/list_detail_rate.htm?itemId=${kwargs.id}&sellerId=&order=3&currentPage=1&pageSize=${limit}&callback=', + { credentials: 'include' } + ); + let text = await resp.text(); + // Remove JSONP wrapper if any + text = text.replace(/^[^{]*/, '').replace(/[^}]*$/, ''); + const json = JSON.parse(text); + const list = json?.rateDetail?.rateList || json?.rateList || []; + if (list.length > 0) { + return list.slice(0, ${limit}).map((item, i) => ({ + rank: i + 1, + user: (item.displayUserNick || item.userNick || '').slice(0, 15), + content: normalize(item.rateContent || '').slice(0, 150), + date: item.rateDate || '', + spec: normalize(item.auctionSku || '').slice(0, 40), + })); } - } + } catch {} - // Strategy 2: parse from page text if DOM extraction failed - if (results.length === 0) { - const text = document.body?.innerText || ''; - // Look for review section - const reviewIdx = text.search(/评价|评论|买家秀/); - if (reviewIdx > 0) { - const section = text.substring(reviewIdx, reviewIdx + 3000); - const lines = section.split('\\n').map(l => l.trim()).filter(l => l.length > 10 && l.length < 300); - for (const line of lines) { - // Skip headers and navigation - if (line.match(/^(评价|评论|买家秀|全部|好评|中评|差评|有图|追评)/)) continue; - if (line.match(/^\\d+$/)) continue; - results.push({ rank: results.length + 1, user: '', content: line.slice(0, 150), date: '', spec: '' }); - if (results.length >= ${limit}) break; - } + // Try alternative API endpoint + try { + const resp2 = await fetch( + 'https://rate.taobao.com/feedRateList.htm?auctionNumId=${kwargs.id}&currentPageNum=1&pageSize=${limit}&orderType=feedbackdate&callback=', + { credentials: 'include' } + ); + let text2 = await resp2.text(); + text2 = text2.replace(/^[^{]*/, '').replace(/[^}]*$/, 
''); + const json2 = JSON.parse(text2); + const comments = json2?.comments || []; + if (comments.length > 0) { + return comments.slice(0, ${limit}).map((item, i) => ({ + rank: i + 1, + user: (item.user?.nick || '').slice(0, 15), + content: normalize(item.content || '').slice(0, 150), + date: item.date || '', + spec: normalize(item.auction?.sku || '').slice(0, 40), + })); } - } + } catch {} - return results; + return []; })() `); return Array.isArray(data) ? data : []; From 564b54a65de5b0c209ef363357914ba270eab2e4 Mon Sep 17 00:00:00 2001 From: Mu Qiao <muqiao@Mus-Mac-mini.local> Date: Sun, 22 Mar 2026 13:03:28 +0100 Subject: [PATCH 4/5] fix(taobao): fix reviews via JSONP injection, fix cart text parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviews: bypass CORS by injecting a <script> tag with JSONP callback to call rate.tmall.com API directly from the product page context. Extracts sellerId from page HTML, constructs rate API URL, and parses the JSONP response for user, content, date, and SKU spec. Cart: parse cart items by splitting text on "移入收藏" delimiters, extracting product title, split-digit prices (¥/digits), and specs. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- src/clis/taobao/reviews.ts | 79 ++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/src/clis/taobao/reviews.ts b/src/clis/taobao/reviews.ts index b5f0c88..961eda0 100644 --- a/src/clis/taobao/reviews.ts +++ b/src/clis/taobao/reviews.ts @@ -14,47 +14,68 @@ cli({ navigateBefore: false, func: async (page, kwargs) => { const limit = Math.min(kwargs.limit || 10, 20); - // Navigate to product page first (to get cookies/session) + + // Navigate to product page first to get session cookies and sellerId await page.goto('https://www.taobao.com'); await page.wait(2); await page.evaluate(`location.href = 'https://item.taobao.com/item.htm?id=${kwargs.id}'`); - await page.wait(5); + await page.wait(6); - // Try to fetch reviews via the rate API + // Extract sellerId from page and call rate API with proper cookies const data = await page.evaluate(` (async () => { const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); - // Try MTOP rate list API + // Get sellerId from page context + let sellerId = ''; + // Try various sources + const pageText = document.documentElement.innerHTML || ''; + const sellerMatch = pageText.match(/sellerId['":\\s]+['"]?(\\d+)/) || pageText.match(/userId['":\\s]+['"]?(\\d+)/) || pageText.match(/shopId['":\\s]+['"]?(\\d+)/); + if (sellerMatch) sellerId = sellerMatch[1]; + + // Also try from shopkeeper link + if (!sellerId) { + const shopLink = document.querySelector('a[href*="shopId="], a[href*="seller_id="], a[href*="userId="]'); + const href = shopLink?.getAttribute('href') || ''; + const m = href.match(/(?:shopId|seller_id|userId)=(\\d+)/); + if (m) sellerId = m[1]; + } + + // Call the rate JSONP API + const url = 'https://rate.tmall.com/list_detail_rate.htm?itemId=${kwargs.id}' + + (sellerId ? 
'&sellerId=' + sellerId : '') + + '&order=3&currentPage=1&append=0&content=1&tagId=&posi=&picture=&groupValue=&needFold=0&_ksTS=' + Date.now(); + + // Call rate API via JSONP script injection (avoids CORS) try { - const resp = await fetch( - 'https://rate.tmall.com/list_detail_rate.htm?itemId=${kwargs.id}&sellerId=&order=3&currentPage=1&pageSize=${limit}&callback=', - { credentials: 'include' } - ); - let text = await resp.text(); - // Remove JSONP wrapper if any - text = text.replace(/^[^{]*/, '').replace(/[^}]*$/, ''); - const json = JSON.parse(text); - const list = json?.rateDetail?.rateList || json?.rateList || []; - if (list.length > 0) { - return list.slice(0, ${limit}).map((item, i) => ({ - rank: i + 1, - user: (item.displayUserNick || item.userNick || '').slice(0, 15), - content: normalize(item.rateContent || '').slice(0, 150), - date: item.rateDate || '', - spec: normalize(item.auctionSku || '').slice(0, 40), - })); - } + const results = await new Promise((resolve) => { + const cbName = '_ocli_rate_' + Date.now(); + window[cbName] = (data) => { + delete window[cbName]; + const list = data?.rateDetail?.rateList || []; + resolve(list.slice(0, ${limit}).map((item, i) => ({ + rank: i + 1, + user: (item.displayUserNick || item.userNick || '').slice(0, 15), + content: normalize(item.rateContent || '').slice(0, 150), + date: (item.rateDate || '').slice(0, 10), + spec: normalize(item.auctionSku || '').slice(0, 40), + }))); + }; + const script = document.createElement('script'); + script.src = url + '&callback=' + cbName; + script.onerror = () => { delete window[cbName]; resolve([]); }; + document.head.appendChild(script); + setTimeout(() => { delete window[cbName]; resolve([]); }, 10000); + }); + if (results.length > 0) return results; } catch {} - // Try alternative API endpoint + // Try taobao rate API as fallback try { - const resp2 = await fetch( - 
'https://rate.taobao.com/feedRateList.htm?auctionNumId=${kwargs.id}&currentPageNum=1&pageSize=${limit}&orderType=feedbackdate&callback=', - { credentials: 'include' } - ); + const url2 = 'https://rate.taobao.com/feedRateList.htm?auctionNumId=${kwargs.id}&userNumId=' + sellerId + '&currentPageNum=1&pageSize=${limit}&orderType=feedbackdate&callback='; + const resp2 = await fetch(url2, { credentials: 'include' }); let text2 = await resp2.text(); - text2 = text2.replace(/^[^{]*/, '').replace(/[^}]*$/, ''); + text2 = text2.replace(/^[^(]*\\(/, '').replace(/\\);?\\s*$/, ''); const json2 = JSON.parse(text2); const comments = json2?.comments || []; if (comments.length > 0) { @@ -62,7 +83,7 @@ cli({ rank: i + 1, user: (item.user?.nick || '').slice(0, 15), content: normalize(item.content || '').slice(0, 150), - date: item.date || '', + date: (item.date || '').slice(0, 10), spec: normalize(item.auction?.sku || '').slice(0, 40), })); } From d8168fee30fe1321b334ab3b12ea2a3ab2232370 Mon Sep 17 00:00:00 2001 From: Mu Qiao <muqiao@Mus-Mac-mini.local> Date: Sun, 22 Mar 2026 13:12:01 +0100 Subject: [PATCH 5/5] feat(taobao): add spec selection to add-cart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add --spec flag for specifying product variants when adding to cart. 
Usage: opencli taobao add-cart <id> --spec "SG90 180度 小5孔" - Multiple keywords are space-separated, matched against spec options - Each spec group selects the option with the most keyword matches - Without --spec, auto-selects the first available option per group - Polls for cart confirmation dialog (handles async UI) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- src/clis/taobao/add-cart.ts | 123 ++++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 25 deletions(-) diff --git a/src/clis/taobao/add-cart.ts b/src/clis/taobao/add-cart.ts index bff12f0..c97ea22 100644 --- a/src/clis/taobao/add-cart.ts +++ b/src/clis/taobao/add-cart.ts @@ -8,11 +8,14 @@ cli({ strategy: Strategy.COOKIE, args: [ { name: 'id', positional: true, required: true, help: '商品 ID' }, + { name: 'spec', help: '规格关键词(如 "180度" "红色 XL"),多个规格用空格分隔,模糊匹配' }, ], - columns: ['status', 'title', 'price', 'item_id'], + columns: ['status', 'title', 'price', 'selected_spec', 'item_id'], navigateBefore: false, func: async (page, kwargs) => { const itemId = kwargs.id; + const specKeywords = kwargs.spec ? String(kwargs.spec).split(/\s+/).filter(Boolean) : []; + await page.goto('https://www.taobao.com'); await page.wait(2); await page.evaluate(`location.href = 'https://item.taobao.com/item.htm?id=${itemId}'`); @@ -22,57 +25,127 @@ cli({ const info = await page.evaluate(` (() => { const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); - const titleEl = document.querySelector('[class*="mainTitle--"], [class*="ItemHeader--"], h1'); + const titleEl = document.querySelector('[class*="mainTitle--"]'); const title = titleEl ? normalize(titleEl.textContent) : document.title.split('-')[0].trim(); - const priceEl = document.querySelector('[class*="priceText--"], [class*="Price--"]'); - const price = priceEl ? 
'¥' + normalize(priceEl.textContent).replace(/[¥¥]/g, '') : ''; + const text = document.body?.innerText || ''; + const priceMatch = text.match(/[¥¥]\\s*(\\d+(?:\\.\\d{1,2})?)/); + const price = priceMatch ? '¥' + priceMatch[1] : ''; return { title: title.slice(0, 80), price }; })() `); + // Select specs by clicking matching valueItems + const specArgs = JSON.stringify(specKeywords); + const selectResult = await page.evaluate(` + (() => { + const normalize = v => (v || '').replace(/\\s+/g, ' ').trim(); + const keywords = ${specArgs}; + const items = document.querySelectorAll('[class*="valueItem--"]'); + const selected = []; + + if (keywords.length === 0 && items.length > 0) { + // No spec given: auto-select first available option in each group + // Find spec groups by looking at parent containers + const groups = new Map(); + for (const item of items) { + const group = item.closest('[class*="skuItem--"], [class*="prop--"]') || item.parentElement; + const groupKey = group?.className?.substring(0, 30) || 'default'; + if (!groups.has(groupKey)) groups.set(groupKey, []); + groups.get(groupKey).push(item); + } + for (const [, groupItems] of groups) { + // Skip if already has a selected item + const hasSelected = groupItems.some(el => el.className.includes('selected') || el.className.includes('active')); + if (hasSelected) continue; + // Click first non-disabled item + for (const item of groupItems) { + if (!item.className.includes('disabled') && !item.className.includes('gray')) { + item.click(); + selected.push(normalize(item.textContent).substring(0, 40)); + break; + } + } + } + } else { + // Match by keywords: find items that contain ALL keywords + // Group items by their spec group first + const groups = new Map(); + for (const item of items) { + const group = item.closest('[class*="skuItem--"], [class*="prop--"]') || item.parentElement; + const groupKey = group ? 
Array.from(groups.keys()).find(k => k === group) || group : 'default'; + if (!groups.has(groupKey)) groups.set(groupKey, []); + groups.get(groupKey).push(item); + } + + for (const [, groupItems] of groups) { + let best = null; + let bestScore = 0; + for (const item of groupItems) { + if (item.className.includes('disabled')) continue; + const t = normalize(item.textContent); + // Score = number of keywords matched + const score = keywords.filter(kw => t.includes(kw)).length; + if (score > bestScore) { bestScore = score; best = item; } + } + if (best && bestScore > 0) { + best.click(); + selected.push(normalize(best.textContent).substring(0, 40)); + } + } + } + return selected; + })() + `); + await page.wait(1); + // Click add-to-cart button await page.evaluate(` (() => { - // Find add-to-cart button by text content - const buttons = document.querySelectorAll('button, [role="button"], a, div[class*="btn"], span[class*="btn"]'); - for (const btn of buttons) { - const t = (btn.textContent || '').trim(); - if (t === '加入购物车' || t === '加入 购物车' || t.includes('加入购物车')) { - btn.click(); + const all = document.querySelectorAll('button, [role="button"], a, div, span'); + for (const el of all) { + const t = (el.textContent || '').trim(); + if ((t === '加入购物车' || t === '加入 购物车') && el.children.length < 5) { + el.click(); return 'clicked'; } } return 'btn_not_found'; })() `); - await page.wait(3); - - // Check result + // Wait and poll for result (cart dialog may take time to appear) const result = await page.evaluate(` - (() => { - const text = document.body?.innerText || ''; - if (text.includes('已加入购物车') || text.includes('商品已成功') || text.includes('去购物车')) { - return 'success'; - } - if (text.includes('请选择') || text.includes('请先选择')) { - return 'need_spec'; - } - if (text.includes('请登录') || text.includes('login')) { - return 'login_required'; + (async () => { + for (let i = 0; i < 10; i++) { + await new Promise(r => setTimeout(r, 500)); + const text = document.body?.innerText || 
''; + if (text.includes('已加入购物车') || text.includes('商品已成功') || text.includes('去购物车结算') || text.includes('去购物车')) { + return 'success'; + } + if (text.includes('请选择') || text.includes('请先选择')) { + return 'need_spec'; + } } + // Final check + const text = document.body?.innerText || ''; + if (text.includes('请登录')) return 'login_required'; + // Check if URL changed to cart + if (location.href.includes('cart')) return 'success'; return 'unknown'; })() `); - let status = '? 未确认(可能需要选规格)'; + let status = '? 未确认'; if (result === 'success') status = '✓ 已加入购物车'; - else if (result === 'need_spec') status = '✗ 需要先选择规格'; + else if (result === 'need_spec') status = '✗ 需要选择更多规格'; else if (result === 'login_required') status = '✗ 需要登录'; + const selectedSpec = Array.isArray(selectResult) ? selectResult.join(' | ') : ''; + return [{ status, title: info?.title || '', price: info?.price || '', + selected_spec: selectedSpec || '(未选择)', item_id: itemId, }]; },