From acfd6db4dbf79a10e723c3912629cd5c05b852f4 Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 12:48:29 +0800 Subject: [PATCH 1/7] Add util to get image URLs in documents --- package.json | 1 + src/utils/extractImageUrls.js | 63 ++++++++++ src/utils/extractImageUrls.test.js | 194 +++++++++++++++++++++++++++++ 3 files changed, 258 insertions(+) create mode 100644 src/utils/extractImageUrls.js create mode 100644 src/utils/extractImageUrls.test.js diff --git a/package.json b/package.json index 4ccbbe49..ed63f543 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "lint": "eslint .", "prettier-check": "prettier --check ./src", "format:fix": "prettier --write ./src", + "test": "react-scripts test", "storybook": "storybook dev -p 6006", "build-storybook": "storybook build", "cy:open": "cypress open", diff --git a/src/utils/extractImageUrls.js b/src/utils/extractImageUrls.js new file mode 100644 index 00000000..586b2752 --- /dev/null +++ b/src/utils/extractImageUrls.js @@ -0,0 +1,63 @@ +/** + * Extracts all potential image URLs from a JSON document object. + * + * @param {any} documentObject - The JSON object to search for image URLs + * @returns {string[]} Array of unique image URLs found in the document + */ +export default function extractImageUrls(documentObject) { + // Handle null, undefined, or non-object inputs + if (!documentObject || typeof documentObject !== 'object') { + return [] + } + + const imageUrls = new Set() // Use Set to automatically handle uniqueness + + // Regular expression patterns for image URLs + const imageExtensionPattern = /\.(png|jpg|jpeg|gif|webp|svg)(\?.*)?$/i + const dataImagePattern = /^data:image\//i + + /** + * Recursively traverses an object to find image URLs + * @param {any} obj - Current object/value being processed + */ + function traverse(obj) { + // Handle null or undefined + if (obj === null || obj === undefined) { + return + } + + // If it's a string, check if it's an image URL + if (typeof obj === 'string') { + const trimmedStr = obj.trim() + + // Check for common image file extensions + if (imageExtensionPattern.test(trimmedStr)) { + imageUrls.add(trimmedStr) + } + // Check for data:image/ URLs (base64 encoded images) + else if (dataImagePattern.test(trimmedStr)) { + imageUrls.add(trimmedStr) + } + return + } + + // If it's an array, traverse each element + if (Array.isArray(obj)) { + obj.forEach((item) => traverse(item)) + return + } + + // If it's an object, traverse each property value + if (typeof obj === 'object') { + Object.values(obj).forEach((value) => traverse(value)) + } + + // For primitive types (number, boolean, etc.), do nothing + } + + // Start the traversal + traverse(documentObject) + + // Convert Set back to Array and return + return Array.from(imageUrls) +} diff --git a/src/utils/extractImageUrls.test.js b/src/utils/extractImageUrls.test.js new file mode 100644 index 00000000..f5b659ef --- /dev/null +++ b/src/utils/extractImageUrls.test.js @@ -0,0 +1,194 @@ +import extractImageUrls from './extractImageUrls' + +describe('extractImageUrls', () => { + test('should extract image URL from simple object', () => { + const input = { image: 'http://example.com/image.png' } + const result = extractImageUrls(input) + expect(result).toEqual(['http://example.com/image.png']) + }) + + test('should extract image URLs from nested object', () => { + const input = { + details: { + mainImage: 'http://example.com/photo.jpg', + description: 'A beautiful photo', + }, + } + const result = extractImageUrls(input) + expect(result).toEqual(['http://example.com/photo.jpg']) + }) + + test('should extract image URLs from array', () => { + const input = { + gallery: ['img1.gif', 'http://example.com/img2.webp'], + } + const result = extractImageUrls(input) + expect(result).toEqual(['img1.gif', 'http://example.com/img2.webp']) + }) + + test('should handle mixed content including non-URL strings', () => { + const input = { + title: 'My Article', + content: 'This is some text content', + image: 'http://example.com/article.png', + author: 'John Doe', + tags: ['tech', 'programming'], + } + const result = extractImageUrls(input) + expect(result).toEqual(['http://example.com/article.png']) + }) + + test('should extract data:image URLs', () => { + const input = { + avatar: + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==', + name: 'User', + } + const result = extractImageUrls(input) + expect(result).toEqual([ + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==', + ]) + }) + + test('should return unique URLs when duplicates exist', () => { + const input = { + image1: 'http://example.com/duplicate.jpg', + image2: 'http://example.com/duplicate.jpg', + gallery: [ + 'http://example.com/duplicate.jpg', + 'http://example.com/unique.png', + ], + } + const result = extractImageUrls(input) + expect(result).toEqual([ + 'http://example.com/duplicate.jpg', + 'http://example.com/unique.png', + ]) + }) + + test('should return empty array when no image URLs found', () => { + const input = { + title: 'Article Title', + content: 'Some text content', + author: 'John Doe', + tags: ['tech', 'programming'], + metadata: { + created: '2023-01-01', + views: 100, + }, + } + const result = extractImageUrls(input) + expect(result).toEqual([]) + }) + + test('should handle null input gracefully', () => { + const result = extractImageUrls(null) + expect(result).toEqual([]) + }) + + test('should handle undefined input gracefully', () => { + const result = extractImageUrls(undefined) + expect(result).toEqual([]) + }) + + test('should handle non-object input gracefully', () => { + expect(extractImageUrls('string')).toEqual([]) + expect(extractImageUrls(123)).toEqual([]) + expect(extractImageUrls(true)).toEqual([]) + }) + + test('should extract multiple different image formats', () => { + const input = { + images: { + png: 'http://example.com/image.png', + jpg: 'http://example.com/photo.jpg', + jpeg: 'http://example.com/picture.jpeg', + gif: 'http://example.com/animation.gif', + webp: 'http://example.com/modern.webp', + svg: 'http://example.com/vector.svg', + }, + } + const result = extractImageUrls(input) + expect(result).toHaveLength(6) + expect(result).toContain('http://example.com/image.png') + expect(result).toContain('http://example.com/photo.jpg') + expect(result).toContain('http://example.com/picture.jpeg') + expect(result).toContain('http://example.com/animation.gif') + expect(result).toContain('http://example.com/modern.webp') + expect(result).toContain('http://example.com/vector.svg') + }) + + test('should handle URLs with query parameters', () => { + const input = { + thumbnail: 'http://example.com/thumb.jpg?size=small&quality=80', + fullsize: 'http://example.com/full.png?width=1920&height=1080', + } + const result = extractImageUrls(input) + expect(result).toEqual([ + 'http://example.com/thumb.jpg?size=small&quality=80', + 'http://example.com/full.png?width=1920&height=1080', + ]) + }) + + test('should handle deeply nested objects', () => { + const input = { + level1: { + level2: { + level3: { + level4: { + deepImage: 'http://example.com/deep.jpg', + }, + }, + }, + }, + } + const result = extractImageUrls(input) + expect(result).toEqual(['http://example.com/deep.jpg']) + }) + + test('should handle arrays within nested objects', () => { + const input = { + article: { + content: { + sections: [ + { + type: 'text', + value: 'Some text', + }, + { + type: 'image', + value: 'http://example.com/section1.png', + }, + { + type: 'gallery', + images: [ + 'http://example.com/gallery1.jpg', + 'http://example.com/gallery2.jpg', + ], + }, + ], + }, + }, + } + const result = extractImageUrls(input) + expect(result).toEqual([ + 'http://example.com/section1.png', + 'http://example.com/gallery1.jpg', + 'http://example.com/gallery2.jpg', + ]) + }) + + test('should handle case-insensitive file extensions', () => { + const input = { + upperCase: 'http://example.com/IMAGE.PNG', + mixedCase: 'http://example.com/Photo.JpG', + lowerCase: 'http://example.com/picture.gif', + } + const result = extractImageUrls(input) + expect(result).toEqual([ + 'http://example.com/IMAGE.PNG', + 'http://example.com/Photo.JpG', + 'http://example.com/picture.gif', + ]) + }) +}) From b376229e609fd003582a8a6b8a579a9305c9c7a3 Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 13:00:39 +0800 Subject: [PATCH 2/7] Retrieve image URLs in the Hit component --- src/components/Results/Hit.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/components/Results/Hit.js b/src/components/Results/Hit.js index 0a674284..6f3e7030 100644 --- a/src/components/Results/Hit.js +++ b/src/components/Results/Hit.js @@ -11,6 +11,7 @@ import Card from 'components/Card' import BaseLink from 'components/Link' import Typography from 'components/Typography' import Highlight from './Highlight' +import extractImageUrls from '../../utils/extractImageUrls' const EmptyImage = styled.div` width: 100%; @@ -202,6 +203,12 @@ const Hit = ({ hit, imageKey }) => { ? Object.entries(hit._highlightResult) : [] + // Extract all image URLs from the hit document + const imageUrls = extractImageUrls(hit) + + // Temporary logging for development verification (Task 1.2) + console.log('Image URLs found for hit:', imageUrls) + useEffect(() => { if (!hit._highlightResult) { // eslint-disable-next-line no-console From ac54333da8a850a26b24d18aa2b8db5f195656c9 Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 13:09:32 +0800 Subject: [PATCH 3/7] Display auto-detected image --- src/components/Results/Hit.js | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/components/Results/Hit.js b/src/components/Results/Hit.js index 6f3e7030..73c8bbf4 100644 --- a/src/components/Results/Hit.js +++ b/src/components/Results/Hit.js @@ -20,6 +20,14 @@ const EmptyImage = styled.div` border-radius: 10px; ` +const StyledResultImage = styled(LazyLoadImage)` + max-width: 100%; + max-height: 264px; + object-fit: cover; + display: block; + border-radius: 10px; +` + const CustomCard = styled(Card)` display: flex; ` @@ -216,15 +224,15 @@ const Hit = ({ hit, imageKey }) => { } }, []) + // Determine the image source to display (prioritize auto-detected images) + const imageSource = imageUrls.length > 0 ? imageUrls[0] : hit[imageKey] + const altText = hit.title || hit.name || 'Result image' + return ( - {hit[imageKey] ? ( - + {imageSource ? ( + ) : ( )} From 23a3d21d78608c1c451978fc37b19ed526e2a2ec Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 13:12:53 +0800 Subject: [PATCH 4/7] Handle image loading errors --- src/components/Results/Hit.js | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/components/Results/Hit.js b/src/components/Results/Hit.js index 73c8bbf4..5165692e 100644 --- a/src/components/Results/Hit.js +++ b/src/components/Results/Hit.js @@ -206,6 +206,7 @@ const FieldValue = ({ hit, objectKey }) => { const Hit = ({ hit, imageKey }) => { const [displayMore, setDisplayMore] = React.useState(false) + const [imageError, setImageError] = React.useState(false) const hasFields = !!hit._highlightResult const documentProperties = hasFields ? Object.entries(hit._highlightResult) @@ -214,9 +215,6 @@ const Hit = ({ hit, imageKey }) => { // Extract all image URLs from the hit document const imageUrls = extractImageUrls(hit) - // Temporary logging for development verification (Task 1.2) - console.log('Image URLs found for hit:', imageUrls) - useEffect(() => { if (!hit._highlightResult) { // eslint-disable-next-line no-console @@ -228,11 +226,26 @@ const Hit = ({ hit, imageKey }) => { const imageSource = imageUrls.length > 0 ? imageUrls[0] : hit[imageKey] const altText = hit.title || hit.name || 'Result image' + // Reset image error state when image source changes + useEffect(() => { + setImageError(false) + }, [imageSource]) + + // Handle image load error + const handleImageError = () => { + setImageError(true) + } + return ( - {imageSource ? ( - + {imageSource && !imageError ? ( + ) : ( )} From 4a4e3edcedcf5abff27d7274c6eb98d27280361d Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 16:18:06 +0800 Subject: [PATCH 5/7] Remove old image path identification logic --- src/components/Results/Hit.js | 6 +- src/components/Results/InfiniteHits.js | 82 +++++++------------------- src/utils/extractImageUrls.js | 14 ++++- src/utils/extractImageUrls.test.js | 27 +++++++++ 4 files changed, 63 insertions(+), 66 deletions(-) diff --git a/src/components/Results/Hit.js b/src/components/Results/Hit.js index 5165692e..d6e0a67e 100644 --- a/src/components/Results/Hit.js +++ b/src/components/Results/Hit.js @@ -204,7 +204,7 @@ const FieldValue = ({ hit, objectKey }) => { ) } -const Hit = ({ hit, imageKey }) => { +const Hit = ({ hit }) => { const [displayMore, setDisplayMore] = React.useState(false) const [imageError, setImageError] = React.useState(false) const hasFields = !!hit._highlightResult @@ -222,8 +222,8 @@ const Hit = ({ hit, imageKey }) => { } }, []) - // Determine the image source to display (prioritize auto-detected images) - const imageSource = imageUrls.length > 0 ? imageUrls[0] : hit[imageKey] + // Determine the image source to display + const imageSource = imageUrls.length > 0 ? imageUrls[0] : null const altText = hit.title || hit.name || 'Result image' // Reset image error state when image source changes diff --git a/src/components/Results/InfiniteHits.js b/src/components/Results/InfiniteHits.js index a99ddc56..80222a1a 100644 --- a/src/components/Results/InfiniteHits.js +++ b/src/components/Results/InfiniteHits.js @@ -18,53 +18,16 @@ const HitsList = styled.ul` } ` -const isAnImage = async (elem) => { - // Test the standard way with regex and image extensions - if ( - typeof elem === 'string' && - elem.match(/^(https|http):\/\/.*(jpe?g|png|gif|webp)(\?.*)?$/gi) - ) - return true - - if (typeof elem === 'string' && elem.match(/^https?:\/\//)) { - // Tries to load an image that is a valid URL but doesn't have a correct extension - return new Promise((resolve) => { - const img = new Image() - img.src = elem - img.onload = () => resolve(true) - img.onerror = () => resolve(false) - }) - } - return false -} - -const findImageKey = async (array) => { - const promises = array.map(async (elem) => isAnImage(elem[1])) - const results = await Promise.all(promises) - const index = results.findIndex((result) => result) - const imageField = array[index] - return imageField?.[0] -} - -const InfiniteHits = connectInfiniteHits(({ hits, hasMore, refineNext }) => { - const [imageKey, setImageKey] = React.useState(false) - - React.useEffect(() => { - const getImageKey = async () => { - setImageKey(hits[0] ? await findImageKey(Object.entries(hits[0])) : null) - } - getImageKey() - }, [hits[0]]) +const InfiniteHits = connectInfiniteHits(({ hits, hasMore, refineNext }) => ( // ({ hits, hasMore, refineNext, mode }) => { - return ( -
- {/* {mode === 'fancy' ? ( */} - - {hits.map((hit, index) => ( - - ))} - - {/* ) : ( +
+ {/* {mode === 'fancy' ? ( */} + + {hits.map((hit, index) => ( + + ))} + + {/* ) : ( { /> )} */} - {hasMore && ( - - )} - -
- ) -}) + {hasMore && ( + + )} + +
+)) export default InfiniteHits diff --git a/src/utils/extractImageUrls.js b/src/utils/extractImageUrls.js index 586b2752..4524cb01 100644 --- a/src/utils/extractImageUrls.js +++ b/src/utils/extractImageUrls.js @@ -1,3 +1,5 @@ +const MAX_DEPTH = 10 + /** * Extracts all potential image URLs from a JSON document object. * @@ -19,8 +21,14 @@ export default function extractImageUrls(documentObject) { /** * Recursively traverses an object to find image URLs * @param {any} obj - Current object/value being processed + * @param {number} depth - Current depth in the object hierarchy */ - function traverse(obj) { + function traverse(obj, depth = 0) { + // Stop traversal if we've reached the maximum depth + if (depth >= MAX_DEPTH) { + return + } + // Handle null or undefined if (obj === null || obj === undefined) { return @@ -43,13 +51,13 @@ export default function extractImageUrls(documentObject) { // If it's an array, traverse each element if (Array.isArray(obj)) { - obj.forEach((item) => traverse(item)) + obj.forEach((item) => traverse(item, depth + 1)) return } // If it's an object, traverse each property value if (typeof obj === 'object') { - Object.values(obj).forEach((value) => traverse(value)) + Object.values(obj).forEach((value) => traverse(value, depth + 1)) } // For primitive types (number, boolean, etc.), do nothing diff --git a/src/utils/extractImageUrls.test.js b/src/utils/extractImageUrls.test.js index f5b659ef..e189432c 100644 --- a/src/utils/extractImageUrls.test.js +++ b/src/utils/extractImageUrls.test.js @@ -191,4 +191,31 @@ describe('extractImageUrls', () => { 'http://example.com/picture.gif', ]) }) + + test('should respect MAX_DEPTH limit and stop traversal at maximum depth', () => { + const input = { + level1: { + level2: { + level3: { + level4: { + level5: { + level6: { + level7: { + level8: { + level9Image: 'http://example.com/level9.jpg', + level9: { + level10Image: 'http://example.com/level10.jpg', + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + const result = extractImageUrls(input) + expect(result).toEqual(['http://example.com/level9.jpg']) + }) }) From f716bfb527614e636c45db1b3a0fc6b000a476e9 Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 16:27:53 +0800 Subject: [PATCH 6/7] Update image object fit to contain --- src/components/Results/Hit.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/Results/Hit.js b/src/components/Results/Hit.js index d6e0a67e..f4e7d9e3 100644 --- a/src/components/Results/Hit.js +++ b/src/components/Results/Hit.js @@ -23,7 +23,7 @@ const EmptyImage = styled.div` const StyledResultImage = styled(LazyLoadImage)` max-width: 100%; max-height: 264px; - object-fit: cover; + object-fit: contain; display: block; border-radius: 10px; ` From 31d8cb3845d4d8c4a9c57530ad8cb9af0b01a85a Mon Sep 17 00:00:00 2001 From: Strift Date: Tue, 27 May 2025 17:24:34 +0800 Subject: [PATCH 7/7] Only retrieve first image --- src/components/Results/Hit.js | 8 +- ...s.test.js => extractFirstImageUrl.test.js} | 135 +++++++----------- ...tImageUrls.js => extractFirstImageUrls.js} | 51 ++++--- 3 files changed, 80 insertions(+), 114 deletions(-) rename src/utils/{extractImageUrls.test.js => extractFirstImageUrl.test.js} (54%) rename src/utils/{extractImageUrls.js => extractFirstImageUrls.js} (53%) diff --git a/src/components/Results/Hit.js b/src/components/Results/Hit.js index f4e7d9e3..d702c8b6 100644 --- a/src/components/Results/Hit.js +++ b/src/components/Results/Hit.js @@ -11,7 +11,7 @@ import Card from 'components/Card' import BaseLink from 'components/Link' import Typography from 'components/Typography' import Highlight from './Highlight' -import extractImageUrls from '../../utils/extractImageUrls' +import extractFirstImageUrl from '../../utils/extractFirstImageUrls' const EmptyImage = styled.div` width: 100%; @@ -212,8 +212,7 @@ const Hit = ({ hit }) => { ? Object.entries(hit._highlightResult) : [] - // Extract all image URLs from the hit document - const imageUrls = extractImageUrls(hit) + const imageSource = extractFirstImageUrl(hit) useEffect(() => { if (!hit._highlightResult) { @@ -221,9 +220,6 @@ const Hit = ({ hit }) => { console.warn('Your hits have no field. Please check your index settings.') } }, []) - - // Determine the image source to display - const imageSource = imageUrls.length > 0 ? imageUrls[0] : null const altText = hit.title || hit.name || 'Result image' // Reset image error state when image source changes diff --git a/src/utils/extractImageUrls.test.js b/src/utils/extractFirstImageUrl.test.js similarity index 54% rename from src/utils/extractImageUrls.test.js rename to src/utils/extractFirstImageUrl.test.js index e189432c..7f8da824 100644 --- a/src/utils/extractImageUrls.test.js +++ b/src/utils/extractFirstImageUrl.test.js @@ -1,10 +1,10 @@ -import extractImageUrls from './extractImageUrls' +import extractFirstImageUrl from './extractFirstImageUrls' -describe('extractImageUrls', () => { +describe('extractFirstImageUrl', () => { test('should extract image URL from simple object', () => { const input = { image: 'http://example.com/image.png' } - const result = extractImageUrls(input) - expect(result).toEqual(['http://example.com/image.png']) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/image.png') }) test('should extract image URLs from nested object', () => { @@ -14,16 +14,31 @@ describe('extractImageUrls', () => { description: 'A beautiful photo', }, } - const result = extractImageUrls(input) - expect(result).toEqual(['http://example.com/photo.jpg']) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/photo.jpg') }) - test('should extract image URLs from array', () => { + test('should extract the first image URL from array', () => { const input = { gallery: ['img1.gif', 'http://example.com/img2.webp'], } - const result = extractImageUrls(input) - expect(result).toEqual(['img1.gif', 'http://example.com/img2.webp']) + const result = extractFirstImageUrl(input) + expect(result).toBe('img1.gif') + }) + + test('should extract first image from object', () => { + const input = { + images: { + png: 'http://example.com/image.png', + jpg: 'http://example.com/photo.jpg', + jpeg: 'http://example.com/picture.jpeg', + gif: 'http://example.com/animation.gif', + webp: 'http://example.com/modern.webp', + svg: 'http://example.com/vector.svg', + }, + } + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/image.png') }) test('should handle mixed content including non-URL strings', () => { @@ -34,8 +49,8 @@ describe('extractImageUrls', () => { author: 'John Doe', tags: ['tech', 'programming'], } - const result = extractImageUrls(input) - expect(result).toEqual(['http://example.com/article.png']) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/article.png') }) test('should extract data:image URLs', () => { @@ -44,29 +59,13 @@ describe('extractImageUrls', () => { 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==', name: 'User', } - const result = extractImageUrls(input) - expect(result).toEqual([ - 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==', - ]) - }) - - test('should return unique URLs when duplicates exist', () => { - const input = { - image1: 'http://example.com/duplicate.jpg', - image2: 'http://example.com/duplicate.jpg', - gallery: [ - 'http://example.com/duplicate.jpg', - 'http://example.com/unique.png', - ], - } - const result = extractImageUrls(input) - expect(result).toEqual([ - 'http://example.com/duplicate.jpg', - 'http://example.com/unique.png', - ]) + const result = extractFirstImageUrl(input) + expect(result).toBe( + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' + ) }) - test('should return empty array when no image URLs found', () => { + test('should return null when no image URLs found', () => { const input = { title: 'Article Title', content: 'Some text content', @@ -77,45 +76,24 @@ describe('extractImageUrls', () => { views: 100, }, } - const result = extractImageUrls(input) - expect(result).toEqual([]) + const result = extractFirstImageUrl(input) + expect(result).toBeNull() }) test('should handle null input gracefully', () => { - const result = extractImageUrls(null) - expect(result).toEqual([]) + const result = extractFirstImageUrl(null) + expect(result).toBeNull() }) test('should handle undefined input gracefully', () => { - const result = extractImageUrls(undefined) - expect(result).toEqual([]) + const result = extractFirstImageUrl(undefined) + expect(result).toBeNull() }) test('should handle non-object input gracefully', () => { - expect(extractImageUrls('string')).toEqual([]) - expect(extractImageUrls(123)).toEqual([]) - expect(extractImageUrls(true)).toEqual([]) - }) - - test('should extract multiple different image formats', () => { - const input = { - images: { - png: 'http://example.com/image.png', - jpg: 'http://example.com/photo.jpg', - jpeg: 'http://example.com/picture.jpeg', - gif: 'http://example.com/animation.gif', - webp: 'http://example.com/modern.webp', - svg: 'http://example.com/vector.svg', - }, - } - const result = extractImageUrls(input) - expect(result).toHaveLength(6) - expect(result).toContain('http://example.com/image.png') - expect(result).toContain('http://example.com/photo.jpg') - expect(result).toContain('http://example.com/picture.jpeg') - expect(result).toContain('http://example.com/animation.gif') - expect(result).toContain('http://example.com/modern.webp') - expect(result).toContain('http://example.com/vector.svg') + expect(extractFirstImageUrl('string')).toBeNull() + expect(extractFirstImageUrl(123)).toBeNull() + expect(extractFirstImageUrl(true)).toBeNull() }) test('should handle URLs with query parameters', () => { @@ -123,11 +101,8 @@ describe('extractImageUrls', () => { thumbnail: 'http://example.com/thumb.jpg?size=small&quality=80', fullsize: 'http://example.com/full.png?width=1920&height=1080', } - const result = extractImageUrls(input) - expect(result).toEqual([ - 'http://example.com/thumb.jpg?size=small&quality=80', - 'http://example.com/full.png?width=1920&height=1080', - ]) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/thumb.jpg?size=small&quality=80') }) test('should handle deeply nested objects', () => { @@ -142,8 +117,8 @@ describe('extractImageUrls', () => { }, }, } - const result = extractImageUrls(input) - expect(result).toEqual(['http://example.com/deep.jpg']) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/deep.jpg') }) test('should handle arrays within nested objects', () => { @@ -170,26 +145,16 @@ describe('extractImageUrls', () => { }, }, } - const result = extractImageUrls(input) - expect(result).toEqual([ - 'http://example.com/section1.png', - 'http://example.com/gallery1.jpg', - 'http://example.com/gallery2.jpg', - ]) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/section1.png') }) test('should handle case-insensitive file extensions', () => { const input = { - upperCase: 'http://example.com/IMAGE.PNG', mixedCase: 'http://example.com/Photo.JpG', - lowerCase: 'http://example.com/picture.gif', } - const result = extractImageUrls(input) - expect(result).toEqual([ - 'http://example.com/IMAGE.PNG', - 'http://example.com/Photo.JpG', - 'http://example.com/picture.gif', - ]) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/Photo.JpG') }) test('should respect MAX_DEPTH limit and stop traversal at maximum depth', () => { @@ -215,7 +180,7 @@ describe('extractImageUrls', () => { }, }, } - const result = extractImageUrls(input) - expect(result).toEqual(['http://example.com/level9.jpg']) + const result = extractFirstImageUrl(input) + expect(result).toBe('http://example.com/level9.jpg') }) }) diff --git a/src/utils/extractImageUrls.js b/src/utils/extractFirstImageUrls.js similarity index 53% rename from src/utils/extractImageUrls.js rename to src/utils/extractFirstImageUrls.js index 4524cb01..c4c12b8a 100644 --- a/src/utils/extractImageUrls.js +++ b/src/utils/extractFirstImageUrls.js @@ -1,37 +1,36 @@ const MAX_DEPTH = 10 /** - * Extracts all potential image URLs from a JSON document object. + * Extracts the first image URL from a JSON document object. * * @param {any} documentObject - The JSON object to search for image URLs - * @returns {string[]} Array of unique image URLs found in the document + * @returns {string|null} The first image URL found in the document (or null if none found) */ -export default function extractImageUrls(documentObject) { +export default function extractFirstImageUrl(documentObject) { // Handle null, undefined, or non-object inputs if (!documentObject || typeof documentObject !== 'object') { - return [] + return null } - const imageUrls = new Set() // Use Set to automatically handle uniqueness - // Regular expression patterns for image URLs const imageExtensionPattern = /\.(png|jpg|jpeg|gif|webp|svg)(\?.*)?$/i const dataImagePattern = /^data:image\//i /** - * Recursively traverses an object to find image URLs + * Recursively traverses an object to find the first image URL * @param {any} obj - Current object/value being processed * @param {number} depth - Current depth in the object hierarchy + * @returns {string|null} The first image URL found, or null if none found */ function traverse(obj, depth = 0) { - // Stop traversal if we've reached the maximum depth + // Stop traversal if we've reached maximum depth if (depth >= MAX_DEPTH) { - return + return null } // Handle null or undefined if (obj === null || obj === undefined) { - return + return null } // If it's a string, check if it's an image URL @@ -40,32 +39,38 @@ export default function extractImageUrls(documentObject) { // Check for common image file extensions if (imageExtensionPattern.test(trimmedStr)) { - imageUrls.add(trimmedStr) + return trimmedStr } // Check for data:image/ URLs (base64 encoded images) - else if (dataImagePattern.test(trimmedStr)) { - imageUrls.add(trimmedStr) + if (dataImagePattern.test(trimmedStr)) { + return trimmedStr } - return + return null } // If it's an array, traverse each element if (Array.isArray(obj)) { - obj.forEach((item) => traverse(item, depth + 1)) - return + let result = null + obj.some((item) => { + result = traverse(item, depth + 1) + return result !== null + }) + return result } // If it's an object, traverse each property value if (typeof obj === 'object') { - Object.values(obj).forEach((value) => traverse(value, depth + 1)) + let result = null + Object.values(obj).some((value) => { + result = traverse(value, depth + 1) + return result !== null + }) + return result } - // For primitive types (number, boolean, etc.), do nothing + // For primitive types (number, boolean, etc.), return null + return null } - // Start the traversal - traverse(documentObject) - - // Convert Set back to Array and return - return Array.from(imageUrls) + return traverse(documentObject) }