From 3e29651b98fd96dd27631480607904952e33af5b Mon Sep 17 00:00:00 2001 From: kurema Date: Wed, 3 Mar 2021 00:25:16 +0900 Subject: [PATCH] Add DownloadMagickImages and some fix --- WebImageExtractor.App/Program.cs | 40 ++++++----- WebImageExtractor/Extensions/UriExtensions.cs | 13 ++-- WebImageExtractor/ImageDownloader.cs | 66 +++++++++++++++++-- WebImageExtractor/Recurser.cs | 2 +- WebImageExtractor/WebImage.cs | 34 ++++++++-- 5 files changed, 121 insertions(+), 34 deletions(-) diff --git a/WebImageExtractor.App/Program.cs b/WebImageExtractor.App/Program.cs index 3dd990b..3465fdd 100644 --- a/WebImageExtractor.App/Program.cs +++ b/WebImageExtractor.App/Program.cs @@ -39,30 +39,34 @@ public static void Main(string[] args) int i = 0; foreach (WebImage image in images) { - string outputFilePath = Path.Combine(outputPath, $"{i}.{Enum.GetName(typeof(MagickFormat), image.GetImageIfDownloaded().Format).ToLower()}"); - if (image.GetImageIfDownloaded().Format != MagickFormat.Svg) + var magickImages = image.GetImagesIfDownloaded(); + foreach (MagickImage magickImage in magickImages) { - image.GetImageIfDownloaded().Write(outputFilePath, image.GetImageIfDownloaded().Format); - } - else - { - // MagickImage.Write would rasterize svgs which is not what we want - // Instead just download to a file - HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(image.Uri); - request.Method = "GET"; - using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) + string outputFilePath = Path.Combine(outputPath, $"{i}.{Enum.GetName(typeof(MagickFormat), magickImage.Format).ToLowerInvariant()}"); + if (magickImage.Format != MagickFormat.Svg) + { + magickImage.Write(outputFilePath, magickImage.Format); + } + else { - using (var reader = new System.IO.StreamReader(response.GetResponseStream())) + // MagickImage.Write would rasterize svgs which is not what we want + // Instead just download to a file + HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(image.Uri); + request.Method = "GET"; + using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) { - string responseText = reader.ReadToEnd(); - StreamWriter writer = new StreamWriter(outputFilePath, false); - writer.Write(responseText); - writer.Close(); + using (var reader = new System.IO.StreamReader(response.GetResponseStream())) + { + string responseText = reader.ReadToEnd(); + StreamWriter writer = new StreamWriter(outputFilePath, false); + writer.Write(responseText); + writer.Close(); + } } } - } - i++; + i++; + } } Console.WriteLine("Finished"); diff --git a/WebImageExtractor/Extensions/UriExtensions.cs b/WebImageExtractor/Extensions/UriExtensions.cs index a7ecb54..b89636f 100644 --- a/WebImageExtractor/Extensions/UriExtensions.cs +++ b/WebImageExtractor/Extensions/UriExtensions.cs @@ -70,12 +70,13 @@ public static Uri RemoveLastSegment(this Uri uri) /// Image extension as . public static MagickFormat ToMagickFormat(this Uri uri) { - string uriString = uri.ToString().ToLower(); + string uriString = uri.ToString().ToLowerInvariant(); + string uriStringBody = System.Text.RegularExpressions.Regex.Replace(uriString, @"\?.+$|\#.+$", string.Empty); Array values = Enum.GetValues(typeof(MagickFormat)); foreach (MagickFormat value in values) { - if (uriString.EndsWith($".{Enum.GetName(typeof(MagickFormat), value).ToLower()}")) + if (uriString.EndsWith($".{Enum.GetName(typeof(MagickFormat), value).ToLowerInvariant()}") || uriStringBody.EndsWith($".{Enum.GetName(typeof(MagickFormat), value).ToLowerInvariant()}")) { return value; } @@ -91,9 +92,9 @@ public static MagickFormat ToMagickFormat(this Uri uri) /// True if image extension supported by Magick.NET. public static bool HasImageExtension(this Uri uri) { - string uriString = uri.ToString().ToLower(); + string uriString = uri.ToString().ToLowerInvariant(); string[] magickFormats = Enum.GetNames(typeof(MagickFormat)); - return magickFormats.Any(f => !Constants.BadMagickTypes.Contains(f) && uriString.EndsWith($".{f.ToLower()}")); + return magickFormats.Any(f => !Constants.BadMagickTypes.Contains(f) && uriString.EndsWith($".{f.ToLowerInvariant()}")); } /// @@ -103,7 +104,7 @@ public static bool HasImageExtension(this Uri uri) /// True if image extension is not supported. public static bool IsBadMagickType(this Uri uri) { - string uriString = uri.ToString().ToLower(); + string uriString = uri.ToString().ToLowerInvariant(); return Constants.BadMagickTypes.Any(ts => uriString.EndsWith($".{ts}")); } @@ -114,7 +115,7 @@ public static bool IsBadMagickType(this Uri uri) /// True if image extension is .svg public static bool HasSvgExtension(this Uri uri) { - string uriString = uri.ToString().ToLower(); + string uriString = uri.ToString().ToLowerInvariant(); return uriString.EndsWith($".svg"); } diff --git a/WebImageExtractor/ImageDownloader.cs b/WebImageExtractor/ImageDownloader.cs index 076420c..f387f72 100644 --- a/WebImageExtractor/ImageDownloader.cs +++ b/WebImageExtractor/ImageDownloader.cs @@ -23,6 +23,66 @@ internal static class ImageDownloader /// Cancellation Token. /// Downloaded MagickImage, null if unsuccessful. public static async Task DownloadMagickImage(Uri uri, CancellationToken cancellationToken) + { + MagickImage image = null; + await DownloadAction(uri, cancellationToken, (stream) => + { + if (!(stream is null)) + { + try + { + image = new MagickImage(stream, uri.ToMagickFormat()); + } + catch + { + try + { + stream.Seek(0, SeekOrigin.Begin); + image = new MagickImage(stream); + } + catch + { + image = null; + } + } + } + + return Task.CompletedTask; + }); + return image; + } + + public static async Task DownloadMagickImages(Uri uri, CancellationToken cancellationToken) + { + MagickImageCollection image = null; + await DownloadAction(uri, cancellationToken, (stream) => + { + if (!(stream is null)) + { + try + { + image = new MagickImageCollection(stream, uri.ToMagickFormat()); + } + catch + { + try + { + stream.Seek(0, SeekOrigin.Begin); + image = new MagickImageCollection(stream); + } + catch + { + image = null; + } + } + } + + return Task.CompletedTask; + }); + return image; + } + + public static async Task DownloadAction(Uri uri, CancellationToken cancellationToken, Func action) { if (Extractor.ExtractionSettings == null) { @@ -49,14 +109,12 @@ public static async Task DownloadMagickImage(Uri uri, CancellationT { if (cancellationToken.IsCancellationRequested) { - return null; + await action?.Invoke(null); } using (Stream stream = await response.Content.ReadAsStreamAsync()) { - MagickImage image = null; - image = new MagickImage(stream, uri.ToMagickFormat()); - return image; + await action?.Invoke(stream); } } } diff --git a/WebImageExtractor/Recurser.cs b/WebImageExtractor/Recurser.cs index 1cdd3c4..70d4e48 100644 --- a/WebImageExtractor/Recurser.cs +++ b/WebImageExtractor/Recurser.cs @@ -152,7 +152,7 @@ public static async Task> HyperlinkRecurse(ExtractionMethod metho return null; } - await Task.WhenAll(images.Select(i => i.GetImageAsync(cancellationToken)).ToArray()); + await Task.WhenAll(images.Select(i => i.GetImagesAsync(cancellationToken)).ToArray()); } if (settings.OnFoundImage != null) diff --git a/WebImageExtractor/WebImage.cs b/WebImageExtractor/WebImage.cs index d091c9a..6a95ffd 100644 --- a/WebImageExtractor/WebImage.cs +++ b/WebImageExtractor/WebImage.cs @@ -4,6 +4,8 @@ // ------------------------------------------------------------------------------------------------- using System; +using System.Collections.Generic; +using System.Linq; using System.Threading; using System.Threading.Tasks; using ImageMagick; @@ -15,7 +17,7 @@ namespace WebImageExtractor /// public class WebImage { - private MagickImage image; + private IList> images; private bool downloadAttempted = false; /// @@ -55,7 +57,7 @@ public WebImage(string uri, bool isFavicon = false, bool isAppleTouchIcon = fals internal void SetImage(MagickImage val) { - image = val; + images = new List>(new IMagickImage[] { val }); } /// @@ -70,7 +72,17 @@ internal void SetImage(MagickImage val) await DownloadImage(cancellationToken); } - return image; + return GetImageIfDownloaded(); + } + + public async Task[]> GetImagesAsync(CancellationToken cancellationToken = default(CancellationToken)) + { + if (!downloadAttempted) + { + await DownloadImage(cancellationToken); + } + + return images?.ToArray(); } /// @@ -79,7 +91,19 @@ internal void SetImage(MagickImage val) /// MagickImage if it has been downloaded, otherwise null. public MagickImage GetImageIfDownloaded() { - return image; + if (images?.Count > 0) + { + return images[0] as MagickImage; + } + else + { + return null; + } + } + + public IMagickImage[] GetImagesIfDownloaded() + { + return images.ToArray(); } private async Task DownloadImage(CancellationToken cancellationToken) @@ -94,7 +118,7 @@ private async Task DownloadImage(CancellationToken cancellationToken) return; } - image = await ImageDownloader.DownloadMagickImage(new Uri(Uri), cancellationToken); + images = await ImageDownloader.DownloadMagickImages(new Uri(Uri), cancellationToken); downloadAttempted = true; } }