Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions WebImageExtractor.App/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,30 +39,34 @@ public static void Main(string[] args)
int i = 0;
foreach (WebImage image in images)
{
string outputFilePath = Path.Combine(outputPath, $"{i}.{Enum.GetName(typeof(MagickFormat), image.GetImageIfDownloaded().Format).ToLower()}");
if (image.GetImageIfDownloaded().Format != MagickFormat.Svg)
var magickImages = image.GetImagesIfDownloaded();
foreach (MagickImage magickImage in magickImages)
{
image.GetImageIfDownloaded().Write(outputFilePath, image.GetImageIfDownloaded().Format);
}
else
{
// MagickImage.Write would rasterize svgs which is not what we want
// Instead just download to a file
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(image.Uri);
request.Method = "GET";
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
string outputFilePath = Path.Combine(outputPath, $"{i}.{Enum.GetName(typeof(MagickFormat), magickImage.Format).ToLowerInvariant()}");
if (magickImage.Format != MagickFormat.Svg)
{
magickImage.Write(outputFilePath, magickImage.Format);
}
else
{
using (var reader = new System.IO.StreamReader(response.GetResponseStream()))
// MagickImage.Write would rasterize svgs which is not what we want
// Instead just download to a file
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(image.Uri);
request.Method = "GET";
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
string responseText = reader.ReadToEnd();
StreamWriter writer = new StreamWriter(outputFilePath, false);
writer.Write(responseText);
writer.Close();
using (var reader = new System.IO.StreamReader(response.GetResponseStream()))
{
string responseText = reader.ReadToEnd();
StreamWriter writer = new StreamWriter(outputFilePath, false);
writer.Write(responseText);
writer.Close();
}
}
}
}

i++;
i++;
}
}

Console.WriteLine("Finished");
Expand Down
13 changes: 7 additions & 6 deletions WebImageExtractor/Extensions/UriExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,13 @@ public static Uri RemoveLastSegment(this Uri uri)
/// <returns>Image extension as <see cref="MagickFormat"/>.</returns>
public static MagickFormat ToMagickFormat(this Uri uri)
{
string uriString = uri.ToString().ToLower();
string uriString = uri.ToString().ToLowerInvariant();
string uriStringBody = System.Text.RegularExpressions.Regex.Replace(uriString, @"\?.+$|\#.+$", string.Empty);

Array values = Enum.GetValues(typeof(MagickFormat));
foreach (MagickFormat value in values)
{
if (uriString.EndsWith($".{Enum.GetName(typeof(MagickFormat), value).ToLower()}"))
if (uriString.EndsWith($".{Enum.GetName(typeof(MagickFormat), value).ToLowerInvariant()}") || uriStringBody.EndsWith($".{Enum.GetName(typeof(MagickFormat), value).ToLowerInvariant()}"))
{
return value;
}
Expand All @@ -91,9 +92,9 @@ public static MagickFormat ToMagickFormat(this Uri uri)
/// <returns>True if image extension supported by Magick.NET.</returns>
public static bool HasImageExtension(this Uri uri)
{
string uriString = uri.ToString().ToLower();
string uriString = uri.ToString().ToLowerInvariant();
string[] magickFormats = Enum.GetNames(typeof(MagickFormat));
return magickFormats.Any(f => !Constants.BadMagickTypes.Contains(f) && uriString.EndsWith($".{f.ToLower()}"));
return magickFormats.Any(f => !Constants.BadMagickTypes.Contains(f) && uriString.EndsWith($".{f.ToLowerInvariant()}"));
}

/// <summary>
Expand All @@ -103,7 +104,7 @@ public static bool HasImageExtension(this Uri uri)
/// <returns>True if image extension is not supported.</returns>
public static bool IsBadMagickType(this Uri uri)
{
string uriString = uri.ToString().ToLower();
string uriString = uri.ToString().ToLowerInvariant();
return Constants.BadMagickTypes.Any(ts => uriString.EndsWith($".{ts}"));
}

Expand All @@ -114,7 +115,7 @@ public static bool IsBadMagickType(this Uri uri)
/// <returns>True if image extension is .svg</returns>
public static bool HasSvgExtension(this Uri uri)
{
string uriString = uri.ToString().ToLower();
string uriString = uri.ToString().ToLowerInvariant();
return uriString.EndsWith($".svg");
}

Expand Down
66 changes: 62 additions & 4 deletions WebImageExtractor/ImageDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,66 @@ internal static class ImageDownloader
/// <param name="cancellationToken">Cancellation Token.</param>
/// <returns>Downloaded MagickImage, null if unsuccessful.</returns>
public static async Task<MagickImage> DownloadMagickImage(Uri uri, CancellationToken cancellationToken)
{
    MagickImage decoded = null;

    // Callback run by DownloadAction; it receives the response stream,
    // or null when there is nothing to decode.
    Func<Stream, Task> decode = responseStream =>
    {
        if (responseStream is null)
        {
            return Task.CompletedTask;
        }

        try
        {
            // First attempt: decode using the format derived from the Uri's extension.
            decoded = new MagickImage(responseStream, uri.ToMagickFormat());
        }
        catch
        {
            try
            {
                // The Uri-derived format failed; rewind and retry without a format hint.
                responseStream.Seek(0, SeekOrigin.Begin);
                decoded = new MagickImage(responseStream);
            }
            catch
            {
                // Best effort: a payload that cannot be decoded is reported as null.
                decoded = null;
            }
        }

        return Task.CompletedTask;
    };

    await DownloadAction(uri, cancellationToken, decode);
    return decoded;
}

/// <summary>
/// Downloads the resource at <paramref name="uri"/> and decodes it as a <see cref="MagickImageCollection"/>.
/// </summary>
/// <param name="uri">Uri of the image to download.</param>
/// <param name="cancellationToken">Cancellation Token.</param>
/// <returns>Downloaded MagickImageCollection, null if unsuccessful.</returns>
public static async Task<MagickImageCollection> DownloadMagickImages(Uri uri, CancellationToken cancellationToken)
{
    MagickImageCollection frames = null;

    await DownloadAction(uri, cancellationToken, payload =>
    {
        // Nothing to decode when no stream is supplied; leave the result untouched.
        if (payload is null)
        {
            return Task.CompletedTask;
        }

        try
        {
            // Preferred path: read with the format derived from the Uri's extension.
            MagickFormat hintedFormat = uri.ToMagickFormat();
            frames = new MagickImageCollection(payload, hintedFormat);
        }
        catch
        {
            try
            {
                // Fallback: rewind the stream and read again without a format hint.
                payload.Seek(0, SeekOrigin.Begin);
                frames = new MagickImageCollection(payload);
            }
            catch
            {
                // Both attempts failed; report the download as unsuccessful.
                frames = null;
            }
        }

        return Task.CompletedTask;
    });

    return frames;
}

public static async Task DownloadAction(Uri uri, CancellationToken cancellationToken, Func<Stream, Task> action)
{
if (Extractor.ExtractionSettings == null)
{
Expand All @@ -49,14 +109,12 @@ public static async Task<MagickImage> DownloadMagickImage(Uri uri, CancellationT
{
if (cancellationToken.IsCancellationRequested)
{
return null;
await action?.Invoke(null);
}

using (Stream stream = await response.Content.ReadAsStreamAsync())
{
MagickImage image = null;
image = new MagickImage(stream, uri.ToMagickFormat());
return image;
await action?.Invoke(stream);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion WebImageExtractor/Recurser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ public static async Task<List<WebImage>> HyperlinkRecurse(ExtractionMethod metho
return null;
}

await Task.WhenAll(images.Select(i => i.GetImageAsync(cancellationToken)).ToArray());
await Task.WhenAll(images.Select(i => i.GetImagesAsync(cancellationToken)).ToArray());
}

if (settings.OnFoundImage != null)
Expand Down
34 changes: 29 additions & 5 deletions WebImageExtractor/WebImage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
// -------------------------------------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using ImageMagick;
Expand All @@ -15,7 +17,7 @@ namespace WebImageExtractor
/// </summary>
public class WebImage
{
private MagickImage image;
private IList<IMagickImage<ushort>> images;
private bool downloadAttempted = false;

/// <summary>
Expand Down Expand Up @@ -55,7 +57,7 @@ public WebImage(string uri, bool isFavicon = false, bool isAppleTouchIcon = fals

internal void SetImage(MagickImage val)
{
image = val;
images = new List<IMagickImage<ushort>>(new IMagickImage<ushort>[] { val });
}

/// <summary>
Expand All @@ -70,7 +72,17 @@ internal void SetImage(MagickImage val)
await DownloadImage(cancellationToken);
}

return image;
return GetImageIfDownloaded();
}

/// <summary>
/// Gets all images held for this Uri, downloading them first if no download has been attempted yet.
/// </summary>
/// <param name="cancellationToken">Cancellation Token.</param>
/// <returns>Array of the held images, or null if there are none.</returns>
public async Task<IMagickImage<ushort>[]> GetImagesAsync(CancellationToken cancellationToken = default(CancellationToken))
{
    bool needsDownload = !downloadAttempted;
    if (needsDownload)
    {
        await DownloadImage(cancellationToken);
    }

    // Read the field once so the null check and the copy see the same list.
    IList<IMagickImage<ushort>> current = images;
    return current == null ? null : current.ToArray();
}

/// <summary>
Expand All @@ -79,7 +91,19 @@ internal void SetImage(MagickImage val)
/// <returns>MagickImage if it has been downloaded, otherwise null.</returns>
public MagickImage GetImageIfDownloaded()
{
return image;
if (images?.Count > 0)
{
return images[0] as MagickImage;
}
else
{
return null;
}
}

/// <summary>
/// Gets all images currently held for this Uri without triggering a download.
/// </summary>
/// <returns>Array of the held images; an empty array if no images are held.</returns>
public IMagickImage<ushort>[] GetImagesIfDownloaded()
{
    // The images field is null until SetImage or a successful download assigns it.
    // Previously that case threw NullReferenceException; return an empty array
    // instead so callers can enumerate the result unconditionally.
    IList<IMagickImage<ushort>> current = images;
    return current == null ? new IMagickImage<ushort>[0] : current.ToArray();
}

private async Task DownloadImage(CancellationToken cancellationToken)
Expand All @@ -94,7 +118,7 @@ private async Task DownloadImage(CancellationToken cancellationToken)
return;
}

image = await ImageDownloader.DownloadMagickImage(new Uri(Uri), cancellationToken);
images = await ImageDownloader.DownloadMagickImages(new Uri(Uri), cancellationToken);
downloadAttempted = true;
}
}
Expand Down