From 02b968b751711bc0c506703c964bcda98781381b Mon Sep 17 00:00:00 2001
From: Cryptex <64497526+Cryptex-github@users.noreply.github.com>
Date: Thu, 22 Dec 2022 21:58:34 -0800
Subject: [PATCH 1/4] Implement x86 specific merge impl

---
 src/arch/aarch64.rs |  0
 src/arch/manual.rs  | 40 ++++++++++++++++++++++++
 src/arch/mod.rs     | 17 ++++++++++
 src/arch/x86.rs     | 76 +++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs          |  2 ++
 src/pixel.rs        | 39 ++---------------------
 6 files changed, 137 insertions(+), 37 deletions(-)
 create mode 100644 src/arch/aarch64.rs
 create mode 100644 src/arch/manual.rs
 create mode 100644 src/arch/mod.rs
 create mode 100644 src/arch/x86.rs

diff --git a/src/arch/aarch64.rs b/src/arch/aarch64.rs
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/arch/manual.rs b/src/arch/manual.rs
new file mode 100644
index 000000000..8f9c36080
--- /dev/null
+++ b/src/arch/manual.rs
@@ -0,0 +1,40 @@
+use crate::Rgba;
+
+#[allow(clippy::cast_lossless)]
+pub fn _merge_impl(original: Rgba, other: Rgba) -> Rgba {
+    // Optimize for common cases
+    if other.a == 255 {
+        return other;
+    } else if other.a == 0 {
+        return original;
+    }
+
+    let (base_r, base_g, base_b, base_a) = (
+        original.r as f32 / 255.,
+        original.g as f32 / 255.,
+        original.b as f32 / 255.,
+        original.a as f32 / 255.,
+    );
+
+    let (overlay_r, overlay_g, overlay_b, overlay_a) = (
+        other.r as f32 / 255.,
+        other.g as f32 / 255.,
+        other.b as f32 / 255.,
+        other.a as f32 / 255.,
+    );
+    // `a_diff` is the share of the base left uncovered by the overlay; `a` is the output alpha.
+    let a_diff = 1. - overlay_a;
+    let a = a_diff.mul_add(base_a, overlay_a);
+    // Each colour is the alpha-weighted sum of base and overlay, divided by the output alpha.
+    let a_ratio = a_diff * base_a;
+    let r = a_ratio.mul_add(base_r, overlay_a * overlay_r) / a;
+    let g = a_ratio.mul_add(base_g, overlay_a * overlay_g) / a;
+    let b = a_ratio.mul_add(base_b, overlay_a * overlay_b) / a;
+    // Scale back to 0-255; the float-to-`u8` casts drop the fractional part.
+    Rgba {
+        r: (r * 255.) as u8,
+        g: (g * 255.) as u8,
+        b: (b * 255.) as u8,
+        a: (a * 255.) as u8,
+    }
+}
diff --git a/src/arch/mod.rs b/src/arch/mod.rs
new file mode 100644
index 000000000..fc133c62b
--- /dev/null
+++ b/src/arch/mod.rs
@@ -0,0 +1,17 @@
+mod aarch64;
+mod manual;
+mod x86;
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+use std::is_x86_feature_detected;
+
+pub fn merge_impl() -> unsafe fn(crate::Rgba, crate::Rgba) -> crate::Rgba {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("sse") && is_x86_feature_detected!("fma") {
+            return x86::_merge_impl;
+        }
+    }
+
+    manual::_merge_impl
+}
diff --git a/src/arch/x86.rs b/src/arch/x86.rs
new file mode 100644
index 000000000..3219c6e49
--- /dev/null
+++ b/src/arch/x86.rs
@@ -0,0 +1,76 @@
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "sse")]
+#[target_feature(enable = "fma")]
+pub unsafe fn _merge_impl(original: crate::pixel::Rgba, other: crate::pixel::Rgba) -> crate::pixel::Rgba {
+        // Optimize for common cases
+        if other.a == 255 {
+            return other;
+        } else if other.a == 0 {
+            return original;
+        }
+
+        let mut base = [0_f32; 4];
+
+        _mm_store_ps(
+            base.as_mut_ptr(),
+            _mm_div_ps(_mm_setr_ps(original.r as f32, original.g as f32, original.b as f32, original.a as f32), _mm_set1_ps(255.)),
+        );
+
+        let [base_r, base_g, base_b, base_a] = base;
+
+        let mut overlay = [0_f32; 4];
+
+        _mm_store_ps(
+            overlay.as_mut_ptr(),
+            _mm_div_ps(_mm_setr_ps(other.r as f32, other.g as f32, other.b as f32, other.a as f32), _mm_set1_ps(255.)),
+        );
+
+        let [overlay_r, overlay_g, overlay_b, overlay_a] = overlay;
+
+        let a_diff = 1. - overlay_a;
+
+        let mut overlay_rgba = [0_f32; 4];
+
+        _mm_store_ps(
+            overlay_rgba.as_mut_ptr(),
+            _mm_mul_ps(
+                _mm_setr_ps(overlay_r, overlay_g, overlay_b, base_a),
+                _mm_setr_ps(overlay_a, overlay_a, overlay_a, a_diff),
+            ),
+        );
+
+        let [overlay_r, overlay_g, overlay_b, a_ratio] = overlay_rgba;
+
+        let mut rgba = [0_f32; 4];
+
+        _mm_store_ps(
+            rgba.as_mut_ptr(),
+            _mm_fmadd_ps(
+                _mm_setr_ps(a_ratio, a_ratio, a_ratio, a_diff),
+                _mm_setr_ps(base_r, base_g, base_b, base_a),
+                _mm_setr_ps(overlay_r, overlay_g, overlay_b, overlay_a),
+            ),
+        );
+
+        let [r, g, b, a] = rgba;
+
+        let mut res = [0_f32; 4];
+
+        _mm_store_ps(
+            res.as_mut_ptr(),
+            _mm_mul_ps(
+                _mm_div_ps(_mm_setr_ps(r, g, b, a), _mm_setr_ps(a, a, a, 1.)),
+                _mm_set1_ps(255.),
+            ),
+        );
+
+        let [r, g, b, a] = res;
+
+        crate::pixel::Rgba { r: r as u8, g: g as u8, b: b as u8, a: a as u8 }
+}
diff --git a/src/lib.rs b/src/lib.rs
index ad20623a8..dbc1e1c6e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -276,6 +276,8 @@ pub mod sequence;
 #[cfg(feature = "text")]
 pub mod text;
 
+mod arch;
+
 macro_rules! inline_doc {
     ($($token:item)*) => {
         $(#[doc(inline)] $token)*
diff --git a/src/pixel.rs b/src/pixel.rs
index e98e88e85..775603e17 100644
--- a/src/pixel.rs
+++ b/src/pixel.rs
@@ -1,6 +1,7 @@
 //! Encloses pixel-related traits and pixel type implementations.
 
 use crate::Error::DecodingError;
+use crate::arch;
 use crate::{
     encodings::ColorType,
     image::OverlayMode,
@@ -888,44 +889,8 @@ impl Pixel for Rgba {
         [self.r, self.g, self.b, self.a]
     }
 
-    // TODO: SIMD could speed this up significantly
-    #[allow(clippy::cast_lossless)]
     fn merge(self, other: Self) -> Self {
-        // Optimize for common cases
-        if other.a == 255 {
-            return other;
-        } else if other.a == 0 {
-            return self;
-        }
-
-        let (base_r, base_g, base_b, base_a) = (
-            self.r as f32 / 255.,
-            self.g as f32 / 255.,
-            self.b as f32 / 255.,
-            self.a as f32 / 255.,
-        );
-
-        let (overlay_r, overlay_g, overlay_b, overlay_a) = (
-            other.r as f32 / 255.,
-            other.g as f32 / 255.,
-            other.b as f32 / 255.,
-            other.a as f32 / 255.,
-        );
-
-        let a_diff = 1. - overlay_a;
-        let a = a_diff.mul_add(base_a, overlay_a);
-
-        let a_ratio = a_diff * base_a;
-        let r = a_ratio.mul_add(base_r, overlay_a * overlay_r) / a;
-        let g = a_ratio.mul_add(base_g, overlay_a * overlay_g) / a;
-        let b = a_ratio.mul_add(base_b, overlay_a * overlay_b) / a;
-
-        Self {
-            r: (r * 255.) as u8,
-            g: (g * 255.) as u8,
-            b: (b * 255.) as u8,
-            a: (a * 255.) as u8,
-        }
+        unsafe { arch::merge_impl()(self, other) }
     }
 
     #[allow(clippy::cast_lossless)]

From 432c9a2327d037ba534258f2f44621f92acec2bf Mon Sep 17 00:00:00 2001
From: Cryptex <64497526+Cryptex-github@users.noreply.github.com>
Date: Fri, 23 Dec 2022 12:29:42 -0800
Subject: [PATCH 2/4] Resolve requested changes

---
 src/arch/aarch64.rs |   1 +
 src/arch/manual.rs  |   8 ---
 src/arch/mod.rs     |  23 ++++++--
 src/arch/x86.rs     | 137 +++++++++++++++++++++++---------------------
 src/lib.rs          |   3 +-
 src/pixel.rs        |   7 +--
 6 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/src/arch/aarch64.rs b/src/arch/aarch64.rs
index e69de29bb..8b1378917 100644
--- a/src/arch/aarch64.rs
+++ b/src/arch/aarch64.rs
@@ -0,0 +1 @@
+
diff --git a/src/arch/manual.rs b/src/arch/manual.rs
index 8f9c36080..4e71e10ba 100644
--- a/src/arch/manual.rs
+++ b/src/arch/manual.rs
@@ -1,14 +1,6 @@
 use crate::Rgba;
 
-#[allow(clippy::cast_lossless)]
 pub fn _merge_impl(original: Rgba, other: Rgba) -> Rgba {
-    // Optimize for common cases
-    if other.a == 255 {
-        return other;
-    } else if other.a == 0 {
-        return original;
-    }
-
     let (base_r, base_g, base_b, base_a) = (
         original.r as f32 / 255.,
         original.g as f32 / 255.,
diff --git a/src/arch/mod.rs b/src/arch/mod.rs
index fc133c62b..1be948743 100644
--- a/src/arch/mod.rs
+++ b/src/arch/mod.rs
@@ -1,17 +1,30 @@
+#![allow(clippy::cast_lossless)]
+#![allow(clippy::wildcard_imports)]
+
 mod aarch64;
 mod manual;
 mod x86;
 
+use crate::Rgba;
+
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 use std::is_x86_feature_detected;
 
-pub fn merge_impl() -> unsafe fn(crate::Rgba, crate::Rgba) -> crate::Rgba {
+#[inline]
+pub fn merge_impl(base: Rgba, other: Rgba) -> Rgba {
+    // Fast paths: an opaque overlay replaces the base; a fully transparent one leaves it as is.
+    if other.a == 255 {
+        return other;
+    } else if other.a == 0 {
+        return base;
+    }
+    // The unsafe x86 call below is sound: it is gated on runtime SSE + FMA detection.
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    {
-        if is_x86_feature_detected!("sse") && is_x86_feature_detected!("fma") {
-            return x86::_merge_impl;
+    if is_x86_feature_detected!("sse") && is_x86_feature_detected!("fma") {
+        unsafe {
+            return x86::_merge_impl(base, other);
         }
     }
 
-    manual::_merge_impl
+    manual::_merge_impl(base, other)
 }
diff --git a/src/arch/x86.rs b/src/arch/x86.rs
index 3219c6e49..8598f9216 100644
--- a/src/arch/x86.rs
+++ b/src/arch/x86.rs
@@ -3,74 +3,79 @@ use std::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
 
-
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "sse")]
 #[target_feature(enable = "fma")]
-pub unsafe fn _merge_impl(original: crate::pixel::Rgba, other: crate::pixel::Rgba) -> crate::pixel::Rgba {
-        // Optimize for common cases
-        if other.a == 255 {
-            return other;
-        } else if other.a == 0 {
-            return original;
-        }
-
-        let mut base = [0_f32; 4];
-
-        _mm_store_ps(
-            base.as_mut_ptr(),
-            _mm_div_ps(_mm_setr_ps(original.r as f32, original.g as f32, original.b as f32, original.a as f32), _mm_set1_ps(255.)),
-        );
-
-        let [base_r, base_g, base_b, base_a] = base;
-
-        let mut overlay = [0_f32; 4];
-
-        _mm_store_ps(
-            overlay.as_mut_ptr(),
-            _mm_div_ps(_mm_setr_ps(other.r as f32, other.g as f32, other.b as f32, other.a as f32), _mm_set1_ps(255.)),
-        );
-
-        let [overlay_r, overlay_g, overlay_b, overlay_a] = overlay;
-
-        let a_diff = 1. - overlay_a;
-
-        let mut overlay_rgba = [0_f32; 4];
-
-        _mm_store_ps(
-            overlay_rgba.as_mut_ptr(),
-            _mm_mul_ps(
-                _mm_setr_ps(overlay_r, overlay_g, overlay_b, base_a),
-                _mm_setr_ps(overlay_a, overlay_a, overlay_a, a_diff),
+pub unsafe fn _merge_impl(
+    base: crate::pixel::Rgba,
+    other: crate::pixel::Rgba,
+) -> crate::pixel::Rgba {
+    let mut base_rgba = [0_f32; 4];
+    let mut overlay = [0_f32; 4];
+    let mut overlay_rgba = [0_f32; 4];
+    let mut rgba = [0_f32; 4];
+    let mut res = [0_f32; 4];
+    // `[f32; 4]` is only 4-byte aligned, so every store below must be the unaligned form.
+    _mm_storeu_ps(
+        base_rgba.as_mut_ptr(),
+        _mm_div_ps(
+            _mm_setr_ps(base.r as f32, base.g as f32, base.b as f32, base.a as f32),
+            _mm_set1_ps(255.),
+        ),
+    );
+
+    let [base_r, base_g, base_b, base_a] = base_rgba;
+    _mm_storeu_ps(
+        overlay.as_mut_ptr(),
+        _mm_div_ps(
+            _mm_setr_ps(
+                other.r as f32,
+                other.g as f32,
+                other.b as f32,
+                other.a as f32,
             ),
-        );
-
-        let [overlay_r, overlay_g, overlay_b, a_ratio] = overlay_rgba;
-
-        let mut rgba = [0_f32; 4];
-
-        _mm_store_ps(
-            rgba.as_mut_ptr(),
-            _mm_fmadd_ps(
-                _mm_setr_ps(a_ratio, a_ratio, a_ratio, a_diff),
-                _mm_setr_ps(base_r, base_g, base_b, base_a),
-                _mm_setr_ps(overlay_r, overlay_g, overlay_b, overlay_a),
-            ),
-        );
-
-        let [r, g, b, a] = rgba;
-
-        let mut res = [0_f32; 4];
-
-        _mm_store_ps(
-            res.as_mut_ptr(),
-            _mm_mul_ps(
-                _mm_div_ps(_mm_setr_ps(r, g, b, a), _mm_setr_ps(a, a, a, 1.)),
-                _mm_set1_ps(255.),
-            ),
-        );
-
-        let [r, g, b, a] = res;
-
-        crate::pixel::Rgba { r: r as u8, g: g as u8, b: b as u8, a: a as u8 }
+            _mm_set1_ps(255.),
+        ),
+    );
+    // `a_diff` is the share of the base pixel left uncovered by the overlay.
+    let [overlay_r, overlay_g, overlay_b, overlay_a] = overlay;
+    let a_diff = 1. - overlay_a;
+    // Lanes 0-2: overlay colour weighted by its alpha; lane 3: a_ratio = base_a * a_diff.
+    _mm_storeu_ps(
+        overlay_rgba.as_mut_ptr(),
+        _mm_mul_ps(
+            _mm_setr_ps(overlay_r, overlay_g, overlay_b, base_a),
+            _mm_setr_ps(overlay_a, overlay_a, overlay_a, a_diff),
+        ),
+    );
+
+    let [overlay_r, overlay_g, overlay_b, a_ratio] = overlay_rgba;
+    // Fused multiply-add: lanes 0-2 blend the colours, lane 3 yields the output alpha.
+    _mm_storeu_ps(
+        rgba.as_mut_ptr(),
+        _mm_fmadd_ps(
+            _mm_setr_ps(a_ratio, a_ratio, a_ratio, a_diff),
+            _mm_setr_ps(base_r, base_g, base_b, base_a),
+            _mm_setr_ps(overlay_r, overlay_g, overlay_b, overlay_a),
+        ),
+    );
+
+    let [r, g, b, a] = rgba;
+    // Divide the colours by the output alpha (alpha itself by 1) and scale back to 0-255.
+    _mm_storeu_ps(
+        res.as_mut_ptr(),
+        _mm_mul_ps(
+            _mm_div_ps(_mm_setr_ps(r, g, b, a), _mm_setr_ps(a, a, a, 1.)),
+            _mm_set1_ps(255.),
+        ),
+    );
+
+    let [r, g, b, a] = res;
+
+    crate::pixel::Rgba {
+        r: r as u8,
+        g: g as u8,
+        b: b as u8,
+        a: a as u8,
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index dbc1e1c6e..7cca54251 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -261,6 +261,7 @@
     clippy::doc_markdown
 )]
 
+mod arch;
 pub mod draw;
 pub mod encode;
 pub mod encodings;
@@ -276,8 +277,6 @@ pub mod sequence;
 #[cfg(feature = "text")]
 pub mod text;
 
-mod arch;
-
 macro_rules! inline_doc {
     ($($token:item)*) => {
         $(#[doc(inline)] $token)*
diff --git a/src/pixel.rs b/src/pixel.rs
index 775603e17..0af2d7e72 100644
--- a/src/pixel.rs
+++ b/src/pixel.rs
@@ -1,11 +1,10 @@
 //! Encloses pixel-related traits and pixel type implementations.
 
-use crate::Error::DecodingError;
-use crate::arch;
 use crate::{
+    arch,
     encodings::ColorType,
     image::OverlayMode,
-    Error::{InvalidHexCode, InvalidPaletteIndex, UnsupportedColorType},
+    Error::{DecodingError, InvalidHexCode, InvalidPaletteIndex, UnsupportedColorType},
     Result,
 };
 use std::borrow::Cow;
@@ -890,7 +889,7 @@ impl Pixel for Rgba {
     }
 
     fn merge(self, other: Self) -> Self {
-        unsafe { arch::merge_impl()(self, other) }
+        arch::merge_impl(self, other)
     }
 
     #[allow(clippy::cast_lossless)]

From 3ccae17e4135cc4b05ef596cead96954bbec559f Mon Sep 17 00:00:00 2001
From: Cryptex <64497526+Cryptex-github@users.noreply.github.com>
Date: Fri, 23 Dec 2022 13:36:41 -0800
Subject: [PATCH 3/4] SIMD implementation of invert

---
 src/arch/manual.rs | 20 +++++++++++++++-----
 src/arch/mod.rs    | 13 +++++++++++++
 src/arch/x86.rs    | 34 +++++++++++++++++++++++++++++-----
 src/pixel.rs       | 13 ++-----------
 4 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/src/arch/manual.rs b/src/arch/manual.rs
index 4e71e10ba..8264b0f25 100644
--- a/src/arch/manual.rs
+++ b/src/arch/manual.rs
@@ -1,11 +1,11 @@
 use crate::Rgba;
 
-pub fn _merge_impl(original: Rgba, other: Rgba) -> Rgba {
+pub fn _merge_impl(base: Rgba, other: Rgba) -> Rgba {
     let (base_r, base_g, base_b, base_a) = (
-        original.r as f32 / 255.,
-        original.g as f32 / 255.,
-        original.b as f32 / 255.,
-        original.a as f32 / 255.,
+        base.r as f32 / 255.,
+        base.g as f32 / 255.,
+        base.b as f32 / 255.,
+        base.a as f32 / 255.,
     );
 
     let (overlay_r, overlay_g, overlay_b, overlay_a) = (
@@ -30,3 +30,13 @@ pub fn _merge_impl(original: Rgba, other: Rgba) -> Rgba {
         a: (a * 255.) as u8,
     }
 }
+
+/// Portable fallback: complements every channel of `base`, alpha included.
+#[inline]
+pub fn _invert_impl(base: Rgba) -> Rgba {
+    // For a `u8`, bitwise NOT is the same as `255 - value`.
+    let Rgba { r, g, b, a } = base;
+    let (r, g, b, a) = (!r, !g, !b, !a);
+
+    Rgba { r, g, b, a }
+}
diff --git a/src/arch/mod.rs b/src/arch/mod.rs
index 1be948743..ccac3865a 100644
--- a/src/arch/mod.rs
+++ b/src/arch/mod.rs
@@ -1,5 +1,6 @@
 #![allow(clippy::cast_lossless)]
 #![allow(clippy::wildcard_imports)]
+#![allow(dead_code)]
 
 mod aarch64;
 mod manual;
@@ -28,3 +29,15 @@ pub fn merge_impl(base: Rgba, other: Rgba) -> Rgba {
 
     manual::_merge_impl(base, other)
 }
+
+/// Inverts `base`, using the SSE kernel when the running CPU reports support for it.
+#[inline]
+pub fn invert_impl(base: Rgba) -> Rgba {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    if is_x86_feature_detected!("sse") {
+        // SAFETY: SSE support was confirmed at runtime by the check on the line above.
+        return unsafe { x86::_invert_impl(base) };
+    }
+
+    manual::_invert_impl(base)
+}
diff --git a/src/arch/x86.rs b/src/arch/x86.rs
index 8598f9216..f958db4c6 100644
--- a/src/arch/x86.rs
+++ b/src/arch/x86.rs
@@ -3,13 +3,14 @@ use std::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
 
+use crate::Rgba;
+
+const ONES: f32 = unsafe { std::mem::transmute(0xff_ff_ff_ff_u32) };
+
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "sse")]
 #[target_feature(enable = "fma")]
-pub unsafe fn _merge_impl(
-    base: crate::pixel::Rgba,
-    other: crate::pixel::Rgba,
-) -> crate::pixel::Rgba {
+pub unsafe fn _merge_impl(base: Rgba, other: Rgba) -> Rgba {
     let mut base_rgba = [0_f32; 4];
     let mut overlay = [0_f32; 4];
     let mut overlay_rgba = [0_f32; 4];
@@ -72,7 +73,30 @@ pub unsafe fn _merge_impl(
 
     let [r, g, b, a] = res;
 
-    crate::pixel::Rgba {
+    Rgba {
+        r: r as u8,
+        g: g as u8,
+        b: b as u8,
+        a: a as u8,
+    }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "sse")]
+pub unsafe fn _invert_impl(base: Rgba) -> Rgba {
+    let mut res = [0_f32; 4];
+    // For a `u8`, `!x == 255 - x`; XOR-ing the f32 bit patterns would not give that.
+    _mm_storeu_ps(
+        res.as_mut_ptr(),
+        _mm_sub_ps(
+            _mm_set1_ps(255.),
+            _mm_setr_ps(base.r as f32, base.g as f32, base.b as f32, base.a as f32),
+        ),
+    );
+    // The differences are exact integers in 0..=255, so the casts below lose nothing.
+    let [r, g, b, a] = res;
+
+    Rgba {
         r: r as u8,
         g: g as u8,
         b: b as u8,
diff --git a/src/pixel.rs b/src/pixel.rs
index c363c5653..0bdb9d4ed 100644
--- a/src/pixel.rs
+++ b/src/pixel.rs
@@ -625,11 +625,7 @@ impl Pixel for Rgb {
     type Data = [u8; 3];
 
     fn inverted(&self) -> Self {
-        Self {
-            r: !self.r,
-            g: !self.g,
-            b: !self.b,
-        }
+        arch::invert_impl((*self).into()).into()
     }
 
     fn map_subpixels<F, A>(self, f: F, _: A) -> Self
@@ -791,12 +787,7 @@ impl Pixel for Rgba {
     type Data = [u8; 4];
 
     fn inverted(&self) -> Self {
-        Self {
-            r: !self.r,
-            g: !self.g,
-            b: !self.b,
-            a: !self.a,
-        }
+        arch::invert_impl(*self)
     }
 
     fn map_subpixels<F, A>(self, f: F, a: A) -> Self

From bc3f458e097d8853b3743b92edd3408c5ff9cbd6 Mon Sep 17 00:00:00 2001
From: Cryptex <64497526+Cryptex-github@users.noreply.github.com>
Date: Fri, 23 Dec 2022 14:37:34 -0800
Subject: [PATCH 4/4] Add a cfg flag to detect if rustc is nightly

---
 Cargo.toml |  3 +++
 build.rs   | 10 ++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 build.rs

diff --git a/Cargo.toml b/Cargo.toml
index 05e12aee5..3e0db7e1c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,6 +37,9 @@ quantize = ["dep:color_quant"]
 gradient = ["dep:colorgrad"]
 static = ["libwebp-sys2?/static"]
 
+[build-dependencies]
+rustc_version = "0.4"
+
 [dev-dependencies]
 criterion = "^0.4"
 image = "^0"
diff --git a/build.rs b/build.rs
new file mode 100644
index 000000000..9506529b5
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,10 @@
+use rustc_version::{version, version_meta, Channel};
+
+fn main() {
+    println!("cargo:rerun-if-changed=build.rs"); // no need to re-run on every source edit
+    assert!(version().unwrap().major >= 1);
+    // Expose `cfg(RUSTC_IS_NIGHTLY)` to the crate only when building with a nightly compiler.
+    if matches!(version_meta().unwrap().channel, Channel::Nightly) {
+        println!("cargo:rustc-cfg=RUSTC_IS_NIGHTLY");
+    }
+}