InfiniTensor · pwhMass · Sep 18, 2024 · Sep 22, 2024 · Sep 24, 2024 · Oct 25, 2024
diff --git a/ggml-quants/Cargo.toml b/ggml-quants/Cargo.toml
@@ -12,6 +12,7 @@ types = ["digit-layout"]
 rayon.workspace = true
 digit-layout = { version = "0.1", optional = true }
 half = "2.4"
+itertools = "0.13.0"
 
 [dev-dependencies]
 rand = "0.8"
diff --git a/ggml-quants/src/structs.rs b/ggml-quants/src/structs.rs
@@ -24,6 +24,8 @@ mod q8_0;
 mod q8_1;
 mod q8_k;
 
+use std::{cmp::max_by, convert::identity, iter::repeat_with, usize};
+
 pub use ::half::{bf16, f16};
 pub use iq1m::IQ1M;
 pub use iq1s::IQ1S;
@@ -103,6 +105,83 @@ fn min_max(data: &[f32]) -> (f32, f32) {
     })
 }
 
+/// Quantizes `x` to integer levels in `[0, 2 * nmax)` that share a single scale.
+///
+/// Candidate inverse scales `-(nmax + 0.1 * is) / max` are tried for `is` in
+/// `-9..=9`, where `max` is the value returned by `max_by_abs(x)` (presumably the
+/// element of largest magnitude). For every candidate the scale minimising the
+/// `x²`-weighted squared reconstruction error is fitted. `is == 0` is evaluated
+/// first and is only replaced by a strictly better candidate, mirroring ggml's
+/// `make_qx_quants`.
+///
+/// Returns `(scale, levels)` such that `x[i] ≈ scale * (levels[i] - nmax)`.
+/// When `|max| < GROUP_MAX_EPS` the group is treated as all-zero and
+/// `(0.0, vec![0; x.len()])` is returned.
+///
+/// # Panics
+///
+/// Panics if `nmax <= 0`.
+fn make_qx_quants(x: &[f32], nmax: isize) -> (f32, Vec<usize>) {
+    assert!(nmax > 0);
+    let max = max_by_abs(x);
+    if max.abs() < GROUP_MAX_EPS {
+        return (0., vec![0; x.len()]);
+    }
+
+    // Signed level of `a` under `iscale`, clamped to the representable range.
+    let level = |a: f32, iscale: f32| ((a * iscale).round() as isize).clamp(-nmax, nmax - 1);
+    // Accumulates (Σ w·x·l, Σ w·l²) with w = x²; their ratio is the fitted scale.
+    let fit = |iscale: f32| {
+        x.iter().fold((0f32, 0f32), |(sumlx, suml2), &a| {
+            let (l, w) = (level(a, iscale) as f32, a * a);
+            (sumlx + w * a * l, suml2 + w * l * l)
+        })
+    };
+
+    // (iscale, scale, score). The scale stays 0 when no candidate has
+    // `suml2 > 0`, which avoids the NaN of an unconditional `sumlx / suml2`.
+    let mut best = (-(nmax as f32) / max, 0f32, 0f32);
+    for is in std::iter::once(0).chain((-9..=9).filter(|&is| is != 0)) {
+        // The 0.1 step perturbs `nmax` *before* dividing by `max`, so the sweep
+        // is relative to the data range instead of an absolute offset.
+        let iscale = -(nmax as f32 + 0.1 * is as f32) / max;
+        let (sumlx, suml2) = fit(iscale);
+        // Cross-multiplied form of `sumlx² / suml2 > best score`; comparisons
+        // involving NaN are false, so a NaN candidate is simply skipped.
+        if suml2 > 0. && sumlx * sumlx > best.2 * suml2 {
+            best = (iscale, sumlx / suml2, sumlx * sumlx / suml2);
+        }
+    }
+
+    let levels = x.iter().map(|&a| (level(a, best.0) + nmax) as usize);
+    (best.1, levels.collect())
+}
+
+#[test]
+fn test_make_qx_quants() {
+    // Round-trips a fixed sample through `make_qx_quants` and checks the result
+    // instead of only printing it.
+    let a: [f32; 16] = [
+        0.6134935558875086,
+        0.27100422321951445,
+        0.662907299814267,
+        0.3012722972026105,
+        0.8809210890237902,
+        0.9113154578272312,
+        0.9586460741733003,
+        0.17865102136670108,
+        0.5596914646668039,
+        0.09094331112669951,
+        0.01917780062861074,
+        0.5313069088633986,
+        0.1885782128334208,
+        0.4678985378766791,
+        0.060239429412906054,
+        0.7827442050642704,
+    ];
+    const NMAX: isize = 16;
+    let (delta, levels) = make_qx_quants(&a, NMAX);
+    println!(" delta: {}", delta);
+
+    // One level per input, each inside the `[0, 2 * NMAX)` code range.
+    assert!(delta.is_finite() && delta != 0.);
+    assert_eq!(levels.len(), a.len());
+    assert!(levels.iter().all(|&l| l < 2 * NMAX as usize));
+
+    // Every dequantized value must stay within one quantization step.
+    for (x, &l) in a.iter().zip(&levels) {
+        let y = (l as isize - NMAX) as f32 * delta;
+        println!(" {} -> {}, squared error {}", x, y, (x - y) * (x - y));
+        assert!((x - y).abs() <= delta.abs(), "{} dequantized to {}", x, y);
+    }
+}
+
 const _1: usize = 1;
 const _32: usize = 32;
 const _256: usize = 256;
+
+const GROUP_MAX_EPS: f32 = 1e-15; // a group whose `max_by_abs` magnitude is below this is treated as all-zero
diff --git a/ggml-quants/src/structs/q2_k.rs b/ggml-quants/src/structs/q2_k.rs
@@ -1,5 +1,6 @@
 use super::{DeltaMin, _256};
 use crate::{DataBlock, Quantize};
+use itertools::Itertools;
 
 #[repr(C)]
 pub struct Q2K {
@@ -24,6 +25,30 @@ impl Quantize<f32, _256> for Q2K {
         todo!()
     }
     fn dequantize(&self) -> [f32; _256] {
-        todo!()
+        let mut ans = [0.; _256];
+        let (delta, min) = self.delta_min.to_f32();
+        let de_qs = self
+            .qs
+            .iter()
+            .flat_map(|&q| [q & 0b11, q >> 2 & 0b11, q >> 4 & 0b11, q >> 6 & 0b11]);
+        let dl_and_ml = self
+            .scales
+            .iter()
+            .map(|sc| (delta * ((sc & 0xF) as f32), min * ((sc >> 4) as f32)));
+
+        let de_qs_chunks = de_qs.chunks(16);
+        let result = de_qs_chunks
+            .into_iter()
+            .zip(dl_and_ml)
+            .map(|(qs, (delta_l, min_l))| qs.map(move |n| delta_l * n as f32 - min_l))
+            .flatten();
+
+        let mut count = 0;
+        for (i, q) in result.enumerate() {
+            ans[i] = q;
+            count += 1;
+        }
+        assert!(count == _256);
+        ans
     }
 }
diff --git a/ggml-quants/src/structs/q6_k.rs b/ggml-quants/src/structs/q6_k.rs
@@ -1,5 +1,8 @@
 use super::_256;
-use crate::{DataBlock, Quantize};
+use crate::{
+    structs::{make_qx_quants, max_by_abs, GROUP_MAX_EPS},
+    DataBlock, Quantize,
+};
 use half::f16;
 
 #[repr(C)]
@@ -23,10 +26,76 @@ impl DataBlock for Q6K {
 }
 
 impl Quantize<f32, _256> for Q6K {
-    fn quantize(_data: &[f32; _256]) -> Self {
+    fn quantize(data: &[f32; _256]) -> Self {
+        // Quantize each 16-value sub-block separately with `nmax = 32`, keeping
+        // the fitted scales apart from the quantized levels.
+        let mut scales = Vec::new();
+        let mut levels = Vec::new();
+        for sub_block in data.chunks(16) {
+            let (scale, sub_levels) = make_qx_quants(sub_block, 32);
+            scales.push(scale);
+            levels.push(sub_levels);
+        }
+
+        // If the scale picked by `max_by_abs` (presumably the largest in
+        // magnitude) is below the epsilon, return `Self::ZEROS`.
+        if max_by_abs(&scales).abs() < GROUP_MAX_EPS {
+            return Self::ZEROS;
+        }
+
         todo!()
     }
     fn dequantize(&self) -> [f32; _256] {
-        todo!()
+        // let (low, high) = self.qh.split_at(32);
+        // let qh = [low, high]
+        let qh = self
+            .qh
+            .chunks(32)
+            .into_iter()
+            .map(|qh| {
+                let qh1 = qh.iter().map(|a| ((a >> 0) & 0b11) << 4);
+                let qh2 = qh.iter().map(|a| ((a >> 2) & 0b11) << 4);
+                let qh3 = qh.iter().map(|a| ((a >> 4) & 0b11) << 4);
+                let qh4 = qh.iter().map(|a| ((a >> 6) & 0b11) << 4);
+                qh1.chain(qh2).chain(qh3).chain(qh4)
+            })
+            .flatten();
+
+        // let (low, high) = self.ql.split_at(64);
+        // let ql = [low, high];
+        let ql = self
+            .ql
+            .chunks(64)
+            .into_iter()
+            .map(|ql| {
+                let (l, h) = ql.split_at(32);
+                let ql1 = l.iter().map(|a| a & 0b1111);
+                let ql2 = h.iter().map(|a| a & 0b1111);
+                let ql3 = l.iter().map(|a: &u8| (a >> 4) & 0b1111);
+                let ql4 = h.iter().map(|a| (a >> 4) & 0b1111);
+                ql1.chain(ql2).chain(ql3).chain(ql4)
+            })
+            .flatten();
+        let y = qh
+            .zip(ql)
+            .zip(
+                self.scales
+                    .iter()
+                    .flat_map(|x| std::iter::repeat(x).take(16)),
+            )
+            .map(|((qh, ql), scales)| {
+                let q = (qh | ql) - 32;
+                self.delta.to_f32() * q as f32 * *scales as f32
+            });
+        let mut ans: [f32; _256] = [0.; _256];
+        let mut count = 0;
+        for (i, q) in y.enumerate() {
+            ans[i] = q;
+            count += 1;
+        }
+        assert!(count == _256);
+        ans
     }
 }
+
+#[test]
+fn test_Q6K() {
+    // Float-to-integer `as` casts truncate toward zero, so 0.7 becomes 0.
+    let a: f32 = 0.7;
+    println!("{}", a as isize);
+    assert_eq!(a as isize, 0);
+}