diff --git a/ggml-quants/Cargo.toml b/ggml-quants/Cargo.toml
index 4283ee7..b49d301 100644
--- a/ggml-quants/Cargo.toml
+++ b/ggml-quants/Cargo.toml
@@ -12,6 +12,7 @@ types = ["digit-layout"]
 rayon.workspace = true
 digit-layout = { version = "0.1", optional = true }
 half = "2.4"
+itertools = "0.13.0"
 
 [dev-dependencies]
 rand = "0.8"
diff --git a/ggml-quants/src/structs.rs b/ggml-quants/src/structs.rs
index 2bea1cd..c35fea8 100644
--- a/ggml-quants/src/structs.rs
+++ b/ggml-quants/src/structs.rs
@@ -24,6 +24,8 @@ mod q8_0;
 mod q8_1;
 mod q8_k;
 
+use std::{cmp::max_by, convert::identity, iter::repeat_with, usize};
+
 pub use ::half::{bf16, f16};
 pub use iq1m::IQ1M;
 pub use iq1s::IQ1S;
@@ -103,6 +105,112 @@ fn min_max(data: &[f32]) -> (f32, f32) {
     })
 }
 
+/// Quantizes `x` onto the integer grid `-nmax..nmax` with one shared scale.
+///
+/// Nineteen candidate inverse scales around `-nmax / max` are tried, where
+/// `max` is the value returned by `max_by_abs(x)`. Each candidate is scored
+/// with `x²`-weighted least squares and the best-scoring one is kept.
+///
+/// Returns `(scale, levels)`. Every level is stored with an offset of `nmax`,
+/// so `x[i]` is approximated by `scale * (levels[i] as isize - nmax)`. When
+/// `max` is smaller in magnitude than `GROUP_MAX_EPS` the result is
+/// `(0.0, vec![0; x.len()])`.
+///
+/// # Panics
+///
+/// Panics if `nmax` is not positive, or if a candidate score is NaN.
+fn make_qx_quants(x: &[f32], nmax: isize) -> (f32, Vec<usize>) {
+    assert!(nmax > 0);
+    let max = max_by_abs(x);
+    if max.abs() < GROUP_MAX_EPS {
+        return (0., vec![0; x.len()]);
+    }
+    // Rounds to the nearest level and clamps it into `-nmax..nmax`.
+    let level = |a: f32, iscale: f32| ((a * iscale).round() as isize).clamp(-nmax, nmax - 1);
+    // Least-squares score of one fit; a larger score means a smaller weighted error.
+    let score = |&(sumlx, suml2): &(f32, f32)| {
+        if suml2 != 0.0 {
+            sumlx * sumlx / suml2
+        } else {
+            0.0
+        }
+    };
+    (-9..=9)
+        // The ±0.1 steps perturb `nmax` itself, so they belong inside the division
+        // by `max`; added outside it, the search width would ignore the data scale.
+        .map(|i| -(nmax as f32 + 0.1 * i as f32) / max)
+        .map(|iscale| {
+            let sums = x.iter().fold((0., 0.), |(sumlx, suml2), &a| {
+                let l = level(a, iscale) as f32;
+                let w = a * a;
+                (sumlx + w * a * l, suml2 + w * l * l)
+            });
+            (iscale, sums)
+        })
+        .max_by(|(_, lhs), (_, rhs)| {
+            score(lhs)
+                .partial_cmp(&score(rhs))
+                .expect("candidate scores must not be NaN")
+        })
+        .map(|(iscale, (sumlx, suml2))| {
+            // A fit whose levels are all zero has no usable scale; avoid 0 / 0.
+            let scale = if suml2 != 0.0 { sumlx / suml2 } else { 0.0 };
+            let levels = x.iter().map(|&a| (level(a, iscale) + nmax) as usize);
+            (scale, levels.collect())
+        })
+        .expect("the candidate range -9..=9 is never empty")
+}
+
+#[test]
+fn test_make_qx_quants() {
+    let a: [f32; 16] = [
+        0.6134935558875086,
+        0.27100422321951445,
+        0.662907299814267,
+        0.3012722972026105,
+        0.8809210890237902,
+        0.9113154578272312,
+        0.9586460741733003,
+        0.17865102136670108,
+        0.5596914646668039,
+        0.09094331112669951,
+        0.01917780062861074,
+        0.5313069088633986,
+        0.1885782128334208,
+        0.4678985378766791,
+        0.060239429412906054,
+        0.7827442050642704,
+    ];
+    let nmax = 16;
+    let (delta, levels) = make_qx_quants(&a, nmax);
+    println!(" delta: {}", delta);
+    let a_after = levels
+        .iter()
+        .map(|&x| (x as isize - nmax) as f32 * delta)
+        .collect::<Vec<_>>();
+    println!(" a_after: {:?}", &a_after);
+    let errors = a
+        .iter()
+        .zip(a_after.iter())
+        .map(|(x, y)| (x - y) * (x - y))
+        .collect::<Vec<_>>();
+    println!(" error is :{:?}", errors);
+
+    assert_eq!(levels.len(), a.len());
+    // Levels carry an offset of `nmax`, so they all lie in `0..2 * nmax`.
+    assert!(levels.iter().all(|&l| l < 2 * nmax as usize));
+    // All inputs are positive and every candidate inverse scale is negative,
+    // so the fitted scale must be negative too.
+    assert!(delta < 0.0);
+    // The unperturbed candidate keeps every element within `max / (2 * nmax)`,
+    // and the selected fit never has a larger `x²`-weighted squared error.
+    let weighted: f32 = a.iter().zip(&errors).map(|(x, e)| x * x * e).sum();
+    assert!(weighted < 5e-3, "weighted squared error is {}", weighted);
+}
+
 const _1: usize = 1;
 const _32: usize = 32;
 const _256: usize = 256;
+
+/// Magnitude below which the largest value of a group is treated as zero.
+const GROUP_MAX_EPS: f32 = 1e-15;
diff --git a/ggml-quants/src/structs/q2_k.rs b/ggml-quants/src/structs/q2_k.rs
index 76eb2a0..b998b72 100644
--- a/ggml-quants/src/structs/q2_k.rs
+++ b/ggml-quants/src/structs/q2_k.rs
@@ -1,5 +1,6 @@
 use super::{DeltaMin, _256};
 use crate::{DataBlock, Quantize};
+use itertools::Itertools;
 
 #[repr(C)]
 pub struct Q2K {
@@ -24,6 +25,33 @@ impl Quantize for Q2K {
         todo!()
     }
     fn dequantize(&self) -> [f32; _256] {
-        todo!()
+        let (delta, min) = self.delta_min.to_f32();
+        // ggml's Q2_K layout packs 128 values into each 32-byte group of `qs`:
+        // bit pair `j` of byte `l` is value `32 * j + l` of the group, so the bit
+        // pairs are walked outermost. Unpacking byte by byte would interleave them.
+        let de_qs = self.qs.chunks(32).flat_map(|group| {
+            (0..4).flat_map(move |j| group.iter().map(move |&q| (q >> (2 * j)) & 0b11))
+        });
+        // One scale byte per 16 values: low nibble scales `delta`, high nibble scales `min`.
+        let dl_and_ml = self
+            .scales
+            .iter()
+            .map(|sc| (delta * ((sc & 0xF) as f32), min * ((sc >> 4) as f32)));
+
+        let de_qs_chunks = de_qs.chunks(16);
+        let result = de_qs_chunks
+            .into_iter()
+            .zip(dl_and_ml)
+            .flat_map(|(qs, (delta_l, min_l))| qs.map(move |n| delta_l * n as f32 - min_l));
+
+        let mut ans = [0.; _256];
+        let mut count = 0;
+        for (i, q) in result.enumerate() {
+            ans[i] = q;
+            count += 1;
+        }
+        // Fails if `qs` and `scales` together yield fewer than `_256` values.
+        assert_eq!(count, _256);
+        ans
     }
 }
diff --git a/ggml-quants/src/structs/q6_k.rs b/ggml-quants/src/structs/q6_k.rs
index 433efe7..14fdc47 100644
--- a/ggml-quants/src/structs/q6_k.rs
+++ b/ggml-quants/src/structs/q6_k.rs
@@ -1,5 +1,8 @@
 use super::_256;
-use crate::{DataBlock, Quantize};
+use crate::{
+    structs::{make_qx_quants, max_by_abs, GROUP_MAX_EPS},
+    DataBlock, Quantize,
+};
 use half::f16;
 
 #[repr(C)]
@@ -23,10 +26,60 @@ impl DataBlock for Q6K {
 }
 
 impl Quantize for Q6K {
-    fn quantize(_data: &[f32; _256]) -> Self {
+    fn quantize(data: &[f32; _256]) -> Self {
+        // Quantize every group of 16 values to levels in `0..64` with its own scale.
+        let (scales, _levels): (Vec<_>, Vec<_>) =
+            data.chunks(16).map(|x| make_qx_quants(x, 32)).unzip();
+        let max_abs_scale = max_by_abs(&scales);
+
+        if max_abs_scale.abs() < GROUP_MAX_EPS {
+            return Self::ZEROS;
+        }
+
+        // TODO: pack `scales` and `_levels` into the block fields.
         todo!()
     }
     fn dequantize(&self) -> [f32; _256] {
-        todo!()
+        // Top two bits of every value: each 32-byte group of `qh` carries four
+        // bit-pair planes, emitted one plane after another and pre-shifted to
+        // bits 4 and 5.
+        let qh = self.qh.chunks(32).flat_map(|qh| {
+            (0..4).flat_map(move |j| qh.iter().map(move |a| ((a >> (2 * j)) & 0b11) << 4))
+        });
+        // Low four bits: each 64-byte group of `ql` is split in halves; the low
+        // nibbles of both halves come first, then the high nibbles of both.
+        let ql = self.ql.chunks(64).flat_map(|ql| {
+            let (l, h) = ql.split_at(32);
+            let ql1 = l.iter().map(|a| a & 0b1111);
+            let ql2 = h.iter().map(|a| a & 0b1111);
+            let ql3 = l.iter().map(|a: &u8| (a >> 4) & 0b1111);
+            let ql4 = h.iter().map(|a| (a >> 4) & 0b1111);
+            ql1.chain(ql2).chain(ql3).chain(ql4)
+        });
+        let delta = self.delta.to_f32();
+        // One scale per 16 consecutive values.
+        let scales = self.scales.iter().flat_map(|x| std::iter::repeat(x).take(16));
+        let y = qh.zip(ql).zip(scales).map(|((qh, ql), scale)| {
+            // Widen before removing the bias of 32: the combined 6-bit value is
+            // unsigned, so subtracting in `u8` overflows for every value below 32.
+            let q = i16::from(qh | ql) - 32;
+            delta * f32::from(q) * *scale as f32
+        });
+        let mut ans: [f32; _256] = [0.; _256];
+        let mut count = 0;
+        for (i, q) in y.enumerate() {
+            ans[i] = q;
+            count += 1;
+        }
+        assert_eq!(count, _256);
+        ans
     }
 }
+
+#[test]
+fn test_Q6K() {
+    // `as` truncates toward zero, so a float must be rounded explicitly
+    // before it is cast to an integer level.
+    let a: f32 = 0.7;
+    assert_eq!(a as isize, 0);
+}