Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
026236f
Adding process_immutable_with_scratch method
michaelciraci May 2, 2025
ce3c981
Starting to addressing pull request comments
michaelciraci May 16, 2025
67a1408
Undoing fmt on autogen code
michaelciraci May 26, 2025
0106c16
More work in progress
michaelciraci May 26, 2025
0620130
Correcting neon imports
michaelciraci May 26, 2025
8d2424c
More pull request comments
michaelciraci May 31, 2025
434203b
Reducing immut scratch length
michaelciraci May 31, 2025
b55da7b
Adding explicit sse radix4 method
michaelciraci May 31, 2025
003cf3a
Correcting pipeline fails
michaelciraci Jun 2, 2025
7547421
Changing immut default from out of place to immut for some algorithms
michaelciraci Jun 4, 2025
0fb3a7a
Correcting autogen check
michaelciraci Jun 5, 2025
a12b9cb
Explicitly using fft_error_immut in macro
michaelciraci Jun 5, 2025
734e407
Merge remote-tracking branch 'origin/master' into pr/157
ejmahler Jun 5, 2025
6c44599
Keep the usage of ffr_error_immut consistent with the rest of the err…
ejmahler Jun 5, 2025
0281042
Dft doesn't need any scratch
ejmahler Jun 5, 2025
59de6f9
Updated comment in raders algorithm immutable impl
ejmahler Jun 5, 2025
ec19a53
Make sure error messages are correct in butterfly boilerplate
ejmahler Jun 5, 2025
3ce4039
Update scratch requests for RadixK
ejmahler Jun 5, 2025
99824d5
Make sure to use correct error messages in avx butterflies
ejmahler Jun 5, 2025
5e9af29
Keep scratch usage consistent in avx large butterflies
ejmahler Jun 5, 2025
8f66acb
Consistently forward from perform_fft_out_of_place to perform_fft_immut
ejmahler Jun 5, 2025
f6148ea
Skip the initial copy in avx mixed radix partial butterflies
ejmahler Jun 5, 2025
a34a7df
Use correct error messages in avx boilerplate
ejmahler Jun 5, 2025
4f394c2
Use correct error messages in scalar boilerplate
ejmahler Jun 5, 2025
801cca2
Only need one perform_oop_fft_butterfly_multi since they're identical
ejmahler Jun 5, 2025
4ce7490
Consistently forward from out of place to immutable
ejmahler Jun 5, 2025
f04e8ce
Consistently forward from out of place to immutable
ejmahler Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions benches/bench_rustfft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ impl<T: FftNum> Fft<T> for Noop {
fn process_outofplace_with_scratch(&self, _input: &mut [Complex<T>], _output: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {}
fn get_inplace_scratch_len(&self) -> usize { self.len }
fn get_outofplace_scratch_len(&self) -> usize { 0 }
/// Immutable-input variant of the benchmark's no-op FFT.
///
/// The body is intentionally empty: none of the three buffers is read or written,
/// which is why every parameter carries a leading underscore.
fn process_immutable_with_scratch(
&self,
_input: &[Complex<T>],
_output: &mut [Complex<T>],
_scratch: &mut [Complex<T>],
) {}
/// Returns 0: the no-op `process_immutable_with_scratch` above never touches its scratch buffer.
fn get_immutable_scratch_len(&self) -> usize { 0 }
}
impl Length for Noop {
fn len(&self) -> usize { self.len }
Expand Down
12 changes: 12 additions & 0 deletions benches/bench_rustfft_scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ impl<T: FftNum> Fft<T> for Noop {
fn get_outofplace_scratch_len(&self) -> usize {
0
}

/// Immutable-input variant of the benchmark's no-op FFT.
///
/// The body is intentionally empty: none of the three buffers is read or written,
/// which is why every parameter carries a leading underscore.
fn process_immutable_with_scratch(
&self,
_input: &[Complex<T>],
_output: &mut [Complex<T>],
_scratch: &mut [Complex<T>],
) {
}

/// Returns 0: the no-op `process_immutable_with_scratch` of this type never touches its scratch buffer.
fn get_immutable_scratch_len(&self) -> usize {
0
}
}
impl Length for Noop {
fn len(&self) -> usize {
Expand Down
18 changes: 15 additions & 3 deletions src/algorithm/bluesteins_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,10 @@ impl<T: FftNum> BluesteinsAlgorithm<T> {
}
}

fn perform_fft_out_of_place(
#[inline]
fn perform_fft_immut(
&self,
input: &mut [Complex<T>],
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
Expand Down Expand Up @@ -179,14 +180,25 @@ impl<T: FftNum> BluesteinsAlgorithm<T> {
*buffer_entry = inner_entry.conj() * twiddle;
}
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
self.perform_fft_immut(input, output, scratch);
}
}
boilerplate_fft!(
BluesteinsAlgorithm,
|this: &BluesteinsAlgorithm<_>| this.len, // FFT len
|this: &BluesteinsAlgorithm<_>| this.inner_fft_multiplier.len()
+ this.inner_fft.get_inplace_scratch_len(), // in-place scratch len
|this: &BluesteinsAlgorithm<_>| this.inner_fft_multiplier.len()
+ this.inner_fft.get_inplace_scratch_len() // out of place scratch len
+ this.inner_fft.get_inplace_scratch_len(), // out of place scratch len
|this: &BluesteinsAlgorithm<_>| this.inner_fft_multiplier.len()
+ this.inner_fft.get_inplace_scratch_len() // immut scratch len
);

#[cfg(test)]
Expand Down
58 changes: 54 additions & 4 deletions src/algorithm/butterflies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use num_complex::Complex;
use crate::{common::FftNum, FftDirection};

use crate::array_utils::{self, DoubleBuf, LoadStore};
use crate::common::{fft_error_inplace, fft_error_outofplace};
use crate::common::{fft_error_immut, fft_error_inplace, fft_error_outofplace};
use crate::twiddles;
use crate::{Direction, Fft, Length};

Expand All @@ -17,6 +17,39 @@ macro_rules! boilerplate_fft_butterfly {
}
}
impl<T: FftNum> Fft<T> for $struct_name<T> {
// Immutable-input processing for a fixed-size butterfly: the input is walked in
// chunks of `self.len()` and each chunk is transformed into the matching chunk of
// `output`. The scratch buffer is not used.
#[inline]
fn process_immutable_with_scratch(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    _scratch: &mut [Complex<T>],
) {
    let fft_len = self.len();
    let lengths_ok = input.len() >= fft_len && output.len() == input.len();
    if !lengths_ok {
        // The panic is raised from a separate helper rather than here, to keep the
        // code size of this function small.
        fft_error_immut(fft_len, input.len(), output.len(), 0, 0);
        return; // Not reached if the helper panics; kept because it helps codegen
    }

    let chunked = array_utils::iter_chunks_zipped(input, output, fft_len, |src, dst| {
        unsafe {
            self.perform_fft_butterfly(DoubleBuf {
                input: src,
                output: dst,
            })
        };
    });

    // An error here means the buffer sizes were not cleanly divisible by the FFT
    // size; again the panic is delegated to the shared helper.
    if chunked.is_err() {
        fft_error_immut(fft_len, input.len(), output.len(), 0, 0);
    }
}
fn process_outofplace_with_scratch(
&self,
input: &mut [Complex<T>],
Expand All @@ -26,7 +59,7 @@ macro_rules! boilerplate_fft_butterfly {
if input.len() < self.len() || output.len() != input.len() {
// We want to trigger a panic, but we want to avoid doing it in this function to reduce code size, so call a function marked cold and inline(never) that will do it for us
fft_error_outofplace(self.len(), input.len(), output.len(), 0, 0);
return; // Unreachable, because fft_error_outofplace asserts, but it helps codegen to put it here
return; // Unreachable, because fft_error_outofplace asserts, but it helps codegen to put it here
}

let result = array_utils::iter_chunks_zipped(
Expand Down Expand Up @@ -56,7 +89,7 @@ macro_rules! boilerplate_fft_butterfly {
return; // Unreachable, because fft_error_inplace asserts, but it helps codegen to put it here
}

let result = array_utils::iter_chunks(buffer, self.len(), |chunk| unsafe {
let result = array_utils::iter_chunks_mut(buffer, self.len(), |chunk| unsafe {
self.perform_fft_butterfly(chunk)
});

Expand All @@ -74,6 +107,10 @@ macro_rules! boilerplate_fft_butterfly {
fn get_outofplace_scratch_len(&self) -> usize {
0
}
// Returns 0: the `process_immutable_with_scratch` generated by this macro leaves its
// `_scratch` argument unused.
#[inline(always)]
fn get_immutable_scratch_len(&self) -> usize {
0
}
}
impl<T> Length for $struct_name<T> {
#[inline(always)]
Expand Down Expand Up @@ -104,13 +141,22 @@ impl<T: FftNum> Butterfly1<T> {
}
}
impl<T: FftNum> Fft<T> for Butterfly1<T> {
/// Immutable-input processing for `Butterfly1`: the output is a verbatim copy of the input.
///
/// NOTE(review): presumably this is the size-1 FFT, for which the transform is the
/// identity — confirm against the `Length` impl.
///
/// The scratch buffer is unused. `copy_from_slice` panics if `input` and `output`
/// have different lengths; no other length validation is performed here.
fn process_immutable_with_scratch(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
_scratch: &mut [Complex<T>],
) {
output.copy_from_slice(input);
}

fn process_outofplace_with_scratch(
&self,
input: &mut [Complex<T>],
output: &mut [Complex<T>],
_scratch: &mut [Complex<T>],
) {
output.copy_from_slice(&input);
output.copy_from_slice(input);
}

fn process_with_scratch(&self, _buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {}
Expand All @@ -122,6 +168,10 @@ impl<T: FftNum> Fft<T> for Butterfly1<T> {
fn get_outofplace_scratch_len(&self) -> usize {
0
}

/// Returns 0: `Butterfly1::process_immutable_with_scratch` ignores its scratch buffer.
fn get_immutable_scratch_len(&self) -> usize {
0
}
}
impl<T> Length for Butterfly1<T> {
fn len(&self) -> usize {
Expand Down
15 changes: 12 additions & 3 deletions src/algorithm/dft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use num_complex::Complex;
use num_traits::Zero;

use crate::array_utils;
use crate::common::{fft_error_inplace, fft_error_outofplace};
use crate::common::{fft_error_immut, fft_error_inplace, fft_error_outofplace};
use crate::{twiddles, FftDirection};
use crate::{Direction, Fft, FftNum, Length};

Expand Down Expand Up @@ -45,7 +45,7 @@ impl<T: FftNum> Dft<T> {
0
}

fn perform_fft_out_of_place(
fn perform_fft_immut(
&self,
signal: &[Complex<T>],
spectrum: &mut [Complex<T>],
Expand All @@ -68,8 +68,17 @@ impl<T: FftNum> Dft<T> {
}
}
}

fn perform_fft_out_of_place(
&self,
signal: &[Complex<T>],
spectrum: &mut [Complex<T>],
_scratch: &mut [Complex<T>],
) {
self.perform_fft_immut(signal, spectrum, _scratch);
}
}
boilerplate_fft_oop!(Dft, |this: &Dft<_>| this.twiddles.len());
boilerplate_fft_oop!(Dft, |this: &Dft<_>| this.twiddles.len(), |_: &Dft<_>| 0);

#[cfg(test)]
mod unit_tests {
Expand Down
95 changes: 87 additions & 8 deletions src/algorithm/good_thomas_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub struct GoodThomasAlgorithm<T> {

inplace_scratch_len: usize,
outofplace_scratch_len: usize,
immut_scratch_len: usize,

len: usize,
direction: FftDirection,
Expand Down Expand Up @@ -117,6 +118,11 @@ impl<T: FftNum> GoodThomasAlgorithm<T> {
height_outofplace_scratch,
);

let immut_scratch_len = max(
width_fft.get_inplace_scratch_len(),
len + height_fft.get_inplace_scratch_len(),
);

Self {
width,
width_size_fft: width_fft,
Expand All @@ -129,6 +135,7 @@ impl<T: FftNum> GoodThomasAlgorithm<T> {

inplace_scratch_len,
outofplace_scratch_len,
immut_scratch_len,

len,
direction,
Expand Down Expand Up @@ -241,6 +248,31 @@ impl<T: FftNum> GoodThomasAlgorithm<T> {
self.reindex_output(scratch, buffer);
}

/// Computes the FFT without modifying `input`.
///
/// `output` doubles as the working buffer for the first pass. `scratch` is used
/// whole by the width-size FFTs, then split: the first `self.len()` elements hold
/// the transposed data and the remainder is handed to the height-size FFTs.
fn perform_fft_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    scratch: &mut [Complex<T>],
) {
    // Step 1: re-index the input, writing the reordered data into `output`.
    self.reindex_input(input, output);

    // Step 2: FFTs of size `width`, in place on `output`, with the whole scratch available.
    self.width_size_fft.process_with_scratch(output, scratch);

    // From here on the scratch is partitioned into a data area and the inner FFT's scratch.
    let (transposed, height_scratch) = scratch.split_at_mut(self.len());

    // Step 3: transpose `output` into the data area.
    transpose::transpose(output, transposed, self.width, self.height);

    // Step 4: FFTs of size `height`, in place on the transposed data.
    self.height_size_fft
        .process_with_scratch(transposed, height_scratch);

    // Step 5: re-index the result, copying from the scratch data area into `output`.
    self.reindex_output(transposed, output);
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down Expand Up @@ -279,7 +311,8 @@ boilerplate_fft!(
GoodThomasAlgorithm,
|this: &GoodThomasAlgorithm<_>| this.len,
|this: &GoodThomasAlgorithm<_>| this.inplace_scratch_len,
|this: &GoodThomasAlgorithm<_>| this.outofplace_scratch_len
|this: &GoodThomasAlgorithm<_>| this.outofplace_scratch_len,
|this: &GoodThomasAlgorithm<_>| this.immut_scratch_len
);

/// Implementation of the Good-Thomas Algorithm, specialized for smaller input sizes
Expand Down Expand Up @@ -384,6 +417,38 @@ impl<T: FftNum> GoodThomasAlgorithmSmall<T> {
}
}

/// Computes the FFT without modifying `input`.
///
/// `output` and `scratch` alternate as working buffers: the reordered input is
/// written to `output`, transposed into `scratch`, and finally scattered back
/// into `output` through the output index map.
fn perform_fft_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    scratch: &mut [Complex<T>],
) {
    // Length checks guarding the unsafe block below; the optimizer is expected to
    // eliminate them.
    assert_eq!(self.len(), input.len());
    assert_eq!(self.len(), output.len());

    // The first `len` entries of the combined map are input indices, the rest output indices.
    let (gather_map, scatter_map) = self.input_output_map.split_at(self.len());

    // Gather: fill `output` from `input` according to the input reordering map.
    output
        .iter_mut()
        .zip(gather_map)
        .for_each(|(slot, &source_index)| *slot = input[source_index]);

    // FFTs of size `width`, in place on `output`.
    self.width_size_fft.process_with_scratch(output, scratch);

    // Transpose `output` into `scratch`.
    unsafe { array_utils::transpose_small(self.width, self.height, output, scratch) };

    // FFTs of size `height`, in place on `scratch`; `output` serves as their scratch space.
    self.height_size_fft.process_with_scratch(scratch, output);

    // Scatter: write the results into `output` according to the output reordering map.
    for (&value, &target_index) in scratch.iter().zip(scatter_map) {
        output[target_index] = value;
    }
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down Expand Up @@ -448,7 +513,8 @@ boilerplate_fft!(
GoodThomasAlgorithmSmall,
|this: &GoodThomasAlgorithmSmall<_>| this.width * this.height,
|this: &GoodThomasAlgorithmSmall<_>| this.len(),
|_| 0
|_| 0,
|this: &GoodThomasAlgorithmSmall<_>| this.len()
);

#[cfg(test)]
Expand Down Expand Up @@ -532,12 +598,15 @@ mod unit_tests {
for &len in &scratch_lengths {
for &inplace_scratch in &scratch_lengths {
for &outofplace_scratch in &scratch_lengths {
inner_ffts.push(Arc::new(BigScratchAlgorithm {
len,
inplace_scratch,
outofplace_scratch,
direction: FftDirection::Forward,
}) as Arc<dyn Fft<f32>>);
for &immut_scratch in &scratch_lengths {
inner_ffts.push(Arc::new(BigScratchAlgorithm {
len,
inplace_scratch,
outofplace_scratch,
immut_scratch,
direction: FftDirection::Forward,
}) as Arc<dyn Fft<f32>>);
}
}
}
}
Expand Down Expand Up @@ -565,6 +634,16 @@ mod unit_tests {
&mut outofplace_output,
&mut outofplace_scratch,
);

let immut_input = vec![Complex::zero(); fft.len()];
let mut immut_output = vec![Complex::zero(); fft.len()];
let mut immut_scratch = vec![Complex::zero(); fft.get_immutable_scratch_len()];

fft.process_immutable_with_scratch(
&immut_input,
&mut immut_output,
&mut immut_scratch,
);
}
}
}
Expand Down
Loading