Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ categories = ["algorithms", "compression", "multimedia::encoding", "science"]
license = "MIT OR Apache-2.0"

[features]
default = ["avx", "sse", "neon"]
default = ["avx","neon"]

# On x86_64, the "avx" feature enables compilation of AVX-accelerated code.
# Similarly, the "sse" feature enables compilation of SSE-accelerated code.
Expand Down
12 changes: 12 additions & 0 deletions src/algorithm/bluesteins_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,18 @@ impl<T: FftNum> BluesteinsAlgorithm<T> {
}
}

/// Out-of-place FFT for callers that can only provide a read-only `input`.
///
/// The input is copied into the first `input.len()` elements of `scratch`; that copy is
/// then handed to `process_outofplace_with_scratch` as its mutable input, and the rest of
/// `scratch` is passed on as that call's scratch space.
///
/// # Panics
///
/// Panics (in `split_at_mut`) if `scratch` is shorter than `input`.
fn perform_fft_out_of_place_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    scratch: &mut [Complex<T>],
) {
    // TODO - Is there a better way to do this?
    let (input_copy, remaining_scratch) = scratch.split_at_mut(input.len());
    input_copy.copy_from_slice(input);

    // `input_copy` is already a `&mut [Complex<T>]`, so it is passed as-is rather than re-borrowed
    self.process_outofplace_with_scratch(input_copy, output, remaining_scratch);
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
46 changes: 44 additions & 2 deletions src/algorithm/butterflies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,39 @@ macro_rules! boilerplate_fft_butterfly {
}
}
impl<T: FftNum> Fft<T> for $struct_name<T> {
/// Runs the butterfly over every `self.len()`-sized chunk of `input`, writing each result
/// into the matching chunk of `output`. `input` is only read, and `_scratch` is unused.
///
/// Mismatched or too-short buffers, and buffers that don't split cleanly into chunks of
/// `self.len()`, are reported through `fft_error_outofplace`.
fn process_outofplace_with_scratch_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    _scratch: &mut [Complex<T>],
) {
    // Size validation up front. The panic itself lives in a separate function (marked cold and inline(never)) so this function stays small
    if input.len() < self.len() || output.len() != input.len() {
        fft_error_outofplace(self.len(), input.len(), output.len(), 0, 0);
        return; // Unreachable, because fft_error_outofplace asserts, but it helps codegen to put it here
    }

    let chunked = array_utils::iter_chunks_zipped(
        input,
        output,
        self.len(),
        |src_chunk, dst_chunk| unsafe {
            self.perform_fft_butterfly(DoubleBuf {
                input: src_chunk,
                output: dst_chunk,
            })
        },
    );

    // An Err here means the buffer sizes weren't cleanly divisible by the FFT size.
    // As above, the panic is delegated to the out-of-line error function to reduce code size
    if chunked.is_err() {
        fft_error_outofplace(self.len(), input.len(), output.len(), 0, 0);
    }
}

fn process_outofplace_with_scratch(
&self,
input: &mut [Complex<T>],
Expand All @@ -29,7 +62,7 @@ macro_rules! boilerplate_fft_butterfly {
return; // Unreachable, because fft_error_outofplace asserts, but it helps codegen to put it here
}

let result = array_utils::iter_chunks_zipped(
let result = array_utils::iter_chunks_zipped_mut(
input,
output,
self.len(),
Expand All @@ -56,7 +89,7 @@ macro_rules! boilerplate_fft_butterfly {
return; // Unreachable, because fft_error_inplace asserts, but it helps codegen to put it here
}

let result = array_utils::iter_chunks(buffer, self.len(), |chunk| unsafe {
let result = array_utils::iter_chunks_mut(buffer, self.len(), |chunk| unsafe {
self.perform_fft_butterfly(chunk)
});

Expand Down Expand Up @@ -104,6 +137,15 @@ impl<T: FftNum> Butterfly1<T> {
}
}
impl<T: FftNum> Fft<T> for Butterfly1<T> {
/// Copies `input` into `output` unchanged; `_scratch` is not used.
///
/// # Panics
///
/// Panics (in `copy_from_slice`) if `input` and `output` have different lengths.
fn process_outofplace_with_scratch_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    _scratch: &mut [Complex<T>],
) {
    // `input` is already a slice reference, so no extra borrow is needed
    output.copy_from_slice(input);
}

fn process_outofplace_with_scratch(
&self,
input: &mut [Complex<T>],
Expand Down
9 changes: 9 additions & 0 deletions src/algorithm/dft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ impl<T: FftNum> Dft<T> {
}
}
}

// Judging by its name and signature, this is the out-of-place FFT entry point for a read-only `input`.
// Not implemented yet for this algorithm: the body is `todo!()`, so every call panics and none of the
// parameters are read or written.
fn perform_fft_out_of_place_immut(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
todo!()
}
}
boilerplate_fft_oop!(Dft, |this: &Dft<_>| this.twiddles.len());

Expand Down
41 changes: 41 additions & 0 deletions src/algorithm/good_thomas_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,15 @@ impl<T: FftNum> GoodThomasAlgorithm<T> {
self.reindex_output(scratch, buffer);
}

// Judging by its name and signature, this is the out-of-place FFT entry point for a read-only `input`.
// Not implemented yet for this algorithm: the body is `todo!()`, so every call panics and none of the
// parameters are read or written.
fn perform_fft_out_of_place_immut(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
todo!()
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down Expand Up @@ -384,6 +393,38 @@ impl<T: FftNum> GoodThomasAlgorithmSmall<T> {
}
}

/// Out-of-place FFT that only reads `input`, using `scratch` as the intermediate buffer.
///
/// The input is gathered into `output` through the first half of `input_output_map`,
/// transformed by the width-size FFT, transposed into `scratch`, transformed by the
/// height-size FFT, and finally scattered back into `output` through the second half of
/// the map.
///
/// # Panics
///
/// Panics if `input` or `output` is not exactly `self.len()` long, or if `scratch` is
/// shorter than `self.len()`.
fn perform_fft_out_of_place_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    scratch: &mut [Complex<T>],
) {
    // These asserts are for the unsafe block down below. We're relying on the optimizer to get rid of them
    assert_eq!(self.len(), input.len());
    assert_eq!(self.len(), output.len());

    let (input_map, output_map) = self.input_output_map.split_at(self.len());

    // copy the input using our reordering mapping
    for (output_element, &input_index) in output.iter_mut().zip(input_map.iter()) {
        *output_element = input[input_index];
    }

    // run FFTs of size `width`; here `scratch` is only workspace, so the whole buffer is passed through
    self.width_size_fft.process_with_scratch(output, scratch);

    // From here on `scratch` holds FFT data. The bounds-checked slice panics if it is too short
    // (instead of letting the unsafe transpose write out of bounds) and hides any excess elements
    // so the height-size FFT below only processes `self.len()` of them.
    let scratch = &mut scratch[..self.len()];

    // transpose
    // NOTE(review): `transpose_small` is presumably unchecked and touches width * height elements of
    // both buffers -- `output` is covered by the assert above, `scratch` by the slice just taken
    unsafe { array_utils::transpose_small(self.width, self.height, output, scratch) };

    // run FFTs of size 'height'
    self.height_size_fft.process_with_scratch(scratch, output);

    // copy to the output, using our output reordering mapping
    for (input_element, &output_index) in scratch.iter().zip(output_map.iter()) {
        output[output_index] = *input_element;
    }
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
67 changes: 67 additions & 0 deletions src/algorithm/mixed_radix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,45 @@ impl<T: FftNum> MixedRadix<T> {
transpose::transpose(scratch, buffer, self.width, self.height);
}

/// Six-step out-of-place FFT that only reads `input`.
///
/// Data moves `input` -> `output` -> `scratch` -> `output` through three transposes.
/// `scratch` doubles as workspace for the height-size FFTs (step 2) and as the data buffer
/// for steps 4-6; `output` is the workspace for the width-size FFTs (step 5).
///
/// # Panics
///
/// Panics if `scratch` holds fewer than `width * height` elements. The transposes and
/// inner FFTs may also panic on buffers of the wrong size.
fn perform_fft_out_of_place_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    scratch: &mut [Complex<T>],
) {
    // STEP 1: transpose
    transpose::transpose(input, output, self.width, self.height);

    // STEP 2: perform FFTs of size `height`, using all of `scratch` as workspace
    self.height_size_fft.process_with_scratch(output, scratch);

    // STEP 3: Apply twiddle factors
    for (element, twiddle) in output.iter_mut().zip(self.twiddles.iter()) {
        *element = *element * twiddle;
    }

    // From here on `scratch` carries the data itself, so restrict it to exactly one FFT's worth.
    // NOTE(review): `transpose::transpose` is expected to require buffers of exactly width * height
    // elements -- without this slice an oversized scratch buffer would be rejected there
    let scratch = &mut scratch[..self.width * self.height];

    // STEP 4: transpose again
    transpose::transpose(output, scratch, self.height, self.width);

    // STEP 5: perform FFTs of size `width`, using `output` as workspace
    self.width_size_fft.process_with_scratch(scratch, output);

    // STEP 6: transpose again
    transpose::transpose(scratch, output, self.width, self.height);
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down Expand Up @@ -302,6 +341,34 @@ impl<T: FftNum> MixedRadixSmall<T> {
unsafe { array_utils::transpose_small(self.width, self.height, scratch, buffer) };
}

/// Six-step out-of-place FFT that only reads `input`.
///
/// Data moves `input` -> `output` -> `scratch` -> `output` through three transposes.
/// `scratch` doubles as workspace for the height-size FFTs (step 2) and as the data buffer
/// for steps 4-6; `output` is the workspace for the width-size FFTs (step 5).
///
/// # Panics
///
/// Panics if `scratch` holds fewer than `width * height` elements.
fn perform_fft_out_of_place_immut(
    &self,
    input: &[Complex<T>],
    output: &mut [Complex<T>],
    scratch: &mut [Complex<T>],
) {
    // SIX STEP FFT:
    // STEP 1: transpose
    // NOTE(review): nothing in this function checks `input`/`output` against width * height before
    // this unsafe call -- presumably the caller has validated them; confirm
    unsafe { array_utils::transpose_small(self.width, self.height, input, output) };

    // STEP 2: perform FFTs of size `height`, using all of `scratch` as workspace
    self.height_size_fft.process_with_scratch(output, scratch);

    // STEP 3: Apply twiddle factors
    for (element, twiddle) in output.iter_mut().zip(self.twiddles.iter()) {
        *element = *element * twiddle;
    }

    // From here on `scratch` carries the data itself. The bounds-checked slice panics if it is too
    // short (instead of letting the unsafe transpose write out of bounds) and hides any excess
    // elements so step 5 only processes one FFT's worth of data.
    let scratch = &mut scratch[..self.width * self.height];

    // STEP 4: transpose again
    unsafe { array_utils::transpose_small(self.height, self.width, output, scratch) };

    // STEP 5: perform FFTs of size `width`, using `output` as workspace
    self.width_size_fft.process_with_scratch(scratch, output);

    // STEP 6: transpose again
    unsafe { array_utils::transpose_small(self.width, self.height, scratch, output) };
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
9 changes: 9 additions & 0 deletions src/algorithm/raders_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ impl<T: FftNum> RadersAlgorithm<T> {
}
}

// Judging by its name and signature, this is the out-of-place FFT entry point for a read-only `input`.
// Not implemented yet for this algorithm: the body is `todo!()`, so every call panics and none of the
// parameters are read or written.
fn perform_fft_out_of_place_immut(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
todo!()
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
9 changes: 9 additions & 0 deletions src/algorithm/radix3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,15 @@ impl<T: FftNum> Radix3<T> {
self.outofplace_scratch_len
}

// Judging by its name and signature, this is the out-of-place FFT entry point for a read-only `input`.
// Not implemented yet for this algorithm: the body is `todo!()`, so every call panics and none of the
// parameters are read or written.
fn perform_fft_out_of_place_immut(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
todo!()
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
9 changes: 9 additions & 0 deletions src/algorithm/radix4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,15 @@ impl<T: FftNum> Radix4<T> {
self.outofplace_scratch_len
}

// Judging by its name and signature, this is the out-of-place FFT entry point for a read-only `input`.
// Not implemented yet for this algorithm: the body is `todo!()`, so every call panics and none of the
// parameters are read or written.
fn perform_fft_out_of_place_immut(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
todo!()
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
9 changes: 9 additions & 0 deletions src/algorithm/radixn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ impl<T: FftNum> RadixN<T> {
self.outofplace_scratch_len
}

// Judging by its name and signature, this is the out-of-place FFT entry point for a read-only `input`.
// Not implemented yet for this algorithm: the body is `todo!()`, so every call panics and none of the
// parameters are read or written.
fn perform_fft_out_of_place_immut(
&self,
input: &[Complex<T>],
output: &mut [Complex<T>],
scratch: &mut [Complex<T>],
) {
todo!()
}

fn perform_fft_out_of_place(
&self,
input: &mut [Complex<T>],
Expand Down
61 changes: 61 additions & 0 deletions src/array_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,29 @@ mod unit_tests {
// Walk a read-only buffer in consecutive chunks of exactly `chunk_size` elements, handing each chunk to `chunk_fn`. Very similar in semantics to ChunksExact, but generates smaller code and requires no modulo operations
// Returns Ok() if the chunks covered the buffer exactly, Err() if fewer than `chunk_size` elements were left over at the end
pub fn iter_chunks<T>(
    mut buffer: &[T],
    chunk_size: usize,
    mut chunk_fn: impl FnMut(&[T]),
) -> Result<(), ()> {
    // `get(..chunk_size)` yields a chunk exactly as long as at least `chunk_size` elements are left
    while let Some(chunk) = buffer.get(..chunk_size) {
        chunk_fn(chunk);
        buffer = &buffer[chunk_size..];
    }

    // Whatever is still in the buffer at this point is an unwanted remainder that the caller needs to hear about
    if buffer.is_empty() {
        Ok(())
    } else {
        Err(())
    }
}

// Loop over exact chunks of the provided buffer. Very similar in semantics to ChunksExactMut, but generates smaller code and requires no modulo operations
// Returns Ok() if every element ended up in a chunk, Err() if there was a remainder
pub fn iter_chunks_mut<T>(
mut buffer: &mut [T],
chunk_size: usize,
mut chunk_fn: impl FnMut(&mut [T]),
Expand All @@ -169,6 +192,44 @@ pub fn iter_chunks<T>(
// Loop over exact zipped chunks of the 2 provided buffers, reading from `buffer1` and writing to `buffer2`. Very similar in semantics to ChunksExact.zip(ChunksExactMut), but generates smaller code and requires no modulo operations
// Returns Ok() if every element of both buffers ended up in a chunk, Err() if the buffers had different lengths or there was a remainder
// Note that a `chunk_size` of 0 never consumes anything from either buffer, so the loop would not terminate
pub fn iter_chunks_zipped<T>(
    mut buffer1: &[T],
    mut buffer2: &mut [T],
    chunk_size: usize,
    mut chunk_fn: impl FnMut(&[T], &mut [T]),
) -> Result<(), ()> {
    // If the two buffers aren't the same size, record the fact that they're different, then snip the longer one down to the length of the shorter one
    let uneven = if buffer1.len() > buffer2.len() {
        buffer1 = &buffer1[..buffer2.len()];
        true
    } else if buffer2.len() > buffer1.len() {
        // This branch has to test the opposite direction of the one above, otherwise a longer `buffer2` goes unnoticed and its tail is silently skipped
        buffer2 = &mut buffer2[..buffer1.len()];
        true
    } else {
        false
    };

    // Now that we know the two slices are the same length, loop over each one, splicing off chunk_size at a time, and calling chunk_fn on each
    while buffer1.len() >= chunk_size && buffer2.len() >= chunk_size {
        let (head1, tail1) = buffer1.split_at(chunk_size);
        buffer1 = tail1;

        let (head2, tail2) = buffer2.split_at_mut(chunk_size);
        buffer2 = tail2;

        chunk_fn(head1, head2);
    }

    // We have a remainder if the 2 buffers were uneven to start with, or if there's still data in the buffers -- in which case we want to indicate to the caller that there was an unwanted remainder
    if !uneven && buffer1.is_empty() {
        Ok(())
    } else {
        Err(())
    }
}

// Loop over exact zipped chunks of the 2 provided buffers. Very similar in semantics to ChunksExactMut.zip(ChunksExactMut), but generates smaller code and requires no modulo operations
// Returns Ok() if every element of both buffers ended up in a chunk, Err() if there was a remainder
pub fn iter_chunks_zipped_mut<T>(
mut buffer1: &mut [T],
mut buffer2: &mut [T],
chunk_size: usize,
Expand Down
Loading