-
Notifications
You must be signed in to change notification settings - Fork 3
2D Extension of 1D Cooley-Tukey FFT. Refactoring original not necesss… #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| #!/bin/bash | ||
|
|
||
| # Copyright (c) 2022 Graphcore Ltd. All rights reserved. | ||
|
|
||
| RUN_DIR="2d_profiles" | ||
| mkdir -p $RUN_DIR | ||
| CSV_FILE=${RUN_DIR}/"2d_csv_results_2.txt" | ||
|
|
||
| for SIZE in 256 512 1024 2048 | ||
| do | ||
| for BS in 1 4 8 16 32 64 | ||
| do | ||
| for RADIX in 2 4 8 16 32 64 128 | ||
| do | ||
| RUN_NAME="fft_2d_${SIZE}_bs${BS}_radix${RADIX}" | ||
| export POPLAR_ENGINE_OPTIONS="{\"autoReport.all\":\"true\", \"autoReport.directory\":\"${RUN_DIR}/${RUN_NAME}\", \"profiler.includeFlopEstimates\":\"true\"}" | ||
| echo "Running size: ${SIZE} batch-size: ${BS} radix: ${RADIX}" | ||
| mkdir -p ${RUN_DIR}/${RUN_NAME} | ||
| ./multi-tool FourierTransform2D --fft-size ${SIZE} --batch-size ${BS} --radix-size ${RADIX} > ${RUN_DIR}/${RUN_NAME}/run_log.txt & | ||
| done | ||
| wait | ||
| done | ||
| done | ||
|
|
||
| # Overwrite the CSV file writing new headers: | ||
| python3 ../python/fft/perf_analysis.py --report-file fake --log-file fake --csv-out ${CSV_FILE} --csv-write-headers | ||
|
|
||
| # Append all run results to CSV file: | ||
| for DIR in $RUN_DIR/* | ||
| do | ||
| echo Processing path $DIR | ||
| LOG_FILE=$DIR/run_log.txt | ||
| REPORT_FILE=$DIR/ipu_utils_engine/profile.pop | ||
| python3 ../python/fft/perf_analysis.py --report-file ${REPORT_FILE} --log-file ${LOG_FILE} --csv-out ${CSV_FILE} | ||
| done |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,13 +55,10 @@ namespace complex { | |
| namespace pe = popops::expr; | ||
| auto complexMulExprRe = pe::Sub(pe::Mul(pe::_1, pe::_2), pe::Mul(pe::_3, pe::_4)); | ||
| auto complexMulExprIm = pe::Add(pe::Mul(pe::_1, pe::_2), pe::Mul(pe::_3, pe::_4)); | ||
|
|
||
| // Can only do the second expression in-place: | ||
| auto tmpReal = popops::map(graph, complexMulExprRe, {real, v.real, imag, v.imag}, | ||
| prog, debugPrefix + "/complex_mul_re"); | ||
| popops::mapInPlace(graph, complexMulExprRe, {real, v.real, imag, v.imag}, | ||
| prog, debugPrefix + "/complex_mul_re"); | ||
| popops::mapInPlace(graph, complexMulExprIm, {imag, v.real, real, v.imag}, | ||
| prog, debugPrefix + "/complex_mul_im"); | ||
| real = tmpReal; | ||
| } | ||
|
|
||
| ComplexTensor multiply(poplar::Graph& graph, | ||
|
|
@@ -129,17 +126,12 @@ namespace complex { | |
| graph.setTileMapping(matrix.real, graph.getTileMapping(matmulMapping)); | ||
| graph.setTileMapping(matrix.imag, graph.getTileMapping(matmulMapping)); | ||
|
|
||
| poplar::OptionFlags matmulOptions; | ||
| if (availableMemoryProportion > 0.f) { | ||
| matmulOptions.set("availableMemoryProportion", std::to_string(availableMemoryProportion)); | ||
| } | ||
|
|
||
| poplar::Tensor partial = | ||
| poplin::matMul(graph, matrix.real, realBatch, prog, | ||
| elemType, debugStr + "/real_matmul", matmulOptions); | ||
| elemType, debugStr + "/real_matmul"); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to keep my change here (matmul options). |
||
|
|
||
| poplin::matMulAcc(graph, partial, 1.f, matrix.imag, imagBatch, prog, | ||
| debugStr + "/imag_matmul", matmulOptions); | ||
| debugStr + "/imag_matmul"); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep matmul options as above. |
||
|
|
||
| // FLOP estimates for matrix multiplies: | ||
| flopEstimate += 2 * matrix.dim(0) * matrix.dim(1) * realBatch.dim(1) * 2; | ||
|
|
@@ -219,25 +211,21 @@ namespace complex { | |
| auto result_odd = fftSubResult.transpose().slice(batchSize, 2*batchSize, 0); | ||
| ipu_utils::logger()->debug("Twiddle coeff shape: {} and multiply shape: {}", w.shape(), result_odd.shape()); | ||
|
|
||
| // Copy the DFT results to a linear layout if there are enough | ||
| // elements for this to make sense (this heuristic is very approximate): | ||
| if (result_even.real.numElements() > graph.getTarget().getNumTiles()) { | ||
| ipu_utils::logger()->debug("Re-mapping DFT result ({} > {}).", | ||
| result_even.real.numElements(), graph.getTarget().getNumTiles()); | ||
| auto result_even_remapped = ComplexTensor(graph, result_even.elementType(), result_even.shape(), "dft_even_remapped"); | ||
| result_even_remapped.mapLinearly(graph); | ||
| prog.add(copy(result_even, result_even_remapped)); | ||
| result_even = result_even_remapped; | ||
|
|
||
| auto result_odd_remapped = ComplexTensor(graph, result_even.elementType(), result_even.shape(), "dft_even_remapped"); | ||
| result_odd_remapped.mapLinearly(graph); | ||
| prog.add(copy(result_odd, result_odd_remapped)); | ||
| result_odd = result_odd_remapped; | ||
| } | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep my change here: it uses a better layout for small FFTs to improve their performance and memory use. |
||
| // Copy the DFT results to a linear layout: | ||
| auto result_even_remapped = ComplexTensor(graph, result_even.elementType(), result_even.shape(), "dft_even_remapped"); | ||
| result_even_remapped.mapLinearly(graph); | ||
| prog.add(copy(result_even, result_even_remapped)); | ||
| result_even = result_even_remapped; | ||
|
|
||
| auto result_odd_remapped = ComplexTensor(graph, result_even.elementType(), result_even.shape(), "dft_even_remapped"); | ||
| result_odd_remapped.mapLinearly(graph); | ||
| prog.add(copy(result_odd, result_odd_remapped)); | ||
| result_odd = result_odd_remapped; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep this as it improves memory use and perf. |
||
|
|
||
| // Element-wise multiply odd components by coefficients: | ||
| result_odd.multiplyInPlace(graph, w, prog, "twiddle"); | ||
| auto tmp = result_odd; | ||
| auto tmp = multiply(graph, w, result_odd, prog, "twiddle"); | ||
| //result_odd.multiplyInPlace(graph, w, prog, "twiddle"); | ||
| //auto tmp = result_odd; | ||
| // FLOP estimate for complex multiply: | ||
| flopEstimate += 6 * tmp.real.numElements(); | ||
|
|
||
|
|
@@ -266,7 +254,7 @@ namespace complex { | |
|
|
||
| ComplexTensor FFTBuilder::inverseFourierMatrices( | ||
| std::size_t length, poplar::Type elemType) { | ||
| const double twoPi_over_length = (2.0L / length) * 3.141592653589793238462643383279502884L; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep my change as it improves precision significantly (same below). |
||
| const float twoPi_over_length = (2.0 / length) * 3.14159265358979323846; | ||
| std::vector<float> real(length * length, 0.f); | ||
| std::vector<float> imag(length * length, 0.f); | ||
| for (std::size_t row = 0; row < length; ++row) { | ||
|
|
@@ -292,7 +280,7 @@ namespace complex { | |
| throw std::logic_error("FFT size must be a multiple of 2."); | ||
| } | ||
| auto baseSize = N / 2; | ||
| const double s = ((2.0L * (N-1)) / N) * 3.141592653589793238462643383279502884L; | ||
| const float s = ((2.0 * (N-1)) / N) * 3.14159265358979323846; | ||
| std::vector<float> real(baseSize, 0.f); | ||
| std::vector<float> imag(baseSize, 0.f); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -105,29 +105,23 @@ class FFTBuilder { | |
| FFTBuilder(poplar::Graph &graph, | ||
| poplar::program::Sequence &sequence, | ||
| const std::string debugName) | ||
| : graph(graph), prog(sequence), debugPrefix(debugName), | ||
| availableMemoryProportion(-1.f), flopEstimate(0) {} | ||
|
|
||
| /// Set the proportion of memory available for the inner DFT matrix-multiplies. | ||
| void setAvailableMemoryProportion(float proportion) { availableMemoryProportion = proportion; } | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep the availableMemoryProportion changes. |
||
| : graph(graph), prog(sequence), debugPrefix(debugName), flopEstimate(0) {} | ||
|
|
||
| /// Build the compute graph that applies FFT to the given complex vector. | ||
| /// The program will be appended to the sequence specified in construction | ||
| /// of this object. | ||
| ComplexTensor fft1d(ComplexTensor input, std::size_t radix = 0); | ||
|
|
||
| poplar::program::Sequence& getProgram() { return prog; } | ||
| std::size_t getFlopEstimate() const { return flopEstimate; } | ||
|
|
||
| private: | ||
| float availableMemoryProportion; | ||
| std::size_t flopEstimate; | ||
|
|
||
| // Utility functions used in construction of the FFT graph program. | ||
| ComplexTensor multiplyMatrixByVectorBatch(const ComplexTensor matrix, ComplexTensor vectors); | ||
| ComplexTensor dft1d(ComplexTensor fourierMatrix, ComplexTensor even, ComplexTensor odd); | ||
| std::pair<ComplexTensor, ComplexTensor> splitEvenOdd(ComplexTensor input); | ||
| ComplexTensor inverseFourierMatrices(std::size_t length, poplar::Type elemType); | ||
| ComplexTensor twiddleCoefficients(std::size_t N, poplar::Type elemType); | ||
| std::size_t flopEstimate; | ||
| }; | ||
|
|
||
| } // namespace complex | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to keep my change here. Only the second operation can be done in place because it needs the input from the first.