diff --git a/fec.cpp b/fec.cpp index 47c3577..f5bc960 100644 --- a/fec.cpp +++ b/fec.cpp @@ -2,12 +2,9 @@ // Created by 理 傅 on 2017/1/2. // -#include -#include #include #include #include "fec.h" -#include "sess.h" #include "encoding.h" FEC::FEC(ReedSolomon enc) :enc(enc) {} @@ -33,13 +30,11 @@ FEC::New(int rxlimit, int dataShards, int parityShards) { } fecPacket -FEC::Decode(byte *data, size_t sz) { +FEC::Decode(byte *data, size_t sz, uint32_t ts) { fecPacket pkt; data = decode32u(data, &pkt.seqid); data = decode16u(data, &pkt.flag); - struct timeval time; - gettimeofday(&time, NULL); - pkt.ts = uint32_t(time.tv_sec * 1000 + time.tv_usec/1000); + pkt.ts = ts; pkt.data = std::make_shared>(data, data+sz - fecHeaderSize); return pkt; } @@ -62,11 +57,8 @@ FEC::MarkFEC(byte *data) { } } -std::vector -FEC::Input(fecPacket &pkt) { - std::vector recovered; - - uint32_t now = currentMs(); +void +FEC::Input(fecPacket &pkt, uint32_t now, std::vector& recovered) { if (now-lastCheck >= fecExpire) { for (auto it = rx.begin();it !=rx.end();) { if (now - it->ts > fecExpire) { @@ -78,13 +70,12 @@ FEC::Input(fecPacket &pkt) { lastCheck = now; } - // insertion auto n = this->rx.size() -1; int insertIdx = 0; for (int i=n;i>=0;i--) { if (pkt.seqid == rx[i].seqid) { - return recovered; + return; } else if (pkt.seqid > rx[i].seqid) { insertIdx = i + 1; break; @@ -113,10 +104,10 @@ FEC::Input(fecPacket &pkt) { int numDataShard = 0; int first = 0; size_t maxlen = 0; - - std::vector shardVec(totalShards); - std::vector shardflag(totalShards, false); - + static thread_local std::vector shardVec(totalShards); + static thread_local std::vector shardflag(totalShards, false); + std::fill(shardVec.begin(), shardVec.end(), nullptr); + std::fill(shardflag.begin(), shardflag.end(), false); for (auto i = searchBegin; i <= searchEnd; i++) { auto seqid = rx[i].seqid; if (seqid > shardEnd) { @@ -163,7 +154,7 @@ FEC::Input(fecPacket &pkt) { rx.erase(rx.begin()); } - return recovered; + return; } diff --git a/fec.h b/fec.h index 361d4fe..cd838d4 100644 --- a/fec.h +++ b/fec.h @@ -35,13 +35,13 @@ class FEC { inline bool isEnabled() { return dataShards > 0 && parityShards > 0 ; } // Input a FEC packet, and return recovered data if possible. - std::vector Input(fecPacket &pkt); + void Input(fecPacket &pkt, uint32_t now, std::vector& recovered); // Calc Parity Shards void Encode(std::vector &shards); // Decode a raw array into fecPacket - static fecPacket Decode(byte *data, size_t sz); + static fecPacket Decode(byte *data, size_t sz, uint32_t ts); // Mark raw array as typeData, and write correct size. void MarkData(byte *data, uint16_t sz); diff --git a/fec_test.cpp b/fec_test.cpp index 2638625..83be9a7 100644 --- a/fec_test.cpp +++ b/fec_test.cpp @@ -61,7 +61,8 @@ int main() { } else { pkt.flag = typeFEC; } - auto recovered = fec.Input(pkt); + std::vector recovered; + fec.Input(pkt, 0, recovered); if (recovered.size() > 0) { std::cout << "recovered:" << std::endl; diff --git a/galois_noasm.cpp b/galois_noasm.cpp index d82e267..9089f7b 100644 --- a/galois_noasm.cpp +++ b/galois_noasm.cpp @@ -3,18 +3,17 @@ // #include "galois_noasm.h" -#include "matrix.h" extern const byte mulTable[256][256]; -void galMulSlice(byte c, row_type in, row_type out) { - for (int n=0;nsize();n++) { - (*out)[n] = mulTable[c][(*in)[n]]; - } +void galMulSlice(byte c, byte* in, byte* out, int size) { + for (int n = 0; n < size; n++) { + out[n] = mulTable[c][in[n]]; + } } -void galMulSliceXor(byte c, row_type in, row_type out) { - for (int n=0;nsize();n++) { - (*out)[n] ^= mulTable[c][(*in)[n]]; - } -} +void galMulSliceXor(byte c, byte* in, byte* out, int size) { + for (int n = 0; n < size; n++) { + out[n] ^= mulTable[c][in[n]]; + } +} \ No newline at end of file diff --git a/galois_noasm.h b/galois_noasm.h index 42d5ebe..8348f6d 100644 --- a/galois_noasm.h +++ b/galois_noasm.h @@ -6,13 +6,12 @@ #define KCP_GALOIS_NOASM_H #include "galois.h" -#include "matrix.h" #ifdef __cplusplus extern "C" { #endif -void galMulSlice(byte c, row_type in, row_type out); -void galMulSliceXor(byte c, row_type in, row_type out); +void galMulSlice(byte c, byte* in, byte* out, int size); +void galMulSliceXor(byte c, byte* in, byte* out, int size); #ifdef __cplusplus } diff --git a/inversion_tree.cpp b/inversion_tree.cpp index 71d8214..69a57ad 100644 --- a/inversion_tree.cpp +++ b/inversion_tree.cpp @@ -8,65 +8,65 @@ inversionTree inversionTree::newInversionTree(int dataShards, int parityShards) { inversionTree tree; tree.m_root.m_children.resize(dataShards + parityShards, nullptr); - tree.m_root.m_matrix = matrix::identityMatrix(dataShards); + tree.m_root.m_matrix = std::make_shared(dataShards); return tree; } -matrix +MatrixPtr inversionTree::GetInvertedMatrix(std::vector &invalidIndices) { if (invalidIndices.size() == 0) { return m_root.m_matrix; } - return m_root.getInvertedMatrix(invalidIndices, 0); + return m_root.getInvertedMatrix(invalidIndices, 0, 0); } int -inversionTree::InsertInvertedMatrix(std::vector &invalidIndices, matrix &matrix, int shards) { +inversionTree::InsertInvertedMatrix(std::vector &invalidIndices, MatrixPtr &matrix, int shards) { // If no invalid indices were given then we are done because the // m_root node is already set with the identity matrix. if (invalidIndices.size() == 0) { return -1; } - if (!matrix.IsSquare()) { + if (!matrix->IsSquare()) { return -2; } // Recursively create nodes for the inverted matrix in the tree until // we reach the node to insert the matrix to. We start by passing in // 0 as the parent index as we start at the m_root of the tree. - m_root.insertInvertedMatrix(invalidIndices, matrix, shards, 0); + m_root.insertInvertedMatrix(invalidIndices, 0, matrix, shards, 0); return 0; } -matrix -inversionNode::getInvertedMatrix(std::vector &invalidIndices, int parent) { +MatrixPtr +inversionNode::getInvertedMatrix(const std::vector &invalidIndices, int index, int parent) { // Get the child node to search next from the list of m_children. The // list of m_children starts relative to the parent index passed in // because the indices of invalid rows is sorted (by default). As we // search recursively, the first invalid index gets popped off the list, // so when searching through the list of m_children, use that first invalid // index to find the child node. - int firstIndex = invalidIndices[0]; + int firstIndex = invalidIndices[index]; auto node = m_children[firstIndex - parent]; // If the child node doesn't exist in the list yet, fail fast by // returning, so we can construct and insert the proper inverted matrix. if (node == nullptr) { - return matrix{}; + return nullptr; } // If there's more than one invalid index left in the list we should // keep searching recursively. - if (invalidIndices.size() > 1) { + if (invalidIndices.size() - index> 1) { // Search recursively on the child node by passing in the invalid indices // with the first index popped off the front. Also the parent index to // pass down is the first index plus one. - std::vector v(invalidIndices.begin() + 1, invalidIndices.end()); - return node->getInvertedMatrix(v, firstIndex + 1); + // no copy std::vector v(invalidIndices.begin() + 1, invalidIndices.end()); + return node->getInvertedMatrix(invalidIndices, index + 1, firstIndex + 1); } // If there aren't any more invalid indices to search, we've found our @@ -79,8 +79,9 @@ inversionNode::getInvertedMatrix(std::vector &invalidIndices, int parent) { void inversionNode::insertInvertedMatrix( - std::vector &invalidIndices, - struct matrix &matrix, + const std::vector &invalidIndices, + int index, + MatrixPtr &matrix, int shards, int parent) { // As above, get the child node to search next from the list of m_children. @@ -89,7 +90,7 @@ inversionNode::insertInvertedMatrix( // search recursively, the first invalid index gets popped off the list, // so when searching through the list of m_children, use that first invalid // index to find the child node. - int firstIndex = invalidIndices[0]; + int firstIndex = invalidIndices[index]; auto node = m_children[firstIndex - parent]; // If the child node doesn't exist in the list yet, create a new @@ -109,13 +110,13 @@ inversionNode::insertInvertedMatrix( // If there's more than one invalid index left in the list we should // keep searching recursively in order to find the node to add our // matrix. - if (invalidIndices.size() > 1) { + if (invalidIndices.size() - index > 1) { // As above, search recursively on the child node by passing in // the invalid indices with the first index popped off the front. // Also the total number of shards and parent index are passed down // which is equal to the first index plus one. - std::vector v(invalidIndices.begin() + 1, invalidIndices.end()); - node->insertInvertedMatrix(v, matrix, shards, firstIndex + 1); + // no copy std::vector v(invalidIndices.begin() + 1, invalidIndices.end()); + node->insertInvertedMatrix(invalidIndices, index + 1, matrix, shards, firstIndex + 1); } else { node->m_matrix = matrix; } diff --git a/inversion_tree.h b/inversion_tree.h index 47cc9e3..fb8258b 100644 --- a/inversion_tree.h +++ b/inversion_tree.h @@ -9,11 +9,11 @@ #include "matrix.h" struct inversionNode { - struct matrix m_matrix; + MatrixPtr m_matrix; std::vector> m_children; - struct matrix getInvertedMatrix(std::vector & invalidIndices, int parent); + MatrixPtr getInvertedMatrix(const std::vector & invalidIndices, int index, int parent); - void insertInvertedMatrix(std::vector &invalidIndices, struct matrix &matrix, int shards, int parent); + void insertInvertedMatrix(const std::vector &invalidIndices, int index, MatrixPtr &matrix, int shards, int parent); }; class inversionTree { @@ -25,13 +25,13 @@ class inversionTree { // GetInvertedMatrix returns the cached inverted matrix or nil if it // is not found in the tree keyed on the indices of invalid rows. - matrix GetInvertedMatrix(std::vector & invalidIndices); + MatrixPtr GetInvertedMatrix(std::vector & invalidIndices); // InsertInvertedMatrix inserts a new inverted matrix into the tree // keyed by the indices of invalid rows. The total number of shards // is required for creating the proper length lists of child nodes for // each node. - int InsertInvertedMatrix(std::vector & invalidIndices, struct matrix &matrix, int shards); + int InsertInvertedMatrix(std::vector & invalidIndices, MatrixPtr &matrix, int shards); private: inversionNode m_root; diff --git a/matrix.cpp b/matrix.cpp index f94864c..81d15f9 100644 --- a/matrix.cpp +++ b/matrix.cpp @@ -2,180 +2,131 @@ // Created by 理 傅 on 2016/12/30. // +#include #include "galois.h" #include "matrix.h" -#include -matrix -matrix::newMatrix(int rows, int cols) { - if (rows <= 0 || cols <= 0) { - throw std::invalid_argument("invalid arguments"); +int Matrix::GaussianElimination() { + auto rows = this->rows; + auto columns = this->cols; + // Clear out the part below the main diagonal and scale the main + // diagonal to be 1. + for (int r = 0; r < rows; r++) { + // If the element on the diagonal is 0, find a row below + // that has a non-zero and swap them. + if (at(r, r) == 0) { + for (int rowBelow = r + 1; rowBelow < rows; rowBelow++) { + if (at(rowBelow, r) != 0) { + this->SwapRows(r, rowBelow); + break; + } + } } - matrix m; - m.rows = rows; - m.cols = cols; - m.data.resize(rows, nullptr); - for (auto i = 0; i < rows; i++) { - m.data[i] = std::make_shared>(cols); + // If we couldn't find one, the matrix is singular. + if (at(r, r) == 0) { + return -1; } - return m; -} -matrix -matrix::identityMatrix(int size) { - matrix m = matrix::newMatrix(size, size); - for (int i = 0; i < size; i++) { - m.at(i, i) = 1; + // Scale to 1. + if (at(r, r) != 1) { + byte scale = galDivide(1, at(r, r)); + for (int c = 0; c < columns; c++) { + at(r, c) = galMultiply(at(r, c), scale); + } } - return m; -} - - -matrix -matrix::Multiply(matrix &right) { - if (cols != right.rows) { - return matrix{}; - } - - matrix result = matrix::newMatrix(rows, right.cols); - for (int r = 0; r < result.rows; r++) { - for (int c = 0; c < result.cols; c++) { - byte value = 0; - for (int i = 0; i < this->cols; i++) { - value ^= galMultiply(at(r, i), right.at(i, c)); - } - result.at(r, c) = value; + // Make everything below the 1 be a 0 by subtracting + // a multiple of it. (Subtraction and addition are + // both exclusive or in the Galois field.) + for (int rowBelow = r + 1; rowBelow < rows; rowBelow++) { + if (at(rowBelow, r) != 0) { + byte scale = at(rowBelow, r); + for (int c = 0; c < columns; c++) { + at(rowBelow, c) ^= galMultiply(scale, at(r, c)); } + } } - return result; -} - -matrix -matrix::Augment(matrix &right) { - matrix result = matrix::newMatrix(this->rows, this->cols + right.cols); - - for (int r = 0; r < this->rows; r++) { - for (int c = 0; c < this->cols; c++) { - result.at(r, c) = at(r, c); - } - auto cols = this->cols; - for (int c = 0; c < right.cols; c++) { - result.at(r, cols + c) = right.at(r, c); + } + + // Now clear the part above the main diagonal. + for (int d = 0; d < rows; d++) { + for (int rowAbove = 0; rowAbove < d; rowAbove++) { + if (at(rowAbove, d) != 0) { + byte scale = at(rowAbove, d); + for (int c = 0; c < columns; c++) { + at(rowAbove, c) ^= galMultiply(scale, at(d, c)); } + } } - return result; + } + return 0; } -matrix -matrix::SubMatrix(int rmin, int cmin, int rmax, int cmax) { - matrix result = matrix::newMatrix(rmax - rmin, cmax - cmin); - for (int r = rmin; r < rmax; r++) { - for (int c = cmin; c < cmax; c++) { - result.at(r - rmin, c - cmin) = at(r, c); - } - } - return result; -} +int Matrix::SwapRows(int r1, int r2) { + if (r1 < 0 || rows <= r1 || r2 < 0 || rows <= r2) { + return -1; + } -// SwapRows Exchanges two rows in the matrix. -int -matrix::SwapRows(int r1, int r2) { - if (r1 < 0 || rows <= r1 || r2 < 0 || rows <= r2) { - return -1; - } - - std::swap(data[r1], data[r2]); - return 0; + std::swap(data[r1], data[r2]); + return 0; } -bool -matrix::IsSquare() { - return this->rows == this->cols; -} +MatrixPtr Multiply(const MatrixPtr& left, const MatrixPtr& right) { + if (left->cols != right->rows) { + return nullptr; + } -matrix -matrix::Invert() { - if (!IsSquare()) { - return matrix{}; - } - auto work = matrix::identityMatrix(rows); - work = matrix::Augment(work); + auto result = std::make_shared(left->rows, right->cols); - auto ret = work.gaussianElimination(); - if (ret != 0) { - return matrix{}; + for (int r = 0; r < result->rows; r++) { + for (int c = 0; c < result->cols; c++) { + byte value = 0; + for (int i = 0; i < left->cols; i++) { + value ^= galMultiply(left->at(r, i), right->at(i, c)); + } + result->at(r, c) = value; } - - return work.SubMatrix(0, rows, rows, rows * 2); + } + return result; } -int -matrix::gaussianElimination() { - auto rows = this->rows; - auto columns = this->cols; - // Clear out the part below the main diagonal and scale the main - // diagonal to be 1. - for (int r = 0; r < rows; r++) { - // If the element on the diagonal is 0, find a row below - // that has a non-zero and swap them. - if (at(r, r) == 0) { - for (int rowBelow = r + 1; rowBelow < rows; rowBelow++) { - if (at(rowBelow, r) != 0) { - this->SwapRows(r, rowBelow); - break; - } - } - } - - // If we couldn't find one, the matrix is singular. - if (at(r, r) == 0) { - return -1; - } - - // Scale to 1. - if (at(r, r) != 1) { - byte scale = galDivide(1, at(r, r)); - for (int c = 0; c < columns; c++) { - at(r, c) = galMultiply(at(r, c), scale); - } - } +MatrixPtr Augment(const MatrixPtr& left, const MatrixPtr& right) { + auto result = std::make_shared(left->rows, left->cols + right->cols); - // Make everything below the 1 be a 0 by subtracting - // a multiple of it. (Subtraction and addition are - // both exclusive or in the Galois field.) - for (int rowBelow = r + 1; rowBelow < rows; rowBelow++) { - if (at(rowBelow, r) != 0) { - byte scale = at(rowBelow, r); - for (int c = 0; c < columns; c++) { - at(rowBelow, c) ^= galMultiply(scale, at(r, c)); - } - } - } + for (int r = 0; r < left->rows; r++) { + for (int c = 0; c < left->cols; c++) { + result->at(r, c) = left->at(r, c); } - - // Now clear the part above the main diagonal. - for (int d = 0; d < rows; d++) { - for (int rowAbove = 0; rowAbove < d; rowAbove++) { - if (at(rowAbove, d) != 0) { - byte scale = at(rowAbove, d); - for (int c = 0; c < columns; c++) { - at(rowAbove, c) ^= galMultiply(scale, at(d, c)); - } - } - } + auto cols = left->cols; + for (int c = 0; c < right->cols; c++) { + result->at(r, cols + c) = right->at(r, c); } - return 0; + } + return result; } -matrix -matrix::vandermonde(int rows, int cols) { - matrix result = matrix::newMatrix(rows, cols); - for (int r = 0; r < rows; r++) { - for (int c = 0; c < cols; c++) { - result.at(r, c) = galExp(byte(r), byte(c)); - } +MatrixPtr SubMatrix(const MatrixPtr& mp, int rmin, int cmin, int rmax, int cmax) { + auto result = std::make_shared(rmax - rmin, cmax - cmin); + for (int r = rmin; r < rmax; r++) { + for (int c = cmin; c < cmax; c++) { + result->at(r - rmin, c - cmin) = mp->at(r, c); } - return result; + } + return result; } + +MatrixPtr Invert(const MatrixPtr& mp) { + if (!mp->IsSquare()) { + return nullptr; + } + std::shared_ptr work = std::make_shared(mp->rows); + work = Augment(mp, work); + + auto ret = work->GaussianElimination(); + if (ret != 0) { + return nullptr; + } + + return SubMatrix(work, 0, mp->rows, mp->rows, mp->rows * 2); +} \ No newline at end of file diff --git a/matrix.h b/matrix.h index deffd52..9380185 100644 --- a/matrix.h +++ b/matrix.h @@ -9,51 +9,74 @@ #include #include "galois.h" -using row_type = std::shared_ptr>; +// newMatrix returns a matrix of zeros. +class Matrix { + public: + Matrix(int r, int c) : rows(r), cols(c) { + data = new byte*[r]; + for (int i = 0; i < r; i++) data[i] = new byte[c]{0}; + } -struct matrix { - // newMatrix returns a matrix of zeros. - static matrix newMatrix(int rows, int cols); + ~Matrix() { + for (int i = 0; i < rows; i++) delete[] data[i]; + delete[] data; + data = nullptr; + } - // IdentityMatrix returns an identity matrix of the given empty. - static matrix identityMatrix(int size); + inline byte& at(int row, int col) { return data[row][col]; } - // Create a Vandermonde matrix, which is guaranteed to have the - // property that any subset of rows that forms a square matrix - // is invertible. - static matrix vandermonde(int rows, int cols); + inline bool IsSquare() { return this->rows == this->cols; } - // Multiply multiplies this matrix (the one on the left) by another - // matrix (the one on the right) and returns a new matrix with the result. - matrix Multiply(matrix &right); + // SwapRows Exchanges two rows in the matrix. + int SwapRows(int r1, int r2); - // Augment returns the concatenation of this matrix and the matrix on the right. - matrix Augment(matrix &right); + // Gaussian elimination (also known as row reduction) + int GaussianElimination(); - // Returns a part of this matrix. Data is copied. - matrix SubMatrix(int rmin, int cmin, int rmax, int cmax); + int rows{0}, cols{0}; - // IsSquare will return true if the matrix is square - bool IsSquare(); + byte** data; +}; - // SwapRows Exchanges two rows in the matrix. - int SwapRows(int r1, int r2); +// IdentityMatrix returns an identity matrix of the given empty. +class IdentityMatrix : public Matrix { + public: + IdentityMatrix(int size) : Matrix(size, size) { + for (int i = 0; i < size; i++) { + at(i, i) = 1; + } + } +}; - // Invert returns the inverse of this matrix. - // Returns ErrSingular when the matrix is singular and doesn't have an inverse. - // The matrix must be square, otherwise ErrNotSquare is returned. - matrix Invert(); +// Create a Vandermonde matrix, which is guaranteed to have the +// property that any subset of rows that forms a square matrix +// is invertible. +class VandermondeMatrix : public Matrix { + public: + VandermondeMatrix(int rows, int cols) : Matrix(rows, cols) { + for (int r = 0; r < rows; r++) { + for (int c = 0; c < cols; c++) { + at(r, c) = galExp(byte(r), byte(c)); + } + } + } +}; - // Gaussian elimination (also known as row reduction) - int gaussianElimination(); +using MatrixPtr = std::shared_ptr; - std::vector data; - int rows{0}, cols{0}; +// Multiply multiplies this matrix (the one on the left) by another +// matrix (the one on the right) and returns a new matrix with the result. +MatrixPtr Multiply(const MatrixPtr& left, const MatrixPtr& right); - inline byte &at(int row, int col) { return (*(data[row]))[col]; } +// Augment returns the concatenation of this matrix and the matrix on the right. +MatrixPtr Augment(const MatrixPtr& left, const MatrixPtr& right); - inline bool empty() { return (rows == 0 || cols == 0); } -}; +// Returns a part of this matrix. Data is copied. +MatrixPtr SubMatrix(const MatrixPtr& mp, int rmin, int cmin, int rmax, int cmax); +// Invert returns the inverse of this matrix. +// Returns ErrSingular when the matrix is singular and doesn't have an inverse. +// The matrix must be square, otherwise ErrNotSquare is returned. +MatrixPtr Invert(const MatrixPtr& mp); #endif //KCP_MATRIX_H diff --git a/reedsolomon.cpp b/reedsolomon.cpp index ab32cb2..b50f18c 100644 --- a/reedsolomon.cpp +++ b/reedsolomon.cpp @@ -30,15 +30,15 @@ ReedSolomon::New(int dataShards, int parityShards) { // Start with a Vandermonde matrix. This matrix would work, // in theory, but doesn't have the property that the data // shards are unchanged after encoding. - matrix vm = matrix::vandermonde(r.m_totalShards, r.m_dataShards); + MatrixPtr vm = std::make_shared(r.m_totalShards, r.m_dataShards); // Multiply by the inverse of the top square of the matrix. // This will make the top square be the identity matrix, but // preserve the property that any square subset of rows is // invertible. - auto top = vm.SubMatrix(0, 0, dataShards, dataShards); - top = top.Invert(); - r.m = vm.Multiply(top); + auto top = SubMatrix(vm, 0, 0, dataShards, dataShards); + top = Invert(top); + r.m = Multiply(vm, top); // Inverted matrices are cached in a tree keyed by the indices // of the invalid rows of the data to reconstruct. @@ -47,9 +47,10 @@ ReedSolomon::New(int dataShards, int parityShards) { // with the original data. r.tree = inversionTree::newInversionTree(dataShards, parityShards); - r.parity = std::vector(parityShards); + r.parity = std::vector(parityShards); for (int i = 0; i < parityShards; i++) { - r.parity[i] = r.m.data[dataShards + i]; + r.parity[i] = r.m->data[dataShards + i]; + // std::make_shared>(r.m->data[dataShards + i], r.m->data[dataShards + i] + r.m->cols); } return r; } @@ -63,24 +64,31 @@ ReedSolomon::Encode(std::vector &shards) { checkShards(shards, false); // Get the slice of output buffers. - std::vector output(shards.begin() + m_dataShards, shards.end()); + static thread_local std::vector output(shards.size() - m_dataShards); + std::fill(output.begin(), output.end(), nullptr); + for (int i = m_dataShards; i < shards.size(); i++) output[i-m_dataShards] = shards[i]->data(); + + // Get the slice of input buffers. + static thread_local std::vector input(m_dataShards); + std::fill(input.begin(), input.end(), nullptr); + for (int i = 0; i < m_dataShards; i++) input[i] = shards[i]->data(); // Do the coding. - std::vector input(shards.begin(), shards.begin() + m_dataShards); - codeSomeShards(parity, input, output, m_parityShards); + auto indata_size = shards[0]->size(); + codeSomeShards(parity, input, indata_size, output, m_parityShards); }; void -ReedSolomon::codeSomeShards(std::vector &matrixRows, std::vector &inputs, - std::vector &outputs, int outputCount) { +ReedSolomon::codeSomeShards(std::vector &matrixRows, std::vector &inputs, int data_size, + std::vector &outputs, int outputCount) { for (int c = 0; c < m_dataShards; c++) { auto in = inputs[c]; - for (int iRow = 0; iRow < outputCount; iRow++) { + for (int r = 0; r < outputCount; r++) { if (c == 0) { - galMulSlice((*matrixRows[iRow])[c], in, outputs[iRow]); + galMulSlice(matrixRows[r][c], in, outputs[r], data_size); } else { - galMulSliceXor((*matrixRows[iRow])[c], in, outputs[iRow]); + galMulSliceXor(matrixRows[r][c], in, outputs[r], data_size); } } } @@ -124,14 +132,18 @@ ReedSolomon::Reconstruct(std::vector &shards) { // // Also, create an array of indices of the valid rows we do have // and the invalid rows we don't have up until we have enough valid rows. - std::vector subShards(m_dataShards); - std::vector validIndices(m_dataShards, 0); - std::vector invalidIndices; + static thread_local std::vector subShards(m_dataShards); + static thread_local std::vector validIndices(m_dataShards, 0); + static thread_local std::vector invalidIndices; + + // clean + std::fill(subShards.begin(), subShards.end(), nullptr); + std::fill(validIndices.begin(), validIndices.end(), 0); + invalidIndices.clear(); int subMatrixRow = 0; - for (int matrixRow = 0; matrixRow < m_totalShards && subMatrixRow < m_dataShards; matrixRow++) { if (shards[matrixRow] != nullptr) { - subShards[subMatrixRow] = shards[matrixRow]; + subShards[subMatrixRow] = shards[matrixRow]->data(); validIndices[subMatrixRow] = matrixRow; subMatrixRow++; } else { @@ -146,15 +158,15 @@ ReedSolomon::Reconstruct(std::vector &shards) { // If the inverted matrix isn't cached in the tree yet we must // construct it ourselves and insert it into the tree for the // future. In this way the inversion tree is lazily loaded. - if (dataDecodeMatrix.empty()) { + if (dataDecodeMatrix == nullptr) { // Pull out the rows of the matrix that correspond to the // shards that we have and build a square matrix. This // matrix could be used to generate the shards that we have // from the original data. - auto subMatrix = matrix::newMatrix(m_dataShards, m_dataShards); + auto subMatrix = std::make_shared(m_dataShards, m_dataShards); for (subMatrixRow = 0; subMatrixRow < validIndices.size(); subMatrixRow++) { for (int c = 0; c < m_dataShards; c++) { - subMatrix.at(subMatrixRow, c) = m.at(validIndices[subMatrixRow], c); + subMatrix->at(subMatrixRow, c) = m->at(validIndices[subMatrixRow], c); }; } @@ -163,8 +175,8 @@ ReedSolomon::Reconstruct(std::vector &shards) { // generates the shard that we want to Decode. Note that // since this matrix maps back to the original data, it can // be used to create a data shard, but not a parity shard. - dataDecodeMatrix = subMatrix.Invert(); - if (dataDecodeMatrix.empty()) { + dataDecodeMatrix = Invert(subMatrix); + if (dataDecodeMatrix == nullptr) { throw std::runtime_error("cannot get matrix invert"); } @@ -181,36 +193,43 @@ ReedSolomon::Reconstruct(std::vector &shards) { // The Input to the coding is all of the shards we actually // have, and the output is the missing data shards. The computation // is done using the special Decode matrix we just built. - std::vector outputs(m_parityShards); - std::vector matrixRows(m_parityShards); + static thread_local std::vector outputs(m_parityShards); + static thread_local std::vector matrixRows(m_parityShards); + // clean + std::fill(outputs.begin(), outputs.end(), nullptr); + std::fill(matrixRows.begin(), matrixRows.end(), nullptr); int outputCount = 0; for (int iShard = 0; iShard < m_dataShards; iShard++) { if (shards[iShard] == nullptr) { shards[iShard] = std::make_shared>(shardSize); - outputs[outputCount] = shards[iShard]; - matrixRows[outputCount] = dataDecodeMatrix.data[iShard]; + outputs[outputCount] = shards[iShard]->data(); + matrixRows[outputCount] = dataDecodeMatrix->data[iShard]; outputCount++; } } - codeSomeShards(matrixRows, subShards, outputs, outputCount); + + auto indata_size = shards[0]->size(); + codeSomeShards(matrixRows, subShards, indata_size, outputs, outputCount); // Now that we have all of the data shards intact, we can // compute any of the parity that is missing. // // The Input to the coding is ALL of the data shards, including // any that we just calculated. The output is whichever of the - // data shards were missing. + // data shards were missing. + outputCount = 0; + for (int iShard = 0; iShard < m_dataShards; iShard++) subShards[iShard] = shards[iShard]->data(); for (int iShard = m_dataShards; iShard < m_totalShards; iShard++) { if (shards[iShard] == nullptr) { shards[iShard] = std::make_shared>(shardSize); - outputs[outputCount] = shards[iShard]; + outputs[outputCount] = shards[iShard]->data(); matrixRows[outputCount] = parity[iShard - m_dataShards]; outputCount++; } } - codeSomeShards(matrixRows, shards, outputs, outputCount); + codeSomeShards(matrixRows, subShards, indata_size, outputs, outputCount); } void diff --git a/reedsolomon.h b/reedsolomon.h index 65fe72e..f2d9157 100644 --- a/reedsolomon.h +++ b/reedsolomon.h @@ -9,6 +9,8 @@ #include "inversion_tree.h" #include "galois.h" +using row_type = std::shared_ptr>; + class ReedSolomon { public: ReedSolomon() = default; @@ -49,9 +51,9 @@ class ReedSolomon { int m_parityShards; // Number of parity shards, should not be modified. int m_totalShards; // Total number of shards. Calculated, and should not be modified. - matrix m; + MatrixPtr m; inversionTree tree; - std::vector parity; + std::vector parity; int shardSize(std::vector &shards); @@ -66,7 +68,7 @@ class ReedSolomon { // number of matrix rows used, is determined by // outputCount, which is the number of outputs to compute. void - codeSomeShards(std::vector &matrixRows, std::vector &inputs, std::vector &outputs, + codeSomeShards(std::vector &matrixRows, std::vector &inputs, int data_size, std::vector &outputs, int outputCount); // checkShards will check if shards are the same size diff --git a/sess.cpp b/sess.cpp index f5230f2..e45de21 100644 --- a/sess.cpp +++ b/sess.cpp @@ -99,7 +99,7 @@ UDPSession::Update(uint32_t current) noexcept { if (n > 0) { if (fec.isEnabled()) { // decode FEC packet - auto pkt = fec.Decode(m_buf, static_cast(n)); + auto pkt = fec.Decode(m_buf, static_cast(n), current); if (pkt.flag == typeData) { auto ptr = pkt.data->data(); // we have 2B size, ignore for typeData @@ -109,7 +109,9 @@ UDPSession::Update(uint32_t current) noexcept { // allow FEC packet processing with correct flags. if (pkt.flag == typeData || pkt.flag == typeFEC) { // input to FEC, and see if we can recover data. - auto recovered = fec.Input(pkt); + static thread_local std::vector recovered; + recovered.clear(); + fec.Input(pkt, current, recovered); // we have some data recovered. for (auto &r : recovered) { diff --git a/sess.h b/sess.h index f856fce..3db5f0d 100644 --- a/sess.h +++ b/sess.h @@ -4,7 +4,6 @@ #include "ikcp.h" #include "fec.h" #include -#include class UDPSession { private: @@ -76,11 +75,4 @@ class UDPSession { }; -inline uint32_t currentMs() { - struct timeval time; - gettimeofday(&time, NULL); - return uint32_t((time.tv_sec * 1000) + (time.tv_usec / 1000)); -} - - #endif //KCP_SESS_H