diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ccbd774ec..15f7779137 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2017-2025 Arm Ltd and Contributors. All rights reserved. # Copyright © 2020 NXP # SPDX-License-Identifier: MIT # @@ -387,8 +387,6 @@ list(APPEND armnn_sources src/armnn/Profiling.hpp src/armnn/Runtime.cpp src/armnn/Runtime.hpp - src/armnn/RangeTracker.cpp - src/armnn/RangeTracker.hpp src/armnn/ResolveType.hpp src/armnn/SerializeLayerParameters.cpp src/armnn/SerializeLayerParameters.hpp diff --git a/scripts/get_compute_library.sh b/scripts/get_compute_library.sh index 009b449834..4fa260273e 100755 --- a/scripts/get_compute_library.sh +++ b/scripts/get_compute_library.sh @@ -10,7 +10,7 @@ CMD=$( basename "$0" ) #DEFAULT_CLFRAMEWORKREVISION="branches/arm_compute_24_08" # Release 24.08 # # For pinning to a revision use this: -DEFAULT_CLFRAMEWORKREVISION="f1be55c81f7f580fbbfd88894bc3e79af3185743" #fix: gemm_hybrid_quantized.hpp was passing incorrect K size to the kernel. +DEFAULT_CLFRAMEWORKREVISION="7d6fc756b2a9dfd8776be1c5c17ba45e2652c9fc" #Update release version and SONAME usage() { echo -e "get_compute_library.sh: Clones the Arm Compute Library (ACL) repo from the ML Platform server and checks out the pinned version of ACL based on the SHA string defined at the top of this script (DEFAULT_CLFRAMEWORKREVISION). diff --git a/src/armnn/RangeTracker.cpp b/src/armnn/RangeTracker.cpp deleted file mode 100644 index 346c8bdd2e..0000000000 --- a/src/armnn/RangeTracker.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RangeTracker.hpp" -#include "InternalTypes.hpp" - -namespace armnn -{ - -void RangeTracker::SetRange(const armnn::IConnectableLayer* layer, unsigned int outputIdx, float min, float max) -{ - auto& ranges = m_GuidToRangesMap[layer->GetGuid()]; - - unsigned int numOfOutputSlots = layer->GetNumOutputSlots(); - // output layers are a special case - if (numOfOutputSlots == 0) - { - ++numOfOutputSlots; - } - if (ranges.size() < numOfOutputSlots) - { - ranges.resize(numOfOutputSlots); - } - ranges[outputIdx] = std::make_pair(min, max); -} - -RangeTracker::MinMaxRange RangeTracker::GetRange(LayerGuid guid, unsigned int idx) const -{ - auto search = m_GuidToRangesMap.find(guid); - if (search == m_GuidToRangesMap.end()) - { - if (IsInDynamicMode()) - { - throw armnn::Exception("Have no entry for layer GUID [" + std::to_string(guid) + "]"); - } - else - { - return DefaultRange(); - } - } - return search->second.at(idx); -} - -void RangeTracker::RefineMin(LayerGuid guid, unsigned int idx, float newMin) -{ - auto& currentMin = m_GuidToRangesMap.find(guid)->second.at(idx).first; - if (newMin < currentMin) - { - currentMin = newMin; - } -} - -void RangeTracker::RefineMax(LayerGuid guid, unsigned int idx, float newMax) -{ - auto& currentMax = m_GuidToRangesMap.find(guid)->second.at(idx).second; - if (newMax > currentMax) - { - currentMax = newMax; - } -} - -void RangeTracker::ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax) -{ - auto minMaxPair = m_GuidToRangesMap.find(guid); - auto& currentMin = minMaxPair->second.at(idx).first; - auto& currentMax = minMaxPair->second.at(idx).second; - - currentMin = newMin; - currentMax = newMax; -} - -void RangeTracker::Reset() -{ - m_GuidToRangesMap.clear(); -} - -} //namespace armnn \ No newline at end of file diff --git a/src/armnn/RangeTracker.hpp b/src/armnn/RangeTracker.hpp deleted file mode 100644 index 89d19acf79..0000000000 --- a/src/armnn/RangeTracker.hpp +++ 
/dev/null @@ -1,63 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include - -#include - -#include -#include - -namespace armnn -{ - -class RangeTracker -{ -public: - using MinMaxRange = std::pair; - - /// Retrieve the Range for a particular output slot on a particular layer - MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const; - - /// Set the range for an output slot on a layer - void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); - - /// Query function to check that the RangeTracker is empty. - bool IsEmpty() const { return m_GuidToRangesMap.empty(); } - - /// Query that there is an entry for a layer - bool HasRanges(LayerGuid guid) const { return m_GuidToRangesMap.find(guid) != m_GuidToRangesMap.end(); } - - /// Update min in RangeTracker with new_min if it is lower than current value - void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin); - - /// Update max in RangeTracker with new_max if it is greater than current value - void RefineMax(LayerGuid guid, unsigned int slotIndex, float newMax); - - /// Overwrite min and max in RangeTracker with newMin and newMax - void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax); - - void Reset(); - - void SetDynamicMode(bool flag) { m_DynamicMode = flag; } - - bool IsInDynamicMode() const { return m_DynamicMode; } - -private: - using MinMaxRanges = std::vector; - - /// Retrieve the default range - MinMaxRange DefaultRange() const { return std::make_pair(-15.0f, 15.0f); } - - /// Mapping from a layer Guid to an array of ranges for outputs - std::unordered_map m_GuidToRangesMap; - - bool m_DynamicMode = false; -}; - -} //namespace armnn \ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp index 93754cb4b4..66a2d15dba 100644 
--- a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp +++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp @@ -1,41 +1,11 @@ // -// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2025 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ElementwiseBinaryOperator.hpp" #include "TosaRescaleOperatorUtils.hpp" -void AddRescaleOp(const string &inputName, - const string &outputName, - std::vector& tensors, - const std::vector& inputs, - const std::vector& outputs, - std::vector& operators) -{ - double scale_alpha = inputs[1]->GetQuantizationScale() / outputs[0]->GetQuantizationScale(); - int32_t input_zp = inputs[1]->GetQuantizationOffset(); - int32_t output_zp = outputs[0]->GetQuantizationOffset(); - - TosaSerializationOperator* rescaleOp = nullptr; - CreateRescaleTosaOperator(inputName, - outputName, - scale_alpha, - input_zp, - output_zp, - false, - false, - true, - true, - &rescaleOp); - - std::vector inputShape = GetTosaTensorShape(inputs[1]->GetShape()); - tensors.push_back(new TosaSerializationTensor(outputName, - inputShape, - DType_INT32, {})); - operators.push_back(rescaleOp); -} - TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer, const LayerType type, const std::vector& inputs, @@ -102,8 +72,39 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* bool isMulOp = (type == LayerType::Multiplication) || isMulDesc ? 
true : false; if (isInputInt8 && !isMulOp) { - AddRescaleOp(input0Name, input0ElemenwiseBinaryName, tensors, inputs, outputs, operators); - AddRescaleOp(input1Name, input1ElemenwiseBinaryName, tensors, inputs, outputs, operators); + TosaSerializationOperator* rescaleOp0 = nullptr; + CreateRescaleTosaOperator(input0Name, + input0ElemenwiseBinaryName, + inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale(), + inputs[0]->GetQuantizationOffset(), + 0, + false, + false, + true, + true, + &rescaleOp0); + tensors.push_back(new TosaSerializationTensor(input0ElemenwiseBinaryName, + GetTosaTensorShape(inputs[0]->GetShape()), + DType_INT32, + {})); + operators.push_back(rescaleOp0); + + TosaSerializationOperator* rescaleOp1 = nullptr; + CreateRescaleTosaOperator(input1Name, + input1ElemenwiseBinaryName, + inputs[1]->GetQuantizationScale() / outputs[0]->GetQuantizationScale(), + inputs[1]->GetQuantizationOffset(), + 0, + false, + false, + true, + true, + &rescaleOp1); + tensors.push_back(new TosaSerializationTensor(input1ElemenwiseBinaryName, + GetTosaTensorShape(inputs[1]->GetShape()), + DType_INT32, + {})); + operators.push_back(rescaleOp1); } std::string& elementwiseInput0Str = isInputInt8 ? 
input0ElemenwiseBinaryName : input0Name; @@ -213,7 +214,28 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* // from DType_INT32 to DType_INT8 when the input is DType_INT8 if (inputDType0 == DType_INT8) { - AddRescaleOp(outputElemenwiseBinaryName, outputName, tensors, inputs, outputs, operators); + // double output_rescale_scale = in_lhs_scale * in_rhs_scale / output_scale; + float input0QScale = inputs[0]->IsQuantized()?inputs[0]->GetQuantizationScale():1.0f; + float input1QScale = inputs[1]->IsQuantized()?inputs[1]->GetQuantizationScale():1.0f; + float outputQScale = outputs[0]->IsQuantized()?outputs[0]->GetQuantizationScale():1.0f; + double combinedQScale = input0QScale * input1QScale / outputQScale; + + TosaSerializationOperator* rescaleOp = nullptr; + CreateRescaleTosaOperator(outputElemenwiseBinaryName, + outputName, + combinedQScale, + 0, + outputs[0]->GetQuantizationOffset(), + false, + false, + true, + true, + &rescaleOp); + tensors.push_back(new TosaSerializationTensor(outputName, + GetTosaTensorShape(outputs[0]->GetShape()), + DType_INT8, + {})); + operators.push_back(rescaleOp); } return new TosaSerializationBasicBlock(blockName, // name