/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
|
|
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/GEMMLowpFusedOffsetOutputDataset.h"
#include "tests/datasets/LargeGEMMLowpDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SmallGEMMLowpDataset.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/GEMMLowpFixture.h"

#include <cstdint>
|
|
|
|
namespace arm_compute
|
|
{
|
|
namespace test
|
|
{
|
|
namespace validation
|
|
{
|
|
TEST_SUITE(NEON)
|
|
TEST_SUITE(GEMMLowp)
|
|
TEST_SUITE(MatrixMultiplyCore)
|
|
/** Validation fixture running @ref NEGEMMLowpMatrixMultiplyCore on @ref Tensor operands read through @ref Accessor. */
using NEGEMMLowpMatrixMultiplyCoreFixture =
    GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;

/** Same fixture with its three trailing boolean template arguments spelled out as (false, false, true).
 *
 * NOTE(review): going by the alias name, the last flag presumably selects the batched mat-mul
 * configuration - confirm against the fixture definition in GEMMLowpFixture.h.
 */
using NEGEMMLowpBatchedMatMulFixture =
    GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>;
|
|
|
|
/** Configure @ref NEGEMMLowpMatrixMultiplyCore for every shape of the small and large GEMMLowp
 *  datasets and check that configuration leaves all three tensors without padding.
 */
DATA_TEST_CASE(Configuration,
               framework::DatasetMode::ALL,
               framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
               shape_a, shape_b, shape_c, a_offset, b_offset)
{
    // Quantization attached to the two inputs: the scale is fixed, the offsets come from the dataset
    const QuantizationInfo a_qinfo(1.0f / 255, a_offset);
    const QuantizationInfo b_qinfo(1.0f / 255, b_offset);

    // QASYMM8 inputs
    Tensor a = create_tensor<Tensor>(shape_a, DataType::QASYMM8);
    a.info()->set_quantization_info(a_qinfo);

    Tensor b = create_tensor<Tensor>(shape_b, DataType::QASYMM8);
    b.info()->set_quantization_info(b_qinfo);

    // S32 output
    Tensor c = create_tensor<Tensor>(shape_c, DataType::S32);

    // All tensors are expected to still be resizable before the function is configured
    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);

    // Configure without a bias/third input (nullptr)
    NEGEMMLowpMatrixMultiplyCore mm_core;
    mm_core.configure(&a, &b, nullptr, &c);

    // Configuration must not have introduced any padding
    const PaddingSize no_padding = PaddingSize();
    validate(a.info()->padding(), no_padding);
    validate(b.info()->padding(), no_padding);
    validate(c.info()->padding(), no_padding);
}
|
|
|
|
// *INDENT-OFF*
|
|
// clang-format off
|
|
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
|
|
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
|
|
TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Mismatching data type
|
|
TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
|
|
TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
|
|
TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
|
|
}),
|
|
framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
|
|
TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
|
|
TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
|
|
TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
|
|
TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
|
|
})),
|
|
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
|
|
TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
|
|
TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
|
|
TensorInfo(TensorShape(8U, 11U), 1, DataType::S32),
|
|
TensorInfo(TensorShape(64U, 32U), 1, DataType::S32),
|
|
})),
|
|
framework::dataset::make("Expected", { true, false, false, false, true })),
|
|
a_info, b_info, output_info, expected)
|
|
{
|
|
// Lock tensors
|
|
Status status = NEGEMMLowpMatrixMultiplyCore::validate(&a_info.clone()->set_is_resizable(false),
|
|
&b_info.clone()->set_is_resizable(false),
|
|
nullptr,
|
|
&output_info.clone()->set_is_resizable(false));
|
|
ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
|
|
}
|
|
// clang-format on
|
|
// *INDENT-ON*
|
|
|
|
/** Test case for memory injection in @ref cpu::CpuGemmLowpMatrixMultiplyCore.
|
|
*
|
|
* Configure the operator once and inject memory at run-time in multiple executions.
|
|
*
|
|
* Checks performed in order:
|
|
* - Both runs compute the same output
|
|
*/
|
|
TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
|
|
{
|
|
auto gemm = std::make_unique<cpu::CpuGemmLowpMatrixMultiplyCore>();
|
|
auto a_info = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
|
|
auto b_info = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
|
|
auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
|
|
a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
|
|
b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
|
|
const auto gemm_info = GEMMInfo{};
|
|
gemm->configure(&a_info, &b_info, nullptr, &dst_info, gemm_info);
|
|
|
|
// telhs are newly created every call of this lambda function
|
|
auto a = create_tensor<Tensor>(a_info);
|
|
auto b = create_tensor<Tensor>(b_info);
|
|
auto dst = create_tensor<Tensor>(dst_info);
|
|
a.allocator()->allocate();
|
|
b.allocator()->allocate();
|
|
dst.allocator()->allocate();
|
|
|
|
ITensorPack run_pack =
|
|
{
|
|
{ TensorType::ACL_SRC_0, &a },
|
|
{ TensorType::ACL_SRC_1, &b },
|
|
{ TensorType::ACL_DST, &dst }
|
|
};
|
|
ITensorPack prep_pack =
|
|
{
|
|
{ TensorType::ACL_SRC_1, &b },
|
|
};
|
|
|
|
auto mg = MemoryGroup{};
|
|
auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
|
|
|
|
auto run_conv = [&]() -> Tensor
|
|
{
|
|
auto dst = create_tensor<Tensor>(dst_info);
|
|
dst.allocator()->allocate();
|
|
run_pack.add_tensor(TensorType::ACL_DST, &dst);
|
|
|
|
library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
|
|
library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
|
|
// This operator is configured once and captured by this lambda.
|
|
gemm->prepare(prep_pack);
|
|
gemm->run(run_pack);
|
|
return dst;
|
|
};
|
|
auto result_0 = run_conv();
|
|
auto result_1 = run_conv();
|
|
for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
|
|
{
|
|
ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS);
|
|
}
|
|
}
|
|
|
|
/** Test case for memory injection in @ref NEGEMMLowpMatrixMultiplyCore.
|
|
*
|
|
* Make sure @ref NEGEMMLowpMatrixMultiplyCore still works through injecting the memory at configure time using the old API.
|
|
*
|
|
* Checks performed in order:
|
|
* - Both runs compute the same output
|
|
*/
|
|
TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
|
|
{
|
|
auto gemm = std::make_unique<NEGEMMLowpMatrixMultiplyCore>();
|
|
auto a_info = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
|
|
auto b_info = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
|
|
auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
|
|
a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
|
|
b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
|
|
const auto gemm_info = GEMMInfo{};
|
|
auto run_conv = [&]()
|
|
{
|
|
auto a = create_tensor<Tensor>(a_info);
|
|
auto b = create_tensor<Tensor>(b_info);
|
|
auto dst = create_tensor<Tensor>(dst_info);
|
|
gemm->configure(&a, &b, nullptr, &dst, gemm_info);
|
|
a.allocator()->allocate();
|
|
b.allocator()->allocate();
|
|
dst.allocator()->allocate();
|
|
library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
|
|
library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
|
|
gemm->run();
|
|
return dst;
|
|
};
|
|
auto result_0 = run_conv();
|
|
auto result_1 = run_conv();
|
|
for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
|
|
{
|
|
ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS);
|
|
}
|
|
}
|
|
|
|
/** Run the core fixture over the small GEMMLowp dataset (enabled in all dataset modes). */
FIXTURE_DATA_TEST_CASE(RunSmall,
                       NEGEMMLowpMatrixMultiplyCoreFixture,
                       framework::DatasetMode::ALL,
                       datasets::SmallGEMMLowpDataset())
{
    // Compare the target output with the reference; no tolerance argument is supplied
    validate(Accessor(_target), _reference);
}
|
|
|
|
/** Run the core fixture over the large GEMMLowp dataset (nightly dataset mode only). */
FIXTURE_DATA_TEST_CASE(RunLarge,
                       NEGEMMLowpMatrixMultiplyCoreFixture,
                       framework::DatasetMode::NIGHTLY,
                       datasets::LargeGEMMLowpDataset())
{
    // Compare the target output with the reference; no tolerance argument is supplied
    validate(Accessor(_target), _reference);
}
|
|
|
|
constexpr AbsoluteTolerance<float> tolerance_batched(1);
|
|
|
|
using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
|
|
GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;
|
|
|
|
TEST_SUITE(BatchedMatMul)
|
|
TEST_SUITE(QASYMM8)
|
|
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
|
|
combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(),
|
|
framework::dataset::make("DataType", { DataType::QASYMM8 })),
|
|
framework::dataset::make("bool", { false })))
|
|
{
|
|
validate(Accessor(_target), _reference, tolerance_batched);
|
|
}
|
|
TEST_SUITE_END() // QASYMM8
|
|
|
|
using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
|
|
GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
|
|
TEST_SUITE(QASYMM8_SIGNED)
|
|
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
|
|
combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(),
|
|
framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
|
|
framework::dataset::make("bool", { false })))
|
|
{
|
|
validate(Accessor(_target), _reference, tolerance_batched);
|
|
}
|
|
TEST_SUITE_END() // QASYMM8_SIGNED
|
|
TEST_SUITE_END() // BatchedMatMul
|
|
|
|
using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
|
|
constexpr AbsoluteTolerance<float> tolerance_quant(1);
|
|
|
|
TEST_SUITE(FusedOffsetOutput)
|
|
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
|
|
framework::dataset::make("DataType", { DataType::QASYMM8 })))
|
|
{
|
|
// Validate output
|
|
validate(Accessor(_target), _reference, tolerance_quant);
|
|
}
|
|
|
|
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
|
|
framework::dataset::make("DataType", { DataType::QASYMM8 })))
|
|
{
|
|
// Validate output
|
|
validate(Accessor(_target), _reference, tolerance_quant);
|
|
}
|
|
TEST_SUITE_END() // FusedOffsetOutput
|
|
TEST_SUITE_END() // MatrixMultiplyCore
|
|
TEST_SUITE_END() // GEMMLowp
|
|
TEST_SUITE_END() // NEON
|
|
} // namespace validation
|
|
} // namespace test
|
|
} // namespace arm_compute
|