From 6e974d7f6ebe78e9f51313d9ffbbbf9083ad0e78 Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Wed, 17 Jun 2026 17:33:48 +0200 Subject: [PATCH] feat: optimized QASYMM8_SIGNED->F32 direct convolution path Signed-off-by: Aleksandr Voron --- .../kernels/arm_gemm/gemm_interleaved.hpp | 70 ++- .../NEON/kernels/arm_gemm/quantized-fp16.cpp | 3 +- src/core/NEON/kernels/arm_gemm/quantized.cpp | 56 ++- .../arm_common/internal/quantized.hpp | 12 +- .../kernels/assembly/arm_gemm/arm_gemm.hpp | 14 +- src/cpu/operators/CpuConv2d.cpp | 24 +- src/cpu/operators/CpuGemmDirectConv2d.cpp | 37 +- src/cpu/operators/CpuGemmDirectConv2d.h | 3 +- .../NEON/functions/NEConvolutionLayer.cpp | 4 +- tests/validation/NEON/ConvolutionLayer.cpp | 439 ++++++++++++++++++ .../fixtures/ConvolutionLayerFixture.h | 138 ++++++ 11 files changed, 773 insertions(+), 27 deletions(-) diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp index 5a2b83f527a..2a8779f933e 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp @@ -281,6 +281,14 @@ void kernel_and_merge::run( offset_bias = bias + n_0; } + // When b_offset != 0, row sums of A are packed at the end of the A panel + // (appended by the quantized PrepareA transform with multiplier=1). Read them + // to pass to dequantize_block_32 for per-row offset correction. + const int32_t *row_sum = nullptr; + if (dq.b_offset != 0) { + row_sum = reinterpret_cast(a_ptr + strategy::out_height() * kern_k); + } + strat.kernel(// A and B pointers are just the packed panels. a_ptr, b_panel, // Provide relevant part of output array and row stride. @@ -288,7 +296,7 @@ void kernel_and_merge::run( // M, N, K sizes m_max-m_0, n_max - n_0, kern_k, // Bias, activation, accumulation. Need to offset the bias as needed. - offset_col_bias, dq, offset_bias, act, accumulate, acc_buff); + offset_col_bias, dq, offset_bias, act, accumulate, acc_buff, row_sum, kern_k); } template<> @@ -300,7 +308,7 @@ void kernel_and_merge::run( strategy &strat, const Tlo *a_ptr, const Tro *b_panel, size_t, Tri *c_panel, Tr *c_ptr, int ldc, int kern_k, unsigned int m_0, unsigned int m_max, unsigned int n_0, unsigned int n_max, const Tr *bias, - const Activation &act, bool not_first_pass, const DequantizeFloat &qp, const int32_t *, + const Activation &act, bool not_first_pass, const DequantizeFloat &qp, const int32_t *col_bias, Tab *) { const int bblocks = iceildiv(n_max - n_0, strategy::out_width()); @@ -317,6 +325,11 @@ void kernel_and_merge::run( #ifdef CYCLE_PROFILING auto p=prof.ScopedProfiler(PROFILE_QUANTIZE, ((m_max-m_0) * bblocks * strategy::out_width() * sizeof(Tr))); #endif + // When b_offset != 0, row sums are packed after the A panel data + const int32_t *row_sum = (qp.b_offset != 0) + ? reinterpret_cast(a_ptr + strategy::out_height() * kern_k) + : nullptr; + for (int i=0; i::run( dequantize_block_32(qp, (n_end - n_start), (m_max - m_0), c_panel + (i * strategy::out_width() * strategy::out_height()), strategy::out_width(), c_ptr + m_0 * ldc + n_start, ldc, - bias != nullptr ? bias + n_start : nullptr, not_first_pass, act); + bias != nullptr ? bias + n_start : nullptr, not_first_pass, act, + col_bias != nullptr ? col_bias + n_start : nullptr, row_sum, kern_k); } } @@ -475,6 +489,13 @@ class GemmInterleaved : public GemmCommon { return _Nsize * _nmulti * sizeof(int32_t); } + if (std::is_same::value) { + const DequantizeFloat *dq = reinterpret_cast(&_os); + if (dq->a_offset != 0) { + return _Nsize * _nmulti * sizeof(int32_t); + } + } + return 0; } @@ -557,6 +578,12 @@ class GemmInterleaved : public GemmCommon { k_depth += sizeof(int32_t) / sizeof(Tloi); } + if (std::is_same::value && MergeStep) { + // transforms_quantized always packs row sum slots (zeros when multiplier=0, actual + // sums when b_offset != 0). Reserve space unconditionally when MergeStep is enabled. + k_depth += sizeof(int32_t) / sizeof(Tloi); + } + return k_depth; } @@ -647,6 +674,13 @@ class GemmInterleaved : public GemmCommon { return -qp->b_offset; } + if (std::is_same::value) { + const DequantizeFloat *dq = reinterpret_cast(&_os); + // Pack row sums into the A panel when b_offset is non-zero so that the + // merge step can apply the b_offset correction per output position. + return (dq->b_offset != 0) ? 1 : 0; + } + return 0; } @@ -693,6 +727,14 @@ class GemmInterleaved : public GemmCommon { return get_ktotal(args); } + // K blocking is not supported for DequantizeFloat with MergeStep when b_offset != 0, + // because row sums of A must cover the full K depth. We cannot check b_offset here + // (static function), so we conservatively disable K-blocking for all DequantizeFloat + // MergeStep cases. The working-memory cost is minimal and correctness is guaranteed. + if (std::is_same::value && MergeStep) { + return get_ktotal(args); + } + // We can't K block non-fast FP16 cases without an accumulation buffer. #if defined(__aarch64__) && (defined(FP16_KERNELS) || defined(ARM_COMPUTE_ENABLE_FP16)) if (std::is_same::value && std::is_same::value && !args._fast_mode && MergeStep) { @@ -937,7 +979,7 @@ class GemmInterleaved : public GemmCommon { #endif // See comment above on transform_type<> class: this extracts either 'transforms' or // 'transforms_quantized' as appropriate. - typename transform_type::value>::type transforms; + typename transform_type::value || std::is_same::value)>::type transforms; if (_indirect_buf != nullptr) { transforms.PrepareA_indirect(a_panel, @@ -1027,7 +1069,7 @@ class GemmInterleaved : public GemmCommon { #endif // See comment above on transform_type<> class: this extracts either 'transforms' or // 'transforms_quantized' as appropriate. - typename transform_type::value>::type transforms; + typename transform_type::value || std::is_same::value)>::type transforms; for (unsigned int batch = batch_0; batch <= batch_end; batch++) { unsigned int first_m = (batch == batch_0) ? m_0 : 0; @@ -1060,6 +1102,10 @@ class GemmInterleaved : public GemmCommon { if(std::is_same::value) { a_panel_stride = kern_k + (sizeof(int32_t) / sizeof(Tloi)); + } else if (std::is_same::value) { + // transforms_quantized always packs row-sum slots (zeros when b_offset=0, + // actual sums when b_offset != 0), so the stride must include the slot. + a_panel_stride = kern_k + (sizeof(int32_t) / sizeof(Tloi)); } else { a_panel_stride = kern_k; } @@ -1212,6 +1258,20 @@ class GemmInterleaved : public GemmCommon { compute_col_sums(*qp_ptr, _Nsize, _Ksize * _Ksections, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize * _Ksections, i, 0); } } + + if (std::is_same::value) { + const DequantizeFloat *dq = reinterpret_cast(&_os); + if (dq->a_offset != 0) { + // Compute raw column sums of B (weight matrix) for use in a_offset correction. + // dequantize_block_32 applies: -a_offset * col_sums[n] * scale per output channel. + col_bias = reinterpret_cast(in_buffer); + for (unsigned int i = 0; i < _nmulti; ++i) { + compute_raw_col_sums(_Nsize, _Ksize * _Ksections, + B + (i * B_multi_stride), ldb, + col_bias + (i * _Nsize)); + } + } + } } // Support for transposed B is a property of the strategy::transpose type diff --git a/src/core/NEON/kernels/arm_gemm/quantized-fp16.cpp b/src/core/NEON/kernels/arm_gemm/quantized-fp16.cpp index 3647fbc8a12..a66bc3b3957 100644 --- a/src/core/NEON/kernels/arm_gemm/quantized-fp16.cpp +++ b/src/core/NEON/kernels/arm_gemm/quantized-fp16.cpp @@ -35,7 +35,8 @@ namespace arm_gemm { template<> void dequantize_block_32<__fp16>(const DequantizeFloat &qp, unsigned int width, unsigned int height, const int32_t * in_ptr, unsigned int in_stride, __fp16 *out_ptr, unsigned int out_stride, - const __fp16 * bias_ptr, bool not_first_pass, const Activation &act) + const __fp16 * bias_ptr, bool not_first_pass, const Activation &act, + const int32_t * /*col_bias*/, const int32_t * /*row_sum*/, int32_t /*k_total*/) { const float32x4_t vscale = vdupq_n_f32(qp.scale); float maxval = std::numeric_limits::infinity(); diff --git a/src/core/NEON/kernels/arm_gemm/quantized.cpp b/src/core/NEON/kernels/arm_gemm/quantized.cpp index 0243ee14a3b..ae47c157621 100644 --- a/src/core/NEON/kernels/arm_gemm/quantized.cpp +++ b/src/core/NEON/kernels/arm_gemm/quantized.cpp @@ -973,10 +973,28 @@ void compute_col_sums(const Requantize32 &qp, unsigned int width, unsigned int h template void compute_col_sums(const Requantize32 &qp, unsigned int width, unsigned int height, const int8_t *input, unsigned int in_stride, int32_t *col_bias, unsigned int depth, unsigned int multi, unsigned int first_col); template void compute_col_sums(const Requantize32 &qp, unsigned int width, unsigned int height, const uint8_t *input, unsigned int in_stride, int32_t *col_bias, unsigned int depth, unsigned int multi, unsigned int first_col); +template +void compute_raw_col_sums(unsigned int width, unsigned int height, + const T *input, unsigned int in_stride, int32_t *col_sums) +{ + memset(reinterpret_cast(col_sums), 0, width * sizeof(int32_t)); + for (unsigned int row = 0; row < height; ++row) + { + for (unsigned int col = 0; col < width; ++col) + { + col_sums[col] += static_cast(input[row * in_stride + col]); + } + } +} + +template void compute_raw_col_sums(unsigned int width, unsigned int height, const int8_t *input, unsigned int in_stride, int32_t *col_sums); +template void compute_raw_col_sums(unsigned int width, unsigned int height, const uint8_t *input, unsigned int in_stride, int32_t *col_sums); + template<> void dequantize_block_32(const DequantizeFloat &qp, unsigned int width, unsigned int height, const int32_t* in_ptr, unsigned int in_stride, float *out_ptr, unsigned int out_stride, - const float* bias_ptr, bool accumulate, const Activation &act) + const float* bias_ptr, bool accumulate, const Activation &act, + const int32_t *col_bias, const int32_t *row_sum, int32_t k_total) { const float32x4_t vscale = vdupq_n_f32(qp.scale); float maxval = std::numeric_limits::infinity(); @@ -1000,14 +1018,38 @@ void dequantize_block_32(const DequantizeFloat &qp, unsigned int width, u for(unsigned int row=0; row(-qp.b_offset * row_sum[row]) * qp.scale; + } + if (col_bias != nullptr && row_sum != nullptr && k_total != 0) { + // Cross-term: +a_offset * b_offset * K * scale + row_offset += static_cast(qp.a_offset) * static_cast(qp.b_offset) + * static_cast(k_total) * qp.scale; + } + const float32x4_t vrow_offset = vdupq_n_f32(row_offset); + unsigned int col=0; if (width >= 4) { for(; col <= (width - 4); col+= 4) { const int32x4_t vin = vld1q_s32(row_in_ptr + col); float32x4_t vdeq = vmulq_f32(vcvtq_f32_s32(vin), vscale); if(bias_ptr) { - const float32x4_t bin = vld1q_f32(bias_ptr + col); - vdeq = vaddq_f32(vdeq, bin); + vdeq = vaddq_f32(vdeq, vld1q_f32(bias_ptr + col)); + } + if(col_bias) { + // a_offset correction: -a_offset * sum_b_col[n] * scale + const float32x4_t vcol_corr = vmulq_f32( + vcvtq_f32_s32(vld1q_s32(col_bias + col)), + vdupq_n_f32(static_cast(-qp.a_offset) * qp.scale)); + vdeq = vaddq_f32(vdeq, vcol_corr); + } + if(row_sum) { + vdeq = vaddq_f32(vdeq, vrow_offset); } if(accumulate) { vdeq = vaddq_f32(vdeq, vld1q_f32(row_out_ptr + col)); @@ -1019,10 +1061,16 @@ void dequantize_block_32(const DequantizeFloat &qp, unsigned int width, u // left-over elements for(; col < width; ++col) { const int32_t val = *(row_in_ptr + col); - float res = static_cast(val * qp.scale); + float res = static_cast(val) * qp.scale; if(bias_ptr) { res += static_cast(*(bias_ptr + col)); } + if(col_bias) { + res += static_cast(-qp.a_offset * col_bias[col]) * qp.scale; + } + if(row_sum) { + res += row_offset; + } if(accumulate) { res += *(row_out_ptr + col); } diff --git a/src/cpu/kernels/assembly/arm_common/internal/quantized.hpp b/src/cpu/kernels/assembly/arm_common/internal/quantized.hpp index 796da4e2e19..64985e7b9ba 100644 --- a/src/cpu/kernels/assembly/arm_common/internal/quantized.hpp +++ b/src/cpu/kernels/assembly/arm_common/internal/quantized.hpp @@ -41,6 +41,14 @@ void compute_col_sums(const Requantize32 &qp, unsigned int width, unsigned int h const T *input, unsigned int in_stride, int32_t *col_bias, unsigned int depth, unsigned int multi, unsigned int first_col); +/** Compute raw column sums of a matrix: col_sums[n] = sum_{k} input[k * in_stride + n]. + * Unlike compute_col_sums(), this does not apply any quantization offsets or scaling — + * it stores the plain integer sums for use as weight column reductions in the + * DequantizeFloat a_offset correction path. */ +template +void compute_raw_col_sums(unsigned int width, unsigned int height, + const T *input, unsigned int in_stride, int32_t *col_sums); + template void row_sums_indirect(size_t num_strings, const unsigned int *string_lengths, IndirectInputArg A_arg, size_t M, int32_t *output_ptr, const Requantize32 *qp); @@ -48,6 +56,8 @@ void row_sums_indirect(size_t num_strings, const unsigned int *string_lengths, I template void dequantize_block_32(const DequantizeFloat &qp, unsigned int width, unsigned int height, const int32_t* input, unsigned int in_stride, T *output, unsigned int out_stride, - const T *row_bias, bool not_first_pass, const Activation &act); + const T *row_bias, bool not_first_pass, const Activation &act, + const int32_t *col_bias = nullptr, const int32_t *row_sum = nullptr, + int32_t k_total = 0); } // namespace arm_gemm diff --git a/src/cpu/kernels/assembly/arm_gemm/arm_gemm.hpp b/src/cpu/kernels/assembly/arm_gemm/arm_gemm.hpp index 8bae014a757..cdc2a42682d 100644 --- a/src/cpu/kernels/assembly/arm_gemm/arm_gemm.hpp +++ b/src/cpu/kernels/assembly/arm_gemm/arm_gemm.hpp @@ -240,12 +240,20 @@ struct Requantize32 struct DequantizeFloat { public: - float scale = 0; + float scale = 0; + int32_t a_offset = 0; // input quantization zero-point (subtract from each input value) + int32_t b_offset = 0; // weight quantization zero-point (subtract from each weight value) DequantizeFloat() = default; - // Constructor - DequantizeFloat(const float scale) : scale(scale) + // Constructor without offset (symmetric quantization) + DequantizeFloat(const float scale) : scale(scale), a_offset(0), b_offset(0) + { + } + + // Constructor with asymmetric quantization offsets + DequantizeFloat(const float scale, int32_t a_offset, int32_t b_offset) + : scale(scale), a_offset(a_offset), b_offset(b_offset) { } }; diff --git a/src/cpu/operators/CpuConv2d.cpp b/src/cpu/operators/CpuConv2d.cpp index adc4486889c..a2c950792e2 100644 --- a/src/cpu/operators/CpuConv2d.cpp +++ b/src/cpu/operators/CpuConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023-2025 Arm Limited. + * Copyright (c) 2017-2021, 2023-2026 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,13 +59,14 @@ void CpuConv2d::configure(ITensorInfo *input, // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_UNUSED(num_groups); - ARM_COMPUTE_ERROR_THROW_ON(CpuConv2d::validate(input, weights, biases, output, conv_info, weights_info, dilation, - act_info, enable_fast_math, num_groups)); + ARM_COMPUTE_ERROR_THROW_ON( + CpuConv2d::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, + enable_fast_math, num_groups)); ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups); - const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups); + const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups, weights_info); switch (CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math)) { @@ -119,7 +120,8 @@ Status CpuConv2d::validate(const ITensorInfo *input, ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConv2d::validate"); ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon"); - const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups); + const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups, weights_info); + switch (CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math)) { @@ -155,7 +157,17 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i bool enable_fast_math) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights); - ARM_COMPUTE_UNUSED(weights_info); + + // For QASYMM8_SIGNED→F32 with NHWC and no dilation, automatically select the single-kernel + // CpuGemmDirectConv2d path when it validates successfully. + if (input->data_type() == DataType::QASYMM8_SIGNED && output->data_type() == DataType::F32) + { + const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, 1, weights_info); + if (bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info))) + { + return ConvolutionMethod::GEMM_CONV2D; + } + } const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); diff --git a/src/cpu/operators/CpuGemmDirectConv2d.cpp b/src/cpu/operators/CpuGemmDirectConv2d.cpp index caac7761e27..4024207add5 100644 --- a/src/cpu/operators/CpuGemmDirectConv2d.cpp +++ b/src/cpu/operators/CpuGemmDirectConv2d.cpp @@ -44,6 +44,11 @@ using namespace arm_compute::utils::cast; namespace { +inline bool is_direct_i8_s8_f32_path(const ITensorInfo *src, const ITensorInfo *dst) +{ + return src->data_type() == DataType::QASYMM8_SIGNED && dst->data_type() == DataType::F32; +} + GEMMLowpOutputStageInfo calculate_output_stage_metadata(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, @@ -130,7 +135,15 @@ void CpuGemmDirectConv2d::configure(const ITensorInfo *src, // Configure assembly dispatch cpu::AsmGemmInfo asm_info = init_assembly_metadata(info, false); - if (is_data_type_quantized(src->data_type())) + if (is_direct_i8_s8_f32_path(src, dst)) + { + // Provide the quantization zero-points via AsmGemmInfo so that create_arm_gemm_dequant + // can bake them into the DequantizeFloat output stage. The assembly kernel then handles + // all offset corrections (col sums, row sums, cross-term) natively. + asm_info.dequant_a_offset = src->quantization_info().uniform().offset; + asm_info.dequant_b_offset = weights->quantization_info().uniform().offset; + } + else if (is_data_type_quantized(src->data_type())) { asm_info.output_stage = calculate_output_stage_metadata(src, weights, dst, info.act_info); } @@ -189,6 +202,17 @@ Status CpuGemmDirectConv2d::validate(const ITensorInfo *src, ARM_COMPUTE_RETURN_ERROR_ON(info.dilation != Size2D(1U, 1U)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); + const bool direct_i8_f32 = is_direct_i8_s8_f32_path(src, dst); + + if (direct_i8_f32) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::F32); + // Offset correction (a_offset, b_offset) is handled inside the assembly kernel via the + // extended DequantizeFloat output stage — no separate operator-level validation needed. + } + // Validate Permute TensorInfo perm_weights; ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &perm_weights, PermutationVector{3, 0, 1, 2})); @@ -204,7 +228,11 @@ Status CpuGemmDirectConv2d::validate(const ITensorInfo *src, // Validate biases if (biases != nullptr) { - if (is_data_type_quantized_asymmetric(data_type)) + if (direct_i8_f32) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); + } + else if (is_data_type_quantized_asymmetric(data_type)) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); } @@ -216,7 +244,10 @@ Status CpuGemmDirectConv2d::validate(const ITensorInfo *src, ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); } - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + if (!direct_i8_f32) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + } cpu::AsmGemmInfo asm_info = init_assembly_metadata(info, false); ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuGemmAssemblyDispatch::validate(src, weights, biases, dst, asm_info)); diff --git a/src/cpu/operators/CpuGemmDirectConv2d.h b/src/cpu/operators/CpuGemmDirectConv2d.h index a7365615b94..2ab9ef531a6 100644 --- a/src/cpu/operators/CpuGemmDirectConv2d.h +++ b/src/cpu/operators/CpuGemmDirectConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2026 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,7 @@ class CpuGemmDirectConv2d : public ICpuOperator * |:--------------|:--------------|:--------------|:--------------| * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 | * |F16 |F16 |F16 |F16 | * |F32 |F32 |F32 |F32 | * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index c4f28079852..d9061b4c355 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023-2025 Arm Limited. + * Copyright (c) 2017-2021, 2023-2026 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -83,7 +83,6 @@ void NEConvolutionLayer::configure(ITensor *input, ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups); - const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups); switch (cpu::CpuConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math)) { @@ -134,7 +133,6 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, unsigned int num_groups) { ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEConvolutionLayer::validate"); - const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!weights->are_values_constant(), "Dynamic weights are not supported"); ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, weights, biases, output); diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 16eff388d6c..8371e0c5981 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -311,6 +311,445 @@ TEST_SUITE_END() // QASYMM8_SIGNED // clang-format on // *INDENT-ON* + +#ifdef __aarch64__ +// *INDENT-OFF* +// clang-format off +TEST_SUITE(DirectI8S8F32) + +/** Validate accepts QASYMM8_SIGNED→F32 with both zero and non-zero offsets. + * Shapes are in NHWC format: [C, W, H] for input/output, [Cin, Kw, Kh, Cout] for weights. */ +DATA_TEST_CASE( + Validate, + framework::DatasetMode::ALL, + zip(make("SrcInfo", { TensorInfo(TensorShape(16U, 8U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(32U, 4U, 4U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 8U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 8U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC) }), + make("WgtInfo", { TensorInfo(TensorShape(16U, 3U, 3U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(32U, 1U, 1U, 16U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC) }), + make("BiasInfo", { TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(8U), 1, DataType::F32) }), + make("DstInfo", { TensorInfo(TensorShape(8U, 6U, 6U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 4U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 6U, 6U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 6U, 6U), 1, DataType::F32, DataLayout::NHWC) }), + make("SrcOffset", { 0, 0, 5, -10 }), + make("WgtOffset", { 0, 0, 0, 3 }), + make("Expected", { true, true, true, true })), + src_info_const, wgt_info_const, bias_info_const, dst_info_const, + src_offset, wgt_offset, expected) +{ + TensorInfo src_info = src_info_const; + TensorInfo wgt_info = wgt_info_const; + TensorInfo bias_info = bias_info_const; + TensorInfo dst_info = dst_info_const; + src_info.set_quantization_info(QuantizationInfo(0.25f, src_offset)); + wgt_info.set_quantization_info(QuantizationInfo(0.125f, wgt_offset)); + + const Status s = NEConvolutionLayer::validate(&src_info, &wgt_info, &bias_info, &dst_info, + PadStrideInfo(1, 1, 0, 0), + WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), + false /*fast_math*/, 1 /*num_groups*/); + ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS); +} + +/** Verify GEMM_CONV2D is returned automatically for NHWC QASYMM8_SIGNED→F32. */ +TEST_CASE(GetConvMethodDirectI8F32, framework::DatasetMode::ALL) +{ + const QuantizationInfo qi(0.25f, 0); + TensorInfo src_info(TensorShape(16U, 8U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC); + TensorInfo wgt_info(TensorShape(16U, 3U, 3U, 8U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC); + TensorInfo dst_info(TensorShape(8U, 6U, 6U), 1, DataType::F32, DataLayout::NHWC); + src_info.set_quantization_info(qi); + wgt_info.set_quantization_info(qi); + + const ConvolutionMethod m = NEConvolutionLayer::get_convolution_method( + &src_info.clone()->set_is_resizable(true), + &wgt_info.clone()->set_is_resizable(true), + &dst_info.clone()->set_is_resizable(true), + PadStrideInfo(1, 1, 0, 0), + WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), + false /*fast_math*/); + ARM_COMPUTE_EXPECT(m == ConvolutionMethod::GEMM_CONV2D, framework::LogLevel::ERRORS); +} + +using NEDirectI8S8F32ConvolutionFixture = DirectI8S8F32ConvolutionFixture; + +/** Small dataset: verify direct i8->f32 path produces correct output vs CPU reference. + * Shapes are in NCHW format [W, H, C] for input/output, [Kw, Kh, Cin, Cout] for weights. + * InputShape/WeightsShape/BiasShape/OutputShape are zipped — each row is one consistent config. */ +FIXTURE_DATA_TEST_CASE( + RunSmall, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(16U, 16U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(14U, 14U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsDataset, + make("InputQI", { QuantizationInfo(0.25f, 0) }), + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Stride-2 3x3 convolution. */ +FIXTURE_DATA_TEST_CASE( + RunStride2, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + make("InputShape", { TensorShape(14U, 14U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(16U) }), + make("OutputShape", { TensorShape(6U, 6U, 16U) }), + make("ConvInfo", { PadStrideInfo(2, 2, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("InputQI", { QuantizationInfo(0.25f, 0) }), + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** 1x1 convolution. */ +FIXTURE_DATA_TEST_CASE( + Run1x1, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + make("InputShape", { TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(1U, 1U, 32U, 16U) }), + make("BiasShape", { TensorShape(16U) }), + make("OutputShape", { TensorShape(8U, 8U, 16U) }), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("InputQI", { QuantizationInfo(0.25f, 0) }), + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Padded convolution. */ +FIXTURE_DATA_TEST_CASE( + RunPadded, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + make("InputShape", { TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(16U) }), + make("OutputShape", { TensorShape(8U, 8U, 16U) }), + make("ConvInfo", { PadStrideInfo(1, 1, 1, 1) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f) }), + make("InputQI", { QuantizationInfo(0.25f, 0) }), + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Non-zero offsets: input offset only. + * Uses the same fixture with QuantizationInfo passed explicitly. + * Shapes in NCHW: [W,H,C] / [Kw,Kh,Cin,Cout]. */ +FIXTURE_DATA_TEST_CASE( + RunWithInputOffset, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 16U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(1U, 1U, 16U, 8U) }), + make("BiasShape", { TensorShape(8U), TensorShape(8U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(8U, 8U, 8U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("InputQI", { QuantizationInfo(0.25f, -10) }), // non-zero input offset + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) // zero weight offset + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Non-zero offsets: weight offset only. */ +FIXTURE_DATA_TEST_CASE( + RunWithWeightOffset, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 16U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(1U, 1U, 16U, 8U) }), + make("BiasShape", { TensorShape(8U), TensorShape(8U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(8U, 8U, 8U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("InputQI", { QuantizationInfo(0.25f, 0) }), // zero input offset + make("WeightsQI", { QuantizationInfo(0.125f, 5) }) // non-zero weight offset + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Non-zero offsets on both tensors, with padding and activation. */ +FIXTURE_DATA_TEST_CASE( + RunWithBothOffsets, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(8U, 8U, 8U), TensorShape(8U, 8U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 1, 1) }), // same-size output via padding + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("InputQI", { QuantizationInfo(0.25f, -20) }), // non-zero input offset + make("WeightsQI", { QuantizationInfo(0.125f, 10) }) // non-zero weight offset + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** BOUNDED_RELU activation with non-zero input offset. + * Exercises that the activation clips the post-correction float value correctly. */ +FIXTURE_DATA_TEST_CASE( + RunWithInputOffsetAndBoundedRelu, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 16U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(1U, 1U, 16U, 8U) }), + make("BiasShape", { TensorShape(8U), TensorShape(8U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(8U, 8U, 8U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, -6.0f) }), + make("InputQI", { QuantizationInfo(0.25f, -10) }), + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** BOUNDED_RELU activation with non-zero weight offset. */ +FIXTURE_DATA_TEST_CASE( + RunWithWeightOffsetAndActivation, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(6U, 6U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 4.0f) }), + make("InputQI", { QuantizationInfo(0.25f, 0) }), + make("WeightsQI", { QuantizationInfo(0.125f, 5) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Stride-2 convolution with non-zero offsets. + * Stride changes which input patches contribute to each output position, exercising the + * per-output row-sum computation path with stride != 1. */ +FIXTURE_DATA_TEST_CASE( + RunStride2WithOffsets, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(14U, 14U, 16U), TensorShape(14U, 14U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(6U, 6U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(2, 2, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("InputQI", { QuantizationInfo(0.25f, -15) }), + make("WeightsQI", { QuantizationInfo(0.125f, 8) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** 1×1 kernel with both non-zero offsets. + * K = Cin only (no spatial footprint), ensuring the K-total used in the cross-term is correct. */ +FIXTURE_DATA_TEST_CASE( + Run1x1WithBothOffsets, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(1U, 1U, 16U, 8U), TensorShape(1U, 1U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(8U, 8U, 8U), TensorShape(8U, 8U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("InputQI", { QuantizationInfo(0.25f, -20) }), + make("WeightsQI", { QuantizationInfo(0.125f, 10) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Positive input offset + stride-2 + padding: exercises the row-sum computation + * for a strided conv where each output position sees a distinct input patch. + * Also covers a positive (not negative) input offset sign. */ +FIXTURE_DATA_TEST_CASE( + RunStride2PaddedWithInputOffset, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(14U, 14U, 16U), TensorShape(14U, 14U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(7U, 7U, 8U), TensorShape(7U, 7U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(2, 2, 1, 1) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("InputQI", { QuantizationInfo(0.25f, 12) }), // positive input offset + make("WeightsQI", { QuantizationInfo(0.125f, 0) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Negative weight offset — exercises the sign of the b_offset correction term. */ +FIXTURE_DATA_TEST_CASE( + RunWithNegativeWeightOffset, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(6U, 6U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("InputQI", { QuantizationInfo(0.25f, 0) }), + make("WeightsQI", { QuantizationInfo(0.125f, -8) }) + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +/** Large offsets (both near ±127) — stress-tests the cross-term magnitude. + * Large a_offset * b_offset * K can be a large fraction of the raw accumulator; + * any sign or magnitude error is highly visible. */ +FIXTURE_DATA_TEST_CASE( + RunWithLargeOffsets, + NEDirectI8S8F32ConvolutionFixture, + framework::DatasetMode::ALL, + combine( + zip( + make("InputShape", { TensorShape(8U, 8U, 16U), TensorShape(8U, 8U, 32U) }), + make("WeightsShape", { TensorShape(3U, 3U, 16U, 8U), TensorShape(3U, 3U, 32U, 16U) }), + make("BiasShape", { TensorShape(8U), TensorShape(16U) }), + make("OutputShape", { TensorShape(6U, 6U, 8U), TensorShape(6U, 6U, 16U) }) + ), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0) }), + make("Dilation", { Size2D(1U, 1U) }), + make("ReshapeWeights", { true }), + make("DataLayout", { DataLayout::NHWC }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("InputQI", { QuantizationInfo(0.01f, 100) }), // large positive input offset + make("WeightsQI", { QuantizationInfo(0.01f, -100) }) // large negative weight offset + ) +) +{ + validate(Accessor(_target), _reference, rel_tolerance_f32, tolerance_num_dequantize_f32, float(abs_tolerance_f32)); +} + +TEST_SUITE_END() // DirectI8S8F32 +// clang-format on +// *INDENT-ON* +#endif // #ifdef __aarch64__ + TEST_SUITE_END() // ConvolutionLayer /* diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 2ccefa54575..1f3c7e6c319 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -1320,6 +1320,144 @@ class HasOptImplFixture : public framework::Fixture }; #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS +/** Fixture for testing the single-kernel i8->fp32 direct convolution path. + * + * Supports NHWC, no dilation, arbitrary quantization offsets. + * Compares target (direct single-kernel) against CPU float reference. + */ +template +class DirectI8S8F32ConvolutionFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape_nchw, + TensorShape weights_shape_nchw, + TensorShape bias_shape, + TensorShape output_shape_nchw, + PadStrideInfo conv_info, + Size2D dilation, + bool reshape_weights, + DataLayout data_layout, + ActivationLayerInfo act_info, + QuantizationInfo input_qi = QuantizationInfo(0.25f, 0), + QuantizationInfo weights_qi = QuantizationInfo(0.125f, 0)) + { + ARM_COMPUTE_EXPECT_EQUAL(data_layout, DataLayout::NHWC, framework::LogLevel::ERRORS); + _data_layout = data_layout; + _input_qi = input_qi; + _weights_qi = weights_qi; + + _hash = static_cast(input_shape_nchw[0] + input_shape_nchw[1] + input_shape_nchw[2] + + weights_shape_nchw[0] + weights_shape_nchw[1] + + static_cast(input_qi.uniform().offset) + + static_cast(weights_qi.uniform().offset)); + + _target = compute_target(input_shape_nchw, weights_shape_nchw, bias_shape, output_shape_nchw, conv_info, + reshape_weights, dilation, act_info); + _reference = compute_reference(input_shape_nchw, weights_shape_nchw, bias_shape, output_shape_nchw, conv_info, + dilation, act_info); + } + +protected: + template + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::QASYMM8_SIGNED: + { + std::uniform_int_distribution dist(-127, 127); + library->fill(tensor, dist, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution dist(-1.0f, 1.0f); + library->fill(tensor, dist, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const PadStrideInfo &conv_info, + bool reshape_weights, + const Size2D &dilation, + const ActivationLayerInfo act_info) + { + // Convert NCHW→NHWC + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + const int idx_w = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const int idx_h = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + + WeightsInfo weights_info(!reshape_weights, weights_shape[idx_w], weights_shape[idx_h], weights_shape[3]); + + TensorType src = create_tensor(input_shape, DataType::QASYMM8_SIGNED, 1, _input_qi, _data_layout); + TensorType wgt = create_tensor(weights_shape, DataType::QASYMM8_SIGNED, 1, _weights_qi, + _data_layout); + TensorType bias = create_tensor(bias_shape, DataType::F32, 1, QuantizationInfo(), _data_layout); + TensorType dst = create_tensor(output_shape, DataType::F32, 1, QuantizationInfo(), _data_layout); + + FunctionType conv; + conv.configure(&src, &wgt, &bias, &dst, conv_info, weights_info, dilation, act_info, + false /*enable_fast_math*/, 1 /*num_groups*/); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + src.allocator()->allocate(); + wgt.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(wgt), 1 + _hash); + fill(AccessorType(bias), 2 + _hash); + + conv.run(); + return dst; + } + + SimpleTensor compute_reference(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const PadStrideInfo &conv_info, + const Size2D &dilation, + const ActivationLayerInfo act_info) + { + // Reference runs in NCHW with float input (dequantized) to match what the kernel should produce. + SimpleTensor src_q{input_shape, DataType::QASYMM8_SIGNED, 1, _input_qi}; + SimpleTensor wgt_q{weights_shape, DataType::QASYMM8_SIGNED, 1, _weights_qi}; + SimpleTensor bias_f{bias_shape, DataType::F32}; + + fill(src_q, 0 + _hash); + fill(wgt_q, 1 + _hash); + fill(bias_f, 2 + _hash); + + auto conv = reference::convolution_layer(src_q, wgt_q, bias_f, output_shape, + conv_info, dilation, 1 /*num_groups*/, + QuantizationInfo()); + if (act_info.enabled()) + { + return reference::activation_layer(conv, act_info); + } + return conv; + } + + TensorType _target{}; + SimpleTensor _reference{}; + DataLayout _data_layout{DataLayout::NHWC}; + QuantizationInfo _input_qi{}; + QuantizationInfo _weights_qi{}; + int32_t _hash{0}; +}; + } // namespace validation } // namespace test } // namespace arm_compute