| From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001 |
| From: Randy MacLeod <Randy.MacLeod@windriver.com> |
| Date: Wed, 26 Apr 2017 15:00:32 -0400 |
| Subject: [PATCH 2/2] Revert "check FP16 build condition correctly" |
| |
| This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13. |
| |
| Fix up revert conflicts to take previous behaviour. |
| --- |
| cmake/OpenCVCompilerOptions.cmake | 45 +++++++++-------------- |
| modules/core/include/opencv2/core/cvdef.h | 2 +- |
| modules/core/src/convert.cpp | 11 +++--- |
| modules/core/test/test_intrin.cpp | 60 ++++++++++++++----------------- |
| 4 files changed, 48 insertions(+), 70 deletions(-) |
| |
| diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake |
| index 5bb0479..4b19fdb 100644 |
| --- a/cmake/OpenCVCompilerOptions.cmake |
| +++ b/cmake/OpenCVCompilerOptions.cmake |
| @@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) |
| add_extra_compiler_option("-mfp16-format=ieee") |
| endif(ARM) |
| if(ENABLE_NEON) |
| - add_extra_compiler_option("-mfpu=neon") |
| + add_extra_compiler_option("-mfpu=neon-fp16") |
| endif() |
| if(ENABLE_VFPV3 AND NOT ENABLE_NEON) |
| add_extra_compiler_option("-mfpu=vfpv3") |
| @@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) |
| add_extra_compiler_option(-fvisibility-inlines-hidden) |
| endif() |
| |
| -if(NOT OPENCV_FP16_DISABLE AND NOT IOS) |
| - if(ARM AND ENABLE_NEON) |
| - set(FP16_OPTION "-mfpu=neon-fp16") |
| - elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) |
| - set(FP16_OPTION "-mf16c") |
| - endif() |
| - try_compile(__VALID_FP16 |
| - "${OpenCV_BINARY_DIR}" |
| - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" |
| - COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" |
| - OUTPUT_VARIABLE TRY_OUT |
| - ) |
| - if(NOT __VALID_FP16) |
| - if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) |
| - # GCC enables AVX when mf16c is passed |
| - message(STATUS "FP16: Feature disabled") |
| - else() |
| - message(STATUS "FP16: Compiler support is not available") |
| - endif() |
| - else() |
| - message(STATUS "FP16: Compiler support is available") |
| - set(HAVE_FP16 1) |
| - if(NOT ${FP16_OPTION} STREQUAL "") |
| - add_extra_compiler_option(${FP16_OPTION}) |
| - endif() |
| - endif() |
| -endif() |
| - |
| #combine all "extra" options |
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") |
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") |
| @@ -450,6 +422,21 @@ if(MSVC) |
| endif() |
| endif() |
| |
| +if(NOT OPENCV_FP16_DISABLE) |
| + try_compile(__VALID_FP16 |
| + "${OpenCV_BINARY_DIR}" |
| + "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" |
| + COMPILE_DEFINITIONS "-DCHECK_FP16" |
| + OUTPUT_VARIABLE TRY_OUT |
| + ) |
| + if(NOT __VALID_FP16) |
| + message(STATUS "FP16: Compiler support is not available") |
| + else() |
| + message(STATUS "FP16: Compiler support is available") |
| + set(HAVE_FP16 1) |
| + endif() |
| +endif() |
| + |
| if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib") |
| link_directories("/usr/local/lib") |
| endif() |
| diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h |
| index efc24ca..a10936b 100644 |
| --- a/modules/core/include/opencv2/core/cvdef.h |
| +++ b/modules/core/include/opencv2/core/cvdef.h |
| @@ -312,7 +312,7 @@ enum CpuFeatures { |
| typedef union Cv16suf |
| { |
| short i; |
| -#if CV_FP16_TYPE |
| +#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) |
| __fp16 h; |
| #endif |
| struct _fp16Format |
| diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp |
| index e04d89e..46db26f 100644 |
| --- a/modules/core/src/convert.cpp |
| +++ b/modules/core/src/convert.cpp |
| @@ -44,7 +44,6 @@ |
| #include "precomp.hpp" |
| |
| #include "opencl_kernels_core.hpp" |
| -#include "opencv2/core/hal/intrin.hpp" |
| |
| #include "opencv2/core/openvx/ovx_defs.hpp" |
| |
| @@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int> |
| |
| #endif |
| |
| -#if !CV_FP16_TYPE |
| +#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) ) |
| // const numbers for floating points format |
| const unsigned int kShiftSignificand = 13; |
| const unsigned int kMaskFp16Significand = 0x3ff; |
| @@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent = 15; |
| const unsigned int kBiasFp32Exponent = 127; |
| #endif |
| |
| -#if CV_FP16_TYPE |
| +#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) |
| static float convertFp16SW(short fp16) |
| { |
| // Fp16 -> Fp32 |
| @@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16) |
| } |
| #endif |
| |
| -#if CV_FP16_TYPE |
| +#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) |
| static short convertFp16SW(float fp32) |
| { |
| // Fp32 -> Fp16 |
| @@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t |
| if ( ( (intptr_t)dst & 0xf ) == 0 ) |
| #endif |
| { |
| -#if CV_FP16 && CV_SIMD128 |
| +#if CV_FP16 |
| for ( ; x <= size.width - 4; x += 4) |
| { |
| v_float32x4 v_src = v_load(src + x); |
| @@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t |
| if ( ( (intptr_t)src & 0xf ) == 0 ) |
| #endif |
| { |
| -#if CV_FP16 && CV_SIMD128 |
| +#if CV_FP16 |
| for ( ; x <= size.width - 4; x += 4) |
| { |
| v_float16x4 v_src = v_load_f16(src + x); |
| diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp |
| index 66b2083..7349d48 100644 |
| --- a/modules/core/test/test_intrin.cpp |
| +++ b/modules/core/test/test_intrin.cpp |
| @@ -729,56 +729,48 @@ template<typename R> struct TheTest |
| return *this; |
| } |
| |
| +#if CV_FP16 |
| TheTest & test_loadstore_fp16() |
| { |
| -#if CV_FP16 |
| AlignedData<R> data; |
| AlignedData<R> out; |
| |
| - if(checkHardwareSupport(CV_CPU_FP16)) |
| - { |
| - // check if addresses are aligned and unaligned respectively |
| - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); |
| - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); |
| - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); |
| - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); |
| - |
| - // check some initialization methods |
| - R r1 = data.u; |
| - R r2 = v_load_f16(data.a.d); |
| - R r3(r2); |
| - EXPECT_EQ(data.u[0], r1.get0()); |
| - EXPECT_EQ(data.a[0], r2.get0()); |
| - EXPECT_EQ(data.a[0], r3.get0()); |
| - |
| - // check some store methods |
| - out.a.clear(); |
| - v_store_f16(out.a.d, r1); |
| - EXPECT_EQ(data.a, out.a); |
| - } |
| + // check if addresses are aligned and unaligned respectively |
| + EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); |
| + EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); |
| + EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); |
| + EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); |
| + |
| + // check some initialization methods |
| + R r1 = data.u; |
| + R r2 = v_load_f16(data.a.d); |
| + R r3(r2); |
| + EXPECT_EQ(data.u[0], r1.get0()); |
| + EXPECT_EQ(data.a[0], r2.get0()); |
| + EXPECT_EQ(data.a[0], r3.get0()); |
| + |
| + // check some store methods |
| + out.a.clear(); |
| + v_store_f16(out.a.d, r1); |
| + EXPECT_EQ(data.a, out.a); |
| |
| return *this; |
| -#endif |
| } |
| |
| TheTest & test_float_cvt_fp16() |
| { |
| -#if CV_FP16 |
| AlignedData<v_float32x4> data; |
| |
| - if(checkHardwareSupport(CV_CPU_FP16)) |
| - { |
| - // check conversion |
| - v_float32x4 r1 = v_load(data.a.d); |
| - v_float16x4 r2 = v_cvt_f16(r1); |
| - v_float32x4 r3 = v_cvt_f32(r2); |
| - EXPECT_EQ(0x3c00, r2.get0()); |
| - EXPECT_EQ(r3.get0(), r1.get0()); |
| - } |
| + // check conversion |
| + v_float32x4 r1 = v_load(data.a.d); |
| + v_float16x4 r2 = v_cvt_f16(r1); |
| + v_float32x4 r3 = v_cvt_f32(r2); |
| + EXPECT_EQ(0x3c00, r2.get0()); |
| + EXPECT_EQ(r3.get0(), r1.get0()); |
| |
| return *this; |
| -#endif |
| } |
| +#endif |
| |
| }; |
| |
| -- |
| 2.9.3 |
| |