Andrew Geissler | c723b72 | 2021-01-08 16:14:09 -0600 | [diff] [blame] | 1 | From 05d290165a3b61da09b715e6c8e62cebebab57cc Mon Sep 17 00:00:00 2001 |
| 2 | From: Erik Flodin <erik@ejohansson.se> |
| 3 | Date: Mon, 7 Dec 2020 19:20:31 +0100 |
| 4 | Subject: [PATCH 1/2] Improve SIMD detection (#735) |
| 5 | |
| 6 | * Try to compile code to detect SSE/AVX support. Just checking if the compiler |
| 7 | supports the flag isn't enough as e.g. Clang on Apple's new ARM silicon seems |
| 8 | to accept the flag but then fails when building. |
| 9 | * Try to detect and enable BLAKE3's Neon support. |
| 10 | * Improve detection of AVX2 target attribute support and remove the explicit |
| 11 | compiler version check that hopefully shouldn't be needed. |
| 12 | |
| 13 | Fixes #734. |
| 14 | Upstream-Status: Backport [https://github.com/ccache/ccache/commit/b438f50388dd00285083260f60450e6237b7d58f] |
| 15 | Signed-off-by: Khem Raj <raj.khem@gmail.com> |
| 16 | --- |
| 17 | cmake/GenerateConfigurationFile.cmake | 25 +++++++++--------- |
| 18 | src/third_party/blake3/CMakeLists.txt | 38 ++++++++++++++++++++------- |
| 19 | 2 files changed, 42 insertions(+), 21 deletions(-) |
| 20 | |
| 21 | diff --git a/cmake/GenerateConfigurationFile.cmake b/cmake/GenerateConfigurationFile.cmake |
| 22 | index a21861f4..836ff9bb 100644 |
| 23 | --- a/cmake/GenerateConfigurationFile.cmake |
| 24 | +++ b/cmake/GenerateConfigurationFile.cmake |
| 25 | @@ -67,18 +67,19 @@ check_struct_has_member("struct stat" st_mtim sys/stat.h |
| 26 | check_struct_has_member("struct statfs" f_fstypename sys/mount.h |
| 27 | HAVE_STRUCT_STATFS_F_FSTYPENAME) |
| 28 | |
| 29 | -include(CheckCXXCompilerFlag) |
| 30 | - |
| 31 | -# Old GCC versions don't have the required header support. |
| 32 | -# Old Apple Clang versions seem to support -mavx2 but not the target |
| 33 | -# attribute that's used to enable AVX2 for a certain function. |
| 34 | -if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) |
| 35 | - OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0)) |
| 36 | - message(STATUS "Detected unsupported compiler for HAVE_AVX2 - disabled") |
| 37 | - set(HAVE_AVX2 FALSE) |
| 38 | -else() |
| 39 | - check_cxx_compiler_flag(-mavx2 HAVE_AVX2) |
| 40 | -endif() |
| 41 | +include(CheckCXXSourceCompiles) |
| 42 | +check_cxx_source_compiles( |
| 43 | + [=[ |
| 44 | + #include <immintrin.h> |
| 45 | + void func() __attribute__((target("avx2"))); |
| 46 | + void func() { _mm256_abs_epi8(_mm256_set1_epi32(42)); } |
| 47 | + int main() |
| 48 | + { |
| 49 | + func(); |
| 50 | + return 0; |
| 51 | + } |
| 52 | + ]=] |
| 53 | + HAVE_AVX2) |
| 54 | |
| 55 | list(APPEND CMAKE_REQUIRED_LIBRARIES ws2_32) |
| 56 | list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ws2_32) |
| 57 | diff --git a/src/third_party/blake3/CMakeLists.txt b/src/third_party/blake3/CMakeLists.txt |
| 58 | index a75e5611..cc24253c 100644 |
| 59 | --- a/src/third_party/blake3/CMakeLists.txt |
| 60 | +++ b/src/third_party/blake3/CMakeLists.txt |
| 61 | @@ -13,9 +13,9 @@ else() |
| 62 | endif() |
| 63 | |
| 64 | include(CheckAsmCompilerFlag) |
| 65 | -include(CheckCCompilerFlag) |
| 66 | +include(CheckCSourceCompiles) |
| 67 | |
| 68 | -function(add_source_if_enabled feature compile_flags) |
| 69 | +function(add_source_if_enabled feature compile_flags intrinsic) |
| 70 | string(TOUPPER "have_${blake_source_type}_${feature}" have_feature) |
| 71 | |
| 72 | # AVX512 support fails to compile with old Apple Clang versions even though |
| 73 | @@ -28,7 +28,14 @@ function(add_source_if_enabled feature compile_flags) |
| 74 | elseif(${blake_source_type} STREQUAL "asm") |
| 75 | check_asm_compiler_flag(${compile_flags} ${have_feature}) |
| 76 | else() |
| 77 | - check_c_compiler_flag(${compile_flags} ${have_feature}) |
| 78 | + set(CMAKE_REQUIRED_FLAGS ${compile_flags}) |
| 79 | + check_c_source_compiles( |
| 80 | + [=[ |
| 81 | + #include <immintrin.h> |
| 82 | + int main() { ${intrinsic}; return 0; } |
| 83 | + ]=] |
| 84 | + ${have_feature}) |
| 85 | + unset(CMAKE_REQUIRED_FLAGS) |
| 86 | endif() |
| 87 | |
| 88 | if(${have_feature}) |
| 89 | @@ -42,10 +49,23 @@ function(add_source_if_enabled feature compile_flags) |
| 90 | endif() |
| 91 | endfunction() |
| 92 | |
| 93 | -add_source_if_enabled(sse2 "-msse2") |
| 94 | -add_source_if_enabled(sse41 "-msse4.1") |
| 95 | -add_source_if_enabled(avx2 "-mavx2") |
| 96 | -add_source_if_enabled(avx512 "-mavx512f -mavx512vl") |
| 97 | +# Intrinsics used as compile probes below; see https://software.intel.com/sites/landingpage/IntrinsicsGuide/ |
| 98 | +add_source_if_enabled(sse2 "-msse2" "_mm_set1_epi32(42)") |
| 99 | +add_source_if_enabled(sse41 "-msse4.1" "_mm_test_all_ones(_mm_set1_epi32(42))") |
| 100 | +add_source_if_enabled(avx2 "-mavx2" "_mm256_abs_epi8(_mm256_set1_epi32(42))") |
| 101 | +add_source_if_enabled(avx512 "-mavx512f -mavx512vl" "_mm256_abs_epi64(_mm256_set1_epi32(42))") |
| 102 | |
| 103 | -# TODO: how to detect ARM NEON support? |
| 104 | -# If NEON, define BLAKE3_USE_NEON and build blake3_neon.c |
| 105 | +# Neon is always available on AArch64, so only 64-bit targets are probed; the compile check below rejects non-ARM ones. |
| 106 | +if(CMAKE_SIZEOF_VOID_P EQUAL 8) |
| 107 | + # https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics |
| 108 | + check_c_source_compiles( |
| 109 | + [=[ |
| 110 | + #include <arm_neon.h> |
| 111 | + int main() { vdupq_n_s32(42); return 0; } |
| 112 | + ]=] |
| 113 | + HAVE_NEON) |
| 114 | + if(HAVE_NEON) |
| 115 | + target_sources(blake3 PRIVATE blake3_neon.c) |
| 116 | + target_compile_definitions(blake3 PRIVATE BLAKE3_USE_NEON) |
| 117 | + endif() |
| 118 | +endif() |
| 119 | -- |
| 120 | 2.30.0 |
| 121 | |