blob: 12d4ebc4bc568830e0ce4017122e2c34c35ee60d [file] [log] [blame]
From 05d290165a3b61da09b715e6c8e62cebebab57cc Mon Sep 17 00:00:00 2001
From: Erik Flodin <erik@ejohansson.se>
Date: Mon, 7 Dec 2020 19:20:31 +0100
Subject: [PATCH 1/2] Improve SIMD detection (#735)
* Try to compile code to detect SSE/AVX support. Just checking if the compiler
supports the flag isn't enough as e.g. Clang on Apple's new ARM silicon seems
to accept the flag but then fails when building.
* Try to detect and enable BLAKE3's Neon support.
* Improve detection of AVX2 target attribute support by compiling a test program,
and remove the explicit compiler version check, which that test makes unnecessary.
Fixes #734.
Upstream-Status: Backport [https://github.com/ccache/ccache/commit/b438f50388dd00285083260f60450e6237b7d58f]
Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
cmake/GenerateConfigurationFile.cmake | 25 +++++++++---------
src/third_party/blake3/CMakeLists.txt | 38 ++++++++++++++++++++-------
2 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/cmake/GenerateConfigurationFile.cmake b/cmake/GenerateConfigurationFile.cmake
index a21861f4..836ff9bb 100644
--- a/cmake/GenerateConfigurationFile.cmake
+++ b/cmake/GenerateConfigurationFile.cmake
@@ -67,18 +67,19 @@ check_struct_has_member("struct stat" st_mtim sys/stat.h
check_struct_has_member("struct statfs" f_fstypename sys/mount.h
HAVE_STRUCT_STATFS_F_FSTYPENAME)
-include(CheckCXXCompilerFlag)
-
-# Old GCC versions don't have the required header support.
-# Old Apple Clang versions seem to support -mavx2 but not the target
-# attribute that's used to enable AVX2 for a certain function.
-if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
- OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0))
- message(STATUS "Detected unsupported compiler for HAVE_AVX2 - disabled")
- set(HAVE_AVX2 FALSE)
-else()
- check_cxx_compiler_flag(-mavx2 HAVE_AVX2)
-endif()
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+ [=[
+ #include <immintrin.h>
+ void func() __attribute__((target("avx2")));
+ void func() { _mm256_abs_epi8(_mm256_set1_epi32(42)); }
+ int main()
+ {
+ func();
+ return 0;
+ }
+ ]=]
+ HAVE_AVX2)
list(APPEND CMAKE_REQUIRED_LIBRARIES ws2_32)
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ws2_32)
diff --git a/src/third_party/blake3/CMakeLists.txt b/src/third_party/blake3/CMakeLists.txt
index a75e5611..cc24253c 100644
--- a/src/third_party/blake3/CMakeLists.txt
+++ b/src/third_party/blake3/CMakeLists.txt
@@ -13,9 +13,9 @@ else()
endif()
include(CheckAsmCompilerFlag)
-include(CheckCCompilerFlag)
+include(CheckCSourceCompiles)
-function(add_source_if_enabled feature compile_flags)
+function(add_source_if_enabled feature compile_flags intrinsic)
string(TOUPPER "have_${blake_source_type}_${feature}" have_feature)
# AVX512 support fails to compile with old Apple Clang versions even though
@@ -28,7 +28,14 @@ function(add_source_if_enabled feature compile_flags)
elseif(${blake_source_type} STREQUAL "asm")
check_asm_compiler_flag(${compile_flags} ${have_feature})
else()
- check_c_compiler_flag(${compile_flags} ${have_feature})
+ set(CMAKE_REQUIRED_FLAGS ${compile_flags})
+ check_c_source_compiles(
+ [=[
+ #include <immintrin.h>
+ int main() { ${intrinsic}; return 0; }
+ ]=]
+ ${have_feature})
+ unset(CMAKE_REQUIRED_FLAGS)
endif()
if(${have_feature})
@@ -42,10 +49,23 @@ function(add_source_if_enabled feature compile_flags)
endif()
endfunction()
-add_source_if_enabled(sse2 "-msse2")
-add_source_if_enabled(sse41 "-msse4.1")
-add_source_if_enabled(avx2 "-mavx2")
-add_source_if_enabled(avx512 "-mavx512f -mavx512vl")
+# The intrinsics probed below are documented at https://software.intel.com/sites/landingpage/IntrinsicsGuide/
+add_source_if_enabled(sse2 "-msse2" "_mm_set1_epi32(42)")
+add_source_if_enabled(sse41 "-msse4.1" "_mm_test_all_ones(_mm_set1_epi32(42))")
+add_source_if_enabled(avx2 "-mavx2" "_mm256_abs_epi8(_mm256_set1_epi32(42))")
+add_source_if_enabled(avx512 "-mavx512f -mavx512vl" "_mm256_abs_epi64(_mm256_set1_epi32(42))")
-# TODO: how to detect ARM NEON support?
-# If NEON, define BLAKE3_USE_NEON and build blake3_neon.c
+# Neon is always available on AArch64; 64-bit pointers alone don't prove AArch64, so also compile-test arm_neon.h.
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ # https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics
+ check_c_source_compiles(
+ [=[
+ #include <arm_neon.h>
+ int main() { vdupq_n_s32(42); return 0; }
+ ]=]
+ HAVE_NEON)
+ if(HAVE_NEON)
+ target_sources(blake3 PRIVATE blake3_neon.c)
+ target_compile_definitions(blake3 PRIVATE BLAKE3_USE_NEON)
+ endif()
+endif()
--
2.30.0