blob: 16506566c2553db4f9d2990ec14f454465e09758 [file] [log] [blame]
From 40ba7a05de6a08cfd382b116f76dbeaa7237df45 Mon Sep 17 00:00:00 2001
From: Sungup Moon <sungup.moon@samsung.com>
Date: Mon, 8 Aug 2022 17:21:46 +0900
Subject: [PATCH] lib/rand: Enhance __fill_random_buf using the multi random
seed
The __fill_random_buf fills a buffer using the random 8byte integer to
write. But, this mechanism is depend on the CPU performance and could
not reach the max performance on the PCIe Gen5 devices. I have tested
128KB single worker sequential write on PCIe Gen5 NVMe, but it cannot
reach write throughput 6.0GB/s.
So, I have reviewed the __fill_random_buf and focused the multiplier
dependency to generate the random number. So, I have changed
__fill_random_buf using the multiple-random-seed to reduce the
dependencies in the small data filling loop.
I'll attach detail analysis result in the PR of this branch.
Signed-off-by: Sungup Moon <sungup.moon@samsung.com>
---
configure | 17 +++++++++++++++++
lib/rand.c | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
index 36450df8..a2b9bd4c 100755
--- a/configure
+++ b/configure
@@ -116,6 +116,10 @@ has() {
type "$1" >/dev/null 2>&1
}
+num() {
+ echo "$1" | grep -P -q "^[0-9]+$"
+}
+
check_define() {
cat > $TMPC <<EOF
#if !defined($1)
@@ -174,6 +178,7 @@ libnfs=""
xnvme=""
libzbc=""
dfs=""
+seed_buckets=""
dynamic_engines="no"
prefix=/usr/local
@@ -255,6 +260,8 @@ for opt do
;;
--enable-asan) asan="yes"
;;
+ --seed-buckets=*) seed_buckets="$optarg"
+ ;;
--help)
show_help="yes"
;;
@@ -302,6 +309,7 @@ if test "$show_help" = "yes" ; then
echo "--dynamic-libengines Lib-based ioengines as dynamic libraries"
echo "--disable-dfs Disable DAOS File System support even if found"
echo "--enable-asan Enable address sanitizer"
+ echo "--seed-buckets= Number of seed buckets for the refill-buffer"
exit $exit_val
fi
@@ -3273,6 +3281,15 @@ if test "$disable_tcmalloc" != "yes"; then
fi
fi
print_config "TCMalloc support" "$tcmalloc"
+if ! num "$seed_buckets"; then
+ seed_buckets=4
+elif test "$seed_buckets" -lt 2; then
+ seed_buckets=2
+elif test "$seed_buckets" -gt 16; then
+ seed_buckets=16
+fi
+echo "#define CONFIG_SEED_BUCKETS $seed_buckets" >> $config_host_h
+print_config "seed_buckets" "$seed_buckets"
echo "LIBS+=$LIBS" >> $config_host_mak
echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
diff --git a/lib/rand.c b/lib/rand.c
index 1e669116..1ce4a849 100644
--- a/lib/rand.c
+++ b/lib/rand.c
@@ -95,7 +95,7 @@ void init_rand_seed(struct frand_state *state, uint64_t seed, bool use64)
__init_rand64(&state->state64, seed);
}
-void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
+void __fill_random_buf_small(void *buf, unsigned int len, uint64_t seed)
{
uint64_t *b = buf;
uint64_t *e = b + len / sizeof(*b);
@@ -110,6 +110,41 @@ void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
__builtin_memcpy(e, &seed, rest);
}
+void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
+{
+#define MAX_SEED_BUCKETS 16
+ static uint64_t prime[MAX_SEED_BUCKETS] = {1, 2, 3, 5,
+ 7, 11, 13, 17,
+ 19, 23, 29, 31,
+ 37, 41, 43, 47};
+
+ uint64_t *b, *e, s[CONFIG_SEED_BUCKETS];
+ unsigned int rest;
+ int p;
+
+ /*
+ * Calculate the max index which is multiples of the seed buckets.
+ */
+ rest = (len / sizeof(*b) / CONFIG_SEED_BUCKETS) * CONFIG_SEED_BUCKETS;
+
+ b = buf;
+ e = b + rest;
+
+ rest = len - (rest * sizeof(*b));
+
+ for (p = 0; p < CONFIG_SEED_BUCKETS; p++)
+ s[p] = seed * prime[p];
+
+ for (; b != e; b += CONFIG_SEED_BUCKETS) {
+ for (p = 0; p < CONFIG_SEED_BUCKETS; ++p) {
+ b[p] = s[p];
+ s[p] = __hash_u64(s[p]);
+ }
+ }
+
+ __fill_random_buf_small(b, rest, s[0]);
+}
+
uint64_t fill_random_buf(struct frand_state *fs, void *buf,
unsigned int len)
{