Brad Bishop | 316dfdd | 2018-06-25 12:45:53 -0400 | [diff] [blame] | 1 | From 2de7e128fbdf528716b500cf27ed9a4358c931c9 Mon Sep 17 00:00:00 2001 |
| 2 | From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com> |
| 3 | Date: Fri, 24 Nov 2017 00:05:35 +0100 |
| 4 | Subject: [PATCH 2/2] Use ARM-NEON acceleration for float-multithreaded setups |
| 5 | MIME-Version: 1.0 |
| 6 | Content-Type: text/plain; charset=UTF-8 |
| 7 | Content-Transfer-Encoding: 8bit |
| 8 | |
| 9 | Upstream-Status: Pending |
| 10 | |
| 11 | Signed-off-by: Andreas Müller <schnitzeltony@gmail.com> |
| 12 | --- |
| 13 | src/rvoice/fluid_rvoice_mixer.c | 26 ++++++++++++++++++++++++++ |
| 14 | 1 file changed, 26 insertions(+) |
| 15 | |
| 16 | diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c |
| 17 | index 9616518..dbf8057 100644 |
| 18 | --- a/src/rvoice/fluid_rvoice_mixer.c |
| 19 | +++ b/src/rvoice/fluid_rvoice_mixer.c |
| 20 | @@ -27,6 +27,10 @@ |
| 21 | #include "fluid_ladspa.h" |
| 22 | #include "fluid_synth.h" |
| 23 | |
| 24 | +#if defined(__ARM_NEON__) |
| 25 | +#include "arm_neon.h" |
| 26 | +#endif |
| 27 | + |
| 28 | |
| 29 | #define ENABLE_MIXER_THREADS 1 |
| 30 | |
| 31 | @@ -794,20 +798,42 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src) |
| 32 | if (minbuf > src->buf_count) |
| 33 | minbuf = src->buf_count; |
| 34 | for (i=0; i < minbuf; i++) { |
| 35 | +#if defined(__ARM_NEON__) && defined(WITH_FLOAT) |
| 36 | + for (j=0; j < scount; j+=4) { |
| 37 | + float32x4_t vleft = vld1q_f32(&dest->left_buf[i][j]); |
| 38 | + float32x4_t vright = vld1q_f32(&dest->right_buf[i][j]); |
| 39 | + vleft = vaddq_f32(vleft, vld1q_f32(&src->left_buf[i][j])); |
| 40 | + vright = vaddq_f32(vright, vld1q_f32(&src->right_buf[i][j])); |
| 41 | + vst1q_f32(&dest->left_buf[i][j], vleft); |
| 42 | + vst1q_f32(&dest->right_buf[i][j], vright); |
| 43 | + } |
| 44 | +#else |
| 45 | for (j=0; j < scount; j++) { |
| 46 | dest->left_buf[i][j] += src->left_buf[i][j]; |
| 47 | dest->right_buf[i][j] += src->right_buf[i][j]; |
| 48 | } |
| 49 | +#endif |
| 50 | } |
| 51 | |
| 52 | minbuf = dest->fx_buf_count; |
| 53 | if (minbuf > src->fx_buf_count) |
| 54 | minbuf = src->fx_buf_count; |
| 55 | for (i=0; i < minbuf; i++) { |
| 56 | +#if defined(__ARM_NEON__) && defined(WITH_FLOAT) |
| 57 | + for (j=0; j < scount; j+=4) { |
| 58 | + float32x4_t vleft = vld1q_f32(&dest->fx_left_buf[i][j]); |
| 59 | + float32x4_t vright = vld1q_f32(&dest->fx_right_buf[i][j]); |
| 60 | + vleft = vaddq_f32(vleft, vld1q_f32(&src->fx_left_buf[i][j])); |
| 61 | + vright = vaddq_f32(vright, vld1q_f32(&src->fx_right_buf[i][j])); |
| 62 | + vst1q_f32(&dest->fx_left_buf[i][j], vleft); |
| 63 | + vst1q_f32(&dest->fx_right_buf[i][j], vright); |
| 64 | + } |
| 65 | +#else |
| 66 | for (j=0; j < scount; j++) { |
| 67 | dest->fx_left_buf[i][j] += src->fx_left_buf[i][j]; |
| 68 | dest->fx_right_buf[i][j] += src->fx_right_buf[i][j]; |
| 69 | } |
| 70 | +#endif |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | -- |
| 75 | 2.9.5 |
| 76 | |