blob: 5e3b3e2d7be98cb21b80875ac3c0345bdd85cf1e [file] [log] [blame]
From 59b0a78ae706a540dbd8905bc97c875220d6aeb2 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Wed, 18 Mar 2015 00:01:50 +0000
Subject: [PATCH 08/29] fsl e500/e5500/e6500/603e fsqrt implementation
Upstream-Status: Pending
Signed-off-by: Edmar Wienskoski <edmar@freescale.com>
Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | 134 ++++++++++++++++++
sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | 101 +++++++++++++
sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | 134 ++++++++++++++++++
.../powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | 101 +++++++++++++
sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++
sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | 101 +++++++++++++
sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++
sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | 101 +++++++++++++
sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++
sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | 101 +++++++++++++
sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++
sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | 101 +++++++++++++
.../linux/powerpc/powerpc32/603e/fpu/Implies | 1 +
.../powerpc/powerpc32/e300c3/fpu/Implies | 2 +
.../powerpc/powerpc32/e500mc/fpu/Implies | 1 +
.../linux/powerpc/powerpc32/e5500/fpu/Implies | 1 +
.../linux/powerpc/powerpc32/e6500/fpu/Implies | 1 +
.../linux/powerpc/powerpc64/e5500/fpu/Implies | 1 +
.../linux/powerpc/powerpc64/e6500/fpu/Implies | 1 +
19 files changed, 1418 insertions(+)
create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
new file mode 100644
index 0000000000..71e516d1c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+ double b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..26fa067abf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+ float b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
new file mode 100644
index 0000000000..71e516d1c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+ double b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..26fa067abf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+ float b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
new file mode 100644
index 0000000000..71e516d1c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+ double b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..26fa067abf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+ float b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
new file mode 100644
index 0000000000..71e516d1c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+ double b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..26fa067abf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+ float b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
new file mode 100644
index 0000000000..71e516d1c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+ double b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..26fa067abf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+ float b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
new file mode 100644
index 0000000000..71e516d1c8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+ double b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..26fa067abf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+ float b;
+#endif
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
new file mode 100644
index 0000000000..b103b4dea5
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/603e/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
new file mode 100644
index 0000000000..64db17fada
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
@@ -0,0 +1,2 @@
+# e300c3 is a variant of 603e so use the same optimizations for sqrt
+powerpc/powerpc32/603e/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
new file mode 100644
index 0000000000..7eac5fcf02
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/e500mc/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
new file mode 100644
index 0000000000..264b2a7700
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/e5500/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
new file mode 100644
index 0000000000..a25934467b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/e6500/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
new file mode 100644
index 0000000000..a7bc854be8
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/e5500/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
new file mode 100644
index 0000000000..04ff8cc181
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/e6500/fpu
--
2.27.0