poky/meta/recipes-core/glibc/glibc/0006-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch - openbmc/openbmc - Gitiles

 From 7cccf5cec09f2a42cc3fe5d82ce21b7309330b33 Mon Sep 17 00:00:00 2001
 From: Khem Raj <raj.khem@gmail.com>
 Date: Wed, 18 Mar 2015 00:01:50 +0000
 Subject: [PATCH 06/28] fsl e500/e5500/e6500/603e fsqrt implementation

 Upstream-Status: Pending
 Signed-off-by: Edmar Wienskoski <edmar@freescale.com>
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 ---
  sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c   | 134 ++++++++++++++++++
  sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c  | 101 +++++++++++++
  sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | 134 ++++++++++++++++++
  .../powerpc/powerpc32/e500mc/fpu/e_sqrtf.c    | 101 +++++++++++++
  sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c  | 134 ++++++++++++++++++
  sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | 101 +++++++++++++
  sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c  | 134 ++++++++++++++++++
  sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | 101 +++++++++++++
  sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c  | 134 ++++++++++++++++++
  sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | 101 +++++++++++++
  sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c  | 134 ++++++++++++++++++
  sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | 101 +++++++++++++
  .../linux/powerpc/powerpc32/603e/fpu/Implies  |   1 +
  .../powerpc/powerpc32/e300c3/fpu/Implies      |   2 +
  .../powerpc/powerpc32/e500mc/fpu/Implies      |   1 +
  .../linux/powerpc/powerpc32/e5500/fpu/Implies |   1 +
  .../linux/powerpc/powerpc32/e6500/fpu/Implies |   1 +
  .../linux/powerpc/powerpc64/e5500/fpu/Implies |   1 +
  .../linux/powerpc/powerpc64/e6500/fpu/Implies |   1 +
  19 files changed, 1418 insertions(+)
  create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
  create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
  create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
  create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
  create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
  create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
  create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
  create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
  create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
  create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
  create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
  create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
  create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies

 diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
 new file mode 100644
 index 0000000000..71e516d1c8
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
 @@ -0,0 +1,134 @@
 +/* Double-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float two108 = 3.245185536584267269e+32;
 +static const float twom54 = 5.551115123125782702e-17;
 +static const float half = 0.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the actual square root and half of its reciprocal
 +   simultaneously.  */
 +
 +#ifdef __STDC__
 +double
 +__ieee754_sqrt (double b)
 +#else
 +double
 +__ieee754_sqrt (b)
 +     double b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +      double y, g, h, d, r;
 +      ieee_double_shape_type u;
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          u.value = b;
 +
 +          relax_fenv_state ();
 +
 +          __asm__ ("frsqrte %[estimate], %[x]\n"
 +                   : [estimate] "=f" (y) : [x] "f" (b));
 +
 +          /* Following Muller et al, page 168, equation 5.20.
 +
 +             h goes to 1/(2*sqrt(b))
 +             g goes to sqrt(b).
 +
 +             We need three iterations to get within 1ulp.  */
 +
 +          /* Indicate that these can be performed prior to the branch.  GCC
 +             insists on sinking them below the branch, however; it seems like
 +             they'd be better before the branch so that we can cover any latency
 +             from storing the argument and loading its high word.  Oh well.  */
 +
 +          g = b * y;
 +          h = 0.5 * y;
 +
 +          /* Handle small numbers by scaling.  */
 +          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
 +            return __ieee754_sqrt (b * two108) * twom54;
 +
 +#define FMADD(a_, c_, b_)                                               \
 +          ({ double __r;                                                \
 +          __asm__ ("fmadd %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +#define FNMSUB(a_, c_, b_)                                          \
 +          ({ double __r;                                                \
 +          __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
 +
 +          /* Final refinement.  */
 +          d = FNMSUB (g, g, b);
 +
 +          fesetenv_register (fe);
 +          return FMADD (d, h, g);
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_wash (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
 new file mode 100644
 index 0000000000..26fa067abf
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
 @@ -0,0 +1,101 @@
 +/* Single-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float threehalf = 1.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the reciprocal square root and use that to compute the actual
 +   square root.  */
 +
 +#ifdef __STDC__
 +float
 +__ieee754_sqrtf (float b)
 +#else
 +float
 +__ieee754_sqrtf (b)
 +     float b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +#define FMSUB(a_, c_, b_)                                               \
 +      ({ double __r;                                                    \
 +        __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"                       \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +#define FNMSUB(a_, c_, b_)                                              \
 +      ({ double __r;                                                    \
 +        __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                      \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          double y, x;
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          relax_fenv_state ();
 +
 +          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
 +          y = FMSUB (threehalf, b, b);
 +
 +          /* Initial estimate.  */
 +          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
 +
 +          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +
 +          /* All done.  */
 +          fesetenv_register (fe);
 +          return x * b;
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_washf (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
 new file mode 100644
 index 0000000000..71e516d1c8
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
 @@ -0,0 +1,134 @@
 +/* Double-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float two108 = 3.245185536584267269e+32;
 +static const float twom54 = 5.551115123125782702e-17;
 +static const float half = 0.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the actual square root and half of its reciprocal
 +   simultaneously.  */
 +
 +#ifdef __STDC__
 +double
 +__ieee754_sqrt (double b)
 +#else
 +double
 +__ieee754_sqrt (b)
 +     double b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +      double y, g, h, d, r;
 +      ieee_double_shape_type u;
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          u.value = b;
 +
 +          relax_fenv_state ();
 +
 +          __asm__ ("frsqrte %[estimate], %[x]\n"
 +                   : [estimate] "=f" (y) : [x] "f" (b));
 +
 +          /* Following Muller et al, page 168, equation 5.20.
 +
 +             h goes to 1/(2*sqrt(b))
 +             g goes to sqrt(b).
 +
 +             We need three iterations to get within 1ulp.  */
 +
 +          /* Indicate that these can be performed prior to the branch.  GCC
 +             insists on sinking them below the branch, however; it seems like
 +             they'd be better before the branch so that we can cover any latency
 +             from storing the argument and loading its high word.  Oh well.  */
 +
 +          g = b * y;
 +          h = 0.5 * y;
 +
 +          /* Handle small numbers by scaling.  */
 +          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
 +            return __ieee754_sqrt (b * two108) * twom54;
 +
 +#define FMADD(a_, c_, b_)                                               \
 +          ({ double __r;                                                \
 +          __asm__ ("fmadd %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +#define FNMSUB(a_, c_, b_)                                          \
 +          ({ double __r;                                                \
 +          __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
 +
 +          /* Final refinement.  */
 +          d = FNMSUB (g, g, b);
 +
 +          fesetenv_register (fe);
 +          return FMADD (d, h, g);
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_wash (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
 new file mode 100644
 index 0000000000..26fa067abf
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
 @@ -0,0 +1,101 @@
 +/* Single-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float threehalf = 1.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the reciprocal square root and use that to compute the actual
 +   square root.  */
 +
 +#ifdef __STDC__
 +float
 +__ieee754_sqrtf (float b)
 +#else
 +float
 +__ieee754_sqrtf (b)
 +     float b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +#define FMSUB(a_, c_, b_)                                               \
 +      ({ double __r;                                                    \
 +        __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"                       \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +#define FNMSUB(a_, c_, b_)                                              \
 +      ({ double __r;                                                    \
 +        __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                      \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          double y, x;
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          relax_fenv_state ();
 +
 +          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
 +          y = FMSUB (threehalf, b, b);
 +
 +          /* Initial estimate.  */
 +          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
 +
 +          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +
 +          /* All done.  */
 +          fesetenv_register (fe);
 +          return x * b;
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_washf (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
 new file mode 100644
 index 0000000000..71e516d1c8
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
 @@ -0,0 +1,134 @@
 +/* Double-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float two108 = 3.245185536584267269e+32;
 +static const float twom54 = 5.551115123125782702e-17;
 +static const float half = 0.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the actual square root and half of its reciprocal
 +   simultaneously.  */
 +
 +#ifdef __STDC__
 +double
 +__ieee754_sqrt (double b)
 +#else
 +double
 +__ieee754_sqrt (b)
 +     double b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +      double y, g, h, d, r;
 +      ieee_double_shape_type u;
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          u.value = b;
 +
 +          relax_fenv_state ();
 +
 +          __asm__ ("frsqrte %[estimate], %[x]\n"
 +                   : [estimate] "=f" (y) : [x] "f" (b));
 +
 +          /* Following Muller et al, page 168, equation 5.20.
 +
 +             h goes to 1/(2*sqrt(b))
 +             g goes to sqrt(b).
 +
 +             We need three iterations to get within 1ulp.  */
 +
 +          /* Indicate that these can be performed prior to the branch.  GCC
 +             insists on sinking them below the branch, however; it seems like
 +             they'd be better before the branch so that we can cover any latency
 +             from storing the argument and loading its high word.  Oh well.  */
 +
 +          g = b * y;
 +          h = 0.5 * y;
 +
 +          /* Handle small numbers by scaling.  */
 +          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
 +            return __ieee754_sqrt (b * two108) * twom54;
 +
 +#define FMADD(a_, c_, b_)                                               \
 +          ({ double __r;                                                \
 +          __asm__ ("fmadd %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +#define FNMSUB(a_, c_, b_)                                          \
 +          ({ double __r;                                                \
 +          __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
 +
 +          /* Final refinement.  */
 +          d = FNMSUB (g, g, b);
 +
 +          fesetenv_register (fe);
 +          return FMADD (d, h, g);
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_wash (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
 new file mode 100644
 index 0000000000..26fa067abf
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
 @@ -0,0 +1,101 @@
 +/* Single-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float threehalf = 1.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the reciprocal square root and use that to compute the actual
 +   square root.  */
 +
 +#ifdef __STDC__
 +float
 +__ieee754_sqrtf (float b)
 +#else
 +float
 +__ieee754_sqrtf (b)
 +     float b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +#define FMSUB(a_, c_, b_)                                               \
 +      ({ double __r;                                                    \
 +        __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"                       \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +#define FNMSUB(a_, c_, b_)                                              \
 +      ({ double __r;                                                    \
 +        __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                      \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          double y, x;
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          relax_fenv_state ();
 +
 +          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
 +          y = FMSUB (threehalf, b, b);
 +
 +          /* Initial estimate.  */
 +          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
 +
 +          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +
 +          /* All done.  */
 +          fesetenv_register (fe);
 +          return x * b;
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_washf (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
 new file mode 100644
 index 0000000000..71e516d1c8
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
 @@ -0,0 +1,134 @@
 +/* Double-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float two108 = 3.245185536584267269e+32;
 +static const float twom54 = 5.551115123125782702e-17;
 +static const float half = 0.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the actual square root and half of its reciprocal
 +   simultaneously.  */
 +
 +#ifdef __STDC__
 +double
 +__ieee754_sqrt (double b)
 +#else
 +double
 +__ieee754_sqrt (b)
 +     double b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +      double y, g, h, d, r;
 +      ieee_double_shape_type u;
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          u.value = b;
 +
 +          relax_fenv_state ();
 +
 +          __asm__ ("frsqrte %[estimate], %[x]\n"
 +                   : [estimate] "=f" (y) : [x] "f" (b));
 +
 +          /* Following Muller et al, page 168, equation 5.20.
 +
 +             h goes to 1/(2*sqrt(b))
 +             g goes to sqrt(b).
 +
 +             We need three iterations to get within 1ulp.  */
 +
 +          /* Indicate that these can be performed prior to the branch.  GCC
 +             insists on sinking them below the branch, however; it seems like
 +             they'd be better before the branch so that we can cover any latency
 +             from storing the argument and loading its high word.  Oh well.  */
 +
 +          g = b * y;
 +          h = 0.5 * y;
 +
 +          /* Handle small numbers by scaling.  */
 +          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
 +            return __ieee754_sqrt (b * two108) * twom54;
 +
 +#define FMADD(a_, c_, b_)                                               \
 +          ({ double __r;                                                \
 +          __asm__ ("fmadd %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +#define FNMSUB(a_, c_, b_)                                          \
 +          ({ double __r;                                                \
 +          __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
 +
 +          /* Final refinement.  */
 +          d = FNMSUB (g, g, b);
 +
 +          fesetenv_register (fe);
 +          return FMADD (d, h, g);
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_wash (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
 new file mode 100644
 index 0000000000..26fa067abf
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
 @@ -0,0 +1,101 @@
 +/* Single-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float threehalf = 1.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the reciprocal square root and use that to compute the actual
 +   square root.  */
 +
 +#ifdef __STDC__
 +float
 +__ieee754_sqrtf (float b)
 +#else
 +float
 +__ieee754_sqrtf (b)
 +     float b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +#define FMSUB(a_, c_, b_)                                               \
 +      ({ double __r;                                                    \
 +        __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"                       \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +#define FNMSUB(a_, c_, b_)                                              \
 +      ({ double __r;                                                    \
 +        __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                      \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          double y, x;
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          relax_fenv_state ();
 +
 +          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
 +          y = FMSUB (threehalf, b, b);
 +
 +          /* Initial estimate.  */
 +          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
 +
 +          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +
 +          /* All done.  */
 +          fesetenv_register (fe);
 +          return x * b;
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_washf (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
 new file mode 100644
 index 0000000000..71e516d1c8
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
 @@ -0,0 +1,134 @@
 +/* Double-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float two108 = 3.245185536584267269e+32;
 +static const float twom54 = 5.551115123125782702e-17;
 +static const float half = 0.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the actual square root and half of its reciprocal
 +   simultaneously.  */
 +
 +#ifdef __STDC__
 +double
 +__ieee754_sqrt (double b)
 +#else
 +double
 +__ieee754_sqrt (b)
 +     double b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +      double y, g, h, d, r;
 +      ieee_double_shape_type u;
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          u.value = b;
 +
 +          relax_fenv_state ();
 +
 +          __asm__ ("frsqrte %[estimate], %[x]\n"
 +                   : [estimate] "=f" (y) : [x] "f" (b));
 +
 +          /* Following Muller et al, page 168, equation 5.20.
 +
 +             h goes to 1/(2*sqrt(b))
 +             g goes to sqrt(b).
 +
 +             We need three iterations to get within 1ulp.  */
 +
 +          /* Indicate that these can be performed prior to the branch.  GCC
 +             insists on sinking them below the branch, however; it seems like
 +             they'd be better before the branch so that we can cover any latency
 +             from storing the argument and loading its high word.  Oh well.  */
 +
 +          g = b * y;
 +          h = 0.5 * y;
 +
 +          /* Handle small numbers by scaling.  */
 +          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
 +            return __ieee754_sqrt (b * two108) * twom54;
 +
 +#define FMADD(a_, c_, b_)                                               \
 +          ({ double __r;                                                \
 +          __asm__ ("fmadd %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +#define FNMSUB(a_, c_, b_)                                          \
 +          ({ double __r;                                                \
 +          __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
 +
 +          /* Final refinement.  */
 +          d = FNMSUB (g, g, b);
 +
 +          fesetenv_register (fe);
 +          return FMADD (d, h, g);
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_wash (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
 new file mode 100644
 index 0000000000..26fa067abf
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
 @@ -0,0 +1,101 @@
 +/* Single-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float threehalf = 1.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the reciprocal square root and use that to compute the actual
 +   square root.  */
 +
 +#ifdef __STDC__
 +float
 +__ieee754_sqrtf (float b)
 +#else
 +float
 +__ieee754_sqrtf (b)
 +     float b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +#define FMSUB(a_, c_, b_)                                               \
 +      ({ double __r;                                                    \
 +        __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"                       \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +#define FNMSUB(a_, c_, b_)                                              \
 +      ({ double __r;                                                    \
 +        __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                      \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          double y, x;
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          relax_fenv_state ();
 +
 +          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
 +          y = FMSUB (threehalf, b, b);
 +
 +          /* Initial estimate.  */
 +          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
 +
 +          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +
 +          /* All done.  */
 +          fesetenv_register (fe);
 +          return x * b;
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_washf (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
 new file mode 100644
 index 0000000000..71e516d1c8
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
 @@ -0,0 +1,134 @@
 +/* Double-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float two108 = 3.245185536584267269e+32;
 +static const float twom54 = 5.551115123125782702e-17;
 +static const float half = 0.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the actual square root and half of its reciprocal
 +   simultaneously.  */
 +
 +#ifdef __STDC__
 +double
 +__ieee754_sqrt (double b)
 +#else
 +double
 +__ieee754_sqrt (b)
 +     double b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +      double y, g, h, d, r;
 +      ieee_double_shape_type u;
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          u.value = b;
 +
 +          relax_fenv_state ();
 +
 +          __asm__ ("frsqrte %[estimate], %[x]\n"
 +                   : [estimate] "=f" (y) : [x] "f" (b));
 +
 +          /* Following Muller et al, page 168, equation 5.20.
 +
 +             h goes to 1/(2*sqrt(b))
 +             g goes to sqrt(b).
 +
 +             We need three iterations to get within 1ulp.  */
 +
 +          /* Indicate that these can be performed prior to the branch.  GCC
 +             insists on sinking them below the branch, however; it seems like
 +             they'd be better before the branch so that we can cover any latency
 +             from storing the argument and loading its high word.  Oh well.  */
 +
 +          g = b * y;
 +          h = 0.5 * y;
 +
 +          /* Handle small numbers by scaling.  */
 +          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
 +            return __ieee754_sqrt (b * two108) * twom54;
 +
 +#define FMADD(a_, c_, b_)                                               \
 +          ({ double __r;                                                \
 +          __asm__ ("fmadd %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +#define FNMSUB(a_, c_, b_)                                          \
 +          ({ double __r;                                                \
 +          __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                     \
 +                   : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +          __r;})
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          r = FNMSUB (g, h, half);
 +          g = FMADD (g, r, g);
 +          h = FMADD (h, r, h);
 +
 +          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
 +
 +          /* Final refinement.  */
 +          d = FNMSUB (g, g, b);
 +
 +          fesetenv_register (fe);
 +          return FMADD (d, h, g);
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_wash (b);
 +}
 diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
 new file mode 100644
 index 0000000000..26fa067abf
 --- /dev/null
 +++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
 @@ -0,0 +1,101 @@
 +/* Single-precision floating point square root.
 +   Copyright (C) 2010 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, write to the Free
 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 +   02111-1307 USA.  */
 +
 +#include <math.h>
 +#include <math_private.h>
 +#include <fenv_libc.h>
 +#include <inttypes.h>
 +
 +#include <sysdep.h>
 +#include <ldsodefs.h>
 +
 +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
 +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
 +static const float threehalf = 1.5;
 +
 +/* The method is based on the descriptions in:
 +
 +   _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
 +   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
 +
 +   We find the reciprocal square root and use that to compute the actual
 +   square root.  */
 +
 +#ifdef __STDC__
 +float
 +__ieee754_sqrtf (float b)
 +#else
 +float
 +__ieee754_sqrtf (b)
 +     float b;
 +#endif
 +{
 +  if (__builtin_expect (b > 0, 1))
 +    {
 +#define FMSUB(a_, c_, b_)                                               \
 +      ({ double __r;                                                    \
 +        __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"                       \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +#define FNMSUB(a_, c_, b_)                                              \
 +      ({ double __r;                                                    \
 +        __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"                      \
 +                 : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
 +        __r;})
 +
 +      if (__builtin_expect (b != a_inf.value, 1))
 +        {
 +          double y, x;
 +          fenv_t fe;
 +
 +          fe = fegetenv_register ();
 +
 +          relax_fenv_state ();
 +
 +          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
 +          y = FMSUB (threehalf, b, b);
 +
 +          /* Initial estimate.  */
 +          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
 +
 +          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +          x = x * FNMSUB (y, x * x, threehalf);
 +
 +          /* All done.  */
 +          fesetenv_register (fe);
 +          return x * b;
 +        }
 +    }
 +  else if (b < 0)
 +    {
 +      /* For some reason, some PowerPC32 processors don't implement
 +         FE_INVALID_SQRT.  */
 +#ifdef FE_INVALID_SQRT
 +      feraiseexcept (FE_INVALID_SQRT);
 +
 +      fenv_union_t u = { .fenv = fegetenv_register () };
 +      if ((u.l & FE_INVALID) == 0)
 +#endif
 +	feraiseexcept (FE_INVALID);
 +      b = a_nan.value;
 +    }
 +  return f_washf (b);
 +}
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
 new file mode 100644
 index 0000000000..b103b4dea5
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
 @@ -0,0 +1 @@
 +powerpc/powerpc32/603e/fpu
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
 new file mode 100644
 index 0000000000..64db17fada
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
 @@ -0,0 +1,2 @@
 +# e300c3 is a variant of 603e so use the same optimizations for sqrt
 +powerpc/powerpc32/603e/fpu
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
 new file mode 100644
 index 0000000000..7eac5fcf02
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
 @@ -0,0 +1 @@
 +powerpc/powerpc32/e500mc/fpu
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
 new file mode 100644
 index 0000000000..264b2a7700
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
 @@ -0,0 +1 @@
 +powerpc/powerpc32/e5500/fpu
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
 new file mode 100644
 index 0000000000..a25934467b
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
 @@ -0,0 +1 @@
 +powerpc/powerpc32/e6500/fpu
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
 new file mode 100644
 index 0000000000..a7bc854be8
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
 @@ -0,0 +1 @@
 +powerpc/powerpc64/e5500/fpu
 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
 new file mode 100644
 index 0000000000..04ff8cc181
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
 @@ -0,0 +1 @@
 +powerpc/powerpc64/e6500/fpu
 --
 2.22.0