blob: 5e3b3e2d7be98cb21b80875ac3c0345bdd85cf1e [file] [log] [blame]
Andrew Geissler635e0e42020-08-21 15:58:33 -05001From 59b0a78ae706a540dbd8905bc97c875220d6aeb2 Mon Sep 17 00:00:00 2001
Andrew Geissler82c905d2020-04-13 13:39:40 -05002From: Khem Raj <raj.khem@gmail.com>
3Date: Wed, 18 Mar 2015 00:01:50 +0000
Andrew Geissler635e0e42020-08-21 15:58:33 -05004Subject: [PATCH 08/29] fsl e500/e5500/e6500/603e fsqrt implementation
Andrew Geissler82c905d2020-04-13 13:39:40 -05005
6Upstream-Status: Pending
7Signed-off-by: Edmar Wienskoski <edmar@freescale.com>
8Signed-off-by: Khem Raj <raj.khem@gmail.com>
9---
10 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | 134 ++++++++++++++++++
11 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | 101 +++++++++++++
12 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | 134 ++++++++++++++++++
13 .../powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | 101 +++++++++++++
14 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++
15 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | 101 +++++++++++++
16 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++
17 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | 101 +++++++++++++
18 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++
19 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | 101 +++++++++++++
20 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++
21 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | 101 +++++++++++++
22 .../linux/powerpc/powerpc32/603e/fpu/Implies | 1 +
23 .../powerpc/powerpc32/e300c3/fpu/Implies | 2 +
24 .../powerpc/powerpc32/e500mc/fpu/Implies | 1 +
25 .../linux/powerpc/powerpc32/e5500/fpu/Implies | 1 +
26 .../linux/powerpc/powerpc32/e6500/fpu/Implies | 1 +
27 .../linux/powerpc/powerpc64/e5500/fpu/Implies | 1 +
28 .../linux/powerpc/powerpc64/e6500/fpu/Implies | 1 +
29 19 files changed, 1418 insertions(+)
30 create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
31 create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
32 create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
33 create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
34 create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
35 create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
36 create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
37 create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
38 create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
39 create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
40 create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
41 create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
42 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
43 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
44 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
45 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
46 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
47 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
48 create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
49
50diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
51new file mode 100644
52index 0000000000..71e516d1c8
53--- /dev/null
54+++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
55@@ -0,0 +1,134 @@
56+/* Double-precision floating point square root.
57+ Copyright (C) 2010 Free Software Foundation, Inc.
58+ This file is part of the GNU C Library.
59+
60+ The GNU C Library is free software; you can redistribute it and/or
61+ modify it under the terms of the GNU Lesser General Public
62+ License as published by the Free Software Foundation; either
63+ version 2.1 of the License, or (at your option) any later version.
64+
65+ The GNU C Library is distributed in the hope that it will be useful,
66+ but WITHOUT ANY WARRANTY; without even the implied warranty of
67+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
68+ Lesser General Public License for more details.
69+
70+ You should have received a copy of the GNU Lesser General Public
71+ License along with the GNU C Library; if not, write to the Free
72+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
73+ 02111-1307 USA. */
74+
75+#include <math.h>
76+#include <math_private.h>
77+#include <fenv_libc.h>
78+#include <inttypes.h>
79+
80+#include <sysdep.h>
81+#include <ldsodefs.h>
82+
83+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
84+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
85+static const float two108 = 3.245185536584267269e+32;
86+static const float twom54 = 5.551115123125782702e-17;
87+static const float half = 0.5;
88+
89+/* The method is based on the descriptions in:
90+
91+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
92+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
93+
94+ We find the actual square root and half of its reciprocal
95+ simultaneously. */
96+
97+#ifdef __STDC__
98+double
99+__ieee754_sqrt (double b)
100+#else
101+double
102+__ieee754_sqrt (b)
103+ double b;
104+#endif
105+{
106+ if (__builtin_expect (b > 0, 1))
107+ {
108+ double y, g, h, d, r;
109+ ieee_double_shape_type u;
110+
111+ if (__builtin_expect (b != a_inf.value, 1))
112+ {
113+ fenv_t fe;
114+
115+ fe = fegetenv_register ();
116+
117+ u.value = b;
118+
119+ relax_fenv_state ();
120+
121+ __asm__ ("frsqrte %[estimate], %[x]\n"
122+ : [estimate] "=f" (y) : [x] "f" (b));
123+
124+ /* Following Muller et al, page 168, equation 5.20.
125+
126+ h goes to 1/(2*sqrt(b))
127+ g goes to sqrt(b).
128+
129+ We need three iterations to get within 1ulp. */
130+
131+ /* Indicate that these can be performed prior to the branch. GCC
132+ insists on sinking them below the branch, however; it seems like
133+ they'd be better before the branch so that we can cover any latency
134+ from storing the argument and loading its high word. Oh well. */
135+
136+ g = b * y;
137+ h = 0.5 * y;
138+
139+ /* Handle small numbers by scaling. */
140+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
141+ return __ieee754_sqrt (b * two108) * twom54;
142+
143+#define FMADD(a_, c_, b_) \
144+ ({ double __r; \
145+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
146+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
147+ __r;})
148+#define FNMSUB(a_, c_, b_) \
149+ ({ double __r; \
150+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
151+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
152+ __r;})
153+
154+ r = FNMSUB (g, h, half);
155+ g = FMADD (g, r, g);
156+ h = FMADD (h, r, h);
157+
158+ r = FNMSUB (g, h, half);
159+ g = FMADD (g, r, g);
160+ h = FMADD (h, r, h);
161+
162+ r = FNMSUB (g, h, half);
163+ g = FMADD (g, r, g);
164+ h = FMADD (h, r, h);
165+
166+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
167+
168+ /* Final refinement. */
169+ d = FNMSUB (g, g, b);
170+
171+ fesetenv_register (fe);
172+ return FMADD (d, h, g);
173+ }
174+ }
175+ else if (b < 0)
176+ {
177+ /* For some reason, some PowerPC32 processors don't implement
178+ FE_INVALID_SQRT. */
179+#ifdef FE_INVALID_SQRT
180+ feraiseexcept (FE_INVALID_SQRT);
181+
182+ fenv_union_t u = { .fenv = fegetenv_register () };
183+ if ((u.l & FE_INVALID) == 0)
184+#endif
185+ feraiseexcept (FE_INVALID);
186+ b = a_nan.value;
187+ }
188+ return f_wash (b);
189+}
190diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
191new file mode 100644
192index 0000000000..26fa067abf
193--- /dev/null
194+++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
195@@ -0,0 +1,101 @@
196+/* Single-precision floating point square root.
197+ Copyright (C) 2010 Free Software Foundation, Inc.
198+ This file is part of the GNU C Library.
199+
200+ The GNU C Library is free software; you can redistribute it and/or
201+ modify it under the terms of the GNU Lesser General Public
202+ License as published by the Free Software Foundation; either
203+ version 2.1 of the License, or (at your option) any later version.
204+
205+ The GNU C Library is distributed in the hope that it will be useful,
206+ but WITHOUT ANY WARRANTY; without even the implied warranty of
207+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
208+ Lesser General Public License for more details.
209+
210+ You should have received a copy of the GNU Lesser General Public
211+ License along with the GNU C Library; if not, write to the Free
212+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
213+ 02111-1307 USA. */
214+
215+#include <math.h>
216+#include <math_private.h>
217+#include <fenv_libc.h>
218+#include <inttypes.h>
219+
220+#include <sysdep.h>
221+#include <ldsodefs.h>
222+
223+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
224+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
225+static const float threehalf = 1.5;
226+
227+/* The method is based on the descriptions in:
228+
229+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
230+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
231+
232+ We find the reciprocal square root and use that to compute the actual
233+ square root. */
234+
235+#ifdef __STDC__
236+float
237+__ieee754_sqrtf (float b)
238+#else
239+float
240+__ieee754_sqrtf (b)
241+ float b;
242+#endif
243+{
244+ if (__builtin_expect (b > 0, 1))
245+ {
246+#define FMSUB(a_, c_, b_) \
247+ ({ double __r; \
248+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
249+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
250+ __r;})
251+#define FNMSUB(a_, c_, b_) \
252+ ({ double __r; \
253+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
254+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
255+ __r;})
256+
257+ if (__builtin_expect (b != a_inf.value, 1))
258+ {
259+ double y, x;
260+ fenv_t fe;
261+
262+ fe = fegetenv_register ();
263+
264+ relax_fenv_state ();
265+
266+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
267+ y = FMSUB (threehalf, b, b);
268+
269+ /* Initial estimate. */
270+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
271+
272+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
273+ x = x * FNMSUB (y, x * x, threehalf);
274+ x = x * FNMSUB (y, x * x, threehalf);
275+ x = x * FNMSUB (y, x * x, threehalf);
276+
277+ /* All done. */
278+ fesetenv_register (fe);
279+ return x * b;
280+ }
281+ }
282+ else if (b < 0)
283+ {
284+ /* For some reason, some PowerPC32 processors don't implement
285+ FE_INVALID_SQRT. */
286+#ifdef FE_INVALID_SQRT
287+ feraiseexcept (FE_INVALID_SQRT);
288+
289+ fenv_union_t u = { .fenv = fegetenv_register () };
290+ if ((u.l & FE_INVALID) == 0)
291+#endif
292+ feraiseexcept (FE_INVALID);
293+ b = a_nan.value;
294+ }
295+ return f_washf (b);
296+}
297diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
298new file mode 100644
299index 0000000000..71e516d1c8
300--- /dev/null
301+++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
302@@ -0,0 +1,134 @@
303+/* Double-precision floating point square root.
304+ Copyright (C) 2010 Free Software Foundation, Inc.
305+ This file is part of the GNU C Library.
306+
307+ The GNU C Library is free software; you can redistribute it and/or
308+ modify it under the terms of the GNU Lesser General Public
309+ License as published by the Free Software Foundation; either
310+ version 2.1 of the License, or (at your option) any later version.
311+
312+ The GNU C Library is distributed in the hope that it will be useful,
313+ but WITHOUT ANY WARRANTY; without even the implied warranty of
314+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
315+ Lesser General Public License for more details.
316+
317+ You should have received a copy of the GNU Lesser General Public
318+ License along with the GNU C Library; if not, write to the Free
319+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
320+ 02111-1307 USA. */
321+
322+#include <math.h>
323+#include <math_private.h>
324+#include <fenv_libc.h>
325+#include <inttypes.h>
326+
327+#include <sysdep.h>
328+#include <ldsodefs.h>
329+
330+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
331+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
332+static const float two108 = 3.245185536584267269e+32;
333+static const float twom54 = 5.551115123125782702e-17;
334+static const float half = 0.5;
335+
336+/* The method is based on the descriptions in:
337+
338+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
339+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
340+
341+ We find the actual square root and half of its reciprocal
342+ simultaneously. */
343+
344+#ifdef __STDC__
345+double
346+__ieee754_sqrt (double b)
347+#else
348+double
349+__ieee754_sqrt (b)
350+ double b;
351+#endif
352+{
353+ if (__builtin_expect (b > 0, 1))
354+ {
355+ double y, g, h, d, r;
356+ ieee_double_shape_type u;
357+
358+ if (__builtin_expect (b != a_inf.value, 1))
359+ {
360+ fenv_t fe;
361+
362+ fe = fegetenv_register ();
363+
364+ u.value = b;
365+
366+ relax_fenv_state ();
367+
368+ __asm__ ("frsqrte %[estimate], %[x]\n"
369+ : [estimate] "=f" (y) : [x] "f" (b));
370+
371+ /* Following Muller et al, page 168, equation 5.20.
372+
373+ h goes to 1/(2*sqrt(b))
374+ g goes to sqrt(b).
375+
376+ We need three iterations to get within 1ulp. */
377+
378+ /* Indicate that these can be performed prior to the branch. GCC
379+ insists on sinking them below the branch, however; it seems like
380+ they'd be better before the branch so that we can cover any latency
381+ from storing the argument and loading its high word. Oh well. */
382+
383+ g = b * y;
384+ h = 0.5 * y;
385+
386+ /* Handle small numbers by scaling. */
387+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
388+ return __ieee754_sqrt (b * two108) * twom54;
389+
390+#define FMADD(a_, c_, b_) \
391+ ({ double __r; \
392+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
393+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
394+ __r;})
395+#define FNMSUB(a_, c_, b_) \
396+ ({ double __r; \
397+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
398+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
399+ __r;})
400+
401+ r = FNMSUB (g, h, half);
402+ g = FMADD (g, r, g);
403+ h = FMADD (h, r, h);
404+
405+ r = FNMSUB (g, h, half);
406+ g = FMADD (g, r, g);
407+ h = FMADD (h, r, h);
408+
409+ r = FNMSUB (g, h, half);
410+ g = FMADD (g, r, g);
411+ h = FMADD (h, r, h);
412+
413+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
414+
415+ /* Final refinement. */
416+ d = FNMSUB (g, g, b);
417+
418+ fesetenv_register (fe);
419+ return FMADD (d, h, g);
420+ }
421+ }
422+ else if (b < 0)
423+ {
424+ /* For some reason, some PowerPC32 processors don't implement
425+ FE_INVALID_SQRT. */
426+#ifdef FE_INVALID_SQRT
427+ feraiseexcept (FE_INVALID_SQRT);
428+
429+ fenv_union_t u = { .fenv = fegetenv_register () };
430+ if ((u.l & FE_INVALID) == 0)
431+#endif
432+ feraiseexcept (FE_INVALID);
433+ b = a_nan.value;
434+ }
435+ return f_wash (b);
436+}
437diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
438new file mode 100644
439index 0000000000..26fa067abf
440--- /dev/null
441+++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
442@@ -0,0 +1,101 @@
443+/* Single-precision floating point square root.
444+ Copyright (C) 2010 Free Software Foundation, Inc.
445+ This file is part of the GNU C Library.
446+
447+ The GNU C Library is free software; you can redistribute it and/or
448+ modify it under the terms of the GNU Lesser General Public
449+ License as published by the Free Software Foundation; either
450+ version 2.1 of the License, or (at your option) any later version.
451+
452+ The GNU C Library is distributed in the hope that it will be useful,
453+ but WITHOUT ANY WARRANTY; without even the implied warranty of
454+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
455+ Lesser General Public License for more details.
456+
457+ You should have received a copy of the GNU Lesser General Public
458+ License along with the GNU C Library; if not, write to the Free
459+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
460+ 02111-1307 USA. */
461+
462+#include <math.h>
463+#include <math_private.h>
464+#include <fenv_libc.h>
465+#include <inttypes.h>
466+
467+#include <sysdep.h>
468+#include <ldsodefs.h>
469+
470+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
471+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
472+static const float threehalf = 1.5;
473+
474+/* The method is based on the descriptions in:
475+
476+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
477+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
478+
479+ We find the reciprocal square root and use that to compute the actual
480+ square root. */
481+
482+#ifdef __STDC__
483+float
484+__ieee754_sqrtf (float b)
485+#else
486+float
487+__ieee754_sqrtf (b)
488+ float b;
489+#endif
490+{
491+ if (__builtin_expect (b > 0, 1))
492+ {
493+#define FMSUB(a_, c_, b_) \
494+ ({ double __r; \
495+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
496+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
497+ __r;})
498+#define FNMSUB(a_, c_, b_) \
499+ ({ double __r; \
500+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
501+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
502+ __r;})
503+
504+ if (__builtin_expect (b != a_inf.value, 1))
505+ {
506+ double y, x;
507+ fenv_t fe;
508+
509+ fe = fegetenv_register ();
510+
511+ relax_fenv_state ();
512+
513+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
514+ y = FMSUB (threehalf, b, b);
515+
516+ /* Initial estimate. */
517+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
518+
519+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
520+ x = x * FNMSUB (y, x * x, threehalf);
521+ x = x * FNMSUB (y, x * x, threehalf);
522+ x = x * FNMSUB (y, x * x, threehalf);
523+
524+ /* All done. */
525+ fesetenv_register (fe);
526+ return x * b;
527+ }
528+ }
529+ else if (b < 0)
530+ {
531+ /* For some reason, some PowerPC32 processors don't implement
532+ FE_INVALID_SQRT. */
533+#ifdef FE_INVALID_SQRT
534+ feraiseexcept (FE_INVALID_SQRT);
535+
536+ fenv_union_t u = { .fenv = fegetenv_register () };
537+ if ((u.l & FE_INVALID) == 0)
538+#endif
539+ feraiseexcept (FE_INVALID);
540+ b = a_nan.value;
541+ }
542+ return f_washf (b);
543+}
544diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
545new file mode 100644
546index 0000000000..71e516d1c8
547--- /dev/null
548+++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
549@@ -0,0 +1,134 @@
550+/* Double-precision floating point square root.
551+ Copyright (C) 2010 Free Software Foundation, Inc.
552+ This file is part of the GNU C Library.
553+
554+ The GNU C Library is free software; you can redistribute it and/or
555+ modify it under the terms of the GNU Lesser General Public
556+ License as published by the Free Software Foundation; either
557+ version 2.1 of the License, or (at your option) any later version.
558+
559+ The GNU C Library is distributed in the hope that it will be useful,
560+ but WITHOUT ANY WARRANTY; without even the implied warranty of
561+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
562+ Lesser General Public License for more details.
563+
564+ You should have received a copy of the GNU Lesser General Public
565+ License along with the GNU C Library; if not, write to the Free
566+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
567+ 02111-1307 USA. */
568+
569+#include <math.h>
570+#include <math_private.h>
571+#include <fenv_libc.h>
572+#include <inttypes.h>
573+
574+#include <sysdep.h>
575+#include <ldsodefs.h>
576+
577+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
578+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
579+static const float two108 = 3.245185536584267269e+32;
580+static const float twom54 = 5.551115123125782702e-17;
581+static const float half = 0.5;
582+
583+/* The method is based on the descriptions in:
584+
585+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
586+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
587+
588+ We find the actual square root and half of its reciprocal
589+ simultaneously. */
590+
591+#ifdef __STDC__
592+double
593+__ieee754_sqrt (double b)
594+#else
595+double
596+__ieee754_sqrt (b)
597+ double b;
598+#endif
599+{
600+ if (__builtin_expect (b > 0, 1))
601+ {
602+ double y, g, h, d, r;
603+ ieee_double_shape_type u;
604+
605+ if (__builtin_expect (b != a_inf.value, 1))
606+ {
607+ fenv_t fe;
608+
609+ fe = fegetenv_register ();
610+
611+ u.value = b;
612+
613+ relax_fenv_state ();
614+
615+ __asm__ ("frsqrte %[estimate], %[x]\n"
616+ : [estimate] "=f" (y) : [x] "f" (b));
617+
618+ /* Following Muller et al, page 168, equation 5.20.
619+
620+ h goes to 1/(2*sqrt(b))
621+ g goes to sqrt(b).
622+
623+ We need three iterations to get within 1ulp. */
624+
625+ /* Indicate that these can be performed prior to the branch. GCC
626+ insists on sinking them below the branch, however; it seems like
627+ they'd be better before the branch so that we can cover any latency
628+ from storing the argument and loading its high word. Oh well. */
629+
630+ g = b * y;
631+ h = 0.5 * y;
632+
633+ /* Handle small numbers by scaling. */
634+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
635+ return __ieee754_sqrt (b * two108) * twom54;
636+
637+#define FMADD(a_, c_, b_) \
638+ ({ double __r; \
639+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
640+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
641+ __r;})
642+#define FNMSUB(a_, c_, b_) \
643+ ({ double __r; \
644+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
645+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
646+ __r;})
647+
648+ r = FNMSUB (g, h, half);
649+ g = FMADD (g, r, g);
650+ h = FMADD (h, r, h);
651+
652+ r = FNMSUB (g, h, half);
653+ g = FMADD (g, r, g);
654+ h = FMADD (h, r, h);
655+
656+ r = FNMSUB (g, h, half);
657+ g = FMADD (g, r, g);
658+ h = FMADD (h, r, h);
659+
660+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
661+
662+ /* Final refinement. */
663+ d = FNMSUB (g, g, b);
664+
665+ fesetenv_register (fe);
666+ return FMADD (d, h, g);
667+ }
668+ }
669+ else if (b < 0)
670+ {
671+ /* For some reason, some PowerPC32 processors don't implement
672+ FE_INVALID_SQRT. */
673+#ifdef FE_INVALID_SQRT
674+ feraiseexcept (FE_INVALID_SQRT);
675+
676+ fenv_union_t u = { .fenv = fegetenv_register () };
677+ if ((u.l & FE_INVALID) == 0)
678+#endif
679+ feraiseexcept (FE_INVALID);
680+ b = a_nan.value;
681+ }
682+ return f_wash (b);
683+}
684diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
685new file mode 100644
686index 0000000000..26fa067abf
687--- /dev/null
688+++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
689@@ -0,0 +1,101 @@
690+/* Single-precision floating point square root.
691+ Copyright (C) 2010 Free Software Foundation, Inc.
692+ This file is part of the GNU C Library.
693+
694+ The GNU C Library is free software; you can redistribute it and/or
695+ modify it under the terms of the GNU Lesser General Public
696+ License as published by the Free Software Foundation; either
697+ version 2.1 of the License, or (at your option) any later version.
698+
699+ The GNU C Library is distributed in the hope that it will be useful,
700+ but WITHOUT ANY WARRANTY; without even the implied warranty of
701+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
702+ Lesser General Public License for more details.
703+
704+ You should have received a copy of the GNU Lesser General Public
705+ License along with the GNU C Library; if not, write to the Free
706+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
707+ 02111-1307 USA. */
708+
709+#include <math.h>
710+#include <math_private.h>
711+#include <fenv_libc.h>
712+#include <inttypes.h>
713+
714+#include <sysdep.h>
715+#include <ldsodefs.h>
716+
717+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
718+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
719+static const float threehalf = 1.5;
720+
721+/* The method is based on the descriptions in:
722+
723+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
724+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
725+
726+ We find the reciprocal square root and use that to compute the actual
727+ square root. */
728+
729+#ifdef __STDC__
730+float
731+__ieee754_sqrtf (float b)
732+#else
733+float
734+__ieee754_sqrtf (b)
735+ float b;
736+#endif
737+{
738+ if (__builtin_expect (b > 0, 1))
739+ {
740+#define FMSUB(a_, c_, b_) \
741+ ({ double __r; \
742+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
743+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
744+ __r;})
745+#define FNMSUB(a_, c_, b_) \
746+ ({ double __r; \
747+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
748+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
749+ __r;})
750+
751+ if (__builtin_expect (b != a_inf.value, 1))
752+ {
753+ double y, x;
754+ fenv_t fe;
755+
756+ fe = fegetenv_register ();
757+
758+ relax_fenv_state ();
759+
760+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
761+ y = FMSUB (threehalf, b, b);
762+
763+ /* Initial estimate. */
764+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
765+
766+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
767+ x = x * FNMSUB (y, x * x, threehalf);
768+ x = x * FNMSUB (y, x * x, threehalf);
769+ x = x * FNMSUB (y, x * x, threehalf);
770+
771+ /* All done. */
772+ fesetenv_register (fe);
773+ return x * b;
774+ }
775+ }
776+ else if (b < 0)
777+ {
778+ /* For some reason, some PowerPC32 processors don't implement
779+ FE_INVALID_SQRT. */
780+#ifdef FE_INVALID_SQRT
781+ feraiseexcept (FE_INVALID_SQRT);
782+
783+ fenv_union_t u = { .fenv = fegetenv_register () };
784+ if ((u.l & FE_INVALID) == 0)
785+#endif
786+ feraiseexcept (FE_INVALID);
787+ b = a_nan.value;
788+ }
789+ return f_washf (b);
790+}
791diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
792new file mode 100644
793index 0000000000..71e516d1c8
794--- /dev/null
795+++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
796@@ -0,0 +1,134 @@
797+/* Double-precision floating point square root.
798+ Copyright (C) 2010 Free Software Foundation, Inc.
799+ This file is part of the GNU C Library.
800+
801+ The GNU C Library is free software; you can redistribute it and/or
802+ modify it under the terms of the GNU Lesser General Public
803+ License as published by the Free Software Foundation; either
804+ version 2.1 of the License, or (at your option) any later version.
805+
806+ The GNU C Library is distributed in the hope that it will be useful,
807+ but WITHOUT ANY WARRANTY; without even the implied warranty of
808+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
809+ Lesser General Public License for more details.
810+
811+ You should have received a copy of the GNU Lesser General Public
812+ License along with the GNU C Library; if not, write to the Free
813+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
814+ 02111-1307 USA. */
815+
816+#include <math.h>
817+#include <math_private.h>
818+#include <fenv_libc.h>
819+#include <inttypes.h>
820+
821+#include <sysdep.h>
822+#include <ldsodefs.h>
823+
824+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
825+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
826+static const float two108 = 3.245185536584267269e+32;
827+static const float twom54 = 5.551115123125782702e-17;
828+static const float half = 0.5;
829+
830+/* The method is based on the descriptions in:
831+
832+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
833+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
834+
835+ We find the actual square root and half of its reciprocal
836+ simultaneously. */
837+
838+#ifdef __STDC__
839+double
840+__ieee754_sqrt (double b)
841+#else
842+double
843+__ieee754_sqrt (b)
844+ double b;
845+#endif
846+{
847+ if (__builtin_expect (b > 0, 1))
848+ {
849+ double y, g, h, d, r;
850+ ieee_double_shape_type u;
851+
852+ if (__builtin_expect (b != a_inf.value, 1))
853+ {
854+ fenv_t fe;
855+
856+ fe = fegetenv_register ();
857+
858+ u.value = b;
859+
860+ relax_fenv_state ();
861+
862+ __asm__ ("frsqrte %[estimate], %[x]\n"
863+ : [estimate] "=f" (y) : [x] "f" (b));
864+
865+ /* Following Muller et al, page 168, equation 5.20.
866+
867+ h goes to 1/(2*sqrt(b))
868+ g goes to sqrt(b).
869+
870+ We need three iterations to get within 1ulp. */
871+
872+ /* Indicate that these can be performed prior to the branch. GCC
873+ insists on sinking them below the branch, however; it seems like
874+ they'd be better before the branch so that we can cover any latency
875+ from storing the argument and loading its high word. Oh well. */
876+
877+ g = b * y;
878+ h = 0.5 * y;
879+
880+ /* Handle small numbers by scaling. */
881+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
882+ return __ieee754_sqrt (b * two108) * twom54;
883+
884+#define FMADD(a_, c_, b_) \
885+ ({ double __r; \
886+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
887+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
888+ __r;})
889+#define FNMSUB(a_, c_, b_) \
890+ ({ double __r; \
891+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
892+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
893+ __r;})
894+
895+ r = FNMSUB (g, h, half);
896+ g = FMADD (g, r, g);
897+ h = FMADD (h, r, h);
898+
899+ r = FNMSUB (g, h, half);
900+ g = FMADD (g, r, g);
901+ h = FMADD (h, r, h);
902+
903+ r = FNMSUB (g, h, half);
904+ g = FMADD (g, r, g);
905+ h = FMADD (h, r, h);
906+
907+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
908+
909+ /* Final refinement. */
910+ d = FNMSUB (g, g, b);
911+
912+ fesetenv_register (fe);
913+ return FMADD (d, h, g);
914+ }
915+ }
916+ else if (b < 0)
917+ {
918+ /* For some reason, some PowerPC32 processors don't implement
919+ FE_INVALID_SQRT. */
920+#ifdef FE_INVALID_SQRT
921+ feraiseexcept (FE_INVALID_SQRT);
922+
923+ fenv_union_t u = { .fenv = fegetenv_register () };
924+ if ((u.l & FE_INVALID) == 0)
925+#endif
926+ feraiseexcept (FE_INVALID);
927+ b = a_nan.value;
928+ }
929+ return f_wash (b);
930+}
931diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
932new file mode 100644
933index 0000000000..26fa067abf
934--- /dev/null
935+++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
936@@ -0,0 +1,101 @@
937+/* Single-precision floating point square root.
938+ Copyright (C) 2010 Free Software Foundation, Inc.
939+ This file is part of the GNU C Library.
940+
941+ The GNU C Library is free software; you can redistribute it and/or
942+ modify it under the terms of the GNU Lesser General Public
943+ License as published by the Free Software Foundation; either
944+ version 2.1 of the License, or (at your option) any later version.
945+
946+ The GNU C Library is distributed in the hope that it will be useful,
947+ but WITHOUT ANY WARRANTY; without even the implied warranty of
948+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
949+ Lesser General Public License for more details.
950+
951+ You should have received a copy of the GNU Lesser General Public
952+ License along with the GNU C Library; if not, write to the Free
953+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
954+ 02111-1307 USA. */
955+
956+#include <math.h>
957+#include <math_private.h>
958+#include <fenv_libc.h>
959+#include <inttypes.h>
960+
961+#include <sysdep.h>
962+#include <ldsodefs.h>
963+
964+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
965+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
966+static const float threehalf = 1.5;
967+
968+/* The method is based on the descriptions in:
969+
970+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
971+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
972+
973+ We find the reciprocal square root and use that to compute the actual
974+ square root. */
975+
976+#ifdef __STDC__
977+float
978+__ieee754_sqrtf (float b)
979+#else
980+float
981+__ieee754_sqrtf (b)
982+ float b;
983+#endif
984+{
985+ if (__builtin_expect (b > 0, 1))
986+ {
987+#define FMSUB(a_, c_, b_) \
988+ ({ double __r; \
989+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
990+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
991+ __r;})
992+#define FNMSUB(a_, c_, b_) \
993+ ({ double __r; \
994+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
995+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
996+ __r;})
997+
998+ if (__builtin_expect (b != a_inf.value, 1))
999+ {
1000+ double y, x;
1001+ fenv_t fe;
1002+
1003+ fe = fegetenv_register ();
1004+
1005+ relax_fenv_state ();
1006+
1007+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1008+ y = FMSUB (threehalf, b, b);
1009+
1010+ /* Initial estimate. */
1011+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1012+
1013+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1014+ x = x * FNMSUB (y, x * x, threehalf);
1015+ x = x * FNMSUB (y, x * x, threehalf);
1016+ x = x * FNMSUB (y, x * x, threehalf);
1017+
1018+ /* All done. */
1019+ fesetenv_register (fe);
1020+ return x * b;
1021+ }
1022+ }
1023+ else if (b < 0)
1024+ {
1025+ /* For some reason, some PowerPC32 processors don't implement
1026+ FE_INVALID_SQRT. */
1027+#ifdef FE_INVALID_SQRT
1028+ feraiseexcept (FE_INVALID_SQRT);
1029+
1030+ fenv_union_t u = { .fenv = fegetenv_register () };
1031+ if ((u.l & FE_INVALID) == 0)
1032+#endif
1033+ feraiseexcept (FE_INVALID);
1034+ b = a_nan.value;
1035+ }
1036+ return f_washf (b);
1037+}
1038diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
1039new file mode 100644
1040index 0000000000..71e516d1c8
1041--- /dev/null
1042+++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
1043@@ -0,0 +1,134 @@
1044+/* Double-precision floating point square root.
1045+ Copyright (C) 2010 Free Software Foundation, Inc.
1046+ This file is part of the GNU C Library.
1047+
1048+ The GNU C Library is free software; you can redistribute it and/or
1049+ modify it under the terms of the GNU Lesser General Public
1050+ License as published by the Free Software Foundation; either
1051+ version 2.1 of the License, or (at your option) any later version.
1052+
1053+ The GNU C Library is distributed in the hope that it will be useful,
1054+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1055+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1056+ Lesser General Public License for more details.
1057+
1058+ You should have received a copy of the GNU Lesser General Public
1059+ License along with the GNU C Library; if not, write to the Free
1060+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1061+ 02111-1307 USA. */
1062+
1063+#include <math.h>
1064+#include <math_private.h>
1065+#include <fenv_libc.h>
1066+#include <inttypes.h>
1067+
1068+#include <sysdep.h>
1069+#include <ldsodefs.h>
1070+
1071+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1072+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1073+static const float two108 = 3.245185536584267269e+32;
1074+static const float twom54 = 5.551115123125782702e-17;
1075+static const float half = 0.5;
1076+
1077+/* The method is based on the descriptions in:
1078+
1079+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1080+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1081+
1082+ We find the actual square root and half of its reciprocal
1083+ simultaneously. */
1084+
1085+#ifdef __STDC__
1086+double
1087+__ieee754_sqrt (double b)
1088+#else
1089+double
1090+__ieee754_sqrt (b)
1091+ double b;
1092+#endif
1093+{
1094+ if (__builtin_expect (b > 0, 1))
1095+ {
1096+ double y, g, h, d, r;
1097+ ieee_double_shape_type u;
1098+
1099+ if (__builtin_expect (b != a_inf.value, 1))
1100+ {
1101+ fenv_t fe;
1102+
1103+ fe = fegetenv_register ();
1104+
1105+ u.value = b;
1106+
1107+ relax_fenv_state ();
1108+
1109+ __asm__ ("frsqrte %[estimate], %[x]\n"
1110+ : [estimate] "=f" (y) : [x] "f" (b));
1111+
1112+ /* Following Muller et al, page 168, equation 5.20.
1113+
1114+ h goes to 1/(2*sqrt(b))
1115+ g goes to sqrt(b).
1116+
1117+ We need three iterations to get within 1ulp. */
1118+
1119+ /* Indicate that these can be performed prior to the branch. GCC
1120+ insists on sinking them below the branch, however; it seems like
1121+ they'd be better before the branch so that we can cover any latency
1122+ from storing the argument and loading its high word. Oh well. */
1123+
1124+ g = b * y;
1125+ h = 0.5 * y;
1126+
1127+ /* Handle small numbers by scaling. */
1128+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
1129+ return __ieee754_sqrt (b * two108) * twom54;
1130+
1131+#define FMADD(a_, c_, b_) \
1132+ ({ double __r; \
1133+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
1134+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1135+ __r;})
1136+#define FNMSUB(a_, c_, b_) \
1137+ ({ double __r; \
1138+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1139+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1140+ __r;})
1141+
1142+ r = FNMSUB (g, h, half);
1143+ g = FMADD (g, r, g);
1144+ h = FMADD (h, r, h);
1145+
1146+ r = FNMSUB (g, h, half);
1147+ g = FMADD (g, r, g);
1148+ h = FMADD (h, r, h);
1149+
1150+ r = FNMSUB (g, h, half);
1151+ g = FMADD (g, r, g);
1152+ h = FMADD (h, r, h);
1153+
1154+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
1155+
1156+ /* Final refinement. */
1157+ d = FNMSUB (g, g, b);
1158+
1159+ fesetenv_register (fe);
1160+ return FMADD (d, h, g);
1161+ }
1162+ }
1163+ else if (b < 0)
1164+ {
1165+ /* For some reason, some PowerPC32 processors don't implement
1166+ FE_INVALID_SQRT. */
1167+#ifdef FE_INVALID_SQRT
1168+ feraiseexcept (FE_INVALID_SQRT);
1169+
1170+ fenv_union_t u = { .fenv = fegetenv_register () };
1171+ if ((u.l & FE_INVALID) == 0)
1172+#endif
1173+ feraiseexcept (FE_INVALID);
1174+ b = a_nan.value;
1175+ }
1176+ return f_wash (b);
1177+}
1178diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
1179new file mode 100644
1180index 0000000000..26fa067abf
1181--- /dev/null
1182+++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
1183@@ -0,0 +1,101 @@
1184+/* Single-precision floating point square root.
1185+ Copyright (C) 2010 Free Software Foundation, Inc.
1186+ This file is part of the GNU C Library.
1187+
1188+ The GNU C Library is free software; you can redistribute it and/or
1189+ modify it under the terms of the GNU Lesser General Public
1190+ License as published by the Free Software Foundation; either
1191+ version 2.1 of the License, or (at your option) any later version.
1192+
1193+ The GNU C Library is distributed in the hope that it will be useful,
1194+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1195+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1196+ Lesser General Public License for more details.
1197+
1198+ You should have received a copy of the GNU Lesser General Public
1199+ License along with the GNU C Library; if not, write to the Free
1200+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1201+ 02111-1307 USA. */
1202+
1203+#include <math.h>
1204+#include <math_private.h>
1205+#include <fenv_libc.h>
1206+#include <inttypes.h>
1207+
1208+#include <sysdep.h>
1209+#include <ldsodefs.h>
1210+
1211+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1212+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1213+static const float threehalf = 1.5;
1214+
1215+/* The method is based on the descriptions in:
1216+
1217+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1218+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1219+
1220+ We find the reciprocal square root and use that to compute the actual
1221+ square root. */
1222+
1223+#ifdef __STDC__
1224+float
1225+__ieee754_sqrtf (float b)
1226+#else
1227+float
1228+__ieee754_sqrtf (b)
1229+ float b;
1230+#endif
1231+{
1232+ if (__builtin_expect (b > 0, 1))
1233+ {
1234+#define FMSUB(a_, c_, b_) \
1235+ ({ double __r; \
1236+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
1237+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1238+ __r;})
1239+#define FNMSUB(a_, c_, b_) \
1240+ ({ double __r; \
1241+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1242+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1243+ __r;})
1244+
1245+ if (__builtin_expect (b != a_inf.value, 1))
1246+ {
1247+ double y, x;
1248+ fenv_t fe;
1249+
1250+ fe = fegetenv_register ();
1251+
1252+ relax_fenv_state ();
1253+
1254+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1255+ y = FMSUB (threehalf, b, b);
1256+
1257+ /* Initial estimate. */
1258+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1259+
1260+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1261+ x = x * FNMSUB (y, x * x, threehalf);
1262+ x = x * FNMSUB (y, x * x, threehalf);
1263+ x = x * FNMSUB (y, x * x, threehalf);
1264+
1265+ /* All done. */
1266+ fesetenv_register (fe);
1267+ return x * b;
1268+ }
1269+ }
1270+ else if (b < 0)
1271+ {
1272+ /* For some reason, some PowerPC32 processors don't implement
1273+ FE_INVALID_SQRT. */
1274+#ifdef FE_INVALID_SQRT
1275+ feraiseexcept (FE_INVALID_SQRT);
1276+
1277+ fenv_union_t u = { .fenv = fegetenv_register () };
1278+ if ((u.l & FE_INVALID) == 0)
1279+#endif
1280+ feraiseexcept (FE_INVALID);
1281+ b = a_nan.value;
1282+ }
1283+ return f_washf (b);
1284+}
1285diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
1286new file mode 100644
1287index 0000000000..71e516d1c8
1288--- /dev/null
1289+++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
1290@@ -0,0 +1,134 @@
1291+/* Double-precision floating point square root.
1292+ Copyright (C) 2010 Free Software Foundation, Inc.
1293+ This file is part of the GNU C Library.
1294+
1295+ The GNU C Library is free software; you can redistribute it and/or
1296+ modify it under the terms of the GNU Lesser General Public
1297+ License as published by the Free Software Foundation; either
1298+ version 2.1 of the License, or (at your option) any later version.
1299+
1300+ The GNU C Library is distributed in the hope that it will be useful,
1301+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1302+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1303+ Lesser General Public License for more details.
1304+
1305+ You should have received a copy of the GNU Lesser General Public
1306+ License along with the GNU C Library; if not, write to the Free
1307+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1308+ 02111-1307 USA. */
1309+
1310+#include <math.h>
1311+#include <math_private.h>
1312+#include <fenv_libc.h>
1313+#include <inttypes.h>
1314+
1315+#include <sysdep.h>
1316+#include <ldsodefs.h>
1317+
1318+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1319+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1320+static const float two108 = 3.245185536584267269e+32;
1321+static const float twom54 = 5.551115123125782702e-17;
1322+static const float half = 0.5;
1323+
1324+/* The method is based on the descriptions in:
1325+
1326+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1327+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1328+
1329+ We find the actual square root and half of its reciprocal
1330+ simultaneously. */
1331+
1332+#ifdef __STDC__
1333+double
1334+__ieee754_sqrt (double b)
1335+#else
1336+double
1337+__ieee754_sqrt (b)
1338+ double b;
1339+#endif
1340+{
1341+ if (__builtin_expect (b > 0, 1))
1342+ {
1343+ double y, g, h, d, r;
1344+ ieee_double_shape_type u;
1345+
1346+ if (__builtin_expect (b != a_inf.value, 1))
1347+ {
1348+ fenv_t fe;
1349+
1350+ fe = fegetenv_register ();
1351+
1352+ u.value = b;
1353+
1354+ relax_fenv_state ();
1355+
1356+ __asm__ ("frsqrte %[estimate], %[x]\n"
1357+ : [estimate] "=f" (y) : [x] "f" (b));
1358+
1359+ /* Following Muller et al, page 168, equation 5.20.
1360+
1361+ h goes to 1/(2*sqrt(b))
1362+ g goes to sqrt(b).
1363+
1364+ We need three iterations to get within 1ulp. */
1365+
1366+ /* Indicate that these can be performed prior to the branch. GCC
1367+ insists on sinking them below the branch, however; it seems like
1368+ they'd be better before the branch so that we can cover any latency
1369+ from storing the argument and loading its high word. Oh well. */
1370+
1371+ g = b * y;
1372+ h = 0.5 * y;
1373+
1374+ /* Handle small numbers by scaling. */
1375+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
1376+ return __ieee754_sqrt (b * two108) * twom54;
1377+
1378+#define FMADD(a_, c_, b_) \
1379+ ({ double __r; \
1380+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
1381+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1382+ __r;})
1383+#define FNMSUB(a_, c_, b_) \
1384+ ({ double __r; \
1385+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1386+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1387+ __r;})
1388+
1389+ r = FNMSUB (g, h, half);
1390+ g = FMADD (g, r, g);
1391+ h = FMADD (h, r, h);
1392+
1393+ r = FNMSUB (g, h, half);
1394+ g = FMADD (g, r, g);
1395+ h = FMADD (h, r, h);
1396+
1397+ r = FNMSUB (g, h, half);
1398+ g = FMADD (g, r, g);
1399+ h = FMADD (h, r, h);
1400+
1401+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
1402+
1403+ /* Final refinement. */
1404+ d = FNMSUB (g, g, b);
1405+
1406+ fesetenv_register (fe);
1407+ return FMADD (d, h, g);
1408+ }
1409+ }
1410+ else if (b < 0)
1411+ {
1412+ /* For some reason, some PowerPC32 processors don't implement
1413+ FE_INVALID_SQRT. */
1414+#ifdef FE_INVALID_SQRT
1415+ feraiseexcept (FE_INVALID_SQRT);
1416+
1417+ fenv_union_t u = { .fenv = fegetenv_register () };
1418+ if ((u.l & FE_INVALID) == 0)
1419+#endif
1420+ feraiseexcept (FE_INVALID);
1421+ b = a_nan.value;
1422+ }
1423+ return f_wash (b);
1424+}
1425diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
1426new file mode 100644
1427index 0000000000..26fa067abf
1428--- /dev/null
1429+++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
1430@@ -0,0 +1,101 @@
1431+/* Single-precision floating point square root.
1432+ Copyright (C) 2010 Free Software Foundation, Inc.
1433+ This file is part of the GNU C Library.
1434+
1435+ The GNU C Library is free software; you can redistribute it and/or
1436+ modify it under the terms of the GNU Lesser General Public
1437+ License as published by the Free Software Foundation; either
1438+ version 2.1 of the License, or (at your option) any later version.
1439+
1440+ The GNU C Library is distributed in the hope that it will be useful,
1441+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1442+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1443+ Lesser General Public License for more details.
1444+
1445+ You should have received a copy of the GNU Lesser General Public
1446+ License along with the GNU C Library; if not, write to the Free
1447+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1448+ 02111-1307 USA. */
1449+
1450+#include <math.h>
1451+#include <math_private.h>
1452+#include <fenv_libc.h>
1453+#include <inttypes.h>
1454+
1455+#include <sysdep.h>
1456+#include <ldsodefs.h>
1457+
1458+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1459+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1460+static const float threehalf = 1.5;
1461+
1462+/* The method is based on the descriptions in:
1463+
1464+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1465+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1466+
1467+ We find the reciprocal square root and use that to compute the actual
1468+ square root. */
1469+
1470+#ifdef __STDC__
1471+float
1472+__ieee754_sqrtf (float b)
1473+#else
1474+float
1475+__ieee754_sqrtf (b)
1476+ float b;
1477+#endif
1478+{
1479+ if (__builtin_expect (b > 0, 1))
1480+ {
1481+#define FMSUB(a_, c_, b_) \
1482+ ({ double __r; \
1483+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
1484+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1485+ __r;})
1486+#define FNMSUB(a_, c_, b_) \
1487+ ({ double __r; \
1488+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1489+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1490+ __r;})
1491+
1492+ if (__builtin_expect (b != a_inf.value, 1))
1493+ {
1494+ double y, x;
1495+ fenv_t fe;
1496+
1497+ fe = fegetenv_register ();
1498+
1499+ relax_fenv_state ();
1500+
1501+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1502+ y = FMSUB (threehalf, b, b);
1503+
1504+ /* Initial estimate. */
1505+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1506+
1507+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1508+ x = x * FNMSUB (y, x * x, threehalf);
1509+ x = x * FNMSUB (y, x * x, threehalf);
1510+ x = x * FNMSUB (y, x * x, threehalf);
1511+
1512+ /* All done. */
1513+ fesetenv_register (fe);
1514+ return x * b;
1515+ }
1516+ }
1517+ else if (b < 0)
1518+ {
1519+ /* For some reason, some PowerPC32 processors don't implement
1520+ FE_INVALID_SQRT. */
1521+#ifdef FE_INVALID_SQRT
1522+ feraiseexcept (FE_INVALID_SQRT);
1523+
1524+ fenv_union_t u = { .fenv = fegetenv_register () };
1525+ if ((u.l & FE_INVALID) == 0)
1526+#endif
1527+ feraiseexcept (FE_INVALID);
1528+ b = a_nan.value;
1529+ }
1530+ return f_washf (b);
1531+}
1532diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
1533new file mode 100644
1534index 0000000000..b103b4dea5
1535--- /dev/null
1536+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
1537@@ -0,0 +1 @@
1538+powerpc/powerpc32/603e/fpu
1539diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
1540new file mode 100644
1541index 0000000000..64db17fada
1542--- /dev/null
1543+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
1544@@ -0,0 +1,2 @@
1545+# e300c3 is a variant of 603e so use the same optimizations for sqrt
1546+powerpc/powerpc32/603e/fpu
1547diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
1548new file mode 100644
1549index 0000000000..7eac5fcf02
1550--- /dev/null
1551+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
1552@@ -0,0 +1 @@
1553+powerpc/powerpc32/e500mc/fpu
1554diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
1555new file mode 100644
1556index 0000000000..264b2a7700
1557--- /dev/null
1558+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
1559@@ -0,0 +1 @@
1560+powerpc/powerpc32/e5500/fpu
1561diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
1562new file mode 100644
1563index 0000000000..a25934467b
1564--- /dev/null
1565+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
1566@@ -0,0 +1 @@
1567+powerpc/powerpc32/e6500/fpu
1568diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
1569new file mode 100644
1570index 0000000000..a7bc854be8
1571--- /dev/null
1572+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
1573@@ -0,0 +1 @@
1574+powerpc/powerpc64/e5500/fpu
1575diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
1576new file mode 100644
1577index 0000000000..04ff8cc181
1578--- /dev/null
1579+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
1580@@ -0,0 +1 @@
1581+powerpc/powerpc64/e6500/fpu
Andrew Geissler635e0e42020-08-21 15:58:33 -05001582--
15832.27.0
1584