From 02a138f0b247fb08b799f32c49b35912b2921321 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Tue, 12 Feb 2019 11:38:46 -0800
Subject: [PATCH] math_vfp_asm.S: Convert fldmia/fstmia instructions to UAL
 syntax for clang

This is flagged by clang's integrated assembler, since it does not
accept non-UAL syntax.
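For example (register operands here are only illustrative, taken from
the hunks below), a pre-UAL single-precision load/store multiple such as

  fldmias r1!, {s0}
  fstmias r0!, {s2}

is written in UAL syntax, which clang's integrated assembler accepts, as

  vldmia.f32 r1!, {s0}
  vstmia.f32 r0!, {s2}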

Upstream-Status: Pending

Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
 liboil/arm/math_vfp_asm.S | 94 +++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/liboil/arm/math_vfp_asm.S b/liboil/arm/math_vfp_asm.S
index ae5c803..3dd14d9 100644
--- a/liboil/arm/math_vfp_asm.S
+++ b/liboil/arm/math_vfp_asm.S
@@ -25,7 +25,7 @@
 */

 #if defined(__VFP_FP__) && !defined(__SOFTFP__)
-/*
+/*
 ** compile with -mcpu=arm1136j-s -mfpu=vfp -mfloat-abi=softfp
 **
 ** void vfp_add_f32 (float *d, const float *s1, const float *s2, int n);
@@ -48,10 +48,10 @@
 ands ip, r3, #7; /* ip = n % 8 */ \
 beq vfp_ ## fname ## _unroll; /* if ip == 0 goto prep_loop2 */ \
 vfp_ ## fname ## _loop1: \
- fldmias r1!, {s0}; \
- fldmias r2!, {s1}; \
+ vldmia.f32 r1!, {s0}; \
+ vldmia.f32 r2!, {s1}; \
 ## finst ##s s2, s0, s1; \
- fstmias r0!, {s2}; \
+ vstmia.f32 r0!, {s2}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop1; \
 vfp_ ## fname ## _unroll: /* unroll by 8 */ \
@@ -62,15 +62,15 @@
 orr fp, lr, fp, lsl #16; /* set vector lenght to 8 */ \
 fmxr fpscr, fp; \
 vfp_ ## fname ## _loop2: \
- fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}; \
- fldmias r2!, {s16, s17, s18, s19, s20, s21, s22, s23}; \
+ vldmia.f32 r1!, {s8, s9, s10, s11, s12, s13, s14, s15}; \
+ vldmia.f32 r2!, {s16, s17, s18, s19, s20, s21, s22, s23}; \
 ## finst ##s s24, s8, s16; \
- fstmias r0!, {s24, s25, s26, s27, s28, s29, s30, s31}; \
+ vstmia.f32 r0!, {s24, s25, s26, s27, s28, s29, s30, s31}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop2; \
 fmxr fpscr, lr; /* restore original fpscr */ \
 vfp_ ## fname ## _end: \
- ldmia sp!, {fp, pc}; /* recovering from stack and return */
+ ldmia sp!, {fp, pc}; /* recovering from stack and return */

 #define UNROLL_F64_TEMPLATE(fname,finst) \
 .global vfp_ ## fname ## ; \
@@ -79,10 +79,10 @@
 ands ip, r3, #3; /* ip = n % 3 */ \
 beq vfp_ ## fname ## _unroll; /* if ip == 0 goto prep_loop2 */ \
 vfp_ ## fname ## _loop1: \
- fldmiad r1!, {d0}; \
- fldmiad r2!, {d1}; \
+ vldmia.f64 r1!, {d0}; \
+ vldmia.f64 r2!, {d1}; \
 ## finst ##d d2, d0, d1; \
- fstmiad r0!, {d2}; \
+ vstmia.f64 r0!, {d2}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop1; \
 vfp_ ## fname ## _unroll: /* unroll by 4 */ \
@@ -93,15 +93,15 @@
 orr fp, lr, fp, lsl #16; /* set vector lenght to 8 */ \
 fmxr fpscr, fp; \
 vfp_ ## fname ## _loop2: \
- fldmiad r1!, {d4, d5, d6, d7}; \
- fldmiad r2!, {d8, d9, d10, d11}; \
+ vldmia.f64 r1!, {d4, d5, d6, d7}; \
+ vldmia.f64 r2!, {d8, d9, d10, d11}; \
 ## finst ##d d12, d4, d8; \
- fstmiad r0!, {d12, d13, d14, d15}; \
+ vstmia.f64 r0!, {d12, d13, d14, d15}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop2; \
 fmxr fpscr, lr; /* restore original fpscr */ \
 vfp_ ## fname ## _end: \
- ldmia sp!, {fp, pc}; /* recovering from stack and return */
+ ldmia sp!, {fp, pc}; /* recovering from stack and return */

 .align 2
 UNROLL_F32_TEMPLATE(add_f32,fadd);
@@ -119,7 +119,7 @@ UNROLL_F64_TEMPLATE(subtract_f64,fsub);
 #undef UNROLL_F32_TEMPLATE
 #undef UNROLL_F64_TEMPLATE

-/*
+/*
 **
 ** void vfp_scalaradd_f32_ns (float *d, const float *s1, const float *s2_1, int n);
 ** void vfp_scalaradd_f64_ns (double *d, const double *s1, const double *s2_1, int n);
@@ -133,13 +133,13 @@ UNROLL_F64_TEMPLATE(subtract_f64,fsub);
 .global vfp_ ## fname ## ; \
 vfp_ ## fname ## : \
 stmdb sp!, {fp, lr}; /* save registers to stack */ \
- fldmias r2, {s1}; /* load scalar value */ \
+ vldmia.f32 r2, {s1}; /* load scalar value */ \
 ands ip, r3, #7; /* ip = n % 8 */ \
 beq vfp_ ## fname ## _unroll; /* if ip == 0 goto prep_loop2 */ \
 vfp_ ## fname ## _loop1: \
- fldmias r1!, {s0}; \
+ vldmia.f32 r1!, {s0}; \
 ## finst ##s s2, s0, s1; \
- fstmias r0!, {s2}; \
+ vstmia.f32 r0!, {s2}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop1; \
 vfp_ ## fname ## _unroll: /* unroll by 8 */ \
@@ -150,26 +150,26 @@ UNROLL_F64_TEMPLATE(subtract_f64,fsub);
 orr fp, lr, fp, lsl #16; /* set vector lenght to 8 */ \
 fmxr fpscr, fp; \
 vfp_ ## fname ## _loop2: \
- fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}; \
+ vldmia.f32 r1!, {s8, s9, s10, s11, s12, s13, s14, s15}; \
 ## finst ##s s24, s8, s1; \
- fstmias r0!, {s24, s25, s26, s27, s28, s29, s30, s31}; \
+ vstmia.f32 r0!, {s24, s25, s26, s27, s28, s29, s30, s31}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop2; \
 fmxr fpscr, lr; /* restore original fpscr */ \
 vfp_ ## fname ## _end: \
- ldmia sp!, {fp, pc}; /* recovering from stack and return */
+ ldmia sp!, {fp, pc}; /* recovering from stack and return */

 #define UNROLL_F64_TEMPLATE(fname,finst) \
 .global vfp_ ## fname ## ; \
 vfp_ ## fname ## : \
 stmdb sp!, {fp, lr}; /* save registers to stack */ \
- fldmiad r2, {d1}; /* load scalar value */ \
+ vldmia.f64 r2, {d1}; /* load scalar value */ \
 ands ip, r3, #3; /* ip = n % 3 */ \
 beq vfp_ ## fname ## _unroll; /* if ip == 0 goto prep_loop2 */ \
 vfp_ ## fname ## _loop1: \
- fldmiad r1!, {d0}; \
+ vldmia.f64 r1!, {d0}; \
 ## finst ##d d2, d0, d1; \
- fstmiad r0!, {d2}; \
+ vstmia.f64 r0!, {d2}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop1; \
 vfp_ ## fname ## _unroll: /* unroll by 4 */ \
@@ -180,14 +180,14 @@ UNROLL_F64_TEMPLATE(subtract_f64,fsub);
 orr fp, lr, fp, lsl #16; /* set vector lenght to 4 */ \
 fmxr fpscr, fp; \
 vfp_ ## fname ## _loop2: \
- fldmiad r1!, {d4, d5, d6, d7}; \
+ vldmia.f64 r1!, {d4, d5, d6, d7}; \
 ## finst ##d d12, d4, d1; \
- fstmiad r0!, {d12, d13, d14, d15}; \
+ vstmia.f64 r0!, {d12, d13, d14, d15}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop2; \
 fmxr fpscr, lr; /* restore original fpscr */ \
 vfp_ ## fname ## _end: \
- ldmia sp!, {fp, pc}; /* recovering from stack and return */
+ ldmia sp!, {fp, pc}; /* recovering from stack and return */

 UNROLL_F32_TEMPLATE(scalaradd_f32_ns,fadd);
 UNROLL_F64_TEMPLATE(scalaradd_f64_ns,fadd);
@@ -198,7 +198,7 @@ UNROLL_F64_TEMPLATE(scalarmultiply_f64_ns,fmul);
 #undef UNROLL_F32_TEMPLATE
 #undef UNROLL_F64_TEMPLATE

-/*
+/*
 **
 ** void vfp_abs_f32_f32_ns(float *d, const float *s, int n);
 ** void vfp_abs_f64_f64_ns(double *d, const double *s, int n);
@@ -215,9 +215,9 @@ UNROLL_F64_TEMPLATE(scalarmultiply_f64_ns,fmul);
 ands ip, r2, #7; /* ip = n % 8 */ \
 beq vfp_ ## fname ## _unroll; /* if ip == 0 goto prep_loop2 */ \
 vfp_ ## fname ## _loop1: \
- fldmias r1!, {s0}; \
- ## finst ##s s2, s0; \
- fstmias r0!, {s2}; \
+ vldmia.f32 r1!, {s0}; \
+ ## finst ##.f32 s2, s0; \
+ vstmia.f32 r0!, {s2}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop1; \
 vfp_ ## fname ## _unroll: /* unroll by 8 */ \
@@ -228,14 +228,14 @@ UNROLL_F64_TEMPLATE(scalarmultiply_f64_ns,fmul);
 orr fp, lr, fp, lsl #16; /* set vector lenght to 8 */ \
 fmxr fpscr, fp; \
 vfp_ ## fname ## _loop2: \
- fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}; \
- ## finst ##s s24, s8; \
- fstmias r0!, {s24, s25, s26, s27, s28, s29, s30, s31}; \
+ vldmia.f32 r1!, {s8, s9, s10, s11, s12, s13, s14, s15}; \
+ ## finst ##.f32 s24, s8; \
+ vstmia.f32 r0!, {s24, s25, s26, s27, s28, s29, s30, s31}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop2; \
 fmxr fpscr, lr; /* restore original fpscr */ \
 vfp_ ## fname ## _end: \
- ldmia sp!, {fp, pc}; /* recovering from stack and return */
+ ldmia sp!, {fp, pc}; /* recovering from stack and return */

 #define UNROLL_F64_TEMPLATE(fname,finst) \
 .global vfp_ ## fname ## ; \
@@ -244,9 +244,9 @@ UNROLL_F64_TEMPLATE(scalarmultiply_f64_ns,fmul);
 ands ip, r2, #3; /* ip = n % 3 */ \
 beq vfp_ ## fname ## _unroll; /* if ip == 0 goto prep_loop2 */ \
 vfp_ ## fname ## _loop1: \
- fldmiad r1!, {d0}; \
- ## finst ##d d2, d0; \
- fstmiad r0!, {d2}; \
+ vldmia.f64 r1!, {d0}; \
+ ## finst ##.f64 d2, d0; \
+ vstmia.f64 r0!, {d2}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop1; \
 vfp_ ## fname ## _unroll: /* unroll by 4 */ \
@@ -257,20 +257,20 @@ UNROLL_F64_TEMPLATE(scalarmultiply_f64_ns,fmul);
 orr fp, lr, fp, lsl #16; /* set vector lenght to 4 */ \
 fmxr fpscr, fp; \
 vfp_ ## fname ## _loop2: \
- fldmiad r1!, {d4, d5, d6, d7}; \
- ## finst ##d d12, d4; \
- fstmiad r0!, {d12, d13, d14, d15}; \
+ vldmia.f64 r1!, {d4, d5, d6, d7}; \
+ ## finst ##.f64 d12, d4; \
+ vstmia.f64 r0!, {d12, d13, d14, d15}; \
 subs ip, ip, #1; \
 bne vfp_ ## fname ## _loop2; \
 fmxr fpscr, lr; /* restore original fpscr */ \
 vfp_ ## fname ## _end: \
- ldmia sp!, {fp, pc}; /* recovering from stack and return */
+ ldmia sp!, {fp, pc}; /* recovering from stack and return */

-UNROLL_F32_TEMPLATE(abs_f32_f32_ns,fabs);
-UNROLL_F64_TEMPLATE(abs_f64_f64_ns,fabs);
+UNROLL_F32_TEMPLATE(abs_f32_f32_ns,vabs);
+UNROLL_F64_TEMPLATE(abs_f64_f64_ns,vabs);

-UNROLL_F32_TEMPLATE(negative_f32,fneg);
-UNROLL_F64_TEMPLATE(negative_f64,fneg);
+UNROLL_F32_TEMPLATE(negative_f32,vneg);
+UNROLL_F64_TEMPLATE(negative_f64,vneg);

 #undef UNROLL_F32_TEMPLATE
 #undef UNROLL_F64_TEMPLATE