blob: c8d13af0084290b369385281c971fd0ddfb3e66d [file] [log] [blame]
Andrew Geissler84ad7c52020-06-27 00:00:16 -05001From b35b582ef3f6575447097585174302fde1761078 Mon Sep 17 00:00:00 2001
2From: Nagaraju <nmekala@xilinx.com>
3Date: Wed, 24 Apr 2019 23:29:21 +0530
4Subject: [PATCH 11/11] Remove the assembly implementation of the 64-bit
5 string functions. Revisit in the next release and fix it.
6
7---
8 newlib/libc/machine/microblaze/mb_endian.h | 4 ++
9 newlib/libc/machine/microblaze/strcmp.c | 93 ++++++++++--------------------
10 newlib/libc/machine/microblaze/strcpy.c | 82 ++++++++------------------
11 newlib/libc/machine/microblaze/strlen.c | 59 +++++++------------
12 4 files changed, 81 insertions(+), 157 deletions(-)
13
14diff --git a/newlib/libc/machine/microblaze/mb_endian.h b/newlib/libc/machine/microblaze/mb_endian.h
15index fb217ec..17772c8 100644
16--- a/newlib/libc/machine/microblaze/mb_endian.h
17+++ b/newlib/libc/machine/microblaze/mb_endian.h
18@@ -8,8 +8,12 @@
19 #ifdef __LITTLE_ENDIAN__
20 #define LOAD4BYTES(rD,rA,rB) "\tlwr\t" rD ", " rA ", " rB "\n"
21 #define STORE4BYTES(rD,rA,rB) "\tswr\t" rD ", " rA ", " rB "\n"
22+#define LOAD8BYTES(rD,rA,rB) "\tllr\t" rD ", " rA ", " rB "\n"
23+#define STORE8BYTES(rD,rA,rB) "\tslr\t" rD ", " rA ", " rB "\n"
24 #else
25 #define LOAD4BYTES(rD,rA,rB) "\tlw\t" rD ", " rA ", " rB "\n"
26 #define STORE4BYTES(rD,rA,rB) "\tsw\t" rD ", " rA ", " rB "\n"
27+#define LOAD8BYTES(rD,rA,rB) "\tll\t" rD ", " rA ", " rB "\n"
28+#define STORE8BYTES(rD,rA,rB) "\tsl\t" rD ", " rA ", " rB "\n"
29 #endif
30 #endif
31diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c
32index acfe4cd..e34c64a 100644
33--- a/newlib/libc/machine/microblaze/strcmp.c
34+++ b/newlib/libc/machine/microblaze/strcmp.c
35@@ -129,70 +129,42 @@ strcmp (const char *s1,
36 return (*(unsigned char *) s1) - (*(unsigned char *) s2);
37 #endif /* not PREFER_SIZE_OVER_SPEED */
38
39+#elif __arch64__
40+ unsigned int *a1;
41+ unsigned int *a2;
42+
43+ /* If s1 or s2 are unaligned, then compare bytes. */
44+ if (!UNALIGNED (s1, s2))
45+ {
46+ /* If s1 and s2 are word-aligned, compare them a word at a time. */
47+ a1 = (unsigned int*)s1;
48+ a2 = (unsigned int*)s2;
49+ while (*a1 == *a2)
50+ {
51+ /* To get here, *a1 == *a2, thus if we find a null in *a1,
52+ then the strings must be equal, so return zero. */
53+ if (DETECTNULL (*a1))
54+ return 0;
55+
56+ a1++;
57+ a2++;
58+ }
59+
60+ /* A difference was detected in the last few bytes of s1, so search bytewise. */
61+ s1 = (char*)a1;
62+ s2 = (char*)a2;
63+ }
64+
65+ while (*s1 != '\0' && *s1 == *s2)
66+ {
67+ s1++;
68+ s2++;
69+ }
70+ return (*(unsigned char *) s1) - (*(unsigned char *) s2);
71 #else
72
73 #include "mb_endian.h"
74
75-#ifdef __arch64__
76- asm volatile (" \n\
77- orl r9, r0, r0 /* Index register */ \n\
78-check_alignment: \n\
79- andli r3, r5, 3 \n\
80- andli r4, r6, 3 \n\
81- beanei r3, try_align_args \n\
82- beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\
83-cmp_loop: \n"
84- LOAD4BYTES("r3", "r5", "r9")
85- LOAD4BYTES("r4", "r6", "r9")
86-" \n\
87- pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\
88- beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\
89- cmplu r7, r4, r3 /* ELSE compare whole word */ \n\
90- beanei r7, end_cmp \n\
91- addlik r9, r9, 4 /* delay slot */ \n\
92- breaid cmp_loop \n\
93- nop /* delay slot */ \n\
94-end_cmp_loop: \n\
95- lbu r3, r5, r9 /* byte compare loop */ \n\
96- lbu r4, r6, r9 \n\
97- cmplu r7, r4, r3 /* Compare bytes */ \n\
98- beanei r7, end_cmp_early \n\
99- addlik r9, r9, 1 /* delay slot */ \n\
100- beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\
101- nop \n\
102-end_cmp_early: \n\
103- orl r3, r0, r7 /* delay slot */ \n\
104- rtsd r15, 8 \n\
105- nop \n\
106-try_align_args: \n\
107- xorl r7, r4, r3 \n\
108- beanei r7, regular_strcmp /* cannot align args */ \n\
109- rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\
110-align_loop: \n\
111- lbu r3, r5, r9 \n\
112- lbu r4, r6, r9 \n\
113- cmplu r7, r4, r3 \n\
114- beanei r7, end_cmp \n\
115- beaeqi r3, end_cmp \n\
116- addlik r10, r10, -1 \n\
117- addlik r9, r9, 1 \n\
118- beaeqid r10, cmp_loop \n\
119- nop \n\
120- breai align_loop \n\
121-regular_strcmp: \n\
122- lbu r3, r5, r9 \n\
123- lbu r4, r6, r9 \n\
124- cmplu r7, r4, r3 \n\
125- beanei r7, end_cmp \n\
126- beaeqi r3, end_cmp \n\
127- addlik r9, r9, 1 \n\
128- breaid regular_strcmp \n\
129- nop \n\
130-end_cmp: \n\
131- orl r3, r0, r7 \n\
132- rtsd r15, 8 \n\
133- nop /* Return strcmp result */");
134-#else
135 asm volatile (" \n\
136 or r9, r0, r0 /* Index register */\n\
137 check_alignment: \n\
138@@ -246,7 +218,6 @@ end_cmp:
139 rtsd r15, 8 \n\
140 or r3, r0, r7 /* Return strcmp result */");
141
142-#endif
143 #endif /* ! HAVE_HW_PCMP */
144 }
145
146diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c
147index 6dbc60d..ddb6922 100644
148--- a/newlib/libc/machine/microblaze/strcpy.c
149+++ b/newlib/libc/machine/microblaze/strcpy.c
150@@ -121,67 +121,36 @@ strcpy (char *__restrict dst0,
151 ;
152 return dst0;
153 #endif /* not PREFER_SIZE_OVER_SPEED */
154+#elif __arch64__
155+ char *dst = dst0;
156+ const char *src = src0;
157+ long *aligned_dst;
158+ const long *aligned_src;
159
160-#else
161+ /* If SRC or DEST is unaligned, then copy bytes. */
162+ if (!UNALIGNED (src, dst))
163+ {
164+ aligned_dst = (long*)dst;
165+ aligned_src = (long*)src;
166
167-#include "mb_endian.h"
168-#ifdef __arch64__
169+ /* SRC and DEST are both "long int" aligned, try to do "long int"
170+ sized copies. */
171+ while (!DETECTNULL(*aligned_src))
172+ {
173+ *aligned_dst++ = *aligned_src++;
174+ }
175
176- asm volatile (" \n\
177- orl r9, r0, r0 /* Index register */ \n\
178-check_alignment: \n\
179- andli r3, r5, 3 \n\
180- andli r4, r6, 3 \n\
181- beanei r3, try_align_args \n\
182- beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\
183-cpy_loop: \n"
184- LOAD4BYTES("r3", "r6", "r9")
185-" \n\
186- pcmplbf r4, r0, r3 \n\
187- beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n"
188- STORE4BYTES("r3", "r5", "r9")
189-" \n\
190- addlik r9, r9, 4 \n\
191- breaid cpy_loop \n\
192- nop \n\
193-cpy_bytes: \n\
194- lbu r3, r6, r9 \n\
195- sb r3, r5, r9 \n\
196- addlik r4, r4, -1 \n\
197- addlik r9, r9, 1 /* delay slot */\n\
198- beaneid r4, cpy_bytes \n\
199- nop \n\
200-cpy_null: \n\
201- orl r3, r0, r5 /* Return strcpy result */\n\
202- rtsd r15, 8 \n\
203- nop \n\
204-try_align_args: \n\
205- xorl r7, r4, r3 \n\
206- beanei r7, regular_strcpy /* cannot align args */\n\
207- rsublik r10, r3, 4 /* Number of initial bytes to align */\n\
208-align_loop: \n\
209- lbu r3, r6, r9 \n\
210- sb r3, r5, r9 \n\
211- addlik r10, r10, -1 \n\
212- beaeqid r3, end_cpy /* Break if we have seen null character */\n\
213- nop \n\
214- addlik r9, r9, 1 \n\
215- beaneid r10, align_loop \n\
216- nop \n\
217- breai cpy_loop \n\
218-regular_strcpy: \n\
219- lbu r3, r6, r9 \n\
220- sb r3, r5, r9 \n\
221- addlik r9, r9, 1 \n\
222- beaneid r3, regular_strcpy \n\
223- nop \n\
224-end_cpy: \n\
225- orl r3, r0, r5 \n\
226- rtsd r15, 8 \n\
227- nop /* Return strcpy result */");
228+ dst = (char*)aligned_dst;
229+ src = (char*)aligned_src;
230+ }
231
232-#else
233+ while (*dst++ = *src++)
234+ ;
235+ return dst0;
236+
237+#else
238
239+#include "mb_endian.h"
240 asm volatile (" \n\
241 or r9, r0, r0 /* Index register */ \n\
242 check_alignment: \n\
243@@ -227,7 +196,6 @@ regular_strcpy: \n\
244 end_cpy: \n\
245 rtsd r15, 8 \n\
246 or r3, r0, r5 /* Return strcpy result */");
247-#endif
248 #endif /* ! HAVE_HW_PCMP */
249 }
250
251diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c
252index b6f2d3c..9407539 100644
253--- a/newlib/libc/machine/microblaze/strlen.c
254+++ b/newlib/libc/machine/microblaze/strlen.c
255@@ -112,47 +112,29 @@ strlen (const char *str)
256 return str - start;
257 #endif /* not PREFER_SIZE_OVER_SPEED */
258
259-#else
260-
261-#include "mb_endian.h"
262+#elif __arch64__
263+ const char *start = str;
264+ unsigned long *aligned_addr;
265
266-#ifdef __arch64__
267- asm volatile (" \n\
268- orl r9, r0, r0 /* Index register */ \n\
269-check_alignment: \n\
270- andli r3, r5, 3 \n\
271- beanei r3, align_arg \n\
272-len_loop: \n"
273- LOAD4BYTES("r3", "r5", "r9")
274-" \n\
275- pcmplbf r4, r3, r0 \n\
276- beanei r4, end_len \n\
277- addlik r9, r9, 4 \n\
278- breaid len_loop \n\
279- nop \n\
280-end_len: \n\
281- lbu r3, r5, r9 \n\
282- beaeqi r3, done_len \n\
283- addlik r9, r9, 1 \n\
284- breaid end_len \n\
285- nop \n\
286-done_len: \n\
287- orl r3, r0, r9 /* Return len */ \n\
288- rtsd r15, 8 \n\
289- nop \n\
290-align_arg: \n\
291- rsublik r10, r3, 4 \n\
292-align_loop: \n\
293- lbu r3, r5, r9 \n\
294- addlik r10, r10, -1 \n\
295- beaeqid r3, done_len \n\
296- nop \n\
297- addlik r9, r9, 1 \n\
298- beaneid r10, align_loop \n\
299- nop \n\
300- breai len_loop");
301+ if (!UNALIGNED (str))
302+ {
303+ /* If the string is word-aligned, we can check for the presence of
304+ a null in each word-sized block. */
305+ aligned_addr = (unsigned long*)str;
306+ while (!DETECTNULL (*aligned_addr))
307+ aligned_addr++;
308
309+ /* Once a null is detected, we check each byte in that block for a
310+ precise position of the null. */
311+ str = (char*)aligned_addr;
312+ }
313+
314+ while (*str)
315+ str++;
316+ return str - start;
317 #else
318+
319+#include "mb_endian.h"
320 asm volatile (" \n\
321 or r9, r0, r0 /* Index register */ \n\
322 check_alignment: \n\
323@@ -183,6 +165,5 @@ align_loop: \n\
324 addik r9, r9, 1 \n\
325 bri len_loop");
326
327-#endif
328 #endif /* ! HAVE_HW_PCMP */
329 }
330--
3312.7.4
332