blob: 38508b550e18b93556217717bacd0acfec2c917e [file] [log] [blame]
Andrew Geissler84ad7c52020-06-27 00:00:16 -05001From 19d7b2a34f3c69d62f570ac9d0f6bc3cd584b496 Mon Sep 17 00:00:00 2001
2From: Nagaraju <nmekala@xilinx.com>
3Date: Thu, 14 Mar 2019 18:16:32 +0530
4Subject: [PATCH 09/11] Added MB-64 support to strcmp/strcpy/strlen files
5
6---
7 newlib/libc/machine/microblaze/strcmp.c | 61 ++++++++++++++++++++++++++++++++-
8 newlib/libc/machine/microblaze/strcpy.c | 57 ++++++++++++++++++++++++++++++
9 newlib/libc/machine/microblaze/strlen.c | 38 ++++++++++++++++++++
10 3 files changed, 155 insertions(+), 1 deletion(-)
11
12diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c
13index 3119d82..dac64da 100644
14--- a/newlib/libc/machine/microblaze/strcmp.c
15+++ b/newlib/libc/machine/microblaze/strcmp.c
16@@ -133,6 +133,65 @@ strcmp (const char *s1,
17
18 #include "mb_endian.h"
19
20+#ifdef __arch64__
21+ asm volatile (" \n\
22+ orl r9, r0, r0 /* Index register */\n\
23+check_alignment: \n\
24+ andli r3, r5, 3 \n\
25+ andli r4, r6, 3 \n\
26+ beanei r3, try_align_args \n\
27+ beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\
28+cmp_loop: \n"
29+ LOAD4BYTES("r3", "r5", "r9")
30+ LOAD4BYTES("r4", "r6", "r9")
31+" \n\
32+ pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\
33+ beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\
34+ cmplu r7, r4, r3 /* ELSE compare whole word */ \n\
35+ beanei r7, end_cmp \n\
36+ addlik r9, r9, 4 /* advance index to next word (not a delay slot) */ \n\
37+ breaid cmp_loop \n\
38+ nop /* delay slot */ \n\
39+end_cmp_loop: \n\
40+ lbu r3, r5, r9 /* byte compare loop */ \n\
41+ lbu r4, r6, r9 \n\
42+ cmplu r7, r4, r3 /* Compare bytes */ \n\
43+ beanei r7, end_cmp_early \n\
44+ addlik r9, r9, 1 /* advance index to next byte (not a delay slot) */ \n\
45+ beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\
46+ nop \n\
47+end_cmp_early: \n\
48+ or r3, r0, r7 /* Return strcmp result */ \n\
49+ rtsd r15, 8 \n\
50+ nop \n\
51+try_align_args: \n\
52+ xorl r7, r4, r3 \n\
53+ beanei r7, regular_strcmp /* cannot align args */ \n\
54+ rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\
55+align_loop: \n\
56+ lbu r3, r5, r9 \n\
57+ lbu r4, r6, r9 \n\
58+ cmpu r7, r4, r3 \n\
59+ beanei r7, end_cmp \n\
60+ beaeqi r3, end_cmp \n\
61+ addlik r10, r10, -1 \n\
62+ addlik r9, r9, 1 \n\
63+ beaeqid r10, cmp_loop \n\
64+ nop \n\
65+ breai align_loop \n\
66+regular_strcmp: \n\
67+ lbu r3, r5, r9 \n\
68+ lbu r4, r6, r9 \n\
69+ cmplu r7, r4, r3 \n\
70+ beanei r7, end_cmp \n\
71+ beaeqi r3, end_cmp \n\
72+ breaid regular_strcmp \n\
73+ addlik r9, r9, 1 \n\
74+end_cmp: \n\
75+ or r3, r0, r7 \n\
76+ rtsd r15, 8 \n\
77+ nop /* Return strcmp result */");
78+#else
79 asm volatile (" \n\
80 or r9, r0, r0 /* Index register */\n\
81 check_alignment: \n\
82@@ -181,11 +240,12 @@ regular_strcmp:
83 bnei r7, end_cmp \n\
84 beqi r3, end_cmp \n\
85 brid regular_strcmp \n\
86 addik r9, r9, 1 \n\
87 end_cmp: \n\
88 rtsd r15, 8 \n\
89 or r3, r0, r7 /* Return strcmp result */");
90
91+#endif
92 #endif /* ! HAVE_HW_PCMP */
93 }
94
95diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c
96index 62072fa..6dbc60d 100644
97--- a/newlib/libc/machine/microblaze/strcpy.c
98+++ b/newlib/libc/machine/microblaze/strcpy.c
99@@ -125,6 +125,62 @@ strcpy (char *__restrict dst0,
100 #else
101
102 #include "mb_endian.h"
103+#ifdef __arch64__
104+
105+ asm volatile (" \n\
106+ orl r9, r0, r0 /* Index register */ \n\
107+check_alignment: \n\
108+ andli r3, r5, 3 \n\
109+ andli r4, r6, 3 \n\
110+ beanei r3, try_align_args \n\
111+ beanei r4, regular_strcpy /* At this point we don't have a choice */ \n\
112+cpy_loop: \n"
113+ LOAD4BYTES("r3", "r6", "r9")
114+" \n\
115+ pcmplbf r4, r0, r3 \n\
116+ beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n"
117+ STORE4BYTES("r3", "r5", "r9")
118+" \n\
119+ addlik r9, r9, 4 \n\
120+ breaid cpy_loop \n\
121+ nop \n\
122+cpy_bytes: \n\
123+ lbu r3, r6, r9 \n\
124+ sb r3, r5, r9 \n\
125+ addlik r4, r4, -1 \n\
126+ addlik r9, r9, 1 /* advance index to next byte (not a delay slot) */\n\
127+ beaneid r4, cpy_bytes \n\
128+ nop \n\
129+cpy_null: \n\
130+ orl r3, r0, r5 /* Return strcpy result */\n\
131+ rtsd r15, 8 \n\
132+ nop \n\
133+try_align_args: \n\
134+ xorl r7, r4, r3 \n\
135+ beanei r7, regular_strcpy /* cannot align args */\n\
136+ rsublik r10, r3, 4 /* Number of initial bytes to align */\n\
137+align_loop: \n\
138+ lbu r3, r6, r9 \n\
139+ sb r3, r5, r9 \n\
140+ addlik r10, r10, -1 \n\
141+ beaeqid r3, end_cpy /* Break if we have seen null character */\n\
142+ nop \n\
143+ addlik r9, r9, 1 \n\
144+ beaneid r10, align_loop \n\
145+ nop \n\
146+ breai cpy_loop \n\
147+regular_strcpy: \n\
148+ lbu r3, r6, r9 \n\
149+ sb r3, r5, r9 \n\
150+ addlik r9, r9, 1 \n\
151+ beaneid r3, regular_strcpy \n\
152+ nop \n\
153+end_cpy: \n\
154+ orl r3, r0, r5 \n\
155+ rtsd r15, 8 \n\
156+ nop /* Return strcpy result */");
157+
158+#else
159
160 asm volatile (" \n\
161 or r9, r0, r0 /* Index register */ \n\
162@@ -171,6 +227,7 @@ regular_strcpy: \n\
163 end_cpy: \n\
164 rtsd r15, 8 \n\
165 or r3, r0, r5 /* Return strcpy result */");
166+#endif
167 #endif /* ! HAVE_HW_PCMP */
168 }
169
170diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c
171index acb4464..c04fa4f 100644
172--- a/newlib/libc/machine/microblaze/strlen.c
173+++ b/newlib/libc/machine/microblaze/strlen.c
174@@ -116,6 +116,43 @@ strlen (const char *str)
175
176 #include "mb_endian.h"
177
178+#ifdef __arch64__
179+ asm volatile (" \n\
180+ orl r9, r0, r0 /* Index register */ \n\
181+check_alignment: \n\
182+ andli r3, r5, 3 \n\
183+ beanei r3, align_arg \n\
184+len_loop: \n"
185+ LOAD4BYTES("r3", "r5", "r9")
186+" \n\
187+ pcmplbf r4, r3, r0 \n\
188+ beanei r4, end_len \n\
189+ addik r9, r9, 4 \n\
190+ breaid len_loop \n\
191+ nop \n\
192+end_len: \n\
193+ lbu r3, r5, r9 \n\
194+ beaeqi r3, done_len \n\
195+ addik r9, r9, 1 \n\
196+ breaid end_len \n\
197+ nop \n\
198+done_len: \n\
199+ orl r3, r0, r9 /* Return len */ \n\
200+ rtsd r15, 8 \n\
201+ nop \n\
202+align_arg: \n\
203+ rsublik r10, r3, 4 \n\
204+align_loop: \n\
205+ lbu r3, r5, r9 \n\
206+ addlik r10, r10, -1 \n\
207+ beaeqid r3, done_len \n\
208+ nop \n\
209+ addlik r9, r9, 1 \n\
210+ beaneid r10, align_loop \n\
211+ nop \n\
212+ breai len_loop");
213+
214+#else
215 asm volatile (" \n\
216 or r9, r0, r0 /* Index register */ \n\
217 check_alignment: \n\
218@@ -146,5 +183,6 @@ align_loop: \n\
219 addik r9, r9, 1 \n\
220 bri len_loop");
221
222+#endif
223 #endif /* ! HAVE_HW_PCMP */
224 }
225--
2262.7.4
227