Andrew Geissler | 84ad7c5 | 2020-06-27 00:00:16 -0500 | [diff] [blame] | 1 | From 19d7b2a34f3c69d62f570ac9d0f6bc3cd584b496 Mon Sep 17 00:00:00 2001 |
| 2 | From: Nagaraju <nmekala@xilinx.com> |
| 3 | Date: Thu, 14 Mar 2019 18:16:32 +0530 |
| 4 | Subject: [PATCH 09/11] Added MB-64 support to strcmp/strcpy/strlen files |
| 5 | |
| 6 | --- |
| 7 | newlib/libc/machine/microblaze/strcmp.c | 61 ++++++++++++++++++++++++++++++++- |
| 8 | newlib/libc/machine/microblaze/strcpy.c | 57 ++++++++++++++++++++++++++++++ |
| 9 | newlib/libc/machine/microblaze/strlen.c | 38 ++++++++++++++++++++ |
| 10 | 3 files changed, 155 insertions(+), 1 deletion(-) |
| 11 | |
| 12 | diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c |
| 13 | index 3119d82..dac64da 100644 |
| 14 | --- a/newlib/libc/machine/microblaze/strcmp.c |
| 15 | +++ b/newlib/libc/machine/microblaze/strcmp.c |
| 16 | @@ -133,6 +133,65 @@ strcmp (const char *s1, |
| 17 | |
| 18 | #include "mb_endian.h" |
| 19 | |
| 20 | +#ifdef __arch64__ |
| 21 | + asm volatile (" \n\ |
| 22 | + orl r9, r0, r0 /* Index register */\n\ |
| 23 | +check_alignment: \n\ |
| 24 | + andli r3, r5, 3 /* r3 = low two bits of r5 */ \n\ |
| 25 | + andli r4, r6, 3 /* r4 = low two bits of r6 */ \n\ |
| 26 | + beanei r3, try_align_args \n\ |
| 27 | + beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\ |
| 28 | +cmp_loop: \n" |
| 29 | + LOAD4BYTES("r3", "r5", "r9") |
| 30 | + LOAD4BYTES("r4", "r6", "r9") |
| 31 | +" \n\ |
| 32 | + pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\ |
| 33 | + beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\ |
| 34 | + cmplu r7, r4, r3 /* ELSE compare whole word */ \n\ |
| 35 | + beanei r7, end_cmp \n\ |
| 36 | + addlik r9, r9, 4 /* advance index to next word */ \n\ |
| 37 | + breaid cmp_loop \n\ |
| 38 | + nop /* delay slot */ \n\ |
| 39 | +end_cmp_loop: \n\ |
| 40 | + lbu r3, r5, r9 /* byte compare loop */ \n\ |
| 41 | + lbu r4, r6, r9 \n\ |
| 42 | + cmplu r7, r4, r3 /* Compare bytes */ \n\ |
| 43 | + beanei r7, end_cmp_early \n\ |
| 44 | + addlik r9, r9, 1 /* advance index to next byte */ \n\ |
| 45 | + beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\ |
| 46 | + nop /* delay slot */ \n\ |
| 47 | +end_cmp_early: \n\ |
| 48 | + or r3, r0, r7 /* Return strcmp result */ \n\ |
| 49 | + rtsd r15, 8 \n\ |
| 50 | + nop /* delay slot */ \n\ |
| 51 | +try_align_args: \n\ |
| 52 | + xorl r7, r4, r3 /* nonzero if the two misalignments differ */ \n\ |
| 53 | + beanei r7, regular_strcmp /* cannot align args */ \n\ |
| 54 | + rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\ |
| 55 | +align_loop: \n\ |
| 56 | + lbu r3, r5, r9 \n\ |
| 57 | + lbu r4, r6, r9 \n\ |
| 58 | + cmplu r7, r4, r3 /* Compare bytes */ \n\ |
| 59 | + beanei r7, end_cmp \n\ |
| 60 | + beaeqi r3, end_cmp \n\ |
| 61 | + addlik r10, r10, -1 \n\ |
| 62 | + addlik r9, r9, 1 \n\ |
| 63 | + beaeqid r10, cmp_loop \n\ |
| 64 | + nop /* delay slot */ \n\ |
| 65 | + breai align_loop \n\ |
| 66 | +regular_strcmp: \n\ |
| 67 | + lbu r3, r5, r9 \n\ |
| 68 | + lbu r4, r6, r9 \n\ |
| 69 | + cmplu r7, r4, r3 /* Compare bytes */ \n\ |
| 70 | + beanei r7, end_cmp \n\ |
| 71 | + beaeqi r3, end_cmp \n\ |
| 72 | + breaid regular_strcmp \n\ |
| 73 | + addlik r9, r9, 1 /* delay slot */ \n\ |
| 74 | +end_cmp: \n\ |
| 75 | + or r3, r0, r7 /* Return strcmp result */ \n\ |
| 76 | + rtsd r15, 8 \n\ |
| 77 | + nop /* delay slot */"); |
| 78 | +#else |
| 79 | asm volatile (" \n\ |
| 80 | or r9, r0, r0 /* Index register */\n\ |
| 81 | check_alignment: \n\ |
| 82 | @@ -181,11 +240,12 @@ regular_strcmp: |
| 83 | bnei r7, end_cmp \n\ |
| 84 | beqi r3, end_cmp \n\ |
| 85 | brid regular_strcmp \n\ |
| 86 | addik r9, r9, 1 \n\ |
| 87 | end_cmp: \n\ |
| 88 | rtsd r15, 8 \n\ |
| 89 | or r3, r0, r7 /* Return strcmp result */"); |
| 90 | |
| 91 | +#endif |
| 92 | #endif /* ! HAVE_HW_PCMP */ |
| 93 | } |
| 94 | |
| 95 | diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c |
| 96 | index 62072fa..6dbc60d 100644 |
| 97 | --- a/newlib/libc/machine/microblaze/strcpy.c |
| 98 | +++ b/newlib/libc/machine/microblaze/strcpy.c |
| 99 | @@ -125,6 +125,62 @@ strcpy (char *__restrict dst0, |
| 100 | #else |
| 101 | |
| 102 | #include "mb_endian.h" |
| 103 | +#ifdef __arch64__ |
| 104 | + |
| 105 | + asm volatile (" \n\ |
| 106 | + orl r9, r0, r0 /* Index register */ \n\ |
| 107 | +check_alignment: \n\ |
| 108 | + andli r3, r5, 3 \n\ |
| 109 | + andli r4, r6, 3 \n\ |
| 110 | + beanei r3, try_align_args \n\ |
| 111 | + beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\ |
| 112 | +cpy_loop: \n" |
| 113 | + LOAD4BYTES("r3", "r6", "r9") |
| 114 | +" \n\ |
| 115 | + pcmplbf r4, r0, r3 \n\ |
| 116 | + beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n" |
| 117 | + STORE4BYTES("r3", "r5", "r9") |
| 118 | +" \n\ |
| 119 | + addlik r9, r9, 4 \n\ |
| 120 | + breaid cpy_loop \n\ |
| 121 | + nop \n\ |
| 122 | +cpy_bytes: \n\ |
| 123 | + lbu r3, r6, r9 \n\ |
| 124 | + sb r3, r5, r9 \n\ |
| 125 | + addlik r4, r4, -1 \n\ |
| 126 | + addlik r9, r9, 1 /* advance index to next byte */\n\ |
| 127 | + beaneid r4, cpy_bytes \n\ |
| 128 | + nop \n\ |
| 129 | +cpy_null: \n\ |
| 130 | + orl r3, r0, r5 /* Return strcpy result */\n\ |
| 131 | + rtsd r15, 8 \n\ |
| 132 | + nop \n\ |
| 133 | +try_align_args: \n\ |
| 134 | + xorl r7, r4, r3 \n\ |
| 135 | + beanei r7, regular_strcpy /* cannot align args */\n\ |
| 136 | + rsublik r10, r3, 4 /* Number of initial bytes to align */\n\ |
| 137 | +align_loop: \n\ |
| 138 | + lbu r3, r6, r9 \n\ |
| 139 | + sb r3, r5, r9 \n\ |
| 140 | + addlik r10, r10, -1 \n\ |
| 141 | + beaeqid r3, end_cpy /* Break if we have seen null character */\n\ |
| 142 | + nop \n\ |
| 143 | + addlik r9, r9, 1 \n\ |
| 144 | + beaneid r10, align_loop \n\ |
| 145 | + nop \n\ |
| 146 | + breai cpy_loop \n\ |
| 147 | +regular_strcpy: \n\ |
| 148 | + lbu r3, r6, r9 \n\ |
| 149 | + sb r3, r5, r9 \n\ |
| 150 | + addlik r9, r9, 1 \n\ |
| 151 | + beaneid r3, regular_strcpy \n\ |
| 152 | + nop \n\ |
| 153 | +end_cpy: \n\ |
| 154 | + orl r3, r0, r5 /* Return strcpy result */ \n\ |
| 155 | + rtsd r15, 8 \n\ |
| 156 | + nop /* delay slot */"); |
| 157 | + |
| 158 | +#else |
| 159 | |
| 160 | asm volatile (" \n\ |
| 161 | or r9, r0, r0 /* Index register */ \n\ |
| 162 | @@ -171,6 +227,7 @@ regular_strcpy: \n\ |
| 163 | end_cpy: \n\ |
| 164 | rtsd r15, 8 \n\ |
| 165 | or r3, r0, r5 /* Return strcpy result */"); |
| 166 | +#endif |
| 167 | #endif /* ! HAVE_HW_PCMP */ |
| 168 | } |
| 169 | |
| 170 | diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c |
| 171 | index acb4464..c04fa4f 100644 |
| 172 | --- a/newlib/libc/machine/microblaze/strlen.c |
| 173 | +++ b/newlib/libc/machine/microblaze/strlen.c |
| 174 | @@ -116,6 +116,43 @@ strlen (const char *str) |
| 175 | |
| 176 | #include "mb_endian.h" |
| 177 | |
| 178 | +#ifdef __arch64__ |
| 179 | + asm volatile (" \n\ |
| 180 | + orl r9, r0, r0 /* Index register */ \n\ |
| 181 | +check_alignment: \n\ |
| 182 | + andli r3, r5, 3 /* r3 = low two bits of r5 */ \n\ |
| 183 | + beanei r3, align_arg \n\ |
| 184 | +len_loop: \n" |
| 185 | + LOAD4BYTES("r3", "r5", "r9") |
| 186 | +" \n\ |
| 187 | + pcmplbf r4, r3, r0 /* See if there is Null byte */ \n\ |
| 188 | + beanei r4, end_len \n\ |
| 189 | + addlik r9, r9, 4 /* advance index to next word */ \n\ |
| 190 | + breaid len_loop \n\ |
| 191 | + nop /* delay slot */ \n\ |
| 192 | +end_len: \n\ |
| 193 | + lbu r3, r5, r9 /* byte scan for the terminator */ \n\ |
| 194 | + beaeqi r3, done_len \n\ |
| 195 | + addlik r9, r9, 1 /* advance index to next byte */ \n\ |
| 196 | + breaid end_len \n\ |
| 197 | + nop /* delay slot */ \n\ |
| 198 | +done_len: \n\ |
| 199 | + orl r3, r0, r9 /* Return len */ \n\ |
| 200 | + rtsd r15, 8 \n\ |
| 201 | + nop /* delay slot */ \n\ |
| 202 | +align_arg: \n\ |
| 203 | + rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\ |
| 204 | +align_loop: \n\ |
| 205 | + lbu r3, r5, r9 \n\ |
| 206 | + addlik r10, r10, -1 \n\ |
| 207 | + beaeqid r3, done_len \n\ |
| 208 | + nop /* delay slot */ \n\ |
| 209 | + addlik r9, r9, 1 \n\ |
| 210 | + beaneid r10, align_loop \n\ |
| 211 | + nop /* delay slot */ \n\ |
| 212 | + breai len_loop"); |
| 213 | + |
| 214 | +#else |
| 215 | asm volatile (" \n\ |
| 216 | or r9, r0, r0 /* Index register */ \n\ |
| 217 | check_alignment: \n\ |
| 218 | @@ -146,5 +183,6 @@ align_loop: \n\ |
| 219 | addik r9, r9, 1 \n\ |
| 220 | bri len_loop"); |
| 221 | |
| 222 | +#endif |
| 223 | #endif /* ! HAVE_HW_PCMP */ |
| 224 | } |
| 225 | -- |
| 226 | 2.7.4 |
| 227 | |