| From 19d7b2a34f3c69d62f570ac9d0f6bc3cd584b496 Mon Sep 17 00:00:00 2001 |
| From: Nagaraju <nmekala@xilinx.com> |
| Date: Thu, 14 Mar 2019 18:16:32 +0530 |
| Subject: [PATCH 09/11] Added MB-64 support to strcmp/strcpy/strlen files |
| |
| --- |
| newlib/libc/machine/microblaze/strcmp.c | 61 ++++++++++++++++++++++++++++++++- |
| newlib/libc/machine/microblaze/strcpy.c | 57 ++++++++++++++++++++++++++++++ |
| newlib/libc/machine/microblaze/strlen.c | 38 ++++++++++++++++++++ |
| 3 files changed, 155 insertions(+), 1 deletion(-) |
| |
| diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c |
| index 3119d82..dac64da 100644 |
| --- a/newlib/libc/machine/microblaze/strcmp.c |
| +++ b/newlib/libc/machine/microblaze/strcmp.c |
| @@ -133,6 +133,65 @@ strcmp (const char *s1, |
| |
| #include "mb_endian.h" |
| |
| +#ifdef __arch64__ |
| + asm volatile (" \n\ |
| + orl r9, r0, r0 /* Index register */\n\ |
| +check_alignment: \n\ |
| + andli r3, r5, 3 \n\ |
| + andli r4, r6, 3 \n\ |
| + beanei r3, try_align_args \n\ |
| + beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\ |
| +cmp_loop: \n" |
| + LOAD4BYTES("r3", "r5", "r9") |
| + LOAD4BYTES("r4", "r6", "r9") |
| +" \n\ |
| + pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\ |
| + beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\ |
| + cmplu r7, r4, r3 /* ELSE compare whole word */ \n\ |
| + beanei r7, end_cmp \n\ |
| + addlik r9, r9, 4 /* delay slot */ \n\ |
| + breaid cmp_loop \n\ |
| + nop /* delay slot */ \n\ |
| +end_cmp_loop: \n\ |
| + lbu r3, r5, r9 /* byte compare loop */ \n\ |
| + lbu r4, r6, r9 \n\ |
| + cmplu r7, r4, r3 /* Compare bytes */ \n\ |
| + beanei r7, end_cmp_early \n\ |
| + addlik r9, r9, 1 /* delay slot */ \n\ |
| + beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\ |
| + nop \n\ |
| +end_cmp_early: \n\ |
| + or r3, r0, r7 /* delay slot */ \n\ |
| + rtsd r15, 8 \n\ |
| + nop \n\ |
| +try_align_args: \n\ |
| + xorl r7, r4, r3 \n\ |
| + beanei r7, regular_strcmp /* cannot align args */ \n\ |
| + rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\ |
| +align_loop: \n\ |
| + lbu r3, r5, r9 \n\ |
| + lbu r4, r6, r9 \n\ |
| + cmpu r7, r4, r3 \n\ |
| + beanei r7, end_cmp \n\ |
| + beaeqi r3, end_cmp \n\ |
| + addlik r10, r10, -1 \n\ |
| + addlik r9, r9, 1 \n\ |
| + beaeqid r10, cmp_loop \n\ |
| + nop \n\ |
| + breai align_loop \n\ |
| +regular_strcmp: \n\ |
| + lbu r3, r5, r9 \n\ |
| + lbu r4, r6, r9 \n\ |
| + cmplu r7, r4, r3 \n\ |
| + beanei r7, end_cmp \n\ |
| + beaeqi r3, end_cmp \n\ |
| + breaid regular_strcmp \n\ |
| + addlik r9, r9, 1 \n\ |
| +end_cmp: \n\ |
| + or r3, r0, r7 \n\ |
| + rtsd r15, 8 \n\ |
| + nop /* Return strcmp result */"); |
| +#else |
| asm volatile (" \n\ |
| or r9, r0, r0 /* Index register */\n\ |
| check_alignment: \n\ |
| @@ -181,11 +240,11 @@ regular_strcmp: |
| bnei r7, end_cmp \n\ |
| beqi r3, end_cmp \n\ |
| brid regular_strcmp \n\ |
| - addik r9, r9, 1 \n\ |
| end_cmp: \n\ |
| rtsd r15, 8 \n\ |
| or r3, r0, r7 /* Return strcmp result */"); |
| |
| +#endif |
| #endif /* ! HAVE_HW_PCMP */ |
| } |
| |
| diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c |
| index 62072fa..6dbc60d 100644 |
| --- a/newlib/libc/machine/microblaze/strcpy.c |
| +++ b/newlib/libc/machine/microblaze/strcpy.c |
| @@ -125,6 +125,62 @@ strcpy (char *__restrict dst0, |
| #else |
| |
| #include "mb_endian.h" |
| +#ifdef __arch64__ |
| + |
| + asm volatile (" \n\ |
| + orl r9, r0, r0 /* Index register */ \n\ |
| +check_alignment: \n\ |
| + andli r3, r5, 3 \n\ |
| + andli r4, r6, 3 \n\ |
| + beanei r3, try_align_args \n\ |
| + beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\ |
| +cpy_loop: \n" |
| + LOAD4BYTES("r3", "r6", "r9") |
| +" \n\ |
| + pcmplbf r4, r0, r3 \n\ |
| + beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n" |
| + STORE4BYTES("r3", "r5", "r9") |
| +" \n\ |
| + addlik r9, r9, 4 \n\ |
| + breaid cpy_loop \n\ |
| + nop \n\ |
| +cpy_bytes: \n\ |
| + lbu r3, r6, r9 \n\ |
| + sb r3, r5, r9 \n\ |
| + addlik r4, r4, -1 \n\ |
| + addlik r9, r9, 1 /* delay slot */\n\ |
| + beaneid r4, cpy_bytes \n\ |
| + nop \n\ |
| +cpy_null: \n\ |
| + orl r3, r0, r5 /* Return strcpy result */\n\ |
| + rtsd r15, 8 \n\ |
| + nop \n\ |
| +try_align_args: \n\ |
| + xorl r7, r4, r3 \n\ |
| + beanei r7, regular_strcpy /* cannot align args */\n\ |
| + rsublik r10, r3, 4 /* Number of initial bytes to align */\n\ |
| +align_loop: \n\ |
| + lbu r3, r6, r9 \n\ |
| + sb r3, r5, r9 \n\ |
| + addlik r10, r10, -1 \n\ |
| + beaeqid r3, end_cpy /* Break if we have seen null character */\n\ |
| + nop \n\ |
| + addlik r9, r9, 1 \n\ |
| + beaneid r10, align_loop \n\ |
| + nop \n\ |
| + breai cpy_loop \n\ |
| +regular_strcpy: \n\ |
| + lbu r3, r6, r9 \n\ |
| + sb r3, r5, r9 \n\ |
| + addlik r9, r9, 1 \n\ |
| + beaneid r3, regular_strcpy \n\ |
| + nop \n\ |
| +end_cpy: \n\ |
| + orl r3, r0, r5 \n\ |
| + rtsd r15, 8 \n\ |
| + nop /* Return strcpy result */"); |
| + |
| +#else |
| |
| asm volatile (" \n\ |
| or r9, r0, r0 /* Index register */ \n\ |
| @@ -171,6 +227,7 @@ regular_strcpy: \n\ |
| end_cpy: \n\ |
| rtsd r15, 8 \n\ |
| or r3, r0, r5 /* Return strcpy result */"); |
| +#endif |
| #endif /* ! HAVE_HW_PCMP */ |
| } |
| |
| diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c |
| index acb4464..c04fa4f 100644 |
| --- a/newlib/libc/machine/microblaze/strlen.c |
| +++ b/newlib/libc/machine/microblaze/strlen.c |
| @@ -116,6 +116,43 @@ strlen (const char *str) |
| |
| #include "mb_endian.h" |
| |
| +#ifdef __arch64__ |
| + asm volatile (" \n\ |
| + orl r9, r0, r0 /* Index register */ \n\ |
| +check_alignment: \n\ |
| + andli r3, r5, 3 \n\ |
| + beanei r3, align_arg \n\ |
| +len_loop: \n" |
| + LOAD4BYTES("r3", "r5", "r9") |
| +" \n\ |
| + pcmplbf r4, r3, r0 \n\ |
| + beanei r4, end_len \n\ |
| + addik r9, r9, 4 \n\ |
| + breaid len_loop \n\ |
| + nop \n\ |
| +end_len: \n\ |
| + lbu r3, r5, r9 \n\ |
| + beaeqi r3, done_len \n\ |
| + addik r9, r9, 1 \n\ |
| + breaid end_len \n\ |
| + nop \n\ |
| +done_len: \n\ |
| + orl r3, r0, r9 /* Return len */ \n\ |
| + rtsd r15, 8 \n\ |
| + nop \n\ |
| +align_arg: \n\ |
| + rsublik r10, r3, 4 \n\ |
| +align_loop: \n\ |
| + lbu r3, r5, r9 \n\ |
| + addlik r10, r10, -1 \n\ |
| + beaeqid r3, done_len \n\ |
| + nop \n\ |
| + addlik r9, r9, 1 \n\ |
| + beaneid r10, align_loop \n\ |
| + nop \n\ |
| + breai len_loop"); |
| + |
| +#else |
| asm volatile (" \n\ |
| or r9, r0, r0 /* Index register */ \n\ |
| check_alignment: \n\ |
| @@ -146,5 +183,6 @@ align_loop: \n\ |
| addik r9, r9, 1 \n\ |
| bri len_loop"); |
| |
| +#endif |
| #endif /* ! HAVE_HW_PCMP */ |
| } |
| -- |
| 2.7.4 |
| |