| From b35b582ef3f6575447097585174302fde1761078 Mon Sep 17 00:00:00 2001 |
| From: Nagaraju <nmekala@xilinx.com> |
| Date: Wed, 24 Apr 2019 23:29:21 +0530 |
| Subject: [PATCH 11/11] Remove the assembly implementation of the 64-bit string |
| functions. Revisit in the next release and fix it. |
| |
| --- |
| newlib/libc/machine/microblaze/mb_endian.h | 4 ++ |
| newlib/libc/machine/microblaze/strcmp.c | 93 ++++++++++-------------------- |
| newlib/libc/machine/microblaze/strcpy.c | 82 ++++++++------------------ |
| newlib/libc/machine/microblaze/strlen.c | 59 +++++++------------ |
| 4 files changed, 81 insertions(+), 157 deletions(-) |
| |
| diff --git a/newlib/libc/machine/microblaze/mb_endian.h b/newlib/libc/machine/microblaze/mb_endian.h |
| index fb217ec..17772c8 100644 |
| --- a/newlib/libc/machine/microblaze/mb_endian.h |
| +++ b/newlib/libc/machine/microblaze/mb_endian.h |
| @@ -8,8 +8,12 @@ |
| #ifdef __LITTLE_ENDIAN__ |
| #define LOAD4BYTES(rD,rA,rB) "\tlwr\t" rD ", " rA ", " rB "\n" |
| #define STORE4BYTES(rD,rA,rB) "\tswr\t" rD ", " rA ", " rB "\n" |
| +#define LOAD8BYTES(rD,rA,rB) "\tllr\t" rD ", " rA ", " rB "\n" /* 8-byte counterpart of LOAD4BYTES, little-endian build; NOTE(review): no user appears in this patch */ |
| +#define STORE8BYTES(rD,rA,rB) "\tslr\t" rD ", " rA ", " rB "\n" /* 8-byte counterpart of STORE4BYTES, little-endian build */ |
| #else |
| #define LOAD4BYTES(rD,rA,rB) "\tlw\t" rD ", " rA ", " rB "\n" |
| #define STORE4BYTES(rD,rA,rB) "\tsw\t" rD ", " rA ", " rB "\n" |
| +#define LOAD8BYTES(rD,rA,rB) "\tll\t" rD ", " rA ", " rB "\n" /* 8-byte counterpart of LOAD4BYTES when __LITTLE_ENDIAN__ is not defined */ |
| +#define STORE8BYTES(rD,rA,rB) "\tsl\t" rD ", " rA ", " rB "\n" /* 8-byte counterpart of STORE4BYTES when __LITTLE_ENDIAN__ is not defined */ |
| #endif |
| #endif |
| diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c |
| index acfe4cd..e34c64a 100644 |
| --- a/newlib/libc/machine/microblaze/strcmp.c |
| +++ b/newlib/libc/machine/microblaze/strcmp.c |
| @@ -129,70 +129,42 @@ strcmp (const char *s1, |
| return (*(unsigned char *) s1) - (*(unsigned char *) s2); |
| #endif /* not PREFER_SIZE_OVER_SPEED */ |
| |
| +#elif __arch64__ |
| + const unsigned long *a1; |
| + const unsigned long *a2; |
| + |
| + /* Take the word-at-a-time path only when UNALIGNED reports both pointers aligned. */ |
| + if (!UNALIGNED (s1, s2)) |
| + { |
| + /* Compare one unsigned long per step; a narrower type would not match a long-sized DETECTNULL. NOTE(review): confirm UNALIGNED/DETECTNULL are long-based. */ |
| + a1 = (const unsigned long *)s1; |
| + a2 = (const unsigned long *)s2; |
| + while (*a1 == *a2) |
| + { |
| + /* To get here, *a1 == *a2, thus if we find a null in *a1, |
| + then the strings must be equal, so return zero. */ |
| + if (DETECTNULL (*a1)) |
| + return 0; |
| + |
| + a1++; |
| + a2++; |
| + } |
| + |
| + /* The current words differ, so let the byte loop below find where. */ |
| + s1 = (const char *)a1; |
| + s2 = (const char *)a2; |
| + } |
| + |
| + while (*s1 != '\0' && *s1 == *s2) |
| + { |
| + s1++; |
| + s2++; |
| + } |
| + return (*(unsigned char *) s1) - (*(unsigned char *) s2); |
| #else |
| |
| #include "mb_endian.h" |
| |
| -#ifdef __arch64__ |
| - asm volatile (" \n\ |
| - orl r9, r0, r0 /* Index register */ \n\ |
| -check_alignment: \n\ |
| - andli r3, r5, 3 \n\ |
| - andli r4, r6, 3 \n\ |
| - beanei r3, try_align_args \n\ |
| - beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\ |
| -cmp_loop: \n" |
| - LOAD4BYTES("r3", "r5", "r9") |
| - LOAD4BYTES("r4", "r6", "r9") |
| -" \n\ |
| - pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\ |
| - beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\ |
| - cmplu r7, r4, r3 /* ELSE compare whole word */ \n\ |
| - beanei r7, end_cmp \n\ |
| - addlik r9, r9, 4 /* delay slot */ \n\ |
| - breaid cmp_loop \n\ |
| - nop /* delay slot */ \n\ |
| -end_cmp_loop: \n\ |
| - lbu r3, r5, r9 /* byte compare loop */ \n\ |
| - lbu r4, r6, r9 \n\ |
| - cmplu r7, r4, r3 /* Compare bytes */ \n\ |
| - beanei r7, end_cmp_early \n\ |
| - addlik r9, r9, 1 /* delay slot */ \n\ |
| - beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\ |
| - nop \n\ |
| -end_cmp_early: \n\ |
| - orl r3, r0, r7 /* delay slot */ \n\ |
| - rtsd r15, 8 \n\ |
| - nop \n\ |
| -try_align_args: \n\ |
| - xorl r7, r4, r3 \n\ |
| - beanei r7, regular_strcmp /* cannot align args */ \n\ |
| - rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\ |
| -align_loop: \n\ |
| - lbu r3, r5, r9 \n\ |
| - lbu r4, r6, r9 \n\ |
| - cmplu r7, r4, r3 \n\ |
| - beanei r7, end_cmp \n\ |
| - beaeqi r3, end_cmp \n\ |
| - addlik r10, r10, -1 \n\ |
| - addlik r9, r9, 1 \n\ |
| - beaeqid r10, cmp_loop \n\ |
| - nop \n\ |
| - breai align_loop \n\ |
| -regular_strcmp: \n\ |
| - lbu r3, r5, r9 \n\ |
| - lbu r4, r6, r9 \n\ |
| - cmplu r7, r4, r3 \n\ |
| - beanei r7, end_cmp \n\ |
| - beaeqi r3, end_cmp \n\ |
| - addlik r9, r9, 1 \n\ |
| - breaid regular_strcmp \n\ |
| - nop \n\ |
| -end_cmp: \n\ |
| - orl r3, r0, r7 \n\ |
| - rtsd r15, 8 \n\ |
| - nop /* Return strcmp result */"); |
| -#else |
| asm volatile (" \n\ |
| or r9, r0, r0 /* Index register */\n\ |
| check_alignment: \n\ |
| @@ -246,7 +218,6 @@ end_cmp: |
| rtsd r15, 8 \n\ |
| or r3, r0, r7 /* Return strcmp result */"); |
| |
| -#endif |
| #endif /* ! HAVE_HW_PCMP */ |
| } |
| |
| diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c |
| index 6dbc60d..ddb6922 100644 |
| --- a/newlib/libc/machine/microblaze/strcpy.c |
| +++ b/newlib/libc/machine/microblaze/strcpy.c |
| @@ -121,67 +121,36 @@ strcpy (char *__restrict dst0, |
| ; |
| return dst0; |
| #endif /* not PREFER_SIZE_OVER_SPEED */ |
| +#elif __arch64__ |
| + char *dst = dst0; |
| + const char *src = src0; |
| + long *aligned_dst; |
| + const long *aligned_src; |
| |
| -#else |
| + /* Take the "long int" path only when UNALIGNED reports SRC and DEST aligned. */ |
| + if (!UNALIGNED (src, dst)) |
| + { |
| + aligned_dst = (long *)dst; |
| + aligned_src = (const long *)src; |
| |
| -#include "mb_endian.h" |
| -#ifdef __arch64__ |
| + /* SRC and DEST are both "long int" aligned, so do "long int" sized |
| + copies until DETECTNULL flags the next source word. */ |
| + while (!DETECTNULL(*aligned_src)) |
| + { |
| + *aligned_dst++ = *aligned_src++; |
| + } |
| |
| - asm volatile (" \n\ |
| - orl r9, r0, r0 /* Index register */ \n\ |
| -check_alignment: \n\ |
| - andli r3, r5, 3 \n\ |
| - andli r4, r6, 3 \n\ |
| - beanei r3, try_align_args \n\ |
| - beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\ |
| -cpy_loop: \n" |
| - LOAD4BYTES("r3", "r6", "r9") |
| -" \n\ |
| - pcmplbf r4, r0, r3 \n\ |
| - beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n" |
| - STORE4BYTES("r3", "r5", "r9") |
| -" \n\ |
| - addlik r9, r9, 4 \n\ |
| - breaid cpy_loop \n\ |
| - nop \n\ |
| -cpy_bytes: \n\ |
| - lbu r3, r6, r9 \n\ |
| - sb r3, r5, r9 \n\ |
| - addlik r4, r4, -1 \n\ |
| - addlik r9, r9, 1 /* delay slot */\n\ |
| - beaneid r4, cpy_bytes \n\ |
| - nop \n\ |
| -cpy_null: \n\ |
| - orl r3, r0, r5 /* Return strcpy result */\n\ |
| - rtsd r15, 8 \n\ |
| - nop \n\ |
| -try_align_args: \n\ |
| - xorl r7, r4, r3 \n\ |
| - beanei r7, regular_strcpy /* cannot align args */\n\ |
| - rsublik r10, r3, 4 /* Number of initial bytes to align */\n\ |
| -align_loop: \n\ |
| - lbu r3, r6, r9 \n\ |
| - sb r3, r5, r9 \n\ |
| - addlik r10, r10, -1 \n\ |
| - beaeqid r3, end_cpy /* Break if we have seen null character */\n\ |
| - nop \n\ |
| - addlik r9, r9, 1 \n\ |
| - beaneid r10, align_loop \n\ |
| - nop \n\ |
| - breai cpy_loop \n\ |
| -regular_strcpy: \n\ |
| - lbu r3, r6, r9 \n\ |
| - sb r3, r5, r9 \n\ |
| - addlik r9, r9, 1 \n\ |
| - beaneid r3, regular_strcpy \n\ |
| - nop \n\ |
| -end_cpy: \n\ |
| - orl r3, r0, r5 \n\ |
| - rtsd r15, 8 \n\ |
| - nop /* Return strcpy result */"); |
| + dst = (char *)aligned_dst; |
| + src = (const char *)aligned_src; |
| + } |
| |
| -#else |
| + while ((*dst++ = *src++) != '\0') /* byte copy; the terminator is copied before the loop exits */ |
| + ; |
| + return dst0; |
| + |
| +#else |
| |
| +#include "mb_endian.h" |
| asm volatile (" \n\ |
| or r9, r0, r0 /* Index register */ \n\ |
| check_alignment: \n\ |
| @@ -227,7 +196,6 @@ regular_strcpy: \n\ |
| end_cpy: \n\ |
| rtsd r15, 8 \n\ |
| or r3, r0, r5 /* Return strcpy result */"); |
| -#endif |
| #endif /* ! HAVE_HW_PCMP */ |
| } |
| |
| diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c |
| index b6f2d3c..9407539 100644 |
| --- a/newlib/libc/machine/microblaze/strlen.c |
| +++ b/newlib/libc/machine/microblaze/strlen.c |
| @@ -112,47 +112,29 @@ strlen (const char *str) |
| return str - start; |
| #endif /* not PREFER_SIZE_OVER_SPEED */ |
| |
| -#else |
| - |
| -#include "mb_endian.h" |
| +#elif __arch64__ |
| + const char *start = str; /* kept so the length can be computed on return */ |
| + unsigned long *aligned_addr; /* cursor for the block-at-a-time scan */ |
| |
| -#ifdef __arch64__ |
| - asm volatile (" \n\ |
| - orl r9, r0, r0 /* Index register */ \n\ |
| -check_alignment: \n\ |
| - andli r3, r5, 3 \n\ |
| - beanei r3, align_arg \n\ |
| -len_loop: \n" |
| - LOAD4BYTES("r3", "r5", "r9") |
| -" \n\ |
| - pcmplbf r4, r3, r0 \n\ |
| - beanei r4, end_len \n\ |
| - addlik r9, r9, 4 \n\ |
| - breaid len_loop \n\ |
| - nop \n\ |
| -end_len: \n\ |
| - lbu r3, r5, r9 \n\ |
| - beaeqi r3, done_len \n\ |
| - addlik r9, r9, 1 \n\ |
| - breaid end_len \n\ |
| - nop \n\ |
| -done_len: \n\ |
| - orl r3, r0, r9 /* Return len */ \n\ |
| - rtsd r15, 8 \n\ |
| - nop \n\ |
| -align_arg: \n\ |
| - rsublik r10, r3, 4 \n\ |
| -align_loop: \n\ |
| - lbu r3, r5, r9 \n\ |
| - addlik r10, r10, -1 \n\ |
| - beaeqid r3, done_len \n\ |
| - nop \n\ |
| - addlik r9, r9, 1 \n\ |
| - beaneid r10, align_loop \n\ |
| - nop \n\ |
| - breai len_loop"); |
| + if (!UNALIGNED (str)) /* block scan is skipped when UNALIGNED (str) is nonzero */ |
| + { |
| + /* STR passed the alignment check, so scan one unsigned long at a |
| + time until DETECTNULL reports a null in the current block. */ |
| + aligned_addr = (unsigned long*)str; |
| + while (!DETECTNULL (*aligned_addr)) |
| + aligned_addr++; |
| |
| + /* Resume byte-wise from the start of the flagged block; the loop |
| + below finds the precise position of the null inside it. */ |
| + str = (char*)aligned_addr; |
| + } |
| + |
| + while (*str) /* also the only scan when the block scan was skipped */ |
| + str++; |
| + return str - start; /* bytes before the terminating null */ |
| #else |
| + |
| +#include "mb_endian.h" |
| asm volatile (" \n\ |
| or r9, r0, r0 /* Index register */ \n\ |
| check_alignment: \n\ |
| @@ -183,6 +165,5 @@ align_loop: \n\ |
| addik r9, r9, 1 \n\ |
| bri len_loop"); |
| |
| -#endif |
| #endif /* ! HAVE_HW_PCMP */ |
| } |
| -- |
| 2.7.4 |
| |