Brad Bishop | 0011132 | 2018-04-01 22:23:53 -0400 | [diff] [blame] | 1 | From d3cdd96a300f9003a1cc242541605169aacdc811 Mon Sep 17 00:00:00 2001 |
| 2 | From: willschm <willschm@138bc75d-0d04-0410-961f-82ee72b054a4> |
| 3 | Date: Mon, 25 Sep 2017 14:35:02 +0000 |
| 4 | Subject: [PATCH] [gcc] |
| 5 | |
| 6 | 2017-09-25 Will Schmidt <will_schmidt@vnet.ibm.com> |
| 7 | |
| 8 | * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling |
| 9 | for early folding of vector stores (ALTIVEC_BUILTIN_ST_*). |
| 10 | (rs6000_builtin_valid_without_lhs): New helper function. |
| 11 | * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): |
| 12 | Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST. |
| 13 | |
| 14 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253152 138bc75d-0d04-0410-961f-82ee72b054a4 |
| 15 | |
| 16 | Fix internal compiler error for testcase gcc.dg/vmx/7d-02.c |
| 17 | |
| 18 | Upstream commit: d3cdd96a300f9003a1cc242541605169aacdc811 |
| 19 | |
| 20 | Backport by Kaushik Phatak <Kaushik.Phatak@kpit.com> |
| 21 | |
| 22 | This backport drops the gcc/ChangeLog changes from the original upstream patch |
| 23 | to avoid conflicts when applying; the diffstat below still lists that file. |
| 24 | |
| 25 | Upstream-Status: Backport |
| 26 | |
| 27 | Signed-off-by: Mark Hatle <mark.hatle@windriver.com> |
| 28 | --- |
| 29 | gcc/ChangeLog | 8 +++++ |
| 30 | gcc/config/rs6000/rs6000-c.c | 72 ------------------------------------------- |
| 31 | gcc/config/rs6000/rs6000.c | 73 ++++++++++++++++++++++++++++++++++++++++++-- |
| 32 | 3 files changed, 78 insertions(+), 75 deletions(-) |
| 33 | |
| 34 | diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c |
| 35 | index a49db97..4a363a1 100644 |
| 36 | --- a/gcc/config/rs6000/rs6000-c.c |
| 37 | +++ b/gcc/config/rs6000/rs6000-c.c |
| 38 | @@ -6279,70 +6279,6 @@ altivec_resolve_overloaded_builtin (loca |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | - /* Similarly for stvx. */ |
| 43 | - if (fcode == ALTIVEC_BUILTIN_VEC_ST |
| 44 | - && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG) |
| 45 | - && nargs == 3) |
| 46 | - { |
| 47 | - tree arg0 = (*arglist)[0]; |
| 48 | - tree arg1 = (*arglist)[1]; |
| 49 | - tree arg2 = (*arglist)[2]; |
| 50 | - |
| 51 | - /* Construct the masked address. Let existing error handling take |
| 52 | - over if we don't have a constant offset. */ |
| 53 | - arg1 = fold (arg1); |
| 54 | - |
| 55 | - if (TREE_CODE (arg1) == INTEGER_CST) |
| 56 | - { |
| 57 | - if (!ptrofftype_p (TREE_TYPE (arg1))) |
| 58 | - arg1 = build1 (NOP_EXPR, sizetype, arg1); |
| 59 | - |
| 60 | - tree arg2_type = TREE_TYPE (arg2); |
| 61 | - if (TREE_CODE (arg2_type) == ARRAY_TYPE && c_dialect_cxx ()) |
| 62 | - { |
| 63 | - /* Force array-to-pointer decay for C++. */ |
| 64 | - arg2 = default_conversion (arg2); |
| 65 | - arg2_type = TREE_TYPE (arg2); |
| 66 | - } |
| 67 | - |
| 68 | - /* Find the built-in to make sure a compatible one exists; if not |
| 69 | - we fall back to default handling to get the error message. */ |
| 70 | - for (desc = altivec_overloaded_builtins; |
| 71 | - desc->code && desc->code != fcode; desc++) |
| 72 | - continue; |
| 73 | - |
| 74 | - for (; desc->code == fcode; desc++) |
| 75 | - if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) |
| 76 | - && rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2) |
| 77 | - && rs6000_builtin_type_compatible (TREE_TYPE (arg2), |
| 78 | - desc->op3)) |
| 79 | - { |
| 80 | - tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type, |
| 81 | - arg2, arg1); |
| 82 | - tree aligned |
| 83 | - = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, |
| 84 | - addr, build_int_cst (arg2_type, -16)); |
| 85 | - |
| 86 | - tree arg0_type = TREE_TYPE (arg0); |
| 87 | - if (TYPE_MODE (arg0_type) == V2DImode) |
| 88 | - /* Type-based aliasing analysis thinks vector long |
| 89 | - and vector long long are different and will put them |
| 90 | - in distinct alias classes. Force our address type |
| 91 | - to be a may-alias type to avoid this. */ |
| 92 | - arg0_type |
| 93 | - = build_pointer_type_for_mode (arg0_type, Pmode, |
| 94 | - true/*can_alias_all*/); |
| 95 | - else |
| 96 | - arg0_type = build_pointer_type (arg0_type); |
| 97 | - aligned = build1 (NOP_EXPR, arg0_type, aligned); |
| 98 | - tree stg = build_indirect_ref (loc, aligned, RO_NULL); |
| 99 | - tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg, |
| 100 | - convert (TREE_TYPE (stg), arg0)); |
| 101 | - return retval; |
| 102 | - } |
| 103 | - } |
| 104 | - } |
| 105 | - |
| 106 | for (n = 0; |
| 107 | !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; |
| 108 | fnargs = TREE_CHAIN (fnargs), n++) |
| 109 | diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c |
| 110 | index a49db97..4a363a1 100644 |
| 111 | --- a/gcc/config/rs6000/rs6000.c |
| 112 | +++ b/gcc/config/rs6000/rs6000.c |
| 113 | @@ -55,6 +55,7 @@ |
| 114 | #include "reload.h" |
| 115 | #include "sched-int.h" |
| 116 | #include "gimplify.h" |
| 117 | +#include "gimple-fold.h" |
| 118 | #include "gimple-iterator.h" |
| 119 | #include "gimple-ssa.h" |
| 120 | #include "gimple-walk.h" |
| 121 | @@ -17089,6 +17090,25 @@ rs6000_fold_builtin (tree fndecl, int n_ |
| 122 | #endif |
| 123 | } |
| 124 | |
| 125 | +/* Return true if the built-in FN_CODE may validly be called without a LHS |
| 126 | + (currently only the ALTIVEC_BUILTIN_STVX_* vector stores), else false. */ |
| 127 | +bool |
| 128 | +rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code) |
| 129 | +{ |
| 130 | + switch (fn_code) |
| 131 | + { |
| 132 | + case ALTIVEC_BUILTIN_STVX_V16QI: |
| 133 | + case ALTIVEC_BUILTIN_STVX_V8HI: |
| 134 | + case ALTIVEC_BUILTIN_STVX_V4SI: |
| 135 | + case ALTIVEC_BUILTIN_STVX_V4SF: |
| 136 | + case ALTIVEC_BUILTIN_STVX_V2DI: |
| 137 | + case ALTIVEC_BUILTIN_STVX_V2DF: |
| 138 | + return true; |
| 139 | + default: |
| 140 | + return false; |
| 141 | + } |
| 142 | +} |
| 143 | + |
| 144 | /* Fold a machine-dependent built-in in GIMPLE. (For folding into |
| 145 | a constant, use rs6000_fold_builtin.) */ |
| 146 | |
| 147 | @@ -17102,6 +17122,10 @@ rs6000_gimple_fold_builtin (gimple_stmt_ |
| 148 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); |
| 149 | tree arg0, arg1, lhs; |
| 150 | |
| 151 | + /* Prevent gimple folding for code that does not have a LHS, unless it is |
| 152 | + allowed per the rs6000_builtin_valid_without_lhs helper function. */ |
| 153 | + if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code)) |
| 154 | + return false; |
| 155 | switch (fn_code) |
| 156 | { |
| 157 | /* Flavors of vec_add. We deliberately don't expand |
| 158 | @@ -17169,6 +17193,54 @@ rs6000_gimple_fold_builtin (gimple_stmt_ |
| 159 | gsi_replace (gsi, g, true); |
| 160 | return true; |
| 161 | } |
| 162 | + /* Vector stores. */ |
| 163 | + case ALTIVEC_BUILTIN_STVX_V16QI: |
| 164 | + case ALTIVEC_BUILTIN_STVX_V8HI: |
| 165 | + case ALTIVEC_BUILTIN_STVX_V4SI: |
| 166 | + case ALTIVEC_BUILTIN_STVX_V4SF: |
| 167 | + case ALTIVEC_BUILTIN_STVX_V2DI: |
| 168 | + case ALTIVEC_BUILTIN_STVX_V2DF: |
| 169 | + { |
| 170 | + /* Do not fold for -maltivec=be on LE targets. */ |
| 171 | + if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN) |
| 172 | + return false; |
| 173 | + arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ |
| 174 | + arg1 = gimple_call_arg (stmt, 1); /* Offset. */ |
| 175 | + tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ |
| 176 | + location_t loc = gimple_location (stmt); |
| 177 | + tree arg0_type = TREE_TYPE (arg0); |
| 178 | + /* Use ptr_type_node (no TBAA) for the arg2_type. |
| 179 | + FIXME: (Richard) "A proper fix would be to transition this type as |
| 180 | + seen from the frontend to GIMPLE, for example in a similar way we |
| 181 | + do for MEM_REFs by piggy-backing that on an extra argument, a |
| 182 | + constant zero pointer of the alias pointer type to use (which would |
| 183 | + also serve as a type indicator of the store itself). I'd use a |
| 184 | + target specific internal function for this (not sure if we can have |
| 185 | + those target specific, but I guess if it's folded away then that's |
| 186 | + fine) and get away with the overload set." |
| 187 | + */ |
| 188 | + tree arg2_type = ptr_type_node; |
| 189 | + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create |
| 190 | + the tree using the value from arg1 (the offset). The resulting |
| 191 | + address has type arg2_type. */ |
| 192 | + gimple_seq stmts = NULL; |
| 193 | + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); |
| 194 | + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, |
| 195 | + arg2_type, arg2, temp_offset); |
| 196 | + /* Mask off any lower bits from the address. */ |
| 197 | + tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, |
| 198 | + arg2_type, temp_addr, |
| 199 | + build_int_cst (arg2_type, -16)); |
| 200 | + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
| 201 | + /* The desired gimple result should be similar to: |
| 202 | + MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */ |
| 203 | + gimple *g; |
| 204 | + g = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr, |
| 205 | + build_int_cst (arg2_type, 0)), arg0); |
| 206 | + gimple_set_location (g, loc); |
| 207 | + gsi_replace (gsi, g, true); |
| 208 | + return true; |
| 209 | + } |
| 210 | |
| 211 | default: |
| 212 | break; |