Patrick Williams | d8c66bc | 2016-06-20 12:57:21 -0500 | [diff] [blame] | 1 | From 536b8318974495cde2b42c3c2742748e2b271be0 Mon Sep 17 00:00:00 2001 |
| 2 | From: ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4> |
| 3 | Date: Wed, 27 May 2015 13:25:01 +0000 |
| 4 | Subject: [PATCH] PR target/65358 Avoid clobbering partial argument during |
| 5 | sibcall |
| 6 | |
| 7 | PR target/65358 |
| 8 | * expr.c (memory_load_overlap): New function. |
| 9 | (emit_push_insn): When pushing partial args to the stack would |
| 10 | clobber the register part load the overlapping part into a pseudo |
| 11 | and put it into the hard reg after pushing. Change return type |
| 12 | to bool. Add bool argument. |
| 13 | * expr.h (emit_push_insn): Change return type to bool. |
| 14 | Add bool argument. |
| 15 | * calls.c (expand_call): Cancel sibcall optimization when encountering |
| 16 | partial argument on targets with ARGS_GROW_DOWNWARD and |
| 17 | !STACK_GROWS_DOWNWARD. |
| 18 | (emit_library_call_value_1): Update callsite of emit_push_insn. |
| 19 | (store_one_arg): Likewise. |
| 20 | |
| 21 | PR target/65358 |
| 22 | * gcc.dg/pr65358.c: New test. |
| 23 | |
| 24 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223753 138bc75d-0d04-0410-961f-82ee72b054a4 |
| 25 | |
| 26 | Upstream-Status: Backport from 6.0 |
| 27 | Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com> |
| 28 | --- |
| 29 | gcc/calls.c | 17 ++++++-- |
| 30 | gcc/expr.c | 90 +++++++++++++++++++++++++++++++++++++----- |
| 31 | gcc/expr.h | 4 +- |
| 32 | gcc/testsuite/gcc.dg/pr65358.c | 33 ++++++++++++++++ |
| 33 | 4 files changed, 129 insertions(+), 15 deletions(-) |
| 34 | create mode 100644 gcc/testsuite/gcc.dg/pr65358.c |
| 35 | |
| 36 | diff --git a/gcc/calls.c b/gcc/calls.c |
| 37 | index ee8ea5f..2334381 100644 |
| 38 | --- a/gcc/calls.c |
| 39 | +++ b/gcc/calls.c |
| 40 | @@ -3236,6 +3236,14 @@ expand_call (tree exp, rtx target, int ignore) |
| 41 | { |
| 42 | rtx_insn *before_arg = get_last_insn (); |
| 43 | |
| 44 | + /* On targets with weird calling conventions (e.g. PA) it's |
| 45 | + hard to ensure that all cases of argument overlap between |
| 46 | + stack and registers work. Play it safe and bail out. */ |
| 47 | +#if defined(ARGS_GROW_DOWNWARD) && !defined(STACK_GROWS_DOWNWARD) |
| 48 | + sibcall_failure = 1; |
| 49 | + break; |
| 50 | +#endif |
| 51 | + |
| 52 | if (store_one_arg (&args[i], argblock, flags, |
| 53 | adjusted_args_size.var != 0, |
| 54 | reg_parm_stack_space) |
| 55 | @@ -4279,7 +4287,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, |
| 56 | partial, reg, 0, argblock, |
| 57 | GEN_INT (argvec[argnum].locate.offset.constant), |
| 58 | reg_parm_stack_space, |
| 59 | - ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad)); |
| 60 | + ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad), false); |
| 61 | |
| 62 | /* Now mark the segment we just used. */ |
| 63 | if (ACCUMULATE_OUTGOING_ARGS) |
| 64 | @@ -4886,10 +4894,11 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags, |
| 65 | |
| 66 | /* This isn't already where we want it on the stack, so put it there. |
| 67 | This can either be done with push or copy insns. */ |
| 68 | - emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX, |
| 69 | + if (!emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX, |
| 70 | parm_align, partial, reg, used - size, argblock, |
| 71 | ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space, |
| 72 | - ARGS_SIZE_RTX (arg->locate.alignment_pad)); |
| 73 | + ARGS_SIZE_RTX (arg->locate.alignment_pad), true)) |
| 74 | + sibcall_failure = 1; |
| 75 | |
| 76 | /* Unless this is a partially-in-register argument, the argument is now |
| 77 | in the stack. */ |
| 78 | @@ -5001,7 +5010,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags, |
| 79 | emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), size_rtx, |
| 80 | parm_align, partial, reg, excess, argblock, |
| 81 | ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space, |
| 82 | - ARGS_SIZE_RTX (arg->locate.alignment_pad)); |
| 83 | + ARGS_SIZE_RTX (arg->locate.alignment_pad), false); |
| 84 | |
| 85 | /* Unless this is a partially-in-register argument, the argument is now |
| 86 | in the stack. |
| 87 | diff --git a/gcc/expr.c b/gcc/expr.c |
| 88 | index 5c09550..24a6293 100644 |
| 89 | --- a/gcc/expr.c |
| 90 | +++ b/gcc/expr.c |
| 91 | @@ -4121,12 +4121,35 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type) |
| 92 | } |
| 93 | #endif |
| 94 | |
| 95 | +/* Compute D = X + SIZE - Y for addresses X and Y. Return D, the number of |
| 96 | + bytes by which a SIZE-byte read from X overlaps Y, if 1 <= D <= SIZE; |
| 97 | + -1 if D is a constant outside that range (no overlap); -2 if D does not |
| 98 | + simplify to a constant (e.g. X and Y have different base registers). */ |
| 99 | + |
| 100 | +static int |
| 101 | +memory_load_overlap (rtx x, rtx y, HOST_WIDE_INT size) |
| 102 | +{ |
| 103 | + rtx tmp = plus_constant (Pmode, x, size); |
| 104 | + rtx sub = simplify_gen_binary (MINUS, Pmode, tmp, y); |
| 105 | + |
| 106 | + if (!CONST_INT_P (sub)) |
| 107 | + return -2; |
| 108 | + |
| 109 | + HOST_WIDE_INT val = INTVAL (sub); |
| 110 | + |
| 111 | + return IN_RANGE (val, 1, size) ? val : -1; |
| 112 | +} |
| 113 | + |
| 114 | /* Generate code to push X onto the stack, assuming it has mode MODE and |
| 115 | type TYPE. |
| 116 | MODE is redundant except when X is a CONST_INT (since they don't |
| 117 | carry mode info). |
| 118 | SIZE is an rtx for the size of data to be copied (in bytes), |
| 119 | needed only if X is BLKmode. |
| 120 | + Return true if successful. May return false if asked to push a |
| 121 | + partial argument during a sibcall optimization (as specified by |
| 122 | + SIBCALL_P) and the incoming and outgoing pointers cannot be shown |
| 123 | + to not overlap. |
| 124 | |
| 125 | ALIGN (in bits) is maximum alignment we can assume. |
| 126 | |
| 127 | @@ -4152,11 +4175,11 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type) |
| 128 | for arguments passed in registers. If nonzero, it will be the number |
| 129 | of bytes required. */ |
| 130 | |
| 131 | -void |
| 132 | +bool |
| 133 | emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 134 | unsigned int align, int partial, rtx reg, int extra, |
| 135 | rtx args_addr, rtx args_so_far, int reg_parm_stack_space, |
| 136 | - rtx alignment_pad) |
| 137 | + rtx alignment_pad, bool sibcall_p) |
| 138 | { |
| 139 | rtx xinner; |
| 140 | enum direction stack_direction |
| 141 | @@ -4179,6 +4202,10 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 142 | |
| 143 | xinner = x; |
| 144 | |
| 145 | + int nregs = partial / UNITS_PER_WORD; |
| 146 | + rtx *tmp_regs = NULL; |
| 147 | + int overlapping = 0; |
| 148 | + |
| 149 | if (mode == BLKmode |
| 150 | || (STRICT_ALIGNMENT && align < GET_MODE_ALIGNMENT (mode))) |
| 151 | { |
| 152 | @@ -4309,6 +4336,43 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 153 | PARM_BOUNDARY. Assume the caller isn't lying. */ |
| 154 | set_mem_align (target, align); |
| 155 | |
| 156 | + /* If part should go in registers and pushing to that part would |
| 157 | + overwrite some of the values that need to go into regs, load the |
| 158 | + overlapping values into temporary pseudos to be moved into the hard |
| 159 | + regs at the end after the stack pushing has completed. |
| 160 | + We cannot load them directly into the hard regs here because |
| 161 | + they can be clobbered by the block move expansions. |
| 162 | + See PR 65358. */ |
| 163 | + |
| 164 | + if (partial > 0 && reg != 0 && mode == BLKmode |
| 165 | + && GET_CODE (reg) != PARALLEL) |
| 166 | + { |
| 167 | + overlapping = memory_load_overlap (XEXP (x, 0), temp, partial); |
| 168 | + if (overlapping > 0) |
| 169 | + { |
| 170 | + gcc_assert (overlapping % UNITS_PER_WORD == 0); |
| 171 | + overlapping /= UNITS_PER_WORD; |
| 172 | + |
| 173 | + tmp_regs = XALLOCAVEC (rtx, overlapping); |
| 174 | + |
| 175 | + for (int i = 0; i < overlapping; i++) |
| 176 | + tmp_regs[i] = gen_reg_rtx (word_mode); |
| 177 | + |
| 178 | + for (int i = 0; i < overlapping; i++) |
| 179 | + emit_move_insn (tmp_regs[i], |
| 180 | + operand_subword_force (target, i, mode)); |
| 181 | + } |
| 182 | + else if (overlapping == -1) |
| 183 | + overlapping = 0; |
| 184 | + /* Could not determine whether there is overlap. |
| 185 | + Fail the sibcall. */ |
| 186 | + else |
| 187 | + { |
| 188 | + overlapping = 0; |
| 189 | + if (sibcall_p) |
| 190 | + return false; |
| 191 | + } |
| 192 | + } |
| 193 | emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM); |
| 194 | } |
| 195 | } |
| 196 | @@ -4363,12 +4427,13 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 197 | has a size a multiple of a word. */ |
| 198 | for (i = size - 1; i >= not_stack; i--) |
| 199 | if (i >= not_stack + offset) |
| 200 | - emit_push_insn (operand_subword_force (x, i, mode), |
| 201 | + if (!emit_push_insn (operand_subword_force (x, i, mode), |
| 202 | word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, |
| 203 | 0, args_addr, |
| 204 | GEN_INT (args_offset + ((i - not_stack + skip) |
| 205 | * UNITS_PER_WORD)), |
| 206 | - reg_parm_stack_space, alignment_pad); |
| 207 | + reg_parm_stack_space, alignment_pad, sibcall_p)) |
| 208 | + return false; |
| 209 | } |
| 210 | else |
| 211 | { |
| 212 | @@ -4411,9 +4476,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | - /* If part should go in registers, copy that part |
| 217 | - into the appropriate registers. Do this now, at the end, |
| 218 | - since mem-to-mem copies above may do function calls. */ |
| 219 | + /* At the end (mem-to-mem copies above may call functions), load the partial |
| 220 | + argument's registers, taking overlapping words from the tmp_regs pseudos. */ |
| 221 | if (partial > 0 && reg != 0) |
| 222 | { |
| 223 | /* Handle calls that pass values in multiple non-contiguous locations. |
| 224 | @@ -4421,9 +4485,15 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 225 | if (GET_CODE (reg) == PARALLEL) |
| 226 | emit_group_load (reg, x, type, -1); |
| 227 | else |
| 228 | - { |
| 229 | + { |
| 230 | gcc_assert (partial % UNITS_PER_WORD == 0); |
| 231 | - move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode); |
| 232 | + move_block_to_reg (REGNO (reg), x, nregs - overlapping, mode); |
| 233 | + |
| 234 | + for (int i = 0; i < overlapping; i++) |
| 235 | + emit_move_insn (gen_rtx_REG (word_mode, REGNO (reg) |
| 236 | + + nregs - overlapping + i), |
| 237 | + tmp_regs[i]); |
| 238 | + |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | @@ -4432,6 +4502,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| 243 | |
| 244 | if (alignment_pad && args_addr == 0) |
| 245 | anti_adjust_stack (alignment_pad); |
| 246 | + |
| 247 | + return true; |
| 248 | } |
| 249 | |
| 250 | /* Return X if X can be used as a subtarget in a sequence of arithmetic |
| 251 | diff --git a/gcc/expr.h b/gcc/expr.h |
| 252 | index 867852e..5fcc13f 100644 |
| 253 | --- a/gcc/expr.h |
| 254 | +++ b/gcc/expr.h |
| 255 | @@ -218,8 +218,8 @@ extern rtx emit_move_resolve_push (machine_mode, rtx); |
| 256 | extern rtx push_block (rtx, int, int); |
| 257 | |
| 258 | /* Generate code to push something onto the stack, given its mode and type. */ |
| 259 | -extern void emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int, |
| 260 | - int, rtx, int, rtx, rtx, int, rtx); |
| 261 | +extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int, |
| 262 | + int, rtx, int, rtx, rtx, int, rtx, bool); |
| 263 | |
| 264 | /* Expand an assignment that stores the value of FROM into TO. */ |
| 265 | extern void expand_assignment (tree, tree, bool); |
| 266 | diff --git a/gcc/testsuite/gcc.dg/pr65358.c b/gcc/testsuite/gcc.dg/pr65358.c |
| 267 | new file mode 100644 |
| 268 | index 0000000..ba89fd4 |
| 269 | --- /dev/null |
| 270 | +++ b/gcc/testsuite/gcc.dg/pr65358.c |
| 271 | @@ -0,0 +1,33 @@ |
| 272 | +/* { dg-do run } */ |
| 273 | +/* { dg-options "-O2" } */ |
| 274 | + |
| 275 | +struct pack /* Three ints; passed by value to both foo and bar. */ |
| 276 | +{ |
| 277 | + int fine; |
| 278 | + int victim; |
| 279 | + int killer; |
| 280 | +}; |
| 281 | + |
| 282 | +int __attribute__ ((__noinline__, __noclone__)) |
| 283 | +bar (int a, int b, struct pack p) |
| 284 | +{ /* Abort unless the arguments are 20, 30 and { 40, 50, 60 }; else return 0. */ |
| 285 | + if (a != 20 || b != 30) |
| 286 | + __builtin_abort (); |
| 287 | + if (p.fine != 40 || p.victim != 50 || p.killer != 60) |
| 288 | + __builtin_abort (); |
| 289 | + return 0; |
| 290 | +} |
| 291 | + |
| 292 | +int __attribute__ ((__noinline__, __noclone__)) |
| 293 | +foo (int arg1, int arg2, int arg3, struct pack p) |
| 294 | +{ /* Forward arg2, arg3 and P to bar; arg1 is not used. */ |
| 295 | + return bar (arg2, arg3, p); /* Tail position; presumably the sibcall under test. */ |
| 296 | +} |
| 297 | + |
| 298 | +int main (void) |
| 299 | +{ /* Call foo with the values bar checks for; bar aborts on any mismatch. */ |
| 300 | + struct pack p = { 40, 50, 60 }; |
| 301 | + |
| 302 | + (void) foo (10, 20, 30, p); |
| 303 | + return 0; |
| 304 | +} |
| 305 | -- |
| 306 | 2.7.0 |
| 307 | |