| From 536b8318974495cde2b42c3c2742748e2b271be0 Mon Sep 17 00:00:00 2001 |
| From: ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4> |
| Date: Wed, 27 May 2015 13:25:01 +0000 |
| Subject: [PATCH] PR target/65358 Avoid clobbering partial argument during |
| sibcall |
| |
| PR target/65358 |
| * expr.c (memory_load_overlap): New function. |
| (emit_push_insn): When pushing partial args to the stack would |
| clobber the register part load the overlapping part into a pseudo |
| and put it into the hard reg after pushing. Change return type |
| to bool. Add bool argument. |
| * expr.h (emit_push_insn): Change return type to bool. |
| Add bool argument. |
| * calls.c (expand_call): Cancel sibcall optimization when encountering |
| partial argument on targets with ARGS_GROW_DOWNWARD and |
| !STACK_GROWS_DOWNWARD. |
| (emit_library_call_value_1): Update callsite of emit_push_insn. |
| (store_one_arg): Likewise. |
| |
| PR target/65358 |
| * gcc.dg/pr65358.c: New test. |
| |
| git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223753 138bc75d-0d04-0410-961f-82ee72b054a4 |
| |
| Upstream-Status: Backport from 6.0 |
| Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com> |
| --- |
| gcc/calls.c | 17 ++++++-- |
| gcc/expr.c | 90 +++++++++++++++++++++++++++++++++++++----- |
| gcc/expr.h | 4 +- |
| gcc/testsuite/gcc.dg/pr65358.c | 33 ++++++++++++++++ |
| 4 files changed, 129 insertions(+), 15 deletions(-) |
| create mode 100644 gcc/testsuite/gcc.dg/pr65358.c |
| |
| diff --git a/gcc/calls.c b/gcc/calls.c |
| index ee8ea5f..2334381 100644 |
| --- a/gcc/calls.c |
| +++ b/gcc/calls.c |
| @@ -3236,6 +3236,14 @@ expand_call (tree exp, rtx target, int ignore) |
| { |
| rtx_insn *before_arg = get_last_insn (); |
| |
| + /* On targets with weird calling conventions (e.g. PA) it's |
| + hard to ensure that all cases of argument overlap between |
| + stack and registers work. Play it safe and bail out. */ |
| +#if defined(ARGS_GROW_DOWNWARD) && !defined(STACK_GROWS_DOWNWARD) |
| + sibcall_failure = 1; |
| + break; |
| +#endif |
| + |
| if (store_one_arg (&args[i], argblock, flags, |
| adjusted_args_size.var != 0, |
| reg_parm_stack_space) |
| @@ -4279,7 +4287,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, |
| partial, reg, 0, argblock, |
| GEN_INT (argvec[argnum].locate.offset.constant), |
| reg_parm_stack_space, |
| - ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad)); |
| + ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad), false); |
| |
| /* Now mark the segment we just used. */ |
| if (ACCUMULATE_OUTGOING_ARGS) |
| @@ -4886,10 +4894,11 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags, |
| |
| /* This isn't already where we want it on the stack, so put it there. |
| This can either be done with push or copy insns. */ |
| - emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX, |
| + if (!emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX, |
| parm_align, partial, reg, used - size, argblock, |
| ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space, |
| - ARGS_SIZE_RTX (arg->locate.alignment_pad)); |
| + ARGS_SIZE_RTX (arg->locate.alignment_pad), true)) |
| + sibcall_failure = 1; |
| |
| /* Unless this is a partially-in-register argument, the argument is now |
| in the stack. */ |
| @@ -5001,7 +5010,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags, |
| emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), size_rtx, |
| parm_align, partial, reg, excess, argblock, |
| ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space, |
| - ARGS_SIZE_RTX (arg->locate.alignment_pad)); |
| + ARGS_SIZE_RTX (arg->locate.alignment_pad), false); |
| |
| /* Unless this is a partially-in-register argument, the argument is now |
| in the stack. |
| diff --git a/gcc/expr.c b/gcc/expr.c |
| index 5c09550..24a6293 100644 |
| --- a/gcc/expr.c |
| +++ b/gcc/expr.c |
| @@ -4121,12 +4121,35 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type) |
| } |
| #endif |
| |
| +/* If reading SIZE bytes starting at address X will end up reading from |
| + address Y, return the number of bytes that overlap (1 to SIZE). Return -1 |
| + if there is no such overlap, or -2 if X + SIZE - Y is not a compile-time |
| + constant (for example when X and Y have different base registers). */ |
| + |
| +static int |
| +memory_load_overlap (rtx x, rtx y, HOST_WIDE_INT size) |
| +{ |
| + rtx tmp = plus_constant (Pmode, x, size); |
| + rtx sub = simplify_gen_binary (MINUS, Pmode, tmp, y); |
| + |
| + if (!CONST_INT_P (sub)) |
| + return -2; |
| + |
| + HOST_WIDE_INT val = INTVAL (sub); |
| + |
| + return IN_RANGE (val, 1, size) ? val : -1; |
| +} |
| + |
| /* Generate code to push X onto the stack, assuming it has mode MODE and |
| type TYPE. |
| MODE is redundant except when X is a CONST_INT (since they don't |
| carry mode info). |
| SIZE is an rtx for the size of data to be copied (in bytes), |
| needed only if X is BLKmode. |
| + Return true if successful. May return false if asked to push a |
| + partial argument during a sibcall optimization (as specified by |
| + SIBCALL_P) and it cannot be determined whether the incoming and |
| + outgoing argument addresses overlap. |
| |
| ALIGN (in bits) is maximum alignment we can assume. |
| |
| @@ -4152,11 +4175,11 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type) |
| for arguments passed in registers. If nonzero, it will be the number |
| of bytes required. */ |
| |
| -void |
| +bool |
| emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| unsigned int align, int partial, rtx reg, int extra, |
| rtx args_addr, rtx args_so_far, int reg_parm_stack_space, |
| - rtx alignment_pad) |
| + rtx alignment_pad, bool sibcall_p) |
| { |
| rtx xinner; |
| enum direction stack_direction |
| @@ -4179,6 +4202,10 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| |
| xinner = x; |
| |
| + int nregs = partial / UNITS_PER_WORD; |
| + rtx *tmp_regs = NULL; |
| + int overlapping = 0; |
| + |
| if (mode == BLKmode |
| || (STRICT_ALIGNMENT && align < GET_MODE_ALIGNMENT (mode))) |
| { |
| @@ -4309,6 +4336,43 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| PARM_BOUNDARY. Assume the caller isn't lying. */ |
| set_mem_align (target, align); |
| |
| + /* If part should go in registers and pushing to that part would |
| + overwrite some of the values that need to go into regs, load the |
| + overlapping values into temporary pseudos to be moved into the hard |
| + regs at the end after the stack pushing has completed. |
| + We cannot load them directly into the hard regs here because |
| + they can be clobbered by the block move expansions. |
| + See PR 65358. */ |
| + |
| + if (partial > 0 && reg != 0 && mode == BLKmode |
| + && GET_CODE (reg) != PARALLEL) |
| + { |
| + overlapping = memory_load_overlap (XEXP (x, 0), temp, partial); |
| + if (overlapping > 0) |
| + { |
| + gcc_assert (overlapping % UNITS_PER_WORD == 0); |
| + overlapping /= UNITS_PER_WORD; |
| + |
| + tmp_regs = XALLOCAVEC (rtx, overlapping); |
| + |
| + for (int i = 0; i < overlapping; i++) |
| + tmp_regs[i] = gen_reg_rtx (word_mode); |
| + |
| + for (int i = 0; i < overlapping; i++) |
| + emit_move_insn (tmp_regs[i], |
| + operand_subword_force (target, i, mode)); |
| + } |
| + else if (overlapping == -1) |
| + overlapping = 0; |
| + /* Could not determine whether there is overlap. |
| + Fail the sibcall if we are expanding one (SIBCALL_P). */ |
| + else |
| + { |
| + overlapping = 0; |
| + if (sibcall_p) |
| + return false; |
| + } |
| + } |
| emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM); |
| } |
| } |
| @@ -4363,12 +4427,13 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| has a size a multiple of a word. */ |
| for (i = size - 1; i >= not_stack; i--) |
| if (i >= not_stack + offset) |
| - emit_push_insn (operand_subword_force (x, i, mode), |
| + if (!emit_push_insn (operand_subword_force (x, i, mode), |
| word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, |
| 0, args_addr, |
| GEN_INT (args_offset + ((i - not_stack + skip) |
| * UNITS_PER_WORD)), |
| - reg_parm_stack_space, alignment_pad); |
| + reg_parm_stack_space, alignment_pad, sibcall_p)) |
| + return false; |
| } |
| else |
| { |
| @@ -4411,9 +4476,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| } |
| } |
| |
| - /* If part should go in registers, copy that part |
| - into the appropriate registers. Do this now, at the end, |
| - since mem-to-mem copies above may do function calls. */ |
| + /* Move the partial argument into the registers, taking any overlapping |
| + words from the pseudos in tmp_regs that we saved them in earlier. */ |
| if (partial > 0 && reg != 0) |
| { |
| /* Handle calls that pass values in multiple non-contiguous locations. |
| @@ -4421,9 +4485,15 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| if (GET_CODE (reg) == PARALLEL) |
| emit_group_load (reg, x, type, -1); |
| else |
| - { |
| + { |
| gcc_assert (partial % UNITS_PER_WORD == 0); |
| - move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode); |
| + move_block_to_reg (REGNO (reg), x, nregs - overlapping, mode); |
| + |
| + for (int i = 0; i < overlapping; i++) |
| + emit_move_insn (gen_rtx_REG (word_mode, REGNO (reg) |
| + + nregs - overlapping + i), |
| + tmp_regs[i]); |
| + |
| } |
| } |
| |
| @@ -4432,6 +4502,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, |
| |
| if (alignment_pad && args_addr == 0) |
| anti_adjust_stack (alignment_pad); |
| + |
| + return true; |
| } |
| |
| /* Return X if X can be used as a subtarget in a sequence of arithmetic |
| diff --git a/gcc/expr.h b/gcc/expr.h |
| index 867852e..5fcc13f 100644 |
| --- a/gcc/expr.h |
| +++ b/gcc/expr.h |
| @@ -218,8 +218,8 @@ extern rtx emit_move_resolve_push (machine_mode, rtx); |
| extern rtx push_block (rtx, int, int); |
| |
| /* Generate code to push something onto the stack, given its mode and type. */ |
| -extern void emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int, |
| - int, rtx, int, rtx, rtx, int, rtx); |
| +extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int, |
| + int, rtx, int, rtx, rtx, int, rtx, bool); |
| |
| /* Expand an assignment that stores the value of FROM into TO. */ |
| extern void expand_assignment (tree, tree, bool); |
| diff --git a/gcc/testsuite/gcc.dg/pr65358.c b/gcc/testsuite/gcc.dg/pr65358.c |
| new file mode 100644 |
| index 0000000..ba89fd4 |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.dg/pr65358.c |
| @@ -0,0 +1,33 @@ |
| +/* { dg-do run } */ |
| +/* { dg-options "-O2" } */ |
| + |
| +struct pack |
| +{ |
| + int fine; |
| + int victim; |
| + int killer; |
| +}; |
| + |
| +int __attribute__ ((__noinline__, __noclone__)) |
| +bar (int a, int b, struct pack p) |
| +{ |
| + if (a != 20 || b != 30) |
| + __builtin_abort (); |
| + if (p.fine != 40 || p.victim != 50 || p.killer != 60) |
| + __builtin_abort (); |
| + return 0; |
| +} |
| + |
| +int __attribute__ ((__noinline__, __noclone__)) |
| +foo (int arg1, int arg2, int arg3, struct pack p) |
| +{ |
| + return bar (arg2, arg3, p); |
| +} |
| + |
| +int main (void) |
| +{ |
| + struct pack p = { 40, 50, 60 }; |
| + |
| + (void) foo (10, 20, 30, p); |
| + return 0; |
| +} |
| -- |
| 2.7.0 |
| |