blob: c18f40e7409cc0282fb0f857193a88faee85b4ad [file] [log] [blame]
Patrick Williams d8c66bc 2016-06-20 12:57:21 -0500 1From 536b8318974495cde2b42c3c2742748e2b271be0 Mon Sep 17 00:00:00 2001
2From: ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>
3Date: Wed, 27 May 2015 13:25:01 +0000
4Subject: [PATCH] PR target/65358 Avoid clobbering partial argument during
5 sibcall
6
7 PR target/65358
8 * expr.c (memory_load_overlap): New function.
9 (emit_push_insn): When pushing partial args to the stack would
10 clobber the register part load the overlapping part into a pseudo
11 and put it into the hard reg after pushing. Change return type
12 to bool. Add bool argument.
13 * expr.h (emit_push_insn): Change return type to bool.
14 Add bool argument.
15 * calls.c (expand_call): Cancel sibcall optimization when encountering
16 partial argument on targets with ARGS_GROW_DOWNWARD and
17 !STACK_GROWS_DOWNWARD.
18 (emit_library_call_value_1): Update callsite of emit_push_insn.
19 (store_one_arg): Likewise.
20
21 PR target/65358
22 * gcc.dg/pr65358.c: New test.
23
24git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223753 138bc75d-0d04-0410-961f-82ee72b054a4
25
26Upstream-Status: Backport from 6.0
27Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
28---
29 gcc/calls.c | 17 ++++++--
30 gcc/expr.c | 90 +++++++++++++++++++++++++++++++++++++-----
31 gcc/expr.h | 4 +-
32 gcc/testsuite/gcc.dg/pr65358.c | 33 ++++++++++++++++
33 4 files changed, 129 insertions(+), 15 deletions(-)
34 create mode 100644 gcc/testsuite/gcc.dg/pr65358.c
35
36diff --git a/gcc/calls.c b/gcc/calls.c
37index ee8ea5f..2334381 100644
38--- a/gcc/calls.c
39+++ b/gcc/calls.c
40@@ -3236,6 +3236,14 @@ expand_call (tree exp, rtx target, int ignore)
41 {
42 rtx_insn *before_arg = get_last_insn ();
43
44+ /* On targets with weird calling conventions (e.g. PA) it's
45+ hard to ensure that all cases of argument overlap between
46+ stack and registers work. Play it safe and bail out. */
47+#if defined(ARGS_GROW_DOWNWARD) && !defined(STACK_GROWS_DOWNWARD)
48+ sibcall_failure = 1;
49+ break;
50+#endif
51+
52 if (store_one_arg (&args[i], argblock, flags,
53 adjusted_args_size.var != 0,
54 reg_parm_stack_space)
55@@ -4279,7 +4287,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
56 partial, reg, 0, argblock,
57 GEN_INT (argvec[argnum].locate.offset.constant),
58 reg_parm_stack_space,
59- ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad));
60+ ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad), false);
61
62 /* Now mark the segment we just used. */
63 if (ACCUMULATE_OUTGOING_ARGS)
64@@ -4886,10 +4894,11 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
65
66 /* This isn't already where we want it on the stack, so put it there.
67 This can either be done with push or copy insns. */
68- emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX,
69+ if (!emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX,
70 parm_align, partial, reg, used - size, argblock,
71 ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space,
72- ARGS_SIZE_RTX (arg->locate.alignment_pad));
73+ ARGS_SIZE_RTX (arg->locate.alignment_pad), true))
74+ sibcall_failure = 1;
75
76 /* Unless this is a partially-in-register argument, the argument is now
77 in the stack. */
78@@ -5001,7 +5010,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
79 emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), size_rtx,
80 parm_align, partial, reg, excess, argblock,
81 ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space,
82- ARGS_SIZE_RTX (arg->locate.alignment_pad));
83+ ARGS_SIZE_RTX (arg->locate.alignment_pad), false);
84
85 /* Unless this is a partially-in-register argument, the argument is now
86 in the stack.
87diff --git a/gcc/expr.c b/gcc/expr.c
88index 5c09550..24a6293 100644
89--- a/gcc/expr.c
90+++ b/gcc/expr.c
91@@ -4121,12 +4121,35 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type)
92 }
93 #endif
94
95+/* Return how many bytes of the SIZE-byte read starting at address X lie
96+ at or above address Y, computed as X + SIZE - Y. Return -1 if that
97+ value is outside [1, SIZE] (no overlap) or -2 if it is not a CONST_INT
98+ (for example when X and Y have different base registers). */
99+
100+static int
101+memory_load_overlap (rtx x, rtx y, HOST_WIDE_INT size)
102+{
103+ rtx tmp = plus_constant (Pmode, x, size); /* X + SIZE. */
104+ rtx sub = simplify_gen_binary (MINUS, Pmode, tmp, y);
105+
106+ if (!CONST_INT_P (sub)) /* Difference is not a known constant. */
107+ return -2;
108+
109+ HOST_WIDE_INT val = INTVAL (sub); /* X + SIZE - Y. */
110+
111+ return IN_RANGE (val, 1, size) ? val : -1;
112+}
113+
114 /* Generate code to push X onto the stack, assuming it has mode MODE and
115 type TYPE.
116 MODE is redundant except when X is a CONST_INT (since they don't
117 carry mode info).
118 SIZE is an rtx for the size of data to be copied (in bytes),
119 needed only if X is BLKmode.
120+ Return true if successful. May return false if asked to push a
121+ partial argument during a sibcall optimization (as specified by
122+ SIBCALL_P) and the incoming and outgoing pointers cannot be shown
123+ to not overlap.
124
125 ALIGN (in bits) is maximum alignment we can assume.
126
127@@ -4152,11 +4175,11 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type)
128 for arguments passed in registers. If nonzero, it will be the number
129 of bytes required. */
130
131-void
132+bool
133 emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
134 unsigned int align, int partial, rtx reg, int extra,
135 rtx args_addr, rtx args_so_far, int reg_parm_stack_space,
136- rtx alignment_pad)
137+ rtx alignment_pad, bool sibcall_p)
138 {
139 rtx xinner;
140 enum direction stack_direction
141@@ -4179,6 +4202,10 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
142
143 xinner = x;
144
145+ int nregs = partial / UNITS_PER_WORD;
146+ rtx *tmp_regs = NULL;
147+ int overlapping = 0;
148+
149 if (mode == BLKmode
150 || (STRICT_ALIGNMENT && align < GET_MODE_ALIGNMENT (mode)))
151 {
152@@ -4309,6 +4336,43 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
153 PARM_BOUNDARY. Assume the caller isn't lying. */
154 set_mem_align (target, align);
155
156+ /* If part should go in registers and pushing to that part would
157+ overwrite some of the values that need to go into regs, load the
158+ overlapping values into temporary pseudos to be moved into the hard
159+ regs at the end after the stack pushing has completed.
160+ We cannot load them directly into the hard regs here because
161+ they can be clobbered by the block move expansions.
162+ See PR 65358. */
163+
164+ if (partial > 0 && reg != 0 && mode == BLKmode
165+ && GET_CODE (reg) != PARALLEL)
166+ {
167+ overlapping = memory_load_overlap (XEXP (x, 0), temp, partial);
168+ if (overlapping > 0)
169+ {
170+ gcc_assert (overlapping % UNITS_PER_WORD == 0);
171+ overlapping /= UNITS_PER_WORD;
172+
173+ tmp_regs = XALLOCAVEC (rtx, overlapping);
174+
175+ for (int i = 0; i < overlapping; i++)
176+ tmp_regs[i] = gen_reg_rtx (word_mode);
177+
178+ for (int i = 0; i < overlapping; i++)
179+ emit_move_insn (tmp_regs[i],
180+ operand_subword_force (target, i, mode));
181+ }
182+ else if (overlapping == -1)
183+ overlapping = 0;
184+ /* Could not determine whether there is overlap.
185+ Fail the sibcall. */
186+ else
187+ {
188+ overlapping = 0;
189+ if (sibcall_p)
190+ return false;
191+ }
192+ }
193 emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM);
194 }
195 }
196@@ -4363,12 +4427,13 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
197 has a size a multiple of a word. */
198 for (i = size - 1; i >= not_stack; i--)
199 if (i >= not_stack + offset)
200- emit_push_insn (operand_subword_force (x, i, mode),
201+ if (!emit_push_insn (operand_subword_force (x, i, mode),
202 word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX,
203 0, args_addr,
204 GEN_INT (args_offset + ((i - not_stack + skip)
205 * UNITS_PER_WORD)),
206- reg_parm_stack_space, alignment_pad);
207+ reg_parm_stack_space, alignment_pad, sibcall_p))
208+ return false;
209 }
210 else
211 {
212@@ -4411,9 +4476,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
213 }
214 }
215
216- /* If part should go in registers, copy that part
217- into the appropriate registers. Do this now, at the end,
218- since mem-to-mem copies above may do function calls. */
219+ /* Move the partial arguments into the registers and any overlapping
220+ values that we moved into the pseudos in tmp_regs. */
221 if (partial > 0 && reg != 0)
222 {
223 /* Handle calls that pass values in multiple non-contiguous locations.
224@@ -4421,9 +4485,15 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
225 if (GET_CODE (reg) == PARALLEL)
226 emit_group_load (reg, x, type, -1);
227 else
228- {
229+ {
230 gcc_assert (partial % UNITS_PER_WORD == 0);
231- move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode);
232+ move_block_to_reg (REGNO (reg), x, nregs - overlapping, mode);
233+
234+ for (int i = 0; i < overlapping; i++)
235+ emit_move_insn (gen_rtx_REG (word_mode, REGNO (reg)
236+ + nregs - overlapping + i),
237+ tmp_regs[i]);
238+
239 }
240 }
241
242@@ -4432,6 +4502,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
243
244 if (alignment_pad && args_addr == 0)
245 anti_adjust_stack (alignment_pad);
246+
247+ return true;
248 }
249
250 /* Return X if X can be used as a subtarget in a sequence of arithmetic
251diff --git a/gcc/expr.h b/gcc/expr.h
252index 867852e..5fcc13f 100644
253--- a/gcc/expr.h
254+++ b/gcc/expr.h
255@@ -218,8 +218,8 @@ extern rtx emit_move_resolve_push (machine_mode, rtx);
256 extern rtx push_block (rtx, int, int);
257
258 /* Generate code to push something onto the stack, given its mode and type. */
259-extern void emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
260- int, rtx, int, rtx, rtx, int, rtx);
261+extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
262+ int, rtx, int, rtx, rtx, int, rtx, bool);
263
264 /* Expand an assignment that stores the value of FROM into TO. */
265 extern void expand_assignment (tree, tree, bool);
266diff --git a/gcc/testsuite/gcc.dg/pr65358.c b/gcc/testsuite/gcc.dg/pr65358.c
267new file mode 100644
268index 0000000..ba89fd4
269--- /dev/null
270+++ b/gcc/testsuite/gcc.dg/pr65358.c
271@@ -0,0 +1,33 @@
272+/* { dg-do run } */
273+/* { dg-options "-O2" } */
274+
275+struct pack /* Three-int aggregate passed by value to foo and bar. */
276+{
277+ int fine; /* main stores 40; bar checks for 40. */
278+ int victim; /* main stores 50; bar checks for 50. */
279+ int killer; /* main stores 60; bar checks for 60. */
280+};
281+
282+int __attribute__ ((__noinline__, __noclone__))
283+bar (int a, int b, struct pack p) /* Aborts unless every argument is intact. */
284+{
285+ if (a != 20 || b != 30) /* Scalars forwarded by foo. */
286+ __builtin_abort ();
287+ if (p.fine != 40 || p.victim != 50 || p.killer != 60) /* Struct from main. */
288+ __builtin_abort ();
289+ return 0;
290+}
291+
292+int __attribute__ ((__noinline__, __noclone__))
293+foo (int arg1, int arg2, int arg3, struct pack p) /* arg1 is not forwarded. */
294+{
295+ return bar (arg2, arg3, p); /* Presumably the sibcall under test -- confirm per target. */
296+}
297+
298+int main (void) /* Drives foo with the values bar checks for. */
299+{
300+ struct pack p = { 40, 50, 60 }; /* fine, victim, killer. */
301+
302+ (void) foo (10, 20, 30, p); /* bar aborts if 20, 30 or p is corrupted. */
303+ return 0;
304+}
305--
3062.7.0
307