From f98495d90ba66f67fe922a4b9229ea787041c418 Mon Sep 17 00:00:00 2001
From: thopre01 <thopre01@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu, 22 Nov 2018 14:46:17 +0000
Subject: [PATCH] PR85434: Prevent spilling of stack protector guard's address
 on ARM

In case of high register pressure in PIC mode, the address of the stack
protector's guard can be spilled on ARM targets, as shown in PR85434,
thus allowing an attacker to control what the canary is compared
against. ARM lacks stack_protect_set and stack_protect_test insn
patterns; defining them does not help, as the guard's address is still
expanded in the usual way beforehand and the patterns only deal with
copying the guard and testing it against the canary.

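For illustration, the kind of function affected looks like the
following; a minimal sketch, assuming something like
"arm-linux-gnueabihf-gcc -O2 -fPIC -fstack-protector-all" (all names
hypothetical, not the reproducer attached to PR85434):

    /* Enough simultaneously live values that the allocator runs out of
       registers, so the PIC address of __stack_chk_guard computed for
       the prologue's canary store may be spilled to the stack and then
       reloaded (from attacker-writable memory) for the epilogue's
       canary check.  */
    extern int sink (int *, int, int, int, int, int, int, int);

    int
    f (int a, int b, int c, int d)
    {
      int buf[64];
      int t0 = a * b, t1 = b * c, t2 = c * d, t3 = d * a;
      int t4 = a + c, t5 = b + d;
      return sink (buf, a, t0, t1, t2, t3, t4, t5);
    }
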
This problem does not occur on x86 targets because the PIC access and
the test can be done in the same instruction. AArch64 is exempt too
because its PIC access insn patterns are moves of an UNSPEC, which
prevents the second access in the epilogue from being CSEd with the
first access in the prologue by the cse_local pass.

The approach followed here is to create new "combined" set and test
standard pattern names that take the unexpanded guard and do the set or
test. This allows the target to use an opaque pattern (e.g. using an
UNSPEC) to hide the individual instructions being generated from the
compiler, and to split the pattern into generic load, compare and
branch instructions after register allocation, thereby avoiding any
spilling. This is implemented here for the ARM targets. For targets
that do not implement these new standard pattern names, the existing
stack_protect_set and stack_protect_test pattern names are used.

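Concretely, after register allocation the combined set pattern splits
so that the 32-bit ARM canary store boils down to a self-contained
sequence like the one below (a sketch of the stack_protect_set_insn
template added by this patch; rN stands for whichever scratch register
the allocator picked and CANARY for operand 0, the function's canary
slot; in PIC mode rN is loaded from a freshly computed GOT base rather
than a value reused from the prologue):

    ldr   rN, [rN]    @ turn the guard's address into the guard value
    str   rN, CANARY  @ store the value into the function's canary slot
    mov   rN, #0      @ wipe rN so the guard value cannot leak

Because the whole sequence stays a single insn until after reload, none
of its intermediate values is ever a candidate for spilling.
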
To be able to split the PIC access after register allocation, the
functions involved had to be augmented to force a fresh PIC register
load and to control which register it loads into. Sharing the PIC
register between prologue and epilogue could otherwise again lead to
spilling due to CSE, which an attacker could use to control what the
canary gets compared against.

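The resulting contract, which the gcc_asserts added below spell out,
allows exactly two call shapes; a sketch with placeholder variables
(orig, reg, scratch):

    /* Normal expansion: lazy PIC setup, shared cached PIC register.  */
    legitimize_pic_address (orig, SImode, reg, NULL_RTX,
                            false /*compute_now*/);

    /* Post-reload splitting: recompute the PIC base at this exact spot,
       into a scratch register chosen by the register allocator.  */
    legitimize_pic_address (orig, SImode, reg, scratch,
                            true /*compute_now*/);
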
2018-11-22  Thomas Preud'homme  <thomas.preudhomme@linaro.org>

	gcc/
	PR target/85434
	* target-insns.def (stack_protect_combined_set): Define new standard
	pattern name.
	(stack_protect_combined_test): Likewise.
	* cfgexpand.c (stack_protect_prologue): Try new
	stack_protect_combined_set pattern first.
	* function.c (stack_protect_epilogue): Try new
	stack_protect_combined_test pattern first.
	* config/arm/arm.c (require_pic_register): Add pic_reg and compute_now
	parameters to control which register to use as PIC register and force
	reloading PIC register respectively.  Insert in the stream of insns if
	possible.
	(legitimize_pic_address): Expose above new parameters in prototype and
	adapt recursive calls accordingly.  Use pic_reg if non null instead of
	cached one.
	(arm_load_pic_register): Add pic_reg parameter and use it if non null.
	(arm_legitimize_address): Adapt to new legitimize_pic_address
	prototype.
	(thumb_legitimize_address): Likewise.
	(arm_emit_call_insn): Adapt to require_pic_register prototype change.
	(arm_expand_prologue): Adapt to arm_load_pic_register prototype change.
	(thumb1_expand_prologue): Likewise.
	* config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype
	change.
	(arm_load_pic_register): Likewise.
	* config/arm/predicates.md (guard_addr_operand): New predicate.
	(guard_operand): New predicate.
	* config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address
	prototype change.
	(builtin_setjmp_receiver expander): Adapt to arm_load_pic_register
	prototype change.
	(stack_protect_combined_set): New expander.
	(stack_protect_combined_set_insn): New insn_and_split pattern.
	(stack_protect_set_insn): New insn pattern.
	(stack_protect_combined_test): New expander.
	(stack_protect_combined_test_insn): New insn_and_split pattern.
	(arm_stack_protect_test_insn): New insn pattern.
	* config/arm/thumb1.md (thumb1_stack_protect_test_insn): New insn pattern.
	* config/arm/unspecs.md (UNSPEC_SP_SET): New unspec.
	(UNSPEC_SP_TEST): Likewise.
	* doc/md.texi (stack_protect_combined_set): Document new standard
	pattern name.
	(stack_protect_set): Clarify that the operand for guard's address is
	legal.
	(stack_protect_combined_test): Document new standard pattern name.
	(stack_protect_test): Clarify that the operand for guard's address is
	legal.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@266379 138bc75d-0d04-0410-961f-82ee72b054a4

Upstream-Status: Backport
CVE: CVE-2018-12886
Signed-off-by: Zhixiong Chi <zhixiong.chi@windriver.com>
---
 gcc/ChangeLog                |  49 ++++++
 gcc/cfgexpand.c              |  17 +++
 gcc/config/arm/arm-protos.h  |   4 +-
 gcc/config/arm/arm.c         |  87 ++++++++---
 gcc/config/arm/arm.md        | 163 +++++++++++++++++++-
 gcc/config/arm/predicates.md |  17 +++
 gcc/config/arm/thumb1.md     |  13 ++
 gcc/config/arm/unspecs.md    |   3 +
 gcc/doc/md.texi              |  55 ++++++-
 gcc/function.c               |  32 +++-
 gcc/target-insns.def         |   2 +
 11 files changed, 399 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pr85434.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e2ebfd34214..fa41e7112e0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1537,6 +1537,55 @@
 	* config/arm/neon.md (movv4hf, movv8hf): Refactored to..
 	(mov<mov>): ..this and enable unconditionally.
 
+2018-11-22  Thomas Preud'homme  <thomas.preudhomme@linaro.org>
+
+	* target-insns.def (stack_protect_combined_set): Define new standard
+	pattern name.
+	(stack_protect_combined_test): Likewise.
+	* cfgexpand.c (stack_protect_prologue): Try new
+	stack_protect_combined_set pattern first.
+	* function.c (stack_protect_epilogue): Try new
+	stack_protect_combined_test pattern first.
+	* config/arm/arm.c (require_pic_register): Add pic_reg and compute_now
+	parameters to control which register to use as PIC register and force
+	reloading PIC register respectively.  Insert in the stream of insns if
+	possible.
+	(legitimize_pic_address): Expose above new parameters in prototype and
+	adapt recursive calls accordingly.  Use pic_reg if non null instead of
+	cached one.
+	(arm_load_pic_register): Add pic_reg parameter and use it if non null.
+	(arm_legitimize_address): Adapt to new legitimize_pic_address
+	prototype.
+	(thumb_legitimize_address): Likewise.
+	(arm_emit_call_insn): Adapt to require_pic_register prototype change.
+	(arm_expand_prologue): Adapt to arm_load_pic_register prototype change.
+	(thumb1_expand_prologue): Likewise.
+	* config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype
+	change.
+	(arm_load_pic_register): Likewise.
+	* config/arm/predicates.md (guard_addr_operand): New predicate.
+	(guard_operand): New predicate.
+	* config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address
+	prototype change.
+	(builtin_setjmp_receiver expander): Adapt to arm_load_pic_register
+	prototype change.
+	(stack_protect_combined_set): New expander.
+	(stack_protect_combined_set_insn): New insn_and_split pattern.
+	(stack_protect_set_insn): New insn pattern.
+	(stack_protect_combined_test): New expander.
+	(stack_protect_combined_test_insn): New insn_and_split pattern.
+	(arm_stack_protect_test_insn): New insn pattern.
+	* config/arm/thumb1.md (thumb1_stack_protect_test_insn): New insn pattern.
+	* config/arm/unspecs.md (UNSPEC_SP_SET): New unspec.
+	(UNSPEC_SP_TEST): Likewise.
+	* doc/md.texi (stack_protect_combined_set): Document new standard
+	pattern name.
+	(stack_protect_set): Clarify that the operand for guard's address is
+	legal.
+	(stack_protect_combined_test): Document new standard pattern name.
+	(stack_protect_test): Clarify that the operand for guard's address is
+	legal.
+
 2018-11-22  Uros Bizjak  <ubizjak@gmail.com>
 
 	Backport from mainline
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 8fa392fcd8a..21bdcdaeaa3 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -6185,6 +6185,23 @@ stack_protect_prologue (void)
   rtx x, y;
 
   x = expand_normal (crtl->stack_protect_guard);
+
+  if (targetm.have_stack_protect_combined_set () && guard_decl)
+    {
+      gcc_assert (DECL_P (guard_decl));
+      y = DECL_RTL (guard_decl);
+
+      /* Allow the target to compute the address of Y and copy it to X
+	 without leaking Y into a register.  This combined address + copy
+	 pattern allows the target to prevent spilling of any intermediate
+	 results by splitting it after register allocation.  */
+      if (rtx_insn *insn = targetm.gen_stack_protect_combined_set (x, y))
+	{
+	  emit_insn (insn);
+	  return;
+	}
+    }
+
   if (guard_decl)
     y = expand_normal (guard_decl);
   else
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 8d6d2395b84..00f5f16ed02 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -28,7 +28,7 @@ extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
 extern int use_return_insn (int, rtx);
 extern bool use_simple_return_p (void);
 extern enum reg_class arm_regno_class (int);
-extern void arm_load_pic_register (unsigned long);
+extern void arm_load_pic_register (unsigned long, rtx);
 extern int arm_volatile_func (void);
 extern void arm_expand_prologue (void);
 extern void arm_expand_epilogue (bool);
@@ -69,7 +69,7 @@ extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
			       HOST_WIDE_INT, rtx, rtx, int);
 extern int legitimate_pic_operand_p (rtx);
-extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
+extern rtx legitimize_pic_address (rtx, machine_mode, rtx, rtx, bool);
 extern rtx legitimize_tls_address (rtx, rtx);
 extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
 extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8393f0b87f3..12417de5102 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -7379,21 +7379,34 @@ legitimate_pic_operand_p (rtx x)
   return 1;
 }
 
-/* Record that the current function needs a PIC register.  Initialize
-   cfun->machine->pic_reg if we have not already done so.  */
+/* Record that the current function needs a PIC register.  If PIC_REG is
+   null, a new pseudo is allocated as PIC register, otherwise PIC_REG is
+   used.  In both cases cfun->machine->pic_reg is initialized if we have
+   not already done so.  COMPUTE_NOW decides whether and where to set the
+   PIC register.  If true, the PIC register is reloaded at the current
+   position in the instruction stream, regardless of whether it was loaded
+   before.  Otherwise, it is only loaded if not already done so
+   (crtl->uses_pic_offset_table is null).  Note that a nonnull PIC_REG
+   requires COMPUTE_NOW to be true, and a null PIC_REG requires it to be
+   false.  */
 
 static void
-require_pic_register (void)
+require_pic_register (rtx pic_reg, bool compute_now)
 {
+  gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
   /* A lot of the logic here is made obscure by the fact that this
      routine gets called as part of the rtx cost estimation process.
      We don't want those calls to affect any assumptions about the real
      function; and further, we can't call entry_of_function() until we
      start the real expansion process.  */
-  if (!crtl->uses_pic_offset_table)
+  if (!crtl->uses_pic_offset_table || compute_now)
     {
-      gcc_assert (can_create_pseudo_p ());
+      gcc_assert (can_create_pseudo_p ()
+		  || (pic_reg != NULL_RTX
+		      && REG_P (pic_reg)
+		      && GET_MODE (pic_reg) == Pmode));
       if (arm_pic_register != INVALID_REGNUM
+	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
@@ -7409,8 +7422,10 @@ require_pic_register (void)
     {
       rtx_insn *seq, *insn;
 
+      if (pic_reg == NULL_RTX)
+	pic_reg = gen_reg_rtx (Pmode);
       if (!cfun->machine->pic_reg)
-	cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+	cfun->machine->pic_reg = pic_reg;
 
       /* Play games to avoid marking the function as needing pic
	 if we are being called as part of the cost-estimation
@@ -7421,11 +7436,12 @@ require_pic_register (void)
       start_sequence ();
 
       if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
-	  && arm_pic_register > LAST_LO_REGNUM)
+	  && arm_pic_register > LAST_LO_REGNUM
+	  && !compute_now)
	emit_move_insn (cfun->machine->pic_reg,
			gen_rtx_REG (Pmode, arm_pic_register));
       else
-	arm_load_pic_register (0UL);
+	arm_load_pic_register (0UL, pic_reg);
 
       seq = get_insns ();
       end_sequence ();
@@ -7438,16 +7454,33 @@ require_pic_register (void)
	 we can't yet emit instructions directly in the final
	 insn stream.  Queue the insns on the entry edge, they will
	 be committed after everything else is expanded.  */
-      insert_insn_on_edge (seq,
-			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+      if (currently_expanding_to_rtl)
+	insert_insn_on_edge (seq,
+			     single_succ_edge
+			     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+      else
+	emit_insn (seq);
	    }
	}
     }
 }
 
+/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
+   created to hold the result of the load.  If not NULL, PIC_REG indicates
+   which register to use as PIC register, otherwise that is decided by the
+   register allocator.  COMPUTE_NOW forces the PIC register to be loaded at
+   the current location in the instruction stream, regardless of whether it
+   was loaded previously.  Note that a nonnull PIC_REG is only supported iff
+   COMPUTE_NOW is true and a null PIC_REG is only supported iff COMPUTE_NOW
+   is false.
+
+   Returns the register REG into which the PIC load is performed.  */
+
 rtx
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
+			bool compute_now)
 {
+  gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
   if (GET_CODE (orig) == SYMBOL_REF
       || GET_CODE (orig) == LABEL_REF)
     {
@@ -7480,9 +7513,12 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
       rtx mem;
 
       /* If this function doesn't have a pic register, create one now.  */
-      require_pic_register ();
+      require_pic_register (pic_reg, compute_now);
+
+      if (pic_reg == NULL_RTX)
+	pic_reg = cfun->machine->pic_reg;
 
-      pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+      pat = gen_calculate_pic_address (reg, pic_reg, orig);
 
       /* Make the MEM as close to a constant as possible.  */
       mem = SET_SRC (pat);
@@ -7531,9 +7567,11 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
 
       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
 
-      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
+				     pic_reg, compute_now);
       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
-				       base == reg ? 0 : reg);
+				       base == reg ? 0 : reg, pic_reg,
+				       compute_now);
 
       if (CONST_INT_P (offset))
	{
@@ -7633,16 +7671,17 @@ static GTY(()) int pic_labelno;
    low register.  */
 
 void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
 {
-  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+  rtx l1, labelno, pic_tmp, pic_rtx;
 
   if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
     return;
 
   gcc_assert (flag_pic);
 
-  pic_reg = cfun->machine->pic_reg;
+  if (pic_reg == NULL_RTX)
+    pic_reg = cfun->machine->pic_reg;
   if (TARGET_VXWORKS_RTP)
     {
       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
@@ -8718,7 +8757,8 @@ arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
     {
       /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
-      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+					  false /*compute_now*/);
 
       if (new_x != orig_x)
	x = new_x;
@@ -8786,7 +8826,8 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
     {
       /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
-      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+					  false /*compute_now*/);
 
       if (new_x != orig_x)
	x = new_x;
@@ -18074,7 +18115,7 @@ arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
	   ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	   : !SYMBOL_REF_LOCAL_P (addr)))
     {
-      require_pic_register ();
+      require_pic_register (NULL_RTX, false /*compute_now*/);
       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
     }
 
@@ -22006,7 +22047,7 @@ arm_expand_prologue (void)
       mask &= THUMB2_WORK_REGS;
       if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
-      arm_load_pic_register (mask);
+      arm_load_pic_register (mask, NULL_RTX);
     }
 
   /* If we are profiling, make sure no instructions are scheduled before
@@ -25237,7 +25278,7 @@ thumb1_expand_prologue (void)
   /* Load the pic register before setting the frame pointer,
      so we can use r7 as a temporary work register.  */
   if (flag_pic && arm_pic_register != INVALID_REGNUM)
-    arm_load_pic_register (live_regs_mask);
+    arm_load_pic_register (live_regs_mask, NULL_RTX);
 
   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index c8dc9474b1b..f6196e93168 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6021,7 +6021,8 @@
       operands[1] = legitimize_pic_address (operands[1], SImode,
					     (!can_create_pseudo_p ()
					      ? operands[0]
-					      : 0));
+					      : NULL_RTX), NULL_RTX,
+					     false /*compute_now*/);
     }
   "
 )
@@ -6309,7 +6310,7 @@
     /* r3 is clobbered by set/longjmp, so we can use it as a scratch
       register.  */
     if (arm_pic_register != INVALID_REGNUM)
-      arm_load_pic_register (1UL << 3);
+      arm_load_pic_register (1UL << 3, NULL_RTX);
     DONE;
   }")
 
@@ -8634,6 +8635,164 @@
    (set_attr "conds" "clob")]
 )
 
+;; Named patterns for stack smashing protection.
+(define_expand "stack_protect_combined_set"
+  [(parallel
+     [(set (match_operand:SI 0 "memory_operand" "")
+	   (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+		      UNSPEC_SP_SET))
+      (clobber (match_scratch:SI 2 ""))
+      (clobber (match_scratch:SI 3 ""))])]
+  ""
+  ""
+)
+
+;; Use a separate insn from the above expand so that the mem is kept
+;; outside operand #1 when register allocation comes.  This is needed to
+;; keep LRA from trying to reload the guard, since we need to control how
+;; the PIC access is done in the -fpic/-fPIC case (see the COMPUTE_NOW
+;; parameter when calling legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_set_insn"
+  [(set (match_operand:SI 0 "memory_operand" "=m,m")
+	(unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+		   UNSPEC_SP_SET))
+   (clobber (match_scratch:SI 2 "=&l,&r"))
+   (clobber (match_scratch:SI 3 "=&l,&r"))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:SI [(mem:SI (match_dup 2))]
+					    UNSPEC_SP_SET))
+	      (clobber (match_dup 2))])]
+  "
+{
+  if (flag_pic)
+    {
+      /* Forces recomputing of GOT base now.  */
+      legitimize_pic_address (operands[1], SImode, operands[2], operands[3],
+			      true /*compute_now*/);
+    }
+  else
+    {
+      if (address_operand (operands[1], SImode))
+	operands[2] = operands[1];
+      else
+	{
+	  rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+	  emit_move_insn (operands[2], mem);
+	}
+    }
+}"
+  [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "*stack_protect_set_insn"
+  [(set (match_operand:SI 0 "memory_operand" "=m,m")
+	(unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))]
+		   UNSPEC_SP_SET))
+   (clobber (match_dup 1))]
+  ""
+  "@
+   ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0
+   ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0"
+  [(set_attr "length" "8,12")
+   (set_attr "conds" "clob,nocond")
+   (set_attr "type" "multiple")
+   (set_attr "arch" "t1,32")]
+)
+
+(define_expand "stack_protect_combined_test"
+  [(parallel
+     [(set (pc)
+	   (if_then_else
+		(eq (match_operand:SI 0 "memory_operand" "")
+		    (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+			       UNSPEC_SP_TEST))
+		(label_ref (match_operand 2))
+		(pc)))
+      (clobber (match_scratch:SI 3 ""))
+      (clobber (match_scratch:SI 4 ""))
+      (clobber (reg:CC CC_REGNUM))])]
+  ""
+  ""
+)
+
+;; Use a separate insn from the above expand so that the mem is kept
+;; outside operand #1 when register allocation comes.  This is needed to
+;; keep LRA from trying to reload the guard, since we need to control how
+;; the PIC access is done in the -fpic/-fPIC case (see the COMPUTE_NOW
+;; parameter when calling legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_test_insn"
+  [(set (pc)
+	(if_then_else
+		(eq (match_operand:SI 0 "memory_operand" "m,m")
+		    (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+			       UNSPEC_SP_TEST))
+		(label_ref (match_operand 2))
+		(pc)))
+   (clobber (match_scratch:SI 3 "=&l,&r"))
+   (clobber (match_scratch:SI 4 "=&l,&r"))
+   (clobber (reg:CC CC_REGNUM))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  rtx eq;
+
+  if (flag_pic)
+    {
+      /* Forces recomputing of GOT base now.  */
+      legitimize_pic_address (operands[1], SImode, operands[3], operands[4],
+			      true /*compute_now*/);
+    }
+  else
+    {
+      if (address_operand (operands[1], SImode))
+	operands[3] = operands[1];
+      else
+	{
+	  rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+	  emit_move_insn (operands[3], mem);
+	}
+    }
+  if (TARGET_32BIT)
+    {
+      emit_insn (gen_arm_stack_protect_test_insn (operands[4], operands[0],
+						  operands[3]));
+      rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+      eq = gen_rtx_EQ (CC_Zmode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_arm_cond_branch (operands[2], eq, cc_reg));
+    }
+  else
+    {
+      emit_insn (gen_thumb1_stack_protect_test_insn (operands[4], operands[0],
+						     operands[3]));
+      eq = gen_rtx_EQ (VOIDmode, operands[4], const0_rtx);
+      emit_jump_insn (gen_cbranchsi4 (eq, operands[4], const0_rtx,
+				      operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "arm_stack_protect_test_insn"
+  [(set (reg:CC_Z CC_REGNUM)
+	(compare:CC_Z (unspec:SI [(match_operand:SI 1 "memory_operand" "m,m")
+				  (mem:SI (match_operand:SI 2 "register_operand" "+l,r"))]
+				 UNSPEC_SP_TEST)
+		      (const_int 0)))
+   (clobber (match_operand:SI 0 "register_operand" "=&l,&r"))
+   (clobber (match_dup 2))]
+  "TARGET_32BIT"
+  "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+  [(set_attr "length" "8,12")
+   (set_attr "conds" "set")
+   (set_attr "type" "multiple")
+   (set_attr "arch" "t,32")]
+)
+
 (define_expand "casesi"
   [(match_operand:SI 0 "s_register_operand" "")	; index to jump on
    (match_operand:SI 1 "const_int_operand" "")	; lower bound
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 7e198f9bce4..69718ee9c7a 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -31,6 +31,23 @@
	      || REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
 })
 
+; Predicate for the stack protector guard's address in the
+; stack_protect_combined_set_insn and stack_protect_combined_test_insn patterns
+(define_predicate "guard_addr_operand"
+  (match_test "true")
+{
+  return (CONSTANT_ADDRESS_P (op)
+	  || !targetm.cannot_force_const_mem (mode, op));
+})
+
+; Predicate for the stack protector guard in the stack_protect_combined_set
+; and stack_protect_combined_test patterns
+(define_predicate "guard_operand"
+  (match_code "mem")
+{
+  return guard_addr_operand (XEXP (op, 0), mode);
+})
+
 (define_predicate "imm_for_neon_inv_logic_operand"
   (match_code "const_vector")
 {
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 19dcdbcdd73..cd199c9c529 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1962,4 +1962,17 @@
   }"
   [(set_attr "type" "mov_reg")]
 )
+
+(define_insn "thumb1_stack_protect_test_insn"
+  [(set (match_operand:SI 0 "register_operand" "=&l")
+	(unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+		    (mem:SI (match_operand:SI 2 "register_operand" "+l"))]
+		   UNSPEC_SP_TEST))
+   (clobber (match_dup 2))]
+  "TARGET_THUMB1"
+  "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+  [(set_attr "length" "8")
+   (set_attr "conds" "set")
+   (set_attr "type" "multiple")]
+)
 
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 19416736ef9..8f9dbcb08dc 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -86,6 +86,9 @@
   UNSPEC_PROBE_STACK    ; Probe stack memory reference
   UNSPEC_NONSECURE_MEM	; Represent non-secure memory in ARMv8-M with
			; security extension
+  UNSPEC_SP_SET		; Represent the setting of stack protector's canary
+  UNSPEC_SP_TEST	; Represent the testing of stack protector's canary
+			; against the guard.
 ])
 
 (define_c_enum "unspec" [
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 295fc1f1143..895309b2f3c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7450,22 +7450,61 @@ builtins.
 The get/set patterns have a single output/input operand respectively,
 with @var{mode} intended to be @code{Pmode}.
 
+@cindex @code{stack_protect_combined_set} instruction pattern
+@item @samp{stack_protect_combined_set}
+This pattern, if defined, moves a @code{ptr_mode} value from an address
+whose declaration RTX is given in operand 1 to the memory in operand 0
+without leaving the value in a register afterward.  If several
+instructions are needed by the target to perform the operation (e.g.@: to
+load the address from a GOT entry and then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled.  This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_set}
+pattern is then generated to move the value from that address to the
+address in operand 0.
+
 @cindex @code{stack_protect_set} instruction pattern
 @item @samp{stack_protect_set}
-This pattern, if defined, moves a @code{ptr_mode} value from the memory
-in operand 1 to the memory in operand 0 without leaving the value in
-a register afterward.  This is to avoid leaking the value some place
-that an attacker might use to rewrite the stack guard slot after
-having clobbered it.
+This pattern, if defined, moves a @code{ptr_mode} value from the valid
+memory location in operand 1 to the memory in operand 0 without leaving
+the value in a register afterward.  This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+Note: on targets where the addressing modes do not allow loading
+directly from the stack guard's address, the address is expanded first
+in a standard way, which could cause some spills.
 
 If this pattern is not defined, then a plain move pattern is generated.
 
+@cindex @code{stack_protect_combined_test} instruction pattern
+@item @samp{stack_protect_combined_test}
+This pattern, if defined, compares a @code{ptr_mode} value from an
+address whose declaration RTX is given in operand 1 with the memory in
+operand 0 without leaving the value in a register afterward, and
+branches to operand 2 if the values were equal.  If several
+instructions are needed by the target to perform the operation (e.g.@:
+to load the address from a GOT entry and then load the @code{ptr_mode}
+value and finally compare it), it is the backend's responsibility to
+ensure no intermediate result gets spilled.  This is to avoid leaking
+the value some place that an attacker might use to rewrite the stack
+guard slot after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_test}
+pattern is then generated to compare the value from that address to the
+value at the memory in operand 0.
+
 @cindex @code{stack_protect_test} instruction pattern
 @item @samp{stack_protect_test}
 This pattern, if defined, compares a @code{ptr_mode} value from the
-memory in operand 1 with the memory in operand 0 without leaving the
-value in a register afterward and branches to operand 2 if the values
-were equal.
+valid memory location in operand 1 with the memory in operand 0 without
+leaving the value in a register afterward and branches to operand 2 if
+the values were equal.
 
 If this pattern is not defined, then a plain compare pattern and
 conditional branch pattern is used.
diff --git a/gcc/function.c b/gcc/function.c
index 85a5d9f43f7..69523c1d723 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -4937,18 +4937,34 @@ stack_protect_epilogue (void)
   tree guard_decl = targetm.stack_protect_guard ();
   rtx_code_label *label = gen_label_rtx ();
   rtx x, y;
-  rtx_insn *seq;
+  rtx_insn *seq = NULL;
 
   x = expand_normal (crtl->stack_protect_guard);
-  if (guard_decl)
-    y = expand_normal (guard_decl);
+
+  if (targetm.have_stack_protect_combined_test () && guard_decl)
+    {
+      gcc_assert (DECL_P (guard_decl));
+      y = DECL_RTL (guard_decl);
+      /* Allow the target to compute the address of Y and compare it with
+	 X without leaking Y into a register.  This combined address +
+	 compare pattern allows the target to prevent spilling of any
+	 intermediate results by splitting it after register allocation.  */
+      seq = targetm.gen_stack_protect_combined_test (x, y, label);
+    }
   else
-    y = const0_rtx;
+    {
+      if (guard_decl)
+	y = expand_normal (guard_decl);
+      else
+	y = const0_rtx;
+
+      /* Allow the target to compare Y with X without leaking either into
+	 a register.  */
+      if (targetm.have_stack_protect_test ())
+	seq = targetm.gen_stack_protect_test (x, y, label);
+    }
 
-  /* Allow the target to compare Y with X without leaking either into
-     a register.  */
-  if (targetm.have_stack_protect_test ()
-      && ((seq = targetm.gen_stack_protect_test (x, y, label)) != NULL_RTX))
+  if (seq)
     emit_insn (seq);
   else
     emit_cmp_and_jump_insns (x, y, EQ, NULL_RTX, ptr_mode, 1, label);
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index 9a552c3d11c..d39889b3522 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -96,7 +96,9 @@ DEF_TARGET_INSN (sibcall_value, (rtx x0, rtx x1, rtx opt2, rtx opt3,
 DEF_TARGET_INSN (simple_return, (void))
 DEF_TARGET_INSN (split_stack_prologue, (void))
 DEF_TARGET_INSN (split_stack_space_check, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_set, (rtx x0, rtx x1))
 DEF_TARGET_INSN (stack_protect_set, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_test, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (stack_protect_test, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
-- 
2.21.0