Patrick Williams | 776d5d2 | 2021-03-11 14:29:47 -0600 | [diff] [blame^] | 1 | Upstream-Status: Backport [https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=97b668f9a8c6ec565c278a60e7d1492a6932e409] |
| 2 | Signed-off-by: Jon Mason <jon.mason@arm.com> |
| 3 | |
| 4 | From 97b668f9a8c6ec565c278a60e7d1492a6932e409 Mon Sep 17 00:00:00 2001 |
| 5 | From: Matthias Klose <doko@ubuntu.com> |
| 6 | Date: Tue, 6 Oct 2020 13:41:37 +0200 |
| 7 | Subject: [PATCH] Backport fix for PR/tree-optimization/97236 - fix bad use of |
| 8 | VMAT_CONTIGUOUS |
| 9 | |
| 10 | This avoids using VMAT_CONTIGUOUS with single-element interleaving |
| 11 | when using V1mode vectors. Instead keep VMAT_ELEMENTWISE but |
| 12 | continue to avoid load-lanes and gathers. |
| 13 | |
| 14 | 2020-10-01 Richard Biener <rguenther@suse.de> |
| 15 | |
| 16 | PR tree-optimization/97236 |
| 17 | * tree-vect-stmts.c (get_group_load_store_type): Keep |
| 18 | VMAT_ELEMENTWISE for single-element vectors. |
| 19 | |
| 20 | * gcc.dg/vect/pr97236.c: New testcase. |
| 21 | |
| 22 | (cherry picked from commit 1ab88985631dd2c5a5e3b5c0dce47cf8b6ed2f82) |
| 23 | --- |
| 24 | gcc/testsuite/gcc.dg/vect/pr97236.c | 43 +++++++++++++++++++++++++++++ |
| 25 | gcc/tree-vect-stmts.c | 20 ++++++-------- |
| 26 | 2 files changed, 52 insertions(+), 11 deletions(-) |
| 27 | create mode 100644 gcc/testsuite/gcc.dg/vect/pr97236.c |
| 28 | |
| 29 | diff --git a/gcc/testsuite/gcc.dg/vect/pr97236.c b/gcc/testsuite/gcc.dg/vect/pr97236.c |
| 30 | new file mode 100644 |
| 31 | index 000000000000..9d3dc20d953d |
| 32 | --- /dev/null |
| 33 | +++ b/gcc/testsuite/gcc.dg/vect/pr97236.c |
| 34 | @@ -0,0 +1,43 @@ |
| 35 | +typedef unsigned char __uint8_t; |
| 36 | +typedef __uint8_t uint8_t; |
| 37 | +typedef struct plane_t { |
| 38 | + uint8_t *p_pixels; |
| 39 | + int i_lines; |
| 40 | + int i_pitch; |
| 41 | +} plane_t; |
| 42 | + |
| 43 | +typedef struct { |
| 44 | + plane_t p[5]; |
| 45 | +} picture_t; |
| 46 | + |
| 47 | +#define N 4 |
| 48 | + |
| 49 | +void __attribute__((noipa)) |
| 50 | +picture_Clone(picture_t *picture, picture_t *res) |
| 51 | +{ |
| 52 | + for (int i = 0; i < N; i++) { |
| 53 | + res->p[i].p_pixels = picture->p[i].p_pixels; |
| 54 | + res->p[i].i_lines = picture->p[i].i_lines; |
| 55 | + res->p[i].i_pitch = picture->p[i].i_pitch; |
| 56 | + } |
| 57 | +} |
| 58 | + |
| 59 | +int |
| 60 | +main() |
| 61 | +{ |
| 62 | + picture_t aaa, bbb; |
| 63 | + uint8_t pixels[10] = {1, 1, 1, 1, 1, 1, 1, 1}; |
| 64 | + |
| 65 | + for (unsigned i = 0; i < N; i++) |
| 66 | + aaa.p[i].p_pixels = pixels; |
| 67 | + |
| 68 | + picture_Clone (&aaa, &bbb); |
| 69 | + |
| 70 | + uint8_t c = 0; |
| 71 | + for (unsigned i = 0; i < N; i++) |
| 72 | + c += bbb.p[i].p_pixels[0]; |
| 73 | + |
| 74 | + if (c != N) |
| 75 | + __builtin_abort (); |
| 76 | + return 0; |
| 77 | +} |
| 78 | diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c |
| 79 | index 507f81b0a0e8..ffbba3441de2 100644 |
| 80 | --- a/gcc/tree-vect-stmts.c |
| 81 | +++ b/gcc/tree-vect-stmts.c |
| 82 | @@ -2355,25 +2355,23 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, |
| 83 | /* First cope with the degenerate case of a single-element |
| 84 | vector. */ |
| 85 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) |
| 86 | - *memory_access_type = VMAT_CONTIGUOUS; |
| 87 | + ; |
| 88 | |
| 89 | /* Otherwise try using LOAD/STORE_LANES. */ |
| 90 | - if (*memory_access_type == VMAT_ELEMENTWISE |
| 91 | - && (vls_type == VLS_LOAD |
| 92 | - ? vect_load_lanes_supported (vectype, group_size, masked_p) |
| 93 | - : vect_store_lanes_supported (vectype, group_size, |
| 94 | - masked_p))) |
| 95 | + else if (vls_type == VLS_LOAD |
| 96 | + ? vect_load_lanes_supported (vectype, group_size, masked_p) |
| 97 | + : vect_store_lanes_supported (vectype, group_size, |
| 98 | + masked_p)) |
| 99 | { |
| 100 | *memory_access_type = VMAT_LOAD_STORE_LANES; |
| 101 | overrun_p = would_overrun_p; |
| 102 | } |
| 103 | |
| 104 | /* If that fails, try using permuting loads. */ |
| 105 | - if (*memory_access_type == VMAT_ELEMENTWISE |
| 106 | - && (vls_type == VLS_LOAD |
| 107 | - ? vect_grouped_load_supported (vectype, single_element_p, |
| 108 | - group_size) |
| 109 | - : vect_grouped_store_supported (vectype, group_size))) |
| 110 | + else if (vls_type == VLS_LOAD |
| 111 | + ? vect_grouped_load_supported (vectype, single_element_p, |
| 112 | + group_size) |
| 113 | + : vect_grouped_store_supported (vectype, group_size)) |
| 114 | { |
| 115 | *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; |
| 116 | overrun_p = would_overrun_p; |
| 117 | -- |
| 118 | 2.20.1 |
| 119 | |