| From b46cdcdeb762c1f0eef68dc4a7d90f8176152e07 Mon Sep 17 00:00:00 2001 |
| From: Alistair Francis <alistair.francis@wdc.com> |
| Date: Wed, 1 May 2019 19:51:27 -0700 |
| Subject: [PATCH] Revert "target/arm: Use vector operations for saturation" |
| |
| This reverts commit 89e68b575e138d0af1435f11a8ffcd8779c237bd. |
| |
| This fixes QEMU aborts when running the qemuarm machine. |
| |
| Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
| Upstream-Status: Pending |
| --- |
| target/arm/helper.h | 33 ------- |
| target/arm/translate-a64.c | 36 ++++---- |
| target/arm/translate.c | 172 ++++++------------------------------- |
| target/arm/translate.h | 4 - |
| target/arm/vec_helper.c | 130 ---------------------------- |
| 5 files changed, 44 insertions(+), 331 deletions(-) |
| |
| diff --git a/target/arm/helper.h b/target/arm/helper.h |
| index 50cb036378..b2669f140f 100644 |
| --- a/target/arm/helper.h |
| +++ b/target/arm/helper.h |
| @@ -646,39 +646,6 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, |
| DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, |
| void, ptr, ptr, ptr, ptr, ptr, i32) |
| |
| -DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| -DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, |
| - void, ptr, ptr, ptr, ptr, i32) |
| - |
| DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, |
| void, ptr, ptr, ptr, ptr, i32) |
| DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG, |
| diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c |
| index 9dcc5ff3a3..428211f92f 100644 |
| --- a/target/arm/translate-a64.c |
| +++ b/target/arm/translate-a64.c |
| @@ -11230,22 +11230,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) |
| } |
| |
| switch (opcode) { |
| - case 0x01: /* SQADD, UQADD */ |
| - tcg_gen_gvec_4(vec_full_reg_offset(s, rd), |
| - offsetof(CPUARMState, vfp.qc), |
| - vec_full_reg_offset(s, rn), |
| - vec_full_reg_offset(s, rm), |
| - is_q ? 16 : 8, vec_full_reg_size(s), |
| - (u ? uqadd_op : sqadd_op) + size); |
| - return; |
| - case 0x05: /* SQSUB, UQSUB */ |
| - tcg_gen_gvec_4(vec_full_reg_offset(s, rd), |
| - offsetof(CPUARMState, vfp.qc), |
| - vec_full_reg_offset(s, rn), |
| - vec_full_reg_offset(s, rm), |
| - is_q ? 16 : 8, vec_full_reg_size(s), |
| - (u ? uqsub_op : sqsub_op) + size); |
| - return; |
| case 0x0c: /* SMAX, UMAX */ |
| if (u) { |
| gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); |
| @@ -11341,6 +11325,16 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) |
| genfn = fns[size][u]; |
| break; |
| } |
| + case 0x1: /* SQADD, UQADD */ |
| + { |
| + static NeonGenTwoOpEnvFn * const fns[3][2] = { |
| + { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, |
| + { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, |
| + { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, |
| + }; |
| + genenvfn = fns[size][u]; |
| + break; |
| + } |
| case 0x2: /* SRHADD, URHADD */ |
| { |
| static NeonGenTwoOpFn * const fns[3][2] = { |
| @@ -11361,6 +11355,16 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) |
| genfn = fns[size][u]; |
| break; |
| } |
| + case 0x5: /* SQSUB, UQSUB */ |
| + { |
| + static NeonGenTwoOpEnvFn * const fns[3][2] = { |
| + { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, |
| + { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, |
| + { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, |
| + }; |
| + genenvfn = fns[size][u]; |
| + break; |
| + } |
| case 0x8: /* SSHL, USHL */ |
| { |
| static NeonGenTwoOpFn * const fns[3][2] = { |
| diff --git a/target/arm/translate.c b/target/arm/translate.c |
| index 10bc53f91c..cf675cef3f 100644 |
| --- a/target/arm/translate.c |
| +++ b/target/arm/translate.c |
| @@ -6242,142 +6242,6 @@ const GVecGen3 cmtst_op[4] = { |
| .vece = MO_64 }, |
| }; |
| |
| -static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, |
| - TCGv_vec a, TCGv_vec b) |
| -{ |
| - TCGv_vec x = tcg_temp_new_vec_matching(t); |
| - tcg_gen_add_vec(vece, x, a, b); |
| - tcg_gen_usadd_vec(vece, t, a, b); |
| - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); |
| - tcg_gen_or_vec(vece, sat, sat, x); |
| - tcg_temp_free_vec(x); |
| -} |
| - |
| -const GVecGen4 uqadd_op[4] = { |
| - { .fniv = gen_uqadd_vec, |
| - .fno = gen_helper_gvec_uqadd_b, |
| - .opc = INDEX_op_usadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_8 }, |
| - { .fniv = gen_uqadd_vec, |
| - .fno = gen_helper_gvec_uqadd_h, |
| - .opc = INDEX_op_usadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_16 }, |
| - { .fniv = gen_uqadd_vec, |
| - .fno = gen_helper_gvec_uqadd_s, |
| - .opc = INDEX_op_usadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_32 }, |
| - { .fniv = gen_uqadd_vec, |
| - .fno = gen_helper_gvec_uqadd_d, |
| - .opc = INDEX_op_usadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_64 }, |
| -}; |
| - |
| -static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, |
| - TCGv_vec a, TCGv_vec b) |
| -{ |
| - TCGv_vec x = tcg_temp_new_vec_matching(t); |
| - tcg_gen_add_vec(vece, x, a, b); |
| - tcg_gen_ssadd_vec(vece, t, a, b); |
| - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); |
| - tcg_gen_or_vec(vece, sat, sat, x); |
| - tcg_temp_free_vec(x); |
| -} |
| - |
| -const GVecGen4 sqadd_op[4] = { |
| - { .fniv = gen_sqadd_vec, |
| - .fno = gen_helper_gvec_sqadd_b, |
| - .opc = INDEX_op_ssadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_8 }, |
| - { .fniv = gen_sqadd_vec, |
| - .fno = gen_helper_gvec_sqadd_h, |
| - .opc = INDEX_op_ssadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_16 }, |
| - { .fniv = gen_sqadd_vec, |
| - .fno = gen_helper_gvec_sqadd_s, |
| - .opc = INDEX_op_ssadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_32 }, |
| - { .fniv = gen_sqadd_vec, |
| - .fno = gen_helper_gvec_sqadd_d, |
| - .opc = INDEX_op_ssadd_vec, |
| - .write_aofs = true, |
| - .vece = MO_64 }, |
| -}; |
| - |
| -static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, |
| - TCGv_vec a, TCGv_vec b) |
| -{ |
| - TCGv_vec x = tcg_temp_new_vec_matching(t); |
| - tcg_gen_sub_vec(vece, x, a, b); |
| - tcg_gen_ussub_vec(vece, t, a, b); |
| - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); |
| - tcg_gen_or_vec(vece, sat, sat, x); |
| - tcg_temp_free_vec(x); |
| -} |
| - |
| -const GVecGen4 uqsub_op[4] = { |
| - { .fniv = gen_uqsub_vec, |
| - .fno = gen_helper_gvec_uqsub_b, |
| - .opc = INDEX_op_ussub_vec, |
| - .write_aofs = true, |
| - .vece = MO_8 }, |
| - { .fniv = gen_uqsub_vec, |
| - .fno = gen_helper_gvec_uqsub_h, |
| - .opc = INDEX_op_ussub_vec, |
| - .write_aofs = true, |
| - .vece = MO_16 }, |
| - { .fniv = gen_uqsub_vec, |
| - .fno = gen_helper_gvec_uqsub_s, |
| - .opc = INDEX_op_ussub_vec, |
| - .write_aofs = true, |
| - .vece = MO_32 }, |
| - { .fniv = gen_uqsub_vec, |
| - .fno = gen_helper_gvec_uqsub_d, |
| - .opc = INDEX_op_ussub_vec, |
| - .write_aofs = true, |
| - .vece = MO_64 }, |
| -}; |
| - |
| -static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, |
| - TCGv_vec a, TCGv_vec b) |
| -{ |
| - TCGv_vec x = tcg_temp_new_vec_matching(t); |
| - tcg_gen_sub_vec(vece, x, a, b); |
| - tcg_gen_sssub_vec(vece, t, a, b); |
| - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); |
| - tcg_gen_or_vec(vece, sat, sat, x); |
| - tcg_temp_free_vec(x); |
| -} |
| - |
| -const GVecGen4 sqsub_op[4] = { |
| - { .fniv = gen_sqsub_vec, |
| - .fno = gen_helper_gvec_sqsub_b, |
| - .opc = INDEX_op_sssub_vec, |
| - .write_aofs = true, |
| - .vece = MO_8 }, |
| - { .fniv = gen_sqsub_vec, |
| - .fno = gen_helper_gvec_sqsub_h, |
| - .opc = INDEX_op_sssub_vec, |
| - .write_aofs = true, |
| - .vece = MO_16 }, |
| - { .fniv = gen_sqsub_vec, |
| - .fno = gen_helper_gvec_sqsub_s, |
| - .opc = INDEX_op_sssub_vec, |
| - .write_aofs = true, |
| - .vece = MO_32 }, |
| - { .fniv = gen_sqsub_vec, |
| - .fno = gen_helper_gvec_sqsub_d, |
| - .opc = INDEX_op_sssub_vec, |
| - .write_aofs = true, |
| - .vece = MO_64 }, |
| -}; |
| - |
| /* Translate a NEON data processing instruction. Return nonzero if the |
| instruction is invalid. |
| We process data in a mixture of 32-bit and 64-bit chunks. |
| @@ -6561,18 +6425,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) |
| } |
| return 0; |
| |
| - case NEON_3R_VQADD: |
| - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), |
| - rn_ofs, rm_ofs, vec_size, vec_size, |
| - (u ? uqadd_op : sqadd_op) + size); |
| - break; |
| - |
| - case NEON_3R_VQSUB: |
| - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), |
| - rn_ofs, rm_ofs, vec_size, vec_size, |
| - (u ? uqsub_op : sqsub_op) + size); |
| - break; |
| - |
| case NEON_3R_VMUL: /* VMUL */ |
| if (u) { |
| /* Polynomial case allows only P8 and is handled below. */ |
| @@ -6637,6 +6489,24 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) |
| neon_load_reg64(cpu_V0, rn + pass); |
| neon_load_reg64(cpu_V1, rm + pass); |
| switch (op) { |
| + case NEON_3R_VQADD: |
| + if (u) { |
| + gen_helper_neon_qadd_u64(cpu_V0, cpu_env, |
| + cpu_V0, cpu_V1); |
| + } else { |
| + gen_helper_neon_qadd_s64(cpu_V0, cpu_env, |
| + cpu_V0, cpu_V1); |
| + } |
| + break; |
| + case NEON_3R_VQSUB: |
| + if (u) { |
| + gen_helper_neon_qsub_u64(cpu_V0, cpu_env, |
| + cpu_V0, cpu_V1); |
| + } else { |
| + gen_helper_neon_qsub_s64(cpu_V0, cpu_env, |
| + cpu_V0, cpu_V1); |
| + } |
| + break; |
| case NEON_3R_VSHL: |
| if (u) { |
| gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); |
| @@ -6752,12 +6622,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) |
| case NEON_3R_VHADD: |
| GEN_NEON_INTEGER_OP(hadd); |
| break; |
| + case NEON_3R_VQADD: |
| + GEN_NEON_INTEGER_OP_ENV(qadd); |
| + break; |
| case NEON_3R_VRHADD: |
| GEN_NEON_INTEGER_OP(rhadd); |
| break; |
| case NEON_3R_VHSUB: |
| GEN_NEON_INTEGER_OP(hsub); |
| break; |
| + case NEON_3R_VQSUB: |
| + GEN_NEON_INTEGER_OP_ENV(qsub); |
| + break; |
| case NEON_3R_VSHL: |
| GEN_NEON_INTEGER_OP(shl); |
| break; |
| diff --git a/target/arm/translate.h b/target/arm/translate.h |
| index c2348def0d..07055c9449 100644 |
| --- a/target/arm/translate.h |
| +++ b/target/arm/translate.h |
| @@ -248,10 +248,6 @@ extern const GVecGen2i ssra_op[4]; |
| extern const GVecGen2i usra_op[4]; |
| extern const GVecGen2i sri_op[4]; |
| extern const GVecGen2i sli_op[4]; |
| -extern const GVecGen4 uqadd_op[4]; |
| -extern const GVecGen4 sqadd_op[4]; |
| -extern const GVecGen4 uqsub_op[4]; |
| -extern const GVecGen4 sqsub_op[4]; |
| void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); |
| |
| /* |
| diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c |
| index dedef62403..be3271659f 100644 |
| --- a/target/arm/vec_helper.c |
| +++ b/target/arm/vec_helper.c |
| @@ -769,136 +769,6 @@ DO_FMLA_IDX(gvec_fmla_idx_d, float64, ) |
| |
| #undef DO_FMLA_IDX |
| |
| -#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \ |
| -void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \ |
| -{ \ |
| - intptr_t i, oprsz = simd_oprsz(desc); \ |
| - TYPEN *d = vd, *n = vn; TYPEM *m = vm; \ |
| - bool q = false; \ |
| - for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \ |
| - WTYPE dd = (WTYPE)n[i] OP m[i]; \ |
| - if (dd < MIN) { \ |
| - dd = MIN; \ |
| - q = true; \ |
| - } else if (dd > MAX) { \ |
| - dd = MAX; \ |
| - q = true; \ |
| - } \ |
| - d[i] = dd; \ |
| - } \ |
| - if (q) { \ |
| - uint32_t *qc = vq; \ |
| - qc[0] = 1; \ |
| - } \ |
| - clear_tail(d, oprsz, simd_maxsz(desc)); \ |
| -} |
| - |
| -DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX) |
| -DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX) |
| -DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX) |
| - |
| -DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX) |
| -DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX) |
| -DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX) |
| - |
| -DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX) |
| -DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX) |
| -DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX) |
| - |
| -DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX) |
| -DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX) |
| -DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX) |
| - |
| -#undef DO_SAT |
| - |
| -void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn, |
| - void *vm, uint32_t desc) |
| -{ |
| - intptr_t i, oprsz = simd_oprsz(desc); |
| - uint64_t *d = vd, *n = vn, *m = vm; |
| - bool q = false; |
| - |
| - for (i = 0; i < oprsz / 8; i++) { |
| - uint64_t nn = n[i], mm = m[i], dd = nn + mm; |
| - if (dd < nn) { |
| - dd = UINT64_MAX; |
| - q = true; |
| - } |
| - d[i] = dd; |
| - } |
| - if (q) { |
| - uint32_t *qc = vq; |
| - qc[0] = 1; |
| - } |
| - clear_tail(d, oprsz, simd_maxsz(desc)); |
| -} |
| - |
| -void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn, |
| - void *vm, uint32_t desc) |
| -{ |
| - intptr_t i, oprsz = simd_oprsz(desc); |
| - uint64_t *d = vd, *n = vn, *m = vm; |
| - bool q = false; |
| - |
| - for (i = 0; i < oprsz / 8; i++) { |
| - uint64_t nn = n[i], mm = m[i], dd = nn - mm; |
| - if (nn < mm) { |
| - dd = 0; |
| - q = true; |
| - } |
| - d[i] = dd; |
| - } |
| - if (q) { |
| - uint32_t *qc = vq; |
| - qc[0] = 1; |
| - } |
| - clear_tail(d, oprsz, simd_maxsz(desc)); |
| -} |
| - |
| -void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn, |
| - void *vm, uint32_t desc) |
| -{ |
| - intptr_t i, oprsz = simd_oprsz(desc); |
| - int64_t *d = vd, *n = vn, *m = vm; |
| - bool q = false; |
| - |
| - for (i = 0; i < oprsz / 8; i++) { |
| - int64_t nn = n[i], mm = m[i], dd = nn + mm; |
| - if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) { |
| - dd = (nn >> 63) ^ ~INT64_MIN; |
| - q = true; |
| - } |
| - d[i] = dd; |
| - } |
| - if (q) { |
| - uint32_t *qc = vq; |
| - qc[0] = 1; |
| - } |
| - clear_tail(d, oprsz, simd_maxsz(desc)); |
| -} |
| - |
| -void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, |
| - void *vm, uint32_t desc) |
| -{ |
| - intptr_t i, oprsz = simd_oprsz(desc); |
| - int64_t *d = vd, *n = vn, *m = vm; |
| - bool q = false; |
| - |
| - for (i = 0; i < oprsz / 8; i++) { |
| - int64_t nn = n[i], mm = m[i], dd = nn - mm; |
| - if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) { |
| - dd = (nn >> 63) ^ ~INT64_MIN; |
| - q = true; |
| - } |
| - d[i] = dd; |
| - } |
| - if (q) { |
| - uint32_t *qc = vq; |
| - qc[0] = 1; |
| - } |
| - clear_tail(d, oprsz, simd_maxsz(desc)); |
| -} |
| - |
| /* |
| * Convert float16 to float32, raising no exceptions and |
| * preserving exceptional values, including SNaN. |
| -- |
| 2.21.0 |
| |