Commit edd5e900 by Jie Zhang Committed by Julian Brown

arm.c (arm_rtx_costs_1): Adjust cost for CONST_VECTOR.

    gcc/
    * config/arm/arm.c (arm_rtx_costs_1): Adjust cost for
    CONST_VECTOR.
    (arm_size_rtx_costs): Likewise.
    (neon_valid_immediate): Add a case for double 0.0.

    gcc/testsuite/
    * gcc.target/arm/neon-vdup-1.c: New test case.
    * gcc.target/arm/neon-vdup-2.c: New test case.
    * gcc.target/arm/neon-vdup-3.c: New test case.
    * gcc.target/arm/neon-vdup-4.c: New test case.
    * gcc.target/arm/neon-vdup-5.c: New test case.
    * gcc.target/arm/neon-vdup-6.c: New test case.
    * gcc.target/arm/neon-vdup-7.c: New test case.
    * gcc.target/arm/neon-vdup-8.c: New test case.
    * gcc.target/arm/neon-vdup-9.c: New test case.
    * gcc.target/arm/neon-vdup-10.c: New test case.
    * gcc.target/arm/neon-vdup-11.c: New test case.
    * gcc.target/arm/neon-vdup-12.c: New test case.
    * gcc.target/arm/neon-vdup-13.c: New test case.
    * gcc.target/arm/neon-vdup-14.c: New test case.
    * gcc.target/arm/neon-vdup-15.c: New test case.
    * gcc.target/arm/neon-vdup-16.c: New test case.
    * gcc.target/arm/neon-vdup-17.c: New test case.
    * gcc.target/arm/neon-vdup-18.c: New test case.
    * gcc.target/arm/neon-vdup-19.c: New test case.
    * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Make intrinsic
    arguments non-constant.


Co-Authored-By: Julian Brown <julian@codesourcery.com>

From-SVN: r189611
parent 392e87f6
2012-07-18 Jie Zhang <jzhang918@gmail.com>
Julian Brown <julian@codesourcery.com>
* config/arm/arm.c (arm_rtx_costs_1): Adjust cost for
CONST_VECTOR.
(arm_size_rtx_costs): Likewise.
(neon_valid_immediate): Add a case for double 0.0.
2012-07-18 Andrew Stubbs <ams@codesourcery.com>
Mark Shinwell <shinwell@codesourcery.com>
Julian Brown <julian@codesourcery.com>
gcc/
* config/arm/vfp.md (*arm_movsi_vfp, *thumb2_movsi_vfp)
(*movdi_vfp_cortexa8, *movsf_vfp, *thumb2_movsf_vfp)
(*movdf_vfp, *thumb2_movdf_vfp, *movsfcc_vfp)
......
......@@ -7614,6 +7614,17 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
}
return true;
case CONST_VECTOR:
if (TARGET_NEON
&& TARGET_HARD_FLOAT
&& outer == SET
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
&& neon_immediate_valid_for_move (x, mode, NULL, NULL))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (4);
return true;
default:
*total = COSTS_N_INSNS (4);
return false;
......@@ -7954,6 +7965,17 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
*total = COSTS_N_INSNS (4);
return true;
case CONST_VECTOR:
if (TARGET_NEON
&& TARGET_HARD_FLOAT
&& outer_code == SET
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
&& neon_immediate_valid_for_move (x, mode, NULL, NULL))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (4);
return true;
case HIGH:
case LO_SUM:
/* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
......@@ -8774,11 +8796,14 @@ vfp3_const_double_rtx (rtx x)
vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
eeeeeeee ffffffff gggggggg hhhhhhhh
vmov f32 18 aBbbbbbc defgh000 00000000 00000000
vmov f32 19 00000000 00000000 00000000 00000000
For case 18, B = !b. Representable values are exactly those accepted by
vfp3_const_double_index, but are output as floating-point numbers rather
than indices.
For case 19, we will change it to vmov.i32 when assembling.
Variants 0-5 (inclusive) may also be used as immediates for the second
operand of VORR/VBIC instructions.
......@@ -8835,7 +8860,7 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
rtx el0 = CONST_VECTOR_ELT (op, 0);
REAL_VALUE_TYPE r0;
if (!vfp3_const_double_rtx (el0))
if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
return -1;
REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
......@@ -8857,7 +8882,10 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
if (elementwidth)
*elementwidth = 0;
return 18;
if (el0 == CONST0_RTX (GET_MODE (el0)))
return 19;
else
return 18;
}
/* Splat vector constant out into a byte vector. */
......
2012-07-18 Jie Zhang <jzhang918@gmail.com>
Julian Brown <julian@codesourcery.com>
* gcc.target/arm/neon-vdup-1.c: New test case.
* gcc.target/arm/neon-vdup-2.c: New test case.
* gcc.target/arm/neon-vdup-3.c: New test case.
* gcc.target/arm/neon-vdup-4.c: New test case.
* gcc.target/arm/neon-vdup-5.c: New test case.
* gcc.target/arm/neon-vdup-6.c: New test case.
* gcc.target/arm/neon-vdup-7.c: New test case.
* gcc.target/arm/neon-vdup-8.c: New test case.
* gcc.target/arm/neon-vdup-9.c: New test case.
* gcc.target/arm/neon-vdup-10.c: New test case.
* gcc.target/arm/neon-vdup-11.c: New test case.
* gcc.target/arm/neon-vdup-12.c: New test case.
* gcc.target/arm/neon-vdup-13.c: New test case.
* gcc.target/arm/neon-vdup-14.c: New test case.
* gcc.target/arm/neon-vdup-15.c: New test case.
* gcc.target/arm/neon-vdup-16.c: New test case.
* gcc.target/arm/neon-vdup-17.c: New test case.
* gcc.target/arm/neon-vdup-18.c: New test case.
* gcc.target/arm/neon-vdup-19.c: New test case.
* gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Make intrinsic
arguments non-constant.
2012-07-18 Richard Guenther <rguenther@suse.de>
PR tree-optimization/53970
......
......@@ -4,10 +4,8 @@
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
float32x2_t f_sub_abs_to_vabd_32()
float32x2_t f_sub_abs_to_vabd_32(float32x2_t val1, float32x2_t val2)
{
float32x2_t val1 = vdup_n_f32 (10);
float32x2_t val2 = vdup_n_f32 (30);
float32x2_t sres = vsub_f32(val1, val2);
float32x2_t res = vabs_f32 (sres);
......@@ -16,10 +14,8 @@ float32x2_t f_sub_abs_to_vabd_32()
/* { dg-final { scan-assembler "vabd\.f32" } }*/
#include <arm_neon.h>
int8x8_t sub_abs_to_vabd_8()
int8x8_t sub_abs_to_vabd_8(int8x8_t val1, int8x8_t val2)
{
int8x8_t val1 = vdup_n_s8 (10);
int8x8_t val2 = vdup_n_s8 (30);
int8x8_t sres = vsub_s8(val1, val2);
int8x8_t res = vabs_s8 (sres);
......@@ -27,10 +23,8 @@ int8x8_t sub_abs_to_vabd_8()
}
/* { dg-final { scan-assembler "vabd\.s8" } }*/
int16x4_t sub_abs_to_vabd_16()
int16x4_t sub_abs_to_vabd_16(int16x4_t val1, int16x4_t val2)
{
int16x4_t val1 = vdup_n_s16 (10);
int16x4_t val2 = vdup_n_s16 (30);
int16x4_t sres = vsub_s16(val1, val2);
int16x4_t res = vabs_s16 (sres);
......@@ -38,10 +32,8 @@ int16x4_t sub_abs_to_vabd_16()
}
/* { dg-final { scan-assembler "vabd\.s16" } }*/
int32x2_t sub_abs_to_vabd_32()
int32x2_t sub_abs_to_vabd_32(int32x2_t val1, int32x2_t val2)
{
int32x2_t val1 = vdup_n_s32 (10);
int32x2_t val2 = vdup_n_s32 (30);
int32x2_t sres = vsub_s32(val1, val2);
int32x2_t res = vabs_s32 (sres);
......
/* Test the optimization of `vdupq_n_f32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
float32x4_t out_float32x4_t;
/* Pass the constant 0.0 to vdupq_n_f32 and store the result in the global
   out_float32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.f32' into a Q register with
   immediate #0.0.  */
void test_vdupq_nf32 (void)
{
out_float32x4_t = vdupq_n_f32 (0.0);
}
/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[qQ\]\[0-9\]+, #0\.0\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant ~0x12000000 to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #3992977407.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (~0x12000000);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #3992977407\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint16x8_t out_uint16x8_t;
/* Pass the constant 0x12 to vdupq_n_u16 and store the result in the global
   out_uint16x8_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i16' into a Q register with
   immediate #18.  */
void test_vdupq_nu16 (void)
{
out_uint16x8_t = vdupq_n_u16 (0x12);
}
/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #18\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint16x8_t out_uint16x8_t;
/* Pass the constant 0x1200 to vdupq_n_u16 and store the result in the
   global out_uint16x8_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i16' into a Q register with
   immediate #4608.  */
void test_vdupq_nu16 (void)
{
out_uint16x8_t = vdupq_n_u16 (0x1200);
}
/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #4608\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint16x8_t out_uint16x8_t;
/* Pass the constant ~0x12 to vdupq_n_u16 and store the result in the global
   out_uint16x8_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i16' into a Q register with
   immediate #65517.  */
void test_vdupq_nu16 (void)
{
out_uint16x8_t = vdupq_n_u16 (~0x12);
}
/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #65517\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint16x8_t out_uint16x8_t;
/* Pass the constant ~0x1200 to vdupq_n_u16 and store the result in the
   global out_uint16x8_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i16' into a Q register with
   immediate #60927.  */
void test_vdupq_nu16 (void)
{
out_uint16x8_t = vdupq_n_u16 (~0x1200);
}
/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #60927\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u8' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint8x16_t out_uint8x16_t;
/* Pass the constant 0x12 to vdupq_n_u8 and store the result in the global
   out_uint8x16_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i8' into a Q register with
   immediate #18.  */
void test_vdupq_nu8 (void)
{
out_uint8x16_t = vdupq_n_u8 (0x12);
}
/* { dg-final { scan-assembler "vmov\.i8\[ \]+\[qQ\]\[0-9\]+, #18\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant 0x12ff to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4863.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (0x12ff);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4863\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant 0x12ffff to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #1245183.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (0x12ffff);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #1245183\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant ~0x12ff to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4294962432.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (~0x12ff);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4294962432\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant ~0x12ffff to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4293722112.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (~0x12ffff);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4293722112\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_f32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
float32x4_t out_float32x4_t;
/* Pass the constant 0.125 to vdupq_n_f32 and store the result in the global
   out_float32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.f32' into a Q register with
   immediate #1.25e-1.  */
void test_vdupq_nf32 (void)
{
out_float32x4_t = vdupq_n_f32 (0.125);
}
/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[qQ\]\[0-9\]+, #1\.25e-1\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant 0x12 to vdupq_n_u32 and store the result in the global
   out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #18.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (0x12);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #18\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant 0x1200 to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4608.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (0x1200);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4608\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant 0x120000 to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #1179648.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (0x120000);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #1179648\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant 0x12000000 to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #301989888.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (0x12000000);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #301989888\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant ~0x12 to vdupq_n_u32 and store the result in the global
   out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4294967277.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (~0x12);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4294967277\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant ~0x1200 to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4294962687.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (~0x1200);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4294962687\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
uint32x4_t out_uint32x4_t;
/* Pass the constant ~0x120000 to vdupq_n_u32 and store the result in the
   global out_uint32x4_t.  The dg-final directive that follows this function
   expects the assembly to contain a `vmov.i32' into a Q register with
   immediate #4293787647.  */
void test_vdupq_nu32 (void)
{
out_uint32x4_t = vdupq_n_u32 (~0x120000);
}
/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4293787647\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment