Commit 39252973 by Kyrylo Tkachov Committed by Kyrylo Tkachov

[AArch64][1/2] Add fmul-by-power-of-2+fcvt optimisation

	* config/aarch64/aarch64.md
	(*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult): New pattern.
	* config/aarch64/aarch64-simd.md
	(*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult): Likewise.
	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle above patterns.
	(aarch64_fpconst_pow_of_2): New function.
	(aarch64_vec_fpconst_pow_of_2): Likewise.
	* config/aarch64/aarch64-protos.h (aarch64_fpconst_pow_of_2): Declare
	prototype.
	(aarch64_vec_fpconst_pow_of_2): Likewise.
	* config/aarch64/predicates.md (aarch64_fp_pow2): New predicate.
	(aarch64_fp_vec_pow2): Likewise.

	* gcc.target/aarch64/fmul_fcvt_1.c: New test.
	* gcc.target/aarch64/fmul_fcvt_2.c: Likewise.

From-SVN: r229085
parent 6c27ebdb
2015-10-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md
(*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult): New pattern.
* config/aarch64/aarch64-simd.md
(*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle above patterns.
(aarch64_fpconst_pow_of_2): New function.
(aarch64_vec_fpconst_pow_of_2): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_fpconst_pow_of_2): Declare
prototype.
(aarch64_vec_fpconst_pow_of_2): Likewise.
* config/aarch64/predicates.md (aarch64_fp_pow2): New predicate.
(aarch64_fp_vec_pow2): Likewise.
2015-10-20 Uros Bizjak <ubizjak@gmail.com>
* config/alpha/alpha.h (HARD_REGNO_NREGS): Use CEIL macro.
......@@ -294,12 +294,14 @@ enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
int aarch64_asm_preferred_eh_data_format (int, int);
int aarch64_fpconst_pow_of_2 (rtx);
machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
machine_mode);
int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
int aarch64_hard_regno_nregs (unsigned, machine_mode);
int aarch64_simd_attr_length_move (rtx_insn *);
int aarch64_uxt_size (int, HOST_WIDE_INT);
int aarch64_vec_fpconst_pow_of_2 (rtx);
rtx aarch64_final_eh_return_addr (void);
rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
const char *aarch64_output_move_struct (rtx *operands);
......
......@@ -1654,6 +1654,26 @@
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
;; Fold a VDQF vector multiply by a power-of-2 constant into the FIXUORS
;; conversion that consumes it (through UNSPEC_FRINTZ), emitting a single
;; fixed-point fcvtz<su> instead.
;; Operand 2 must satisfy aarch64_fp_vec_pow2.  The insn condition further
;; requires TARGET_SIMD and passes aarch64_vec_fpconst_pow_of_2 (operands[2])
;; to IN_RANGE with limits 1 and the bit size of the VDQF element mode.
;; That same value is printed as the trailing #<fbits> immediate.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(mult:VDQF
(match_operand:VDQF 1 "register_operand" "w")
(match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
UNSPEC_FRINTZ)))]
"TARGET_SIMD
&& IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
{
/* The immediate is computed from the constant at output time, so the
   assembly line is formatted into a local buffer rather than returned as a
   fixed template.  */
int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
char buf[64];
snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
output_asm_insn (buf, operands);
/* buf has already been handed to output_asm_insn; return an empty
   template.  */
return "";
}
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
(define_expand "<optab><VDQF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
......
......@@ -6786,6 +6786,19 @@ cost_plus:
else
*cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
}
/* We can combine fmul by a power of 2 followed by a fcvt into a single
fixed-point fcvt. */
if (GET_CODE (x) == MULT
&& ((VECTOR_MODE_P (mode)
&& aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
|| aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
{
*cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
0, speed);
return true;
}
*cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
return true;
......@@ -13250,6 +13263,52 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
return default_unspec_may_trap_p (x, flags);
}
/* Compute the base-2 logarithm of the floating-point constant X.

   X qualifies only if it is a CONST_DOUBLE that is not negative, not a NaN,
   not an infinity, and passes real_isinteger for DFmode.  For a qualifying
   X the result is exact_log2 applied to its integer value; every other X
   yields -1.  */
int
aarch64_fpconst_pow_of_2 (rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return -1;

  const REAL_VALUE_TYPE *value = CONST_DOUBLE_REAL_VALUE (x);

  /* Reject everything that cannot be a non-negative finite integer.  */
  if (REAL_VALUE_NEGATIVE (*value))
    return -1;
  if (REAL_VALUE_ISNAN (*value))
    return -1;
  if (REAL_VALUE_ISINF (*value))
    return -1;
  if (!real_isinteger (value, DFmode))
    return -1;

  return exact_log2 (real_to_integer (value));
}
/* Return the shared aarch64_fpconst_pow_of_2 result of the elements of X.

   X must be a CONST_VECTOR whose mode class is MODE_VECTOR_FLOAT.  The
   result for element 0 must be strictly positive (so a result of 0 is
   rejected here, unlike in the scalar helper), and every remaining element
   must produce that same result.  Return -1 if any of these conditions
   fails.  */
int
aarch64_vec_fpconst_pow_of_2 (rtx x)
{
  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return -1;

  const int log2_elt0 = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
  if (log2_elt0 <= 0)
    return -1;

  /* Element 0 is the reference; compare the rest against it.  */
  int idx = 1;
  while (idx < CONST_VECTOR_NUNITS (x))
    {
      if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, idx)) != log2_elt0)
        return -1;
      idx++;
    }

  return log2_elt0;
}
/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
static tree
aarch64_promoted_type (const_tree t)
......
......@@ -4185,6 +4185,25 @@
[(set_attr "type" "f_cvtf2i")]
)
;; Fold a scalar GPF multiply by a power-of-2 constant into the FIXUORS
;; conversion to a GPI register, emitting a single fixed-point fcvtz<su>.
;; Operand 2 must satisfy aarch64_fp_pow2.  The insn condition further
;; requires TARGET_FLOAT and passes aarch64_fpconst_pow_of_2 (operands[2])
;; to IN_RANGE with limits 1 and the bit size of the GPI destination mode.
;; That same value is printed as the trailing #<fbits> immediate.
(define_insn "*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult"
[(set (match_operand:GPI 0 "register_operand" "=r")
(FIXUORS:GPI
(mult:GPF
(match_operand:GPF 1 "register_operand" "w")
(match_operand:GPF 2 "aarch64_fp_pow2" "F"))))]
"TARGET_FLOAT
&& IN_RANGE (aarch64_fpconst_pow_of_2 (operands[2]), 1,
GET_MODE_BITSIZE (<GPI:MODE>mode))"
{
/* The immediate is computed from the constant at output time, so the
   assembly line is formatted into a local buffer rather than returned as a
   fixed template.  */
int fbits = aarch64_fpconst_pow_of_2 (operands[2]);
char buf[64];
snprintf (buf, 64, "fcvtz<su>\\t%%<GPI:w>0, %%<GPF:s>1, #%d", fbits);
output_asm_insn (buf, operands);
/* buf has already been handed to output_asm_insn; return an empty
   template.  */
return "";
}
[(set_attr "type" "f_cvtf2i")]
)
;; fma - no throw
(define_insn "fma<mode>4"
......
......@@ -87,6 +87,13 @@
(and (match_code "const_double")
(match_test "aarch64_float_const_zero_rtx_p (op)"))))
;; Match a const_double for which aarch64_fpconst_pow_of_2 returns a
;; strictly positive value; constants for which it returns 0 or -1 do not
;; match.
(define_predicate "aarch64_fp_pow2"
(and (match_code "const_double")
(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
;; Match an operand for which aarch64_vec_fpconst_pow_of_2 returns a
;; strictly positive value.  There is no match_code here; the rtx code is
;; left to be checked by that helper.
(define_predicate "aarch64_fp_vec_pow2"
(match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
(define_predicate "aarch64_plus_immediate"
(and (match_code "const_int")
(ior (match_test "aarch64_uimm12_shift (INTVAL (op))")
......
2015-10-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/fmul_fcvt_1.c: New test.
* gcc.target/aarch64/fmul_fcvt_2.c: Likewise.
2015-10-20 H.J. Lu <hongjiu.lu@intel.com>
PR target/66810
......
/* { dg-do run } */
/* { dg-options "-save-temps -O2 -fno-inline" } */
/* Define four functions that take a float X, multiply it by the float
   constant __a.0f and return the product converted to int (sffoo<__a>),
   unsigned int (usffoo<__a>), long (lsffoo<__a>) and unsigned long
   (ulsffoo<__a>).  These are the functions whose generated assembly the
   dg-final scans inspect, so their bodies are kept to a single
   multiply-and-convert expression.  */
#define FUNC_DEFS(__a) \
int \
sffoo##__a (float x) \
{ \
return x * __a##.0f; \
} \
\
unsigned int \
usffoo##__a (float x) \
{ \
return x * __a##.0f; \
} \
\
long \
lsffoo##__a (float x) \
{ \
return x * __a##.0f; \
} \
\
unsigned long \
ulsffoo##__a (float x) \
{ \
return x * __a##.0f; \
}
/* Define four functions that take a double X, multiply it by the double
   constant __a.0 and return the product converted to long (dffoo<__a>),
   unsigned long (udffoo<__a>), int (sdffoo<__a>) and unsigned int
   (usdffoo<__a>).  These are the double-precision counterparts of the
   functions generated by FUNC_DEFS.  */
#define FUNC_DEFD(__a) \
long \
dffoo##__a (double x) \
{ \
return x * __a##.0; \
} \
\
unsigned long \
udffoo##__a (double x) \
{ \
return x * __a##.0; \
} \
int \
sdffoo##__a (double x) \
{ \
return x * __a##.0; \
} \
\
unsigned int \
usdffoo##__a (double x) \
{ \
return x * __a##.0; \
}
/* Multiplier 4: the dg-final patterns that follow look for #2 as the
   fcvtzs/fcvtzu immediate in each of the eight combinations of w/x
   destination, s/d source and signedness.  */
FUNC_DEFS (4)
FUNC_DEFD (4)
/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], s\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], s\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], d\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], d\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], s\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], s\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], d\[0-9\]*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], d\[0-9\]*.*#2" 1 } } */
/* Multiplier 8: same eight combinations, looking for #3.  */
FUNC_DEFS (8)
FUNC_DEFD (8)
/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], s\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], s\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], d\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], d\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], s\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], s\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], d\[0-9\]*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], d\[0-9\]*.*#3" 1 } } */
/* Multiplier 16: same eight combinations, looking for #4.  */
FUNC_DEFS (16)
FUNC_DEFD (16)
/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], s\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], s\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], d\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], d\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], s\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], s\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], d\[0-9\]*.*#4" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], d\[0-9\]*.*#4" 1 } } */
/* Abort unless every float-argument helper generated for multiplier MULT
   returns, for input VAL, the same value as converting VAL * MULT directly
   to that helper's return type.  The comparisons are tried in the order
   int, unsigned int, long, unsigned long and stop at the first mismatch.  */
#define FUNC_TESTS(mult, val) \
  do \
    { \
      if (sffoo##mult (val) != (int)(val * mult) \
          || usffoo##mult (val) != (unsigned int)(val * mult) \
          || lsffoo##mult (val) != (long)(val * mult) \
          || ulsffoo##mult (val) != (unsigned long)(val * mult)) \
        __builtin_abort (); \
    } while (0)
/* Abort unless every double-argument helper generated for multiplier MULT
   returns, for input VAL, the same value as converting VAL * MULT directly
   to that helper's return type.  The comparisons are tried in the order
   long, unsigned long, int, unsigned int and stop at the first mismatch.  */
#define FUNC_TESTD(mult, val) \
  do \
    { \
      if (dffoo##mult (val) != (long)(val * mult) \
          || udffoo##mult (val) != (unsigned long)(val * mult) \
          || sdffoo##mult (val) != (int)(val * mult) \
          || usdffoo##mult (val) != (unsigned int)(val * mult)) \
        __builtin_abort (); \
    } while (0)
/* Run every generated conversion helper over a sequence of inputs that
   starts at -0.001 and advances by 1.0f for as long as the value stays
   below 32.0.  The FUNC_TESTS/FUNC_TESTD macros abort on a mismatch, so
   reaching the return means every comparison passed.  */
int
main (void)
{
  float val = -0.001;

  while (val < 32.0)
    {
      FUNC_TESTS (4, val);
      FUNC_TESTS (8, val);
      FUNC_TESTS (16, val);

      FUNC_TESTD (4, val);
      FUNC_TESTD (8, val);
      FUNC_TESTD (16, val);

      val += 1.0f;
    }

  return 0;
}
/* { dg-do run } */
/* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline" } */
#define N 1024
/* Define foo<__a>, which stores a[i] * __a.0f into b[i] for each of the N
   elements; the float product is converted by the assignment to the int
   element.  This is the loop whose generated assembly the dg-final scans at
   the end of the file inspect.  */
#define FUNC_DEF(__a) \
void \
foo##__a (float *a, int *b) \
{ \
int i; \
for (i = 0; i < N; i++) \
b[i] = a[i] * __a##.0f; \
}
/* Instantiate foo4, foo8 and foo16.  */
FUNC_DEF (4)
FUNC_DEF (8)
FUNC_DEF (16)
/* Output and input buffers of N elements each, used by main and by the
   FUNC_CHECK macro.  */
int ints[N];
float floats[N];
/* Store zero into each of the first N elements of ARR.  */
void
reset_ints (int *arr)
{
  int *const end = arr + N;

  while (arr != end)
    *arr++ = 0;
}
/* Abort unless IS[idx] equals idx * N_MULT for every idx below N, where
   N_MULT is the multiplier passed in parameter n.  */
void
check_result (int *is, int n)
{
  int idx = 0;

  while (idx < N)
    {
      const int expected = idx * n;

      if (is[idx] != expected)
        __builtin_abort ();
      ++idx;
    }
}
/* Run one multiplier end to end: clear the output buffer, call foo<MULT>
   to convert floats into ints, then have check_result validate ints
   against MULT.  */
#define FUNC_CHECK(mult) \
  do \
    { \
      reset_ints (ints); \
      foo##mult (floats, ints); \
      check_result (ints, mult); \
    } \
  while (0)
/* Fill floats[] with the values 0 .. N-1 converted to float, then run the
   multiply-and-convert check for multipliers 4, 8 and 16.  check_result
   aborts on a mismatch, so reaching the return means all checks passed.  */
int
main (void)
{
  int idx = 0;

  while (idx < N)
    {
      floats[idx] = (float) idx;
      idx++;
    }

  FUNC_CHECK (4);
  FUNC_CHECK (8);
  FUNC_CHECK (16);

  return 0;
}
/* { dg-final { scan-assembler-not "fmul\tv\[0-9\]*.*" } } */
/* { dg-final { scan-assembler-times "fcvtzs\tv\[0-9\].4s, v\[0-9\].4s*.*#2" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tv\[0-9\].4s, v\[0-9\].4s*.*#3" 1 } } */
/* { dg-final { scan-assembler-times "fcvtzs\tv\[0-9\].4s, v\[0-9\].4s*.*#4" 1 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment