Commit 5105b576 by Richard Biener Committed by Richard Biener

re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)

re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)

2019-12-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92645
	* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
	from or to a properly aligned register variable.

	* gcc.target/i386/pr92645-5.c: New testcase.

From-SVN: r278934
parent 8f316505
2019-12-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/92645
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
from or to a properly aligned register variable.
2019-12-03 Matthias Klose <doko@ubuntu.com>
* Makefile.in (SOURCES): Add doc/lto-dump.1.
......@@ -986,36 +986,33 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
src_align = get_pointer_alignment (src);
dest_align = get_pointer_alignment (dest);
if (dest_align < TYPE_ALIGN (desttype)
|| src_align < TYPE_ALIGN (srctype))
return false;
/* Choose between src and destination type for the access based
on alignment, whether the access constitutes a register access
and whether it may actually expose a declaration for SSA rewrite
or SRA decomposition. */
destvar = NULL_TREE;
srcvar = NULL_TREE;
if (TREE_CODE (dest) == ADDR_EXPR
&& var_decl_component_p (TREE_OPERAND (dest, 0))
&& tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
&& tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
&& dest_align >= TYPE_ALIGN (desttype)
&& (is_gimple_reg_type (desttype)
|| src_align >= TYPE_ALIGN (desttype)))
destvar = fold_build2 (MEM_REF, desttype, dest, off0);
srcvar = NULL_TREE;
if (TREE_CODE (src) == ADDR_EXPR
&& var_decl_component_p (TREE_OPERAND (src, 0))
&& tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
{
if (!destvar
|| src_align >= TYPE_ALIGN (desttype))
srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
src, off0);
else if (!STRICT_ALIGNMENT)
{
srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
src_align);
srcvar = fold_build2 (MEM_REF, srctype, src, off0);
}
}
else if (TREE_CODE (src) == ADDR_EXPR
&& var_decl_component_p (TREE_OPERAND (src, 0))
&& tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
&& src_align >= TYPE_ALIGN (srctype)
&& (is_gimple_reg_type (srctype)
|| dest_align >= TYPE_ALIGN (srctype)))
srcvar = fold_build2 (MEM_REF, srctype, src, off0);
if (srcvar == NULL_TREE && destvar == NULL_TREE)
return false;
/* Now that we chose an access type express the other side in
terms of it if the target allows that with respect to alignment
constraints. */
if (srcvar == NULL_TREE)
{
if (src_align >= TYPE_ALIGN (desttype))
......
2019-12-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/92645
* gcc.target/i386/pr92645-5.c: New testcase.
2019-12-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/92751
* g++.dg/tree-ssa/pr92751.C: New testcase.
......
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
/* 32-byte vector of long long elements (GCC vector_size extension); the
   expected dump later in this file shows it as "vector(4) long long int".  */
typedef long long v4di __attribute__((vector_size(32)));
/* Plain aggregate of eight unsigned ints, used as the memcpy source in pun.
   NOTE(review): presumably 32 bytes on the i386 targets this test runs on,
   matching the size of v4di -- confirm.  */
struct Vec
{
unsigned int v[8];
};
/* Reinterpret the bytes of *S as a v4di: copy 32 bytes from S into a local
   vector variable with __builtin_memcpy and return that variable by value.  */
v4di pun (struct Vec *s)
{
v4di tem;
/* 32 is the byte size given in v4di's vector_size attribute.  */
__builtin_memcpy (&tem, s, 32);
return tem;
}
/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
and no memcpy call.
_3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
return _3; */
/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment