Commit 5c0d88e6 by Changpeng Fang Committed by Changpeng Fang

Auto-vectorizer generates 128-bit AVX insns by default for bdver1.

	* config/i386/i386.opt (mprefer-avx128): Redefine the flag as a Mask option.
	* config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_AVX128_OPTIMAL entry.
	(TARGET_AVX128_OPTIMAL): New definition.
	* config/i386/i386.c (initial_ix86_tune_features): Initialize
	X86_TUNE_AVX128_OPTIMAL entry.
	(ix86_option_override_internal): Enable the generation
	of the 128-bit instructions when TARGET_AVX128_OPTIMAL is set.
	(ix86_preferred_simd_mode): Use TARGET_PREFER_AVX128.
	(ix86_autovectorize_vector_sizes): Use TARGET_PREFER_AVX128.

From-SVN: r175661
parent 55d80bc4
2011-06-29 Changpeng Fang <changpeng.fang@amd.com>
* config/i386/i386.opt (mprefer-avx128): Redefine the flag as a Mask option.
* config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_AVX128_OPTIMAL entry.
(TARGET_AVX128_OPTIMAL): New definition.
* config/i386/i386.c (initial_ix86_tune_features): Initialize
X86_TUNE_AVX128_OPTIMAL entry.
(ix86_option_override_internal): Enable the generation
of the 128-bit instructions when TARGET_AVX128_OPTIMAL is set.
(ix86_preferred_simd_mode): Use TARGET_PREFER_AVX128.
(ix86_autovectorize_vector_sizes): Use TARGET_PREFER_AVX128.
2011-06-29 Eric Botcazou <ebotcazou@adacore.com> 2011-06-29 Eric Botcazou <ebotcazou@adacore.com>
PR tree-optimization/49539 PR tree-optimization/49539
......
...@@ -2089,7 +2089,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ...@@ -2089,7 +2089,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
at -O3. For the moment, the prefetching seems badly tuned for Intel at -O3. For the moment, the prefetching seems badly tuned for Intel
chips. */ chips. */
m_K6_GEODE | m_AMD_MULTIPLE m_K6_GEODE | m_AMD_MULTIPLE,
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
the auto-vectorizer. */
m_BDVER1
}; };
/* Feature tests against the various architecture variations. */ /* Feature tests against the various architecture variations. */
...@@ -2623,6 +2627,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, ...@@ -2623,6 +2627,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{ "-mvzeroupper", MASK_VZEROUPPER }, { "-mvzeroupper", MASK_VZEROUPPER },
{ "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD}, { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
{ "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE}, { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
{ "-mprefer-avx128", MASK_PREFER_AVX128},
}; };
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
...@@ -3672,6 +3677,9 @@ ix86_option_override_internal (bool main_args_p) ...@@ -3672,6 +3677,9 @@ ix86_option_override_internal (bool main_args_p)
if ((x86_avx256_split_unaligned_store & ix86_tune_mask) if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
&& !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE)) && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
/* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
target_flags |= MASK_PREFER_AVX128;
} }
} }
else else
...@@ -34614,7 +34622,7 @@ ix86_preferred_simd_mode (enum machine_mode mode) ...@@ -34614,7 +34622,7 @@ ix86_preferred_simd_mode (enum machine_mode mode)
return V2DImode; return V2DImode;
case SFmode: case SFmode:
if (TARGET_AVX && !flag_prefer_avx128) if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode; return V8SFmode;
else else
return V4SFmode; return V4SFmode;
...@@ -34622,7 +34630,7 @@ ix86_preferred_simd_mode (enum machine_mode mode) ...@@ -34622,7 +34630,7 @@ ix86_preferred_simd_mode (enum machine_mode mode)
case DFmode: case DFmode:
if (!TARGET_VECTORIZE_DOUBLE) if (!TARGET_VECTORIZE_DOUBLE)
return word_mode; return word_mode;
else if (TARGET_AVX && !flag_prefer_avx128) else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode; return V4DFmode;
else if (TARGET_SSE2) else if (TARGET_SSE2)
return V2DFmode; return V2DFmode;
...@@ -34639,7 +34647,7 @@ ix86_preferred_simd_mode (enum machine_mode mode) ...@@ -34639,7 +34647,7 @@ ix86_preferred_simd_mode (enum machine_mode mode)
static unsigned int static unsigned int
ix86_autovectorize_vector_sizes (void) ix86_autovectorize_vector_sizes (void)
{ {
return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0; return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
} }
/* Initialize the GCC target structure. */ /* Initialize the GCC target structure. */
...@@ -312,6 +312,7 @@ enum ix86_tune_indices { ...@@ -312,6 +312,7 @@ enum ix86_tune_indices {
X86_TUNE_OPT_AGU, X86_TUNE_OPT_AGU,
X86_TUNE_VECTORIZE_DOUBLE, X86_TUNE_VECTORIZE_DOUBLE,
X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL,
X86_TUNE_AVX128_OPTIMAL,
X86_TUNE_LAST X86_TUNE_LAST
}; };
...@@ -410,7 +411,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ...@@ -410,7 +411,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_VECTORIZE_DOUBLE] ix86_tune_features[X86_TUNE_VECTORIZE_DOUBLE]
#define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \ #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL] ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
#define TARGET_AVX128_OPTIMAL \
ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
/* Feature tests against the various architecture variations. */ /* Feature tests against the various architecture variations. */
enum ix86_arch_indices { enum ix86_arch_indices {
X86_ARCH_CMOVE, /* || TARGET_SSE */ X86_ARCH_CMOVE, /* || TARGET_SSE */
......
...@@ -388,7 +388,7 @@ Do dispatch scheduling if processor is bdver1 and Haifa scheduling ...@@ -388,7 +388,7 @@ Do dispatch scheduling if processor is bdver1 and Haifa scheduling
is selected. is selected.
mprefer-avx128 mprefer-avx128
Target Report Var(flag_prefer_avx128) Init(0) Target Report Mask(PREFER_AVX128) SAVE
Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer. Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer.
;; ISA support ;; ISA support
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment