Commit 37891309 by Ramana Radhakrishnan Committed by Ramana Radhakrishnan

re PR rtl-optimization/49891 (ICE in redirect_jump_1)

2012-07-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>

        PR target/49891
        PR target/51980
        * gcc/testsuite/gcc.target/arm/neon/vtrnf32.c: Update.
        * gcc/testsuite/gcc.target/arm/neon/vtrns32.c: Update.
        * gcc/testsuite/gcc.target/arm/neon/vtrnu32.c: Update.
        * gcc/testsuite/gcc.target/arm/neon/vzipf32.c: Update.
        * gcc/testsuite/gcc.target/arm/neon/vzips32.c: Update.
        * gcc/testsuite/gcc.target/arm/neon/vzipu32.c: Update.


2012-07-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
	    Julian Brown  <julian@codesourcery.com>

        PR target/49891
        PR target/51980
        * config/arm/neon-gen.ml (return_by_ptr): Delete.
        (print_function): Handle empty strings.
        (return): Delete use of return_by_ptr.
        (mask_shape_for_shuffle): New function.
        (mask_elems): Likewise.
        (shuffle_fn): Likewise.
        (params): Simplify and remove use of return_by_ptr.
        (get_shuffle): New function.
        (print_variant): Update.
        * config/arm/neon.ml (rev_elems): New function.
        (permute_range): Likewise.
        (zip_range): Likewise.
        (uzip_range): Likewise.
        (trn_range): Likewise.
        (zip_elems): Likewise.
        (uzip_elems): Likewise.
        (trn_elems): Likewise.
        (features): New enumeration Use_shuffle. Delete ReturnPtr.
        (pf_su_8_16): New.
        (suf_32): New.
        (ops): Update entries for Vrev64, Vrev32, Vrev16, Vtrn, Vzip, Vuzp.
        * config/arm/arm_neon.h: Regenerate.




Co-Authored-By: Julian Brown <julian@codesourcery.com>

From-SVN: r189294
parent 573234ac
2012-07-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Julian Brown <julian@codesourcery.com>
PR target/49891
PR target/51980
* config/arm/neon-gen.ml (return_by_ptr): Delete.
(print_function): Handle empty strings.
(return): Delete use of return_by_ptr.
(mask_shape_for_shuffle): New function.
(mask_elems): Likewise.
(shuffle_fn): Likewise.
(params): Simplify and remove use of return_by_ptr.
(get_shuffle): New function.
(print_variant): Update.
* config/arm/neon.ml (rev_elems): New function.
(permute_range): Likewise.
(zip_range): Likewise.
(uzip_range): Likewise.
(trn_range): Likewise.
(zip_elems): Likewise.
(uzip_elems): Likewise.
(trn_elems): Likewise.
(features): New enumeration Use_shuffle. Delete ReturnPtr.
(pf_su_8_16): New.
(suf_32): New.
(ops): Update entries for Vrev64, Vrev32, Vrev16, Vtrn, Vzip, Vuzp.
* config/arm/arm_neon.h: Regenerate.
2012-07-05 Richard Guenther <rguenther@suse.de>
* tree-pretty-print.c (dump_generic_node): Properly test
......
......@@ -91,15 +91,14 @@ let print_function arity fnname body =
end;
open_braceblock ffmt;
let rec print_lines = function
[] -> ()
[] -> ()
| "" :: lines -> print_lines lines
| [line] -> Format.printf "%s" line
| line::lines -> Format.printf "%s@," line; print_lines lines in
| line::lines -> Format.printf "%s@," line ; print_lines lines in
print_lines body;
close_braceblock ffmt;
end_function ffmt
let return_by_ptr features = List.mem ReturnPtr features
let union_string num elts base =
let itype = inttype_for_array num elts in
let iname = string_of_inttype itype
......@@ -141,29 +140,76 @@ let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")"
(* Return a tuple of a list of declarations to go at the start of the function,
and a list of statements needed to return THING. *)
let return arity return_by_ptr thing =
let return arity thing =
match arity with
Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
| Arity4 (ret, _, _, _, _) ->
match ret with
T_arrayof (num, vec) ->
if return_by_ptr then
let sname = string_of_vectype ret in
[Printf.sprintf "%s __rv;" sname],
[thing ^ ";"; "return __rv;"]
else
begin match ret with
T_arrayof (num, vec) ->
let uname = union_string num vec "__rv" in
[uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"]
| T_void -> [], [thing ^ ";"]
| _ ->
[], ["return " ^ (cast_for_return ret) ^ thing ^ ";"]
| T_void ->
[], [thing ^ ";"]
| _ ->
[], ["return " ^ (cast_for_return ret) ^ thing ^ ";"]
end
(* Map the shape of a shuffle-implemented intrinsic to the shape used when
   building its mask and per-vector result types: an All shape is used
   unchanged and Pair_result REG becomes All (2, REG).  Any other shape
   raises Failure, tagged with this function's name (as mask_elems does) so
   the failing site can be identified.  *)
let mask_shape_for_shuffle = function
    All _ as shape -> shape
  | Pair_result reg -> All (2, reg)
  | _ -> failwith "mask_shape_for_shuffle"
(* Call SHUFFLE with the element width of ELTTYPE, the number of such
   elements that fit in the register of SHAPE's operand 0 (64 bits for a
   Dreg, 128 bits for a Qreg) and PART, returning whatever SHUFFLE returns.
   Fails with "mask_elems" for any other register kind.  *)
let mask_elems shuffle shape elttype part =
  let width = elt_width elttype in
  let lanes =
    match regmap shape 0 with
      Dreg -> 64 / width
    | Qreg -> 128 / width
    | _ -> failwith "mask_elems" in
  shuffle width lanes part
(* Return a tuple of a list of declarations and a list of statements needed
   to implement an intrinsic using __builtin_shuffle.  SHUFFLE is a function
   which returns a list of elements suitable for using as a mask; it is
   invoked through mask_elems with the `lo and/or `hi part selector.

   When the intrinsic returns an array type (T_arrayof), the result is built
   in a local "__rv": val[0] is a two-operand shuffle of __a and __b using
   the `lo mask and val[1] the same using the `hi mask.  For every other
   return type a single one-operand shuffle of __a using the `lo mask is
   returned directly.  *)
let shuffle_fn shuffle shape arity elttype =
  let mshape = mask_shape_for_shuffle shape in
  let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in
  let masktype_str = string_of_vectype masktype in
  let shuffle_res = type_for_elt mshape elttype 0 in
  let shuffle_res_str = string_of_vectype shuffle_res in
  (* Render the mask for PART as a comma-separated list of indices.  *)
  let mask_str part =
    String.concat ", "
      (List.map string_of_int (mask_elems shuffle mshape elttype part)) in
  match arity with
    Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
  | Arity4 (ret, _, _, _, _) ->
      begin match ret with
        T_arrayof _ ->
          let shuf1 = Printf.sprintf
            "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });"
            shuffle_res_str masktype_str (mask_str `lo)
          and shuf2 = Printf.sprintf
            "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });"
            shuffle_res_str masktype_str (mask_str `hi) in
          [Printf.sprintf "%s __rv;" (string_of_vectype ret)],
          [shuf1; shuf2; "return __rv;"]
      | _ ->
          let shuf = Printf.sprintf
            "return (%s) __builtin_shuffle (__a, (%s) { %s });"
            shuffle_res_str masktype_str (mask_str `lo) in
          (* Nothing needs declaring here.  Return a genuinely empty list
             rather than a placeholder empty string, so the emitted body does
             not depend on the printer filtering out blank lines.  *)
          [], [shuf]
      end
(* Strip every enclosing T_arrayof layer from a type and return the innermost
   type; a type that is not a T_arrayof is returned unchanged.  *)
let rec element_type = function
    T_arrayof (_, inner) -> element_type inner
  | innermost -> innermost
let params return_by_ptr ps =
let params ps =
let pdecls = ref [] in
let ptype t p =
match t with
......@@ -180,13 +226,7 @@ let params return_by_ptr ps =
| Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"]
| Arity4 (_, t1, t2, t3, t4) ->
[ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in
match ps with
Arity0 ret | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
| Arity4 (ret, _, _, _, _) ->
if return_by_ptr then
!pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist
else
!pdecls, plist
!pdecls, plist
let modify_params features plist =
let is_flipped =
......@@ -239,17 +279,27 @@ let rec mode_suffix elttype shape =
and srcmode = mode_of_elt src shape in
string_of_mode dstmode ^ string_of_mode srcmode
(* Return Some FN for the first Use_shuffle FN entry in the feature list, or
   None when the list contains no Use_shuffle entry.  *)
let rec get_shuffle = function
    [] -> None
  | Use_shuffle fn :: _ -> Some fn
  | _ :: remaining -> get_shuffle remaining
let print_variant opcode features shape name (ctype, asmtype, elttype) =
let bits = infoword_value elttype features in
let modesuf = mode_suffix elttype shape in
let return_by_ptr = return_by_ptr features in
let pdecls, paramlist = params return_by_ptr ctype in
let paramlist' = modify_params features paramlist in
let paramlist'' = extra_word shape features paramlist' bits in
let parstr = String.concat ", " paramlist'' in
let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)"
(builtin_name features name) modesuf parstr in
let rdecls, stmts = return ctype return_by_ptr builtin in
let pdecls, paramlist = params ctype in
let rdecls, stmts =
match get_shuffle features with
Some shuffle -> shuffle_fn shuffle shape ctype elttype
| None ->
let paramlist' = modify_params features paramlist in
let paramlist'' = extra_word shape features paramlist' bits in
let parstr = String.concat ", " paramlist'' in
let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)"
(builtin_name features name) modesuf parstr in
return ctype builtin in
let body = pdecls @ rdecls @ stmts
and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
print_function ctype fnname body
......
......@@ -201,6 +201,42 @@ type opcode =
(* Reinterpret casts. *)
| Vreinterp
(* Build a shuffle mask of NELTS indices in which index I is replaced by
   I lxor (REVSIZE / ELSIZE - 1).  When REVSIZE / ELSIZE is a power of two
   this reverses the order of the elements inside each group of that many
   elements.  The final argument (the high/low part selector) is ignored.  *)
let rev_elems revsize elsize nelts _ =
  let flip_bits = revsize / elsize - 1 in
  let swapped_index idx = idx lxor flip_bits in
  Array.to_list (Array.init nelts swapped_index)
(* Build a list of NELTS index pairs, flattened into a single list.  The
   first pair is (I, I + STRIDE); each following pair starts INCREMENT
   higher than the one before it.  *)
let permute_range i stride nelts increment =
  let rec pairs_from first remaining =
    if remaining = 0 then []
    else
      let rest = pairs_from (first + increment) (remaining - 1) in
      first :: (first + stride) :: rest in
  pairs_from i nelts
(* Generate a list of integers suitable for vzip: COUNT pairs
   (k, k + STRIDE) whose first members are START, START + 1, ...  *)
let zip_range start stride count = permute_range start stride count 1
(* Generate a list of integers suitable for vuzp (unzip): COUNT pairs
   (k, k + STRIDE) whose first members are START, START + 4, ...  *)
let uzip_range start stride count = permute_range start stride count 4
(* Generate a list of integers suitable for vtrn: COUNT pairs
   (k, k + STRIDE) whose first members are START, START + 2, ...  *)
let trn_range start stride count = permute_range start stride count 2
(* Shuffle mask for one half of a zip of two NELTS-element vectors.  The `lo
   part starts pairing at index 0 and the `hi part at index NELTS / 2; each
   index is paired with the one NELTS positions further on.  The first
   argument (the element size) is ignored.  *)
let zip_elems _ nelts part =
  let half = nelts / 2 in
  let first =
    match part with
      `lo -> 0
    | `hi -> half in
  zip_range first nelts half
(* Shuffle mask for one half of an unzip of two NELTS-element vectors.  The
   `lo part selects indices starting at 0 and the `hi part indices starting
   at 1, in both cases stepping by two.  The first argument (the element
   size) is ignored.  *)
let uzip_elems _ nelts part =
  let first =
    match part with
      `lo -> 0
    | `hi -> 1 in
  uzip_range first 2 (nelts / 2)
(* Shuffle mask for one half of a transpose of two NELTS-element vectors.
   The `lo part pairs indices 0, 2, ... and the `hi part indices 1, 3, ...,
   each with the index NELTS positions further on.  The first argument (the
   element size) is ignored.  *)
let trn_elems _ nelts part =
  let first =
    match part with
      `lo -> 0
    | `hi -> 1 in
  trn_range first nelts (nelts / 2)
(* Features used for documentation, to distinguish between some instruction
variants, and to signal special requirements (e.g. swapping arguments). *)
......@@ -214,7 +250,10 @@ type features =
| Flipped of string (* Builtin name to use with flipped arguments. *)
| InfoWord (* Pass an extra word for signage/rounding etc. (always passed
for All _, Long, Wide, Narrow shape_forms). *)
| ReturnPtr (* Pass explicit pointer to return value as first argument. *)
(* Implement builtin as shuffle. The parameter is a function which returns
masks suitable for __builtin_shuffle: arguments are (element size,
number of elements, high/low part selector). *)
| Use_shuffle of (int -> int -> [`lo|`hi] -> int list)
(* A specification as to the shape of instruction expected upon
disassembly, used if it differs from the shape used to build the
intrinsic prototype. Multiple entries in the constructor's argument
......@@ -706,8 +745,10 @@ let u_8_32 = [U8; U16; U32]
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_16 = [P8; P16; S8; S16; U8; U16]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
let suf_32 = [S32; U32; F32]
let ops =
[
......@@ -1317,12 +1358,18 @@ let ops =
pf_su_8_64;
(* Reverse elements. *)
Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1,
P8 :: P16 :: F32 :: su_8_32;
Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1,
P8 :: P16 :: F32 :: su_8_32;
Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1,
[P8; P16; S8; U8; S16; U16];
Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1,
[P8; P16; S8; U8; S16; U16];
Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1,
[P8; S8; U8];
Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1,
[P8; S8; U8];
(* Bit selection. *)
Vbsl,
......@@ -1336,25 +1383,19 @@ let ops =
Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
pf_su_8_64;
(* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
generating good code for intrinsics which return structure types --
builtins work well by themselves (and understand that the values being
stored on e.g. the stack also reside in registers, so can optimise the
stores away entirely if the results are used immediately), but
intrinsics are very much less efficient. Maybe something can be improved
re: inlining, or tweaking the ABI used for intrinsics (a special call
attribute?).
*)
Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2, pf_su_8_16;
Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vtrn", bits_2, suf_32;
Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
(* Zip elements. *)
Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2, pf_su_8_16;
Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vzip", bits_2, suf_32;
Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
(* Unzip elements. *)
Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2,
pf_su_8_32;
Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2,
pf_su_8_32;
(* Element/structure loads. VLD1 variants. *)
Vldx 1,
......
2012-07-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/49891
PR target/51980
* gcc/testsuite/gcc.target/arm/neon/vtrnf32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vtrns32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vtrnu32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vzipf32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vzips32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vzipu32.c: Update.
2012-07-05 Mikael Morin <mikael@gcc.gnu.org>
PR fortran/53732
......
......@@ -17,5 +17,5 @@ void test_vtrnf32 (void)
out_float32x2x2_t = vtrn_f32 (arg0_float32x2_t, arg1_float32x2_t);
}
/* { dg-final { scan-assembler "vtrn\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -17,5 +17,5 @@ void test_vtrns32 (void)
out_int32x2x2_t = vtrn_s32 (arg0_int32x2_t, arg1_int32x2_t);
}
/* { dg-final { scan-assembler "vtrn\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -17,5 +17,5 @@ void test_vtrnu32 (void)
out_uint32x2x2_t = vtrn_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
}
/* { dg-final { scan-assembler "vtrn\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -17,5 +17,5 @@ void test_vzipf32 (void)
out_float32x2x2_t = vzip_f32 (arg0_float32x2_t, arg1_float32x2_t);
}
/* { dg-final { scan-assembler "vzip\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -17,5 +17,5 @@ void test_vzips32 (void)
out_int32x2x2_t = vzip_s32 (arg0_int32x2_t, arg1_int32x2_t);
}
/* { dg-final { scan-assembler "vzip\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -17,5 +17,5 @@ void test_vzipu32 (void)
out_uint32x2x2_t = vzip_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
}
/* { dg-final { scan-assembler "vzip\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment