[1/7,v6] ifn/optabs: Support vector load/store with length

Message ID b0be1c78-293d-c015-fd23-02221146c1e2@linux.ibm.com
State Superseded
Headers show
Series
  • [1/7,v6] ifn/optabs: Support vector load/store with length
Related show

Commit Message

Kewen.Lin via Gcc-patches June 29, 2020, 6:32 a.m.
Hi Richard S./Richi/Jim/Segher,

Thanks a lot for your comments to make this patch more solid.

Based on our discussion, for the vector load/store with length
optab, the length unit would be measured in lanes by default.
For the targets which support length measured in bytes like Power,
they should only define VnQI modes to wrap the other same size
vector modes.  If the length is larger than total lane/byte count
of the given mode, it's taken to load all lanes/bytes implicitly.
For the remaining lanes/bytes which aren't specified by length,
they would be taken as undefined values.  For length in bytes,
it's required that the byte count should be a multiple of the
element size (wrapped vector), otherwise it's undefined.

This patch has been updated as attached.

2/7 for rs6000 optab definition has been updated to use V16QI.
5/7 for vectorizer change has been updated accordingly.

-----

v6: Updated optab descriptions.

v5:
  - Updated lenload/lenstore optab to len_load/len_store and the docs.
  - Rename expand_mask_{load,store}_optab_fn to expand_partial_{load,store}_optab_fn
  - Added/updated macros for expand_mask_{load,store}_optab_fn
    and expand_len_{load,store}_optab_fn

v4: Update len_load_direct/len_store_direct to align with direct optab.

v3: Get rid of length mode hook.

BR,
Kewen
-----
gcc/ChangeLog:

2020-MM-DD  Kewen Lin  <linkw@gcc.gnu.org>

	* doc/md.texi (len_load_@var{m}): Document.
	(len_store_@var{m}): Likewise.
	* internal-fn.c (len_load_direct): New macro.
	(len_store_direct): Likewise.
	(expand_len_load_optab_fn): Likewise.
	(expand_len_store_optab_fn): Likewise.
	(direct_len_load_optab_supported_p): Likewise.
	(direct_len_store_optab_supported_p): Likewise.
	(expand_mask_load_optab_fn): New macro.  Original renamed to ...
	(expand_partial_load_optab_fn): ... here.  Add handlings for
	len_load_optab.
	(expand_mask_store_optab_fn): New macro.  Original renamed to ...
	(expand_partial_store_optab_fn): ... here. Add handlings for
	len_store_optab.
	(internal_load_fn_p): Handle IFN_LEN_LOAD.
	(internal_store_fn_p): Handle IFN_LEN_STORE.
	(internal_fn_stored_value_index): Handle IFN_LEN_STORE.
	* internal-fn.def (LEN_LOAD): New internal function.
	(LEN_STORE): Likewise.
	* optabs.def (len_load_optab, len_store_optab): New optab.

Patch

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 2c67c818da5..690c384ff66 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5167,6 +5167,34 @@  mode @var{n}.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{len_load_@var{m}} instruction pattern
+@item @samp{len_load_@var{m}}
+Load the number of units specified by operand 2 from memory operand 1
+into register operand 0, setting the other bytes of operand 0 to
+undefined values.  Operands 0 and 1 have mode @var{m}.  Operand 2 has
+whichever integer mode the target prefers.  If operand 2 exceeds the
+maximum units of mode @var{m}, it will be set to the maximum units of
+mode @var{m}.  For targets which support length measured in bytes,
+they should only define VnQI mode to wrap the other vector modes with
+the same size.  Meanwhile, it's required that the byte count should
+be a multiple of the element size (wrapped vector).
+
+This pattern is not allowed to @code{FAIL}.
+
+@cindex @code{len_store_@var{m}} instruction pattern
+@item @samp{len_store_@var{m}}
+Store the number of units specified by operand 2 from nonmemory operand 1
+into memory operand 0, leaving the other bytes of operand 0 unchanged.
+Operands 0 and 1 have mode @var{m}.  Operand 2 has whichever integer
+mode the target prefers.  If operand 2 exceeds the maximum units of mode
+@var{m}, it will be set to the maximum units of mode @var{m}.  For
+targets which support length measured in bytes, they should only define
+VnQI mode to wrap the other vector modes with the same size.  Meanwhile,
+it's required that the byte count should be a multiple of the element
+size (wrapped vector).
+
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{vec_perm@var{m}} instruction pattern
 @item @samp{vec_perm@var{m}}
 Output a (variable) vector permutation.  Operand 0 is the destination
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 4f088de48d5..1e53ced60eb 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -104,10 +104,12 @@  init_internal_fns ()
 #define load_lanes_direct { -1, -1, false }
 #define mask_load_lanes_direct { -1, -1, false }
 #define gather_load_direct { 3, 1, false }
+#define len_load_direct { -1, -1, false }
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
+#define len_store_direct { 3, 3, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
 #define ternary_direct { 0, 0, true }
@@ -2478,10 +2480,10 @@  expand_call_mem_ref (tree type, gcall *stmt, int index)
   return fold_build2 (MEM_REF, type, addr, build_int_cst (alias_ptr_type, 0));
 }
 
-/* Expand MASK_LOAD{,_LANES} call STMT using optab OPTAB.  */
+/* Expand MASK_LOAD{,_LANES} or LEN_LOAD call STMT using optab OPTAB.  */
 
 static void
-expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 {
   class expand_operand ops[3];
   tree type, lhs, rhs, maskt;
@@ -2497,6 +2499,8 @@  expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
   if (optab == vec_mask_load_lanes_optab)
     icode = get_multi_vector_move (type, optab);
+  else if (optab == len_load_optab)
+    icode = direct_optab_handler (optab, TYPE_MODE (type));
   else
     icode = convert_optab_handler (optab, TYPE_MODE (type),
 				   TYPE_MODE (TREE_TYPE (maskt)));
@@ -2507,18 +2511,24 @@  expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   create_output_operand (&ops[0], target, TYPE_MODE (type));
   create_fixed_operand (&ops[1], mem);
-  create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
+  if (optab == len_load_optab)
+    create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+				 TYPE_UNSIGNED (TREE_TYPE (maskt)));
+  else
+    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
   expand_insn (icode, 3, ops);
   if (!rtx_equal_p (target, ops[0].value))
     emit_move_insn (target, ops[0].value);
 }
 
+#define expand_mask_load_optab_fn expand_partial_load_optab_fn
 #define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn
+#define expand_len_load_optab_fn expand_partial_load_optab_fn
 
-/* Expand MASK_STORE{,_LANES} call STMT using optab OPTAB.  */
+/* Expand MASK_STORE{,_LANES} or LEN_STORE call STMT using optab OPTAB.  */
 
 static void
-expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 {
   class expand_operand ops[3];
   tree type, lhs, rhs, maskt;
@@ -2532,6 +2542,8 @@  expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
   if (optab == vec_mask_store_lanes_optab)
     icode = get_multi_vector_move (type, optab);
+  else if (optab == len_store_optab)
+    icode = direct_optab_handler (optab, TYPE_MODE (type));
   else
     icode = convert_optab_handler (optab, TYPE_MODE (type),
 				   TYPE_MODE (TREE_TYPE (maskt)));
@@ -2542,11 +2554,17 @@  expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   reg = expand_normal (rhs);
   create_fixed_operand (&ops[0], mem);
   create_input_operand (&ops[1], reg, TYPE_MODE (type));
-  create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
+  if (optab == len_store_optab)
+    create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+				 TYPE_UNSIGNED (TREE_TYPE (maskt)));
+  else
+    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
   expand_insn (icode, 3, ops);
 }
 
+#define expand_mask_store_optab_fn expand_partial_store_optab_fn
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
+#define expand_len_store_optab_fn expand_partial_store_optab_fn
 
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
@@ -3128,10 +3146,12 @@  multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_gather_load_optab_supported_p convert_optab_supported_p
+#define direct_len_load_optab_supported_p direct_optab_supported_p
 #define direct_mask_store_optab_supported_p convert_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
+#define direct_len_store_optab_supported_p direct_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
 #define direct_fold_left_optab_supported_p direct_optab_supported_p
@@ -3498,6 +3518,7 @@  internal_load_fn_p (internal_fn fn)
     case IFN_MASK_LOAD_LANES:
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
+    case IFN_LEN_LOAD:
       return true;
 
     default:
@@ -3517,6 +3538,7 @@  internal_store_fn_p (internal_fn fn)
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_STORE:
       return true;
 
     default:
@@ -3577,6 +3599,7 @@  internal_fn_stored_value_index (internal_fn fn)
     case IFN_MASK_STORE:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_STORE:
       return 3;
 
     default:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..17dac128e83 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -49,11 +49,13 @@  along with GCC; see the file COPYING3.  If not see
    - load_lanes: currently just vec_load_lanes
    - mask_load_lanes: currently just vec_mask_load_lanes
    - gather_load: used for {mask_,}gather_load
+   - len_load: currently just len_load
 
    - mask_store: currently just maskstore
    - store_lanes: currently just vec_store_lanes
    - mask_store_lanes: currently just vec_mask_store_lanes
    - scatter_store: used for {mask_,}scatter_store
+   - len_store: currently just len_store
 
    - unary: a normal unary optab, such as vec_reverse_<mode>
    - binary: a normal binary optab, such as vec_interleave_lo_<mode>
@@ -127,6 +129,8 @@  DEF_INTERNAL_OPTAB_FN (GATHER_LOAD, ECF_PURE, gather_load, gather_load)
 DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
 		       mask_gather_load, gather_load)
 
+DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
+
 DEF_INTERNAL_OPTAB_FN (SCATTER_STORE, 0, scatter_store, scatter_store)
 DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
 		       mask_scatter_store, scatter_store)
@@ -136,6 +140,8 @@  DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 0c64eb52a8d..78409aa1453 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -435,3 +435,5 @@  OPTAB_D (check_war_ptrs_optab, "check_war_ptrs$a")
 OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
 OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
 OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
+OPTAB_D (len_load_optab, "len_load_$a")
+OPTAB_D (len_store_optab, "len_store_$a")