[05/13] aarch64: Add costs for one element of a scatter store

Message ID mptzgyprivh.fsf@arm.com
State New
Headers show
Series
  • [01/13] aarch64: Add reduction costs to simd_vec_costs
Related show

Commit Message

Aldy Hernandez via Gcc-patches March 26, 2021, 4:15 p.m.
Currently each element in a gather load is costed as a scalar_load
and each element in a scatter store is costed as a scalar_store.
The load side seems to work pretty well in practice, since many
CPU-specific costs give loads quite a high cost relative to
arithmetic operations.  However, stores usually have a cost
of just 1, which means that scatters tend to appear too cheap.

This patch adds a separate cost for one element in a scatter store.

Like with the previous patches, this one only becomes active if
a CPU selects use_new_vector_costs.  It should therefore have
a very low impact on other CPUs.

gcc/
	* config/aarch64/aarch64-protos.h
	(sve_vec_cost::scatter_store_elt_cost): New member variable.
	* config/aarch64/aarch64.c (generic_sve_vector_cost): Update
	accordingly, taking the cost from the cost of a scalar_store.
	(a64fx_sve_vector_cost): Likewise.
	(aarch64_detect_vector_stmt_subtype): Detect scatter stores.
---
 gcc/config/aarch64/aarch64-protos.h |  9 +++++++--
 gcc/config/aarch64/aarch64.c        | 13 +++++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

-- 
2.17.1

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index fabe3df7071..2ffa96ec24b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -256,12 +256,14 @@  struct sve_vec_cost : simd_vec_cost
 			  unsigned int clast_cost,
 			  unsigned int fadda_f16_cost,
 			  unsigned int fadda_f32_cost,
-			  unsigned int fadda_f64_cost)
+			  unsigned int fadda_f64_cost,
+			  unsigned int scatter_store_elt_cost)
     : simd_vec_cost (base),
       clast_cost (clast_cost),
       fadda_f16_cost (fadda_f16_cost),
       fadda_f32_cost (fadda_f32_cost),
-      fadda_f64_cost (fadda_f64_cost)
+      fadda_f64_cost (fadda_f64_cost),
+      scatter_store_elt_cost (scatter_store_elt_cost)
   {}
 
   /* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
@@ -274,6 +276,9 @@  struct sve_vec_cost : simd_vec_cost
   const int fadda_f16_cost;
   const int fadda_f32_cost;
   const int fadda_f64_cost;
+
+  /* The per-element cost of a scatter store.  */
+  const int scatter_store_elt_cost;
 };
 
 /* Cost for vector insn classes.  */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 20bb75bd56c..7f727413d01 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -638,7 +638,8 @@  static const sve_vec_cost generic_sve_vector_cost =
   2, /* clast_cost  */
   2, /* fadda_f16_cost  */
   2, /* fadda_f32_cost  */
-  2 /* fadda_f64_cost  */
+  2, /* fadda_f64_cost  */
+  1 /* scatter_store_elt_cost  */
 };
 
 /* Generic costs for vector insn classes.  */
@@ -705,7 +706,8 @@  static const sve_vec_cost a64fx_sve_vector_cost =
   13, /* clast_cost  */
   13, /* fadda_f16_cost  */
   13, /* fadda_f32_cost  */
-  13 /* fadda_f64_cost  */
+  13, /* fadda_f64_cost  */
+  1 /* scatter_store_elt_cost  */
 };
 
 static const struct cpu_vector_cost a64fx_vector_cost =
@@ -14279,6 +14281,13 @@  aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
       && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
     return simd_costs->store_elt_extra_cost;
 
+  /* Detect cases in which a scalar_store is really storing one element
+     in a scatter operation.  */
+  if (kind == scalar_store
+      && sve_costs
+      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+    return sve_costs->scatter_store_elt_cost;
+
   /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
   if (kind == vec_to_scalar
       && where == vect_body