[13/13] aarch64: Add costs for LD[34] and ST[34] postincrements

Message ID mptsg4hq45z.fsf@arm.com
State New
Headers show
Series
  • [01/13] aarch64: Add reduction costs to simd_vec_costs
Related show

Commit Message

Ian Lance Taylor via Gcc-patches March 26, 2021, 4:18 p.m.
Most postincrements are cheap on Neoverse V1, but it's
generally better to avoid them on LD[34] and ST[34] instructions.
This patch adds separate address cost fields for these cases:
aarch64_address_cost now uses post_modify_ld3_st3 for CImode
accesses and post_modify_ld4_st4 for XImode accesses.
Other CPUs continue to use the same costs for all postincrements.

gcc/
	* config/aarch64/aarch64-protos.h
	(cpu_addrcost_table::post_modify_ld3_st3): New member variable.
	(cpu_addrcost_table::post_modify_ld4_st4): Likewise.
	* config/aarch64/aarch64.c (generic_addrcost_table): Update
	accordingly, using the same costs as for post_modify.
	(exynosm1_addrcost_table, xgene1_addrcost_table): Likewise.
	(thunderx2t99_addrcost_table, thunderx3t110_addrcost_table)
	(tsv110_addrcost_table, qdf24xx_addrcost_table): Likewise.
	(a64fx_addrcost_table): Likewise.
	(neoversev1_addrcost_table): New.
	(neoversev1_tunings): Use neoversev1_addrcost_table.
	(aarch64_address_cost): Use the new post_modify costs for CImode
	and XImode.
---
 gcc/config/aarch64/aarch64-protos.h |  2 ++
 gcc/config/aarch64/aarch64.c        | 45 +++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

-- 
2.17.1

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ca1ed9e8758..d5d5417370e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -177,6 +177,8 @@  struct cpu_addrcost_table
   const struct scale_addr_mode_cost addr_scale_costs;
   const int pre_modify;
   const int post_modify;
+  const int post_modify_ld3_st3;
+  const int post_modify_ld4_st4;
   const int register_offset;
   const int register_sextend;
   const int register_zextend;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6d961bea5dc..a573850b3fd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -364,6 +364,8 @@  static const struct cpu_addrcost_table generic_addrcost_table =
     },
   0, /* pre_modify  */
   0, /* post_modify  */
+  0, /* post_modify_ld3_st3  */
+  0, /* post_modify_ld4_st4  */
   0, /* register_offset  */
   0, /* register_sextend  */
   0, /* register_zextend  */
@@ -380,6 +382,8 @@  static const struct cpu_addrcost_table exynosm1_addrcost_table =
     },
   0, /* pre_modify  */
   0, /* post_modify  */
+  0, /* post_modify_ld3_st3  */
+  0, /* post_modify_ld4_st4  */
   1, /* register_offset  */
   1, /* register_sextend  */
   2, /* register_zextend  */
@@ -396,6 +400,8 @@  static const struct cpu_addrcost_table xgene1_addrcost_table =
     },
   1, /* pre_modify  */
   1, /* post_modify  */
+  1, /* post_modify_ld3_st3  */
+  1, /* post_modify_ld4_st4  */
   0, /* register_offset  */
   1, /* register_sextend  */
   1, /* register_zextend  */
@@ -412,6 +418,8 @@  static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
     },
   0, /* pre_modify  */
   0, /* post_modify  */
+  0, /* post_modify_ld3_st3  */
+  0, /* post_modify_ld4_st4  */
   2, /* register_offset  */
   3, /* register_sextend  */
   3, /* register_zextend  */
@@ -428,6 +436,8 @@  static const struct cpu_addrcost_table thunderx3t110_addrcost_table =
     },
   0, /* pre_modify  */
   0, /* post_modify  */
+  0, /* post_modify_ld3_st3  */
+  0, /* post_modify_ld4_st4  */
   2, /* register_offset  */
   3, /* register_sextend  */
   3, /* register_zextend  */
@@ -444,6 +454,8 @@  static const struct cpu_addrcost_table tsv110_addrcost_table =
     },
   0, /* pre_modify  */
   0, /* post_modify  */
+  0, /* post_modify_ld3_st3  */
+  0, /* post_modify_ld4_st4  */
   0, /* register_offset  */
   1, /* register_sextend  */
   1, /* register_zextend  */
@@ -460,6 +472,8 @@  static const struct cpu_addrcost_table qdf24xx_addrcost_table =
     },
   1, /* pre_modify  */
   1, /* post_modify  */
+  1, /* post_modify_ld3_st3  */
+  1, /* post_modify_ld4_st4  */
   3, /* register_offset  */
   3, /* register_sextend  */
   3, /* register_zextend  */
@@ -476,12 +490,32 @@  static const struct cpu_addrcost_table a64fx_addrcost_table =
     },
   0, /* pre_modify  */
   0, /* post_modify  */
+  0, /* post_modify_ld3_st3  */
+  0, /* post_modify_ld4_st4  */
   2, /* register_offset  */
   3, /* register_sextend  */
   3, /* register_zextend  */
   0, /* imm_offset  */
 };
 
+static const struct cpu_addrcost_table neoversev1_addrcost_table =
+{
+    {
+      1, /* hi  */
+      0, /* si  */
+      0, /* di  */
+      1, /* ti  */
+    },
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  3, /* post_modify_ld3_st3  */
+  3, /* post_modify_ld4_st4  */
+  0, /* register_offset  */
+  0, /* register_sextend  */
+  0, /* register_zextend  */
+  0 /* imm_offset  */
+};
+
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   1, /* GP2GP  */
@@ -1777,7 +1811,7 @@  static const struct cpu_vector_cost neoversev1_vector_cost =
 static const struct tune_params neoversev1_tunings =
 {
   &cortexa76_extra_costs,
-  &generic_addrcost_table,
+  &neoversev1_addrcost_table,
   &generic_regmove_cost,
   &neoversev1_vector_cost,
   &generic_branch_cost,
@@ -12077,7 +12111,14 @@  aarch64_address_cost (rtx x,
 	if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
 	  cost += addr_cost->pre_modify;
 	else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
-	  cost += addr_cost->post_modify;
+	  {
+	    if (mode == CImode)
+	      cost += addr_cost->post_modify_ld3_st3;
+	    else if (mode == XImode)
+	      cost += addr_cost->post_modify_ld4_st4;
+	    else
+	      cost += addr_cost->post_modify;
+	  }
 	else
 	  gcc_unreachable ();