[AArch64] Add support for fused compare and branch

Message ID VI1PR0801MB21278868DCF938CEA1A67E3383460@VI1PR0801MB2127.eurprd08.prod.outlook.com
State New
Headers show
Series
  • [AArch64] Add support for fused compare and branch
Related show

Commit Message

Wilco Dijkstra Nov. 29, 2019, 3:48 p.m.
Hi,

Add support for fused compare with branch.  Rename the existing
AARCH64_FUSE_CMP_BRANCH to ALU_BRANCH, and AARCH64_FUSE_ALU_BRANCH
to ALU_CBZ to make it clear what is being fused.

AArch64 bootstrap OK, OK to commit?

ChangeLog:

2019-11-29  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c
	(thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH.
	(thunderx_tunings): Likewise.
	(tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ.
	(thunderx2t99_tunings): Likewise.
	(aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH.
	* config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.
--

Comments

Richard Sandiford Nov. 29, 2019, 4:46 p.m. | #1
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi,

>

> Add support for fused compare with branch.  Rename the existing

> AARCH64_FUSE_CMP_BRANCH to ALU_BRANCH, and AARCH64_FUSE_ALU_BRANCH

> to ALU_CBZ to make it clear what is being fused.


This night have been easier to review as three patches:

(1) rename ALU_BRANCH to ALU_CBZ
(2) rename CMP_BRANCH to ALU_BRANCH
(3) add back CMP_BRANCH with more accurate semantics

But what uses CMP_BRANCH after the patch?  It looked like you renamed
all existing uses and didn't add any new ones.

> @@ -20363,7 +20363,14 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)

>          }

>      }

>

> +  /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch.  */

>    if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)

> +      && prev_set && curr_set && any_condjump_p (curr)

> +      && reg_referenced_p (SET_DEST (curr_set), PATTERN (curr)))

                                        ^^^^^^^^
Looks like this should be prev_set.  But this condition will trigger
for any prev_insn that is only being kept around for its effect on the
flags, not just CMP/CMN/TST/BICS.  If it's only supposed to be those
four insns then I think we should test for them explicitly.

Thanks,
Richard

> +    return true;

> +

> +  /* Fuse flag-setting ALU instructions and conditional branch.  */

> +  if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)

>        && any_condjump_p (curr))

>      {

>        unsigned int condreg1, condreg2;
Wilco Dijkstra Dec. 3, 2019, 3:11 p.m. | #2
Hi Richard,

> But what uses CMP_BRANCH after the patch?  It looked like you renamed

> all existing uses and didn't add any new ones.


My next patch will be adding uses of it now I've done some benchmarking
to decide when to turn it on.

>> +      && reg_referenced_p (SET_DEST (curr_set), PATTERN (curr)))


> Looks like this should be prev_set.  But this condition will trigger

> for any prev_insn that is only being kept around for its effect on the

> flags, not just CMP/CMN/TST/BICS.  If it's only supposed to be those

> four insns then I think we should test for them explicitly.


Yes that should have been prev_set indeed. I've tightened up the
condition to check for integer comparisons (which matches CMP,
CMN, TST and BICS). Updated patch below.

Cheers,
Wilco


[PATCH v2][AArch64] Add support for fused compare and branch

Add support for fused compare with branch.  Rename the existing
AARCH64_FUSE_CMP_BRANCH to ALU_BRANCH, and AARCH64_FUSE_ALU_BRANCH
to ALU_CBZ to make it clear what is being fused.

AArch64 bootstrap OK, OK to commit?

ChangeLog:

2019-12-03  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c
	(thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH.
	(thunderx_tunings): Likewise.
	(tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ.
	(thunderx2t99_tunings): Likewise.
	(aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH.
	* config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.

--

diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index ce4bb92d5c9d1f187c026b1a714e485a2b9f1a74..051009b42b2db4e79a8b302fd3f1b65dedfdba8f 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
 AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
 AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
 AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
+AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
 
 #undef AARCH64_FUSION_PAIR
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d0cbe13273f78ebac6f3facc983ca0dec1a43966..942413e0a377603be98a8c547db262eb75baed48 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -915,7 +915,7 @@ static const struct tune_params thunderxt88_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   6, /* memmov_cost  */
   2, /* issue_rate  */
-  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
+  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
   "8",	/* function_align.  */
   "8",	/* jump_align.  */
   "8",	/* loop_align.  */
@@ -941,7 +941,7 @@ static const struct tune_params thunderx_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   6, /* memmov_cost  */
   2, /* issue_rate  */
-  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
+  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
   "8",	/* function_align.  */
   "8",	/* jump_align.  */
   "8",	/* loop_align.  */
@@ -968,8 +968,8 @@ static const struct tune_params tsv110_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4,    /* memmov_cost  */
   4,    /* issue_rate  */
-  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
-   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
   "16", /* function_align.  */
   "4",  /* jump_align.  */
   "8",  /* loop_align.  */
@@ -1103,8 +1103,8 @@ static const struct tune_params thunderx2t99_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4, /* memmov_cost.  */
   4, /* issue_rate.  */
-  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
-   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
+   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
   "16",	/* function_align.  */
   "8",	/* jump_align.  */
   "16",	/* loop_align.  */
@@ -20372,7 +20372,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch.  */
   if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
+      && prev_set && curr_set && any_condjump_p (curr)
+      && GET_CODE (SET_SRC (prev_set)) == COMPARE
+      && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
+      && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
+    return true;
+
+  /* Fuse flag-setting ALU instructions and conditional branch.  */
+  if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
       && any_condjump_p (curr))
     {
       unsigned int condreg1, condreg2;
@@ -20396,9 +20405,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
 	}
     }
 
+  /* Fuse ALU instructions and CBZ/CBNZ.  */
   if (prev_set
       && curr_set
-      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
+      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
       && any_condjump_p (curr))
     {
       /* We're trying to match:
Richard Sandiford Dec. 4, 2019, 10:27 a.m. | #3
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Richard,

>

>> But what uses CMP_BRANCH after the patch?  It looked like you renamed

>> all existing uses and didn't add any new ones.

>

> My next patch will be adding uses of it now I've done some benchmarking

> to decide when to turn it on.

>

>>> +      && reg_referenced_p (SET_DEST (curr_set), PATTERN (curr)))

>

>> Looks like this should be prev_set.  But this condition will trigger

>> for any prev_insn that is only being kept around for its effect on the

>> flags, not just CMP/CMN/TST/BICS.  If it's only supposed to be those

>> four insns then I think we should test for them explicitly.

>

> Yes that should have been prev_set indeed. I've tightened up the

> condition to check for integer comparisons (which matches CMP,

> CMN, TST and BICS). Updated patch below.

>

> Cheers,

> Wilco

>

>

> [PATCH v2][AArch64] Add support for fused compare and branch

>

> Add support for fused compare with branch.  Rename the existing

> AARCH64_FUSE_CMP_BRANCH to ALU_BRANCH, and AARCH64_FUSE_ALU_BRANCH

> to ALU_CBZ to make it clear what is being fused.

>

> AArch64 bootstrap OK, OK to commit?

>

> ChangeLog:

>

> 2019-12-03  Wilco Dijkstra  <wdijkstr@arm.com>

>

> * config/aarch64/aarch64.c

> (thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH.

> (thunderx_tunings): Likewise.

> (tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ.

> (thunderx2t99_tunings): Likewise.

> (aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH.

> * config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.


OK, thanks.

Richard

>

> --

>

> diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def

> index ce4bb92d5c9d1f187c026b1a714e485a2b9f1a74..051009b42b2db4e79a8b302fd3f1b65dedfdba8f 100644

> --- a/gcc/config/aarch64/aarch64-fusion-pairs.def

> +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def

> @@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)

>  AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)

>  AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)

>  AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)

> +AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)

>

>  #undef AARCH64_FUSION_PAIR

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

> index d0cbe13273f78ebac6f3facc983ca0dec1a43966..942413e0a377603be98a8c547db262eb75baed48 100644

> --- a/gcc/config/aarch64/aarch64.c

> +++ b/gcc/config/aarch64/aarch64.c

> @@ -915,7 +915,7 @@ static const struct tune_params thunderxt88_tunings =

>    SVE_NOT_IMPLEMENTED, /* sve_width  */

>    6, /* memmov_cost  */

>    2, /* issue_rate  */

> -  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */

> +  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */

>    "8",/* function_align.  */

>    "8",/* jump_align.  */

>    "8",/* loop_align.  */

> @@ -941,7 +941,7 @@ static const struct tune_params thunderx_tunings =

>    SVE_NOT_IMPLEMENTED, /* sve_width  */

>    6, /* memmov_cost  */

>    2, /* issue_rate  */

> -  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */

> +  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */

>    "8",/* function_align.  */

>    "8",/* jump_align.  */

>    "8",/* loop_align.  */

> @@ -968,8 +968,8 @@ static const struct tune_params tsv110_tunings =

>    SVE_NOT_IMPLEMENTED, /* sve_width  */

>    4,    /* memmov_cost  */

>    4,    /* issue_rate  */

> -  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH

> -   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */

> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH

> +   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */

>    "16", /* function_align.  */

>    "4",  /* jump_align.  */

>    "8",  /* loop_align.  */

> @@ -1103,8 +1103,8 @@ static const struct tune_params thunderx2t99_tunings =

>    SVE_NOT_IMPLEMENTED, /* sve_width  */

>    4, /* memmov_cost.  */

>    4, /* issue_rate.  */

> -  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC

> -   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */

> +  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC

> +   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */

>    "16",/* function_align.  */

>    "8",/* jump_align.  */

>    "16",/* loop_align.  */

> @@ -20372,7 +20372,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)

>          }

>      }

>

> +  /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch.  */

>    if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)

> +      && prev_set && curr_set && any_condjump_p (curr)

> +      && GET_CODE (SET_SRC (prev_set)) == COMPARE

> +      && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))

> +      && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))

> +    return true;

> +

> +  /* Fuse flag-setting ALU instructions and conditional branch.  */

> +  if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)

>        && any_condjump_p (curr))

>      {

>        unsigned int condreg1, condreg2;

> @@ -20396,9 +20405,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)

>  }

>      }

>

> +  /* Fuse ALU instructions and CBZ/CBNZ.  */

>    if (prev_set

>        && curr_set

> -      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)

> +      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)

>        && any_condjump_p (curr))

>      {

>        /* We're trying to match:

Patch

diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index ce4bb92d5c9d1f187c026b1a714e485a2b9f1a74..051009b42b2db4e79a8b302fd3f1b65dedfdba8f 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -35,5 +35,6 @@  AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
 AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
 AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
 AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
+AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
 
 #undef AARCH64_FUSION_PAIR
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 94e664af52fb0e3404e76d4d0b67a618023571ba..f74134ea2e080c361be0facc274575c09fbb7a82 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -915,7 +915,7 @@  static const struct tune_params thunderxt88_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   6, /* memmov_cost  */
   2, /* issue_rate  */
-  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
+  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
   "8",	/* function_align.  */
   "8",	/* jump_align.  */
   "8",	/* loop_align.  */
@@ -941,7 +941,7 @@  static const struct tune_params thunderx_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   6, /* memmov_cost  */
   2, /* issue_rate  */
-  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
+  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
   "8",	/* function_align.  */
   "8",	/* jump_align.  */
   "8",	/* loop_align.  */
@@ -968,8 +968,8 @@  static const struct tune_params tsv110_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4,    /* memmov_cost  */
   4,    /* issue_rate  */
-  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
-   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
   "16", /* function_align.  */
   "4",  /* jump_align.  */
   "8",  /* loop_align.  */
@@ -1103,8 +1103,8 @@  static const struct tune_params thunderx2t99_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4, /* memmov_cost.  */
   4, /* issue_rate.  */
-  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
-   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
+   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
   "16",	/* function_align.  */
   "8",	/* jump_align.  */
   "16",	/* loop_align.  */
@@ -20363,7 +20363,14 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch.  */
   if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
+      && prev_set && curr_set && any_condjump_p (curr)
+      && reg_referenced_p (SET_DEST (curr_set), PATTERN (curr)))
+    return true;
+
+  /* Fuse flag-setting ALU instructions and conditional branch.  */
+  if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
       && any_condjump_p (curr))
     {
       unsigned int condreg1, condreg2;
@@ -20387,9 +20394,10 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
 	}
     }
 
+  /* Fuse ALU instructions and CBZ/CBNZ.  */
   if (prev_set
       && curr_set
-      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
+      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
       && any_condjump_p (curr))
     {
       /* We're trying to match: