x86: Don't issue vzeroupper if callee returns AVX register

Message ID 20210718164748.1007080-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x86: Don't issue vzeroupper if callee returns AVX register
Related show

Commit Message

Michael Meissner via Gcc-patches July 18, 2021, 4:47 p.m.
Don't issue vzeroupper before function call if callee returns AVX
register since callee must be compiled with AVX.

gcc/

	PR target/101495
	* config/i386/i386.c (ix86_check_avx_upper_stores): Moved before
	ix86_avx_u128_mode_needed.
	(ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY if callee
	returns AVX register.

gcc/testsuite/

	PR target/101495
	* gcc.target/i386/avx-vzeroupper-28.c: New test.
---
 gcc/config/i386/i386.c                        | 32 ++++++++++++-------
 .../gcc.target/i386/avx-vzeroupper-28.c       | 17 ++++++++++
 2 files changed, 37 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c

-- 
2.31.1

Comments

Michael Meissner via Gcc-patches July 18, 2021, 7:18 p.m. | #1
On Sun, Jul 18, 2021 at 6:47 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> Don't issue vzeroupper before function call if callee returns AVX

> register since callee must be compiled with AVX.

>

> gcc/

>

>         PR target/101495

>         * config/i386/i386.c (ix86_check_avx_upper_stores): Moved before

>         ix86_avx_u128_mode_needed.

>         (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY if callee

>         returns AVX register.

>

> gcc/testsuite/

>

>         PR target/101495

>         * gcc.target/i386/avx-vzeroupper-28.c: New test.


OK.

Thanks,
Uros.

> ---

>  gcc/config/i386/i386.c                        | 32 ++++++++++++-------

>  .../gcc.target/i386/avx-vzeroupper-28.c       | 17 ++++++++++

>  2 files changed, 37 insertions(+), 12 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c

>

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> index 9d74b7a191b..e6c82624272 100644

> --- a/gcc/config/i386/i386.c

> +++ b/gcc/config/i386/i386.c

> @@ -14093,6 +14093,18 @@ ix86_check_avx_upper_register (const_rtx exp)

>           && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);

>  }

>

> +/* Check if a 256bit or 512bit AVX register is referenced in stores.   */

> +

> +static void

> +ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)

> + {

> +   if (ix86_check_avx_upper_register (dest))

> +    {

> +      bool *used = (bool *) data;

> +      *used = true;

> +    }

> + }

> +

>  /* Return needed mode for entity in optimize_mode_switching pass.  */

>

>  static int

> @@ -14117,6 +14129,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)

>             }

>         }

>

> +      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit

> +         nor 512bit registers used in the function return register.  */

> +      bool avx_upper_reg_found = false;

> +      note_stores (insn, ix86_check_avx_upper_stores,

> +                  &avx_upper_reg_found);

> +      if (avx_upper_reg_found)

> +       return AVX_U128_DIRTY;

> +

>        /* If the function is known to preserve some SSE registers,

>          RA and previous passes can legitimately rely on that for

>          modes wider than 256 bits.  It's only safe to issue a

> @@ -14217,18 +14237,6 @@ ix86_mode_needed (int entity, rtx_insn *insn)

>    return 0;

>  }

>

> -/* Check if a 256bit or 512bit AVX register is referenced in stores.   */

> -

> -static void

> -ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)

> - {

> -   if (ix86_check_avx_upper_register (dest))

> -    {

> -      bool *used = (bool *) data;

> -      *used = true;

> -    }

> - }

> -

>  /* Calculate mode of upper 128bit AVX registers after the insn.  */

>

>  static int

> diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c

> new file mode 100644

> index 00000000000..381ee9a7f96

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c

> @@ -0,0 +1,17 @@

> +/* PR target/101495  */

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -mavx2 -mtune=generic -dp" } */

> +

> +#include <immintrin.h>

> +

> +extern __m256 x, y;

> +extern __m256 bar (void);

> +

> +__m256

> +foo ()

> +{

> +  x = y;

> +  return bar ();

> +}

> +

> +/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */

> --

> 2.31.1

>

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9d74b7a191b..e6c82624272 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14093,6 +14093,18 @@  ix86_check_avx_upper_register (const_rtx exp)
 	  && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
 }
 
+/* Check if a 256bit or 512bit AVX register is referenced in stores.   */
+
+static void
+ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
+ {
+   if (ix86_check_avx_upper_register (dest))
+    {
+      bool *used = (bool *) data;
+      *used = true;
+    }
+ }
+
 /* Return needed mode for entity in optimize_mode_switching pass.  */
 
 static int
@@ -14117,6 +14129,14 @@  ix86_avx_u128_mode_needed (rtx_insn *insn)
 	    }
 	}
 
+      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
+         nor 512bit registers used in the function return register.  */
+      bool avx_upper_reg_found = false;
+      note_stores (insn, ix86_check_avx_upper_stores,
+		   &avx_upper_reg_found);
+      if (avx_upper_reg_found)
+	return AVX_U128_DIRTY;
+
       /* If the function is known to preserve some SSE registers,
 	 RA and previous passes can legitimately rely on that for
 	 modes wider than 256 bits.  It's only safe to issue a
@@ -14217,18 +14237,6 @@  ix86_mode_needed (int entity, rtx_insn *insn)
   return 0;
 }
 
-/* Check if a 256bit or 512bit AVX register is referenced in stores.   */
- 
-static void
-ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
- {
-   if (ix86_check_avx_upper_register (dest))
-    {
-      bool *used = (bool *) data;
-      *used = true;
-    }
- } 
-
 /* Calculate mode of upper 128bit AVX registers after the insn.  */
 
 static int
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c
new file mode 100644
index 00000000000..381ee9a7f96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c
@@ -0,0 +1,17 @@ 
+/* PR target/101495  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -mtune=generic -dp" } */
+
+#include <immintrin.h>
+
+extern __m256 x, y;
+extern __m256 bar (void);
+
+__m256
+foo ()
+{
+  x = y;
+  return bar ();
+}
+
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */