[06/11] x86: introduce .bfloat16 directive

Message ID 0edef595-cac9-7b59-0caa-6d90576bf4e7@suse.com
State New
Headers show
Series
  • gas: adjustments to floating point data directives handling
Related show

Commit Message

Jan Beulich via Binutils July 23, 2021, 6:55 a.m.
This is to be able to generate data acted upon by AVX512-BF16 and
AMX-BF16 insns. While not part of the IEEE standard, the format is
sufficiently standardized to warrant handling in config/atof-ieee.c.
Arm, where custom handling was implemented, may want to leverage this as
well. To be able to also use the hex forms supported for other floating
point formats, a small addition to the generic hex_float() is needed.

Extend existing x86 testcases.

Comments

H.J. Lu via Binutils July 23, 2021, 1:21 p.m. | #1
On Thu, Jul 22, 2021 at 11:55 PM Jan Beulich <jbeulich@suse.com> wrote:
>

> This is to be able to generate data acted upon by AVX512-BF16 and

> AMX-BF16 insns. While not part of the IEEE standard, the format is

> sufficiently standardized to warrant handling in config/atof-ieee.c.

> Arm, where custom handling was implemented, may want to leverage this as

> well. To be able to also use the hex forms supported for other floating

> point formats, a small addition to the generic hex_float() is needed.

>

> Extend existing x86 testcases.


The x86 parts are OK.  Thanks.

> --- a/gas/config/atof-ieee.c

> +++ b/gas/config/atof-ieee.c

> @@ -27,6 +27,7 @@ extern FLONUM_TYPE generic_floating_poin

>  /* Don't count the gap in the m68k extended precision format.  */

>  #define MAX_PRECISION  5

>  #define H_PRECISION    1

> +#define B_PRECISION    1 /* Not strictly IEEE, but handled here anyway.  */

>  #define F_PRECISION    2

>  #define D_PRECISION    4

>  #define X_PRECISION    5

> @@ -243,6 +244,12 @@ atof_ieee (char *str,                      /* Text to conve

>        exponent_bits = 5;

>        break;

>

> +    case 'b':

> +    case 'B':

> +      precision = B_PRECISION;

> +      exponent_bits = 8;

> +      break;

> +

>      case 'f':

>      case 'F':

>      case 's':

> @@ -368,9 +375,9 @@ gen_to_words (LITTLENUM_TYPE *words, int

>         as_warn (_("Infinities are not supported by this target"));

>

>        /* +INF:  Do the right thing.  */

> -      if (precision == H_PRECISION)

> +      if (precision == H_PRECISION /* also B_PRECISION */)

>         {

> -         words[0] = 0x7c00;

> +         words[0] = exponent_bits == 5 ? 0x7c00 : 0x7f80;

>         }

>        else if (precision == F_PRECISION)

>         {

> @@ -413,9 +420,9 @@ gen_to_words (LITTLENUM_TYPE *words, int

>         as_warn (_("Infinities are not supported by this target"));

>

>        /* Negative INF.  */

> -      if (precision == H_PRECISION)

> +      if (precision == H_PRECISION /* also B_PRECISION */)

>         {

> -         words[0] = 0xfc00;

> +         words[0] = exponent_bits == 5 ? 0xfc00 : 0xff80;

>         }

>        else if (precision == F_PRECISION)

>         {

> @@ -777,6 +784,11 @@ ieee_md_atof (int type,

>           prec = H_PRECISION;

>           break;

>

> +       case 'B':

> +       case 'b':

> +         prec = B_PRECISION;

> +         break;

> +

>         case 'f':

>         case 'F':

>         case 's':

> --- a/gas/config/tc-i386.c

> +++ b/gas/config/tc-i386.c

> @@ -511,7 +511,7 @@ const char EXP_CHARS[] = "eE";

>  /* Chars that mean this number is a floating point constant

>     As in 0f12.456

>     or    0d1.2345e12.  */

> -const char FLT_CHARS[] = "fFdDxXhH";

> +const char FLT_CHARS[] = "fFdDxXhHbB";

>

>  /* Tables for lexical analysis.  */

>  static char mnemonic_chars[256];

> @@ -1353,6 +1353,7 @@ const pseudo_typeS md_pseudo_table[] =

>    {"dfloat", float_cons, 'd'},

>    {"tfloat", float_cons, 'x'},

>    {"hfloat", float_cons, 'h'},

> +  {"bfloat16", float_cons, 'b'},

>    {"value", cons, 2},

>    {"slong", signed_cons, 4},

>    {"noopt", s_ignore, 0},

> --- a/gas/doc/c-i386.texi

> +++ b/gas/doc/c-i386.texi

> @@ -1312,20 +1312,22 @@ data type.  Constructors build these dat

>  @cindex @code{double} directive, i386

>  @cindex @code{tfloat} directive, i386

>  @cindex @code{hfloat} directive, i386

> +@cindex @code{bfloat16} directive, i386

>  @cindex @code{float} directive, x86-64

>  @cindex @code{single} directive, x86-64

>  @cindex @code{double} directive, x86-64

>  @cindex @code{tfloat} directive, x86-64

>  @cindex @code{hfloat} directive, x86-64

> +@cindex @code{bfloat16} directive, x86-64

>  @itemize @bullet

>  @item

>  Floating point constructors are @samp{.float} or @samp{.single},

> -@samp{.double}, @samp{.tfloat}, and @samp{.hfloat} for 32-, 64-, 80-, and

> -16-bit formats respectively. The former three correspond to instruction

> -mnemonic suffixes @samp{s}, @samp{l}, and @samp{t}. @samp{t} stands for

> -80-bit (ten byte) real.  The 80387 only supports this format via the

> -@samp{fldt} (load 80-bit real to stack top) and @samp{fstpt} (store 80-bit

> -real and pop stack) instructions.

> +@samp{.double}, @samp{.tfloat}, @samp{.hfloat}, and @samp{.bfloat16} for 32-,

> +64-, 80-, and 16-bit (two flavors) formats respectively.  The former three

> +correspond to instruction mnemonic suffixes @samp{s}, @samp{l}, and @samp{t}.

> +@samp{t} stands for 80-bit (ten byte) real.  The 80387 only supports this

> +format via the @samp{fldt} (load 80-bit real to stack top) and @samp{fstpt}

> +(store 80-bit real and pop stack) instructions.

>

>  @cindex @code{word} directive, i386

>  @cindex @code{long} directive, i386

> --- a/gas/read.c

> +++ b/gas/read.c

> @@ -4852,6 +4852,8 @@ hex_float (int float_type, char *bytes)

>

>    switch (float_type)

>      {

> +    case 'b':

> +    case 'B':

>      case 'h':

>      case 'H':

>        length = 2;

> --- a/gas/testsuite/gas/i386/fp-elf32.d

> +++ b/gas/testsuite/gas/i386/fp-elf32.d

> @@ -14,4 +14,4 @@ Contents of section .data:

>   0060 00000000 00000080 fe3f0000 00000000  .*

>   0070 00000080 fdbf0000 00000000 00000080  .*

>   0080 ff030000 aaaaaaaa aaaaaaaa aaaaaaaa  .*

> - 0090 003c00c0 003c5555 55555555 55555555  .*

> + 0090 003c00c0 003c803f 00c0803f 55555555  .*

> --- a/gas/testsuite/gas/i386/fp-elf64.d

> +++ b/gas/testsuite/gas/i386/fp-elf64.d

> @@ -14,4 +14,4 @@ Contents of section .data:

>   0060 00000000 00000080 fe3f0000 00000000  .*

>   0070 00000000 00000080 fdbf0000 00000000  .*

>   0080 00000000 00000080 ff030000 00000000  .*

> - 0090 003c00c0 003c5555 55555555 55555555  .*

> + 0090 003c00c0 003c803f 00c0803f 55555555  .*

> --- a/gas/testsuite/gas/i386/fp.d

> +++ b/gas/testsuite/gas/i386/fp.d

> @@ -12,4 +12,4 @@ Contents of section .data:

>   0050 ffffffff ffffffff ffffcccc cccccccc  .*

>   0060 00000000 00000080 fe3f0000 00000000  .*

>   0070 0080fdbf 00000000 00000080 ff03aaaa  .*

> - 0080 003c00c0 003c5555 55555555 55555555  .*

> + 0080 003c00c0 003c803f 00c0803f 55555555  .*

> --- a/gas/testsuite/gas/i386/fp.s

> +++ b/gas/testsuite/gas/i386/fp.s

> @@ -31,4 +31,5 @@

>         .p2align 4,0xaa

>

>         .hfloat 1, -2, 0x:3c00

> +       .bfloat16 1, -2, 0x:3f80

>         .p2align 4,0x55

>



-- 
H.J.

Patch

--- a/gas/config/atof-ieee.c
+++ b/gas/config/atof-ieee.c
@@ -27,6 +27,7 @@  extern FLONUM_TYPE generic_floating_poin
 /* Don't count the gap in the m68k extended precision format.  */
 #define MAX_PRECISION  5
 #define H_PRECISION    1
+#define B_PRECISION    1 /* Not strictly IEEE, but handled here anyway.  */
 #define F_PRECISION    2
 #define D_PRECISION    4
 #define X_PRECISION    5
@@ -243,6 +244,12 @@  atof_ieee (char *str,			/* Text to conve
       exponent_bits = 5;
       break;
 
+    case 'b':
+    case 'B':
+      precision = B_PRECISION;
+      exponent_bits = 8;
+      break;
+
     case 'f':
     case 'F':
     case 's':
@@ -368,9 +375,9 @@  gen_to_words (LITTLENUM_TYPE *words, int
 	as_warn (_("Infinities are not supported by this target"));
 
       /* +INF:  Do the right thing.  */
-      if (precision == H_PRECISION)
+      if (precision == H_PRECISION /* also B_PRECISION */)
 	{
-	  words[0] = 0x7c00;
+	  words[0] = exponent_bits == 5 ? 0x7c00 : 0x7f80;
 	}
       else if (precision == F_PRECISION)
 	{
@@ -413,9 +420,9 @@  gen_to_words (LITTLENUM_TYPE *words, int
 	as_warn (_("Infinities are not supported by this target"));
 
       /* Negative INF.  */
-      if (precision == H_PRECISION)
+      if (precision == H_PRECISION /* also B_PRECISION */)
 	{
-	  words[0] = 0xfc00;
+	  words[0] = exponent_bits == 5 ? 0xfc00 : 0xff80;
 	}
       else if (precision == F_PRECISION)
 	{
@@ -777,6 +784,11 @@  ieee_md_atof (int type,
 	  prec = H_PRECISION;
 	  break;
 
+	case 'B':
+	case 'b':
+	  prec = B_PRECISION;
+	  break;
+
 	case 'f':
 	case 'F':
 	case 's':
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -511,7 +511,7 @@  const char EXP_CHARS[] = "eE";
 /* Chars that mean this number is a floating point constant
    As in 0f12.456
    or    0d1.2345e12.  */
-const char FLT_CHARS[] = "fFdDxXhH";
+const char FLT_CHARS[] = "fFdDxXhHbB";
 
 /* Tables for lexical analysis.  */
 static char mnemonic_chars[256];
@@ -1353,6 +1353,7 @@  const pseudo_typeS md_pseudo_table[] =
   {"dfloat", float_cons, 'd'},
   {"tfloat", float_cons, 'x'},
   {"hfloat", float_cons, 'h'},
+  {"bfloat16", float_cons, 'b'},
   {"value", cons, 2},
   {"slong", signed_cons, 4},
   {"noopt", s_ignore, 0},
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -1312,20 +1312,22 @@  data type.  Constructors build these dat
 @cindex @code{double} directive, i386
 @cindex @code{tfloat} directive, i386
 @cindex @code{hfloat} directive, i386
+@cindex @code{bfloat16} directive, i386
 @cindex @code{float} directive, x86-64
 @cindex @code{single} directive, x86-64
 @cindex @code{double} directive, x86-64
 @cindex @code{tfloat} directive, x86-64
 @cindex @code{hfloat} directive, x86-64
+@cindex @code{bfloat16} directive, x86-64
 @itemize @bullet
 @item
 Floating point constructors are @samp{.float} or @samp{.single},
-@samp{.double}, @samp{.tfloat}, and @samp{.hfloat} for 32-, 64-, 80-, and
-16-bit formats respectively. The former three correspond to instruction
-mnemonic suffixes @samp{s}, @samp{l}, and @samp{t}. @samp{t} stands for
-80-bit (ten byte) real.  The 80387 only supports this format via the
-@samp{fldt} (load 80-bit real to stack top) and @samp{fstpt} (store 80-bit
-real and pop stack) instructions.
+@samp{.double}, @samp{.tfloat}, @samp{.hfloat}, and @samp{.bfloat16} for 32-,
+64-, 80-, and 16-bit (two flavors) formats respectively.  The former three
+correspond to instruction mnemonic suffixes @samp{s}, @samp{l}, and @samp{t}.
+@samp{t} stands for 80-bit (ten byte) real.  The 80387 only supports this
+format via the @samp{fldt} (load 80-bit real to stack top) and @samp{fstpt}
+(store 80-bit real and pop stack) instructions.
 
 @cindex @code{word} directive, i386
 @cindex @code{long} directive, i386
--- a/gas/read.c
+++ b/gas/read.c
@@ -4852,6 +4852,8 @@  hex_float (int float_type, char *bytes)
 
   switch (float_type)
     {
+    case 'b':
+    case 'B':
     case 'h':
     case 'H':
       length = 2;
--- a/gas/testsuite/gas/i386/fp-elf32.d
+++ b/gas/testsuite/gas/i386/fp-elf32.d
@@ -14,4 +14,4 @@  Contents of section .data:
  0060 00000000 00000080 fe3f0000 00000000  .*
  0070 00000080 fdbf0000 00000000 00000080  .*
  0080 ff030000 aaaaaaaa aaaaaaaa aaaaaaaa  .*
- 0090 003c00c0 003c5555 55555555 55555555  .*
+ 0090 003c00c0 003c803f 00c0803f 55555555  .*
--- a/gas/testsuite/gas/i386/fp-elf64.d
+++ b/gas/testsuite/gas/i386/fp-elf64.d
@@ -14,4 +14,4 @@  Contents of section .data:
  0060 00000000 00000080 fe3f0000 00000000  .*
  0070 00000000 00000080 fdbf0000 00000000  .*
  0080 00000000 00000080 ff030000 00000000  .*
- 0090 003c00c0 003c5555 55555555 55555555  .*
+ 0090 003c00c0 003c803f 00c0803f 55555555  .*
--- a/gas/testsuite/gas/i386/fp.d
+++ b/gas/testsuite/gas/i386/fp.d
@@ -12,4 +12,4 @@  Contents of section .data:
  0050 ffffffff ffffffff ffffcccc cccccccc  .*
  0060 00000000 00000080 fe3f0000 00000000  .*
  0070 0080fdbf 00000000 00000080 ff03aaaa  .*
- 0080 003c00c0 003c5555 55555555 55555555  .*
+ 0080 003c00c0 003c803f 00c0803f 55555555  .*
--- a/gas/testsuite/gas/i386/fp.s
+++ b/gas/testsuite/gas/i386/fp.s
@@ -31,4 +31,5 @@ 
 	.p2align 4,0xaa
 
 	.hfloat 1, -2, 0x:3c00
+	.bfloat16 1, -2, 0x:3f80
 	.p2align 4,0x55