[amdgcn] Add runtime ISA check for amdgcn offloading

Message ID a4bfe268-173f-ffb8-b486-344f75e96216@codesourcery.com
State New
Headers show
Series
  • [amdgcn] Add runtime ISA check for amdgcn offloading
Related show

Commit Message

Frederik Harwath Jan. 20, 2020, 6:57 a.m.
Hi,
this patch implements a runtime ISA check for amdgcn offloading.
The check verifies that the ISA of the GPU to which we try to offload matches
the ISA for which the code to be offloaded has been compiled. If it detects
a mismatch, it emits an error message which contains a hint at the correct compilation
parameters for the GPU. For instance:

  "libgomp: GCN fatal error: GCN code object ISA 'gfx906' does not match GPU ISA 'gfx900'.
   Try to recompile with '-foffload=-march=gfx900'."
or
  "libgomp: GCN fatal error: GCN code object ISA 'gfx900' does not match agent ISA 'gfx803'.
   Try to recompile with '-foffload=-march=fiji'."

(By the way, the names that we use for the ISAs are a bit inconsistent. Perhaps we should just
 use the gfx-names for all ISAs everywhere?.)

Without this patch, the user only gets an confusing error message from the HSA runtime which
fails to load the GCN object code.

I have checked that the code does not lead to any regressions when running
the test suite correctly, i.e. with the "-foffload=-march=..." option
given to the compiler matching the architecture of the GPU.
It seems difficult to implement an automated test that triggers an ISA mismatch.
I have tested manually (for different combinations of the compilation flags
and offloading GPU ISAs) that the runtime ISA check produces the expected error messages.

Is it ok to commit this patch to the master branch?

Frederik

Comments

Andrew Stubbs Jan. 20, 2020, 10 a.m. | #1
Hi Frederik,

On 20/01/2020 06:57, Harwath, Frederik wrote:
> Hi,

> this patch implements a runtime ISA check for amdgcn offloading.

> The check verifies that the ISA of the GPU to which we try to offload matches

> the ISA for which the code to be offloaded has been compiled. If it detects

> a mismatch, it emits an error message which contains a hint at the correct compilation

> parameters for the GPU. For instance:

> 

>    "libgomp: GCN fatal error: GCN code object ISA 'gfx906' does not match GPU ISA 'gfx900'.

>     Try to recompile with '-foffload=-march=gfx900'."

> or

>    "libgomp: GCN fatal error: GCN code object ISA 'gfx900' does not match agent ISA 'gfx803'.

>     Try to recompile with '-foffload=-march=fiji'."

> 

> (By the way, the names that we use for the ISAs are a bit inconsistent. Perhaps we should just

>   use the gfx-names for all ISAs everywhere?.)


The -march=?? names match those used by LLVM, since they are passed 
straight through to the assembler and linker we import from that 
project. There seems to have been some inconsistency there also, with 
older devices having names, and newer devices numbers. Unless we want 
specs files with a full set of mappings then it's best to stick with 
what we have, for now, I think.

> Without this patch, the user only gets an confusing error message from the HSA runtime which

> fails to load the GCN object code.


This has been annoying indeed. The new message will be very welcome. :-)

> I have checked that the code does not lead to any regressions when running

> the test suite correctly, i.e. with the "-foffload=-march=..." option

> given to the compiler matching the architecture of the GPU.

> It seems difficult to implement an automated test that triggers an ISA mismatch.

> I have tested manually (for different combinations of the compilation flags

> and offloading GPU ISAs) that the runtime ISA check produces the expected error messages.


I think you said that overriding the multilib options wasn't 
straight-forward, and I don't have any idea how to fix that. Nor can we 
know what devices the test environment does or does not have. I think 
we'll have to do without a negative test.

> Is it ok to commit this patch to the master branch?


I can't see anything significantly wrong with the code of the patch, 
however I have some minor issues I'd like fixed in the text.

> @@ -396,6 +396,88 @@ struct gcn_image_desc

>    struct global_var_info *global_variables;

>  };

>  

> +/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we

> +   support.

> +   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */

> +

> +typedef enum {

> +  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,

> +  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,

> +  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,

> +  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,

> +} EF_AMDGPU_MACH;

> +

> +const static int EF_AMDGPU_MACH_MASK = 0x000000ff;

> +typedef EF_AMDGPU_MACH gcn_isa;

> +

> +const static char* gcn_gfx801_s = "gfx801";

> +const static char* gcn_gfx803_s = "gfx803";

> +const static char* gcn_gfx900_s = "gfx900";

> +const static char* gcn_gfx906_s = "gfx906";

> +const static int gcn_isa_name_len = 6;

> +

> +static int

> +elf_gcn_isa_field (Elf64_Ehdr *image)

> +{

> +  return image->e_flags & EF_AMDGPU_MACH_MASK;

> +}


The whole file has been carefully grouped into categories, separated 
with vim fold markers {{{ }}} but your patch inserts functions amongst 
the types. Please move the functions down into the "Utility functions" 
group. The const static variables should probably go with them.

> @@ -2257,6 +2342,39 @@ find_load_offset (Elf64_Addr *load_offset, struct agent_info *agent,

>    return res;

>  }

>  

> +/* Check that the GCN ISA of the given image matches the ISA of the agent. */

> +

> +static bool

> +isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image)

> +{

> +      int isa_field = elf_gcn_isa_field (image);

> +      const char* isa_s = isa_hsa_name (isa_field);

> +      if (!isa_s)

> +


Please use gnu-style indentation.

> @@ -3294,7 +3415,11 @@ GOMP_OFFLOAD_init_device (int n)

>  					  &buf);

>    if (status != HSA_STATUS_SUCCESS)

>      return hsa_error ("Error querying the name of the agent", status);

> -  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);

> +  agent->gfx900_p = (strncmp (buf, gcn_gfx900_s, gcn_isa_name_len) == 0);

> +

> +  agent->device_isa = isa_code (buf);

> +  if (agent->device_isa < 0)

> +    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);


Can device_isa not just replace gfx900_p? I think it's only tested in 
one place, and that would be easily substituted.

Thanks

Andrew
Jakub Jelinek Jan. 20, 2020, 10:08 a.m. | #2
On Mon, Jan 20, 2020 at 10:00:09AM +0000, Andrew Stubbs wrote:
> > @@ -396,6 +396,88 @@ struct gcn_image_desc

> >    struct global_var_info *global_variables;

> >  };

> > +/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we

> > +   support.

> > +   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */

> > +

> > +typedef enum {

> > +  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,

> > +  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,

> > +  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,

> > +  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,

> > +} EF_AMDGPU_MACH;

> > +

> > +const static int EF_AMDGPU_MACH_MASK = 0x000000ff;

> > +typedef EF_AMDGPU_MACH gcn_isa;

> > +

> > +const static char* gcn_gfx801_s = "gfx801";

> > +const static char* gcn_gfx803_s = "gfx803";

> > +const static char* gcn_gfx900_s = "gfx900";

> > +const static char* gcn_gfx906_s = "gfx906";

> > +const static int gcn_isa_name_len = 6;


Does this mean that GCN has 4 incompatible ISA sets and one can't compile
for some ISA set that covers them all?  Or is that just in case one wants
specific optimizations for the hw he has and nothing else?

Also, GNU coding conventions say that the above should use "char *gcn"
rather than "char* gcn".

	Jakub
Andrew Stubbs Jan. 20, 2020, 10:36 a.m. | #3
On 20/01/2020 10:08, Jakub Jelinek wrote:
> On Mon, Jan 20, 2020 at 10:00:09AM +0000, Andrew Stubbs wrote:

>>> @@ -396,6 +396,88 @@ struct gcn_image_desc

>>>     struct global_var_info *global_variables;

>>>   };

>>> +/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we

>>> +   support.

>>> +   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */

>>> +

>>> +typedef enum {

>>> +  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,

>>> +  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,

>>> +  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,

>>> +  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,

>>> +} EF_AMDGPU_MACH;

>>> +

>>> +const static int EF_AMDGPU_MACH_MASK = 0x000000ff;

>>> +typedef EF_AMDGPU_MACH gcn_isa;

>>> +

>>> +const static char* gcn_gfx801_s = "gfx801";

>>> +const static char* gcn_gfx803_s = "gfx803";

>>> +const static char* gcn_gfx900_s = "gfx900";

>>> +const static char* gcn_gfx906_s = "gfx906";

>>> +const static int gcn_isa_name_len = 6;

> 

> Does this mean that GCN has 4 incompatible ISA sets and one can't compile

> for some ISA set that covers them all?  Or is that just in case one wants

> specific optimizations for the hw he has and nothing else?


The HSA/ROCm runtime rejects binaries not built for the exact device 
present.

In practice, binaries built by GCC for GCN3 "fiji" devices would 
probably run on any of the devices we currently support, if only the 
driver would load it. It would not be optimal, but AFAIK the subset of 
the ISA we actually use is compatible.

However, in theory, the meta-data isn't quite the same (for one thing, 
GCN3 allocates registers in increments of 8, where GCN5 uses 16) so some 
programs may misbehave. I have suggested to the ROCm folks that possibly 
this could be fixed in the drivers, but that's not been done yet. I 
suppose that running sub-optimal code on an accelerator device is not a 
priority.

It would be possible for us to a) ensure we always use fully portable 
instruction and meta-data encodings in the compiler, and b) patch the 
ELF flags in libgomp at load time, to achieve some degree of 
forward-portability. We have not attempted this to date. Sooner or later 
AMD would change something in such a way that it couldn't be worked around.

Andrew
Jakub Jelinek Jan. 20, 2020, 10:42 a.m. | #4
On Mon, Jan 20, 2020 at 10:36:31AM +0000, Andrew Stubbs wrote:
> The HSA/ROCm runtime rejects binaries not built for the exact device

> present.

> 

> In practice, binaries built by GCC for GCN3 "fiji" devices would probably

> run on any of the devices we currently support, if only the driver would

> load it. It would not be optimal, but AFAIK the subset of the ISA we

> actually use is compatible.

> 

> However, in theory, the meta-data isn't quite the same (for one thing, GCN3

> allocates registers in increments of 8, where GCN5 uses 16) so some programs

> may misbehave. I have suggested to the ROCm folks that possibly this could

> be fixed in the drivers, but that's not been done yet. I suppose that

> running sub-optimal code on an accelerator device is not a priority.

> 

> It would be possible for us to a) ensure we always use fully portable

> instruction and meta-data encodings in the compiler, and b) patch the ELF

> flags in libgomp at load time, to achieve some degree of

> forward-portability. We have not attempted this to date. Sooner or later AMD

> would change something in such a way that it couldn't be worked around.


:(
Another option would be to build offloading code by GCN multiple times, once
for each incompatible ISA the user is asking for, so that one can have then
binaries that will work on different hw.
Because e.g. with the distro vendor hat, it is hard to guess what device
will the users have if we were to enable gcn offloading and ship binaries
with the offloading support in the distro.  For PTX it is easy, we can just
emit sm_30 as the lowest ISA supported (the default anyway) and be done with it.

	Jakub
Andrew Stubbs Jan. 20, 2020, 11 a.m. | #5
On 20/01/2020 10:42, Jakub Jelinek wrote:
> :(

> Another option would be to build offloading code by GCN multiple times, once

> for each incompatible ISA the user is asking for, so that one can have then

> binaries that will work on different hw.

> Because e.g. with the distro vendor hat, it is hard to guess what device

> will the users have if we were to enable gcn offloading and ship binaries

> with the offloading support in the distro.  For PTX it is easy, we can just

> emit sm_30 as the lowest ISA supported (the default anyway) and be done with it.


Indeed, fat binaries would be a good solution.

Presumably it's possible, but I'm not sure how we'd go about getting the 
offload mechanism to launch the backend multiple times? Having got that 
far, the libgomp and mkoffload changes to select the right variant would 
probably be fairly straight-forward.

Andrew
Jakub Jelinek Jan. 20, 2020, 11:07 a.m. | #6
On Mon, Jan 20, 2020 at 11:00:58AM +0000, Andrew Stubbs wrote:
> Indeed, fat binaries would be a good solution.

> 

> Presumably it's possible, but I'm not sure how we'd go about getting the

> offload mechanism to launch the backend multiple times? Having got that far,

> the libgomp and mkoffload changes to select the right variant would probably

> be fairly straight-forward.


I'd say easiest would be to do that in the gcn specific mkoffload.
But there needs to be a way for the user to specify that he wants only a
particular variant and not all of them (perhaps look for -march= in the
offload options?)?

Or, for the 8 vs. 16 regs, have -march=generic or whatever that would try to
generate something that will work everywhere or on as many chips as
possible, e.g. by using mostly fiji, but try to use 16 adjacent regs instead
of 8?  I admit I don't know anything about the hw, just worried because if
we have already 4 variants now when the port is almost new, won't we have 30
later on, which could be prohibitive for the fat binaries?

	Jakub
Andrew Stubbs Jan. 20, 2020, 11:33 a.m. | #7
On 20/01/2020 11:07, Jakub Jelinek wrote:
> On Mon, Jan 20, 2020 at 11:00:58AM +0000, Andrew Stubbs wrote:

>> Indeed, fat binaries would be a good solution.

>>

>> Presumably it's possible, but I'm not sure how we'd go about getting the

>> offload mechanism to launch the backend multiple times? Having got that far,

>> the libgomp and mkoffload changes to select the right variant would probably

>> be fairly straight-forward.

> 

> I'd say easiest would be to do that in the gcn specific mkoffload.

> But there needs to be a way for the user to specify that he wants only a

> particular variant and not all of them (perhaps look for -march= in the

> offload options?)?


Yeah, maybe "-foffload=-march=gfx900+", or 
"-foffload=-march=fiji,gfx900,gfx906"?

> Or, for the 8 vs. 16 regs, have -march=generic or whatever that would try to

> generate something that will work everywhere or on as many chips as

> possible, e.g. by using mostly fiji, but try to use 16 adjacent regs instead

> of 8?  I admit I don't know anything about the hw, just worried because if

> we have already 4 variants now when the port is almost new, won't we have 30

> later on, which could be prohibitive for the fat binaries?


That might work. It'd be far from optimal, but hopefully still faster 
than CPU.

AMD don't have any real interest in maintaining compatibility though, so 
this may get increasingly difficult. For example, between Fiji and Vega 
(gfx8xx to gfx9xx), they removed the v_moverel instructions, and removed 
a number of bit-fields from the memory descriptors. As it happens, GCC 
does not (currently) use any of those features, so compatibility was 
unaffected.

For another example, AMD changed the name of the v_add instructions to 
v_add_co, and added a new set of instructions named v_add (that don't 
have carry-out). The machine encodings for the old instructions remain 
the same, so again, binary compatibility was not affected, but it serves 
to demonstrate that they don't expect software to be written for a 
generic device.

Also, APUs will probably never be binary compatible with DGPUs (not that 
libgomp supports APUs properly, at present, as we have none to test).

Andrew
Tobias Burnus Jan. 20, 2020, 12:49 p.m. | #8
On 1/20/20 12:07 PM, Jakub Jelinek wrote:
> I'd say easiest would be to do that in the gcn specific mkoffload. But 

> there needs to be a way for the user to specify that he wants only a 

> particular variant and not all of them (perhaps look for -march= in 

> the offload options?)?


I think that relates to the general issues with telling libgomp what 
offload-target has been used.

(Might be solved differently, but it is at least related. Thinking of 
complications would be: main program and a linked library uses different 
offload targets [e.g. nvptx vs. host or gcn vs. host+nvptx] or the 
library is not linked but dlopen'ed. — Likewise for the issue of this 
thread: library and host program might use different gfx… for the march.)

  * * *

Regarding the -foffload=<target(s)>: Assume a GCC installation which has 
plugins for a certain device. If one now uses a simple hello-world 
program with OpenMP target or OpenACC then:

With "-fopenacc -foffload=disable" – and a device available – it fails 
at run time with "libgomp: target function wasn't mapped". (Work around: 
set env var ACC_DEVICE_TYPE to "host".)

With "-fopenmp -foffload=disable" or "-fopenacc -foffload=disable" – and 
no device available, it fails with: libgomp: while loading 
libgomp-plugin-nvptx.so.1: libcuda.so.1: cannot open shared object file: 
No such file or directory

And – at least with HSA/GCN: If there is a permission issue for /dev/…, 
it will fail when the devices are enumerated (I mean: 
gomp_init_targets_once → *get_num_devices) – even if -foffload=disable 
is used.

See also: PR 81886 and, related, PR 67300.

Cheers,

Tobias
Frederik Harwath Jan. 20, 2020, 4:42 p.m. | #9
Hi Andrew,
Thanks for the review! I have attached a revised patch containing the changes that you suggested.

On 20.01.20 11:00, Andrew Stubbs wrote:

> On 20/01/2020 06:57, Harwath, Frederik wrote:

>> Is it ok to commit this patch to the master branch?

> 

> I can't see anything significantly wrong with the code of the patch, however I have some minor issues I'd like fixed in the text.

> 

> [...] Please move the functions down into the "Utility functions" group. The const static variables should probably go with them.


Done.

>> @@ -3294,7 +3415,11 @@ GOMP_OFFLOAD_init_device (int n)

>>                        &buf);

>>    if (status != HSA_STATUS_SUCCESS)

>>      return hsa_error ("Error querying the name of the agent", status);

>> -  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);

>> +  agent->gfx900_p = (strncmp (buf, gcn_gfx900_s, gcn_isa_name_len) == 0);

>> +

>> +  agent->device_isa = isa_code (buf);

>> +  if (agent->device_isa < 0)

>> +    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);

> 

> Can device_isa not just replace gfx900_p? I think it's only tested in one place, and that would be easily substituted.

> 


Yes, I have changed that one place to use agent->device_isa.

I would commit the patch then if nobody objects :-). The other approaches (fat binaries etc.) that have been discussed in
this thread seem to be long-term projects and until something like this gets implemented the early error checking
implemented by this patch seems to be better than nothing.

Frederik
From 470892454bf0d67ea71c2399f5819713592e46a0 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frederik@codesourcery.com>

Date: Mon, 20 Jan 2020 07:45:43 +0100
Subject: [PATCH] Add runtime ISA check for amdgcn offloading

When executing code that uses amdgcn GPU offloading, the ISA of the GPU must
match the ISA for which the code has been compiled.  So far, the libgomp amdgcn
plugin did not attempt to verify this.  In case of a mismatch, the user is
confronted with an unhelpful error message produced by the HSA runtime.

This commit implements a runtime ISA check. In the case of a ISA mismatch, the
execution is aborted with a clear error message and a hint at the correct
compilation parameters for the GPU on which the execution has been attempted.

libgomp/
	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): New enum.
	* (EF_AMDGPU_MACH_MASK): New constant.
	* (gcn_isa): New typedef.
	* (gcn_gfx801_s): New constant.
	* (gcn_gfx803_s): New constant.
	* (gcn_gfx900_s): New constant.
	* (gcn_gfx906_s): New constant.
	* (gcn_isa_name_len): New constant.
	* (elf_gcn_isa_field): New function.
	* (isa_hsa_name): New function.
	* (isa_gcc_name): New function.
	* (isa_code): New function.
	* (struct agent_info): Add field "device_isa" and remove field
	"gfx900_p".
	* (GOMP_OFFLOAD_init_device): Adapt agent init to "agent_info"
	field changes, fail if device has unknown ISA.
	* (parse_target_attributes): Replace "gfx900_p" by "device_isa".
	* (isa_matches_agent): New function ...
	* (create_and_finalize_hsa_program): ... used from here to check
	that the GPU ISA and the code-object ISA match.
---
 libgomp/plugin/plugin-gcn.c | 131 ++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 4 deletions(-)

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 16ce251f3a5..de470a3dd33 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -396,6 +396,20 @@ struct gcn_image_desc
   struct global_var_info *global_variables;
 };
 
+/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we
+   support.
+   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */
+
+typedef enum {
+  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
+  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
+  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
+  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+} EF_AMDGPU_MACH;
+
+const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
+typedef EF_AMDGPU_MACH gcn_isa;
+
 /* Description of an HSA GPU agent (device) and the program associated with
    it.  */
 
@@ -408,8 +422,9 @@ struct agent_info
   /* Whether the agent has been initialized.  The fields below are usable only
      if it has been.  */
   bool initialized;
-  /* Precomputed check for problem architectures.  */
-  bool gfx900_p;
+
+  /* The instruction set architecture of the device. */
+  gcn_isa device_isa;
 
   /* Command queues of the agent.  */
   hsa_queue_t *sync_queue;
@@ -1232,7 +1247,8 @@ parse_target_attributes (void **input,
 
   if (gcn_dims_found)
     {
-      if (agent->gfx900_p && gcn_threads == 0 && override_z_dim == 0)
+      if (agent->device_isa == EF_AMDGPU_MACH_AMDGCN_GFX900
+	  && gcn_threads == 0 && override_z_dim == 0)
 	{
 	  gcn_threads = 4;
 	  GCN_WARNING ("VEGA BUG WORKAROUND: reducing default number of "
@@ -1578,6 +1594,74 @@ get_data_memory_region (hsa_region_t region, void *data)
 			    HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
 }
 
+static int
+elf_gcn_isa_field (Elf64_Ehdr *image)
+{
+  return image->e_flags & EF_AMDGPU_MACH_MASK;
+}
+
+const static char *gcn_gfx801_s = "gfx801";
+const static char *gcn_gfx803_s = "gfx803";
+const static char *gcn_gfx900_s = "gfx900";
+const static char *gcn_gfx906_s = "gfx906";
+const static int gcn_isa_name_len = 6;
+
+/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
+   support the ISA. */
+
+static const char*
+isa_hsa_name (int isa) {
+  switch(isa)
+    {
+    case EF_AMDGPU_MACH_AMDGCN_GFX801:
+      return gcn_gfx801_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      return gcn_gfx803_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX900:
+      return gcn_gfx900_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX906:
+      return gcn_gfx906_s;
+    }
+  return NULL;
+}
+
+/* Returns the user-facing name that GCC uses to identify the architecture (e.g.
+   with -march) or NULL if we do not support the ISA.
+   Keep in sync with /gcc/config/gcn/gcn.{c,opt}.  */
+
+static const char*
+isa_gcc_name (int isa) {
+  switch(isa)
+    {
+    case EF_AMDGPU_MACH_AMDGCN_GFX801:
+      return "carrizo";
+    case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      return "fiji";
+    default:
+      return isa_hsa_name (isa);
+    }
+}
+
+/* Returns the code which is used in the GCN object code to identify the ISA with
+   the given name (as used by the HSA runtime).  */
+
+static gcn_isa
+isa_code(const char *isa) {
+  if (!strncmp (isa, gcn_gfx801_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX801;
+
+  if (!strncmp (isa, gcn_gfx803_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX803;
+
+  if (!strncmp (isa, gcn_gfx900_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX900;
+
+  if (!strncmp (isa, gcn_gfx906_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX906;
+
+  return -1;
+}
+
 /* }}}  */
 /* {{{ Run  */
 
@@ -2257,6 +2341,39 @@ find_load_offset (Elf64_Addr *load_offset, struct agent_info *agent,
   return res;
 }
 
+/* Check that the GCN ISA of the given image matches the ISA of the agent. */
+
+static bool
+isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image)
+{
+  int isa_field = elf_gcn_isa_field (image);
+  const char* isa_s = isa_hsa_name (isa_field);
+  if (!isa_s)
+    {
+      hsa_error ("Unsupported ISA in GCN code object.", HSA_STATUS_ERROR);
+      return false;
+    }
+
+  if (isa_field != agent->device_isa)
+    {
+      char msg[120];
+      const char *agent_isa_s = isa_hsa_name (agent->device_isa);
+      const char *agent_isa_gcc_s = isa_gcc_name (agent->device_isa);
+      assert (agent_isa_s);
+      assert (agent_isa_gcc_s);
+
+      snprintf (msg, sizeof msg,
+		"GCN code object ISA '%s' does not match GPU ISA '%s'.\n"
+		"Try to recompile with '-foffload=-march=%s'.\n",
+		isa_s, agent_isa_s, agent_isa_gcc_s);
+
+      hsa_error (msg, HSA_STATUS_ERROR);
+      return false;
+    }
+
+  return true;
+}
+
 /* Create and finalize the program consisting of all loaded modules.  */
 
 static bool
@@ -2289,6 +2406,9 @@ create_and_finalize_hsa_program (struct agent_info *agent)
     {
       Elf64_Ehdr *image = (Elf64_Ehdr *)module->image_desc->gcn_image->image;
 
+      if (!isa_matches_agent (agent, image))
+	goto fail;
+
       /* Hide relocations from the HSA runtime loader.
 	 Keep a copy of the unmodified section headers to use later.  */
       Elf64_Shdr *image_sections = (Elf64_Shdr *)((char *)image
@@ -3294,7 +3414,10 @@ GOMP_OFFLOAD_init_device (int n)
 					  &buf);
   if (status != HSA_STATUS_SUCCESS)
     return hsa_error ("Error querying the name of the agent", status);
-  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);
+
+  agent->device_isa = isa_code (buf);
+  if (agent->device_isa < 0)
+    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);
 
   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
 					HSA_QUEUE_TYPE_MULTI,
-- 
2.17.1
Andrew Stubbs Jan. 20, 2020, 5:10 p.m. | #10
On 20/01/2020 16:42, Harwath, Frederik wrote:
> Hi Andrew,

> Thanks for the review! I have attached a revised patch containing the changes that you suggested.

> 

> On 20.01.20 11:00, Andrew Stubbs wrote:

> 

>> On 20/01/2020 06:57, Harwath, Frederik wrote:

>>> Is it ok to commit this patch to the master branch?

>>

>> I can't see anything significantly wrong with the code of the patch, however I have some minor issues I'd like fixed in the text.

>>

>> [...] Please move the functions down into the "Utility functions" group. The const static variables should probably go with them.

> 

> Done.

> 

>>> @@ -3294,7 +3415,11 @@ GOMP_OFFLOAD_init_device (int n)

>>>                         &buf);

>>>     if (status != HSA_STATUS_SUCCESS)

>>>       return hsa_error ("Error querying the name of the agent", status);

>>> -  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);

>>> +  agent->gfx900_p = (strncmp (buf, gcn_gfx900_s, gcn_isa_name_len) == 0);

>>> +

>>> +  agent->device_isa = isa_code (buf);

>>> +  if (agent->device_isa < 0)

>>> +    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);

>>

>> Can device_isa not just replace gfx900_p? I think it's only tested in one place, and that would be easily substituted.

>>

> 

> Yes, I have changed that one place to use agent->device_isa.

> 

> I would commit the patch then if nobody objects :-). The other approaches (fat binaries etc.) that have been discussed in

> this thread seem to be long-term projects and until something like this gets implemented the early error checking

> implemented by this patch seems to be better than nothing.


OK.

Andrew

Patch

From 27981f9c93d1efed6d943dae4ea0c52147c02d5b Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frederik@codesourcery.com>
Date: Mon, 20 Jan 2020 07:45:43 +0100
Subject: [PATCH] Add runtime ISA check for amdgcn offloading

When executing code that uses amdgcn GPU offloading, the ISA of the GPU must
match the ISA for which the code has been compiled.  So far, the libgomp amdgcn
plugin did not attempt to verify this.  In case of a mismatch, the user is
confronted with an unhelpful error message produced by the HSA runtime.

This commit implements a runtime ISA check. In the case of a ISA mismatch, the
execution is aborted with a clear error message and a hint at the correct
compilation parameters for the GPU on which the execution has been attempted.

libgomp/
	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): New enum.
	(EF_AMDGPU_MACH_MASK): New constant.
	(gcn_isa): New typedef.
	(gcn_gfx801_s): New constant.
	(gcn_gfx803_s): New constant.
	(gcn_gfx900_s): New constant.
	(gcn_gfx906_s): New constant.
	(gcn_isa_name_len): New constant.
	(elf_gcn_isa_field): New function.
	(isa_hsa_name): New function.
	(isa_gcc_name): New function.
	(isa_code): New function.
	(struct agent_info): Add field "device_isa" ...
	(GOMP_OFFLOAD_init_device): ... and init from here,
	failing if device has unknown ISA; adapt init of "gfx900_p"
	to use new constants.
	(isa_matches_agent): New function ...
	(create_and_finalize_hsa_program): ... used from here to check
	that the GPU ISA and the code-object ISA match.
---
 libgomp/plugin/plugin-gcn.c | 127 +++++++++++++++++++++++++++++++++++-
 1 file changed, 126 insertions(+), 1 deletion(-)

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 16ce251f3a5..14f4a707a7c 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -396,6 +396,88 @@  struct gcn_image_desc
   struct global_var_info *global_variables;
 };
 
+/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we
+   support.
+   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */
+
+typedef enum {
+  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
+  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
+  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
+  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+} EF_AMDGPU_MACH;
+
+const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
+typedef EF_AMDGPU_MACH gcn_isa;
+
+const static char* gcn_gfx801_s = "gfx801";
+const static char* gcn_gfx803_s = "gfx803";
+const static char* gcn_gfx900_s = "gfx900";
+const static char* gcn_gfx906_s = "gfx906";
+const static int gcn_isa_name_len = 6;
+
+static int
+elf_gcn_isa_field (Elf64_Ehdr *image)
+{
+  return image->e_flags & EF_AMDGPU_MACH_MASK;
+}
+
+/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
+   support the ISA. */
+
+static const char*
+isa_hsa_name (int isa) {
+  switch(isa)
+    {
+    case EF_AMDGPU_MACH_AMDGCN_GFX801:
+      return gcn_gfx801_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      return gcn_gfx803_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX900:
+      return gcn_gfx900_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX906:
+      return gcn_gfx906_s;
+    }
+  return NULL;
+}
+
+/* Returns the user-facing name that GCC uses to identify the architecture (e.g.
+   with -march) or NULL if we do not support the ISA.
+   Keep in sync with /gcc/config/gcn/gcn.{c,opt}.  */
+
+static const char*
+isa_gcc_name (int isa) {
+  switch(isa)
+    {
+    case EF_AMDGPU_MACH_AMDGCN_GFX801:
+      return "carrizo";
+    case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      return "fiji";
+    default:
+      return isa_hsa_name (isa);
+    }
+}
+
+/* Returns the code which is used in the GCN object code to identify the ISA with
+   the given name (as used by the HSA runtime).  */
+
+static gcn_isa
+isa_code(const char *isa) {
+  if (!strncmp (isa, gcn_gfx801_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX801;
+
+  if (!strncmp (isa, gcn_gfx803_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX803;
+
+  if (!strncmp (isa, gcn_gfx900_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX900;
+
+  if (!strncmp (isa, gcn_gfx906_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX906;
+
+  return -1;
+}
+
 /* Description of an HSA GPU agent (device) and the program associated with
    it.  */
 
@@ -411,6 +493,9 @@  struct agent_info
   /* Precomputed check for problem architectures.  */
   bool gfx900_p;
 
+  /* The instruction set architecture of the device. */
+  gcn_isa device_isa;
+
   /* Command queues of the agent.  */
   hsa_queue_t *sync_queue;
   struct goacc_asyncqueue *async_queues, *omp_async_queue;
@@ -2257,6 +2342,39 @@  find_load_offset (Elf64_Addr *load_offset, struct agent_info *agent,
   return res;
 }
 
+/* Check that the GCN ISA of the given image matches the ISA of the agent. */
+
+static bool
+isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image)
+{
+      int isa_field = elf_gcn_isa_field (image);
+      const char* isa_s = isa_hsa_name (isa_field);
+      if (!isa_s)
+	{
+	  hsa_error ("Unsupported ISA in GCN code object.", HSA_STATUS_ERROR);
+	  return false;
+	}
+
+      if (isa_field != agent->device_isa)
+	{
+	  char msg[120];
+	  const char *agent_isa_s = isa_hsa_name (agent->device_isa);
+	  const char *agent_isa_gcc_s = isa_gcc_name (agent->device_isa);
+	  assert (agent_isa_s);
+	  assert (agent_isa_gcc_s);
+
+	  snprintf (msg, sizeof msg,
+		    "GCN code object ISA '%s' does not match GPU ISA '%s'.\n"
+		    "Try to recompile with '-foffload=-march=%s'.\n",
+		    isa_s, agent_isa_s, agent_isa_gcc_s);
+
+	  hsa_error (msg, HSA_STATUS_ERROR);
+	  return false;
+	}
+
+      return true;
+}
+
 /* Create and finalize the program consisting of all loaded modules.  */
 
 static bool
@@ -2289,6 +2407,9 @@  create_and_finalize_hsa_program (struct agent_info *agent)
     {
       Elf64_Ehdr *image = (Elf64_Ehdr *)module->image_desc->gcn_image->image;
 
+      if (!isa_matches_agent (agent, image))
+	goto fail;
+
       /* Hide relocations from the HSA runtime loader.
 	 Keep a copy of the unmodified section headers to use later.  */
       Elf64_Shdr *image_sections = (Elf64_Shdr *)((char *)image
@@ -3294,7 +3415,11 @@  GOMP_OFFLOAD_init_device (int n)
 					  &buf);
   if (status != HSA_STATUS_SUCCESS)
     return hsa_error ("Error querying the name of the agent", status);
-  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);
+  agent->gfx900_p = (strncmp (buf, gcn_gfx900_s, gcn_isa_name_len) == 0);
+
+  agent->device_isa = isa_code (buf);
+  if (agent->device_isa < 0)
+    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);
 
   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
 					HSA_QUEUE_TYPE_MULTI,
-- 
2.17.1