PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514

Message ID trinity-558ac6fa-645e-4b69-bd92-ed980a8db626-1623274785526@3c-app-gmx-bs42
State New
Headers show
Series
  • PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
Related show

Commit Message

Jonathan Wakely via Gcc-patches June 9, 2021, 9:39 p.m.
Dear Fortranners,

we should be able to simplify the length of a substring with known
constant bounds.  The attached patch adds this.

Regtested on x86_64-pc-linux-gnu.

OK for mainline?  Since this should be rather safe, to at least 11-branch?

Thanks,
Harald


Fortran - simplify length of substring with constant bounds

gcc/fortran/ChangeLog:

	PR fortran/100950
	* simplify.c (substring_has_constant_len): New.
	(gfc_simplify_len): Handle case of substrings with constant
	bounds.

gcc/testsuite/ChangeLog:

	PR fortran/100950
	* gfortran.dg/pr100950.f90: New test.

Comments

Jonathan Wakely via Gcc-patches June 10, 2021, 10:24 a.m. | #1
On Wed, 9 Jun 2021 23:39:45 +0200
Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

> diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c

> index c27b47aa98f..016ec259518 100644

> --- a/gcc/fortran/simplify.c

> +++ b/gcc/fortran/simplify.c

> @@ -4512,6 +4512,60 @@ gfc_simplify_leadz (gfc_expr *e)

>  }

> 

> 

> +/* Check for constant length of a substring.  */

> +

> +static bool

> +substring_has_constant_len (gfc_expr *e)

> +{

> +  ptrdiff_t istart, iend;

> +  size_t length;

> +  bool equal_length = false;

> +

> +  if (e->ts.type != BT_CHARACTER

> +      || !(e->ref && e->ref->type == REF_SUBSTRING)


iff we ever can get here with e->ref == NULL then the below will not
work too well. If so then maybe
  if (e->ts.type != BT_CHARACTER
      || ! e->ref
      || e->ref->type != REF_SUBSTRING

?

> +      || !e->ref->u.ss.start

> +      || e->ref->u.ss.start->expr_type != EXPR_CONSTANT

> +      || !e->ref->u.ss.end

> +      || e->ref->u.ss.end->expr_type != EXPR_CONSTANT

> +      || !e->ref->u.ss.length

> +      || !e->ref->u.ss.length->length

> +      || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT)

> +    return false;

> +

> +  /* Basic checks on substring starting and ending indices.  */

> +  if (!gfc_resolve_substring (e->ref, &equal_length))

> +    return false;

> +

> +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);

> +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);

> +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);

> +

> +  if (istart <= iend)

> +    {

> +      if (istart < 1)

> +	{

> +	  gfc_error ("Substring start index (%ld) at %L below 1",

> +		     (long) istart, &e->ref->u.ss.start->where);

> +	  return false;

> +	}

> +      if (iend > (ssize_t) length)

> +	{

> +	  gfc_error ("Substring end index (%ld) at %L exceeds string "

> +		     "length", (long) iend, &e->ref->u.ss.end->where);

> +	  return false;

> +	}

> +      length = iend - istart + 1;

> +    }

> +  else

> +    length = 0;

> +

> +  /* Fix substring length.  */

> +  e->value.character.length = length;

> +

> +  return true;

> +}

> +

> +

>  gfc_expr *

>  gfc_simplify_len (gfc_expr *e, gfc_expr *kind)

>  {

> @@ -4547,6 +4601,13 @@ gfc_simplify_len (gfc_expr *e, gfc_expr *kind)

>         of the unlimited polymorphic entity.  To get the _len component the last

>         _data ref needs to be stripped and a ref to the _len component added.  */

>      return gfc_get_len_component (e->symtree->n.sym->assoc->target, k);

> +  else if (substring_has_constant_len (e))

> +    {

> +      result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);

> +      mpz_set_si (result->value.integer,

> +		  e->value.character.length);


I think the mpz_set_si args above fit on one line.

btw.. there's a commentary typo in add_init_expr_to_sym():
s/skeep/skip/

thanks,

> +      return range_check (result, "LEN");

> +    }

>    else

>      return NULL;

>  }

> diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90

> new file mode 100644

> index 00000000000..f06db45b0b4

> --- /dev/null

> +++ b/gcc/testsuite/gfortran.dg/pr100950.f90

> @@ -0,0 +1,18 @@

> +! { dg-do run }

> +! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514

> +

> +program p

> +  character(8), parameter :: u = "123"

> +  character(8)            :: x = "", s

> +  character(2)            :: w(2) = [character(len(x(3:4))) :: 'a','b' ]

> +  character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ]

> +  character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ]

> +  if (len (y) /= 2) stop 1

> +  if (len (z) /= 2) stop 2

> +  if (any (w /= y)) stop 3

> +  if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2)  stop 4

> +  if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2)  stop 5

> +  if (any ([character(len(x(3:4))) :: 'a','b' ]  /= y)) stop 6

> +  write(s,*) [character(len(x(3:4))) :: 'a','b' ]

> +  if (s /= " a b    ") stop 7

> +end
Jonathan Wakely via Gcc-patches June 10, 2021, 12:47 p.m. | #2
On Thu, 10 Jun 2021 12:24:35 +0200
Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote:

> On Wed, 9 Jun 2021 23:39:45 +0200

> Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:


> > +/* Check for constant length of a substring.  */

> > +

> > +static bool

> > +substring_has_constant_len (gfc_expr *e)

> > +{

> > +  ptrdiff_t istart, iend;

> > +  size_t length;

> > +  bool equal_length = false;

> > +

> > +  if (e->ts.type != BT_CHARACTER

> > +      || !(e->ref && e->ref->type == REF_SUBSTRING)  

> 

> iff we ever can get here with e->ref == NULL then the below will not

> work too well. If so then maybe

>   if (e->ts.type != BT_CHARACTER

>       || ! e->ref

>       || e->ref->type != REF_SUBSTRING

> 

> ?


Not sure what i was reading, maybe i read || instead of && in the
braced condition. Your initial version works equally well of course
although it's obviously harder to parse for at least some :)
thanks,
Jonathan Wakely via Gcc-patches June 10, 2021, 6:52 p.m. | #3
Hi Bernhard,

> > +static bool

> > +substring_has_constant_len (gfc_expr *e)

> > +{

> > +  ptrdiff_t istart, iend;

> > +  size_t length;

> > +  bool equal_length = false;

> > +

> > +  if (e->ts.type != BT_CHARACTER

> > +      || !(e->ref && e->ref->type == REF_SUBSTRING)

>

> iff we ever can get here with e->ref == NULL then the below will not

> work too well. If so then maybe

>   if (e->ts.type != BT_CHARACTER

>       || ! e->ref

>       || e->ref->type != REF_SUBSTRING

>

> ?


as you already realized, the logic was fine, but probably less
readable than your version.  I've changed the code accordingly.

> > +  else if (substring_has_constant_len (e))

> > +    {

> > +      result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);

> > +      mpz_set_si (result->value.integer,

> > +		  e->value.character.length);

>

> I think the mpz_set_si args above fit on one line.


That's true.

Since this block is exactly the same as for constant strings,
which is handled in the first condition, I've thought some more
and am convinced now that these two can be fused.  Done now.

I've also added two cornercases to the testcase, and regtested again.

> btw.. there's a commentary typo in add_init_expr_to_sym():

> s/skeep/skip/


That is a completely unrelated issue in a different file, right?

Thanks for your constructive comments!

Harald
diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
index c27b47aa98f..42ddc62f3c6 100644
--- a/gcc/fortran/simplify.c
+++ b/gcc/fortran/simplify.c
@@ -4512,6 +4512,61 @@ gfc_simplify_leadz (gfc_expr *e)
 }


+/* Check for constant length of a substring.  */
+
+static bool
+substring_has_constant_len (gfc_expr *e)
+{
+  ptrdiff_t istart, iend;
+  size_t length;
+  bool equal_length = false;
+
+  if (e->ts.type != BT_CHARACTER
+      || !e->ref
+      || e->ref->type != REF_SUBSTRING
+      || !e->ref->u.ss.start
+      || e->ref->u.ss.start->expr_type != EXPR_CONSTANT
+      || !e->ref->u.ss.end
+      || e->ref->u.ss.end->expr_type != EXPR_CONSTANT
+      || !e->ref->u.ss.length
+      || !e->ref->u.ss.length->length
+      || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT)
+    return false;
+
+  /* Basic checks on substring starting and ending indices.  */
+  if (!gfc_resolve_substring (e->ref, &equal_length))
+    return false;
+
+  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
+  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
+  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
+
+  if (istart <= iend)
+    {
+      if (istart < 1)
+	{
+	  gfc_error ("Substring start index (%ld) at %L below 1",
+		     (long) istart, &e->ref->u.ss.start->where);
+	  return false;
+	}
+      if (iend > (ssize_t) length)
+	{
+	  gfc_error ("Substring end index (%ld) at %L exceeds string "
+		     "length", (long) iend, &e->ref->u.ss.end->where);
+	  return false;
+	}
+      length = iend - istart + 1;
+    }
+  else
+    length = 0;
+
+  /* Fix substring length.  */
+  e->value.character.length = length;
+
+  return true;
+}
+
+
 gfc_expr *
 gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
 {
@@ -4521,7 +4576,8 @@ gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
   if (k == -1)
     return &gfc_bad_expr;

-  if (e->expr_type == EXPR_CONSTANT)
+  if (e->expr_type == EXPR_CONSTANT
+      || substring_has_constant_len (e))
     {
       result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);
       mpz_set_si (result->value.integer, e->value.character.length);
diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90
new file mode 100644
index 00000000000..54c459adf99
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr100950.f90
@@ -0,0 +1,22 @@
+! { dg-do run }
+! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
+
+program p
+  character(8), parameter :: u = "123"
+  character(8)            :: x = "", s
+  character(2)            :: w(2) = [character(len(x(3:4))) :: 'a','b' ]
+  character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ]
+  character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ]
+  character(*), parameter :: t(*) = [character(len(x( :2))) :: 'a','b' ]
+  character(*), parameter :: v(*) = [character(len(x(7: ))) :: 'a','b' ]
+  if (len (y) /= 2) stop 1
+  if (len (z) /= 2) stop 2
+  if (any (w /= y)) stop 3
+  if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2)  stop 4
+  if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2)  stop 5
+  if (any ([character(len(x(3:4))) :: 'a','b' ]  /= y)) stop 6
+  write(s,*) [character(len(x(3:4))) :: 'a','b' ]
+  if (s /= " a b    ") stop 7
+  if (len (t) /= 2) stop 8
+  if (len (v) /= 2) stop 9
+end
Jonathan Wakely via Gcc-patches June 11, 2021, 6:02 a.m. | #4
On 10 June 2021 20:52:12 CEST, Harald Anlauf <anlauf@gmx.de> wrote:

>> I think the mpz_set_si args above fit on one line.

>

>That's true.

>

>Since this block is exactly the same as for constant strings,

>which is handled in the first condition, I've thought some more

>and am convinced now that these two can be fused.  Done now.


Thanks.
Btw.. I know that we cast hwi to long int often and use %ld but think there is a HOST_WIDE_INT_PRINT_DEC format macro to print a hwi.

>I've also added two cornercases to the testcase, and regtested again.

>

>> btw.. there's a commentary typo in add_init_expr_to_sym():

>> s/skeep/skip/

>

>That is a completely unrelated issue in a different file, right?


Completely unrelated, yes.
>

>Thanks for your constructive comments!


thanks for the patch!
Jonathan Wakely via Gcc-patches June 18, 2021, 8:47 p.m. | #5
*PING*

> Gesendet: Mittwoch, 09. Juni 2021 um 23:39 Uhr

> Von: "Harald Anlauf" <anlauf@gmx.de>

> An: "fortran" <fortran@gcc.gnu.org>, "gcc-patches" <gcc-patches@gcc.gnu.org>

> Betreff: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514

>

> Dear Fortranners,

>

> we should be able to simplify the length of a substring with known

> constant bounds.  The attached patch adds this.

>

> Regtested on x86_64-pc-linux-gnu.

>

> OK for mainline?  Since this should be rather safe, to at least 11-branch?

>

> Thanks,

> Harald

>

>

> Fortran - simplify length of substring with constant bounds

>

> gcc/fortran/ChangeLog:

>

> 	PR fortran/100950

> 	* simplify.c (substring_has_constant_len): New.

> 	(gfc_simplify_len): Handle case of substrings with constant

> 	bounds.

>

> gcc/testsuite/ChangeLog:

>

> 	PR fortran/100950

> 	* gfortran.dg/pr100950.f90: New test.

>

>
Tobias Burnus June 21, 2021, 12:12 p.m. | #6
Hi Harald,

sorry for being way behind my review duties :-(

On 10.06.21 20:52, Harald Anlauf via Fortran wrote:
> +static bool

> +substring_has_constant_len (gfc_expr *e)

> +{

> +  ptrdiff_t istart, iend;

> +  size_t length;

> +  bool equal_length = false;

> +

> +  if (e->ts.type != BT_CHARACTER

> +      || !e->ref

> +      || e->ref->type != REF_SUBSTRING


Is there a reason why you do not handle:

type t
   character(len=5) :: str1
   character(len=:), allocatable :: str2
end type
type(t) :: x

allocate(x%str2, source="abd")
if (len (x%str)) /= 1) ...
if (len (x%str2(1:2) /= 2) ...
etc.

Namely: Search the last_ref = expr->ref->next->next ...?
and then check that lastref?

   * * *

Slightly unrelated: I think the following does not violate
F2018's R916 / C923 – but is rejected, namely:
   R916  type-param-inquiry  is  designator % type-param-name
the latter is 'len' or 'kind' for intrinsic types. And:
   R901  designator is ...
                    or substring
But

character(len=5) :: str
print *, str(1:3)%len
end

fails with

     2 | print *, str(1:3)%len
       |                  1
Error: Syntax error in PRINT statement at (1)


Assuming you don't want to handle it, can you open a new PR?
Thanks!

  * * *

That's in so far related to your patch as last_ref would
then be the last ref before ref->next == NULL or
before ref->next->type == REF_INQUIRY

> +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);

> +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);

> +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);

> +

> +  if (istart <= iend)

> +    {

> +      if (istart < 1)

> +     {

> +       gfc_error ("Substring start index (%ld) at %L below 1",

> +                  (long) istart, &e->ref->u.ss.start->where);


As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC.

(It probably only matters on Windows which uses long == int = 32bit for
strings longer than INT_MAX.)

Thanks,

Tobias

-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf

Patch

diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
index c27b47aa98f..016ec259518 100644
--- a/gcc/fortran/simplify.c
+++ b/gcc/fortran/simplify.c
@@ -4512,6 +4512,60 @@  gfc_simplify_leadz (gfc_expr *e)
 }


+/* Check for constant length of a substring.  */
+
+static bool
+substring_has_constant_len (gfc_expr *e)
+{
+  ptrdiff_t istart, iend;
+  size_t length;
+  bool equal_length = false;
+
+  if (e->ts.type != BT_CHARACTER
+      || !(e->ref && e->ref->type == REF_SUBSTRING)
+      || !e->ref->u.ss.start
+      || e->ref->u.ss.start->expr_type != EXPR_CONSTANT
+      || !e->ref->u.ss.end
+      || e->ref->u.ss.end->expr_type != EXPR_CONSTANT
+      || !e->ref->u.ss.length
+      || !e->ref->u.ss.length->length
+      || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT)
+    return false;
+
+  /* Basic checks on substring starting and ending indices.  */
+  if (!gfc_resolve_substring (e->ref, &equal_length))
+    return false;
+
+  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
+  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
+  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
+
+  if (istart <= iend)
+    {
+      if (istart < 1)
+	{
+	  gfc_error ("Substring start index (%ld) at %L below 1",
+		     (long) istart, &e->ref->u.ss.start->where);
+	  return false;
+	}
+      if (iend > (ssize_t) length)
+	{
+	  gfc_error ("Substring end index (%ld) at %L exceeds string "
+		     "length", (long) iend, &e->ref->u.ss.end->where);
+	  return false;
+	}
+      length = iend - istart + 1;
+    }
+  else
+    length = 0;
+
+  /* Fix substring length.  */
+  e->value.character.length = length;
+
+  return true;
+}
+
+
 gfc_expr *
 gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
 {
@@ -4547,6 +4601,13 @@  gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
        of the unlimited polymorphic entity.  To get the _len component the last
        _data ref needs to be stripped and a ref to the _len component added.  */
     return gfc_get_len_component (e->symtree->n.sym->assoc->target, k);
+  else if (substring_has_constant_len (e))
+    {
+      result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);
+      mpz_set_si (result->value.integer,
+		  e->value.character.length);
+      return range_check (result, "LEN");
+    }
   else
     return NULL;
 }
diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90
new file mode 100644
index 00000000000..f06db45b0b4
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr100950.f90
@@ -0,0 +1,18 @@ 
+! { dg-do run }
+! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
+
+program p
+  character(8), parameter :: u = "123"
+  character(8)            :: x = "", s
+  character(2)            :: w(2) = [character(len(x(3:4))) :: 'a','b' ]
+  character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ]
+  character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ]
+  if (len (y) /= 2) stop 1
+  if (len (z) /= 2) stop 2
+  if (any (w /= y)) stop 3
+  if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2)  stop 4
+  if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2)  stop 5
+  if (any ([character(len(x(3:4))) :: 'a','b' ]  /= y)) stop 6
+  write(s,*) [character(len(x(3:4))) :: 'a','b' ]
+  if (s /= " a b    ") stop 7
+end