linker script parsing regression in 2.37

Message ID f7b1e5d3-4ffd-0e5d-2829-3fdaa7e6228e@suse.com
State New
Headers show
Series
  • linker script parsing regression in 2.37
Related show

Commit Message

Florian Weimer via Binutils July 22, 2021, 8:30 a.m.
Alan,

I suspect it is commit 40726f16a8d7 ("ld script expression parsing")
which broke the Xen hypervisor build. In the linker script we have

  .note.gnu.build-id : AT(ADDR(.note.gnu.build-id) - (<expression>)) {
       __note_gnu_build_id_start = .;
       *(.note.gnu.build-id)
       __note_gnu_build_id_end = .;
  } :note :text

and I expect it was this hunk


which broke accepting '-' in section names.

Since I didn't trust my oldish flex/bison, I simply took 2.36.1's
generated files, re-did the bfd_boolean -> bool conversion, and put
them in the source tree to replace the 2.37 versions. Things are
working fine again this way. (Luckily in this case neither of the
two changes of yours that I've undone this way came with any test
cases, so no testsuite failures resulted either.)

I have to admit that I don't see how we could work around this on
the Xen Project side, so I guess I will want to ask for a 2.37.1
release once this regression got fixed.

Jan

Comments

Florian Weimer via Binutils July 22, 2021, 8:56 a.m. | #1
On 22.07.2021 10:30, Jan Beulich via Binutils wrote:
> I have to admit that I don't see how we could work around this on

> the Xen Project side, so I guess I will want to ask for a 2.37.1

> release once this regression got fixed.


Actually, having looked further at what the lexer accepts, quoting
the section name helps. But of course the issue still means that a
fair range of released versions of Xen won't build without the
workaround backported there (assuming it gets accepted in the first
place).

Jan
Florian Weimer via Binutils July 22, 2021, 12:46 p.m. | #2
On Thu, Jul 22, 2021 at 10:30:16AM +0200, Jan Beulich wrote:
> Alan,

> 

> I suspect it is commit 40726f16a8d7 ("ld script expression parsing")

> which broke the Xen hypervisor build. In the linker script we have

> 

>   .note.gnu.build-id : AT(ADDR(.note.gnu.build-id) - (<expression>)) {

>        __note_gnu_build_id_start = .;

>        *(.note.gnu.build-id)

>        __note_gnu_build_id_end = .;

>   } :note :text


Yes, guilty as charged.  I haven't tested the following besides a
quick check that ADDR(.note.gnu.build-id) is now correctly parsed.
Please test it out.

I haven't changed ORIGIN() and LENGTH() which are used on memory
regions yet, and won't do so unless someone finds a real project that
breaks.  The change there would be more complicated since I'd need to
support :silly-region too as a phdr for an output section, and the
colon matches what is allowed by a wildcard name in script mode.

diff --git a/ld/ldgram.y b/ld/ldgram.y
index 31e0071c6fc..db74f8a70e4 100644
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -96,8 +96,7 @@ static int error_index;
 %type <wildcard_list> section_name_list
 %type <flag_info_list> sect_flag_list
 %type <flag_info> sect_flags
-%type <name> memspec_opt casesymlist
-%type <name> memspec_at_opt
+%type <name> memspec_opt memspec_at_opt paren_script_name casesymlist
 %type <cname> wildcard_name
 %type <wildcard> section_name_spec filename_spec wildcard_maybe_exclude
 %token <bigint> INT
@@ -906,6 +905,10 @@ nocrossref_list:
 		}
 	;
 
+paren_script_name:
+		{ ldlex_script (); } '(' NAME  { ldlex_popstate (); } ')'
+			{ $$ = $3; }
+
 mustbe_exp:		 { ldlex_expression (); }
 		exp
 			 { ldlex_popstate (); $$=$2;}
@@ -970,14 +973,14 @@ exp	:
 	|	SIZEOF_HEADERS
 			{ $$ = exp_nameop (SIZEOF_HEADERS,0); }
 
-	|	ALIGNOF '(' NAME ')'
-			{ $$ = exp_nameop (ALIGNOF,$3); }
-	|	SIZEOF '(' NAME ')'
-			{ $$ = exp_nameop (SIZEOF,$3); }
-	|	ADDR '(' NAME ')'
-			{ $$ = exp_nameop (ADDR,$3); }
-	|	LOADADDR '(' NAME ')'
-			{ $$ = exp_nameop (LOADADDR,$3); }
+	|	ALIGNOF paren_script_name
+			{ $$ = exp_nameop (ALIGNOF, $2); }
+	|	SIZEOF	paren_script_name
+			{ $$ = exp_nameop (SIZEOF, $2); }
+	|	ADDR	paren_script_name
+			{ $$ = exp_nameop (ADDR, $2); }
+	|	LOADADDR paren_script_name
+			{ $$ = exp_nameop (LOADADDR, $2); }
 	|	CONSTANT '(' NAME ')'
 			{ $$ = exp_nameop (CONSTANT,$3); }
 	|	ABSOLUTE '(' exp ')'
@@ -992,15 +995,16 @@ exp	:
 			{ $$ = exp_binop (DATA_SEGMENT_RELRO_END, $5, $3); }
 	|	DATA_SEGMENT_END '(' exp ')'
 			{ $$ = exp_unop (DATA_SEGMENT_END, $3); }
-	|	SEGMENT_START '(' NAME ',' exp ')'
+	|	SEGMENT_START { ldlex_script (); } '(' NAME
+			{ ldlex_popstate (); } ',' exp ')'
 			{ /* The operands to the expression node are
 			     placed in the opposite order from the way
 			     in which they appear in the script as
 			     that allows us to reuse more code in
 			     fold_binary.  */
 			  $$ = exp_binop (SEGMENT_START,
-					  $5,
-					  exp_nameop (NAME, $3)); }
+					  $7,
+					  exp_nameop (NAME, $4)); }
 	|	BLOCK '(' exp ')'
 			{ $$ = exp_unop (ALIGN_K,$3); }
 	|	NAME

-- 
Alan Modra
Australia Development Lab, IBM
Florian Weimer via Binutils July 24, 2021, 10:23 a.m. | #3
Commit 40726f16a8d7 broke references to sections within ADDR(), and
overlays with weird section names.

	* ldgram.y (paren_script_name): New rule.
	(exp): Use it for ALIGNOF, SIZEOF, ADDR, and LOADADDR.  Similarly
	ensure script mode parsing for section name in SEGMENT_START.
	(overlay_section): Delete unnecessary ldlex_script call.  Backup
	on a lookahead NAME parsed in expression mode.
	* testsuite/ld-elf/overlay.s: Add more sections.
	* testsuite/ld-elf/overlay.t: Test '-' in section names.

diff --git a/ld/ldgram.y b/ld/ldgram.y
index 31e0071c6fc..2f9de59321a 100644
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -96,8 +96,7 @@ static int error_index;
 %type <wildcard_list> section_name_list
 %type <flag_info_list> sect_flag_list
 %type <flag_info> sect_flags
-%type <name> memspec_opt casesymlist
-%type <name> memspec_at_opt
+%type <name> memspec_opt memspec_at_opt paren_script_name casesymlist
 %type <cname> wildcard_name
 %type <wildcard> section_name_spec filename_spec wildcard_maybe_exclude
 %token <bigint> INT
@@ -906,6 +905,10 @@ nocrossref_list:
 		}
 	;
 
+paren_script_name:
+		{ ldlex_script (); } '(' NAME  { ldlex_popstate (); } ')'
+			{ $$ = $3; }
+
 mustbe_exp:		 { ldlex_expression (); }
 		exp
 			 { ldlex_popstate (); $$=$2;}
@@ -970,14 +973,14 @@ exp	:
 	|	SIZEOF_HEADERS
 			{ $$ = exp_nameop (SIZEOF_HEADERS,0); }
 
-	|	ALIGNOF '(' NAME ')'
-			{ $$ = exp_nameop (ALIGNOF,$3); }
-	|	SIZEOF '(' NAME ')'
-			{ $$ = exp_nameop (SIZEOF,$3); }
-	|	ADDR '(' NAME ')'
-			{ $$ = exp_nameop (ADDR,$3); }
-	|	LOADADDR '(' NAME ')'
-			{ $$ = exp_nameop (LOADADDR,$3); }
+	|	ALIGNOF paren_script_name
+			{ $$ = exp_nameop (ALIGNOF, $2); }
+	|	SIZEOF	paren_script_name
+			{ $$ = exp_nameop (SIZEOF, $2); }
+	|	ADDR	paren_script_name
+			{ $$ = exp_nameop (ADDR, $2); }
+	|	LOADADDR paren_script_name
+			{ $$ = exp_nameop (LOADADDR, $2); }
 	|	CONSTANT '(' NAME ')'
 			{ $$ = exp_nameop (CONSTANT,$3); }
 	|	ABSOLUTE '(' exp ')'
@@ -992,15 +995,16 @@ exp	:
 			{ $$ = exp_binop (DATA_SEGMENT_RELRO_END, $5, $3); }
 	|	DATA_SEGMENT_END '(' exp ')'
 			{ $$ = exp_unop (DATA_SEGMENT_END, $3); }
-	|	SEGMENT_START '(' NAME ',' exp ')'
+	|	SEGMENT_START { ldlex_script (); } '(' NAME
+			{ ldlex_popstate (); } ',' exp ')'
 			{ /* The operands to the expression node are
 			     placed in the opposite order from the way
 			     in which they appear in the script as
 			     that allows us to reuse more code in
 			     fold_binary.  */
 			  $$ = exp_binop (SEGMENT_START,
-					  $5,
-					  exp_nameop (NAME, $3)); }
+					  $7,
+					  exp_nameop (NAME, $4)); }
 	|	BLOCK '(' exp ')'
 			{ $$ = exp_unop (ALIGN_K,$3); }
 	|	NAME
@@ -1186,13 +1190,17 @@ overlay_section:
 	|	overlay_section
 		NAME
 			{
-			  ldlex_script ();
 			  lang_enter_overlay_section ($2);
 			}
 		'{' statement_list_opt '}'
-			{ ldlex_popstate (); ldlex_expression (); }
+			{ ldlex_expression (); }
 		phdr_opt fill_opt
 			{
+			  if (yychar == NAME)
+			    {
+			      yyclearin;
+			      ldlex_backup ();
+			    }
 			  ldlex_popstate ();
 			  lang_leave_overlay_section ($9, $8);
 			}
diff --git a/ld/testsuite/ld-elf/overlay.s b/ld/testsuite/ld-elf/overlay.s
index f153044e31b..20d8f4154dc 100644
--- a/ld/testsuite/ld-elf/overlay.s
+++ b/ld/testsuite/ld-elf/overlay.s
@@ -2,5 +2,9 @@
 	.space 0x10
 	.section .text2,"ax",%progbits
 	.space 0x20
+	.section .silly-name1,"ax",%progbits
+	.space 0x10
+	.section .silly-name2,"ax",%progbits
+	.space 0x20
 	.text
 	.space 0x30
diff --git a/ld/testsuite/ld-elf/overlay.t b/ld/testsuite/ld-elf/overlay.t
index dd374bb68b8..2c50a6b1a48 100644
--- a/ld/testsuite/ld-elf/overlay.t
+++ b/ld/testsuite/ld-elf/overlay.t
@@ -6,6 +6,10 @@ SECTIONS
   {
     .text1 {*(.text1)}
     .text2 {*(.text2)}
-  } 
+    .silly-name1 { *(.silly-name1) }
+    .silly-name2 { *(.silly-name2) }
+  }
   /DISCARD/ : { *(.*) }
+  ASSERT(ADDR(.text1)==ADDR(.text2), "overlay error")
+  ASSERT(ADDR(.silly-name1)==ADDR(.silly-name2), "silly overlay error")
 }

-- 
Alan Modra
Australia Development Lab, IBM
Florian Weimer via Binutils Aug. 3, 2021, 6:53 a.m. | #4
On 24.07.2021 12:23, Alan Modra wrote:
> Commit 40726f16a8d7 broke references to sections within ADDR(), and

> overlays with weird section names.

> 

> 	* ldgram.y (paren_script_name): New rule.

> 	(exp): Use it for ALIGNOF, SIZEOF, ADDR, and LOADADDR.  Similarly

> 	ensure script mode parsing for section name in SEGMENT_START.

> 	(overlay_section): Delete unnecessary ldlex_script call.  Backup

> 	on a lookahead NAME parsed in expression mode.

> 	* testsuite/ld-elf/overlay.s: Add more sections.

> 	* testsuite/ld-elf/overlay.t: Test '-' in section names.


Thanks Alan. I'm sorry, I didn't get around to test your fix yet, as
I've just returned from vacation. But I had no reason anyway to believe
it wouldn't deal with the issue.

Jan

> diff --git a/ld/ldgram.y b/ld/ldgram.y

> index 31e0071c6fc..2f9de59321a 100644

> --- a/ld/ldgram.y

> +++ b/ld/ldgram.y

> @@ -96,8 +96,7 @@ static int error_index;

>  %type <wildcard_list> section_name_list

>  %type <flag_info_list> sect_flag_list

>  %type <flag_info> sect_flags

> -%type <name> memspec_opt casesymlist

> -%type <name> memspec_at_opt

> +%type <name> memspec_opt memspec_at_opt paren_script_name casesymlist

>  %type <cname> wildcard_name

>  %type <wildcard> section_name_spec filename_spec wildcard_maybe_exclude

>  %token <bigint> INT

> @@ -906,6 +905,10 @@ nocrossref_list:

>  		}

>  	;

>  

> +paren_script_name:

> +		{ ldlex_script (); } '(' NAME  { ldlex_popstate (); } ')'

> +			{ $$ = $3; }

> +

>  mustbe_exp:		 { ldlex_expression (); }

>  		exp

>  			 { ldlex_popstate (); $$=$2;}

> @@ -970,14 +973,14 @@ exp	:

>  	|	SIZEOF_HEADERS

>  			{ $$ = exp_nameop (SIZEOF_HEADERS,0); }

>  

> -	|	ALIGNOF '(' NAME ')'

> -			{ $$ = exp_nameop (ALIGNOF,$3); }

> -	|	SIZEOF '(' NAME ')'

> -			{ $$ = exp_nameop (SIZEOF,$3); }

> -	|	ADDR '(' NAME ')'

> -			{ $$ = exp_nameop (ADDR,$3); }

> -	|	LOADADDR '(' NAME ')'

> -			{ $$ = exp_nameop (LOADADDR,$3); }

> +	|	ALIGNOF paren_script_name

> +			{ $$ = exp_nameop (ALIGNOF, $2); }

> +	|	SIZEOF	paren_script_name

> +			{ $$ = exp_nameop (SIZEOF, $2); }

> +	|	ADDR	paren_script_name

> +			{ $$ = exp_nameop (ADDR, $2); }

> +	|	LOADADDR paren_script_name

> +			{ $$ = exp_nameop (LOADADDR, $2); }

>  	|	CONSTANT '(' NAME ')'

>  			{ $$ = exp_nameop (CONSTANT,$3); }

>  	|	ABSOLUTE '(' exp ')'

> @@ -992,15 +995,16 @@ exp	:

>  			{ $$ = exp_binop (DATA_SEGMENT_RELRO_END, $5, $3); }

>  	|	DATA_SEGMENT_END '(' exp ')'

>  			{ $$ = exp_unop (DATA_SEGMENT_END, $3); }

> -	|	SEGMENT_START '(' NAME ',' exp ')'

> +	|	SEGMENT_START { ldlex_script (); } '(' NAME

> +			{ ldlex_popstate (); } ',' exp ')'

>  			{ /* The operands to the expression node are

>  			     placed in the opposite order from the way

>  			     in which they appear in the script as

>  			     that allows us to reuse more code in

>  			     fold_binary.  */

>  			  $$ = exp_binop (SEGMENT_START,

> -					  $5,

> -					  exp_nameop (NAME, $3)); }

> +					  $7,

> +					  exp_nameop (NAME, $4)); }

>  	|	BLOCK '(' exp ')'

>  			{ $$ = exp_unop (ALIGN_K,$3); }

>  	|	NAME

> @@ -1186,13 +1190,17 @@ overlay_section:

>  	|	overlay_section

>  		NAME

>  			{

> -			  ldlex_script ();

>  			  lang_enter_overlay_section ($2);

>  			}

>  		'{' statement_list_opt '}'

> -			{ ldlex_popstate (); ldlex_expression (); }

> +			{ ldlex_expression (); }

>  		phdr_opt fill_opt

>  			{

> +			  if (yychar == NAME)

> +			    {

> +			      yyclearin;

> +			      ldlex_backup ();

> +			    }

>  			  ldlex_popstate ();

>  			  lang_leave_overlay_section ($9, $8);

>  			}

> diff --git a/ld/testsuite/ld-elf/overlay.s b/ld/testsuite/ld-elf/overlay.s

> index f153044e31b..20d8f4154dc 100644

> --- a/ld/testsuite/ld-elf/overlay.s

> +++ b/ld/testsuite/ld-elf/overlay.s

> @@ -2,5 +2,9 @@

>  	.space 0x10

>  	.section .text2,"ax",%progbits

>  	.space 0x20

> +	.section .silly-name1,"ax",%progbits

> +	.space 0x10

> +	.section .silly-name2,"ax",%progbits

> +	.space 0x20

>  	.text

>  	.space 0x30

> diff --git a/ld/testsuite/ld-elf/overlay.t b/ld/testsuite/ld-elf/overlay.t

> index dd374bb68b8..2c50a6b1a48 100644

> --- a/ld/testsuite/ld-elf/overlay.t

> +++ b/ld/testsuite/ld-elf/overlay.t

> @@ -6,6 +6,10 @@ SECTIONS

>    {

>      .text1 {*(.text1)}

>      .text2 {*(.text2)}

> -  } 

> +    .silly-name1 { *(.silly-name1) }

> +    .silly-name2 { *(.silly-name2) }

> +  }

>    /DISCARD/ : { *(.*) }

> +  ASSERT(ADDR(.text1)==ADDR(.text2), "overlay error")

> +  ASSERT(ADDR(.silly-name1)==ADDR(.silly-name2), "silly overlay error")

>  }

>

Patch

--- a/ld/ldlex.l
+++ b/ld/ldlex.l
@@ -385,7 +385,7 @@  V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)*
                                  yylval.name = xstrdup (yytext + 2);
                                  return LNAME;
                                }
-<EXPRESSION>{SYMBOLNAMECHAR1}{NOCFILENAMECHAR}* {
+<EXPRESSION>{SYMBOLNAMECHAR1}{SYMBOLNAMECHAR}* {
                                  yylval.name = xstrdup (yytext);
                                  return NAME;
                                }