[7/8] x86: better respect quotes in parse_operands()

Message ID 7985ad53-489c-9434-3c48-9dbbf073aef8@suse.com
State New
Headers show
Series
  • x86: some tidying and quoted symbols fixes
Related show

Commit Message

Jan Beulich via Binutils June 4, 2021, 1:42 p.m.
When d02603dc201f ("Allow symbol and label names to be enclosed in
double quotes") added the check for a double quote to the loop body
in parse_operands(), it didn't go quite far enough: Parentheses inside
quotes shouldn't be counted, and character restrictions also shouldn't
apply inside quoted regions.

In i386_att_operand(), which needs adjustment to remain in sync, besides
respecting double quotes now, also change the logic such that we don't
count parentheses anymore: Finding any opening or closing parenthesis or
any double quote means we're done, because the subsequent parsing code
wouldn't accept (extra) instances of these anyway.

Note that in parse_operands() this mimics get_symbol_name()'s
questionable behavior of treating \ specially only when ahead of ". (The
behavior is suspicious because the meaning of \\ then is ambiguous. It
is in particular impossible to have a (quoted) symbol name end in a
single \.) I would have used get_symbol_name() here, if that didn't
require fiddling with input_line_pointer.

gas/
2021-06-XX  Jan Beulich  <jbeulich@suse.com>

	* config/tc-i386.c (parse_operands): Reduce scope of
	paren_not_balanced, to match the new in_quotes. Skip over quoted
	regions of operands.
	(i386_att_operand): Remove (mis-named) parens_balanced. Respect
	double quote.
	* testsuite/gas/i386/quoted.s: Add more cases.
	* testsuite/gas/i386/quoted.d: Adjust expectations.

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5523,11 +5523,13 @@  parse_operands (char *l, const char *mne
   /* 1 if operand is pending after ','.  */
   unsigned int expecting_operand = 0;
 
-  /* Non-zero if operand parens not balanced.  */
-  unsigned int paren_not_balanced;
-
   while (*l != END_OF_INSN)
     {
+      /* Non-zero if operand parens not balanced.  */
+      unsigned int paren_not_balanced = 0;
+      /* True if inside double quotes.  */
+      bool in_quotes = false;
+
       /* Skip optional white space before operand.  */
       if (is_space_char (*l))
 	++l;
@@ -5539,11 +5541,16 @@  parse_operands (char *l, const char *mne
 	  return NULL;
 	}
       token_start = l;	/* After white space.  */
-      paren_not_balanced = 0;
-      while (paren_not_balanced || *l != ',')
+      while (in_quotes || paren_not_balanced || *l != ',')
 	{
 	  if (*l == END_OF_INSN)
 	    {
+	      if (in_quotes)
+		{
+		  as_bad (_("unbalanced double quotes in operand %d."),
+			  i.operands + 1);
+		  return NULL;
+		}
 	      if (paren_not_balanced)
 		{
 		  know (!intel_syntax);
@@ -5554,14 +5561,18 @@  parse_operands (char *l, const char *mne
 	      else
 		break;	/* we are done */
 	    }
-	  else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
+	  else if (*l == '\\' && l[1] == '"')
+	    ++l;
+	  else if (*l == '"')
+	    in_quotes = !in_quotes;
+	  else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
 	    {
 	      as_bad (_("invalid character %s in operand %d"),
 		      output_invalid (*l),
 		      i.operands + 1);
 	      return NULL;
 	    }
-	  if (!intel_syntax)
+	  if (!intel_syntax && !in_quotes)
 	    {
 	      if (*l == '(')
 		++paren_not_balanced;
@@ -11517,23 +11528,21 @@  i386_att_operand (char *operand_string)
       if (*base_string == ')')
 	{
 	  char *temp_string;
-	  unsigned int parens_balanced = 1;
+
 	  /* We've already checked that the number of left & right ()'s are
 	     equal, so this loop will not be infinite.  */
 	  do
 	    {
 	      base_string--;
-	      if (*base_string == ')')
-		parens_balanced++;
-	      if (*base_string == '(')
-		parens_balanced--;
 	    }
-	  while (parens_balanced);
+	  while (*base_string != '(' && *base_string != ')'
+		 && *base_string != '"');
 
 	  temp_string = base_string;
 
 	  /* Skip past '(' and whitespace.  */
-	  ++base_string;
+	  if (*base_string == '(')
+	    ++base_string;
 	  if (is_space_char (*base_string))
 	    ++base_string;
 
--- a/gas/testsuite/gas/i386/quoted.d
+++ b/gas/testsuite/gas/i386/quoted.d
@@ -10,6 +10,9 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:[ 	]*8b 80 00 00 00 00[	 ]+mov    0x0\(%eax\),%eax	[a-f0-9]+: (R_386_|dir)32	x\(y\)
 [ 	]*[a-f0-9]+:[ 	]*26 a1 00 00 00 00[	 ]+mov    %es:0x0,%eax	[a-f0-9]+: (R_386_|dir)32	x\(y\)
 [ 	]*[a-f0-9]+:[ 	]*26 8b 80 00 00 00 00[	 ]+mov    %es:0x0\(%eax\),%eax	[a-f0-9]+: (R_386_|dir)32	x\(y\)
+[ 	]*[a-f0-9]+:[ 	]*a1 00 00 00 00[	 ]+mov    0x0,%eax	[a-f0-9]+: (R_386_|dir)32	x\(y
+[ 	]*[a-f0-9]+:[ 	]*a1 00 00 00 00[	 ]+mov    0x0,%eax	[a-f0-9]+: (R_386_|dir)32	x\)y
+[ 	]*[a-f0-9]+:[ 	]*a1 00 00 00 00[	 ]+mov    0x0,%eax	[a-f0-9]+: (R_386_|dir)32	x\?y
 [ 	]*[a-f0-9]+:[ 	]*ff 15 00 00 00 00[	 ]+call   \*0x0	[a-f0-9]+: (R_386_|dir)32	x\(y\)
 [ 	]*[a-f0-9]+:[ 	]*26 ff 15 00 00 00 00[	 ]+call   \*%es:0x0	[a-f0-9]+: (R_386_|dir)32	x\(y\)
 [ 	]*[a-f0-9]+:[ 	]*26 ff 15 00 00 00 00[	 ]+call   \*%es:0x0	[a-f0-9]+: (R_386_|dir)32	x\(y\)
--- a/gas/testsuite/gas/i386/quoted.s
+++ b/gas/testsuite/gas/i386/quoted.s
@@ -5,6 +5,10 @@  quoted:
 	mov	%es:"x(y)", %eax
 	mov	%es:"x(y)"(%eax), %eax
 
+	mov	"x(y", %eax
+	mov	"x)y", %eax
+	mov	"x?y", %eax
+
 	call	*"x(y)"
 	call	*%es:"x(y)"
 	call	%es:*"x(y)"