[1/4] strncmp: Add a testcase for page boundary [BZ #25933]

Message ID 20200612201056.228614-1-hjl.tools@gmail.com
State New
Headers show
Series
  • [1/4] strncmp: Add a testcase for page boundary [BZ #25933]
Related show

Commit Message

Samuel Thibault via Libc-alpha June 12, 2020, 8:10 p.m.
Add a strncmp testcase to cover cases where one of strings ends on the
page boundary.
---
 string/test-strncmp.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

-- 
2.26.2

Comments

Samuel Thibault via Libc-alpha June 15, 2020, 8:29 p.m. | #1
On Fri, Jun 12, 2020 at 01:10:53PM -0700, H.J. Lu via Libc-alpha wrote:
> Add a strncmp testcase to cover cases where one of strings ends on the

> page boundary.

> ---

>  string/test-strncmp.c | 25 +++++++++++++++++++++++++

>  1 file changed, 25 insertions(+)

> 

> diff --git a/string/test-strncmp.c b/string/test-strncmp.c

> index d961ac4493..d0928a2864 100644

> --- a/string/test-strncmp.c

> +++ b/string/test-strncmp.c

> @@ -403,6 +403,30 @@ check2 (void)

>    free (s2);

>  }

> 

> +static void

> +check3 (void)

> +{

> +  size_t size = 32 * 4;

> +  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);

> +  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);

> +  int exp_result;

> +

> +  memset (s1, 'a', page_size);

> +  memset (s2, 'a', page_size);

> +  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;

> +

> +  for (size_t s = 99; s <= size; s++)

> +    for (size_t s1a = 31; s1a < 32; s1a++)

> +      for (size_t s2a = 30; s2a < 32; s2a++)

> +	{

> +	  CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;

> +	  CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;

> +	  exp_result = SIMPLE_STRNCMP (s1p, s2p, s);

> +	  FOR_EACH_IMPL (impl, 0)

> +	    check_result (impl, s1p, s2p, s, exp_result);

> +	}

> +}


There are lots of magic numbers here.

Could you add some context around those numbers?

If they are meant to reflect a cache line length, then it's
appropriate to support different cache line sizes.
Rafael Zinsly just did this with strncasecmp in the
last week or so.

> +

>  int

>  test_main (void)

>  {

> @@ -412,6 +436,7 @@ test_main (void)

> 

>    check1 ();

>    check2 ();

> +  check3 ();

> 

>    printf ("%23s", "");

>    FOR_EACH_IMPL (impl, 0)

> -- 

> 2.26.2

>
Samuel Thibault via Libc-alpha June 15, 2020, 9:34 p.m. | #2
On Mon, Jun 15, 2020 at 1:29 PM Paul A. Clarke <pc@us.ibm.com> wrote:
>

> On Fri, Jun 12, 2020 at 01:10:53PM -0700, H.J. Lu via Libc-alpha wrote:

> > Add a strncmp testcase to cover cases where one of strings ends on the

> > page boundary.

> > ---

> >  string/test-strncmp.c | 25 +++++++++++++++++++++++++

> >  1 file changed, 25 insertions(+)

> >

> > diff --git a/string/test-strncmp.c b/string/test-strncmp.c

> > index d961ac4493..d0928a2864 100644

> > --- a/string/test-strncmp.c

> > +++ b/string/test-strncmp.c

> > @@ -403,6 +403,30 @@ check2 (void)

> >    free (s2);

> >  }

> >

> > +static void

> > +check3 (void)

> > +{

> > +  size_t size = 32 * 4;

> > +  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);

> > +  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);

> > +  int exp_result;

> > +

> > +  memset (s1, 'a', page_size);

> > +  memset (s2, 'a', page_size);

> > +  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;

> > +

> > +  for (size_t s = 99; s <= size; s++)

> > +    for (size_t s1a = 31; s1a < 32; s1a++)

> > +      for (size_t s2a = 30; s2a < 32; s2a++)

> > +     {

> > +       CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;

> > +       CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;

> > +       exp_result = SIMPLE_STRNCMP (s1p, s2p, s);

> > +       FOR_EACH_IMPL (impl, 0)

> > +         check_result (impl, s1p, s2p, s, exp_result);

> > +     }

> > +}

>

> There are lots of magic numbers here.

>

> Could you add some context around those number


My commit log says

---
Add a strncmp testcase to cover cases where one of strings ends on the
page boundary.
---

> If they are meant to reflect a cache line length, then it's


No.  My testcase is for correctness, not for performance.

> appropriate to support different cache line sizes.

> Rafael Zinsly just did this with strncasecmp in the

> last week or so.

>

> > +

> >  int

> >  test_main (void)

> >  {

> > @@ -412,6 +436,7 @@ test_main (void)

> >

> >    check1 ();

> >    check2 ();

> > +  check3 ();

> >

> >    printf ("%23s", "");

> >    FOR_EACH_IMPL (impl, 0)

> > --

> > 2.26.2

> >




-- 
H.J.
Samuel Thibault via Libc-alpha June 15, 2020, 10:03 p.m. | #3
On Mon, Jun 15, 2020 at 02:34:13PM -0700, H.J. Lu via Libc-alpha wrote:
> On Mon, Jun 15, 2020 at 1:29 PM Paul A. Clarke <pc@us.ibm.com> wrote:

> >

> > On Fri, Jun 12, 2020 at 01:10:53PM -0700, H.J. Lu via Libc-alpha wrote:

> > > Add a strncmp testcase to cover cases where one of strings ends on the

> > > page boundary.

> > > ---

> > >  string/test-strncmp.c | 25 +++++++++++++++++++++++++

> > >  1 file changed, 25 insertions(+)

> > >

> > > diff --git a/string/test-strncmp.c b/string/test-strncmp.c

> > > index d961ac4493..d0928a2864 100644

> > > --- a/string/test-strncmp.c

> > > +++ b/string/test-strncmp.c

> > > @@ -403,6 +403,30 @@ check2 (void)

> > >    free (s2);

> > >  }

> > >

> > > +static void

> > > +check3 (void)

> > > +{

> > > +  size_t size = 32 * 4;

> > > +  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);

> > > +  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);

> > > +  int exp_result;

> > > +

> > > +  memset (s1, 'a', page_size);

> > > +  memset (s2, 'a', page_size);

> > > +  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;

> > > +

> > > +  for (size_t s = 99; s <= size; s++)

> > > +    for (size_t s1a = 31; s1a < 32; s1a++)

> > > +      for (size_t s2a = 30; s2a < 32; s2a++)

> > > +     {

> > > +       CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;

> > > +       CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;

> > > +       exp_result = SIMPLE_STRNCMP (s1p, s2p, s);

> > > +       FOR_EACH_IMPL (impl, 0)

> > > +         check_result (impl, s1p, s2p, s, exp_result);

> > > +     }

> > > +}

> >

> > There are lots of magic numbers here.

> >

> > Could you add some context around those number

> 

> My commit log says

> 

> ---

> Add a strncmp testcase to cover cases where one of strings ends on the

> page boundary.

> ---


Which says nothing about why you need to test over 90000 different
cases of a string ending on a page boundary, nor what any of the
magic numbers represent.

> > If they are meant to reflect a cache line length, then it's

> 

> No.  My testcase is for correctness, not for performance.


I made no reference to performance.

> > appropriate to support different cache line sizes.

> > Rafael Zinsly just did this with strncasecmp in the

> > last week or so.


PC
Samuel Thibault via Libc-alpha Sept. 23, 2020, 7:01 p.m. | #4
On 6/12/20 4:10 PM, H.J. Lu via Libc-alpha wrote:
> Add a strncmp testcase to cover cases where one of strings ends on the

> page boundary.


I would like to see this sequence of 4 patches committed because they *do*
correctly regression test swbz#25933, and I'd like to see this not regress
again.

However, I share Paul's concerns over the magic numbers, so I have made
some concrete suggestions for comments.

OK with the following changes:
- Add all suggested comments.
- s1a iterates over [30,32).

Reviewed-by: Carlos O'Donell <carlos@redhat.com>


> ---

>  string/test-strncmp.c | 25 +++++++++++++++++++++++++

>  1 file changed, 25 insertions(+)

> 

> diff --git a/string/test-strncmp.c b/string/test-strncmp.c

> index d961ac4493..d0928a2864 100644

> --- a/string/test-strncmp.c

> +++ b/string/test-strncmp.c

> @@ -403,6 +403,30 @@ check2 (void)

>    free (s2);

>  }

>  

> +static void

> +check3 (void)

> +{

> +  size_t size = 32 * 4;


Add a comment:

/* To trigger bug 25933 we need a size that is equal to the
   vector length times 4. In the case of AVX2 for Intel we
   need 32 * 4. We make this test generic and run it for all
   architectures as additional boundary testing for such
   related algorithms.  */

This is my understanding, that we need 32*4 to trigger the bug.

> +  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);

> +  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);


We set s1 and s2 to point into the buffers at a point 1 page
before the end. We expect the test to add 1 page PROT_NONE at
the end. Thus s1 and s2 by default point to two pages, the
first page PROT_READ|PROT_WRITE and the second page PROT_NONE.
So far so good. No comment required. You have to understand
how these test cases work to read this.

> +  int exp_result;

> +

> +  memset (s1, 'a', page_size);

> +  memset (s2, 'a', page_size);


OK. Fill both full of 'a'.

> +  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;


OK. Null terminate s1.

Note that s2 is not null terminated.

> +


Add comment:

/* Iterate over a size that is just below where we expect
   the bug to trigger up to the size we expect will trigger
   the bug e.g. [99-128].  Likewise iterate the start of
   two strings between 30 and 31 bytes away from the
   boundary to simulate alignment changes.  */

> +  for (size_t s = 99; s <= size; s++)


OK. A bit of belt-and-suspenders here we make s range
from 99-128, so we iterate just before the size we care
about up to the size we expect to trigger the bug.

> +    for (size_t s1a = 31; s1a < 32; s1a++)


s1a iterates over [31,32) e.g. 31

> +      for (size_t s2a = 30; s2a < 32; s2a++)


s2a iterates over [30,32) e.g. 30, 31

> +	{

> +	  CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;


Set the pointer back from the PROT_NONE page by "s+s1a" bytes.

> +	  CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;


Set the pointer back from the PROT_NONE page by "s+s2a" bytes.

> +	  exp_result = SIMPLE_STRNCMP (s1p, s2p, s);


Then compare.

This code comes from Adhemerval's testing in comment #2,
where the test catches a second loop that has similar problems.

At most we test [30 sizes]x[1 offset for s1a]x[2 offets for s2a] = 60 tests.

Suggest:

s1a iterate over [30,32) like s2a for the sake of simplicity.

> +	  FOR_EACH_IMPL (impl, 0)

> +	    check_result (impl, s1p, s2p, s, exp_result);

> +	}

> +}

> +

>  int

>  test_main (void)

>  {

> @@ -412,6 +436,7 @@ test_main (void)

>  

>    check1 ();

>    check2 ();

> +  check3 ();


OK.

>  

>    printf ("%23s", "");

>    FOR_EACH_IMPL (impl, 0)

> 



-- 
Cheers,
Carlos.
Samuel Thibault via Libc-alpha Sept. 24, 2020, 2:05 p.m. | #5
On Wed, Sep 23, 2020 at 12:01 PM Carlos O'Donell <carlos@redhat.com> wrote:
>

> On 6/12/20 4:10 PM, H.J. Lu via Libc-alpha wrote:

> > Add a strncmp testcase to cover cases where one of strings ends on the

> > page boundary.

>

> I would like to see this sequence of 4 patches committed because they *do*

> correctly regression test swbz#25933, and I'd like to see this not regress

> again.

>

> However, I share Paul's concerns over the magic numbers, so I have made

> some concrete suggestions for comments.

>

> OK with the following changes:

> - Add all suggested comments.

> - s1a iterates over [30,32).

>

> Reviewed-by: Carlos O'Donell <carlos@redhat.com>

>

> > ---

> >  string/test-strncmp.c | 25 +++++++++++++++++++++++++

> >  1 file changed, 25 insertions(+)

> >

> > diff --git a/string/test-strncmp.c b/string/test-strncmp.c

> > index d961ac4493..d0928a2864 100644

> > --- a/string/test-strncmp.c

> > +++ b/string/test-strncmp.c

> > @@ -403,6 +403,30 @@ check2 (void)

> >    free (s2);

> >  }

> >

> > +static void

> > +check3 (void)

> > +{

> > +  size_t size = 32 * 4;

>

> Add a comment:

>

> /* To trigger bug 25933 we need a size that is equal to the

>    vector length times 4. In the case of AVX2 for Intel we

>    need 32 * 4. We make this test generic and run it for all

>    architectures as additional boundary testing for such

>    related algorithms.  */

>

> This is my understanding, that we need 32*4 to trigger the bug.

>

> > +  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);

> > +  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);

>

> We set s1 and s2 to point into the buffers at a point 1 page

> before the end. We expect the test to add 1 page PROT_NONE at

> the end. Thus s1 and s2 by default point to two pages, the

> first page PROT_READ|PROT_WRITE and the second page PROT_NONE.

> So far so good. No comment required. You have to understand

> how these test cases work to read this.

>

> > +  int exp_result;

> > +

> > +  memset (s1, 'a', page_size);

> > +  memset (s2, 'a', page_size);

>

> OK. Fill both full of 'a'.

>

> > +  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;

>

> OK. Null terminate s1.

>

> Note that s2 is not null terminated.

>

> > +

>

> Add comment:

>

> /* Iterate over a size that is just below where we expect

>    the bug to trigger up to the size we expect will trigger

>    the bug e.g. [99-128].  Likewise iterate the start of

>    two strings between 30 and 31 bytes away from the

>    boundary to simulate alignment changes.  */

>

> > +  for (size_t s = 99; s <= size; s++)

>

> OK. A bit of belt-and-suspenders here we make s range

> from 99-128, so we iterate just before the size we care

> about up to the size we expect to trigger the bug.

>

> > +    for (size_t s1a = 31; s1a < 32; s1a++)

>

> s1a iterates over [31,32) e.g. 31

>

> > +      for (size_t s2a = 30; s2a < 32; s2a++)

>

> s2a iterates over [30,32) e.g. 30, 31

>

> > +     {

> > +       CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;

>

> Set the pointer back from the PROT_NONE page by "s+s1a" bytes.

>

> > +       CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;

>

> Set the pointer back from the PROT_NONE page by "s+s2a" bytes.

>

> > +       exp_result = SIMPLE_STRNCMP (s1p, s2p, s);

>

> Then compare.

>

> This code comes from Adhemerval's testing in comment #2,

> where the test catches a second loop that has similar problems.

>

> At most we test [30 sizes]x[1 offset for s1a]x[2 offets for s2a] = 60 tests.

>

> Suggest:

>

> s1a iterate over [30,32) like s2a for the sake of simplicity.

>

> > +       FOR_EACH_IMPL (impl, 0)

> > +         check_result (impl, s1p, s2p, s, exp_result);

> > +     }

> > +}

> > +

> >  int

> >  test_main (void)

> >  {

> > @@ -412,6 +436,7 @@ test_main (void)

> >

> >    check1 ();

> >    check2 ();

> > +  check3 ();

>

> OK.

>

> >

> >    printf ("%23s", "");

> >    FOR_EACH_IMPL (impl, 0)

> >

>


Here is the updated patch I am checking in.

Thanks.

-- 
H.J.
From 48d19840f8642298823026bafc4c89244fc26889 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 7 May 2020 07:29:46 -0700
Subject: [PATCH] strncmp: Add a testcase for page boundary [BZ #25933]

Add a strncmp testcase to cover cases where one of strings ends on the
page boundary with the maximum string length less than the number bytes
of each AVX2 loop iteration and different offsets from page boundary.

The updated string/test-strncmp fails on Intel Core i7-8559U without

ommit 1c6432316bc434a72108d7b0c7cfbfdde64c3124
Author: Sunil K Pandey <skpgkp1@gmail.com>
Date:   Fri Jun 12 08:57:16 2020 -0700

    Fix avx2 strncmp offset compare condition check [BZ #25933]
---
 string/test-strncmp.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/string/test-strncmp.c b/string/test-strncmp.c
index d961ac4493..962679b384 100644
--- a/string/test-strncmp.c
+++ b/string/test-strncmp.c
@@ -403,6 +403,38 @@ check2 (void)
   free (s2);
 }
 
+static void
+check3 (void)
+{
+  /* To trigger bug 25933, we need a size that is equal to the vector
+     length times 4. In the case of AVX2 for Intel, we need 32 * 4.  We
+     make this test generic and run it for all architectures as additional
+     boundary testing for such related algorithms.  */
+  size_t size = 32 * 4;
+  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);
+  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);
+  int exp_result;
+
+  memset (s1, 'a', page_size);
+  memset (s2, 'a', page_size);
+  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;
+
+  /* Iterate over a size that is just below where we expect the bug to
+     trigger up to the size we expect will trigger the bug e.g. [99-128].
+     Likewise iterate the start of two strings between 30 and 31 bytes
+     away from the boundary to simulate alignment changes.  */
+  for (size_t s = 99; s <= size; s++)
+    for (size_t s1a = 30; s1a < 32; s1a++)
+      for (size_t s2a = 30; s2a < 32; s2a++)
+	{
+	  CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;
+	  CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;
+	  exp_result = SIMPLE_STRNCMP (s1p, s2p, s);
+	  FOR_EACH_IMPL (impl, 0)
+	    check_result (impl, s1p, s2p, s, exp_result);
+	}
+}
+
 int
 test_main (void)
 {
@@ -412,6 +444,7 @@ test_main (void)
 
   check1 ();
   check2 ();
+  check3 ();
 
   printf ("%23s", "");
   FOR_EACH_IMPL (impl, 0)

Patch

diff --git a/string/test-strncmp.c b/string/test-strncmp.c
index d961ac4493..d0928a2864 100644
--- a/string/test-strncmp.c
+++ b/string/test-strncmp.c
@@ -403,6 +403,30 @@  check2 (void)
   free (s2);
 }
 
+static void
+check3 (void)
+{
+  size_t size = 32 * 4;
+  CHAR *s1 = (CHAR *) (buf1 + (BUF1PAGES - 1) * page_size);
+  CHAR *s2 = (CHAR *) (buf2 + (BUF1PAGES - 1) * page_size);
+  int exp_result;
+
+  memset (s1, 'a', page_size);
+  memset (s2, 'a', page_size);
+  s1[(page_size / CHARBYTES) - 1] = (CHAR) 0;
+
+  for (size_t s = 99; s <= size; s++)
+    for (size_t s1a = 31; s1a < 32; s1a++)
+      for (size_t s2a = 30; s2a < 32; s2a++)
+	{
+	  CHAR *s1p = s1 + (page_size / CHARBYTES - s) - s1a;
+	  CHAR *s2p = s2 + (page_size / CHARBYTES - s) - s2a;
+	  exp_result = SIMPLE_STRNCMP (s1p, s2p, s);
+	  FOR_EACH_IMPL (impl, 0)
+	    check_result (impl, s1p, s2p, s, exp_result);
+	}
+}
+
 int
 test_main (void)
 {
@@ -412,6 +436,7 @@  test_main (void)
 
   check1 ();
   check2 ();
+  check3 ();
 
   printf ("%23s", "");
   FOR_EACH_IMPL (impl, 0)