malloc/tst-mallocfork2: Kill lingering process for unexpected failures

Message ID 20200220175840.25246-1-adhemerval.zanella@linaro.org
State New
Headers show
Series
  • malloc/tst-mallocfork2: Kill lingering process for unexpected failures
Related show

Commit Message

Adhemerval Zanella Feb. 20, 2020, 5:58 p.m.
If the test fails due some unexpected failure after the children
creation, either in the signal handler by calling abort or in the main
loop; the created children might not be killed properly.

This patches fixes it by:

  * Avoid aborting in the signal handler by setting a flag that
    an error has occured and add a check in the main loop.

  * Add a atfork handler to handle kill the signal sending
    processes.

Checked on x86_64-linux-gnu.
---
 malloc/tst-mallocfork2.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

-- 
2.17.1

Comments

Paul Clarke Feb. 21, 2020, 8:07 p.m. | #1
On 2/20/20 11:58 AM, Adhemerval Zanella wrote:
> If the test fails due some unexpected failure after the children

> creation, either in the signal handler by calling abort or in the main

> loop; the created children might not be killed properly.

> 

> This patches fixes it by:

> 

>   * Avoid aborting in the signal handler by setting a flag that

>     an error has occured and add a check in the main loop.

> 

>   * Add a atfork handler to handle kill the signal sending

>     processes.


s/atfork/atexit/, per the code below.

"handle kill the signal sending processes" doesn't read well.
Perhaps "kill child processes"?

> Checked on x86_64-linux-gnu.

> ---

>  malloc/tst-mallocfork2.c | 39 ++++++++++++++++++++++++++++-----------

>  1 file changed, 28 insertions(+), 11 deletions(-)

> 

> diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c

> index 0602a94895..4caf61489f 100644

> --- a/malloc/tst-mallocfork2.c

> +++ b/malloc/tst-mallocfork2.c

> @@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received;

>     progress.  Checked by liveness_signal_handler.  */

>  static volatile sig_atomic_t progress_indicator = 1;

> 

> +/* Set to 1 if an error occurs in the signal handler.  */

> +static volatile sig_atomic_t error_indicator = 0;

> +

>  static void

>  sigusr1_handler (int signo)

>  {

> @@ -72,7 +75,8 @@ sigusr1_handler (int signo)

>    if (pid == -1)

>      {

>        write_message ("error: fork\n");

> -      abort ();

> +      error_indicator = 1;

> +      return;

>      }

>    if (pid == 0)

>      _exit (0);

> @@ -81,12 +85,14 @@ sigusr1_handler (int signo)

>    if (ret < 0)

>      {

>        write_message ("error: waitpid\n");

> -      abort ();

> +      error_indicator = 1;

> +      return;

>      }

>    if (status != 0)

>      {

>        write_message ("error: unexpected exit status from subprocess\n");

> -      abort ();

> +      error_indicator = 1;

> +      return;

>      }

>  }

> 


OK

> @@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep)

>      }

>  }

> 

> +/* Children processes.  */

> +static pid_t sigusr1_sender_pids[5] = { -1 };


Doesn't this set [0] to -1 and [1..4] to 0 ?

> +static pid_t sigusr2_sender_pid = -1;

> +

> +static void

> +kill_children (void)

> +{

> +  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

> +    if (sigusr1_sender_pids[i] != -1)

> +      kill (sigusr1_sender_pids[i], SIGKILL);


...and this will perform kill (0, SIGKILL) if not otherwise initialized, which will kill this task immediately, if I understand correctly.  I presume that's not what you want.

Why not use 0 as your initial/uninitialized value, and test against that?  Also, that way, you'll never issue kill (0, ...).

> +  if (sigusr2_sender_pid != -1)

> +    kill (sigusr2_sender_pid, SIGKILL);


Same.

> +}

> +

>  static int

>  do_test (void)

>  {

> +  atexit (kill_children);

> +

>    /* shared->barrier is intialized along with sigusr1_sender_pids

>       below.  */

>    shared = support_shared_allocate (sizeof (*shared));

> @@ -148,14 +170,13 @@ do_test (void)

>        return 1;

>      }

> 

> -  pid_t sigusr2_sender_pid = xfork ();

> +  sigusr2_sender_pid = xfork ();

>    if (sigusr2_sender_pid == 0)

>      signal_sender (SIGUSR2, true);

> 

>    /* Send SIGUSR1 signals from several processes.  Hopefully, one

>       signal will hit one of the ciritical functions.  Use a barrier to

>       avoid sending signals while not running fork/free/malloc.  */

> -  pid_t sigusr1_sender_pids[5];

>    {

>      pthread_barrierattr_t attr;

>      xpthread_barrierattr_init (&attr);

> @@ -166,7 +187,7 @@ do_test (void)

>    }

>    for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

>      {

> -      sigusr1_sender_pids[i] = fork ();

> +      sigusr1_sender_pids[i] = xfork ();

>        if (sigusr1_sender_pids[i] == 0)

>          signal_sender (SIGUSR1, false);

>      }

> @@ -211,7 +232,7 @@ do_test (void)

>          ++malloc_signals;

>        xpthread_barrier_wait (&shared->barrier);

> 

> -      if (objects[slot] == NULL)

> +      if (objects[slot] == NULL || error_indicator != 0)

>          {

>            printf ("error: malloc: %m\n");

>            for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

> @@ -225,10 +246,6 @@ do_test (void)

>    for (int slot = 0; slot < malloc_objects; ++slot)

>      free (objects[slot]);

> 

> -  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

> -    kill (sigusr1_sender_pids[i], SIGKILL);

> -  kill (sigusr2_sender_pid, SIGKILL);

> -


OK

PC
Adhemerval Zanella Feb. 27, 2020, 4:42 p.m. | #2
On 21/02/2020 17:07, Paul Clarke wrote:
> On 2/20/20 11:58 AM, Adhemerval Zanella wrote:

>> If the test fails due some unexpected failure after the children

>> creation, either in the signal handler by calling abort or in the main

>> loop; the created children might not be killed properly.

>>

>> This patches fixes it by:

>>

>>   * Avoid aborting in the signal handler by setting a flag that

>>     an error has occured and add a check in the main loop.

>>

>>   * Add a atfork handler to handle kill the signal sending

>>     processes.

> 

> s/atfork/atexit/, per the code below.


Ack.

> 

> "handle kill the signal sending processes" doesn't read well.

> Perhaps "kill child processes"?


Ack.

> 

>> Checked on x86_64-linux-gnu.

>> ---

>>  malloc/tst-mallocfork2.c | 39 ++++++++++++++++++++++++++++-----------

>>  1 file changed, 28 insertions(+), 11 deletions(-)

>>

>> diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c

>> index 0602a94895..4caf61489f 100644

>> --- a/malloc/tst-mallocfork2.c

>> +++ b/malloc/tst-mallocfork2.c

>> @@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received;

>>     progress.  Checked by liveness_signal_handler.  */

>>  static volatile sig_atomic_t progress_indicator = 1;

>>

>> +/* Set to 1 if an error occurs in the signal handler.  */

>> +static volatile sig_atomic_t error_indicator = 0;

>> +

>>  static void

>>  sigusr1_handler (int signo)

>>  {

>> @@ -72,7 +75,8 @@ sigusr1_handler (int signo)

>>    if (pid == -1)

>>      {

>>        write_message ("error: fork\n");

>> -      abort ();

>> +      error_indicator = 1;

>> +      return;

>>      }

>>    if (pid == 0)

>>      _exit (0);

>> @@ -81,12 +85,14 @@ sigusr1_handler (int signo)

>>    if (ret < 0)

>>      {

>>        write_message ("error: waitpid\n");

>> -      abort ();

>> +      error_indicator = 1;

>> +      return;

>>      }

>>    if (status != 0)

>>      {

>>        write_message ("error: unexpected exit status from subprocess\n");

>> -      abort ();

>> +      error_indicator = 1;

>> +      return;

>>      }

>>  }

>>

> 

> OK

> 

>> @@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep)

>>      }

>>  }

>>

>> +/* Children processes.  */

>> +static pid_t sigusr1_sender_pids[5] = { -1 };

> 

> Doesn't this set [0] to -1 and [1..4] to 0 ?

> 

>> +static pid_t sigusr2_sender_pid = -1;

>> +

>> +static void

>> +kill_children (void)

>> +{

>> +  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

>> +    if (sigusr1_sender_pids[i] != -1)

>> +      kill (sigusr1_sender_pids[i], SIGKILL);

> 

> ...and this will perform kill (0, SIGKILL) if not otherwise initialized, which will kill this task immediately, if I understand correctly.  I presume that's not what you want.

> 

> Why not use 0 as your initial/uninitialized value, and test against that?  Also, that way, you'll never issue kill (0, ...).


Ack, I changed to use 0.

> 

>> +  if (sigusr2_sender_pid != -1)

>> +    kill (sigusr2_sender_pid, SIGKILL);

> 

> Same.

> 

>> +}

>> +

>>  static int

>>  do_test (void)

>>  {

>> +  atexit (kill_children);

>> +

>>    /* shared->barrier is intialized along with sigusr1_sender_pids

>>       below.  */

>>    shared = support_shared_allocate (sizeof (*shared));

>> @@ -148,14 +170,13 @@ do_test (void)

>>        return 1;

>>      }

>>

>> -  pid_t sigusr2_sender_pid = xfork ();

>> +  sigusr2_sender_pid = xfork ();

>>    if (sigusr2_sender_pid == 0)

>>      signal_sender (SIGUSR2, true);

>>

>>    /* Send SIGUSR1 signals from several processes.  Hopefully, one

>>       signal will hit one of the ciritical functions.  Use a barrier to

>>       avoid sending signals while not running fork/free/malloc.  */

>> -  pid_t sigusr1_sender_pids[5];

>>    {

>>      pthread_barrierattr_t attr;

>>      xpthread_barrierattr_init (&attr);

>> @@ -166,7 +187,7 @@ do_test (void)

>>    }

>>    for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

>>      {

>> -      sigusr1_sender_pids[i] = fork ();

>> +      sigusr1_sender_pids[i] = xfork ();

>>        if (sigusr1_sender_pids[i] == 0)

>>          signal_sender (SIGUSR1, false);

>>      }

>> @@ -211,7 +232,7 @@ do_test (void)

>>          ++malloc_signals;

>>        xpthread_barrier_wait (&shared->barrier);

>>

>> -      if (objects[slot] == NULL)

>> +      if (objects[slot] == NULL || error_indicator != 0)

>>          {

>>            printf ("error: malloc: %m\n");

>>            for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

>> @@ -225,10 +246,6 @@ do_test (void)

>>    for (int slot = 0; slot < malloc_objects; ++slot)

>>      free (objects[slot]);

>>

>> -  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)

>> -    kill (sigusr1_sender_pids[i], SIGKILL);

>> -  kill (sigusr2_sender_pid, SIGKILL);

>> -

> 

> OK

> 

> PC

> 


Thanks for the review, I will push with your suggested changes.

Patch

diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c
index 0602a94895..4caf61489f 100644
--- a/malloc/tst-mallocfork2.c
+++ b/malloc/tst-mallocfork2.c
@@ -62,6 +62,9 @@  static volatile sig_atomic_t sigusr1_received;
    progress.  Checked by liveness_signal_handler.  */
 static volatile sig_atomic_t progress_indicator = 1;
 
+/* Set to 1 if an error occurs in the signal handler.  */
+static volatile sig_atomic_t error_indicator = 0;
+
 static void
 sigusr1_handler (int signo)
 {
@@ -72,7 +75,8 @@  sigusr1_handler (int signo)
   if (pid == -1)
     {
       write_message ("error: fork\n");
-      abort ();
+      error_indicator = 1;
+      return;
     }
   if (pid == 0)
     _exit (0);
@@ -81,12 +85,14 @@  sigusr1_handler (int signo)
   if (ret < 0)
     {
       write_message ("error: waitpid\n");
-      abort ();
+      error_indicator = 1;
+      return;
     }
   if (status != 0)
     {
       write_message ("error: unexpected exit status from subprocess\n");
-      abort ();
+      error_indicator = 1;
+      return;
     }
 }
 
@@ -122,9 +128,25 @@  signal_sender (int signo, bool sleep)
     }
 }
 
+/* Children processes.  */
+static pid_t sigusr1_sender_pids[5] = { -1 };
+static pid_t sigusr2_sender_pid = -1;
+
+static void
+kill_children (void)
+{
+  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
+    if (sigusr1_sender_pids[i] != -1)
+      kill (sigusr1_sender_pids[i], SIGKILL);
+  if (sigusr2_sender_pid != -1)
+    kill (sigusr2_sender_pid, SIGKILL);
+}
+
 static int
 do_test (void)
 {
+  atexit (kill_children);
+
   /* shared->barrier is intialized along with sigusr1_sender_pids
      below.  */
   shared = support_shared_allocate (sizeof (*shared));
@@ -148,14 +170,13 @@  do_test (void)
       return 1;
     }
 
-  pid_t sigusr2_sender_pid = xfork ();
+  sigusr2_sender_pid = xfork ();
   if (sigusr2_sender_pid == 0)
     signal_sender (SIGUSR2, true);
 
   /* Send SIGUSR1 signals from several processes.  Hopefully, one
      signal will hit one of the ciritical functions.  Use a barrier to
      avoid sending signals while not running fork/free/malloc.  */
-  pid_t sigusr1_sender_pids[5];
   {
     pthread_barrierattr_t attr;
     xpthread_barrierattr_init (&attr);
@@ -166,7 +187,7 @@  do_test (void)
   }
   for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
     {
-      sigusr1_sender_pids[i] = fork ();
+      sigusr1_sender_pids[i] = xfork ();
       if (sigusr1_sender_pids[i] == 0)
         signal_sender (SIGUSR1, false);
     }
@@ -211,7 +232,7 @@  do_test (void)
         ++malloc_signals;
       xpthread_barrier_wait (&shared->barrier);
 
-      if (objects[slot] == NULL)
+      if (objects[slot] == NULL || error_indicator != 0)
         {
           printf ("error: malloc: %m\n");
           for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
@@ -225,10 +246,6 @@  do_test (void)
   for (int slot = 0; slot < malloc_objects; ++slot)
     free (objects[slot]);
 
-  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
-    kill (sigusr1_sender_pids[i], SIGKILL);
-  kill (sigusr2_sender_pid, SIGKILL);
-
   printf ("info: signals received during fork: %u\n", fork_signals);
   printf ("info: signals received during free: %u\n", free_signals);
   printf ("info: signals received during malloc: %u\n", malloc_signals);