[v7,5/5] gdb/infrun: handle already-exited threads when attempting to stop

Message ID e540fa513347a44c1ae4fc4aa41b506098ad97d8.1587563226.git.tankut.baris.aktemur@intel.com
State New
Headers show
Series
  • Untitled series #24757
Related show

Commit Message

Lancelot SIX via Gdb-patches April 22, 2020, 3 p.m.
In stop_all_threads, GDB sends signals to other threads in an attempt
to stop them.  While in a typical scenario the expected wait status is
TARGET_WAITKIND_STOPPED, it is possible that the thread GDB attempted
to stop has already terminated.  If so, a waitstatus other than
TARGET_WAITKIND_STOPPED would be received.  Handle this case
appropriately.

If a wait status that denotes thread termination is ignored, GDB goes
into an infinite loop in stop_all_threads.
E.g.:

  $ gdb ./a.out
  (gdb) start
  ...
  (gdb) add-inferior -exec ./a.out
  ...
  (gdb) inferior 2
  ...
  (gdb) start
  ...
  (gdb) set schedule-multiple on
  (gdb) set debug infrun 2
  (gdb) continue
  Continuing.
  infrun: clear_proceed_status_thread (process 10449)
  infrun: clear_proceed_status_thread (process 10453)
  infrun: proceed (addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT)
  infrun: proceed: resuming process 10449
  infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 10449] at 0x55555555514e
  infrun: infrun_async(1)
  infrun: prepare_to_wait
  infrun: proceed: resuming process 10453
  infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 10453] at 0x55555555514e
  infrun: prepare_to_wait
  infrun: Found 2 inferiors, starting at #0
  infrun: target_wait (-1.0.0, status) =
  infrun:   10449.10449.0 [process 10449],
  infrun:   status->kind = exited, status = 0
  infrun: handle_inferior_event status->kind = exited, status = 0
  [Inferior 1 (process 10449) exited normally]
  infrun: stop_waiting
  infrun: stop_all_threads
  infrun: stop_all_threads, pass=0, iterations=0
  infrun:   process 10453 executing, need stop
  infrun: target_wait (-1.0.0, status) =
  infrun:   10453.10453.0 [process 10453],
  infrun:   status->kind = exited, status = 0
  infrun: stop_all_threads status->kind = exited, status = 0 process 10453
  infrun:   process 10453 executing, already stopping
  infrun: target_wait (-1.0.0, status) =
  infrun:   -1.0.0 [process -1],
  infrun:   status->kind = no-resumed
  infrun: infrun_async(0)
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  infrun: stop_all_threads status->kind = no-resumed process -1
  infrun:   process 10453 executing, already stopping
  ...

And this polling goes on forever.  This patch prevents the infinite
looping behavior.  For the same scenario above, we obtain the
following behavior:

  ...
  (gdb) continue
  Continuing.
  infrun: clear_proceed_status_thread (process 31229)
  infrun: clear_proceed_status_thread (process 31233)
  infrun: proceed (addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT)
  infrun: proceed: resuming process 31229
  infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 31229] at 0x55555555514e
  infrun: infrun_async(1)
  infrun: prepare_to_wait
  infrun: proceed: resuming process 31233
  infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 31233] at 0x55555555514e
  infrun: prepare_to_wait
  infrun: Found 2 inferiors, starting at #0
  infrun: target_wait (-1.0.0, status) =
  infrun:   31229.31229.0 [process 31229],
  infrun:   status->kind = exited, status = 0
  infrun: handle_inferior_event status->kind = exited, status = 0
  [Inferior 1 (process 31229) exited normally]
  infrun: stop_waiting
  infrun: stop_all_threads
  infrun: stop_all_threads, pass=0, iterations=0
  infrun:   process 31233 executing, need stop
  infrun: target_wait (-1.0.0, status) =
  infrun:   31233.31233.0 [process 31233],
  infrun:   status->kind = exited, status = 0
  infrun: stop_all_threads status->kind = exited, status = 0 process 31233
  infrun: saving status status->kind = exited, status = 0 for 31233.31233.0
  infrun:   process 31233 not executing
  infrun: stop_all_threads, pass=1, iterations=1
  infrun:   process 31233 not executing
  infrun: stop_all_threads done
  (gdb)

The exit event from Inferior 1 is received and shown to the user.
The exit event from Inferior 2 is not displayed, but kept pending.

  (gdb) info inferiors
    Num  Description       Connection           Executable
  * 1    <null>                                 a.out
    2    process 31233     1 (native)           a.out
  (gdb) inferior 2
  [Switching to inferior 2 [process 31233] (a.out)]
  [Switching to thread 2.1 (process 31233)]
  Couldn't get registers: No such process.
  (gdb) continue
  Continuing.
  infrun: clear_proceed_status_thread (process 31233)
  infrun: clear_proceed_status_thread: thread process 31233 has pending wait status status->kind = exited, status = 0 (currently_stepping=0).
  infrun: proceed (addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT)
  infrun: proceed: resuming process 31233
  infrun: resume: thread process 31233 has pending wait status status->kind = exited, status = 0 (currently_stepping=0).
  infrun: prepare_to_wait
  infrun: Using pending wait status status->kind = exited, status = 0 for process 31233.
  infrun: target_wait (-1.0.0, status) =
  infrun:   31233.31233.0 [process 31233],
  infrun:   status->kind = exited, status = 0
  infrun: handle_inferior_event status->kind = exited, status = 0
  [Inferior 2 (process 31233) exited normally]
  infrun: stop_waiting
  (gdb) info inferiors
    Num  Description       Connection           Executable
    1    <null>                                 a.out
  * 2    <null>                                 a.out
  (gdb)

Regression-tested on X86_64 Linux.

gdb/ChangeLog:
2020-02-05  Tankut Baris Aktemur  <tankut.baris.aktemur@intel.com>
	    Tom de Vries  <tdevries@suse.de>

	PR threads/25478
	* infrun.c (stop_all_threads): Do NOT ignore
	TARGET_WAITKIND_NO_RESUMED, TARGET_WAITKIND_THREAD_EXITED,
	TARGET_WAITKIND_EXITED, TARGET_WAITKIND_SIGNALLED wait statuses
	received from threads we attempt to stop.

gdb/testsuite/ChangeLog:
2019-11-04  Tankut Baris Aktemur  <tankut.baris.aktemur@intel.com>

	* gdb.multi/multi-exit.c: New file.
	* gdb.multi/multi-exit.exp: New file.
	* gdb.multi/multi-kill.c: New file.
	* gdb.multi/multi-kill.exp: New file.

Change-Id: I7cec98f40283773b79255d998511da434e9cd408
---
 gdb/infrun.c                           | 101 ++++++++++++++++++++--
 gdb/testsuite/gdb.multi/multi-exit.c   |  22 +++++
 gdb/testsuite/gdb.multi/multi-exit.exp | 111 +++++++++++++++++++++++++
 gdb/testsuite/gdb.multi/multi-kill.c   |  42 ++++++++++
 gdb/testsuite/gdb.multi/multi-kill.exp | 110 ++++++++++++++++++++++++
 5 files changed, 379 insertions(+), 7 deletions(-)
 create mode 100644 gdb/testsuite/gdb.multi/multi-exit.c
 create mode 100644 gdb/testsuite/gdb.multi/multi-exit.exp
 create mode 100644 gdb/testsuite/gdb.multi/multi-kill.c
 create mode 100644 gdb/testsuite/gdb.multi/multi-kill.exp

-- 
2.17.1

Comments

Lancelot SIX via Gdb-patches April 22, 2020, 4:07 p.m. | #1
On Wednesday, April 22, 2020 5:01 PM, Aktemur, Tankut Baris wrote:
> diff --git a/gdb/infrun.c b/gdb/infrun.c

> index 167d50ff3ab..93169269553 100644

> --- a/gdb/infrun.c

> +++ b/gdb/infrun.c

> @@ -4781,7 +4781,47 @@ stop_all_threads (void)

>  	{

>  	  int need_wait = 0;

> 

> -	  update_thread_list ();

> +	  for (inferior *inf : all_non_exited_inferiors ())

> +	    {


Sorry.  There is a 'switch_to_inferior_no_thread (inf);', here.

> +	      update_thread_list ();

> +

> +	      /* After updating the thread list, it's possible to end

> +		 up with pid != 0 but no threads, if the inf's process

> +		 has exited but we have not processed that event yet.

> +		 The exit event must be waiting somewhere in the queue

> +		 to be processed.  Silently add a thread so that we do

> +		 a wait_one() below to pick the pending event.  */

> +

> +	      bool has_threads = false;

> +	      for (thread_info *tp ATTRIBUTE_UNUSED

> +		     : inf->non_exited_threads ())

> +		{

> +		  has_threads = true;

> +		  break;

> +		}

> +

> +	      if (has_threads)

> +		continue;

> +

> +	      ptid_t ptid (inf->pid, inf->pid, 0);

> +

> +	      /* Re-surrect the thread, if not physically deleted.

> +		 Add a new one otherwise.  */

> +	      thread_info *t = find_thread_ptid (inf->process_target (), ptid);

> +	      gdb_assert (t == nullptr || t->state == THREAD_EXITED);

> +

> +	      if (debug_infrun)

> +		fprintf_unfiltered (gdb_stdlog, "infrun: stop_all_threads,"

> +				    " inf (pid %d): %s a thread", inf->pid,

> +				    (t == nullptr)

> +				    ? "adding" : "resurrecting");

> +

> +	      if (t == nullptr)

> +		t = add_thread_silent (inf->process_target (), ptid);

> +

> +	      set_executing (inf->process_target (), ptid, true);

> +	      t->state = THREAD_STOPPED;

> +	    }


Thanks.
-Baris


Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Gary Kershaw
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928
Lancelot SIX via Gdb-patches May 3, 2020, 3:38 p.m. | #2
On Wednesday, April 22, 2020 5:01 PM, Tankut Baris Aktemur wrote:
> In stop_all_threads, GDB sends signals to other threads in an attempt

> to stop them.  While in a typical scenario the expected wait status is

> TARGET_WAITKIND_STOPPED, it is possible that the thread GDB attempted

> to stop has already terminated.  If so, a waitstatus other than

> TARGET_WAITKIND_STOPPED would be received.  Handle this case

> appropriately.


I noticed that at the beginning of stop_all_threads, we have this:

  scoped_restore_current_thread restore_thread;

  target_thread_events (1);
  SCOPE_EXIT { target_thread_events (0); };

This is not quite right, because the current top target at the scope exit
time is not necessarily the same as the current top target at the function
start.  The 'scoped_restore_current_thread' should have come after 'SCOPE_EXIT
 { target_thread_events (0); };'.  Yet, this re-ordering of the statements
would still not be sufficient, because that would enable/disable the thread
events for only the current top target.  I think this has to be done for all
targets.

So, I've just emailed a two-patch series where the first one defines a convenience
method, 'all_non_exited_process_targets', to get a collection of targets:

  https://sourceware.org/pipermail/gdb-patches/2020-May/168172.html
  https://sourceware.org/pipermail/gdb-patches/2020-May/168173.html 

During the review period of the multi-target patches, Tom Tromey had commented
that such a method to allow for-each iteration over the targets would be useful.
For instance, a place where 'all_non_exited_process_targets' could be used is...

> diff --git a/gdb/infrun.c b/gdb/infrun.c

> index 167d50ff3ab..93169269553 100644

> --- a/gdb/infrun.c

> +++ b/gdb/infrun.c

> @@ -4781,7 +4781,47 @@ stop_all_threads (void)

>  	{

>  	  int need_wait = 0;

> 

> -	  update_thread_list ();

> +	  for (inferior *inf : all_non_exited_inferiors ())

> +	    {

> +	      update_thread_list ();


... this iteration, to avoid calling 'update_thread_list' multiple times on
a particular target.  I can submit a revision, if you think that's appropriate.

Regards.
-Baris


Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Gary Kershaw
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928
Lancelot SIX via Gdb-patches May 13, 2020, 9:15 p.m. | #3
Hi Tankut,

I've sent my updated version of this patch, along with the
series, here:
  https://sourceware.org/pipermail/gdb-patches/2020-May/168478.html

See some comments below.

On 4/22/20 4:00 PM, Tankut Baris Aktemur via Gdb-patches wrote:
> In stop_all_threads, GDB sends signals to other threads in an attempt

> to stop them.  While in a typical scenario the expected wait status is

> TARGET_WAITKIND_STOPPED, it is possible that the thread GDB attempted

> to stop has already terminated.  If so, a waitstatus other than

> TARGET_WAITKIND_STOPPED would be received.  Handle this case

> appropriately.

> 

> If a wait status that denotes thread termination is ignored, GDB goes

> into an infinite loop in stop_all_threads.

> E.g.:

> 

>   $ gdb ./a.out

>   (gdb) start

>   ...

>   (gdb) add-inferior -exec ./a.out

>   ...

>   (gdb) inferior 2

>   ...

>   (gdb) start

>   ...

>   (gdb) set schedule-multiple on

>   (gdb) set debug infrun 2

>   (gdb) continue

>   Continuing.

>   infrun: clear_proceed_status_thread (process 10449)

>   infrun: clear_proceed_status_thread (process 10453)

>   infrun: proceed (addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT)

>   infrun: proceed: resuming process 10449

>   infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 10449] at 0x55555555514e

>   infrun: infrun_async(1)

>   infrun: prepare_to_wait

>   infrun: proceed: resuming process 10453

>   infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 10453] at 0x55555555514e

>   infrun: prepare_to_wait

>   infrun: Found 2 inferiors, starting at #0

>   infrun: target_wait (-1.0.0, status) =

>   infrun:   10449.10449.0 [process 10449],

>   infrun:   status->kind = exited, status = 0

>   infrun: handle_inferior_event status->kind = exited, status = 0

>   [Inferior 1 (process 10449) exited normally]

>   infrun: stop_waiting

>   infrun: stop_all_threads

>   infrun: stop_all_threads, pass=0, iterations=0

>   infrun:   process 10453 executing, need stop

>   infrun: target_wait (-1.0.0, status) =

>   infrun:   10453.10453.0 [process 10453],

>   infrun:   status->kind = exited, status = 0

>   infrun: stop_all_threads status->kind = exited, status = 0 process 10453

>   infrun:   process 10453 executing, already stopping

>   infrun: target_wait (-1.0.0, status) =

>   infrun:   -1.0.0 [process -1],

>   infrun:   status->kind = no-resumed

>   infrun: infrun_async(0)

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   infrun: stop_all_threads status->kind = no-resumed process -1

>   infrun:   process 10453 executing, already stopping

>   ...

> 

> And this polling goes on forever.  This patch prevents the infinite

> looping behavior.  For the same scenario above, we obtain the

> following behavior:

> 

>   ...

>   (gdb) continue

>   Continuing.

>   infrun: clear_proceed_status_thread (process 31229)

>   infrun: clear_proceed_status_thread (process 31233)

>   infrun: proceed (addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT)

>   infrun: proceed: resuming process 31229

>   infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 31229] at 0x55555555514e

>   infrun: infrun_async(1)

>   infrun: prepare_to_wait

>   infrun: proceed: resuming process 31233

>   infrun: resume (step=0, signal=GDB_SIGNAL_0), trap_expected=0, current thread [process 31233] at 0x55555555514e

>   infrun: prepare_to_wait

>   infrun: Found 2 inferiors, starting at #0

>   infrun: target_wait (-1.0.0, status) =

>   infrun:   31229.31229.0 [process 31229],

>   infrun:   status->kind = exited, status = 0

>   infrun: handle_inferior_event status->kind = exited, status = 0

>   [Inferior 1 (process 31229) exited normally]

>   infrun: stop_waiting

>   infrun: stop_all_threads

>   infrun: stop_all_threads, pass=0, iterations=0

>   infrun:   process 31233 executing, need stop

>   infrun: target_wait (-1.0.0, status) =

>   infrun:   31233.31233.0 [process 31233],

>   infrun:   status->kind = exited, status = 0

>   infrun: stop_all_threads status->kind = exited, status = 0 process 31233

>   infrun: saving status status->kind = exited, status = 0 for 31233.31233.0

>   infrun:   process 31233 not executing

>   infrun: stop_all_threads, pass=1, iterations=1

>   infrun:   process 31233 not executing

>   infrun: stop_all_threads done

>   (gdb)

> 

> The exit event from Inferior 1 is received and shown to the user.

> The exit event from Inferior 2 is not displayed, but kept pending.

> 

>   (gdb) info inferiors

>     Num  Description       Connection           Executable

>   * 1    <null>                                 a.out

>     2    process 31233     1 (native)           a.out

>   (gdb) inferior 2

>   [Switching to inferior 2 [process 31233] (a.out)]

>   [Switching to thread 2.1 (process 31233)]

>   Couldn't get registers: No such process.

>   (gdb) continue

>   Continuing.

>   infrun: clear_proceed_status_thread (process 31233)

>   infrun: clear_proceed_status_thread: thread process 31233 has pending wait status status->kind = exited, status = 0 (currently_stepping=0).

>   infrun: proceed (addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT)

>   infrun: proceed: resuming process 31233

>   infrun: resume: thread process 31233 has pending wait status status->kind = exited, status = 0 (currently_stepping=0).

>   infrun: prepare_to_wait

>   infrun: Using pending wait status status->kind = exited, status = 0 for process 31233.

>   infrun: target_wait (-1.0.0, status) =

>   infrun:   31233.31233.0 [process 31233],

>   infrun:   status->kind = exited, status = 0

>   infrun: handle_inferior_event status->kind = exited, status = 0

>   [Inferior 2 (process 31233) exited normally]

>   infrun: stop_waiting

>   (gdb) info inferiors

>     Num  Description       Connection           Executable

>     1    <null>                                 a.out

>   * 2    <null>                                 a.out

>   (gdb)

> 

> Regression-tested on X86_64 Linux.

> 

> gdb/ChangeLog:

> 2020-02-05  Tankut Baris Aktemur  <tankut.baris.aktemur@intel.com>

> 	    Tom de Vries  <tdevries@suse.de>

> 

> 	PR threads/25478

> 	* infrun.c (stop_all_threads): Do NOT ignore

> 	TARGET_WAITKIND_NO_RESUMED, TARGET_WAITKIND_THREAD_EXITED,

> 	TARGET_WAITKIND_EXITED, TARGET_WAITKIND_SIGNALLED wait statuses

> 	received from threads we attempt to stop.

> 

> gdb/testsuite/ChangeLog:

> 2019-11-04  Tankut Baris Aktemur  <tankut.baris.aktemur@intel.com>

> 

> 	* gdb.multi/multi-exit.c: New file.

> 	* gdb.multi/multi-exit.exp: New file.

> 	* gdb.multi/multi-kill.c: New file.

> 	* gdb.multi/multi-kill.exp: New file.

> 

> Change-Id: I7cec98f40283773b79255d998511da434e9cd408

> ---

>  gdb/infrun.c                           | 101 ++++++++++++++++++++--

>  gdb/testsuite/gdb.multi/multi-exit.c   |  22 +++++

>  gdb/testsuite/gdb.multi/multi-exit.exp | 111 +++++++++++++++++++++++++

>  gdb/testsuite/gdb.multi/multi-kill.c   |  42 ++++++++++

>  gdb/testsuite/gdb.multi/multi-kill.exp | 110 ++++++++++++++++++++++++

>  5 files changed, 379 insertions(+), 7 deletions(-)

>  create mode 100644 gdb/testsuite/gdb.multi/multi-exit.c

>  create mode 100644 gdb/testsuite/gdb.multi/multi-exit.exp

>  create mode 100644 gdb/testsuite/gdb.multi/multi-kill.c

>  create mode 100644 gdb/testsuite/gdb.multi/multi-kill.exp

> 

> diff --git a/gdb/infrun.c b/gdb/infrun.c

> index 167d50ff3ab..93169269553 100644

> --- a/gdb/infrun.c

> +++ b/gdb/infrun.c

> @@ -4781,7 +4781,47 @@ stop_all_threads (void)

>  	{

>  	  int need_wait = 0;

>  

> -	  update_thread_list ();

> +	  for (inferior *inf : all_non_exited_inferiors ())

> +	    {

> +	      update_thread_list ();

> +

> +	      /* After updating the thread list, it's possible to end

> +		 up with pid != 0 but no threads, if the inf's process

> +		 has exited but we have not processed that event yet.

> +		 The exit event must be waiting somewhere in the queue

> +		 to be processed.  Silently add a thread so that we do

> +		 a wait_one() below to pick the pending event.  */

> +

> +	      bool has_threads = false;

> +	      for (thread_info *tp ATTRIBUTE_UNUSED

> +		     : inf->non_exited_threads ())

> +		{

> +		  has_threads = true;

> +		  break;

> +		}

> +

> +	      if (has_threads)

> +		continue;

> +

> +	      ptid_t ptid (inf->pid, inf->pid, 0);

> +


This here is what make me go think through all this.  I don't really
like it to have common code make up the ptid to use for the new thread.
That should be the responsibility of the target backend.  It may
be that the target backend uses the tid field to store important
information, for example.

I also think that having to re-add a thread is not ideal.
As we move GDB towards more multi-target support, and potentially other
kinds of execution objects, I think that it's likely that we will always
make it the responsibility of the target backend to create a thread, since
it's going to the the target that knows the actual (C++) type of the thread
Imagine target-specific classes that inherit from a common thread_info
class, with virtual methods.

After giving it some thought and experimentation, I think we should
go back to your idea of not deleting the last thread of the process.
But let's keep it simple without current_inferior() checks.
When that solution was brought up before, I pointed at the code 
handle_no_resumed that handles the case of inferiors with no threads.
I managed to reproduce that scenario, and confirm that we still
handle it OK.

I ended up squashing the remote and infrun changes in a single patch,
as they're part of the same logical change.

Finally, while playing with the testcases, I thought I'd make them
spawn more inferiors, so that we're more sure we hit the race window.
The way I've adjusted the testcases, it's simple to make them spawn
any number of inferiors -- you just have to change one global.

Please take a look and let me know what you think.

Thanks,
Pedro Alves
Lancelot SIX via Gdb-patches May 14, 2020, 8:50 a.m. | #4
On Wednesday, May 13, 2020 11:15 PM, Pedro Alves wrote:
> > diff --git a/gdb/infrun.c b/gdb/infrun.c

> > index 167d50ff3ab..93169269553 100644

> > --- a/gdb/infrun.c

> > +++ b/gdb/infrun.c

> > @@ -4781,7 +4781,47 @@ stop_all_threads (void)

> >  	{

> >  	  int need_wait = 0;

> >

> > -	  update_thread_list ();

> > +	  for (inferior *inf : all_non_exited_inferiors ())

> > +	    {

> > +	      update_thread_list ();

> > +

> > +	      /* After updating the thread list, it's possible to end

> > +		 up with pid != 0 but no threads, if the inf's process

> > +		 has exited but we have not processed that event yet.

> > +		 The exit event must be waiting somewhere in the queue

> > +		 to be processed.  Silently add a thread so that we do

> > +		 a wait_one() below to pick the pending event.  */

> > +

> > +	      bool has_threads = false;

> > +	      for (thread_info *tp ATTRIBUTE_UNUSED

> > +		     : inf->non_exited_threads ())

> > +		{

> > +		  has_threads = true;

> > +		  break;

> > +		}

> > +

> > +	      if (has_threads)

> > +		continue;

> > +

> > +	      ptid_t ptid (inf->pid, inf->pid, 0);

> > +

> 

> This here is what make me go think through all this.  I don't really

> like it to have common code make up the ptid to use for the new thread.

> That should be the responsibility of the target backend.  It may

> be that the target backend uses the tid field to store important

> information, for example.

> 

> I also think that having to re-add a thread is not ideal.

> As we move GDB towards more multi-target support, and potentially other

> kinds of execution objects, I think that it's likely that we will always

> make it the responsibility of the target backend to create a thread, since

> it's going to the the target that knows the actual (C++) type of the thread

> Imagine target-specific classes that inherit from a common thread_info

> class, with virtual methods.

> 

> After giving it some thought and experimentation, I think we should

> go back to your idea of not deleting the last thread of the process.

> But let's keep it simple without current_inferior() checks.

> When that solution was brought up before, I pointed at the code

> handle_no_resumed that handles the case of inferiors with no threads.

> I managed to reproduce that scenario, and confirm that we still

> handle it OK.

> 

> I ended up squashing the remote and infrun changes in a single patch,

> as they're part of the same logical change.

> 

> Finally, while playing with the testcases, I thought I'd make them

> spawn more inferiors, so that we're more sure we hit the race window.

> The way I've adjusted the testcases, it's simple to make them spawn

> any number of inferiors -- you just have to change one global.

> 

> Please take a look and let me know what you think.


Thank you for checking and updating the broader impact!  This revision looks
very good to me.  I had minor comments about 3 patches and I emailed them.
I pushed the suggested changes to this branch for convenience:

  https://github.com/barisaktemur/gdb/commits/thread-exit-in-stop-all-threads-v9

I repeated the regression testing with this branch, too.

Regards,
-Baris


Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Gary Kershaw
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928
Lancelot SIX via Gdb-patches May 14, 2020, 11:25 a.m. | #5
On 5/13/20 10:15 PM, Pedro Alves via Gdb-patches wrote:

> After giving it some thought and experimentation, I think we should

> go back to your idea of not deleting the last thread of the process.


There's one point that I forgot to mention about the benefit of this
approach, which I should mention for the archives.

The current use case we're handling deals with the update_thread_list
call from within stop_all_threads.

However, update_thread_list calls can happen at any point while the
inferior is running.  Say, in non-stop mode or async background mode, and
the user types "info threads".  At that point, an inferior may have exited,
and GDB finds no threads for the process, before the inferior exit event
is seen.  If that happens, then the user is not able to switch to that
inferior any longer in order to collect the process exit.  The solution in
v8 handles this scenario too, while the "don't delete if pending event"
solution only handled the stop_all_threads scenario.

Thanks,
Pedro Alves
Lancelot SIX via Gdb-patches May 14, 2020, 11:32 a.m. | #6
On 5/14/20 9:50 AM, Aktemur, Tankut Baris via Gdb-patches wrote:

> Thank you for checking and updating the broader impact!  This revision looks

> very good to me.  I had minor comments about 3 patches and I emailed them.

> I pushed the suggested changes to this branch for convenience:

> 

>   https://github.com/barisaktemur/gdb/commits/thread-exit-in-stop-all-threads-v9

> 

> I repeated the regression testing with this branch, too.


Great, thanks for doing that!  Other than the small comments I made in response
to the patches in v8, I agree with all your changes.  This looks good to me.

Thanks,
Pedro Alves
Lancelot SIX via Gdb-patches May 14, 2020, 11:42 a.m. | #7
On Thursday, May 14, 2020 1:32 PM, Pedro Alves wrote:
> > Thank you for checking and updating the broader impact!  This revision looks

> > very good to me.  I had minor comments about 3 patches and I emailed them.

> > I pushed the suggested changes to this branch for convenience:

> >

> >   https://github.com/barisaktemur/gdb/commits/thread-exit-in-stop-all-threads-v9

> >

> > I repeated the regression testing with this branch, too.

> 

> Great, thanks for doing that!  Other than the small comments I made in response

> to the patches in v8, I agree with all your changes.  This looks good to me.

> 

> Thanks,

> Pedro Alves


I'll push the series later today.  Thank you.

-Baris


Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Gary Kershaw
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928

Patch

diff --git a/gdb/infrun.c b/gdb/infrun.c
index 167d50ff3ab..93169269553 100644
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -4781,7 +4781,47 @@  stop_all_threads (void)
 	{
 	  int need_wait = 0;
 
-	  update_thread_list ();
+	  for (inferior *inf : all_non_exited_inferiors ())
+	    {
+	      update_thread_list ();
+
+	      /* After updating the thread list, it's possible to end
+		 up with pid != 0 but no threads, if the inf's process
+		 has exited but we have not processed that event yet.
+		 The exit event must be waiting somewhere in the queue
+		 to be processed.  Silently add a thread so that we do
+		 a wait_one() below to pick the pending event.  */
+
+	      bool has_threads = false;
+	      for (thread_info *tp ATTRIBUTE_UNUSED
+		     : inf->non_exited_threads ())
+		{
+		  has_threads = true;
+		  break;
+		}
+
+	      if (has_threads)
+		continue;
+
+	      ptid_t ptid (inf->pid, inf->pid, 0);
+
+	      /* Re-surrect the thread, if not physically deleted.
+		 Add a new one otherwise.  */
+	      thread_info *t = find_thread_ptid (inf->process_target (), ptid);
+	      gdb_assert (t == nullptr || t->state == THREAD_EXITED);
+
+	      if (debug_infrun)
+		fprintf_unfiltered (gdb_stdlog, "infrun: stop_all_threads,"
+				    " inf (pid %d): %s a thread", inf->pid,
+				    (t == nullptr)
+				    ? "adding" : "resurrecting");
+
+	      if (t == nullptr)
+		t = add_thread_silent (inf->process_target (), ptid);
+
+	      set_executing (inf->process_target (), ptid, true);
+	      t->state = THREAD_STOPPED;
+	    }
 
 	  /* Go through all threads looking for threads that we need
 	     to tell the target to stop.  */
@@ -4856,13 +4896,60 @@  stop_all_threads (void)
 				  target_pid_to_str (event.ptid).c_str ());
 	    }
 
-	  if (event.ws.kind == TARGET_WAITKIND_NO_RESUMED
-	      || event.ws.kind == TARGET_WAITKIND_THREAD_EXITED
-	      || event.ws.kind == TARGET_WAITKIND_EXITED
-	      || event.ws.kind == TARGET_WAITKIND_SIGNALLED)
+	  if (event.ws.kind == TARGET_WAITKIND_NO_RESUMED)
 	    {
-	      /* All resumed threads exited
-		 or one thread/process exited/signalled.  */
+	      /* All resumed threads exited.  */
+	    }
+	  else if (event.ws.kind == TARGET_WAITKIND_THREAD_EXITED
+		   || event.ws.kind == TARGET_WAITKIND_EXITED
+		   || event.ws.kind == TARGET_WAITKIND_SIGNALLED)
+	    {
+	      /* One thread/process exited/signalled.  */
+
+	      thread_info *t = nullptr;
+
+	      /* The target may have reported just a pid.  If so, try
+		 the first non-exited thread.  */
+	      if (event.ptid.is_pid ())
+		{
+		  int pid  = event.ptid.pid ();
+		  inferior *inf = find_inferior_pid (event.target, pid);
+		  for (thread_info *tp : inf->non_exited_threads ())
+		    {
+		      t = tp;
+		      break;
+		    }
+
+		  /* FIXME: If there is no available thread, the event
+		     would have to be appended to a per-inferior event
+		     list, which, unfortunately, does not exist yet.  We
+		     assert here instead of going into an infinite loop.  */
+		  gdb_assert (t != nullptr);
+
+		  if (debug_infrun)
+		    fprintf_unfiltered (gdb_stdlog,
+					"infrun: stop_all_threads, using %s\n",
+					target_pid_to_str (t->ptid).c_str ());
+		}
+	      else
+		{
+		  t = find_thread_ptid (event.target, event.ptid);
+		  /* Check if this is the first time we see this thread.
+		     Don't bother adding if it individually exited.  */
+		  if (t == nullptr
+		      && event.ws.kind != TARGET_WAITKIND_THREAD_EXITED)
+		    t = add_thread (event.target, event.ptid);
+		}
+
+	      if (t != nullptr)
+		{
+		  /* Set the threads as non-executing to avoid
+		     another stop attempt on them.  */
+		  mark_non_executing_threads (event.target, event.ptid,
+					      event.ws);
+		  save_waitstatus (t, &event.ws);
+		  t->stop_requested = false;
+		}
 	    }
 	  else
 	    {
diff --git a/gdb/testsuite/gdb.multi/multi-exit.c b/gdb/testsuite/gdb.multi/multi-exit.c
new file mode 100644
index 00000000000..f4825c8a7c1
--- /dev/null
+++ b/gdb/testsuite/gdb.multi/multi-exit.c
@@ -0,0 +1,22 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2020 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+int
+main ()
+{
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.multi/multi-exit.exp b/gdb/testsuite/gdb.multi/multi-exit.exp
new file mode 100644
index 00000000000..e8f188ca58a
--- /dev/null
+++ b/gdb/testsuite/gdb.multi/multi-exit.exp
@@ -0,0 +1,111 @@ 
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2020 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test receiving TARGET_WAITKIND_EXITED events from multiple
+# inferiors.  In all stop-mode, upon receiving the exit event from one
+# of the inferiors, GDB will try to stop the other inferior, too.  So,
+# a stop request will be sent.  Receiving a TARGET_WAITKIND_EXITED
+# status kind as a response to that stop request instead of a
+# TARGET_WAITKIND_STOPPED should be handled by GDB without problems.
+
+standard_testfile
+
+if {[use_gdb_stub]} {
+    return 0
+}
+
+if {[build_executable "failed to prepare" $testfile $srcfile]} {
+    return -1
+}
+
+# We are testing GDB's ability to stop all threads.
+# Hence, go with the all-stop-on-top-of-non-stop mode.
+save_vars { GDBFLAGS } {
+    append GDBFLAGS " -ex \"maint set target-non-stop on\""
+    clean_restart ${binfile}
+}
+
+with_test_prefix "inf 1" {
+    gdb_load $binfile
+
+    if {[gdb_start_cmd] < 0} {
+	fail "could not start"
+	return -1
+    }
+    gdb_test "" ".*reakpoint ., main .*${srcfile}.*" "start"
+}
+
+# Start another inferior.
+gdb_test "add-inferior" "Added inferior 2.*" \
+    "add empty inferior 2"
+gdb_test "inferior 2" "Switching to inferior 2.*" \
+    "switch to inferior 2"
+
+with_test_prefix "inf 2" {
+    gdb_load $binfile
+
+    if {[gdb_start_cmd] < 0} {
+	fail "could not start"
+	return -1
+    }
+    gdb_test "" ".*reakpoint ., main .*${srcfile}.*" "start"
+}
+
+# We want to continue both processes.
+gdb_test_no_output "set schedule-multiple on"
+
+set exited_inferior ""
+
+# We want GDB to complete the command and return the prompt
+# instead of going into an infinite loop.
+gdb_test_multiple "continue" "first continue" {
+    -re "Inferior ($decimal) \[^\n\r\]+ exited normally.*$gdb_prompt $" {
+	set exited_inferior $expect_out(1,string)
+	pass $gdb_test_name
+    }
+}
+
+if {$exited_inferior == ""} {
+    fail "first continue"
+    return -1
+}
+
+if {$exited_inferior == 1} {
+    set other_inferior 2
+} else {
+    set other_inferior 1
+}
+
+# Switch to the other inferior and check it, too, continues to the end.
+gdb_test "inferior $other_inferior" \
+    ".*Switching to inferior $other_inferior.*" \
+    "switch to the other inferior"
+
+gdb_continue_to_end
+
+# Finally, check if we can re-run both inferiors.
+delete_breakpoints
+
+gdb_test "run" "$inferior_exited_re normally\]" \
+    "re-run the other inferior"
+
+gdb_test "inferior $exited_inferior" \
+    ".*Switching to inferior $exited_inferior.*" \
+    "switch to the first exited inferior"
+
+gdb_test "run" "$inferior_exited_re normally\]" \
+    "re-run the first exited inferior"
diff --git a/gdb/testsuite/gdb.multi/multi-kill.c b/gdb/testsuite/gdb.multi/multi-kill.c
new file mode 100644
index 00000000000..66642bbb0e6
--- /dev/null
+++ b/gdb/testsuite/gdb.multi/multi-kill.c
@@ -0,0 +1,42 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2020 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <sys/types.h>
+#include <unistd.h>
+
+static pid_t pid;
+
+static void
+initialized ()
+{
+}
+
+int
+main ()
+{
+  pid = getpid ();
+  initialized ();
+
+  /* Don't run forever in case GDB crashes and DejaGNU fails to kill
+     this program.  */
+  alarm (10);
+
+  while (1)
+    ;
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.multi/multi-kill.exp b/gdb/testsuite/gdb.multi/multi-kill.exp
new file mode 100644
index 00000000000..706bbeb542c
--- /dev/null
+++ b/gdb/testsuite/gdb.multi/multi-kill.exp
@@ -0,0 +1,110 @@ 
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2020 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test receiving TARGET_WAITKIND_SIGNALLED events from multiple
+# inferiors.  In all stop-mode, upon receiving the exit event from one
+# of the inferiors, GDB will try to stop the other inferior, too.  So,
+# a stop request will be sent.  Receiving a TARGET_WAITKIND_SIGNALLED
+# status kind as a response to that stop request instead of a
+# TARGET_WAITKIND_STOPPED should be handled by GDB without problems.
+
+standard_testfile
+
+if {[use_gdb_stub]} {
+    return 0
+}
+
+if {[build_executable "failed to prepare" $testfile $srcfile {debug}]} {
+    return -1
+}
+
+# We are testing GDB's ability to stop all threads.
+# Hence, go with the all-stop-on-top-of-non-stop mode.
+save_vars { GDBFLAGS } {
+    append GDBFLAGS " -ex \"maint set target-non-stop on\""
+    clean_restart ${binfile}
+}
+
+# Initialize an inferior and return its pid.
+
+proc initialize_inferior {prefix} {
+    global binfile srcfile
+    with_test_prefix $prefix {
+	gdb_load $binfile
+
+	gdb_breakpoint "initialized" {temporary}
+	gdb_run_cmd
+	gdb_test "" ".*reakpoint ., initialized .*${srcfile}.*" "run"
+
+	return [get_integer_valueof "pid" -1]
+    }
+}
+
+set testpid1 [initialize_inferior "inf 1"]
+if {$testpid1 == -1} {
+    return -1
+}
+
+# Start another inferior.
+gdb_test "add-inferior" "Added inferior 2.*" \
+    "add empty inferior 2"
+gdb_test "inferior 2" "Switching to inferior 2.*" \
+    "switch to inferior 2"
+
+set testpid2 [initialize_inferior "inf 2"]
+if {$testpid2 == -1} {
+    return -1
+}
+
+# We want to continue both processes.
+gdb_test_no_output "set schedule-multiple on"
+
+# Resume, but then kill both from outside.
+gdb_test_multiple "continue" "continue processes" {
+    -re "Continuing.\[\r\n\]+" {
+	# Kill both processes at once.
+	remote_exec target "kill -9 ${testpid1} ${testpid2}"
+	exp_continue
+    }
+    -re "Program terminated with signal.*$gdb_prompt" {
+	pass $gdb_test_name
+    }
+}
+
+# Find the current inferior's id.
+set current_inferior 1
+gdb_test_multiple "info inferiors" "find the current inf id" {
+    -re "\\* 1 .*$gdb_prompt $" {
+	set current_inferior 1
+	set other_inferior 2
+	pass $gdb_test_name
+    }
+    -re "\\* 2 .*$gdb_prompt $" {
+	set current_inferior 2
+	set other_inferior 1
+	pass $gdb_test_name
+    }
+}
+
+# Switch to the other inferior and check it, too, continues to the end.
+gdb_test "inferior $other_inferior" \
+    ".*Switching to inferior $other_inferior.*" \
+    "switch to the other inferior"
+
+gdb_test "continue" \
+    "Program terminated with signal SIGKILL, .*" \
+    "continue the other inferior"