Skip to content

Commit 04d1cb6

Browse files
vstinnermiss-islington
authored andcommitted
bpo-36402: Fix threading._shutdown() race condition (pythonGH-13948)
Fix a race condition at Python shutdown when waiting for threads. Wait until the Python thread state of all non-daemon threads get deleted (join all non-daemon threads), rather than just wait until Python threads complete. * Add threading._shutdown_locks: set of Thread._tstate_lock locks of non-daemon threads used by _shutdown() to wait until all Python thread states get deleted. See Thread._set_tstate_lock(). * Add also threading._shutdown_locks_lock to protect access to threading._shutdown_locks. * Add test_finalization_shutdown() test. (cherry picked from commit 468e5fe) Co-authored-by: Victor Stinner <[email protected]>
1 parent d561f84 commit 04d1cb6

File tree

3 files changed

+96
-12
lines changed

3 files changed

+96
-12
lines changed

Lib/test/test_threading.py

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,41 @@ def __del__(self):
578578
self.assertEqual(data.splitlines(),
579579
["GC: True True True"] * 2)
580580

581+
def test_finalization_shutdown(self):
582+
# bpo-36402: Py_Finalize() calls threading._shutdown() which must wait
583+
# until Python thread states of all non-daemon threads get deleted.
584+
#
585+
# Test similar to SubinterpThreadingTests.test_threads_join_2(), but
586+
# test the finalization of the main interpreter.
587+
code = """if 1:
588+
import os
589+
import threading
590+
import time
591+
import random
592+
593+
def random_sleep():
594+
seconds = random.random() * 0.010
595+
time.sleep(seconds)
596+
597+
class Sleeper:
598+
def __del__(self):
599+
random_sleep()
600+
601+
tls = threading.local()
602+
603+
def f():
604+
# Sleep a bit so that the thread is still running when
605+
# Py_Finalize() is called.
606+
random_sleep()
607+
tls.x = Sleeper()
608+
random_sleep()
609+
610+
threading.Thread(target=f).start()
611+
random_sleep()
612+
"""
613+
rc, out, err = assert_python_ok("-c", code)
614+
self.assertEqual(err, b"")
615+
581616
def test_tstate_lock(self):
582617
# Test an implementation detail of Thread objects.
583618
started = _thread.allocate_lock()
@@ -875,15 +910,22 @@ def test_threads_join(self):
875910
self.addCleanup(os.close, w)
876911
code = r"""if 1:
877912
import os
913+
import random
878914
import threading
879915
import time
880916
917+
def random_sleep():
918+
seconds = random.random() * 0.010
919+
time.sleep(seconds)
920+
881921
def f():
882922
# Sleep a bit so that the thread is still running when
883923
# Py_EndInterpreter is called.
884-
time.sleep(0.05)
924+
random_sleep()
885925
os.write(%d, b"x")
926+
886927
threading.Thread(target=f).start()
928+
random_sleep()
887929
""" % (w,)
888930
ret = test.support.run_in_subinterp(code)
889931
self.assertEqual(ret, 0)
@@ -900,22 +942,29 @@ def test_threads_join_2(self):
900942
self.addCleanup(os.close, w)
901943
code = r"""if 1:
902944
import os
945+
import random
903946
import threading
904947
import time
905948
949+
def random_sleep():
950+
seconds = random.random() * 0.010
951+
time.sleep(seconds)
952+
906953
class Sleeper:
907954
def __del__(self):
908-
time.sleep(0.05)
955+
random_sleep()
909956
910957
tls = threading.local()
911958
912959
def f():
913960
# Sleep a bit so that the thread is still running when
914961
# Py_EndInterpreter is called.
915-
time.sleep(0.05)
962+
random_sleep()
916963
tls.x = Sleeper()
917964
os.write(%d, b"x")
965+
918966
threading.Thread(target=f).start()
967+
random_sleep()
919968
""" % (w,)
920969
ret = test.support.run_in_subinterp(code)
921970
self.assertEqual(ret, 0)

Lib/threading.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,11 @@ def _newname(template="Thread-%d"):
733733
_active = {} # maps thread id to Thread object
734734
_limbo = {}
735735
_dangling = WeakSet()
736+
# Set of Thread._tstate_lock locks of non-daemon threads used by _shutdown()
737+
# to wait until all Python thread states get deleted:
738+
# see Thread._set_tstate_lock().
739+
_shutdown_locks_lock = _allocate_lock()
740+
_shutdown_locks = set()
736741

737742
# Main class for threads
738743

@@ -899,6 +904,10 @@ def _set_tstate_lock(self):
899904
self._tstate_lock = _set_sentinel()
900905
self._tstate_lock.acquire()
901906

907+
if not self.daemon:
908+
with _shutdown_locks_lock:
909+
_shutdown_locks.add(self._tstate_lock)
910+
902911
def _bootstrap_inner(self):
903912
try:
904913
self._set_ident()
@@ -987,6 +996,9 @@ def _stop(self):
987996
assert not lock.locked()
988997
self._is_stopped = True
989998
self._tstate_lock = None
999+
if not self.daemon:
1000+
with _shutdown_locks_lock:
1001+
_shutdown_locks.discard(self._tstate_lock)
9901002

9911003
def _delete(self):
9921004
"Remove current thread from the dict of currently running threads."
@@ -1261,6 +1273,9 @@ def enumerate():
12611273
_main_thread = _MainThread()
12621274

12631275
def _shutdown():
1276+
"""
1277+
Wait until the Python thread state of all non-daemon threads get deleted.
1278+
"""
12641279
# Obscure: other threads may be waiting to join _main_thread. That's
12651280
# dubious, but some code does it. We can't wait for C code to release
12661281
# the main thread's tstate_lock - that won't happen until the interpreter
@@ -1269,23 +1284,33 @@ def _shutdown():
12691284
if _main_thread._is_stopped:
12701285
# _shutdown() was already called
12711286
return
1287+
1288+
# Main thread
12721289
tlock = _main_thread._tstate_lock
12731290
# The main thread isn't finished yet, so its thread state lock can't have
12741291
# been released.
12751292
assert tlock is not None
12761293
assert tlock.locked()
12771294
tlock.release()
12781295
_main_thread._stop()
1279-
t = _pickSomeNonDaemonThread()
1280-
while t:
1281-
t.join()
1282-
t = _pickSomeNonDaemonThread()
12831296

1284-
def _pickSomeNonDaemonThread():
1285-
for t in enumerate():
1286-
if not t.daemon and t.is_alive():
1287-
return t
1288-
return None
1297+
# Join all non-deamon threads
1298+
while True:
1299+
with _shutdown_locks_lock:
1300+
locks = list(_shutdown_locks)
1301+
_shutdown_locks.clear()
1302+
1303+
if not locks:
1304+
break
1305+
1306+
for lock in locks:
1307+
# mimick Thread.join()
1308+
lock.acquire()
1309+
lock.release()
1310+
1311+
# new threads can be spawned while we were waiting for the other
1312+
# threads to complete
1313+
12891314

12901315
def main_thread():
12911316
"""Return the main thread object.
@@ -1311,12 +1336,18 @@ def _after_fork():
13111336
# Reset _active_limbo_lock, in case we forked while the lock was held
13121337
# by another (non-forked) thread. http://bugs.python.org/issue874900
13131338
global _active_limbo_lock, _main_thread
1339+
global _shutdown_locks_lock, _shutdown_locks
13141340
_active_limbo_lock = _allocate_lock()
13151341

13161342
# fork() only copied the current thread; clear references to others.
13171343
new_active = {}
13181344
current = current_thread()
13191345
_main_thread = current
1346+
1347+
# reset _shutdown() locks: threads re-register their _tstate_lock below
1348+
_shutdown_locks_lock = _allocate_lock()
1349+
_shutdown_locks = set()
1350+
13201351
with _active_limbo_lock:
13211352
# Dangling thread instances must still have their locks reset,
13221353
# because someone may join() them.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a race condition at Python shutdown when waiting for threads. Wait until
2+
the Python thread state of all non-daemon threads get deleted (join all
3+
non-daemon threads), rather than just wait until non-daemon Python threads
4+
complete.

0 commit comments

Comments
 (0)